• Save
Parsing JSON with a single regex
Upcoming SlideShare
Loading in...5
×
 

Parsing JSON with a single regex

on

  • 23,096 views

I explain the features Randal Schwartz used to parse JSON in a single regex.

I explain the features Randal Schwartz used to parse JSON in a single regex.

Statistics

Views

Total Views
23,096
Views on SlideShare
4,009
Embed Views
19,087

Actions

Likes
14
Downloads
2
Comments
0

19 Embeds 19,087

http://blogs.perl.org 18299
http://pradeeppant.com 654
http://cloud.feedly.com 78
http://www.feedspot.com 14
https://twitter.com 10
http://digg.com 9
http://news.int80.biz 6
http://translate.googleusercontent.com 4
http://newsblur.com 3
https://www.google.fr 1
http://www.linkedin.com 1
https://www.google.ca 1
http://www.newsblur.com 1
http://www.hanrss.com 1
http://plus.url.google.com 1
http://165.71.32.238 1
http://webcache.googleusercontent.com 1
http://localhost 1
http://www.pradeeppant.com 1
More...

Accessibility

Categories

Upload Details

Uploaded as Adobe PDF

Usage Rights

© All Rights Reserved

Report content

Flagged as inappropriate Flag as inappropriate
Flag as inappropriate

Select your reason for flagging this presentation as inappropriate.

Cancel
  • Full Name Full Name Comment goes here.
    Are you sure you want to
    Your message goes here
    Processing…
Post Comment
Edit your comment

    Parsing JSON with a single regex Parsing JSON with a single regex Presentation Transcript

    • Parsing JSON with a single regex brian d foy Houston Perl Mongers, October 17, 2013
    • Mastering Perl, 2e • Read for free now • http://chimera.labs.oreilly.com/books/1234000001527/index.html • http://goo.gl/lmqAKX • This stuff is in Chapter 2
    • Randal is wicked • JSON is on a single line (minimized) • ASCII only • Fails very quickly • Doesn't handle everything • Uses many advanced regex features • http://www.perlmonks.org/?node_id=995856
    • #!/usr/bin/env perl use Data::Dumper qw(Dumper); my $FROM_JSON = qr{ (?&VALUE) (?{ $_ = $^R->[1] }) (?(DEFINE) (?<OBJECT> (?{ [$^R, {}] }) \{ (?: (?&KV) # [[$^R, {}], $k, $v] (?{ # warn Dumper { obj1 => $^R }; [$^R->[0][0], {$^R->[1] => $^R->[2]}] }) (?: , (?&KV) # [[$^R, {...}], $k, $v] (?{ # warn Dumper { obj2 => $^R }; [$^R->[0][0], {%{$^R->[0][1]}, $^R->[1] => $^R->[2]}] }) )* )? \} ) (?<KV> (?&STRING) # [$^R, "string"] : (?&VALUE) # [[$^R, "string"], $value] (?{ # warn Dumper { kv => $^R }; [$^R->[0][0], $^R->[0][1], $^R->[1]] }) )
    • (?<ARRAY> (?{ [$^R, []] }) \[ (?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] }) (?: , (?&VALUE) (?{ # warn Dumper { atwo => $^R }; [$^R->[0][0], [@{$^R->[0][1]}, $^R->[1]]] }) )* )? \] ) (?<VALUE> \s* ( (?&STRING) | (?&NUMBER) | (?&OBJECT) | (?&ARRAY) | true (?{ [$^R, 1] }) | false (?{ [$^R, 0] }) | null (?{ [$^R, undef] }) ) \s* )
    • (?<STRING> ( " (?: [^\\"]+ | \\ ["\\/bfnrt] # | # \\ u [0-9a-fA-f]{4} )* " ) (?{ [$^R, eval $^N] }) ) (?<NUMBER> ( -? (?: 0 | [1-9]\d* ) (?: \. \d+ )? (?: [eE] [-+]? \d+ )? ) (?{ [$^R, eval $^N] }) ) ) }xms;
    • sub from_json { local $_ = shift; local $^R; eval { m{\A$FROM_JSON\z}; } and return $_; die $@ if $@; return 'no match'; } while (<>) { chomp; print Dumper from_json($_); }
    • my $FROM_JSON = qr{ (?&VALUE) (?{ $_ = $^R->[1] }) (?(DEFINE) (?<OBJECT> (?{ [$^R, {}] }) \{ (?: (?&KV) # [[$^R, {}], $k, $v] (?{ # warn Dumper { obj1 => $^R }; [$^R->[0][0], {$^R->[1] => $^R->[2]}] }) (?: , (?&KV) # [[$^R, {...}], $k, $v] (?{ # warn Dumper { obj2 => $^R }; [$^R->[0][0], {%{$^R->[0][1]}, $^R->[1] => $^R->[2]}] }) )* )? \} ) (?<KV> (?&STRING) # [$^R, "string"] : (?&VALUE) # [[$^R, "string"], $value] (?{ # warn Dumper { kv => $^R }; [$^R->[0][0], $^R->[0][1], $^R->[1]] }) ) (?<ARRAY> (?{ [$^R, []] }) \[ (?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] }) (?: , (?&VALUE) (?{ # warn Dumper { atwo => $^R }; [$^R->[0][0], [@{$^R->[0][1]}, $^R->[1]]] })
    • my $FROM_JSON = qr{ (?&VALUE) (?{ $_ = $^R->[1] }) (?(DEFINE) (?<OBJECT> (?{ [$^R, {}] }) \{ (?: (?&KV) # [[$^R, {}], $k, $v] (?{ # warn Dumper { obj1 => $^R }; [$^R->[0][0], {$^R->[1] => $^R->[2]}] }) (?: , (?&KV) # [[$^R, {...}], $k, $v] (?{ # warn Dumper { obj2 => $^R }; [$^R->[0][0], {%{$^R->[0][1]}, $^R->[1] => $^R->[2]}] }) )* )? \} ) (?<KV> (?&STRING) # [$^R, "string"] : (?&VALUE) # [[$^R, "string"], $value] (?{ # warn Dumper { kv => $^R }; [$^R->[0][0], $^R->[0][1], $^R->[1]] }) ) (?<ARRAY> (?{ [$^R, []] }) \[ (?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] }) (?: , (?&VALUE) (?{ # warn Dumper { atwo => $^R }; [$^R->[0][0], [@{$^R->[0][1]}, $^R->[1]]] })
    • my $FROM_JSON = qr{ (?&VALUE) (?{ $_ = $^R->[1] }) (?(DEFINE) (?<OBJECT> (?{ [$^R, {}] }) \{ (?: (?&KV) # [[$^R, {}], $k, $v] (?{ # warn Dumper { obj1 => $^R }; [$^R->[0][0], {$^R->[1] => $^R->[2]}] }) (?: , (?&KV) # [[$^R, {...}], $k, $v] (?{ # warn Dumper { obj2 => $^R }; [$^R->[0][0], {%{$^R->[0][1]}, $^R->[1] => $^R->[2]}] }) )* )? \} ) (?<KV> (?&STRING) # [$^R, "string"] : (?&VALUE) # [[$^R, "string"], $value] (?{ # warn Dumper { kv => $^R }; [$^R->[0][0], $^R->[0][1], $^R->[1]] }) ) (?<ARRAY> (?{ [$^R, []] }) \[ (?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] }) (?: , (?&VALUE) (?{ # warn Dumper { atwo => $^R }; [$^R->[0][0], [@{$^R->[0][1]}, $^R->[1]]] })
    • • Uses grammars: (?(DEFINE)) • Recurses: (?&KV), et alia • Runs code during the regex: (?{ ... }) • Builds up a data structure: $^R • At the end, replaces the string with a data structure: (?{ $_ = $^R->[1] })
    • $_ =<<'HERE'; Amelia said "I am a camel" HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) ( ['"] ) /x;
    • $_ =<<'HERE'; Amelia said 'I am a camel' HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) ( \1 ) /x;
    • $_ =<<'HERE'; Amelia said 'I am a camel' HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) (?1) /x;
    • $_ =<<'HERE'; Amelia said 'I am a camel" HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) (?1) /x;
    • $_ =<<'HERE'; He said 'Amelia said "I am a camel"' HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) (?1) # Matches wrong quote! /x;
    • $_ =<<'HERE'; He said 'Amelia said "I am a camel"' HERE say "Matched [$+{said}]!" if m/ (?<said> (?<quote>['"]) (?: [^'"]++ | (?<said> (?1) ) )* \g{quote} ) /x; # $1
    • $_ =<<'HERE'; Out "Top 'Middle "Bottom" Middle' Out" HERE say "Matched [$+{said}]!" if m/ (?<said> (?<quote>['"]) (?: [^'"]++ | (?R) )* \g{quote} ) (?{ say "Inside regex: $+{said}" }) /x;
    • $_ =<<'HERE'; Out "Top 'Mid "Bottom" Mid' Out" HERE say "Matched [$+{said}]!" if m/ (?(DEFINE) (?<QUOTE> ['"]) (?<NOT_QUOTE> [^'"]) ) (?<said> (?<quote>(?&QUOTE)) (?: (?&NOT_QUOTE)++ | (?R) )* \g{quote} ) (?{ say "Inside regex: $+{said}" }) /x;
    • my @matches; say "Matched!" if m/ (?(DEFINE) (?<QUOTE_MARK> ['"]) (?<NOT_QUOTE_MARK> [^'"]) ) ( (?<quote>(?&QUOTE_MARK)) (?: (?&NOT_QUOTE_MARK)++ | (?R) )* \g{quote} ) (?{ push @matches, $^N }) /x;
    • say "Matched!" if m/ (?(DEFINE) (?<QUOTE_MARK> ['"]) (?<NOT_QUOTE_MARK> [^'"]) (?<QUOTE> ( (?<quote>(?&QUOTE_MARK)) (?: (?&NOT_QUOTE_MARK)++ | (?&QUOTE) )* \g{quote} ) (?{ push @matches, $^N }) ) ) (?&QUOTE) /x;
    • say "Matched!" if m/ (?(DEFINE) (?<QUOTE_MARK> ['"]) (?<NOT_QUOTE_MARK> [^'"]) (?<QUOTE> ( (?<quote>(?&QUOTE_MARK)) (?: (?&NOT_QUOTE_MARK)++ | (?&QUOTE) )* \g{quote} ) (?{ [ @{$^R}, $^N ] }) ) ) (?&QUOTE) (?{ @matches = @{ $^R } }) /x;