Parsing JSON with
a single regex
brian d foy
Houston Perl Mongers, October 17, 2013
Mastering Perl, 2e
• Read for free now
• http://chimera.labs.oreilly.com/

books/1234000001527/index.html

• http://goo.gl...
Randal is wicked
• JSON is on a single line (minimized)
• ASCII only
• Fails very quickly
• Doesn't handle everything
• Us...
#!/usr/bin/env perl
use Data::Dumper qw(Dumper);
my $FROM_JSON = qr{
(?&VALUE) (?{ $_ = $^R->[1] })
(?(DEFINE)
(?<OBJECT>
...
(?<ARRAY>
(?{ [$^R, []] })
[
(?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] })
(?: , (?&VALUE) (?{ # warn Dumper { atwo => $^...
(?<STRING>
(
"
(?:
[^"]+
|
 ["/bfnrt]
#
|
#
 u [0-9a-fA-f]{4}
)*
"
)
(?{ [$^R, eval $^N] })
)
(?<NUMBER>
(
-?
(?: 0 | [1-9...
# Parse one JSON document using the single-regex grammar in $FROM_JSON.
#
# Takes the JSON text as its only argument. When the grammar matches, its
# trailing code block assigns the decoded value to $_, and that value is
# returned. If the match attempt died, the exception is rethrown. If the
# text simply did not match, the string 'no match' is returned.
sub from_json {
    local $_ = shift;    # the match runs against $_ and then overwrites it
    local $^R;           # start with an empty regex result accumulator

    # \A and \z anchor the grammar to the entire input. Without the
    # backslashes they are the literal letters "A" and "z", and no real
    # JSON text can match.
    eval { m{\A$FROM_JSON\z}; } and return $_;
    die $@ if $@;
    return 'no match';
}
w...
my $FROM_JSON = qr{
(?&VALUE) (?{ $_ = $^R->[1] })
(?(DEFINE)
(?<OBJECT>
(?{ [$^R, {}] })
{
(?: (?&KV) # [[$^R, {}], $k, $...
my $FROM_JSON = qr{
(?&VALUE) (?{ $_ = $^R->[1] })
(?(DEFINE)
(?<OBJECT>
(?{ [$^R, {}] })
{
(?: (?&KV) # [[$^R, {}], $k, $...
my $FROM_JSON = qr{
(?&VALUE) (?{ $_ = $^R->[1] })
(?(DEFINE)
(?<OBJECT>
(?{ [$^R, {}] })
{
(?: (?&KV) # [[$^R, {}], $k, $...
• Uses grammars: (?(DEFINE))
• Recurses: (?&KV), et alia
• Runs code during the regex: (?{ ... })
• Builds up a data struc...
# First attempt: match a quote character, lazily capture the phrase in the
# named group "said", then match another quote character. Nothing ties the
# closing quote to the opening one, so either kind of quote can end the match.
$_ =<<'HERE';
Amelia said "I am a camel"
HERE
say "Matched [$+{said}]!" if m/
( ['"] )
(?<said>.*?)
( ['"] )
/x;
# Require the closing quote to be the same character as the opening quote by
# using the backreference \1, which matches the text captured by the first
# group. (A bare "1" here would only match a literal digit 1.)
$_ =<<'HERE';
Amelia said 'I am a camel'
HERE
say "Matched [$+{said}]!" if m/
( ['"] )
(?<said>.*?)
( \1 )
/x;
# (?1) re-runs the pattern of the first capture group, ['"], rather than
# matching the text that group captured, so any quote character can close
# the phrase.
$_ =<<'HERE';
Amelia said 'I am a camel'
HERE
say "Matched [$+{said}]!" if m/
( ['"] )
(?<said>.*?)
(?1)
/x;
# The sample text opens with ' and closes with ". Because (?1) re-applies the
# pattern ['"] instead of the captured text, the mismatched quotes still match.
$_ =<<'HERE';
Amelia said 'I am a camel"
HERE
say "Matched [$+{said}]!" if m/
( ['"] )
(?<said>.*?)
(?1)
/x;
$_ =<<'HERE';
He said 'Amelia said "I am a camel"'
HERE
say "Matched [$+{said}]!" if m/
( ['"] )
(?<said>.*?)
(?1)
# Match...
$_ =<<'HERE';
He said 'Amelia said "I am a camel"'
HERE
say "Matched [$+{said}]!" if m/
(?<said>
(?<quote>['"])
(?:
[^'"]+...
$_ =<<'HERE';
Out "Top 'Middle "Bottom" Middle' Out"
HERE
say "Matched [$+{said}]!" if m/
(?<said>
(?<quote>['"])
(?:
[^'"...
$_ =<<'HERE';
Out "Top 'Mid "Bottom" Mid' Out"
HERE
say "Matched [$+{said}]!" if m/
(?(DEFINE)
(?<QUOTE> ['"])
(?<NOT_QUOT...
my @matches;
say "Matched!" if m/
(?(DEFINE)
(?<QUOTE_MARK> ['"])
(?<NOT_QUOTE_MARK> [^'"])
)
(
(?<quote>(?&QUOTE_MARK))
(...
say "Matched!" if m/
(?(DEFINE)
(?<QUOTE_MARK> ['"])
(?<NOT_QUOTE_MARK> [^'"])
(?<QUOTE>
(
(?<quote>(?&QUOTE_MARK))
(?:
(?...
say "Matched!" if m/
(?(DEFINE)
(?<QUOTE_MARK> ['"])
(?<NOT_QUOTE_MARK> [^'"])
(?<QUOTE>
(
(?<quote>(?&QUOTE_MARK))
(?:
(?...
Upcoming SlideShare
Loading in...5
×

Parsing JSON with a single regex

26,335

Published on

I explain the features Randal Schwartz used to parse JSON in a single regex.

Published in: Technology, Business

Parsing JSON with a single regex

  1. 1. Parsing JSON with a single regex brian d foy Houston Perl Mongers, October 17, 2013
  2. 2. Mastering Perl, 2e • Read for free now • http://chimera.labs.oreilly.com/ books/1234000001527/index.html • http://goo.gl/lmqAKX • This stuff is in Chapter 2
  3. 3. Randal is wicked • JSON is on a single line (minimized) • ASCII only • Fails very quickly • Doesn't handle everything • Uses many advanced regex features • http://www.perlmonks.org/?node_id=995856
  4. 4. #!/usr/bin/env perl use Data::Dumper qw(Dumper); my $FROM_JSON = qr{ (?&VALUE) (?{ $_ = $^R->[1] }) (?(DEFINE) (?<OBJECT> (?{ [$^R, {}] }) \{ (?: (?&KV) # [[$^R, {}], $k, $v] (?{ # warn Dumper { obj1 => $^R }; [$^R->[0][0], {$^R->[1] => $^R->[2]}] }) (?: , (?&KV) # [[$^R, {...}], $k, $v] (?{ # warn Dumper { obj2 => $^R }; [$^R->[0][0], {%{$^R->[0][1]}, $^R->[1] => $^R->[2]}] }) )* )? \} ) (?<KV> (?&STRING) # [$^R, "string"] : (?&VALUE) # [[$^R, "string"], $value] (?{ # warn Dumper { kv => $^R }; [$^R->[0][0], $^R->[0][1], $^R->[1]] }) )
  5. 5. (?<ARRAY> (?{ [$^R, []] }) \[ (?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] }) (?: , (?&VALUE) (?{ # warn Dumper { atwo => $^R }; [$^R->[0][0], [@{$^R->[0][1]}, $^R->[1]]] }) )* )? \] ) (?<VALUE> \s* ( (?&STRING) | (?&NUMBER) | (?&OBJECT) | (?&ARRAY) | true (?{ [$^R, 1] }) | false (?{ [$^R, 0] }) | null (?{ [$^R, undef] }) ) \s* )
  6. 6. (?<STRING> ( " (?: [^\\"]+ | \\ ["\\/bfnrt] # | # \\ u [0-9a-fA-f]{4} )* " ) (?{ [$^R, eval $^N] }) ) (?<NUMBER> ( -? (?: 0 | [1-9]\d* ) (?: \. \d+ )? (?: [eE] [-+]? \d+ )? ) (?{ [$^R, eval $^N] }) ) ) }xms;
  7. 7. sub from_json { local $_ = shift; local $^R; eval { m{\A$FROM_JSON\z}; } and return $_; die $@ if $@; return 'no match'; } while (<>) { chomp; print Dumper from_json($_); }
  8. 8. my $FROM_JSON = qr{ (?&VALUE) (?{ $_ = $^R->[1] }) (?(DEFINE) (?<OBJECT> (?{ [$^R, {}] }) \{ (?: (?&KV) # [[$^R, {}], $k, $v] (?{ # warn Dumper { obj1 => $^R }; [$^R->[0][0], {$^R->[1] => $^R->[2]}] }) (?: , (?&KV) # [[$^R, {...}], $k, $v] (?{ # warn Dumper { obj2 => $^R }; [$^R->[0][0], {%{$^R->[0][1]}, $^R->[1] => $^R->[2]}] }) )* )? \} ) (?<KV> (?&STRING) # [$^R, "string"] : (?&VALUE) # [[$^R, "string"], $value] (?{ # warn Dumper { kv => $^R }; [$^R->[0][0], $^R->[0][1], $^R->[1]] }) ) (?<ARRAY> (?{ [$^R, []] }) \[ (?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] }) (?: , (?&VALUE) (?{ # warn Dumper { atwo => $^R }; [$^R->[0][0], [@{$^R->[0][1]}, $^R->[1]]] })
  9. 9. my $FROM_JSON = qr{ (?&VALUE) (?{ $_ = $^R->[1] }) (?(DEFINE) (?<OBJECT> (?{ [$^R, {}] }) \{ (?: (?&KV) # [[$^R, {}], $k, $v] (?{ # warn Dumper { obj1 => $^R }; [$^R->[0][0], {$^R->[1] => $^R->[2]}] }) (?: , (?&KV) # [[$^R, {...}], $k, $v] (?{ # warn Dumper { obj2 => $^R }; [$^R->[0][0], {%{$^R->[0][1]}, $^R->[1] => $^R->[2]}] }) )* )? \} ) (?<KV> (?&STRING) # [$^R, "string"] : (?&VALUE) # [[$^R, "string"], $value] (?{ # warn Dumper { kv => $^R }; [$^R->[0][0], $^R->[0][1], $^R->[1]] }) ) (?<ARRAY> (?{ [$^R, []] }) \[ (?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] }) (?: , (?&VALUE) (?{ # warn Dumper { atwo => $^R }; [$^R->[0][0], [@{$^R->[0][1]}, $^R->[1]]] })
  10. 10. my $FROM_JSON = qr{ (?&VALUE) (?{ $_ = $^R->[1] }) (?(DEFINE) (?<OBJECT> (?{ [$^R, {}] }) \{ (?: (?&KV) # [[$^R, {}], $k, $v] (?{ # warn Dumper { obj1 => $^R }; [$^R->[0][0], {$^R->[1] => $^R->[2]}] }) (?: , (?&KV) # [[$^R, {...}], $k, $v] (?{ # warn Dumper { obj2 => $^R }; [$^R->[0][0], {%{$^R->[0][1]}, $^R->[1] => $^R->[2]}] }) )* )? \} ) (?<KV> (?&STRING) # [$^R, "string"] : (?&VALUE) # [[$^R, "string"], $value] (?{ # warn Dumper { kv => $^R }; [$^R->[0][0], $^R->[0][1], $^R->[1]] }) ) (?<ARRAY> (?{ [$^R, []] }) \[ (?: (?&VALUE) (?{ [$^R->[0][0], [$^R->[1]]] }) (?: , (?&VALUE) (?{ # warn Dumper { atwo => $^R }; [$^R->[0][0], [@{$^R->[0][1]}, $^R->[1]]] })
  11. 11. • Uses grammars: (?(DEFINE)) • Recurses: (?&KV), et alia • Runs code during the regex: (?{ ... }) • Builds up a data structure: $^R • At the end, replaces the string with a data structure: (?{ $_ = $^R->[1] })
  12. 12. $_ =<<'HERE'; Amelia said "I am a camel" HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) ( ['"] ) /x;
  13. 13. $_ =<<'HERE'; Amelia said 'I am a camel' HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) ( \1 ) /x;
  14. 14. $_ =<<'HERE'; Amelia said 'I am a camel' HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) (?1) /x;
  15. 15. $_ =<<'HERE'; Amelia said 'I am a camel" HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) (?1) /x;
  16. 16. $_ =<<'HERE'; He said 'Amelia said "I am a camel"' HERE say "Matched [$+{said}]!" if m/ ( ['"] ) (?<said>.*?) (?1) # Matches wrong quote! /x;
  17. 17. $_ =<<'HERE'; He said 'Amelia said "I am a camel"' HERE say "Matched [$+{said}]!" if m/ (?<said> (?<quote>['"]) (?: [^'"]++ | (?<said> (?1) ) )* \g{quote} ) /x; # $1
  18. 18. $_ =<<'HERE'; Out "Top 'Middle "Bottom" Middle' Out" HERE say "Matched [$+{said}]!" if m/ (?<said> (?<quote>['"]) (?: [^'"]++ | (?R) )* \g{quote} ) (?{ say "Inside regex: $+{said}" }) /x;
  19. 19. $_ =<<'HERE'; Out "Top 'Mid "Bottom" Mid' Out" HERE say "Matched [$+{said}]!" if m/ (?(DEFINE) (?<QUOTE> ['"]) (?<NOT_QUOTE> [^'"]) ) (?<said> (?<quote>(?&QUOTE)) (?: (?&NOT_QUOTE)++ | (?R) )* \g{quote} ) (?{ say "Inside regex: $+{said}" }) /x;
  20. 20. my @matches; say "Matched!" if m/ (?(DEFINE) (?<QUOTE_MARK> ['"]) (?<NOT_QUOTE_MARK> [^'"]) ) ( (?<quote>(?&QUOTE_MARK)) (?: (?&NOT_QUOTE_MARK)++ | (?R) )* \g{quote} ) (?{ push @matches, $^N }) /x;
  21. 21. say "Matched!" if m/ (?(DEFINE) (?<QUOTE_MARK> ['"]) (?<NOT_QUOTE_MARK> [^'"]) (?<QUOTE> ( (?<quote>(?&QUOTE_MARK)) (?: (?&NOT_QUOTE_MARK)++ | (?&QUOTE) )* \g{quote} ) (?{ push @matches, $^N }) ) ) (?&QUOTE) /x;
  22. 22. say "Matched!" if m/ (?(DEFINE) (?<QUOTE_MARK> ['"]) (?<NOT_QUOTE_MARK> [^'"]) (?<QUOTE> ( (?<quote>(?&QUOTE_MARK)) (?: (?&NOT_QUOTE_MARK)++ | (?&QUOTE) )* \g{quote} ) (?{ [ @{$^R}, $^N ] }) ) ) (?&QUOTE) (?{ @matches = @{ $^R } }) /x;

×