Regexes
and Grammars
   in Perl 6
Preface
Synopsis 5
Synopsis 5
Regexes and Rules
S05
Damian Conway
Allison Randal
Patrick Michaud
Larry Wall
Moritz Lenz
Created: 24 Jun 2002
Last Modified: 30 Aug 2010
Version: 132
54 pages
Part I
Regexes
Random facts
and terminology
Regular expressions
in Perl 5 were not regular
Regular expressions
in Perl 5 were not regular

Regular expressions
in Perl 6 are called regexes
Regular expressions
in Perl 5 were not regular

Regular expressions
in Perl 6 are called regexes

Which means “kinda like
a regular expression”
Match object
contains result of matching




          $/
Capture variable indexes
start with 0




          $0
$0, $1, etc.
are part of $/
my $q = "Hotels in Berlin";
$q ~~ /in\s(.*)/;


say $0;    # Berlin
say $/[0]; # Berlin
Metacharacters
are everything except
Unicode letters
or numbers
or underscore
Quotes
may be used for creating
atoms


'I will never use PHP again. '*
Repetition


(\d+ \s?) ** 3

(\d+ \s?) ** 5..10

\d+ ** ','
/x modifier gone


"ab" ~~ / a    b /;
say $/;   # ab
/s, /m modifiers gone


"a1\nb2\nc3" ~~ /\N+/;

"a1\nb2\nc3" ~~ /^^ .2 $$/;
/e modifier gone


$str ~~ s/pattern/{action()}/;
Modifier syntax


@names =
 $str ~~ m:i/MiSteR \s (\w+)/;
Brackets
Capturing group


       (...)
Non-capturing group


       [...]
Character class


      <[ . . . ]>
Embedded closure


      {...}
Embedded closure


              {...}

> "500" ~~ /(\d+) {$0 < 200 or fail}/
===SORRY!===
Named rule or token


       <. . .>
Part II
Grammars
Keywords
grammar
rule
token
proto
TOP
grammar Grammar {
    rule TOP {...}
    rule some_rule {...}
    token some_token {...}
}
grammar Grammar {
  rule TOP {...}
  rule some_rule {...}
  token some_token {...}
}
Syntax is similar
to class definition
grammar Grammar {
    rule TOP {...}
    rule some_rule {...}
    token some_token {...}
}
Grammar.parse($string);
Example.
Step by step
Executed by Rakudo


    rakudo.org
Executed by Rakudo


    rakudo.org


Sometimes it fails
City
grammar SearchQuery {

}
grammar SearchQuery {
  rule TOP {

    }
}
grammar SearchQuery {
  rule TOP {
     ^
     $
  }
}
grammar SearchQuery {
  rule TOP {
     ^
        <query>
     $
  }
}
grammar SearchQuery {
  rule TOP {
     ^
        <query>
     $
  }
}


     Easy, isn't it?
Grammars are part
 of the language
grammar SearchQuery {
  rule TOP {
     ^
        <query>
     $
  }
  rule query {
  }
}
grammar SearchQuery {
  rule TOP {
     ^
        <query>
     $
  }
  rule query {
     <city>
  }
}
grammar SearchQuery {
  rule TOP {
     ^
        <query>
     $
  }
  rule query {
     <city>
  }
  token city {
  }
}
grammar SearchQuery {   N. B.
  rule TOP {
     ^
        <query>
     $                  rules
  }
  rule query {
     <c...
token
is a "word"
rule
is a "phrase"
grammar SearchQuery {
  rule TOP {
     ^
        <query>
     $
  }
  rule query {
     <city>
  }
  token city {
  }
}
grammar SearchQuery {
  rule TOP {
     ^
        <query>
     $
  }
  rule query {
     <city>
  }
  token city {
     <c...
^
          <query>
      $
    }
    rule query {
       <city>
    }
    token city {
       <capital>
    }
    token c...
my $result = SearchQuery.parse("Amsterdam");
say $result.perl;
Match.new(
from => 0,
orig => "Amsterdam",
to => 9,
named => {
 query => Match.new(
  from => 0,
  orig => "Amsterdam",
  ...
Match.new(
from => 0,                 Matched text
orig => "Amsterdam",
to => 9,
named => {
 query => Match.new(
  from =>...
Match.new(
from => 0,
orig => "Amsterdam",
to => 9,
named => {
 query => Match.new(          rule query {
  from => 0,    ...
Match.new(
from => 0,
orig => "Amsterdam",
to => 9,
named => {
 query => Match.new(
  from => 0,
  orig => "Amsterdam",
  ...
Match.new(
from => 0,
orig => "Amsterdam",
to => 9,
named => {
 query => Match.new(
  from => 0,
  orig => "Amsterdam",
  ...
Country
rule query {
     <city>
   | <country>
}
rule query {
     <city>
   | <country>
}
rule country {
       'Afghanistan'
     | 'Akrotiri'
     | 'Albania'
     | 'A...
my $result = SearchQuery.parse("Amsterdam");
say $result.perl;

$result = SearchQuery.parse("China");
say $result.perl;
rule query {
     <city> ',' <ws>? <country>
   | <city>
   | <country>
}
rule query {
     <city> ',' <ws>? <country>
   | <city>
   | <country>
}



SearchQuery.parse("Tirana, Albania");
rule query {
     <city> ',' <ws>? <country>
   | <city>
   | <country>
}



SearchQuery.parse("Tirana, Albania");
Capturing
and accessing
Everything goes
to Match object

    $/
SearchQuery.parse("Tirana, Albania");
say $<query><city>;
say $<query><country>;
SearchQuery.parse("Tirana, Albania");
say $<query><city>;
say $<query><country>;


Tirana
Albania
SearchQuery.parse("Tirana, Albania");
say $<query><city>;            Shortcut
say $<query><country>;


say $/<query><city>...
rule query {
   'Hotels in'?
   [
       <city> ',' <ws>? <country>
     | <city>
     | <country>
   ]
}
SearchQuery.parse("Tirana, Albania");
say $<query><city>;
say $<query><country>;


SearchQuery.parse
  ("Hotels in Tirana,...
rule date {
   <day>
   <month>
}
token day {
   \d+
   ['st' | 'nd' | 'th']?
}
token month {
     'January'
   | 'February...
SearchQuery.parse("Hotels in Tirana,
Albania from 25th December");


SearchQuery.parse("Hotels in Tirana,
Albania from 25 ...
What will
$<query><date>
    print?
What will
$<query><date>
    print?

 25th December
       or
  25 December
How to check days



token day {
  (\d+) {$0 <= 31 or fail}
}
[
          <city> ',' <ws>? <country>
        | <city>
        | <country>
    ]
    [
         'from' <date>
         't...
token guest_number {
    \d
  | 'one'
  | 'two'
  | 'three'
  | 'four'
  | 'five'
}
"Hotels in Tirana, Albania from
25 December to 7 January for two"
rule date {
     'today'
   | 'tomorrow'
   |[
       <day>
       <month>
     ]
}
$ perl6 10-all.pl
Hotels in Amsterdam, Netherlands from 1 January to 5
February for three
   City:    Amsterdam
   Country...
__END__

           Andrew Shitov
talks.shitov.ru | andy@shitov.ru
Perl6 grammars
Perl6 grammars
Upcoming SlideShare
Loading in …5
×

Perl6 grammars

23,756 views

Published on

0 Comments
6 Likes
Statistics
Notes
  • Be the first to comment

No Downloads
Views
Total views
23,756
On SlideShare
0
From Embeds
0
Number of Embeds
19,353
Actions
Shares
0
Downloads
33
Comments
0
Likes
6
Embeds 0
No embeds

No notes for slide

Perl6 grammars

  1. 1. Regexes and Grammars in Perl 6
  2. 2. Preface
  3. 3. Synopsis 5
  4. 4. Synopsis 5 Regexes and Rules
  5. 5. S05
  6. 6. Damian Conway Allison Randal Patrick Michaud Larry Wall Moritz Lenz
  7. 7. Created: 24 Jun 2002 Last Modified: 30 Aug 2010 Version: 132
  8. 8. 54 pages
  9. 9. Part I Regexes
  10. 10. Random facts and terminology
  11. 11. Regular expressions in Perl 5 were not regular
  12. 12. Regular expressions in Perl 5 were not regular Regular expressions in Perl 6 are called regexes
  13. 13. Regular expressions in Perl 5 were not regular Regular expressions in Perl 6 are called regexes Which means “kinda like a regular expression”
  14. 14. Match object contains result of matching $/
  15. 15. Capture variable indexes start with 0 $0
  16. 16. $0, $1, etc. are part of $/
  17. 17. my $q = "Hotels in Berlin"; $q ~~ /in\s(.*)/; say $0; # Berlin say $/[0]; # Berlin
  18. 18. Metacharacters are everything except Unicode letters or numbers or underscore
  19. 19. Quotes may be used for creating atoms 'I will never use PHP again. '*
  20. 20. Repetition (\d+ \s?) ** 3 (\d+ \s?) ** 5..10 \d+ ** ','
  21. 21. /x modifier gone "ab" ~~ / a b /; say $/; # ab
  22. 22. /s, /m modifiers gone "a1\nb2\nc3" ~~ /\N+/; "a1\nb2\nc3" ~~ /^^ .2 $$/;
  23. 23. /e modifier gone $str ~~ s/pattern/{action()}/;
  24. 24. Modifier syntax @names = $str ~~ m:i/MiSteR \s (\w+)/;
  25. 25. Brackets
  26. 26. Capturing group (...)
  27. 27. Non-capturing group [...]
  28. 28. Character class <[ . . . ]>
  29. 29. Embedded closure {...}
  30. 30. Embedded closure {...} > "500" ~~ /(\d+) {$0 < 200 or fail}/ ===SORRY!===
  31. 31. Named rule or token <. . .>
  32. 32. Part II Grammars
  33. 33. Keywords
  34. 34. grammar rule token proto TOP
  35. 35. grammar Grammar { rule TOP {...} rule some_rule {...} token some_token {...} }
  36. 36. grammar Grammar { rule TOP {...} rule some_rule {...} token some_token {...} } Syntax is similar to class definition
  37. 37. grammar Grammar { rule TOP {...} rule some_rule {...} token some_token {...} } Grammar.parse($string);
  38. 38. Example. Step by step
  39. 39. Executed by Rakudo rakudo.org
  40. 40. Executed by Rakudo rakudo.org Sometimes it fails
  41. 41. City
  42. 42. grammar SearchQuery { }
  43. 43. grammar SearchQuery { rule TOP { } }
  44. 44. grammar SearchQuery { rule TOP { ^ $ } }
  45. 45. grammar SearchQuery { rule TOP { ^ <query> $ } }
  46. 46. grammar SearchQuery { rule TOP { ^ <query> $ } } Easy, isn't it?
  47. 47. Grammars are part of the language
  48. 48. grammar SearchQuery { rule TOP { ^ <query> $ } rule query { } }
  49. 49. grammar SearchQuery { rule TOP { ^ <query> $ } rule query { <city> } }
  50. 50. grammar SearchQuery { rule TOP { ^ <query> $ } rule query { <city> } token city { } }
  51. 51. grammar SearchQuery { N. B. rule TOP { ^ <query> $ rules } rule query { <city> } token token city { } }
  52. 52. token is a "word"
  53. 53. rule is a "phrase"
  54. 54. grammar SearchQuery { rule TOP { ^ <query> $ } rule query { <city> } token city { } }
  55. 55. grammar SearchQuery { rule TOP { ^ <query> $ } rule query { <city> } token city { <capital> } }
  56. 56. ^ <query> $ } rule query { <city> } token city { <capital> } token capital { } }
  57. 57. my $result = SearchQuery.parse("Amsterdam"); say $result.perl;
  58. 58. Match.new( from => 0, orig => "Amsterdam", to => 9, named => { query => Match.new( from => 0, orig => "Amsterdam", to => 9, named => { city => Match.new( from => 0, orig => "Amsterdam", to => 9, named => { capital => Match.new( from => 0, orig => "Amsterdam",
  59. 59. Match.new( from => 0, Matched text orig => "Amsterdam", to => 9, named => { query => Match.new( from => 0, orig => "Amsterdam", to => 9, named => { city => Match.new( from => 0, orig => "Amsterdam", to => 9, named => { capital => Match.new( from => 0, orig => "Amsterdam",
  60. 60. Match.new( from => 0, orig => "Amsterdam", to => 9, named => { query => Match.new( rule query { from => 0, } orig => "Amsterdam", to => 9, named => { city => Match.new( from => 0, orig => "Amsterdam", to => 9, named => { capital => Match.new( from => 0, orig => "Amsterdam",
  61. 61. Match.new( from => 0, orig => "Amsterdam", to => 9, named => { query => Match.new( from => 0, orig => "Amsterdam", to => 9, named => { city => Match.new( token city { from => 0, } orig => "Amsterdam", to => 9, named => { capital => Match.new( from => 0, orig => "Amsterdam",
  62. 62. Match.new( from => 0, orig => "Amsterdam", to => 9, named => { query => Match.new( from => 0, orig => "Amsterdam", to => 9, named => { city => Match.new( from => 0, orig => "Amsterdam", to => 9, named => { capital => Match.new( token capital { from => 0, } orig => "Amsterdam",
  63. 63. Country
  64. 64. rule query { <city> | <country> }
  65. 65. rule query { <city> | <country> } rule country { 'Afghanistan' | 'Akrotiri' | 'Albania' | 'Algeria' | 'American Samoa' | 'Andorra' . . . }
  66. 66. my $result = SearchQuery.parse("Amsterdam"); say $result.perl; $result = SearchQuery.parse("China"); say $result.perl;
  67. 67. rule query { <city> ',' <ws>? <country> | <city> | <country> }
  68. 68. rule query { <city> ',' <ws>? <country> | <city> | <country> } SearchQuery.parse("Tirana, Albania");
  69. 69. rule query { <city> ',' <ws>? <country> | <city> | <country> } SearchQuery.parse("Tirana, Albania");
  70. 70. Capturing and accessing
  71. 71. Everything goes to Match object $/
  72. 72. SearchQuery.parse("Tirana, Albania"); say $<query><city>; say $<query><country>;
  73. 73. SearchQuery.parse("Tirana, Albania"); say $<query><city>; say $<query><country>; Tirana Albania
  74. 74. SearchQuery.parse("Tirana, Albania"); say $<query><city>; Shortcut say $<query><country>; say $/<query><city>; Full syntax say $/<query><country>;
  75. 75. rule query { 'Hotels in'? [ <city> ',' <ws>? <country> | <city> | <country> ] }
  76. 76. SearchQuery.parse("Tirana, Albania"); say $<query><city>; say $<query><country>; SearchQuery.parse ("Hotels in Tirana, Albania"); say $<query><city>; say $<query><country>;
  77. 77. rule date { <day> <month> } token day { \d+ ['st' | 'nd' | 'th']? } token month { 'January' | 'February' | 'March' | 'April' . . .
  78. 78. SearchQuery.parse("Hotels in Tirana, Albania from 25th December"); SearchQuery.parse("Hotels in Tirana, Albania from 25 December");
  79. 79. What will $<query><date> print?
  80. 80. What will $<query><date> print? 25th December or 25 December
  81. 81. How to check days token day { (\d+) {$0 <= 31 or fail} }
  82. 82. [ <city> ',' <ws>? <country> | <city> | <country> ] [ 'from' <date> 'to' <date> ]? [ 'for' <guest_number> ]? }
  83. 83. token guest_number { \d | 'one' | 'two' | 'three' | 'four' | 'five' }
  84. 84. "Hotels in Tirana, Albania from 25 December to 7 January for two"
  85. 85. rule date { 'today' | 'tomorrow' |[ <day> <month> ] }
  86. 86. $ perl6 10-all.pl Hotels in Amsterdam, Netherlands from 1 January to 5 February for three City: Amsterdam Country: Netherlands From: 1 January To: 5 February Guests: three
  87. 87. __END__ Andrew Shitov talks.shitov.ru | andy@shitov.ru

×