Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

Creating a compiler in Perl 6

1,659 views

Published on

I will show how to create an interpreter for a simple programming language using Perl 6 grammars.

This talk is not an introduction to Perl 6 regexes and grammars, so we'll use them straight on, but I will add comments so that you can understand what's going on even if you never tried Perl 6 grammars.

There will not be enough time to write the whole compiler, of course, but I will show how you can do that at home.

Published in: Software
  • Be the first to comment

Creating a compiler in Perl 6

  1. 1. Creating a Compiler in Perl 6 Andrew Shitov 
 German Perl Workshop
 Munich, 7 March 2019
  2. 2. PREAMBLE
  3. 3. my $sign = '(Ⅽ㉍[🚚])[22-6]';
  4. 4. my $sign = '(Ⅽ㉍[🚚])[22-6]'; Roman numeral 100
  5. 5. grammar Sign { } say Sign.parse($sign);
  6. 6. grammar Sign { rule TOP { <group>+ } } say Sign.parse($sign);
  7. 7. grammar Sign { rule TOP { <group>+ } rule group { | '(' <element>+ ')' <modifier>? | <element> } } say Sign.parse($sign);
  8. 8. grammar Sign { rule TOP { <group>+ } rule group { | '(' <element>+ ')' <modifier>? | <element> } rule element { <speed> <modifier>? } } say Sign.parse($sign);
  9. 9. grammar Sign { rule TOP { <group>+ } rule group { | '(' <element>+ ')' <modifier>? | <element> } rule element { <speed> <modifier>? } rule speed { <:N> } } say Sign.parse($sign);
  10. 10. "a".uniprop('Script');              # OUTPUT: «Latin␤»  "a" ~~ / <:Script<Latin>> /;        # OUTPUT: «「a」␤»  "a".uniprop('Block');               # OUTPUT: «Basic Latin␤»  "a" ~~ / <:Block('Basic Latin')> /; # OUTPUT: «「a」␤»  .uniprop and <: . . . >
  11. 11. N = Number Nd = Decimal_Number or digit Nl = Letter_Number No = Other_Number Unicode categories
  12. 12. grammar Sign { rule TOP { <group>+ } rule group { | '(' <element>+ ')' <modifier>? | <element> } rule element { <speed> <modifier>? } rule speed { <:N> } } say Sign.parse($sign);
  13. 13. rule element { <speed> <modifier>? } rule speed { <:N> } rule modifier { '[' [ | <type-modifier> | <time-modifier> ] ']' } } say Sign.parse($sign);
  14. 14. rule speed { <:N> } rule modifier { '[' [ | <type-modifier> | <time-modifier> ] ']' } rule type-modifier { '🚚' } } say Sign.parse($sign);
  15. 15. rule modifier { '[' [ | <type-modifier> | <time-modifier> ] ']' } rule type-modifier { '🚚' } rule time-modifier { \d+ '-' \d+ } } say Sign.parse($sign);
  16. 16. my $sign = '(Ⅽ㉍[🚚])[22-6]'; grammar Sign { . . . } say Sign.parse($sign);
  17. 17. 「(Ⅽ㉍[🚚])[22-6]」 group => 「(Ⅽ㉍[🚚])[22-6]」 element => 「Ⅽ」 speed => 「Ⅽ」 element => 「㉍[🚚]」 speed => 「㉍」 modifier => 「[🚚]」 type-modifier => 「🚚」 modifier => 「[22-6]」 time-modifier => 「22-6」
  18. 18. 「(Ⅽ㉍[🚚])[22-6]」 group => 「(Ⅽ㉍[🚚])[22-6]」 element => 「Ⅽ」 speed => 「Ⅽ」 element => 「㉍[🚚]」 speed => 「㉍」 modifier => 「[🚚]」 type-modifier => 「🚚」 modifier => 「[22-6]」 time-modifier => 「22-6」
  19. 19. 「(Ⅽ㉍[🚚])[22-6]」 group => 「(Ⅽ㉍[🚚])[22-6]」 element => 「Ⅽ」 speed => 「Ⅽ」 element => 「㉍[🚚]」 speed => 「㉍」 modifier => 「[🚚]」 type-modifier => 「🚚」 modifier => 「[22-6]」 time-modifier => 「22-6」
  20. 20. 「(Ⅽ㉍[🚚])[22-6]」 group => 「(Ⅽ㉍[🚚])[22-6]」 element => 「Ⅽ」 speed => 「Ⅽ」 element => 「㉍[🚚]」 speed => 「㉍」 modifier => 「[🚚]」 type-modifier => 「🚚」 modifier => 「[22-6]」 time-modifier => 「22-6」
  21. 21. MAIN DISH
  22. 22. Compiler Translator Interpreter
  23. 23. Lexer + Parser Abstract Syntax Tree (AST) Optimise Convert to byte/binary code
 
 Execute
  24. 24. The Language
  25. 25. Lingua print 10; print 20; ## 1020 say ""; my x = "Hello"; print x; ## Hello say "";
  26. 26. Lingua: create array my a[]; a = 3, 4, 5; say a; ## 3, 4, 5 my b[] = 7, 8, 9; say b; ## 7, 8, 9
  27. 27. Lingua: array elements my arr[]; my brr[] = 3, 4, 5; say brr; ## 3, 4, 5 my x = brr[0]; say x; ## 3 say brr[0]; ## 3 say brr[1]; ## 4 say brr[2]; ## 5
  28. 28. Lingua: hashes my h{}; say h; ## my g{} = "a": "b", "c": "d"; say g; ## a: b, c: d
  29. 29. Lingua: keys and values my h{}; my g{} = "a": "b", "c": "d"; say g; ## a: b, c: d say g{"a"}; ## b my x = g{"a"}; say x; ## b
  30. 30. Lingua: numbers 1 my int = 42; say int; ## 42 my float = 3.14; say float; ## 3.14 my sci = 3E14; say sci; ## 300000000000000 my negative = -1.2; say negative; ## -1.2 my zero = 0; say zero; ## 0
  31. 31. Lingua: numbers 2 my half = .5; say half; ## 0.5 my minus_n = -.3; say minus_n; ## -0.3 my x = +7; say x; ## 7 my y = 4.43E-1; say y; ## 0.443
  32. 32. Lingua: strings my s1; s1 = "Hello, World!"; say s1; ## Hello, World! my s2 = "Another string"; say s2; ## Another string my s3 = ""; # Empty string say s3; ##
  33. 33. Lingua: string indices my abc = "abcdef"; say abc[0]; ## a say abc[3]; ## d say abc[5]; ## f
  34. 34. Lingua: string escaping say ""; ## say ""; ## say "\$"; ## $ say "\""; ## "
  35. 35. Lingua: string interpolation my i = 10; my f = -1.2; my c = 1E-2; my s = "word"; my str = "i=$i, f=$f, c=$c, s=$s"; say str; ## i=10, f=-1.2, c=0.01, s=word
  36. 36. Lingua: variables my a; a = 10; say a; ## 10 my b = 20; say b; ## 20
  37. 37. Lingua: variables 
 as indices and keys my a[] = 2, 4, 6, 8, 10; my i = 3; say a[i]; ## 8 my b{} = "a": 1, "b": 2; my j = "b"; say b{j}; ## 2
  38. 38. Lingua: expressions 1 my x; x = 3 + 4; say x; ## 7 x = 3 - 4; say x; ## -1 x = 7; say x; ## 7 x = 1 + 2 + 3; say x; ## 6
  39. 39. Lingua: expressions 2 x = 1 + 3 + 5 + 7; say x; ## 16 x = 7 + 8 - 3; say x; ## 12 x = 14 - 4 - 3; say x; ## 7 x = 100 - 200 + 300 + 1 - 2; say x; ## 199 x = 3 * 4; say x; ## 12 x = 100 / 25; say x; ## 4 x = 1 + 2 * 3; say x; ## 7
  40. 40. Lingua: expressions 3 x = 2 ** 3 ** 4; say x; ## 4096 x = 10 * (20 - 30); say x; ## -100 x = 10 * 20 - 30; say x; ## 170 x = (5 * 6); say x; ## 30 x = (10); say x; ## 10 x = 1 - (5 * (3 + 4)) / 2; say x; ## -16.5
  41. 41. Lingua: if my flag = 1; if flag say "Printed"; ## Printed flag = 0; if flag say "Ignored"; say "Done"; ## Done
  42. 42. Lingua: inline if-else if 1 say "A" else say "B"; ## A if 0 say "A" else say "B"; ## B my x; if 1 x = 10 else x = 20; say x; ## 10 if 0 x = 30 else x = 40; say x; ## 40
  43. 43. Lingua: if-else blocks my c = 0; if c { say "Not printed"; } else { say "c = $c"; say "ELSE block"; } ## c = 0 ## ELSE block c = 1; if c { say "c = $c"; say "IF block"; } else { say "Not printed either"; }
  44. 44. Lingua: comparisons my x = 10; my y = 20; if x > y say ">" else say "<"; ## < if x < y say ">" else say "<"; ## > if x != y say "!=" else say "=="; ## != if x != x say "!=" else say "=="; ## == if x == y say "==" else say "!="; ## != if x == x say "==" else say "!="; ## == if 5 <= 5 say "5 <= 5"; ## 5 <= 5 if 5 <= 6 say "5 <= 6"; ## 5 <= 6
  45. 45. Lingua: loops 1 my n = 3; loop n say n; ## 3 ## 2 ## 1
  46. 46. Lingua: loops 2 my n = 5; loop n { my n2 = n * n; say "n = $n, n2 = $n2"; } ## n = 5, n2 = 25 ## n = 4, n2 = 16 ## n = 3, n2 = 9 ## n = 2, n2 = 4 ## n = 1, n2 = 1
  47. 47. Lingua: while my n = 1; while n <= 5 { say n; n = n + 1 } ## 1 ## 2 ## 3 ## 4 ## 5 my k = 1; while k < 10 k = k + 1; say k; ## 10
  48. 48. The Code Part 1. Grammar
  49. 49. grammar Lingua {
 rule TOP {
 .*
 }
 }
 
 my $code = 'test.lng'.IO.slurp();
 my $result = Lingua.parse($code);
 say $result;
  50. 50. rule TOP {
 <statement>* %% ';'
 }
  51. 51. rule statement {
 | <variable-declaration>
 | <assignment>
 | <function-call>
 }
  52. 52. rule variable-declaration {
 'my' <variable-name>
 }
 
 rule assignment {
 <variable-name> '=' <value>
 }
 
 rule function-call {
 <function-name> <variable-name>
 }
  53. 53. token variable-name {
 \w+
 } token value {
 \d+
 } rule function-name {
 'say'
 }
  54. 54. my x;
 x = 42;
 say x; my x;
 x = 42;
 say x statement => 「my x」
 variable-declaration => 「my x」
 variable-name => 「x」
 statement => 「x = 42」
 assignment => 「x = 42」
 variable-name => 「x」
 value => 「42」
 statement => 「say x」
 function-call => 「say x」
 function-name => 「say 」
 variable-name => 「x」
  55. 55. The Code Part 2. Actions
  56. 56. my %var;
 
 grammar Lingua {
 . . .
 }
  57. 57. rule variable-declaration {
 'my' <variable-name> {
 %var{$<variable-name>} = 0;
 }
 }
  58. 58. rule assignment {
 <variable-name> '=' <value> {
 %var{~$<variable-name>} = +$<value>;
 }
 }
  59. 59. rule function-call {
 <function-name> <variable-name> {
 say %var{$<variable-name>}
 if $<function-name> eq 'say';
 }
 }
  60. 60. rule function-call { 
 <function-name> <variable-name> {
 
 say %var{$<variable-name>}
 
 if $<function-name> eq 'say';
 
 }
 
 } Perl 6 Regex Perl 6
  61. 61. The Code Part 2. Actions
  62. 62. class LinguaActions {
 method variable-declaration($/) {
 %var{$<variable-name>} = 0;
 }
 
 method assignment($/) {
 %var{~$<variable-name>} = +$<value>;
 }
 
 method function-call($/) {
 say %var{$<variable-name>}
 if $<function-name> eq 'say';
 } 
 }
  63. 63. Lingua.parse($code, :actions(LinguaActions));
  64. 64. Parsing numbers
  65. 65. my @cases =
 7, 77, -84, '+7', 0,
 3.14, -2.78, 5.0, '.5',
 '', '-', '+',
 '3E4', '-33E55', '3E-3', '-1E-2'; for @cases -> $number {
 my $test = Number.parse($number);
 say ($test ?? 'OK ' !! 'NOT OK ') ~ $number;
 }
  66. 66. grammar Number {
 rule TOP {
 <number>
 }
 
 token number {
 \d+
 }
 }
  67. 67. token number {
 '-'? \d+
 }
  68. 68. token number {
 <[+-]>? \d* ['.' \d+]?
 }
  69. 69. token number {
 <sign>? [
 | <integer>
 | <floating-point>
 | <integer> <exponent>
 | <floating-point> <exponent>
 ]
 }
  70. 70. Build the value
  71. 71. method integer($/) {
 $n = +$/;
 }

  72. 72. AST Abstract
 Syntax
 Tree
  73. 73. 2 + 3 * 4 / 5 - 6
  74. 74. 2 + 3 * 4 / 5 - 6
  75. 75. method integer($/) {
 $/.make(+$/);
 }

  76. 76. method number($/) {
 my $n = $<integer>.made;
 $n *= $<sign>.made if $<sign>;
 $/.make($n);
 }
  77. 77. method TOP($/) {
 $/.make($<number>.made);
 }
  78. 78. With AST, you defer execution
  79. 79. my condition = 0;
 if condition say "Passed"; rule function-call {
 ['if' <variable-name>]? <function-name> <value>
 }
  80. 80. my condition = 0;
 if condition say "Passed"; method statement($/) {
 if $<condition> {
 my $condition = $<condition>.made;
 fail unless $condition;
 }
 }
  81. 81. class ASTNode { 
 }
 
 class AST::ScalarDeclaration is ASTNode {
 has Str $.variable-name;
 has $.value;
 }
  82. 82. my a;
  83. 83. my a; my b = 2;
  84. 84. $ grep class LinguaAST.pm class ASTNode { class AST::TOP is ASTNode { class AST::ScalarDeclaration is ASTNode { class AST::NumberValue is ASTNode { class AST::StringValue is ASTNode { class AST::Null is ASTNode { class AST::Variable is ASTNode { class AST::ArrayDeclaration is ASTNode { class AST::HashDeclaration is ASTNode { class AST::ScalarAssignment is ASTNode { class AST::ArrayItemAssignment is ASTNode { class AST::HashItemAssignment is ASTNode { class AST::ArrayAssignment is ASTNode { class AST::HashAssignment is ASTNode { class AST::MathOperations is ASTNode { class AST::ArrayItem is ASTNode { class AST::HashItem is ASTNode { class AST::FunctionCall is ASTNode { class AST::Condition is ASTNode { class AST::Loop is ASTNode { class AST::While is ASTNode {
  85. 85. github.com/ash/lingua

×