YACC
Prof. R. A. Deshmukh
YACC TOOL
 What is YACC?
 A tool that produces a parser for a given
grammar.
 YACC (Yet Another Compiler Compiler) is a
program that takes an LALR(1) grammar and
produces the source code of a syntactic
analyzer (parser) for the language generated by
that grammar.
2
yacc
How YACC Works
(1) Parser generation time
YACC source (*.y)
y.tab.h
y.tab.c
C compiler/linker
(2) Compile time
y.tab.c a.out
a.out
(3) Run time
input output
y.output
3
4
Lex with Yacc
Lex Yacc
yylex() yyparse()
Lex source
(Lexical Rules)
Yacc source
(Grammar Rules)
Input
Parsed
Input
lex.yy.c y.tab.c
return token
call
 Input to yacc is divided into three sections.
... definitions ...
%%
... rules ...
%%
... subroutines ...
5
YACC File Format
6
%{
C declarations
%}
yacc declarations
%%
Grammar rules
%%
Additional C code
 Comments enclosed in /* ... */ may appear
in any of the sections.
YACC Declaration Summary
7
`%start'
Specify the grammar's start symbol
`%union'
Declare the collection of data types that semantic values may
have
`%token'
Declare a terminal symbol (token type name) with no
precedence or associativity specified
`%type'
Declare the type of semantic values for a nonterminal
symbol
YACC Declaration Summary
8
`%right'
Declare a terminal symbol (token type name) that is
right-associative
`%left'
Declare a terminal symbol (token type name) that is
left-associative
`%nonassoc'
Declare a terminal symbol (token type name) that is
nonassociative
 By default, the start symbol is the first non-terminal
specified in the grammar specification section.
 To override it, use the %start declaration:
%start non-terminal
9
Rules
10
• Rule format: the production E -> E+T | T is written as
• E : E '+' T { }
| T { }
• ;
nonterminal : alt 1 {action 1}
| alt 2 {action 2}
. . .
| alt n {action n}
;
Actions are optional; they are C code.
• Actions are usually at the end of a body, but may also
appear in the middle of one.
• Ignoring the actions, the grammar part is the same as:
nonterminal  alt 1 | alt 2 | ... | alt n
Works with Lex
YACC
yyparse()
Input programs
12 + 26
LEX
yylex()
How to work ?
11
Works with Lex
YACC
yyparse()
Input programs
12 + 26
LEX
yylex()
call yylex()
[0-9]+
next token is NUM
NUM ‘+’ NUM
12
Yacc specification of simple calculator
%{
/* Prologue: copied verbatim into y.tab.c ahead of the generated parser. */
#include <stdio.h>
#include <ctype.h>      /* isdigit(), used by the hand-written yylex() */

int yylex(void);
int yyerror(char *msg);
%}
%token digit
%%
/* S: one or more newline-terminated expressions; each result is printed. */
S : S E '\n'    { printf("ans=%d\n", $2); }
  | E '\n'      { printf("ans=%d\n", $1); }
  ;
/* E/T/F layering makes '*' bind tighter than '+'; the left recursion
   makes both operators left-associative. */
E : E '+' T     { $$ = $1 + $3; }
  | T
  ;
T : T '*' F     { $$ = $1 * $3; }
  | F
  ;
F : '(' E ')'   { $$ = $2; }
  | digit
  ;
%%
/* slide 13 */
/* Program entry point: runs the generated parser over standard input and
   uses yyparse()'s result as the exit status. */
int main(void)
{
    return yyparse();
}
/*
 * Error callback invoked by the generated parser.
 * Prints msg on its own line and returns 1.
 */
int yyerror(char *msg)
{
    printf("%s\n", msg);
    return 1;
}
/*
 * Hand-written scanner for the single-digit calculator.
 *
 * Returns `digit` with yylval set to the digit's value, 0 at end of
 * input, or the character itself for anything else (operators,
 * parentheses, newline).
 */
int yylex(void)
{
    int c;

    /* Skip blanks and tabs. The character has to be re-read on every
       pass, otherwise a single blank would keep the loop spinning. */
    do {
        c = getchar();
    } while (c == ' ' || c == '\t');

    if (c == EOF)
        return 0;           /* token 0 is yacc's end-of-input marker */

    if (isdigit(c)) {
        yylval = c - '0';
        return digit;
    }
    return c;
}
main => yyparse => yylex
2 + 5 \n
c = '2' (ASCII 50) => yylval = 2, digit => parser
digit 2
F 2
T 2
E 2
yyparse => yylex => '+' => parser
E '+' 2 _
yyparse => yylex => '5' => yylval = 5, digit => yyparse
E '+' digit 2 _ 5
E '+' F 2 _ 5
E '+' T 2 _ 5
E 7
yyparse => yylex => '\n'
E '\n' 7
S print(7)
3+5*7 \n
[root@localhost ~]# yacc -d -v f1.y
[root@localhost ~]# ls y*
y.output y.tab.c y.tab.h
[root@localhost ~]# gcc y.tab.c
[root@localhost ~]# ./a.out
2+3
ans=5
4*5+6
ans=26
[root@localhost ~]# cat y.tab.h
#define digit 257
[root@localhost ~]#
15
%{
/* Prologue: copied verbatim into y.tab.c ahead of the generated parser. */
#include <stdio.h>
#include <math.h>       /* pow() for the '^' operator */
#include <ctype.h>      /* isdigit(), used by the hand-written yylex() */

/* Semantic values are doubles. This must be defined inside %{ %} so that
   it reaches the C compiler before the parser tables are declared. */
#define YYSTYPE double

int yylex(void);
int yyerror(char *msg);
%}
%token digit
/* Precedence rises from the first line to the last. */
%left '+' '-'
%left '*' '/'
%right '^'
%nonassoc UMINUS
%start S
%%
16
/* S: one or more newline-terminated expressions; each result is printed. */
S : S E '\n'    { printf("ans=%f\n", $2); }
  | E '\n'      { printf("ans=%f\n", $1); }
  ;
/* The grammar is ambiguous on purpose; the %left/%right/%nonassoc
   declarations resolve the conflicts. */
E : E '+' E             { $$ = $1 + $3; }
  | E '*' E             { $$ = $1 * $3; }
  | E '-' E             { $$ = $1 - $3; }
  | E '/' E             { $$ = $1 / $3; }
  | E '^' E             { $$ = pow($1, $3); }
  | '-' E %prec UMINUS  { $$ = -$2; }   /* unary minus takes UMINUS's precedence */
  | digit
  | '(' E ')'           { $$ = $2; }
  ;
%%
/*
 * Hand-written scanner for the floating-point calculator.
 *
 * Returns `digit` with yylval set to the number read, 0 at end of input,
 * or the character itself for an operator, parenthesis or newline.
 * Any other character is reported and skipped.
 */
int yylex(void)
{
    int c;

    for (;;) {
        /* Skip blanks and tabs; the character must be re-read on every
           pass or a single blank would keep the loop spinning. */
        do {
            c = getchar();
        } while (c == ' ' || c == '\t');

        if (c == EOF)
            return 0;       /* token 0 is yacc's end-of-input marker */

        if (isdigit(c) || c == '.') {
            /* Push the first character back and let scanf parse the
               whole number. */
            ungetc(c, stdin);
            if (scanf("%lf", &yylval) == 1)
                return digit;
            /* Not a number after all (for example a lone '.'). */
            printf("invalid character=%c\n", c);
            continue;
        }

        switch (c) {
        case '+': case '-': case '*': case '/': case '^':
        case '(': case ')': case '\n':
            return c;
        default:
            /* Report and move on to the next character, so the function
               never ends without returning a token. */
            printf("invalid character=%c\n", c);
            break;
        }
    }
}
17
/* Program entry point: parses standard input and reports yyparse()'s
   result as the exit status. */
int main(void)
{
    return yyparse();
}
/*
 * Error callback invoked by the generated parser.
 * Prints msg on its own line and returns 1.
 */
int yyerror(char *msg)
{
    printf("%s\n", msg);
    return 1;
}
[root@localhost ~]# yacc -d -v f2.y
[root@localhost ~]# gcc y.tab.c -lm
[root@localhost ~]# ./a.out
23+56.5*2
ans=136.000000
12*5.5-5^2
ans=41.000000
[root@localhost ~]#
18
%{
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One symbol-table entry: a variable name and its current value.
   SYM is the fixed-size table; a NULL name marks an unused slot. */
struct symtab
{
    char *name;
    double val;
} SYM[20];

/* Prints every used entry of SYM. */
void disp();
/* Returns the entry for s, creating it if necessary. */
struct symtab *install_id(char *s);
/* Declared here because the generated parser calls both. */
int yylex(void);
int yyerror();
%}
%union {
    double v;           /* numeric value: NUM tokens and E */
    struct symtab *p;   /* symbol-table entry: id tokens */
}
/* slide 19 */
%token SIN COS
%token <v> NUM
%token <p> id
/* Precedence rises from the first line to the last. */
%right '='
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%type <v> E
%%
lines : lines S
      | S
      ;
/* A statement is either an assignment to a variable or an expression
   whose value is printed. */
S : id '=' E '\n'       { $1->val = $3; }
  | E '\n'              { printf("ans=%lf\n", $1); }
  ;
/* In an action, $i is the value of the i-th symbol of the right-hand side. */
E : E '+' E             { $$ = $1 + $3; }
  | E '-' E             { $$ = $1 - $3; }
  | E '*' E             { $$ = $1 * $3; }
  | E '/' E             { $$ = $1 / $3; }
  | '-' E %prec UMINUS  { $$ = -$2; }
  | '(' E ')'           { $$ = $2; }
  | NUM
  | id                  { $$ = $1->val; }
  /* The argument is scaled by 3.14/180 before calling sin()/cos(),
     i.e. it is taken in degrees with 3.14 standing in for pi. */
  | SIN '(' E ')'       { $$ = sin(($3 * 3.14) / 180); }
  | COS '(' E ')'       { $$ = cos(($3 * 3.14) / 180); }
  ;
%%
20
/* Program entry point: parses standard input, then prints the symbol
   table. The parser's result becomes the exit status. */
int main(void)
{
    int status = yyparse();

    disp();
    return status;
}
/*
 * Error callback invoked by the generated parser.
 * Prints msg on its own line, so a syntax error is no longer silent,
 * and returns 1.
 */
int yyerror(char *msg)
{
    printf("%s\n", msg);
    return 1;
}
/* Prints each used symbol-table entry as "name<TAB>value", one per line.
   Slots whose name is NULL are skipped. */
void disp()
{
    struct symtab *k;

    for (k = SYM; k < &SYM[20]; k++) {
        if (k->name) {
            printf("%s\t%f\n", k->name, k->val);
        }
    }
}
21
/*
 * Looks up s in SYM and returns its entry. If s is not present, the
 * first unused slot (name == NULL) is claimed for a copy of s.
 *
 * Never returns NULL: if the table is full or the copy cannot be
 * allocated, an error is printed and the program exits, because callers
 * dereference the result unconditionally.
 */
struct symtab *install_id(char *s)
{
    struct symtab *k;

    for (k = SYM; k < &SYM[20]; k++) {
        if (k->name == NULL) {
            k->name = strdup(s);
            if (k->name == NULL) {
                fprintf(stderr, "install_id: out of memory\n");
                exit(EXIT_FAILURE);
            }
            return k;
        }
        if (strcmp(k->name, s) == 0)
            return k;
    }

    fprintf(stderr, "install_id: symbol table full, cannot add '%s'\n", s);
    exit(EXIT_FAILURE);
}
%{
/* Scanner for the symbol-table calculator; token codes and YYSTYPE come
   from the yacc-generated y.tab.h. */
#include <stdio.h>
#include <stdlib.h>

struct symtab;                          /* defined in the yacc file */
struct symtab *install_id(char *s);     /* declared so the pointer result is not taken for an int */

#include "y.tab.h"
%}
num ([0-9]+)(\.[0-9]+)?([eE][-+]?[0-9]+)?
%%
"sin"                   { return SIN; }
"cos"                   { return COS; }
[A-Za-z][A-Za-z0-9]*    { yylval.p = install_id(yytext); return id; }
{num}                   { yylval.v = atof(yytext); return NUM; }
\n                      { return yytext[0]; }
.                       { return yytext[0]; }
%%
/* slide 22 */
/* Called by the scanner at end of input; returning 1 means there is no
   further input to switch to. */
int yywrap()
{
    return 1;
}
[root@localhost ~]# yacc -d -v y1.y
[root@localhost ~]# lex y1.l
[root@localhost ~]# gcc -o y1 lex.yy.c y.tab.c -lm
[root@localhost ~]# ./y1
3+4*5
ans =23.000000
a=10
sin(30)
ans =0.499770
a+100
10.000000
ans =110.000000
cos(90)
ans =0.000796
name value
a 10.000000 23
[root@localhost ~]# cat y.tab.h
#define id 257
#define SIN 258
#define COS 259
#define num 260
#define UMINUS 261
typedef union
{
struct symtab *p;
double val;
} YYSTYPE;
extern YYSTYPE yylval;
24

Compiler Design_Syntax Analyzer_Yaac Tool.pptx

  • 1.
  • 2.
    YACC TOOL  What is YACC ?  Tool which will produce a parser for a given grammar.  YACC (Yet Another Compiler Compiler) is a program designed to compile a LALR(1) grammar and to produce the source code of the syntactic analyzer of the language produced by this grammar 2
  • 3.
    yacc How YACC Works (1) Parser generation time YACC source (*.y) y.tab.h y.tab.c C compiler/linker (2) Compile time y.tab.c a.out a.out (3) Run time input output y.output 3
  • 4.
    4 Lex with Yacc Lex Yacc yylex() yyparse() Lex source (Lexical Rules) Yacc source (Grammar Rules) Input Parsed Input lex.yy.c y.tab.c return token call
  • 5.
     Input to yacc is divided into three sections. ... definitions ... %% ... rules ... %% ... subroutines ... 5
  • 6.
    YACC File Format 6 %{ C declarations %} yacc declarations %% Grammar rules %% Additional C code  Comments enclosed in /* ... */ may appear in any of the sections.
  • 7.
    YACC Declaration Summary 7 `%start' Specify the grammar's start symbol `%union' Declare the collection of data types that semantic values may have `%token' Declare a terminal symbol (token type name) with no precedence or associativity specified `%type' Declare the type of semantic values for a nonterminal symbol
  • 8.
    YACC Declaration Summary 8 `%right' Declare a terminal symbol (token type name) that is right-associative `%left' Declare a terminal symbol (token type name) that is left-associative `%nonassoc' Declare a terminal symbol (token type name) that is nonassociative
  • 9.
      The first non-terminal specified in the grammar specification section.  To overwrite it with %start declaration. %start non-terminal 9
  • 10.
    Rules 10 • Rule format: E -> E+T|T • E: E ‘+’ T { } |T {} • ; nonterminal : alt 1 {action 1} | alt 2 {action 2} . . . | alt n {action n} ; Actions are optional; they are C code. • Actions are usually at the end of a body, but grammar part is the same as: nonterminal  alt 1 | alt 2 | ... | alt n
  • 11.
    Works with Lex YACC yyparse() Input programs 12 + 26 LEX yylex() How to work ? 11
  • 12.
    Works with Lex YACC yyparse() Input programs 12 + 26 LEX yylex() call yylex() [0-9]+ next token is NUM NUM ‘+’ NUM 12
  • 13.
    Yacc specification of simple calculator %{ # include<stdio.h> %} %token digit %% S :S E 'n' {printf("ans=%dn",$2);} | E 'n' {printf("ans=%dn",$1);} ; E : E '+' T {$$ = $1+$3;} |T ; T : T '*' F {$$ = $1 * $3;} | F ; F : '(' E ')' {$$ = $2;} | digit ; %% 13 main() { yyparse(); } int yyerror(char *msg) { printf("%sn",msg); return 1; } yylex() { int c; c=getchar(); while(c==' '||c=='t'); if(isdigit(c)) { yylval = c-'0'; return digit; } return c; }
  • 14.
    Main => yyparse => yylex 2 + 5 n C(50) => yylval=2, digit =>parser Digit 2 F 2 T 2 E 2 Yyparse => yylex => ‘+’ =>parser E ‘+’ 2 _ Yyparse => yylex => ‘5’ => yylval = 5, digit =>yyparse E ‘+’digit 2 _ 5 E ‘+’ F 2_5 E ‘+’ T 2_5 E 7 Yyparse => yylex => n E ‘n’ 7 S print(7) 3+5*7 n
  • 15.
    [root@localhost ~]# yacc -d -v f1.y [root@localhost ~]# ls y* y.output y.tab.c y.tab.h [root@localhost ~]# gcc y.tab.c [root@localhost ~]# ./a.out 2+3 ans=5 4*5+6 ans=26 [root@localhost ~]# cat y.tab.h #define digit 257 [root@localhost ~]# 15
  • 16.
    %{ # include<stdio.h> # include<math.h> %} #define YYSTYPE double %token digit %left '+' '-' %left '*' '/' %right '^' %nonassoc UMINUS %start S %% 16 S :S E 'n' {printf("ans=%fn", $2);} | E 'n' {printf("ans=%fn", $1);} ; E : E '+' E {$$ = $1+$3;} |E '*' E {$$ = $1*$3;} |E'-'E {$$ = $1-$3;} |E'/'E {$$ = $1/$3;} |E'^'E {$$ = pow($1,$3);} | '-' E %prec UMINUS {$$ = - $2;} |digit |'('E')' {$$ = $2;} ; %%
  • 17.
    yylex() { int c; c=getchar(); while(c==' '||c=='t'); if(isdigit(c)||c=='.') { ungetc(c,stdin); scanf("%lf",&yylval); return digit; } if(c=='+'||c=='-'||c=='*'||c=='/'||c=='^'|| c=='('||c==')'||c=='n') return c; else printf("invalid character=%cn",c); } 17 main() { yyparse(); } int yyerror(char *msg) { printf("%sn",msg); return 1; }
  • 18.
    [root@localhost ~]# yacc -d -v f2.y [root@localhost ~]# gcc y.tab.c -lm [root@localhost ~]# ./a.out 23+56.5*2 ans=136.000000 12*5.5-5^2 ans=41.000000 [root@localhost ~]# 18
  • 19.
    %{ # include<math.h> # include<stdio.h> struct symtab { char *name; double val; }SYM[20]; void disp(); struct symtab *install_id(char *s); %} %union{ double v; struct symtab *p; } 19 %token SIN COS %token<v> NUM %token<p> id %right ‘=‘ %left '+' '-' %left '*' '/' %nonassoc UMINUS %type<v> E
  • 20.
    %% lines: lines S |S ; S:id ‘=‘ E 'n’ {$1->val=$3;} |E 'n' {printf("ans=%lfn",$1);} ; E: E'+'E {$$ = $1 + $3;} | E'-'E {$$=$1-$3;} $i => value of ith symbol of RHS |E'*'E {$$=$1*$3;} |E'/'E {$$=$1/$3;} | '-' E %prec UMINUS {$$ = -$2;} |'('E')' {$$ = $2;} |NUM |id {$$=$1->val;} |SIN'('E')‘ {$$=sin(($3*3.14)/180);} |COS'('E')' {$$=cos(($3*3.14)/180);} ; %% 20
  • 21.
    main() { yyparse(); disp(); } int yyerror() { return 1; } void disp() { struct symtab *k; for(k=SYM;k<&SYM[20];k++) { if(k->name) { printf("%st%fn",k->name,k- >val); } } } 21 struct symtab * install_id(char *s) { struct symtab *k; for(k=SYM;k<&SYM[20];k++) { if(k->name&&!strcmp(k- >name,s)) return k; else if(!k->name) { k->name=strdup(s); return k; } } }
  • 22.
    %{ # include<stdio.h> # include<stdlib.h> #include "y.tab.h" %} num ([0-9]+)(.[0-9]+)?([eE][-+]? [0-9]+)? %% "sin" {return SIN;} “cos” {return COS;} [A-Za-z][A-Za-z0-9]* {yylval.p=install_id(yytext);ret urn id;} {num} {yylval.v=atof(yytext); printf("ans= %f",yylval.val);return num;} [n] {return yytext[0];} . {return yytext[0];} 22 %% int yywrap() { return 1; }
  • 23.
    [root@localhost ~]# yacc -d -v y1.y [root@localhost ~]# lex y1.l [root@localhost ~]# gcc -o y1 lex.yy.c y.tab.c -lm [root@localhost ~]# ./y1 3+4*5 ans =23.000000 a=10 sin(30) ans =0.499770 a+100 10.000000 ans =110.000000 cos(90) ans =0.000796 name value a 10.000000 23
  • 24.
    [root@localhost ~]# cat y.tab.h #define id 257 #define SIN 258 #define COS 259 #define num 260 #define UMINUS 261 typedef union { struct symtab *p; double val; } YYSTYPE; extern YYSTYPE yylval; 24