SlideShare a Scribd company logo
1 of 120
project4-ast/.DS_Store
project4-ast/ast.c
#include "symbolTable.h"
#include "ast.h"
extern SymbolTableStackEntryPtr symbolStackTop;
extern int scopeDepth;
/*
 * Creates a new expression node of the given kind.
 *
 * The node is tagged nKind = EXPRESSION and eKind = kind; every child,
 * the sibling link, the type, the symbol pointers and fname start out
 * NULL, and nValue / nLinenumber start at 0.  The caller fills in the
 * fields that apply to the specific expression.
 *
 * Returns the new node, or NULL if memory could not be allocated.
 */
AstNodePtr new_ExprNode(ExpKind kind)
{
    AstNodePtr node = malloc(sizeof *node);
    int i;

    if (node == NULL)
        return NULL;

    node->nKind = EXPRESSION;
    node->sKind = EXPRESSION_STMT; /* not meaningful for expression nodes */
    node->eKind = kind;
    for (i = 0; i < MAXCHILDREN; i++)
        node->children[i] = NULL;
    node->sibling = NULL;
    node->nType = NULL;
    node->nSymbolPtr = NULL;
    node->nValue = 0;
    node->nSymbolTabPtr = NULL;
    node->fname = NULL;
    node->nLinenumber = 0;
    return node;
}
/*
 * Creates a new statement node of the given kind.
 *
 * The node is tagged nKind = STMT and sKind = kind; every child, the
 * sibling link, the type, the symbol pointers and fname start out NULL,
 * and nValue / nLinenumber start at 0.  The caller attaches children
 * (condition, body, ...) afterwards.
 *
 * Returns the new node, or NULL if memory could not be allocated.
 */
AstNodePtr new_StmtNode(StmtKind kind)
{
    AstNodePtr node = malloc(sizeof *node);
    int i;

    if (node == NULL)
        return NULL;

    node->nKind = STMT;
    node->sKind = kind;
    node->eKind = VAR_EXP; /* not meaningful for statement nodes */
    for (i = 0; i < MAXCHILDREN; i++)
        node->children[i] = NULL;
    node->sibling = NULL;
    node->nType = NULL;
    node->nSymbolPtr = NULL;
    node->nValue = 0;
    node->nSymbolTabPtr = NULL;
    node->fname = NULL;
    node->nLinenumber = 0;
    return node;
}
/*
 * Creates a new type descriptor for entry into the symbol table.
 *
 * kind is stored as given; dimension starts at 0 and the function
 * pointer at NULL, so array and function types must be completed by
 * the caller.
 *
 * Returns the new descriptor, or NULL if memory could not be allocated.
 */
Type* new_type(TypeKind kind)
{
    Type *type = malloc(sizeof *type);

    if (type == NULL)
        return NULL;

    type->kind = kind;
    type->dimension = 0;
    type->function = NULL;
    return type;
}
project4-ast/ast.h
#include "globals.h"
#include "symbolTable.h"
/* Top-level category of an AST node. */
typedef enum {
    STMT,       /* statement; refined by StmtKind */
    EXPRESSION, /* expression; refined by ExpKind */
    FORMALVAR,  /* formal parameter of a function */
    METHOD      /* function definition */
} NodeKind;
/* Kind of a statement node (meaningful when nKind == STMT). */
typedef enum {
    IF_THEN_ELSE_STMT,
    WHILE_STMT,
    RETURN_STMT,
    COMPOUND_STMT,
    EXPRESSION_STMT
} StmtKind;
/* Kind of an expression node (meaningful when nKind == EXPRESSION). */
typedef enum {
    VAR_EXP,   /* plain variable */
    ARRAY_EXP, /* indexed array element */
    ASSI_EXP,  /* assignment */
    ADD_EXP,
    SUB_EXP,
    MULT_EXP,
    DIV_EXP,
    GT_EXP,
    LT_EXP,
    GE_EXP,
    LE_EXP,
    EQ_EXP,
    NE_EXP,
    CALL_EXP,  /* function call */
    CONST_EXP  /* integer constant */
} ExpKind;
typedef struct node {
NodeKind nKind;
StmtKind sKind;
ExpKind eKind;
struct node *children[MAXCHILDREN]; // max needed for
if_then_else
struct node *sibling; //List of statements, formal argument
Type *nType; //node's type for typechecking expressions for
HW5
ElementPtr nSymbolPtr; // for variable and array expressions
int nValue; // for integer constants
SymbolTablePtr nSymbolTabPtr; //For a block, its symbol
table
char *fname; // for a call expr, the name of the called
function
int nLinenumber;
}AstNode;
typedef AstNode *AstNodePtr;
AstNodePtr new_ExprNode(ExpKind kind) ;
AstNodePtr new_StmtNode(StmtKind kind);
Type* new_type(TypeKind kind);
project4-ast/CCOM4087-Proyect4-AST.pdf
mailto:[email protected]
mailto:[email protected]
mailto:[email protected]
mailto:[email protected]
mailto:[email protected]
mailto:[email protected]
mailto:[email protected]
mailto:[email protected]
mailto:[email protected]
mailto:[email protected]
http://piazza.com
http://piazza.com
http://piazza.com
project4-ast/cmlexer.l
/**********************/
/* C header files */
/**********************/
%{
#include "globals.h"
#include "ast.h"
#include "cmparser.tab.h"
char tokenString[TOKENMAX];
int printoutScan = 0;
%}
%option yylineno
/**********************/
/* start your regular definitions here */
/**********************/
/* NUMBER: one or more decimal digits */
NUMBER [0-9]+
/* IDENTIFIER: letter, underscore or dollar, then any of those or digits */
IDENTIFIER [a-zA-Z_$][a-zA-Z_$0-9]*
/* WHITESPACE: blanks, tabs, carriage returns and newlines */
WHITESPACE [ \t\r\n]+
/* start your token specifications here */
/* Token names must come from cmparser.tab.h */
%%
auto { return TOK_ERROR; }
continue { return TOK_ERROR; }
goto { return TOK_ERROR; }
break { return TOK_ERROR; }
default { return TOK_ERROR; }
long { return TOK_ERROR; }
case { return TOK_ERROR; }
do { return TOK_ERROR; }
short { return TOK_ERROR; }
char { return TOK_ERROR; }
double { return TOK_ERROR; }
register { return TOK_ERROR; }
const { return TOK_ERROR; }
for { return TOK_ERROR; }
switch { return TOK_ERROR; }
byte { return TOK_ERROR; }
float { return TOK_ERROR; }
"~" { return TOK_ERROR; }
"&" { return TOK_ERROR; }
"+=" { return TOK_ERROR; }
"^=" { return TOK_ERROR; }
"?" { return TOK_ERROR; }
"|" { return TOK_ERROR; }
"-=" { return TOK_ERROR; }
"%=" { return TOK_ERROR; }
":" { return TOK_ERROR; }
"^" { return TOK_ERROR; }
"*=" { return TOK_ERROR; }
"<<=" { return TOK_ERROR; }
"++" { return TOK_ERROR; }
"<<" { return TOK_ERROR; }
"/=" { return TOK_ERROR; }
">>=" { return TOK_ERROR; }
"--" { return TOK_ERROR; }
">>" { return TOK_ERROR; }
"&=" { return TOK_ERROR; }
">>>=" { return TOK_ERROR; }
"!" { return TOK_ERROR; }
">>>" { return TOK_ERROR; }
"|=" { return TOK_ERROR; }
else { return TOK_ELSE; }
if { return TOK_IF; }
return { return TOK_RETURN; }
void { return TOK_VOID; }
int { return TOK_INT; }
while { return TOK_WHILE; }
"+" { return TOK_PLUS; }
"-" { return TOK_MINUS; }
"*" { return TOK_MULT; }
"/" { return TOK_DIV; }
"<" { return TOK_LT; }
"<=" { return TOK_LE; }
">" { return TOK_GT; }
">=" { return TOK_GE; }
"==" { return TOK_EQ; }
"!=" { return TOK_NE; }
"=" { return TOK_ASSIGN; }
";" { return TOK_SEMI; }
"," { return TOK_COMMA; }
"(" { return TOK_LPAREN; }
")" { return TOK_RPAREN; }
"[" { return TOK_LSQ; }
"]" { return TOK_RSQ; }
"{" { return TOK_LBRACE; }
"}" { return TOK_RBRACE; }
{NUMBER} { yylval.iVal = atoi(yytext); return TOK_NUM; }
{IDENTIFIER} {yylval.cVal = strdup(yytext); return TOK_ID;
}
{WHITESPACE} { /* skip white space */ }
"//".* { /* line comment: discard up to the end of the line */ }
"/*"([^*]|\*+[^*/])*\*+"/" { /* block comment: discard, including embedded newlines */ }
. { return TOK_ERROR; }
%%
/**********************/
/* C support functions */
/**********************/
void printToken(int token, char *str)
{
/* Print the line number, token name and matched lexeme
-- one per line without any additional characters exactly as
below */
/* Example 13:TOK_INT: 37*/
switch(token)
{
case TOK_ELSE:
fprintf(yyout,"%d : TOK_ELSE :
%sn",yylineno,str);
break;
case TOK_IF:
fprintf(yyout,"%d : TOK_IF : %sn",yylineno,str);
break;
case TOK_RETURN:
fprintf(yyout,"%d : TOK_RETURN :
%sn",yylineno,str);
break;
case TOK_VOID:
fprintf(yyout,"%d : TOK_VOID :
%sn",yylineno,str);
break;
case TOK_INT:
fprintf(yyout,"%d : TOK_INT : %sn",yylineno,str);
break;
case TOK_WHILE:
fprintf(yyout,"%d : TOK_WHILE :
%sn",yylineno,str);
break;
case TOK_ID:
fprintf(yyout,"%d : TOK_ID : %sn",yylineno,str);
break;
case TOK_NUM:
fprintf(yyout,"%d : TOK_NUM : %sn",yylineno,str);
break;
case TOK_PLUS:
fprintf(yyout,"%d : TOK_PLUS : %sn",yylineno,str);
break;
case TOK_MINUS:
fprintf(yyout,"%d : TOK_MINUS :
%sn",yylineno,str);
break;
case TOK_MULT:
fprintf(yyout,"%d : TOK_MULT :
%sn",yylineno,str);
break;
case TOK_DIV:
fprintf(yyout,"%d : TOK_DIV : %sn",yylineno,str);
break;
case TOK_LT:
fprintf(yyout,"%d : TOK_LT : %sn",yylineno,str);
break;
case TOK_LE:
fprintf(yyout,"%d : TOK_LE : %sn",yylineno,str);
break;
case TOK_GT:
fprintf(yyout,"%d : TOK_GT : %sn",yylineno,str);
break;
case TOK_GE:
fprintf(yyout,"%d : TOK_GE : %sn",yylineno,str);
break;
case TOK_EQ:
fprintf(yyout,"%d : TOK_EQ : %sn",yylineno,str);
break;
case TOK_NE:
fprintf(yyout,"%d : TOK_NE : %sn",yylineno,str);
break;
case TOK_ASSIGN:
fprintf(yyout,"%d : TOK_ASSIGN :
%sn",yylineno,str);
break;
case TOK_SEMI:
fprintf(yyout,"%d : TOK_SEMI : %sn",yylineno,str);
break;
case TOK_COMMA:
fprintf(yyout,"%d : TOK_COMMA :
%sn",yylineno,str);
break;
case TOK_LPAREN:
fprintf(yyout,"%d : TOK_LPAREN :
%sn",yylineno,str);
break;
case TOK_RPAREN:
fprintf(yyout,"%d : TOK_RPAREN :
%sn",yylineno,str);
break;
case TOK_LSQ:
fprintf(yyout,"%d : TOK_LSQ : %sn",yylineno,str);
break;
case TOK_RSQ:
fprintf(yyout,"%d : TOK_RSQ : %sn",yylineno,str);
break;
case TOK_LBRACE:
fprintf(yyout,"%d : TOK_LBRACE :
%sn",yylineno,str);
break;
case TOK_RBRACE:
fprintf(yyout,"%d : TOK_RBRACE :
%sn",yylineno,str);
break;
case TOK_ERROR:
fprintf(yyout,"%d : TOK_ERROR :
%sn",yylineno,str);
break;
}
}
/*
 * Fetches the next token from the scanner.
 *
 * The lexeme is copied into the global tokenString (truncated to
 * TOKENMAX - 1 characters) and, when printoutScan is non-zero, the
 * token is echoed through printToken().
 *
 * Returns the token code, or 0 at end of input.
 */
int gettok(void){
    int currentToken = yylex();

    if (currentToken == 0) /* 0 means EOF */
        return 0;

    /* strncpy leaves the buffer unterminated when the lexeme fills it,
       so copy one byte less and terminate explicitly. */
    strncpy(tokenString, yytext, TOKENMAX - 1);
    tokenString[TOKENMAX - 1] = '\0';

    if (printoutScan)
        printToken(currentToken, tokenString);

    return currentToken;
}
/*
 * Selects the scanner's input stream.
 *
 * With a command-line argument, argv[1] is opened for reading;
 * otherwise input comes from stdin.
 *
 * Returns 1 on success.  If the file cannot be opened, the reason is
 * reported on stderr and 0 is returned; yyin is then NULL, which flex
 * treats as "read stdin" on the first yylex() call, so callers that
 * want to stop must check the return value.
 */
int initLex(int argc, char **argv){
    if (argc > 1) {
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            perror(argv[1]);
            return 0;
        }
    } else {
        yyin = stdin;
    }
    return 1;
}
project4-ast/cmparser.y
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ast.h"
#include "symbolTable.h"
#include "util.h"
/* other external function prototypes */
extern int yylex();
extern int initLex(int , char **);
/* external global variables */
extern int yydebug;
extern int yylineno;
extern SymbolTableStackEntryPtr symbolStackTop;
extern int scopeDepth;
/* function prototypes */
void yyerror(const char *);
/* global variables */
AstNodePtr program;
%}
/* YYSTYPE */
%union
{
AstNodePtr nodePtr;
int iVal;
char *cVal;
Type *type;
}
/* terminals */
%token TOK_ELSE TOK_IF TOK_RETURN TOK_VOID
TOK_INT TOK_WHILE
%token TOK_PLUS TOK_MINUS TOK_MULT TOK_DIV
TOK_LT TOK_LE TOK_GT TOK_GE TOK_EQ TOK_NE
TOK_ASSIGN TOK_SEMI TOK_COMMA
%token TOK_LPAREN TOK_RPAREN TOK_LSQ TOK_RSQ
TOK_LBRACE TOK_RBRACE TOK_ERROR
%token <cVal> TOK_ID
%token <iVal> TOK_NUM
%type <nodePtr> Declarations Functions
%type <type> Type_Specifier
%type <nodePtr> Compound_Stmt Statements Statement
%type <nodePtr> Expr_Statement If_Else_Statement
Selection_Stmt Iteration_Stmt Return_Stmt
%type <nodePtr> Expression Simple_Expression
Additive_Expression Factor Var Call
%type <nodePtr> Args Args_List
/* associativity and precedence (lowest first, highest last) */
/* TOK_IF below TOK_ELSE: used with %prec to resolve the dangling else */
%nonassoc TOK_IF
%nonassoc TOK_ELSE
%right TOK_ASSIGN
%left TOK_EQ TOK_NE
%nonassoc TOK_LT TOK_GT TOK_LE TOK_GE
/* the subtraction token is TOK_MINUS; TOK_SUB is not a declared token */
%left TOK_PLUS TOK_MINUS
%left TOK_MULT TOK_DIV
%nonassoc error
%%
Start : Declarations {
}
;
Declarations : Functions {
program = $1;
}
| Var_Declaration Declarations {
}
;
Functions : Fun_Declaration {
}
| Fun_Declaration Functions {
}
;
Var_Declaration : Type_Specifier TOK_ID TOK_SEMI {
}
| Type_Specifier TOK_ID TOK_LSQ TOK_NUM
TOK_RSQ TOK_SEMI {
}
;
Fun_Declaration : Type_Specifier TOK_ID TOK_LPAREN
Params TOK_RPAREN Compound_Stmt {
}
;
Params : Param_List {
}
| TOK_VOID {
}
;
Param_List : Param_List TOK_COMMA Param {
}
| Param {
}
;
Param : Type_Specifier TOK_ID {
}
| Type_Specifier TOK_ID TOK_LSQ TOK_RSQ {
}
;
Type_Specifier : TOK_INT {
}
| TOK_VOID {
}
;
Compound_Stmt : TOK_LBRACE Statements TOK_RBRACE {
}
| TOK_LBRACE Local_Declarations Statements
TOK_RBRACE {
}
;
Local_Declarations : Var_Declaration Local_Declarations {
}
| Var_Declaration {
}
;
Statements : Statement Statements {
}
| {
}
;
Statement : Expr_Statement {
}
| Compound_Stmt {
}
| Selection_Stmt {
}
| Iteration_Stmt {
}
| Return_Stmt {
}
;
Expr_Statement : Expression TOK_SEMI {
}
| TOK_SEMI {
}
;
/*
 * if statement, with or without an else branch.
 *
 * The first alternative carries %prec TOK_IF, and TOK_IF is declared
 * with lower precedence than TOK_ELSE.  When the parser has seen an
 * If_Else_Statement and the next token is TOK_ELSE, the shift therefore
 * wins over reducing by the first alternative, so an else attaches to
 * the nearest unmatched if.
 */
Selection_Stmt : If_Else_Statement %prec TOK_IF {
}
| If_Else_Statement TOK_ELSE Statement {
}
;
If_Else_Statement : TOK_IF TOK_LPAREN Expression
TOK_RPAREN Statement {
}
;
Iteration_Stmt : TOK_WHILE TOK_LPAREN Expression
TOK_RPAREN Statement {
}
;
Return_Stmt : TOK_RETURN Expression TOK_SEMI {
}
| TOK_RETURN TOK_SEMI {
}
;
Expression : Var TOK_ASSIGN Expression {
}
| Simple_Expression {
}
;
Var : TOK_ID {
}
| TOK_ID TOK_LSQ Expression TOK_RSQ {
}
;
Simple_Expression : Additive_Expression TOK_GT
Additive_Expression {
}
| Additive_Expression TOK_LT
Additive_Expression {
}
| Additive_Expression TOK_GE
Additive_Expression {
}
| Additive_Expression TOK_LE
Additive_Expression {
}
| Additive_Expression TOK_EQ
Additive_Expression {
}
| Additive_Expression TOK_NE
Additive_Expression {
}
| Additive_Expression {
}
;
Additive_Expression : Additive_Expression TOK_PLUS Term {
}
| Additive_Expression TOK_MINUS Term {
}
| Term {
}
;
Term : Term TOK_MULT Factor {
}
| Term TOK_DIV Factor {
}
| Factor {
}
;
Factor : TOK_LPAREN Expression TOK_RPAREN {
}
| Var {
}
| Call {
}
| TOK_NUM {
}
;
Call : TOK_ID TOK_LPAREN Args TOK_RPAREN {
}
;
Args : Args_List {
}
| {
}
;
Args_List : Args_List TOK_COMMA Expression {
}
| Expression {
}
;
%%
/*
 * Error callback invoked by the generated parser.
 * Writes "Line <current line>: <message>" and a newline to stderr.
 */
void yyerror (char const *s) {
    fprintf (stderr, "Line %d: %s\n", yylineno, s);
}
/*
 * Program entry point.
 *
 * Sets up the scanner input from the command line, then either dumps
 * the token stream (when built with YYLLEXER defined) or runs the
 * parser.  Always returns 0.
 */
int main(int argc, char **argv){
    initLex(argc,argv);
#ifdef YYLLEXER
    {
        extern int gettok(void); /* defined in the lexer's support code */

        while (gettok() != 0)
            ; /* gettok returns 0 on EOF */
    }
    return 0;
#else
    yyparse();
    // print_Ast();
    return 0;
#endif
}
project4-ast/globals.h
#define TRUE 1
#define FALSE 0
#define TOKENMAX 512
#define MAXHASHSIZE 32
#define MAXCHILDREN 3
project4-ast/Makefile
# Makefile for cminus
#
# Builds the cmparser executable from the flex scanner, the bison
# grammar and the C support files.  Recipe lines start with a TAB.
CC=gcc
LEX=flex
YACC=bison
MV=/bin/mv
RM=/bin/rm
CP=/bin/cp
LIBRARIES= -ll
CFLAGS=-g
YFLAGS=-d
PROGRAM=cmparser
OBJECTS=cmparser.o cmlexer.o symbolTable.o ast.o util.o
SOURCES=cmlexer.l cmparser.y symbolTable.c ast.c util.c
CTEMPS=cmlexer.c cmparser.c cmparser.tab.h cmparser.tab.c \
	cmparser.output

# clean and submit do not produce files of those names
.PHONY: clean submit

$(PROGRAM): $(OBJECTS)
	$(CC) $(CFLAGS) -o $(PROGRAM) $(OBJECTS) $(LIBRARIES)
util.o: util.c
	$(CC) $(CFLAGS) -c util.c
ast.o: ast.c
	$(CC) $(CFLAGS) -c ast.c
symbolTable.o: symbolTable.c
	$(CC) $(CFLAGS) -c symbolTable.c
cmparser.o: cmparser.c
	$(CC) $(CFLAGS) -c cmparser.c
cmparser.c: cmparser.y
	$(YACC) -o cmparser.c cmparser.y
cmlexer.o: cmlexer.c cmparser.tab.h globals.h
	$(CC) $(CFLAGS) -c cmlexer.c
# -d (YFLAGS) makes bison write the token header cmparser.tab.h
cmparser.tab.h: cmparser.y
	$(YACC) $(YFLAGS) cmparser.y
cmlexer.c: cmlexer.l
	$(LEX) -ocmlexer.c cmlexer.l
# -f: some temporaries may not exist, which must not fail the target
clean:
	$(RM) -f $(PROGRAM) $(OBJECTS) $(CTEMPS)
submit:
	mkdir -p submit
	$(CP) $(SOURCES) submit
	turnin -ccs473 -past submit
project4-ast/symbolTable.c
#include "symbolTable.h"
// Top should point to the top of the scope stack,
// which is the most recent scope pushed
SymbolTableStackEntryPtr symbolStackTop;
int scopeDepth;
/* global function prototypes */
//allocate the global scope entry and symbol table --and set
scopeDepth to 0
// The global scope remains till the end of the program
int initSymbolTable()
{
//initialize a stack
int i=0;
symbolStackTop = (SymbolTableStackEntryPtr) malloc
(sizeof(struct symbolTableStackEntry));
if (symbolStackTop == NULL) return 0;
symbolStackTop -> symbolTablePtr = (SymbolTable*)
malloc (sizeof(SymbolTable));
for(i=0; i<MAXHASHSIZE; i++)
symbolStackTop -> symbolTablePtr->hashTable[i] =
NULL;
symbolStackTop->prevScope = NULL;
scopeDepth = 0;
//printf("n Initialized Stack with Global Hash Table");
return 1;
}
/*
 * Looks up a name, starting in the innermost scope and walking outwards
 * through the enclosing scopes.
 *
 * Returns the first matching entry, or NULL if the name is NULL or is
 * not declared in any open scope.
 */
ElementPtr symLookup(char *name)
{
    /* Bucket count used by the hash; must match symInsert().  It is
       smaller than MAXHASHSIZE, so the upper buckets stay empty. */
    enum { HASH_BUCKETS = 16 };
    const unsigned char *p;
    unsigned int sum = 0;
    SymbolTableStackEntryPtr scope;

    if (name == NULL)
        return NULL;

    /* Sum the bytes as unsigned so the bucket index is never negative. */
    for (p = (const unsigned char *) name; *p != '\0'; p++)
        sum += *p;

    for (scope = symbolStackTop; scope != NULL; scope = scope->prevScope) {
        ElementPtr entry = scope->symbolTablePtr->hashTable[sum % HASH_BUCKETS];

        for (; entry != NULL; entry = entry->next) {
            if (strcmp(entry->id, name) == 0)
                return entry;
        }
    }
    return NULL;
}
// Insert an element with a specified type in a particular line
number
// initialize the scope depth of the entry from the global var
scopeDepth
//symInsert("counter", typ, 10);
ElementPtr symInsert(char *name, Type *type, int line)
{
ElementPtr new_element = (ElementPtr) malloc
(sizeof(Element));
char *a = name;
int ascii = 0 ;
while(*a!='0')
{
ascii+= (int) *a;
a++;
}
new_element->key = ascii%16;
new_element->id = strdup(name);
new_element->linenumber = line;
new_element->scope = scopeDepth;
new_element->stype = type;
new_element->snode = NULL;
if(!symbolStackTop->symbolTablePtr-
>hashTable[new_element->key]) // if the first element of hash
table for this ascii is null
{
new_element->next = NULL;
symbolStackTop->symbolTablePtr-
>hashTable[new_element->key] = new_element;
}
else
{
new_element->next = symbolStackTop-
>symbolTablePtr->hashTable[new_element->key];
symbolStackTop->symbolTablePtr-
>hashTable[new_element->key] = new_element;
}
//printf("n Inserted %s at scope depth %d with ascii %d
",new_element->id,scopeDepth,new_element->key);
return symbolStackTop->symbolTablePtr-
>hashTable[new_element->key];
}
//push a new entry to the symbol stack
// This should modify the variable top and change the scope
depth
int enterScope()
{
int i;
SymbolTableStackEntryPtr newSymbolStackEntry =
(SymbolTableStackEntryPtr) malloc (sizeof(struct
symbolTableStackEntry));
if (newSymbolStackEntry == NULL) return 0;
newSymbolStackEntry -> symbolTablePtr =
(SymbolTable*) malloc (sizeof(SymbolTable));
for(i=0; i<MAXHASHSIZE; i++)
newSymbolStackEntry -> symbolTablePtr-
>hashTable[i] = NULL;
newSymbolStackEntry->prevScope = symbolStackTop;
symbolStackTop = newSymbolStackEntry;
scopeDepth++;
//printf("n New Scope");
return 1;
}
/*
 * Pops the innermost scope off the symbol stack: symbolStackTop moves
 * to the enclosing scope and scopeDepth is decremented.
 *
 * The global scope (the entry with no prevScope) is never popped; an
 * attempt to do so, or a call with no scope at all, prints an error
 * and leaves the stack and scopeDepth untouched.
 *
 * The popped entry is not freed here.
 * NOTE(review): AST block nodes have an nSymbolTabPtr field that
 * presumably keeps pointing at the popped table - confirm before
 * adding a free().
 */
void leaveScope()
{
    if (symbolStackTop == NULL || symbolStackTop->prevScope == NULL) {
        printf("\n Error .. cannot leave global scope");
        return;
    }

    symbolStackTop = symbolStackTop->prevScope;
    scopeDepth--;
}
// Do not modify this function
/*
 * Prints one symbol table entry as a newline followed by
 * "Line <declaration line>: <name>".  A NULL entry prints a diagnostic
 * instead.
 */
void printElement(ElementPtr symelement) {
    if (symelement != NULL) {
        printf("\nLine %d: %s", symelement->linenumber,symelement->id);
    }
    else printf("Wrong call! symbol table entry NULL");
}
//should traverse through the entire symbol table and print it
// must use the printElement function given above
void printSymbolTable()
{
int i;
SymbolTableStackEntryPtr SymbolStackEntry =
symbolStackTop;
while(SymbolStackEntry)
{
for(i=0; i<MAXHASHSIZE;i++)
{
ElementPtr symelement = SymbolStackEntry-
>symbolTablePtr->hashTable[i];
while(symelement)
{
printElement(symelement);
symelement = symelement->next;
}
}
SymbolStackEntry = SymbolStackEntry->prevScope;
}
}
project4-ast/symbolTable.h
#ifndef _SYMBOL_TABLE
#define _SYMBOL_TABLE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "globals.h"
/* data structure of one element in the symbol table */
typedef enum {
    VOID,
    INT,
    ARRAY,
    FUNCTION
} TypeKind;
/* Type information attached to a symbol table entry. */
typedef struct type {
    TypeKind kind;         /* one from the TypeKind enum */
    int dimension;         /* for arrays */
    struct type *function; /* function argument and return types;
                              ignore this field until HW5 */
} Type;
typedef Type *TypePtr;
/* One entry of a symbol table hash bucket. */
typedef struct element {
    int key;              /* hash bucket index of this entry */
    char *id;             /* declared name */
    int linenumber;       /* line of the declaration */
    int scope;            /* scope depth at declaration */
    Type *stype;          /* pointer to the type information */
    struct element *next; /* next symbol with the same hash table index */
    struct node *snode;   /* (New) AST node for method declaration */
} Element;
typedef Element *ElementPtr;
typedef ElementPtr HashTableEntry;
/* data structure of the symbol table */
typedef struct symbolTable {
HashTableEntry hashTable[MAXHASHSIZE]; /*
hash table */
} SymbolTable;
typedef SymbolTable *SymbolTablePtr;
typedef struct symbolTableStackEntry {
SymbolTablePtr symbolTablePtr;
struct symbolTableStackEntry *prevScope;
} SymbolTableStackEntry;
typedef SymbolTableStackEntry
*SymbolTableStackEntryPtr;
int initSymbolTable() ;
ElementPtr symLookup(char *);
ElementPtr symInsert(char *, Type *, int );
int enterScope();
void leaveScope();
#endif
project4-ast/util.c
/* Original Code: Silvano Bonacia, Fall 2006 */
/* Modifications and changes, Prof. Venkat, Spring 08 */
#include "ast.h"
#include "util.h"
extern AstNode *program;
extern SymbolTableStackEntryPtr symbolStackTop;
extern void printSymbolTable();
#define PAD_STR " "
int indLevel = 0;
/* Prints PAD_STR `level` times; prints nothing when level is <= 0. */
void pad (int level) {
    int remaining = level;

    while (remaining > 0) {
        printf("%s", PAD_STR);
        remaining--;
    }
}
/*
 * Print the parameters of the method (parameters being the entries of
 * the given scope), separated by ", ".
 *
 * Entries are visited in hash-bucket order.  The separator is written
 * between every pair of printed entries, including entries that live
 * in different buckets.  Entries of kind FUNCTION are skipped.
 */
void printMethodParams (SymbolTableStackEntryPtr scope) {
    int i;
    int printed = 0; /* number of parameters written so far */

    if (scope == NULL || scope->symbolTablePtr == NULL)
        return;

    for (i = 0; i < MAXHASHSIZE; i++) {
        ElementPtr entry;

        for (entry = scope->symbolTablePtr->hashTable[i];
             entry != NULL;
             entry = entry->next) {
            if (entry->stype->kind == FUNCTION)
                continue;

            if (printed > 0)
                printf(", ");
            printed++;

            switch (entry->stype->kind) {
            case INT :
                printf("int %s", entry->id);
                break;
            case ARRAY :
                printf("int %s[]", entry->id);
                break;
            case VOID :
                printf("void");
                break;
            default :
                break;
            }
        }
    }
}
/*
 * Print the variable declarations of the scope, one per line at the
 * current indentation level, in hash-bucket order.
 *
 * INT entries print as "int name;" and ARRAY entries as
 * "int name[dimension];".  VOID and FUNCTION entries are not variables
 * and produce no output (not even indentation).
 *
 * Return the number of variables printed; 0 for a NULL scope.
 */
int printVarDeclarations (SymbolTablePtr scope)
{
    int noVar = 0;
    int i;

    if (scope == NULL)
        return 0;

    for (i = 0; i < MAXHASHSIZE; i++) {
        ElementPtr entry;

        for (entry = scope->hashTable[i]; entry != NULL; entry = entry->next) {
            switch (entry->stype->kind) {
            case INT :
                pad(indLevel);
                printf("int %s;\n", entry->id);
                noVar++;
                break;
            case ARRAY :
                pad(indLevel);
                printf("int %s[%d];\n", entry->id, entry->stype->dimension);
                noVar++;
                break;
            case VOID :
            case FUNCTION :
            default :
                break;
            }
        }
    }
    return noVar;
}
// Traverses the entire program and symbol table and prints it.
// The output must match the program with the exception of white space;
// do a diff -w to ignore white space.
/*
 * Prints the C-minus keyword for a type: "int" for INT and ARRAY,
 * "void" for VOID.  For FUNCTION the type reached through the
 * `function` link is printed instead.
 *
 * A NULL type (or a FUNCTION type whose `function` link is NULL)
 * prints nothing.  The second parameter is unused.
 */
void printType(TypePtr type, int dummy){
    (void) dummy;

    /* Follow FUNCTION links until a printable type is reached. */
    while (type != NULL && type->kind == FUNCTION)
        type = type->function;

    if (type == NULL)
        return;

    switch (type->kind) {
    case INT :
    case ARRAY :
        printf("int");
        break;
    case VOID :
        printf("void");
        break;
    default :
        break;
    }
}
void print_Expression(AstNodePtr expr, int endWithSemi) {
if(expr == NULL) {
return;
}
switch(expr->eKind) {
case VAR_EXP :
printf("%s", expr->nSymbolPtr->id);
break;
case ARRAY_EXP :
printf("%s[", expr->nSymbolPtr->id);
print_Expression(expr->children[0], 0);
printf("]");
break;
case ASSI_EXP :
print_Expression(expr->children[0], 0);
printf(" = ");
print_Expression(expr->children[1], 0);
break;
case ADD_EXP :
print_Expression(expr->children[0], 0);
printf(" + ");
print_Expression(expr->children[1], 0);
break;
case SUB_EXP :
print_Expression(expr->children[0], 0);
printf(" - ");
print_Expression(expr->children[1], 0);
break;
case MULT_EXP :
print_Expression(expr->children[0], 0);
printf(" * ");
print_Expression(expr->children[1], 0);
break;
case DIV_EXP :
print_Expression(expr->children[0], 0);
printf(" / ");
print_Expression(expr->children[1], 0);
break;
case GT_EXP :
print_Expression(expr->children[0], 0);
printf(" > ");
print_Expression(expr->children[1], 0);
break;
case LT_EXP :
print_Expression(expr->children[0], 0);
printf(" < ");
print_Expression(expr->children[1], 0);
break;
case GE_EXP :
print_Expression(expr->children[0], 0);
printf(" >= ");
print_Expression(expr->children[1], 0);
break;
case LE_EXP :
print_Expression(expr->children[0], 0);
printf(" <= ");
print_Expression(expr->children[1], 0);
break;
case EQ_EXP :
print_Expression(expr->children[0], 0);
printf(" == ");
print_Expression(expr->children[1], 0);
break;
case NE_EXP :
print_Expression(expr->children[0], 0);
printf(" != ");
print_Expression(expr->children[1], 0);
break;
case CALL_EXP :
printf("%s(", expr->fname);
if(expr->children[0] != NULL) {
print_Expression(expr->children[0], 0);
AstNodePtr ptr = expr->children[0]->sibling;
while(ptr != NULL) {
printf(", ");
print_Expression(ptr, 0);
ptr = ptr->sibling;
}
}
printf(")");
break;
case CONST_EXP :
printf("%d", expr->nValue);
break;
}
if(endWithSemi == 1)
printf(";n");
}
void print_Statement(AstNodePtr stmt);

/*
 * Prints the body of an if, else or while.
 * A compound body is printed at the current level, any other statement
 * one level deeper, and a missing body as a lone ";".
 */
static void print_NestedBody(AstNodePtr body)
{
    if (body == NULL) {
        pad(indLevel + 1);
        printf(";\n");
    } else if (body->sKind != COMPOUND_STMT) {
        indLevel++;
        print_Statement(body);
        indLevel--;
    } else {
        print_Statement(body);
    }
}

/*
 * Prints a statement and every statement that follows it on the
 * sibling chain, each at the current indentation level.
 *
 * if / while:  children[0] is the condition, children[1] the body;
 *              for if, children[2] is the optional else branch.
 * return:      children[0] is the optional value.
 * compound:    nSymbolTabPtr holds the local declarations (printed
 *              first), children[0] the first statement of the block.
 * expression:  children[0] is the expression; a missing expression
 *              prints as the empty statement ";".
 */
void print_Statement(AstNodePtr stmt) {
    for (; stmt != NULL; stmt = stmt->sibling) {
        pad(indLevel);
        switch (stmt->sKind) {
        case IF_THEN_ELSE_STMT :
            printf("if(");
            print_Expression(stmt->children[0], 0);
            printf(")\n");
            print_NestedBody(stmt->children[1]);
            if (stmt->children[2] != NULL) {
                pad(indLevel);
                printf("else\n");
                print_NestedBody(stmt->children[2]);
            }
            break;
        case WHILE_STMT :
            printf("while(");
            print_Expression(stmt->children[0], 0);
            printf(")\n");
            print_NestedBody(stmt->children[1]);
            break;
        case RETURN_STMT :
            printf("return ");
            if (stmt->children[0] == NULL)
                printf(";\n");
            else
                print_Expression(stmt->children[0], 1);
            break;
        case COMPOUND_STMT :
            printf("{\n");
            indLevel++;
            if (stmt->nSymbolTabPtr != NULL) {
                /* blank line between declarations and statements */
                if (printVarDeclarations(stmt->nSymbolTabPtr) > 0)
                    printf("\n");
            }
            if (stmt->children[0] != NULL) {
                print_Statement(stmt->children[0]); /* the whole statement list */
            } else {
                pad(indLevel);
                printf(";\n");
            }
            indLevel--;
            pad(indLevel);
            printf("}\n");
            break;
        case EXPRESSION_STMT :
            if (stmt->children[0] == NULL)
                printf(";\n");
            else
                print_Expression(stmt->children[0], 1);
            break;
        default :
            break;
        }
    }
}
/*
 * Prints the tree rooted at `root` as source text; NULL prints nothing.
 *
 * METHOD:     return type (nType), name (nSymbolPtr), the parameter
 *             list from children[0] ("void" when there is none), the
 *             body from children[1], then the next method on the
 *             sibling chain.
 * FORMALVAR:  type and name of the parameter, "[]" for arrays, then
 *             the remaining parameters separated by ", ".
 * STMT:       delegated to print_Statement().
 * EXPRESSION: delegated to print_Expression() with a trailing ";".
 */
void print_Ast_Recursion(AstNodePtr root) {
    if (root == NULL)
        return;

    switch (root->nKind) {
    case METHOD :
        printType(root->nType, 0);
        printf(" %s(", root->nSymbolPtr->id);
        if (root->children[0] == NULL)
            printf("void");
        else
            print_Ast_Recursion(root->children[0]); /* parameters of the method */
        printf(")\n");
        print_Ast_Recursion(root->children[1]);     /* body of the method */
        printf("\n");
        print_Ast_Recursion(root->sibling);         /* next method */
        break;
    case FORMALVAR :
        printType(root->nSymbolPtr->stype, 0);      /* the type */
        printf(" %s", root->nSymbolPtr->id);        /* the name of the variable */
        if (root->nSymbolPtr->stype->kind == ARRAY)
            printf("[]");
        /* Print the next parameter if there's one */
        if (root->sibling != NULL) {
            printf(", ");
            print_Ast_Recursion(root->sibling);
        }
        break;
    case STMT :
        print_Statement(root);
        break;
    case EXPRESSION :
        print_Expression(root, 1);
        break;
    default :
        break;
    }
}
/*
 * Prints the whole program: first the variable declarations found in
 * the scope currently on top of the symbol stack (followed by a blank
 * line when there are any), then the tree rooted at the global
 * `program`.  Indentation is reset to level 0 first.
 *
 * With no scope on the stack, only the tree is printed.
 */
void print_Ast() {
    indLevel = 0;

    if (symbolStackTop != NULL
        && printVarDeclarations(symbolStackTop->symbolTablePtr) > 0)
        printf("\n");

    print_Ast_Recursion(program);
}
project4-ast/util.h
extern void printNode(AstNodePtr, int);
extern void printType(TypePtr, int);
lex_yacc.pdf
A GUIDE TO LEX & YACC
BY THOMAS NIEMANN
2
PREFACE
This document explains how to construct a compiler using lex
and yacc. Lex and yacc are
tools used to generate lexical analyzers and parsers. I assume
you can program in C, and
understand data structures such as linked-lists and trees.
The introduction describes the basic building blocks of a
compiler and explains the
interaction between lex and yacc. The next two sections
describe lex and yacc in more
detail. With this background, we construct a sophisticated
calculator. Conventional
arithmetic operations and control statements, such as if-else and
while, are implemented.
With minor changes, we convert the calculator into a compiler
for a stack-based machine.
The remaining sections discuss issues that commonly arise in
compiler writing. Source
code for examples may be downloaded from the web site listed
below.
Permission to reproduce portions of this document is given
provided the web site
listed below is referenced, and no additional restrictions apply.
Source code, when part of
a software project, may be used freely without reference to the
author.
THOMAS NIEMANN
Portland, Oregon
[email protected]
http://members.xoom.com/thomasn
mailto:[email protected]
http://members.xoom.com/thomasn
3
CONTENTS
1. INTRODUCTION
4
2. LEX
6
2.1 Theory 6
2.2 Practice 7
3. YACC
12
3.1 Theory 12
3.2 Practice, Part I 14
3.3 Practice, Part II 17
4. CALCULATOR
20
4.1 Description 20
4.2 Include File 23
4.3 Lex Input 24
4.4 Yacc Input 25
4.5 Interpreter 29
4.6 Compiler 30
5. MORE LEX
32
5.1 Strings 32
5.2 Reserved Words 33
5.3 Debugging Lex 33
6. MORE YACC
35
6.1 Recursion 35
6.2 If-Else Ambiguity 35
6.3 Error Messages 36
6.4 Inherited Attributes 37
6.5 Embedded Actions 37
6.6 Debugging Yacc 38
7. BIBLIOGRAPHY
39
4
1. Introduction
Until 1975, writing a compiler was a very time-consuming
process. Then Lesk [1975]
and Johnson [1975] published papers on lex and yacc. These
utilities greatly simplify
compiler writing. Implementation details for lex and yacc may
be found in Aho [1986].
Lex and yacc are available from
• Mortice Kern Systems (MKS), at http://www.mks.com,
• GNU flex and bison, at http://www.gnu.org,
• Ming, at http://agnes.dida.physik.uni-essen.de/~janjaap/mingw32,
• Cygnus, at http://www.cygnus.com/misc/gnu-win32, and
• me, at
http://members.xoom.com/thomasn/y_gnu.zip (executables), and
http://members.xoom.com/thomasn/y_gnus.zip (source for
y_gnu.zip).
The version from MKS is a high-quality commercial product
that retails for about
$300US. GNU software is free. Output from flex may be used in
a commercial product,
and, as of version 1.24, the same is true for bison. Ming and
Cygnus are 32-bit Windows-95/NT ports of the GNU software. My version is based on
Ming’s, but is compiled with
Visual C++ and includes a minor bug fix in the file handling
routine. If you download my
version, be sure to retain directory structure when you unzip.
source code:     a = b + c * d
      |
      v  Lexical Analyzer
tokens:          id1 = id2 + id3 * id4
      |
      v  Syntax Analyzer
syntax tree:         =
                    / \
                 id1   +
                      / \
                   id2   *
                        / \
                     id3   id4
      |
      v  Code Generator
generated code:  load id3
                 mul id4
                 add id2
                 store id1
Figure 1-1: Compilation Sequence
http://members.xoom.com/thomasn/y_lex.pdf
http://members.xoom.com/thomasn/y_yacc.pdf
http://www.amazon.com/exec/obidos/ISBN=0201100886/none01
A/#Bibliography
http://www.mks.com/
http://www.gnu.org/
http://agnes.dida.physik.uni-essen.de/~janjaap/mingw32
http://www.cygnus.com/misc/gnu-win32
http://members.xoom.com/thomasn/y_gnu.zip
http://members.xoom.com/thomasn/y_gnus.zip
5
Lex generates C code for a lexical analyzer, or scanner. It uses
patterns that match
strings in the input and converts the strings to tokens. Tokens
are numerical
representations of strings, and simplify processing. This is
illustrated in Figure 1-1.
As lex finds identifiers in the input stream, it enters them in a
symbol table. The
symbol table may also contain other information such as data
type (integer or real) and
location of the variable in memory. All subsequent references to
identifiers refer to the
appropriate symbol table index.
Yacc generates C code for a syntax analyzer, or parser. Yacc
uses grammar rules that
allow it to analyze tokens from lex and create a syntax tree. A
syntax tree imposes a
hierarchical structure on tokens. For example, operator
precedence and associativity are
apparent in the syntax tree. The next step, code generation, does
a depth-first walk of the
syntax tree to generate code. Some compilers produce machine
code, while others, as
shown above, output assembly.
bas.y --> yacc --> y.tab.c (yyparse), y.tab.h
bas.l --> lex  --> lex.yy.c (yylex)
y.tab.c + lex.yy.c --> cc --> bas.exe
source --> bas.exe --> compiled output
Figure 1-2: Building a Compiler with Lex/Yacc
Figure 1-2 illustrates the file naming conventions used by lex
and yacc. We'll assume
our goal is to write a BASIC compiler. First, we need to specify
all pattern matching rules
for lex (bas.l) and grammar rules for yacc (bas.y). Commands to
create our compiler,
bas.exe, are listed below:
yacc -d bas.y # create y.tab.h, y.tab.c
lex bas.l # create lex.yy.c
cc lex.yy.c y.tab.c -obas.exe # compile/link
Yacc reads the grammar descriptions in bas.y and generates a
parser, function
yyparse, in file y.tab.c. Included in file bas.y are token
declarations. These are
converted to constant definitions by yacc and placed in file
y.tab.h. Lex reads the
pattern descriptions in bas.l, includes file y.tab.h, and generates
a lexical analyzer,
function yylex, in file lex.yy.c.
Finally, the lexer and parser are compiled and linked together to
form the executable,
bas.exe. From main, we call yyparse to run the compiler.
Function yyparse
automatically calls yylex to obtain each token.
6
2. Lex
2.1 Theory
The first phase in a compiler reads the input source and
converts strings in the source to
tokens. Using regular expressions, we can specify patterns to
lex that allow it to scan and
match strings in the input. Each pattern in lex has an associated
action. Typically an
action returns a token, representing the matched string, for
subsequent use by the parser.
To begin with, however, we will simply print the matched string
rather than return a
token value. We may scan for identifiers using the regular
expression
letter(letter|digit)*
This pattern matches a string of characters that begins with a
single letter, and is followed
by zero or more letters or digits. This example nicely illustrates
operations allowed in
regular expressions:
• repetition, expressed by the “*” operator
• alternation, expressed by the “|” operator
• concatenation
Any regular expression may be expressed as a finite
state automaton
(FSA). We can represent an FSA using states, and transitions
between states. There is one
start state, and one or more final or accepting states.
0 1 2
letter
letter or digit
otherstart
Figure 2-1: Finite State Automaton
In Figure 2-1, state 0 is the start state, and state 2 is the
accepting state. As characters are
read, we make a transition from one state to another. When the
first letter is read, we
transition to state 1. We remain in state 1 as more letters or
digits are read. When we read
a character other than a letter or digit, we transition to state 2,
the accepting state. Any
FSA may be expressed as a computer program. For example, our
3-state machine is
easily programmed:
7
start: goto state0
state0: read c
if c = letter goto state1
goto state0
state1: read c
if c = letter goto state1
if c = digit goto state1
goto state2
state2: accept string
This is the technique used by lex. Regular expressions are
translated by lex to a computer
program that mimics an FSA. Using the next input character,
and current state, the next
state is easily determined by indexing into a computer-
generated state table.
Now we can easily understand some of lex’s limitations. For
example, lex cannot be
used to recognize nested structures such as parentheses. Nested
structures are handled by
incorporating a stack. Whenever we encounter a “(”, we push it
on the stack. When a “)”
is encountered, we match it with the top of the stack, and pop
the stack. Lex, however,
only has states and transitions between states. Since it has no
stack, it is not well suited
for parsing nested structures. Yacc augments an FSA with a
stack, and can process
constructs such as parentheses with ease. The important thing is
to use the right tool for
the job. Lex is good at pattern matching. Yacc is appropriate for
more challenging tasks.
2.2 Practice
Metacharacter Matches
. any character except newline
\n newline
* zero or more copies of preceding expression
+ one or more copies of preceding expression
? zero or one copy of preceding expression
^ beginning of line
$ end of line
a|b a or b
(ab)+ one or more copies of ab (grouping)
"a+b" literal “a+b” (C escapes still work)
[] character class
Table 2-1: Pattern Matching Primitives
8
Expression Matches
abc abc
abc* ab, abc, abcc, abccc, …
abc+ abc, abcc, abccc, …
a(bc)+ abc, abcbc, abcbcbc, …
a(bc)? a, abc
[abc] a, b, c
[a-z] any letter, a through z
[a\-z] a, -, z
[-az] -, a, z
[A-Za-z0-9]+ one or more alphanumeric characters
[ \t\n]+ whitespace
[^ab] anything except: a, b
[a^b] a, ^, b
[a|b] a, |, b
a|b a or b
Table 2-2: Pattern Matching Examples
Regular expressions in lex are composed of metacharacters
(Table 2-1). Pattern matching
examples are shown in Table 2-2. Within a character class,
normal operators lose their
meaning. Two operators allowed in a character class are the
hyphen (“-”) and circumflex
(“^”). When used between two characters, the hyphen represents
a range of characters.
The circumflex, when used as the first character, negates the
expression. If two patterns
match the same string, the longest match wins. In case both
matches are the same length,
then the first pattern listed is used.
Input to Lex is divided into three sections, with %% dividing
the sections. This is best
illustrated by example. The first example is the shortest
possible lex file:
%%
Input is copied to output, one character at a time. The first %%
is always required, as there
must always be a rules section. However, if we don’t specify
any rules, then the default
action is to match everything and copy it to output. Defaults for
input and output are
stdin and stdout, respectively. Here is the same example, with
defaults explicitly
coded:
... definitions ...
%%
... rules ...
%%
... subroutines ...
9
%%
    /* match everything except newline */
.   ECHO;
    /* match newline */
\n  ECHO;
%%
/* Called by lex when input is exhausted; returning 1 signals "no more input". */
int yywrap(void) {
    return 1;
}

/* Run the scanner until the input is exhausted. */
int main(void) {
    yylex();
    return 0;
}
Two patterns have been specified in the rules section. Each
pattern must begin in column
one. This is followed by whitespace (space, tab or newline), and
an optional action
associated with the pattern. The action may be a single C
statement, or multiple C
statements enclosed in braces. Anything not starting in column
one is copied verbatim to
the generated C file. We may take advantage of this behavior to
specify comments in our
lex file. In this example there are two patterns, “.” and “\n”,
with an ECHO action
associated for each pattern. Several macros and variables are
predefined by lex. ECHO is a
macro that writes code matched by the pattern. This is the
default action for any
unmatched strings. Typically, ECHO is defined as:
#define ECHO fwrite(yytext, yyleng, 1, yyout)
Variable yytext is a pointer to the matched string (NULL-
terminated), and yyleng is
the length of the matched string. Variable yyout is the output
file, and defaults to
stdout. Function yywrap is called by lex when input is
exhausted. Return 1 if you are
done, or 0 if more processing is required. Every C program
requires a main function. In
this case, we simply call yylex, the main entry-point for lex.
Some implementations of
lex include copies of main and yywrap in a library, eliminating
the need to code them
explicitly. This is why our first example, the shortest lex
program, functioned properly.
10
name function
int yylex(void) call to invoke lexer, returns token
char *yytext pointer to matched string
yyleng length of matched string
yylval value associated with token
int yywrap(void) wrapup, return 1 if done, 0 if not done
FILE *yyout output file
FILE *yyin input file
INITIAL initial start condition
BEGIN condition switch start condition
ECHO write matched string
Table 2-3: Lex Predefined Variables
Here’s a program that does nothing at all. All input is matched,
but no action is
associated with any pattern, so there will be no output.
%%
.
\n
The following example prepends line numbers to each line in a
file. Some
implementations of lex predefine and calculate yylineno. The
input file for lex is yyin,
and defaults to stdin.
%{
int yylineno;
%}
%%
^(.*)\n     printf("%4d\t%s", ++yylineno, yytext);
%%
/* Prefix every line of the file named by argv[1] with its line number. */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s file\n", argv[0]);
        return 1;
    }
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        /* without this check yylex() would read through a NULL stream */
        perror(argv[1]);
        return 1;
    }
    yylex();
    fclose(yyin);
    return 0;
}
The definitions section is composed of substitutions, code, and
start states. Code in
the definitions section is simply copied as-is to the top of the
generated C file, and must
be bracketed with “%{” and “%}” markers. Substitutions
simplify pattern-matching rules.
For example, we may define digits and letters:
11
digit   [0-9]
letter  [A-Za-z]
%{
int count;      /* number of identifiers matched so far */
%}
%%
    /* match identifier */
{letter}({letter}|{digit})*     count++;
%%
/* Scan the input and report how many identifiers were matched. */
int main(void) {
    yylex();
    printf("number of identifiers = %d\n", count);
    return 0;
}
Whitespace must separate the defining term and the associated
expression.
References to substitutions in the rules section are surrounded
by braces ({letter}) to
distinguish them from literals. When we have a match in the
rules section, the associated
C code is executed. Here is a scanner that counts the number of
characters, words, and
lines in a file (similar to Unix wc):
%{
int nchar, nword, nline;    /* running character, word and line counts */
%}
%%
\n          { nline++; nchar++; }
[^ \t\n]+   { nword++, nchar += yyleng; }
.           { nchar++; }
%%
/* Count characters, words and lines of the input and print the three totals. */
int main(void) {
    yylex();
    printf("%d\t%d\t%d\n", nchar, nword, nline);
    return 0;
}
12
3. Yacc
3.1 Theory
Grammars for yacc are described using a variant of Backus Naur
Form (BNF). This
technique was pioneered by John Backus and Peter Naur, and
used to describe
ALGOL60. A BNF grammar can be used to express context-free
languages. Most
constructs in modern programming languages can be represented
in BNF. For example,
the grammar for an expression that multiplies and adds numbers
is
1 E -> E + E
2 E -> E * E
3 E -> id
Three productions have been specified. Terms that appear on the
left-hand side (lhs) of a
production, such as E (expression) are nonterminals. Terms such
as id (identifier) are
terminals (tokens returned by lex) and only appear on the right-
hand side (rhs) of a
production. This grammar specifies that an expression may be
the sum of two
expressions, the product of two expressions, or an identifier.
We can use this grammar to
generate expressions:
E -> E * E (r2)
-> E * z (r3)
-> E + E * z (r1)
-> E + y * z (r3)
-> x + y * z (r3)
At each step we expanded a term, replacing the lhs of a
production with the
corresponding rhs. The numbers on the right indicate which rule
applied. To parse an
expression, we actually need to do the reverse operation.
Instead of starting with a single
nonterminal (start symbol) and generating an expression from a
grammar, we need to
reduce an expression to a single nonterminal. This is known as
bottom-up or shift-reduce
parsing, and uses a stack for storing terms. Here is the same
derivation, but in reverse
order:
13
1 . x + y * z shift
2 x . + y * z reduce(r3)
3 E . + y * z shift
4 E + . y * z shift
5 E + y . * z reduce(r3)
6 E + E . * z shift
7 E + E * . z shift
8 E + E * z . reduce(r3)
9 E + E * E . reduce(r2) emit multiply
10 E + E . reduce(r1) emit add
11 E . accept
Terms to the left of the dot are on the stack, while remaining
input is to the right of the
dot. We start by shifting tokens onto the stack. When the top of
the stack matches the rhs
of a production, we replace the matched tokens on the stack
with the lhs of the
production. Conceptually, the matched tokens of the rhs are
popped off the stack, and the
lhs of the production is pushed on the stack. The matched
tokens are known as a handle,
and we are reducing the handle to the lhs of the production.
This process continues until
we have shifted all input to the stack, and only the starting
nonterminal remains on the
stack. In step 1 we shift the x to the stack. Step 2 applies rule r3
to the stack, changing x
to E. We continue shifting and reducing, until a single
nonterminal, the start symbol,
remains in the stack. In step 9, when we reduce rule r2, we emit
the multiply instruction.
Similarly, the add instruction is emitted in step 10. Thus,
multiply has a higher
precedence than addition.
Consider, however, the shift at step 6. Instead of shifting, we
could have reduced,
applying rule r1. This would result in addition having a higher
precedence than
multiplication. This is known as a shift-reduce conflict. Our
grammar is ambiguous, as
there is more than one possible derivation that will yield the
expression. In this case,
operator precedence is affected. As another example,
associativity in the rule
E -> E + E
is ambiguous, for we may recurse on the left or the right. To
remedy the situation, we
could rewrite the grammar, or supply yacc with directives that
indicate which operator
has precedence. The latter method is simpler, and will be
demonstrated in the practice
section.
The following grammar has a reduce-reduce conflict. With an id
on the stack, we
may reduce to T, or reduce to E.
E -> T
E -> id
T -> id
Yacc takes a default action when there is a conflict. For shift-
reduce conflicts, yacc will
shift. For reduce-reduce conflicts, it will use the first rule in the
listing. It also issues a
warning message whenever a conflict exists. The warnings may
be suppressed by making
14
the grammar unambiguous. Several methods for removing
ambiguity will be presented in
subsequent sections.
3.2 Practice, Part I
Input to yacc is divided into three sections. The definitions
section consists of token
declarations, and C code bracketed by “%{” and “%}”. The BNF
grammar is placed in the
rules section, and user subroutines are added in the subroutines
section.
This is best illustrated by constructing a small calculator that
can add and subtract
numbers. We’ll begin by examining the linkage between lex and
yacc. Here is the
definitions section for the yacc input file:
%token INTEGER
This definition declares an INTEGER token. When we run yacc,
it generates a parser in
file y.tab.c, and also creates an include file, y.tab.h:
#ifndef YYSTYPE
#define YYSTYPE int
#endif
#define INTEGER 258
extern YYSTYPE yylval;
Lex includes this file and utilizes the definitions for token
values. To obtain tokens, yacc
calls yylex. Function yylex has a return type of int, and returns
the token value.
Values associated with the token are returned by lex in variable
yylval. For example,
[0-9]+ {
yylval = atoi(yytext);
return INTEGER;
}
would store the value of the integer in yylval, and return token
INTEGER to yacc. The
type of yylval is determined by YYSTYPE. Since the default
type is integer, this works
well in this case. Token values 0-255 are reserved for character
values. For example, if
you had a rule such as
[-+] return *yytext; /* return operator */
... definitions ...
%%
... rules ...
%%
... subroutines ...
15
the character value for minus or plus is returned. Note that we
placed the minus sign first
so that it wouldn’t be mistaken for a range designator.
Generated token values typically
start around 258, as lex reserves several values for end-of-file
and error processing. Here
is the complete lex input specification for our calculator:
%{
#include <stdlib.h>     /* atoi */
#include "y.tab.h"
int yyerror(char *s);   /* defined in the yacc input file */
%}
%%
[0-9]+      {
                yylval = atoi(yytext);
                return INTEGER;
            }
[-+\n]      return *yytext;
[ \t]       ;       /* skip whitespace */
.           yyerror("invalid character");
%%
/* Called by lex at end of input; returning 1 means there is nothing more to scan. */
int yywrap(void) {
    return 1;
}
Internally, yacc maintains two stacks in memory; a parse stack
and a value stack. The
parse stack contains terminals and nonterminals, and represents
the current parsing state.
The value stack is an array of YYSTYPE elements, and
associates a value with each
element in the parse stack. For example, when lex returns an
INTEGER token, yacc shifts
this token to the parse stack. At the same time, the
corresponding yylval is shifted to
the value stack. The parse and value stacks are always
synchronized, so finding a value
related to a token on the stack is easily accomplished. Here is
the yacc input specification
for our calculator:
16
%token INTEGER
%{
#include <stdio.h>
int yylex(void);
int yyerror(char *s);
%}
%%
program:
        program expr '\n'       { printf("%d\n", $2); }
        |
        ;
expr:
        INTEGER                 { $$ = $1; }
        | expr '+' expr         { $$ = $1 + $3; }
        | expr '-' expr         { $$ = $1 - $3; }
        ;
%%
/* Report a parse error on stderr. */
int yyerror(char *s) {
    fprintf(stderr, "%s\n", s);
    return 0;
}

/* Parse the input; yyparse() calls yylex() to obtain each token. */
int main(void) {
    yyparse();
    return 0;
}
The rules section resembles the BNF grammar discussed earlier.
The left-hand side of a
production, or nonterminal, is entered left-justified, followed by
a colon. This is followed
by the right-hand side of the production. Actions associated
with a rule are entered in
braces.
By utilizing left-recursion, we have specified that a program
consists of zero or more
expressions. Each expression terminates with a newline. When a
newline is detected, we
print the value of the expression. When we apply the rule
expr: expr '+' expr { $$ = $1 + $3; }
we replace the right-hand side of the production in the parse
stack with the left-hand side
of the same production. In this case, we pop “expr '+' expr” and
push “expr”. We
have reduced the stack by popping three terms off the stack, and
pushing back one term.
We may reference positions in the value stack in our C code by
specifying “$1” for the
first term on the right-hand side of the production, “$2” for the
second, and so on. “$$”
designates the top of the stack after reduction has taken place.
The above action adds the
value associated with two expressions, pops three terms off the
value stack, and pushes
back a single sum. Thus, the parse and value stacks remain
synchronized.
17
Numeric values are initially entered on the stack when we
reduce from INTEGER to
expr. After INTEGER is shifted to the stack, we apply the rule
expr: INTEGER { $$ = $1; }
The INTEGER token is popped off the parse stack, followed by
a push of expr. For the
value stack, we pop the integer value off the stack, and then
push it back on again. In
other words, we do nothing. In fact, this is the default action,
and need not be specified.
Finally, when a newline is encountered, the value associated
with expr is printed.
In the event of syntax errors, yacc calls the user-supplied
function yyerror. If you
need to modify the interface to yyerror, you can alter the canned
file that yacc includes
to fit your needs. The last function in our yacc specification is
main … in case you were
wondering where it was. This example still has an ambiguous
grammar. Yacc will issue
shift-reduce warnings, but will still process the grammar using
shift as the default
operation.
3.3 Practice, Part II
In this section we will extend the calculator from the previous
section to incorporate
some new functionality. New features include arithmetic
operators multiply, and divide.
Parentheses may be used to over-ride operator precedence, and
single-character variables
may be specified in assignment statements. The following
illustrates sample input and
calculator output:
user: 3 * (4 + 5)
calc: 27
user: x = 3 * (5 + 4)
user: y = 5
user: x
calc: 27
user: y
calc: 5
user: x + 2*y
calc: 37
The lexical analyzer returns VARIABLE and INTEGER tokens.
For variables, yylval
specifies an index to sym, our symbol table. For this program,
sym merely holds the
value of the associated variable. When INTEGER tokens are
returned, yylval contains
the number scanned. Here is the input specification for lex:
18
%{
#include <stdlib.h>     /* atoi */
#include "y.tab.h"
int yyerror(char *s);   /* defined in the yacc input file */
%}
%%
    /* variables */
[a-z]       {
                yylval = *yytext - 'a';
                return VARIABLE;
            }
    /* integers */
[0-9]+      {
                yylval = atoi(yytext);
                return INTEGER;
            }
    /* operators */
[-+()=/*\n] { return *yytext; }
    /* skip whitespace */
[ \t]       ;
    /* anything else is an error */
.           yyerror("invalid character");
%%
/* Called by lex at end of input; returning 1 means there is nothing more to scan. */
int yywrap(void) {
    return 1;
}
The input specification for yacc follows. The tokens for
INTEGER and VARIABLE are
utilized by yacc to create #defines in y.tab.h for use in lex. This
is followed by
definitions for the arithmetic operators. We may specify %left,
for left-associative, or
%right, for right associative. The last definition listed has the
highest precedence. Thus,
multiplication and division have higher precedence than
addition and subtraction. All
four operators are left-associative. Using this simple technique,
we are able to
disambiguate our grammar.
%token INTEGER VARIABLE
%left '+' '-'
%left '*' '/'
%{
#include <stdio.h>
int yylex(void);
int yyerror(char *s);
int sym[26];    /* values of the single-letter variables, indexed by letter - 'a' */
%}
19
%%
program:
        program statement '\n'
        |
        ;
statement:
        expr                    { printf("%d\n", $1); }
        | VARIABLE '=' expr     { sym[$1] = $3; }
        ;
expr:
        INTEGER
        | VARIABLE              { $$ = sym[$1]; }
        | expr '+' expr         { $$ = $1 + $3; }
        | expr '-' expr         { $$ = $1 - $3; }
        | expr '*' expr         { $$ = $1 * $3; }
        | expr '/' expr         { $$ = $1 / $3; }
        | '(' expr ')'          { $$ = $2; }
        ;
%%
/* Report a parse error on stderr. */
int yyerror(char *s) {
    fprintf(stderr, "%s\n", s);
    return 0;
}

/* Parse the input; yyparse() calls yylex() to obtain each token. */
int main(void) {
    yyparse();
    return 0;
}
20
4. Calculator
4.1 Description
This version of the calculator is substantially more complex
than previous versions.
Major changes include control constructs such as if-else and
while. In addition, a syntax
tree is constructed during parsing. After parsing, we walk the
syntax tree to produce
output. Two versions of the tree walk routine are supplied:
• an interpreter that executes statements during the tree walk,
and
• a compiler that generates code for a hypothetical stack-based
machine.
To make things more concrete, here is a sample program,
x = 0;
while (x < 3) {
print x;
x = x + 1;
}
with output for the interpretive version,
0
1
2
and output for the compiler version.
push 0
pop x
L000:
push x
push 3
compLT
jz L001
push x
print
push x
push 1
add
pop x
jmp L000
L001:
21
The include file contains declarations for the syntax tree and
symbol table. The
symbol table, sym, allows for single-character variable names.
A node in the syntax tree
may hold a constant (conNodeType), an identifier
(idNodeType), or an internal node
with an operator (oprNodeType). Union nodeType encapsulates
all three variants, and
nodeType.type is used to determine which structure we have.
The lex input file contains patterns for VARIABLE and
INTEGER tokens. In addition,
tokens are defined for 2-character operators such as EQ and NE.
Single-character
operators are simply returned as themselves.
The yacc input file defines YYSTYPE, the type of yylval, as
%union {
    int iValue;         /* integer value */
    char sIndex;        /* symbol table index */
    nodeType *nPtr;     /* node pointer */
};
This causes the following to be generated in y.tab.h:
typedef union {
    int iValue;         /* integer value */
    char sIndex;        /* symbol table index */
    nodeType *nPtr;     /* node pointer */
} YYSTYPE;
extern YYSTYPE yylval;
Constants, variables, and nodes can be represented by yylval in
the parser’s value stack.
Notice the type definitions
%token <iValue> INTEGER
%type <nPtr> expr
This binds expr to nPtr, and INTEGER to iValue in the
YYSTYPE union. This is
required so that yacc can generate the correct code. For
example, the rule
expr: INTEGER { $$ = con($1); }
should generate the following code. Note that yyvsp[0]
addresses the top of the value
stack, or the value associated with INTEGER.
yylval.nPtr = con(yyvsp[0].iValue);
22
The unary minus operator is given higher priority than binary
operators as follows:
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
The %nonassoc indicates no associativity is implied. It is
frequently used in conjunction
with %prec to specify precedence of a rule. Thus, we have
expr: '-' expr %prec UMINUS { $$ = node(UMINUS, 1, $2); }
indicating that the precedence of the rule is the same as the
precedence of token UMINUS.
And, as defined above, UMINUS has higher precedence than the
other operators. A similar
technique is used to remove ambiguity associated with the if-
else statement (see If-Else
Ambiguity, p. 35).
The syntax tree is constructed bottom-up, allocating the leaf
nodes when variables
and integers are reduced. When operators are encountered, a
node is allocated and
pointers to previously allocated nodes are entered as operands.
As statements are reduced,
ex is called to do a depth-first walk of the syntax tree. Since the
tree was constructed
bottom-up, a depth-first walk visits nodes in the order that they
were originally allocated.
This results in operators being applied in the order that they
were encountered during
parsing. Two versions of ex, an interpretive version, and a
compiler version, are
included.
23
4.2 Include File
/*
 * Syntax-tree declarations shared by the lexer, the parser and the tree
 * walkers. Every node variant starts with a nodeEnum tag, so the variant can
 * be identified through nodeType.type before any other member is accessed.
 */
typedef enum { typeCon, typeId, typeOpr } nodeEnum;
/* constants */
typedef struct {
nodeEnum type; /* type of node */
int value; /* value of constant */
} conNodeType;
/* identifiers */
typedef struct {
nodeEnum type; /* type of node */
int i; /* subscript to ident array */
} idNodeType;
/* operators */
typedef struct {
nodeEnum type; /* type of node */
int oper; /* operator */
int nops; /* number of operands */
/* declared with a single slot; opr() allocates room for nops operands */
union nodeTypeTag *op[1]; /* operands (expandable) */
} oprNodeType;
/* a tree node: exactly one of the three variants above, selected by type */
typedef union nodeTypeTag {
nodeEnum type; /* type of node */
conNodeType con; /* constants */
idNodeType id; /* identifiers */
oprNodeType opr; /* operators */
} nodeType;
/* symbol table; the definition lives in the yacc input file */
extern int sym[26];
24
4.3 Lex Input
%{
#include <stdlib.h>
#include "calc3.h"
#include "y.tab.h"
void yyerror(char *s);  /* defined in the yacc input file */
%}
%%
[a-z]       {
                yylval.sIndex = *yytext - 'a';
                return VARIABLE;
            }
[0-9]+      {
                yylval.iValue = atoi(yytext);
                return INTEGER;
            }
[-()<>=+*/;{}.] {
                return *yytext;
            }
">="        return GE;
"<="        return LE;
"=="        return EQ;
"!="        return NE;
"while"     return WHILE;
"if"        return IF;
"else"      return ELSE;
"print"     return PRINT;
[ \t\n]+    ;       /* ignore whitespace */
.           yyerror("Unknown character");
%%
/* Called by lex at end of input; returning 1 means there is nothing more to scan. */
int yywrap(void) {
    return 1;
}
25
4.4 Yacc Input
%{
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "calc3.h"

/* prototypes */
nodeType *opr(int oper, int nops, ...);
nodeType *id(int i);
nodeType *con(int value);
void freeNode(nodeType *p);
int ex(nodeType *p);            /* tree walker: interpreter or compiler version */
int yylex(void);
void yyerror(char *s);

int sym[26];                    /* symbol table */
%}

%union {
    int iValue;                 /* integer value */
    char sIndex;                /* symbol table index */
    nodeType *nPtr;             /* node pointer */
};

%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT
%nonassoc IFX
%nonassoc ELSE

%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS

%type <nPtr> stmt expr stmt_list

%%
26
/* a program is a sequence of statements terminated by '.' */
program:
function '.' { exit(0); }
;
/* each statement is walked with ex() and freed as soon as it is reduced */
function:
function stmt { ex($2); freeNode($2); }
| /* NULL */
;
/* statements; each action builds the syntax-tree node for the construct */
stmt:
';' { $$ = opr(';', 2, NULL, NULL); }
| expr ';' { $$ = $1; }
| PRINT expr ';' { $$ = opr(PRINT, 1, $2); }
| VARIABLE '=' expr ';' { $$ = opr('=', 2, id($1), $3); }
| WHILE '(' expr ')' stmt
{ $$ = opr(WHILE, 2, $3, $5); }
/* %prec IFX gives the else-less form lower precedence than ELSE */
| IF '(' expr ')' stmt %prec IFX
{ $$ = opr(IF, 2, $3, $5); }
| IF '(' expr ')' stmt ELSE stmt
{ $$ = opr(IF, 3, $3, $5, $7); }
| '{' stmt_list '}' { $$ = $2; }
;
/* a statement list is chained together with ';' operator nodes */
stmt_list:
stmt { $$ = $1; }
| stmt_list stmt { $$ = opr(';', 2, $1, $2); }
;
/* expressions; leaves come from con() and id(), inner nodes from opr() */
expr:
INTEGER { $$ = con($1); }
| VARIABLE { $$ = id($1); }
| '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
| expr '+' expr { $$ = opr('+', 2, $1, $3); }
| expr '-' expr { $$ = opr('-', 2, $1, $3); }
| expr '*' expr { $$ = opr('*', 2, $1, $3); }
| expr '/' expr { $$ = opr('/', 2, $1, $3); }
| expr '<' expr { $$ = opr('<', 2, $1, $3); }
| expr '>' expr { $$ = opr('>', 2, $1, $3); }
| expr GE expr { $$ = opr(GE, 2, $1, $3); }
| expr LE expr { $$ = opr(LE, 2, $1, $3); }
| expr NE expr { $$ = opr(NE, 2, $1, $3); }
| expr EQ expr { $$ = opr(EQ, 2, $1, $3); }
| '(' expr ')' { $$ = $2; }
;
%%
27
/*
 * Allocate a constant leaf node holding `value`.
 * Returns the new node; the caller releases it with freeNode().
 * Terminates the program if memory cannot be allocated.
 */
nodeType *con(int value) {
    nodeType *p;

    /* allocate node */
    if ((p = malloc(sizeof(conNodeType))) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);     /* yyerror returns; never dereference NULL below */
    }

    /* copy information */
    p->type = typeCon;
    p->con.value = value;

    return p;
}
/*
 * Allocate an identifier leaf node for symbol-table subscript `i`.
 * Returns the new node; the caller releases it with freeNode().
 * Terminates the program if memory cannot be allocated.
 */
nodeType *id(int i) {
    nodeType *p;

    /* allocate node */
    if ((p = malloc(sizeof(idNodeType))) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);     /* yyerror returns; never dereference NULL below */
    }

    /* copy information */
    p->type = typeId;
    p->id.i = i;

    return p;
}
/*
 * Allocate an operator node for `oper` with `nops` operands.
 * The variadic arguments are the nops operand pointers (nodeType *), stored
 * in order in op[0..nops-1].
 * Returns the new node; the caller releases it with freeNode().
 * Terminates the program if memory cannot be allocated.
 */
nodeType *opr(int oper, int nops, ...) {
    va_list ap;
    nodeType *p;
    size_t size;
    int i;

    /* allocate node; oprNodeType already contains one operand slot */
    size = sizeof(oprNodeType) + (nops - 1) * sizeof(nodeType*);
    if ((p = malloc(size)) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);     /* yyerror returns; never dereference NULL below */
    }

    /* copy information */
    p->type = typeOpr;
    p->opr.oper = oper;
    p->opr.nops = nops;
    va_start(ap, nops);
    for (i = 0; i < nops; i++)
        p->opr.op[i] = va_arg(ap, nodeType*);
    va_end(ap);
    return p;
}
28
/*
 * Release the node p; for an operator node, every operand subtree is
 * released first. A NULL pointer is ignored.
 */
void freeNode(nodeType *p) {
    if (p == NULL)
        return;

    if (p->type == typeOpr) {
        int k = 0;

        while (k < p->opr.nops) {
            freeNode(p->opr.op[k]);
            k++;
        }
    }
    free(p);
}
/* Print the error message s on stdout, followed by a newline. */
void yyerror(char *s) {
    fprintf(stdout, "%s\n", s);
}

/* Parse the input; statements are executed by the grammar actions as they are reduced. */
int main(void) {
    yyparse();
    return 0;
}
29
4.5 Interpreter
#include <stdio.h>
#include "calc3.h"
#include "y.tab.h"
/*
 * Interpreter version of the tree walker: evaluate the syntax tree rooted at
 * p and return its value. Statements (while, if, print) return 0, as does a
 * NULL tree or an unrecognized node.
 */
int ex(nodeType *p) {
    if (!p) return 0;
    switch(p->type) {
    case typeCon:       return p->con.value;
    case typeId:        return sym[p->id.i];
    case typeOpr:
        switch(p->opr.oper) {
        case WHILE:     while(ex(p->opr.op[0]))
                            ex(p->opr.op[1]);
                        return 0;
        case IF:        if (ex(p->opr.op[0]))
                            ex(p->opr.op[1]);
                        else if (p->opr.nops > 2)
                            ex(p->opr.op[2]);
                        return 0;
        case PRINT:     printf("%d\n", ex(p->opr.op[0]));
                        return 0;
        case ';':       ex(p->opr.op[0]);
                        return ex(p->opr.op[1]);
        case '=':       return sym[p->opr.op[0]->id.i] =
                            ex(p->opr.op[1]);
        case UMINUS:    return -ex(p->opr.op[0]);
        case '+':       return ex(p->opr.op[0]) + ex(p->opr.op[1]);
        case '-':       return ex(p->opr.op[0]) - ex(p->opr.op[1]);
        case '*':       return ex(p->opr.op[0]) * ex(p->opr.op[1]);
        case '/':       return ex(p->opr.op[0]) / ex(p->opr.op[1]);
        case '<':       return ex(p->opr.op[0]) < ex(p->opr.op[1]);
        case '>':       return ex(p->opr.op[0]) > ex(p->opr.op[1]);
        case GE:        return ex(p->opr.op[0]) >= ex(p->opr.op[1]);
        case LE:        return ex(p->opr.op[0]) <= ex(p->opr.op[1]);
        case NE:        return ex(p->opr.op[0]) != ex(p->opr.op[1]);
        case EQ:        return ex(p->opr.op[0]) == ex(p->opr.op[1]);
        }
    }
    /* unknown node type or operator: never fall off the end of a non-void function */
    return 0;
}
30
4.6 Compiler
#include <stdio.h>
#include "calc3.h"
#include "y.tab.h"
static int lbl;
/*
 * Compiler version of the tree walker: print code for a stack-based machine
 * that evaluates the syntax tree rooted at p. Labels are numbered from the
 * file-scope counter lbl. Always returns 0.
 */
int ex(nodeType *p) {
    int lbl1, lbl2;

    if (!p) return 0;
    switch(p->type) {
    case typeCon:
        printf("\tpush\t%d\n", p->con.value);
        break;
    case typeId:
        printf("\tpush\t%c\n", p->id.i + 'a');
        break;
    case typeOpr:
        switch(p->opr.oper) {
        case WHILE:
            printf("L%03d:\n", lbl1 = lbl++);
            ex(p->opr.op[0]);
            printf("\tjz\tL%03d\n", lbl2 = lbl++);
            ex(p->opr.op[1]);
            printf("\tjmp\tL%03d\n", lbl1);
            printf("L%03d:\n", lbl2);
            break;
        case IF:
            ex(p->opr.op[0]);
            if (p->opr.nops > 2) {
                /* if else */
                printf("\tjz\tL%03d\n", lbl1 = lbl++);
                ex(p->opr.op[1]);
                printf("\tjmp\tL%03d\n", lbl2 = lbl++);
                printf("L%03d:\n", lbl1);
                ex(p->opr.op[2]);
                printf("L%03d:\n", lbl2);
            } else {
                /* if */
                printf("\tjz\tL%03d\n", lbl1 = lbl++);
                ex(p->opr.op[1]);
                printf("L%03d:\n", lbl1);
            }
            break;
        case PRINT:
            ex(p->opr.op[0]);
            printf("\tprint\n");
            break;
        case '=':
            ex(p->opr.op[1]);
            printf("\tpop\t%c\n", p->opr.op[0]->id.i + 'a');
            break;
        case UMINUS:
            ex(p->opr.op[0]);
            printf("\tneg\n");
            break;
        default:
            /* binary operators and ';': emit both operands, then the operation */
            ex(p->opr.op[0]);
            ex(p->opr.op[1]);
            switch(p->opr.oper) {
            case '+':   printf("\tadd\n"); break;
            case '-':   printf("\tsub\n"); break;
            case '*':   printf("\tmul\n"); break;
            case '/':   printf("\tdiv\n"); break;
            case '<':   printf("\tcompLT\n"); break;
            case '>':   printf("\tcompGT\n"); break;
            case GE:    printf("\tcompGE\n"); break;
            case LE:    printf("\tcompLE\n"); break;
            case NE:    printf("\tcompNE\n"); break;
            case EQ:    printf("\tcompEQ\n"); break;
            }
        }
    }
    /* the function is declared int, so give callers a defined value */
    return 0;
}
32
5. More Lex
5.1 Strings
Quoted strings frequently appear in programming languages.
Here is one way to match a
string in lex:
%{
char *yylval;
#include <string.h>
%}
%%
\"[^"\n]*["\n] {
        /* copy the text after the opening quote */
        yylval = strdup(yytext+1);
        if (yylval[yyleng-2] != '"')
            warning("improperly terminated string");
        else
            yylval[yyleng-2] = 0;   /* strip the closing quote */
        printf("found '%s'\n", yylval);
    }
The above example ensures that strings don’t cross line
boundaries, and removes
enclosing quotes. If we wish to add escape sequences, such as \n
or \", start states
simplify matters:
%{
char buf[100];      /* collected string text; note: writes are not bounds-checked */
char *s;            /* next free position in buf */
%}
%x STRING
%%
\"              { BEGIN STRING; s = buf; }
<STRING>\\n     { *s++ = '\n'; }
<STRING>\\t     { *s++ = '\t'; }
<STRING>\\\"    { *s++ = '\"'; }
<STRING>\"      {
                    *s = 0;
                    BEGIN 0;
                    printf("found '%s'\n", buf);
                }
<STRING>\n      { printf("invalid string"); exit(1); }
<STRING>.       { *s++ = *yytext; }
Exclusive start state STRING is defined in the definition
section. When the scanner
detects a quote, the BEGIN macro shifts lex into the STRING
state. Lex stays in the
STRING state, recognizing only patterns that begin with
<STRING>, until another BEGIN
33
is executed. Thus, we have a mini-environment for scanning
strings. When the trailing
quote is recognized, we switch back to state 0, the initial state.
5.2 Reserved Words
If your program has a large collection of reserved words, it is
more efficient to let lex
simply match a string, and determine in your own code whether
it is a variable or
reserved word. For example, instead of coding
"if" return IF;
"then" return THEN;
"else" return ELSE;
{letter}({letter}|{digit})* {
yylval.id = symLookup(yytext);
return IDENTIFIER;
}
where symLookup returns an index into the symbol table, it is
better to detect reserved
words and identifiers simultaneously, as follows:
{letter}({letter}|{digit})* {
int i;
if ((i = resWord(yytext)) != 0)
return (i);
yylval.id = symLookup(yytext);
return (IDENTIFIER);
}
This technique significantly reduces the number of states
required, and results in smaller
scanner tables.
5.3 Debugging Lex
Lex has facilities that enable debugging. This feature may vary
with different versions of
lex, so you should consult documentation for details. The code
generated by lex in file
lex.yy.c includes debugging statements that are enabled by
specifying command-line
option “-d”. Debug output may be toggled on and off by setting
yy_flex_debug.
Output includes the rule applied and corresponding matched
text. If you’re running lex
and yacc together, specify the following in your yacc input file:
extern int yy_flex_debug;
int main(void) {
yy_flex_debug = 1;
yyparse();
}
34
Alternatively, you may write your own debug code by defining
functions that display
information for the token value, and each variant of the yylval
union. This is illustrated
in the following example. When DEBUG is defined, the debug
functions take effect, and a
trace of tokens and associated values is displayed.
%union {
int ivalue;
...
};
%{
#ifdef DEBUG
int dbgToken(int tok, char *s) {
printf("token %s\n", s);
return tok;
}
int dbgTokenIvalue(int tok, char *s) {
printf("token %s (%d)\n", s, yylval.ivalue);
return tok;
}
#define RETURN(x) return dbgToken(x, #x)
#define RETURN_ivalue(x) return dbgTokenIvalue(x, #x)
#else
#define RETURN(x) return(x)
#define RETURN_ivalue(x) return(x)
#endif
%}
%%
[0-9]+ {
yylval.ivalue = atoi(yytext);
RETURN_ivalue(INTEGER);
}
"if" RETURN(IF);
"else" RETURN(ELSE);
35
6. More Yacc
6.1 Recursion
When specifying a list, we may do so using left recursion,
list:
item
| list ',' item
;
or right recursion:
list:
item
| item ',' list
If right recursion is used, all items on the list are pushed on the
stack. After the last item
is pushed, we start reducing. With left recursion, we never have
more than three terms on
the stack, since we reduce as we go along. For this reason, it is
advantageous to use left
recursion.
6.2 If-Else Ambiguity
A shift-reduce conflict that frequently occurs involves the if-
else construct. Assume we
have the following rules:
stmt:
IF expr stmt
| IF expr stmt ELSE stmt
...
and the following state:
IF expr stmt IF expr stmt . ELSE stmt
We need to decide if we should shift the ELSE, or reduce the IF
expr stmt at the top
of the stack. If we shift, then we have
IF expr stmt IF expr stmt . ELSE stmt
IF expr stmt IF expr stmt ELSE . stmt
IF expr stmt IF expr stmt ELSE stmt .
IF expr stmt stmt .
where the second ELSE is paired with the second IF. If we
reduce, we have
36
IF expr stmt IF expr stmt . ELSE stmt
IF expr stmt stmt . ELSE stmt
IF expr stmt . ELSE stmt
IF expr stmt ELSE . stmt
IF expr stmt ELSE stmt .
where the second ELSE is paired with the first IF. Modern
programming languages pair
an ELSE with the most recent unpaired IF, so the former
behavior is expected. This
works well with yacc, since default behavior, when a shift-
reduce conflict is encountered,
is to shift.
Although yacc does the right thing, it also issues a shift-reduce
warning message. To
remove the message, give IF-ELSE a higher precedence than the
simple IF statement:
%nonassoc IFX
%nonassoc ELSE
stmt:
IF expr stmt %prec IFX
| IF expr stmt ELSE stmt
6.3 Error Messages
A nice compiler gives the user meaningful error messages. For
example, not much
information is conveyed by the following message:
syntax error
If we track the line number in lex, then we can at least give the
user a line number:
void yyerror(char *s) {
fprintf(stderr, "line %d: %s\n", yylineno, s);
}
When yacc discovers a parsing error, the default action is to call
yyerror and then return
from yyparse with a return value of one. A more graceful action
flushes the input stream
to a statement delimiter and continues to scan:
stmt:
';'
| expr ';'
| PRINT expr ';'
| VARIABLE '=' expr ';'
| WHILE '(' expr ')' stmt
| IF '(' expr ')' stmt %prec IFX
| IF '(' expr ')' stmt ELSE stmt
| '{' stmt_list '}'
| error ';'
| error '}'
;
37
The error token is a special feature of yacc that will match all
input until the token
following error is found. For this example, when yacc detects an
error in a statement it
will call yyerror, flush input up to the next semicolon or brace,
and resume scanning.
6.4 Inherited Attributes
The examples so far have used synthesized attributes. At any
point in a syntax tree we can
determine the attributes of a node based on the attributes of its
children. Consider the rule
expr: expr '+' expr { $$ = $1 + $3; }
Since we are parsing bottom-up, the values of both operands are
available, and we can
determine the value associated with the left-hand side. An
inherited attribute of a node
depends on the value of a parent or sibling node. The following
grammar defines a C
variable declaration:
decl: type varlist
type: INT | FLOAT
varlist:
VAR { setType($1, $0); }
| varlist ',' VAR { setType($3, $0); }
Here is a sample parse:
. INT VAR
INT . VAR
type . VAR
type VAR .
type varlist .
decl .
When we reduce VAR to varlist, we should annotate the symbol
table with the type of
the variable. However, the type is buried in the stack. This
problem is resolved by
indexing back into the stack. Recall that $1 designates the first
term on the right-hand
side. We can index backwards, using $0, $-1, and so on. In this
case, $0 will do just
fine. If you need to specify a token type, the syntax is
$<tokentype>0, angle brackets
included. In this particular example, care must be taken to
ensure that type always
precedes varlist.
6.5 Embedded Actions
Rules in yacc may contain embedded actions:
list: item1 { do_item1($1); } item2 { do_item2($3); } item3
Note that the actions take a slot in the stack, so do_item2 must
use $3 to reference
item2. Actually, this grammar is transformed by yacc into the
following:
38
list: item1 _rule01 item2 _rule02 item3
_rule01: { do_item1($0); }
_rule02: { do_item2($0); }
6.6 Debugging Yacc
Yacc has facilities that enable debugging. This feature may vary
with different versions
of yacc, so you should consult documentation for details. The
code generated by yacc in
file y.tab.c includes debugging statements that are enabled by
defining YYDEBUG and
setting it to a non-zero value. This may also be done by
specifying command-line option
“-t”. With YYDEBUG properly set, debug output may be
toggled on and off by setting
yydebug. Output includes tokens scanned and shift/reduce
actions.
%{
#define YYDEBUG 1
%}
%%
...
%%
int main(void) {
#if YYDEBUG
yydebug = 1;
#endif
yyparse();
}
In addition, you can dump the parse states by specifying
command-line option "-v".
States are dumped to file y.output, and are often useful when
debugging a grammar.
Alternatively, you can write your own debug code by defining a
TRACE macro, as
illustrated below. When DEBUG is defined, a trace of
reductions, by line number, is
displayed.
%{
#ifdef DEBUG
#define TRACE printf("reduce at line %d\n", __LINE__);
#else
#define TRACE
#endif
%}
%%
statement_list:
statement
{ TRACE $$ = $1; }
| statement_list statement
{ TRACE $$ = newNode(';', 2, $1, $2); }
;
39
7. Bibliography
Aho, Alfred V., Ravi Sethi and Jeffrey D. Ullman [1986].
Compilers, Principles,
Techniques and Tools. Addison-Wesley, Reading,
Massachusetts.
Gardner, Jim, Chris Retterath and Eric Gisin [1988]. MKS Lex
& Yacc. Mortice Kern
Systems Inc., Waterloo, Ontario, Canada.
Johnson, Stephen C. [1975]. Yacc: Yet Another Compiler
Compiler. Computing Science
Technical Report No. 32, Bell Laboratories, Murray Hill, New
Jersey.
Lesk, M. E. and E. Schmidt [1975]. Lex – A Lexical Analyzer
Generator. Computing
Science Technical Report No. 39, Bell Laboratories, Murray
Hill, New Jersey.
Levine, John R., Tony Mason and Doug Brown [1992]. Lex &
Yacc. O’Reilly &
Associates, Inc. Sebastopol, California.
http://www.amazon.com/exec/obidos/ISBN=0201100886/none01
A/
http://www.amazon.com/exec/obidos/ISBN=0201100886/none01
A/
http://members.xoom.com/thomasn/y_yacc.pdf
http://members.xoom.com/thomasn/y_lex.pdf
http://www.amazon.com/exec/obidos/ISBN=1565920007/none01
A/PrefaceIntroductionLexTheoryPracticeYaccTheoryPractice,
Part IPractice, Part IICalculatorDescriptionInclude FileLex
InputYacc InputInterpreterCompilerMore LexStringsReserved
WordsDebugging LexMore YaccRecursionIf-Else
AmbiguityError MessagesInherited AttributesEmbedded
ActionsDebugging YaccBibliography
cmparser.y
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "ast.h"
#include "symbolTable.h"
#include "util.h"
/* other external function prototypes */
extern int yylex();
extern int initLex(int , char **);
extern AstNodePtr new_Node(NodeKind kind);
/* external global variables */
extern int yydebug;
extern int yylineno;
extern SymbolTableStackEntryPtr symbolStackTop;
extern int scopeDepth;
/* function prototypes */
void yyerror(const char *);
/* global variables */
AstNodePtr program;
FILE *outfile;
%}
/* YYSTYPE */
%union
{
AstNodePtr nodePtr;
int iVal;
char *cVal;
Type *type;
}
/* terminals */
%token TOK_ELSE TOK_IF TOK_RETURN TOK_VOID
TOK_INT TOK_WHILE
%token TOK_PLUS TOK_MINUS TOK_MULT TOK_DIV
TOK_LT TOK_LE TOK_GT TOK_GE TOK_EQ TOK_NE
TOK_ASSIGN TOK_SEMI TOK_COMMA
%token TOK_LPAREN TOK_RPAREN TOK_LSQ TOK_RSQ
TOK_LBRACE TOK_RBRACE TOK_ERROR
%token <cVal> TOK_ID
%token <iVal> TOK_NUM
%type <nodePtr> Declarations Functions Fun_Declaration
%type <type> Type_Specifier
%type <nodePtr> Compound_Stmt Statements Statement
%type <nodePtr> Expr_Statement If_Else_Statement
Selection_Stmt Iteration_Stmt Return_Stmt
%type <nodePtr> Expression Simple_Expression
Additive_Expression Factor Var Call Term
%type <nodePtr> Args Args_List
%type <nodePtr> Params Param_List Param
/*
 * Associativity and precedence, listed from lowest to highest.
 * TOK_IF / TOK_ELSE exist only to resolve the dangling-else conflict:
 * Selection_Stmt tags the else-less alternative with %prec TOK_IF so that
 * shifting TOK_ELSE (higher precedence) wins.
 * The additive level must name TOK_MINUS, the token actually declared and
 * used by Additive_Expression; the previous TOK_SUB silently introduced a
 * second, unused token.
 */
%nonassoc TOK_IF
%nonassoc TOK_ELSE
%right TOK_ASSIGN
%left TOK_EQ TOK_NE
%nonassoc TOK_LT TOK_GT TOK_LE TOK_GE
%left TOK_PLUS TOK_MINUS
%left TOK_MULT TOK_DIV
%nonassoc error
%%
Start : Declarations {}
;
Declarations : Functions { program = $1;}
| Var_Declaration Declarations { }
;
/*
 * Functions: one or more function declarations, chained through ->sibling
 * in source order. $$ is the first function of the chain.
 */
Functions : Fun_Declaration {
        /* Last function in the file: terminate the sibling chain explicitly,
           the node allocator does not clear it. */
        $1->sibling = NULL;
        $$ = $1;
    }
    | Fun_Declaration Functions {
        $1->sibling = $2;
        $$ = $1;
    }
    ;
/*
 * Var_Declaration: "int x;" or "int x[N];".
 * Inserts the identifier into the current scope's symbol table. A name that
 * already exists at the same scopeDepth is a redeclaration error; a name
 * found only in an outer scope is shadowed.
 * yyerror() terminates the process, so each diagnostic is built as a single
 * message that already contains the identifier.
 */
Var_Declaration : Type_Specifier TOK_ID TOK_SEMI {
        if ($1->kind != INT) {
            char msg[256];
            snprintf(msg, sizeof msg, "non-int variable declared: %s", $2);
            yyerror(msg);
        }
        ElementPtr symElement = symLookup($2);
        if (!symElement || scopeDepth != symElement->scope) {
            /* scalar: make the unused Type fields well-defined */
            $1->dimension = 0;
            $1->function = NULL;
            symInsert($2, $1, yylineno);
        }
        else {
            char msg[256];
            snprintf(msg, sizeof msg, "Redeclaration of variable: %s", $2);
            free($1);
            yyerror(msg);
        }
    }
    | Type_Specifier TOK_ID TOK_LSQ TOK_NUM TOK_RSQ TOK_SEMI {
        if ($1->kind != INT) {
            char msg[256];
            snprintf(msg, sizeof msg, "non-int variable declared: %s", $2);
            yyerror(msg);
        }
        ElementPtr symElement = symLookup($2);
        if (!symElement || scopeDepth != symElement->scope) {
            /* reuse Type for storing dimension */
            $1->kind = ARRAY;
            $1->dimension = $4;
            $1->function = NULL;
            symInsert($2, $1, yylineno);
        }
        else {
            char msg[256];
            snprintf(msg, sizeof msg, "Redeclaration of variable: %s", $2);
            free($1);
            yyerror(msg);
        }
    }
    ;
/*
 * Fun_Declaration: Type_Specifier ID '(' Params ')' Compound_Stmt
 *
 * The mid-rule action after '(' ($4) creates the METHOD node and registers
 * the function name in the enclosing scope before the parameters are parsed,
 * then opens a scope for the parameters. Compound_Stmt opens a further scope
 * of its own for the body.
 *   children[0] = parameter list (NULL for "void")
 *   children[1] = body
 * The symbol's Type becomes FUNCTION and its ->function member records the
 * declared return kind.
 */
Fun_Declaration : Type_Specifier TOK_ID TOK_LPAREN {
        $<nodePtr>$ = new_Node(METHOD);
        $<nodePtr>$->nLinenumber = yylineno;
        $<nodePtr>$->nType = $1;
        ElementPtr symElement = symLookup($2);
        if (!symElement || scopeDepth != symElement->scope) {
            $<nodePtr>$->nSymbolPtr = symInsert($2, $1, yylineno);
            /* capture the return kind before $1 is re-tagged as FUNCTION */
            $<nodePtr>$->nSymbolPtr->stype->function = new_type($1->kind);
            $<nodePtr>$->nSymbolPtr->snode = $<nodePtr>$;
            $1->kind = FUNCTION;
            enterScope();
        }
        else {
            /* yyerror() exits, so report the name in the same message */
            char msg[256];
            snprintf(msg, sizeof msg, "Redeclaration of function: %s", $2);
            yyerror(msg);
        }
    }
    Params TOK_RPAREN {
        $<nodePtr>4->children[0] = $5;
    }
    Compound_Stmt {
        $<nodePtr>4->children[1] = $8;
        $$ = $<nodePtr>4;
        leaveScope();
    }
    ;
/* Params: a non-empty parameter list, or "void" (represented as NULL). */
Params : Param_List { $$ = $1; }
    | TOK_VOID { $$ = NULL; }
    ;
Param_List : Param_List TOK_COMMA Param {
$<nodePtr>$ = $1;
while($<nodePtr>$->sibling)
$<nodePtr>$ =
$<nodePtr>$->sibling;
$<nodePtr>$->sibling = $3;
$3->sibling = NULL;
$$ = $1;
}
| Param {
$1->sibling = NULL;
$$ = $1;
}
;
/*
 * Param: one formal parameter, either "int x" or "int x[]".
 * Produces a FORMALVAR node whose nSymbolPtr is the freshly inserted symbol
 * and whose nType is the parameter's Type. A name already present at the
 * current scopeDepth is a redeclaration error.
 * yyerror() terminates the process, so diagnostics carry the identifier in
 * a single message.
 */
Param : Type_Specifier TOK_ID {
        ElementPtr symElement = symLookup($2);
        if (!symElement || scopeDepth != symElement->scope) {
            $$ = new_Node(FORMALVAR);
            $$->nLinenumber = yylineno;
            if ($1->kind != INT) {
                char msg[256];
                snprintf(msg, sizeof msg, "non-int variable declared: %s", $2);
                yyerror(msg);
            }
            else {
                $$->nSymbolPtr = symInsert($2, $1, yylineno);
                $$->nType = $1;
            }
        }
        else {
            char msg[256];
            snprintf(msg, sizeof msg, "Redeclaration of variable: %s", $2);
            free($1);
            yyerror(msg);
        }
    }
    | Type_Specifier TOK_ID TOK_LSQ TOK_RSQ {
        ElementPtr symElement = symLookup($2);
        if (!symElement || scopeDepth != symElement->scope) {
            Type *typ;
            if ($1->kind != INT) {
                char msg[256];
                snprintf(msg, sizeof msg, "non-int array declared: %s", $2);
                yyerror(msg);
            }
            /* array parameters get their own ARRAY Type; the element
               Type_Specifier is no longer needed afterwards */
            typ = new_type(ARRAY);
            typ->dimension = 0;   /* size is not part of "int x[]" */
            typ->function = NULL;
            $$ = new_Node(FORMALVAR);
            $$->nLinenumber = yylineno;
            $$->nSymbolPtr = symInsert($2, typ, yylineno);
            $$->nType = typ;
            free($1);
        }
        else {
            char msg[256];
            snprintf(msg, sizeof msg, "Redeclaration of variable: %s", $2);
            free($1);
            yyerror(msg);
        }
    }
    ;
Type_Specifier : TOK_INT {
$$ = (Type*) new_type(INT);
}
| TOK_VOID {
$$ = (Type*) new_type(VOID);
}
;
Compound_Stmt : TOK_LBRACE {
enterScope();
$<nodePtr>$ =
new_StmtNode(COMPOUND_STMT);
$<nodePtr>$->nSymbolTabPtr =
symbolStackTop->symbolTablePtr;
$<nodePtr>$->nLinenumber = yylineno;
}
Statements { $<nodePtr>2->children[0] = $3; } //
shud point to first statement
TOK_RBRACE {
leaveScope();
$$ = $<nodePtr>2;
}
| TOK_LBRACE {
enterScope();
$<nodePtr>$ =
new_StmtNode(COMPOUND_STMT);
$<nodePtr>$->nSymbolTabPtr =
symbolStackTop->symbolTablePtr;
$<nodePtr>$->nLinenumber = yylineno;
}
Local_Declarations Statements { $<nodePtr>2-
>children[0] = $4;} // shud point to first statement
TOK_RBRACE {
leaveScope();
$$ = $<nodePtr>2;
}
;
Local_Declarations : Var_Declaration Local_Declarations {}
| Var_Declaration {}
;
/*
 * Statements: a possibly empty statement sequence, linked through ->sibling
 * in source order. The empty sequence is NULL, which also terminates the list.
 */
Statements : Statement Statements {
        $1->sibling = $2;
        $$ = $1;
    }
    | /* empty */ { $$ = NULL; }
    ;
Statement : Expr_Statement {
$$ = $1;
}
| Compound_Stmt {
$$ = $1;
}
| Selection_Stmt { $$ = $1; }
| Iteration_Stmt {
$$ = $1;
}
| Return_Stmt { $$ = $1; }
;
Expr_Statement : Expression TOK_SEMI {
$$ =
new_StmtNode(EXPRESSION_STMT);
$$->nLinenumber = yylineno;
$$->children[0] = $1;
}
| TOK_SEMI {
$$ =
new_StmtNode(EXPRESSION_STMT) ;
$$->nLinenumber = yylineno;
$$->children[0] = NULL;
}
;
Selection_Stmt : If_Else_Statement %prec TOK_IF {
$$ = $1;
}
| If_Else_Statement TOK_ELSE Statement {
$$ = $1;
$$->children[2]= $3;
}
;
If_Else_Statement : TOK_IF TOK_LPAREN Expression {
$<nodePtr>$ =
new_StmtNode(IF_THEN_ELSE_STMT);
$<nodePtr>$->nLinenumber
= yylineno;
$<nodePtr>$->children[0] =
$3;
}
TOK_RPAREN Statement {
$<nodePtr>4->children[1] =
$6;
$$ = $<nodePtr>4;
}
;
Iteration_Stmt : TOK_WHILE TOK_LPAREN Expression {
$<nodePtr>$ =
new_StmtNode(WHILE_STMT);
$<nodePtr>$->nLinenumber
= yylineno;
$<nodePtr>$->children[0] =
$3;
}
TOK_RPAREN Statement {
$<nodePtr>4->children[1] =
$6;
$$ = $<nodePtr>4;
$<nodePtr>$->nLinenumber
= yylineno;
}
;
Return_Stmt : TOK_RETURN Expression TOK_SEMI {
$$ =
new_StmtNode(RETURN_STMT);
$$->nLinenumber = yylineno;
$$->children[0] = $2;
}
| TOK_RETURN TOK_SEMI {
$$ =
new_StmtNode(RETURN_STMT);
$$->nLinenumber = yylineno;
$$->children[0] = NULL;
}
;
Expression : Var TOK_ASSIGN {
$<nodePtr>$ =
new_ExprNode(ASSI_EXP);
$<nodePtr>$->nLinenumber = yylineno;
$<nodePtr>$->children[0] = $1;
}
Expression {
$<nodePtr>3->children[1] = $4;
$$ = $<nodePtr>3;
}
| Simple_Expression {
$$ = $1;
}
;
/*
 * Var: a scalar reference "x" (VAR_EXP) or an indexed reference "x[e]"
 * (ARRAY_EXP, children[0] = index expression). nSymbolPtr is the symbol
 * found by symLookup(); an unknown name is a fatal error.
 * The node is no longer freed on the error path: $$ would otherwise be left
 * pointing at released memory if yyerror() ever returned.
 */
Var : TOK_ID {
        $$ = new_ExprNode(VAR_EXP);
        $$->nLinenumber = yylineno;
        $$->nSymbolPtr = symLookup($1);
        if (!$$->nSymbolPtr) {
            /* yyerror() exits, so the name must be in the same message */
            char msg[256];
            snprintf(msg, sizeof msg, "Variable must be declared before use: %s", $1);
            yyerror(msg);
        }
    }
    | TOK_ID TOK_LSQ Expression TOK_RSQ {
        $$ = new_ExprNode(ARRAY_EXP);
        $$->nLinenumber = yylineno;
        $$->nSymbolPtr = symLookup($1);
        $$->children[0] = $3;
        if (!$$->nSymbolPtr) {
            char msg[256];
            snprintf(msg, sizeof msg, "Variable must be declared before use: %s", $1);
            yyerror(msg);
        }
    }
    ;
/*
 * Simple_Expression: an optional single relational/equality operator between
 * two additive expressions. Each operator alternative creates its node in a
 * mid-rule action ($3) right after the operator, so nLinenumber is the line
 * of the operator, then attaches the right operand once it has been parsed.
 *   children[0] = left operand, children[1] = right operand
 */
Simple_Expression : Additive_Expression TOK_GT {
        $<nodePtr>$ = new_ExprNode(GT_EXP);
        $<nodePtr>$->nLinenumber = yylineno;
        $<nodePtr>$->children[0] = $1;
    }
    Additive_Expression {
        $<nodePtr>3->children[1] = $4;
        $$ = $<nodePtr>3;
    }
    | Additive_Expression TOK_LT {
        $<nodePtr>$ = new_ExprNode(LT_EXP);
        $<nodePtr>$->nLinenumber = yylineno;
        $<nodePtr>$->children[0] = $1;
    }
    Additive_Expression {
        $<nodePtr>3->children[1] = $4;
        $$ = $<nodePtr>3;
    }
    | Additive_Expression TOK_GE {
        $<nodePtr>$ = new_ExprNode(GE_EXP);
        $<nodePtr>$->nLinenumber = yylineno;
        $<nodePtr>$->children[0] = $1;
    }
    Additive_Expression {
        $<nodePtr>3->children[1] = $4;
        $$ = $<nodePtr>3;
    }
    | Additive_Expression TOK_LE {
        $<nodePtr>$ = new_ExprNode(LE_EXP);
        $<nodePtr>$->nLinenumber = yylineno;
        $<nodePtr>$->children[0] = $1;
    }
    Additive_Expression {
        $<nodePtr>3->children[1] = $4;
        $$ = $<nodePtr>3;
    }
    | Additive_Expression TOK_EQ {
        $<nodePtr>$ = new_ExprNode(EQ_EXP);
        $<nodePtr>$->nLinenumber = yylineno;
        $<nodePtr>$->children[0] = $1;
    }
    Additive_Expression {
        $<nodePtr>3->children[1] = $4;
        $$ = $<nodePtr>3;
    }
    | Additive_Expression TOK_NE {
        $<nodePtr>$ = new_ExprNode(NE_EXP);
        $<nodePtr>$->nLinenumber = yylineno;
        $<nodePtr>$->children[0] = $1;
    }
    Additive_Expression {
        $<nodePtr>3->children[1] = $4;
        $$ = $<nodePtr>3;
    }
    | Additive_Expression { $$ = $1; }
    ;
Additive_Expression : Additive_Expression TOK_PLUS {
$<nodePtr>$ =
new_ExprNode(ADD_EXP);
$<nodePtr>$->nLinenumber
= yylineno;
$<nodePtr>$->children[0] =
$1;
}
Term {
$<nodePtr>3->children[1] = $4;
$$ = $<nodePtr>3;
}
| Additive_Expression TOK_MINUS {
$<nodePtr>$ =
new_ExprNode(SUB_EXP);
$<nodePtr>$->nLinenumber
= yylineno;
$<nodePtr>$->children[0] =
$1;
}
Term {
$<nodePtr>3->children[1] = $4;
$$ = $<nodePtr>3;
}
| Term {
$$ = $1;
}
;
Term : Term TOK_MULT {
$<nodePtr>$ = new_ExprNode(MULT_EXP);
$<nodePtr>$->nLinenumber = yylineno;
$<nodePtr>$->children[0] = $1;
}
Factor {
$<nodePtr>3->children[1] = $4;
$$ = $<nodePtr>3;
}
| Term TOK_DIV {
$<nodePtr>$ = new_ExprNode(DIV_EXP);
$<nodePtr>$->nLinenumber = yylineno;
$<nodePtr>$->children[0] = $1;
}
Factor {
$<nodePtr>3->children[1] = $4;
$$ = $<nodePtr>3;
}
| Factor {
$$ = $1;
}
;
/*
 * Factor: a parenthesised expression, a variable reference, a call, or an
 * integer literal (CONST_EXP with the value stored in nValue).
 */
Factor : TOK_LPAREN Expression TOK_RPAREN { $$ = $2; }
    | Var { $$ = $1; }
    | Call { $$ = $1; }
    | TOK_NUM {
        $$ = new_ExprNode(CONST_EXP);
        $$->nLinenumber = yylineno;
        $$->nValue = $1;
    }
    ;
/*
 * Call: "f(args)". Produces a CALL_EXP node with
 *   children[0] = argument list (NULL when there are no arguments)
 *   nSymbolPtr  = symbol of the callee, or NULL if it is not declared yet
 *   fname       = private copy of the callee's name
 * A callee that is not found is only reported, not rejected, so that calls
 * to functions defined later in the file still parse.
 * fname is now recorded for every call rather than only for unresolved ones,
 * so it is never left uninitialised.
 */
Call : TOK_ID TOK_LPAREN Args TOK_RPAREN {
        $$ = new_ExprNode(CALL_EXP);
        $$->nLinenumber = yylineno;
        $$->fname = strdup($1);
        $$->nSymbolPtr = symLookup($1);
        if (!$$->nSymbolPtr) {
            printf("function %s not found, forward reference\n", $1);
        }
        $$->children[0] = $3;
    }
    ;
Args : Args_List {$$=$1;}
| { $$ = NULL; }
;
Args_List : Args_List TOK_COMMA Expression
{
$<nodePtr>$ = $1;
while($<nodePtr>$->sibling)
$<nodePtr>$ =
$<nodePtr>$->sibling;
$<nodePtr>$->sibling = $3;
$3->sibling = NULL;
$$ = $1;
}
| Expression {
$1->sibling = NULL;
$$ = $1;
}
;
%%
void yyerror (char const *s) {
fprintf (stderr, "Error at line %d: %sn", yylineno, s);
exit(1);
}
/*
 * Program entry point.
 * With YYLLEXER defined, only the scanner is driven: tokens are read until
 * gettok() reports end of input. Otherwise the symbol table is initialised
 * and the parser is run; its result becomes the exit status.
 */
int main(int argc, char **argv){
    initLex(argc, argv);
#ifdef YYLLEXER
    while (gettok() != 0)
        ; /* gettok returns 0 on EOF */
    return 0;
#else
    initSymbolTable();
    return yyparse();
#endif
}
ast.c
#include "symbolTable.h"
#include "ast.h"
extern SymbolTableStackEntryPtr symbolStackTop;
extern int scopeDepth;
// Creates a new expression node of the given kind.
// The node is zero-initialised, so children[], sibling and the other pointer
// members start out as NULL instead of holding indeterminate values; the
// grammar relies on that for list ends and absent operands.
// Returns NULL if the allocation fails.
AstNodePtr new_ExprNode(ExpKind kind)
{
    AstNodePtr node = calloc(1, sizeof *node);
    if (node == NULL)
        return NULL;
    node->nKind = EXPRESSION;
    node->eKind = kind;
    return node;
}
// Creates a new statement node of the given kind.
// The node is zero-initialised, so unused children[] slots (e.g. the else
// branch of an if without else) and sibling are NULL rather than garbage.
// Returns NULL if the allocation fails.
AstNodePtr new_StmtNode(StmtKind kind)
{
    AstNodePtr node = calloc(1, sizeof *node);
    if (node == NULL)
        return NULL;
    node->nKind = STMT;
    node->sKind = kind;
    return node;
}
// Creates a new node of an arbitrary NodeKind (the parser uses it for METHOD
// and FORMALVAR nodes). All other members are zero-initialised.
// Returns NULL if the allocation fails.
AstNodePtr new_Node(NodeKind kind)
{
    AstNodePtr node = calloc(1, sizeof *node);
    if (node == NULL)
        return NULL;
    node->nKind = kind;
    return node;
}
// Creates a new type descriptor for entry into the symbol table.
// Only kind is set; the remaining members are zero-initialised so that a
// scalar's dimension and function fields are well-defined.
// Returns NULL if the allocation fails.
Type* new_type(TypeKind kind)
{
    Type *typ1 = calloc(1, sizeof *typ1);
    if (typ1 == NULL)
        return NULL;
    typ1->kind = kind;
    return typ1;
}

More Related Content

Similar to AST Parser for C-like Language

4 operators, expressions &amp; statements
4  operators, expressions &amp; statements4  operators, expressions &amp; statements
4 operators, expressions &amp; statementsMomenMostafa
 
Dti2143 chap 4 control statement part 2
Dti2143 chap 4 control statement part 2Dti2143 chap 4 control statement part 2
Dti2143 chap 4 control statement part 2alish sha
 
java compilerCompiler1.javajava compilerCompiler1.javaimport.docx
java compilerCompiler1.javajava compilerCompiler1.javaimport.docxjava compilerCompiler1.javajava compilerCompiler1.javaimport.docx
java compilerCompiler1.javajava compilerCompiler1.javaimport.docxpriestmanmable
 
Php Crash Course
Php Crash CoursePhp Crash Course
Php Crash Coursemussawir20
 
COMP360 Assembler Write an assembler that reads the source code of an.pdf
COMP360 Assembler Write an assembler that reads the source code of an.pdfCOMP360 Assembler Write an assembler that reads the source code of an.pdf
COMP360 Assembler Write an assembler that reads the source code of an.pdffazalenterprises
 
Liferay Training Struts Portlet
Liferay Training Struts PortletLiferay Training Struts Portlet
Liferay Training Struts PortletSaikrishna Basetti
 
Compiler Design Lab File
Compiler Design Lab FileCompiler Design Lab File
Compiler Design Lab FileKandarp Tiwari
 
Concepts of C [Module 2]
Concepts of C [Module 2]Concepts of C [Module 2]
Concepts of C [Module 2]Abhishek Sinha
 
You should be able to use the same function to check a narrowing fun.pdf
You should be able to use the same function to check a narrowing fun.pdfYou should be able to use the same function to check a narrowing fun.pdf
You should be able to use the same function to check a narrowing fun.pdfsayandas941
 
PHP7 - Scalar Type Hints & Return Types
PHP7 - Scalar Type Hints & Return TypesPHP7 - Scalar Type Hints & Return Types
PHP7 - Scalar Type Hints & Return TypesEric Poe
 
Php Reusing Code And Writing Functions
Php Reusing Code And Writing FunctionsPhp Reusing Code And Writing Functions
Php Reusing Code And Writing Functionsmussawir20
 
Itsecteam shell
Itsecteam shellItsecteam shell
Itsecteam shellady36
 

Similar to AST Parser for C-like Language (20)

4 operators, expressions &amp; statements
4  operators, expressions &amp; statements4  operators, expressions &amp; statements
4 operators, expressions &amp; statements
 
2 data and c
2 data and c2 data and c
2 data and c
 
Dti2143 chap 4 control statement part 2
Dti2143 chap 4 control statement part 2Dti2143 chap 4 control statement part 2
Dti2143 chap 4 control statement part 2
 
java compilerCompiler1.javajava compilerCompiler1.javaimport.docx
java compilerCompiler1.javajava compilerCompiler1.javaimport.docxjava compilerCompiler1.javajava compilerCompiler1.javaimport.docx
java compilerCompiler1.javajava compilerCompiler1.javaimport.docx
 
Flow of control ppt
Flow of control pptFlow of control ppt
Flow of control ppt
 
Php Crash Course
Php Crash CoursePhp Crash Course
Php Crash Course
 
COMP360 Assembler Write an assembler that reads the source code of an.pdf
COMP360 Assembler Write an assembler that reads the source code of an.pdfCOMP360 Assembler Write an assembler that reads the source code of an.pdf
COMP360 Assembler Write an assembler that reads the source code of an.pdf
 
Gps c
Gps cGps c
Gps c
 
Liferay Training Struts Portlet
Liferay Training Struts PortletLiferay Training Struts Portlet
Liferay Training Struts Portlet
 
Compiler Design Lab File
Compiler Design Lab FileCompiler Design Lab File
Compiler Design Lab File
 
Concepts of C [Module 2]
Concepts of C [Module 2]Concepts of C [Module 2]
Concepts of C [Module 2]
 
You should be able to use the same function to check a narrowing fun.pdf
You should be able to use the same function to check a narrowing fun.pdfYou should be able to use the same function to check a narrowing fun.pdf
You should be able to use the same function to check a narrowing fun.pdf
 
Ch4c
Ch4cCh4c
Ch4c
 
PHP7 - Scalar Type Hints & Return Types
PHP7 - Scalar Type Hints & Return TypesPHP7 - Scalar Type Hints & Return Types
PHP7 - Scalar Type Hints & Return Types
 
Stack prgs
Stack prgsStack prgs
Stack prgs
 
Php Reusing Code And Writing Functions
Php Reusing Code And Writing FunctionsPhp Reusing Code And Writing Functions
Php Reusing Code And Writing Functions
 
Itsecteam shell
Itsecteam shellItsecteam shell
Itsecteam shell
 
Unit2 C
Unit2 C Unit2 C
Unit2 C
 
Unit2 C
Unit2 CUnit2 C
Unit2 C
 
Quiz using C++
Quiz using C++Quiz using C++
Quiz using C++
 

More from wkyra78

Melissa HinkhouseWeek 3-Original PostNURS 6050 Policy and A.docx
Melissa HinkhouseWeek 3-Original PostNURS 6050 Policy and A.docxMelissa HinkhouseWeek 3-Original PostNURS 6050 Policy and A.docx
Melissa HinkhouseWeek 3-Original PostNURS 6050 Policy and A.docxwkyra78
 
Melissa HinkhouseAdvanced Pharmacology NURS-6521N-43Professo.docx
Melissa HinkhouseAdvanced Pharmacology NURS-6521N-43Professo.docxMelissa HinkhouseAdvanced Pharmacology NURS-6521N-43Professo.docx
Melissa HinkhouseAdvanced Pharmacology NURS-6521N-43Professo.docxwkyra78
 
Meiner, S. E., & Yeager, J. J. (2019). Chapter 17Chap.docx
Meiner, S. E., & Yeager, J. J. (2019).    Chapter 17Chap.docxMeiner, S. E., & Yeager, J. J. (2019).    Chapter 17Chap.docx
Meiner, S. E., & Yeager, J. J. (2019). Chapter 17Chap.docxwkyra78
 
member is a security software architect in a cloud service provider .docx
member is a security software architect in a cloud service provider .docxmember is a security software architect in a cloud service provider .docx
member is a security software architect in a cloud service provider .docxwkyra78
 
Melissa ShortridgeWeek 6COLLAPSEMy own attitude has ch.docx
Melissa ShortridgeWeek 6COLLAPSEMy own attitude has ch.docxMelissa ShortridgeWeek 6COLLAPSEMy own attitude has ch.docx
Melissa ShortridgeWeek 6COLLAPSEMy own attitude has ch.docxwkyra78
 
Melissa is a 15-year-old high school student. Over the last week.docx
Melissa is a 15-year-old high school student. Over the last week.docxMelissa is a 15-year-old high school student. Over the last week.docx
Melissa is a 15-year-old high school student. Over the last week.docxwkyra78
 
Measurement  of  the  angle  θ          .docx
Measurement  of  the  angle  θ          .docxMeasurement  of  the  angle  θ          .docx
Measurement  of  the  angle  θ          .docxwkyra78
 
Measurement of the angle θ For better understanding .docx
Measurement of the angle θ     For better understanding .docxMeasurement of the angle θ     For better understanding .docx
Measurement of the angle θ For better understanding .docxwkyra78
 
Meaning-Making Forum 2 (Week 5)Meaning-Making Forums 1-4 are thi.docx
Meaning-Making Forum 2 (Week 5)Meaning-Making Forums 1-4 are thi.docxMeaning-Making Forum 2 (Week 5)Meaning-Making Forums 1-4 are thi.docx
Meaning-Making Forum 2 (Week 5)Meaning-Making Forums 1-4 are thi.docxwkyra78
 
MBA6231 - 1.1 - project charter.docxProject Charter Pr.docx
MBA6231 - 1.1 - project charter.docxProject Charter Pr.docxMBA6231 - 1.1 - project charter.docxProject Charter Pr.docx
MBA6231 - 1.1 - project charter.docxProject Charter Pr.docxwkyra78
 
Medication Errors Led to Disastrous Outcomes1. Search th.docx
Medication Errors Led to Disastrous Outcomes1. Search th.docxMedication Errors Led to Disastrous Outcomes1. Search th.docx
Medication Errors Led to Disastrous Outcomes1. Search th.docxwkyra78
 
Meet, call, Skype or Zoom with a retired athlete and interview himh.docx
Meet, call, Skype or Zoom with a retired athlete and interview himh.docxMeet, call, Skype or Zoom with a retired athlete and interview himh.docx
Meet, call, Skype or Zoom with a retired athlete and interview himh.docxwkyra78
 
Medication Administration Make a list of the most common med.docx
Medication Administration Make a list of the most common med.docxMedication Administration Make a list of the most common med.docx
Medication Administration Make a list of the most common med.docxwkyra78
 
media portfolio”about chapter 1 to 15 from the book  Ci.docx
media portfolio”about chapter 1 to 15 from the book  Ci.docxmedia portfolio”about chapter 1 to 15 from the book  Ci.docx
media portfolio”about chapter 1 to 15 from the book  Ci.docxwkyra78
 
MediationNameAMUDate.docx
MediationNameAMUDate.docxMediationNameAMUDate.docx
MediationNameAMUDate.docxwkyra78
 
Media coverage influences the publics perception of the crimina.docx
Media coverage influences the publics perception of the crimina.docxMedia coverage influences the publics perception of the crimina.docx
Media coverage influences the publics perception of the crimina.docxwkyra78
 
Media Content AnalysisPurpose Evaluate the quality and value of.docx
Media Content AnalysisPurpose Evaluate the quality and value of.docxMedia Content AnalysisPurpose Evaluate the quality and value of.docx
Media Content AnalysisPurpose Evaluate the quality and value of.docxwkyra78
 
Mayan gods and goddesses are very much a part of this text.  Their i.docx
Mayan gods and goddesses are very much a part of this text.  Their i.docxMayan gods and goddesses are very much a part of this text.  Their i.docx
Mayan gods and goddesses are very much a part of this text.  Their i.docxwkyra78
 
Media and SocietyIn 1,100 words, complete the followingAn.docx
Media and SocietyIn 1,100 words, complete the followingAn.docxMedia and SocietyIn 1,100 words, complete the followingAn.docx
Media and SocietyIn 1,100 words, complete the followingAn.docxwkyra78
 
MBA 5110 – Business Organization and ManagementMidterm ExamAns.docx
MBA 5110 – Business Organization and ManagementMidterm ExamAns.docxMBA 5110 – Business Organization and ManagementMidterm ExamAns.docx
MBA 5110 – Business Organization and ManagementMidterm ExamAns.docxwkyra78
 

More from wkyra78 (20)

Melissa HinkhouseWeek 3-Original PostNURS 6050 Policy and A.docx
Melissa HinkhouseWeek 3-Original PostNURS 6050 Policy and A.docxMelissa HinkhouseWeek 3-Original PostNURS 6050 Policy and A.docx
Melissa HinkhouseWeek 3-Original PostNURS 6050 Policy and A.docx
 
Melissa HinkhouseAdvanced Pharmacology NURS-6521N-43Professo.docx
Melissa HinkhouseAdvanced Pharmacology NURS-6521N-43Professo.docxMelissa HinkhouseAdvanced Pharmacology NURS-6521N-43Professo.docx
Melissa HinkhouseAdvanced Pharmacology NURS-6521N-43Professo.docx
 
Meiner, S. E., & Yeager, J. J. (2019). Chapter 17Chap.docx
Meiner, S. E., & Yeager, J. J. (2019).    Chapter 17Chap.docxMeiner, S. E., & Yeager, J. J. (2019).    Chapter 17Chap.docx
Meiner, S. E., & Yeager, J. J. (2019). Chapter 17Chap.docx
 
member is a security software architect in a cloud service provider .docx
member is a security software architect in a cloud service provider .docxmember is a security software architect in a cloud service provider .docx
member is a security software architect in a cloud service provider .docx
 
Melissa ShortridgeWeek 6COLLAPSEMy own attitude has ch.docx
Melissa ShortridgeWeek 6COLLAPSEMy own attitude has ch.docxMelissa ShortridgeWeek 6COLLAPSEMy own attitude has ch.docx
Melissa ShortridgeWeek 6COLLAPSEMy own attitude has ch.docx
 
Melissa is a 15-year-old high school student. Over the last week.docx
Melissa is a 15-year-old high school student. Over the last week.docxMelissa is a 15-year-old high school student. Over the last week.docx
Melissa is a 15-year-old high school student. Over the last week.docx
 
Measurement  of  the  angle  θ          .docx
Measurement  of  the  angle  θ          .docxMeasurement  of  the  angle  θ          .docx
Measurement  of  the  angle  θ          .docx
 
Measurement of the angle θ For better understanding .docx
Measurement of the angle θ     For better understanding .docxMeasurement of the angle θ     For better understanding .docx
Measurement of the angle θ For better understanding .docx
 
Meaning-Making Forum 2 (Week 5)Meaning-Making Forums 1-4 are thi.docx
Meaning-Making Forum 2 (Week 5)Meaning-Making Forums 1-4 are thi.docxMeaning-Making Forum 2 (Week 5)Meaning-Making Forums 1-4 are thi.docx
Meaning-Making Forum 2 (Week 5)Meaning-Making Forums 1-4 are thi.docx
 
MBA6231 - 1.1 - project charter.docxProject Charter Pr.docx
MBA6231 - 1.1 - project charter.docxProject Charter Pr.docxMBA6231 - 1.1 - project charter.docxProject Charter Pr.docx
MBA6231 - 1.1 - project charter.docxProject Charter Pr.docx
 
Medication Errors Led to Disastrous Outcomes1. Search th.docx
Medication Errors Led to Disastrous Outcomes1. Search th.docxMedication Errors Led to Disastrous Outcomes1. Search th.docx
Medication Errors Led to Disastrous Outcomes1. Search th.docx
 
Meet, call, Skype or Zoom with a retired athlete and interview himh.docx
Meet, call, Skype or Zoom with a retired athlete and interview himh.docxMeet, call, Skype or Zoom with a retired athlete and interview himh.docx
Meet, call, Skype or Zoom with a retired athlete and interview himh.docx
 
Medication Administration Make a list of the most common med.docx
Medication Administration Make a list of the most common med.docxMedication Administration Make a list of the most common med.docx
Medication Administration Make a list of the most common med.docx
 
media portfolio”about chapter 1 to 15 from the book  Ci.docx
media portfolio”about chapter 1 to 15 from the book  Ci.docxmedia portfolio”about chapter 1 to 15 from the book  Ci.docx
media portfolio”about chapter 1 to 15 from the book  Ci.docx
 
MediationNameAMUDate.docx
MediationNameAMUDate.docxMediationNameAMUDate.docx
MediationNameAMUDate.docx
 
Media coverage influences the publics perception of the crimina.docx
Media coverage influences the publics perception of the crimina.docxMedia coverage influences the publics perception of the crimina.docx
Media coverage influences the publics perception of the crimina.docx
 
Media Content AnalysisPurpose Evaluate the quality and value of.docx
Media Content AnalysisPurpose Evaluate the quality and value of.docxMedia Content AnalysisPurpose Evaluate the quality and value of.docx
Media Content AnalysisPurpose Evaluate the quality and value of.docx
 
Mayan gods and goddesses are very much a part of this text.  Their i.docx
Mayan gods and goddesses are very much a part of this text.  Their i.docxMayan gods and goddesses are very much a part of this text.  Their i.docx
Mayan gods and goddesses are very much a part of this text.  Their i.docx
 
Media and SocietyIn 1,100 words, complete the followingAn.docx
Media and SocietyIn 1,100 words, complete the followingAn.docxMedia and SocietyIn 1,100 words, complete the followingAn.docx
Media and SocietyIn 1,100 words, complete the followingAn.docx
 
MBA 5110 – Business Organization and ManagementMidterm ExamAns.docx
MBA 5110 – Business Organization and ManagementMidterm ExamAns.docxMBA 5110 – Business Organization and ManagementMidterm ExamAns.docx
MBA 5110 – Business Organization and ManagementMidterm ExamAns.docx
 

Recently uploaded

Roles & Responsibilities in Pharmacovigilance
Roles & Responsibilities in PharmacovigilanceRoles & Responsibilities in Pharmacovigilance
Roles & Responsibilities in PharmacovigilanceSamikshaHamane
 
Incoming and Outgoing Shipments in 1 STEP Using Odoo 17
Incoming and Outgoing Shipments in 1 STEP Using Odoo 17Incoming and Outgoing Shipments in 1 STEP Using Odoo 17
Incoming and Outgoing Shipments in 1 STEP Using Odoo 17Celine George
 
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdfFraming an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdfUjwalaBharambe
 
Full Stack Web Development Course for Beginners
Full Stack Web Development Course  for BeginnersFull Stack Web Development Course  for Beginners
Full Stack Web Development Course for BeginnersSabitha Banu
 
MARGINALIZATION (Different learners in Marginalized Group
MARGINALIZATION (Different learners in Marginalized GroupMARGINALIZATION (Different learners in Marginalized Group
MARGINALIZATION (Different learners in Marginalized GroupJonathanParaisoCruz
 
Painted Grey Ware.pptx, PGW Culture of India
Painted Grey Ware.pptx, PGW Culture of IndiaPainted Grey Ware.pptx, PGW Culture of India
Painted Grey Ware.pptx, PGW Culture of IndiaVirag Sontakke
 
EPANDING THE CONTENT OF AN OUTLINE using notes.pptx
EPANDING THE CONTENT OF AN OUTLINE using notes.pptxEPANDING THE CONTENT OF AN OUTLINE using notes.pptx
EPANDING THE CONTENT OF AN OUTLINE using notes.pptxRaymartEstabillo3
 
Capitol Tech U Doctoral Presentation - April 2024.pptx
Capitol Tech U Doctoral Presentation - April 2024.pptxCapitol Tech U Doctoral Presentation - April 2024.pptx
Capitol Tech U Doctoral Presentation - April 2024.pptxCapitolTechU
 
Organic Name Reactions for the students and aspirants of Chemistry12th.pptx
Organic Name Reactions  for the students and aspirants of Chemistry12th.pptxOrganic Name Reactions  for the students and aspirants of Chemistry12th.pptx
Organic Name Reactions for the students and aspirants of Chemistry12th.pptxVS Mahajan Coaching Centre
 
CARE OF CHILD IN INCUBATOR..........pptx
CARE OF CHILD IN INCUBATOR..........pptxCARE OF CHILD IN INCUBATOR..........pptx
CARE OF CHILD IN INCUBATOR..........pptxGaneshChakor2
 
Pharmacognosy Flower 3. Compositae 2023.pdf
Pharmacognosy Flower 3. Compositae 2023.pdfPharmacognosy Flower 3. Compositae 2023.pdf
Pharmacognosy Flower 3. Compositae 2023.pdfMahmoud M. Sallam
 
Hierarchy of management that covers different levels of management
Hierarchy of management that covers different levels of managementHierarchy of management that covers different levels of management
Hierarchy of management that covers different levels of managementmkooblal
 
ECONOMIC CONTEXT - PAPER 1 Q3: NEWSPAPERS.pptx
ECONOMIC CONTEXT - PAPER 1 Q3: NEWSPAPERS.pptxECONOMIC CONTEXT - PAPER 1 Q3: NEWSPAPERS.pptx
ECONOMIC CONTEXT - PAPER 1 Q3: NEWSPAPERS.pptxiammrhaywood
 
Biting mechanism of poisonous snakes.pdf
Biting mechanism of poisonous snakes.pdfBiting mechanism of poisonous snakes.pdf
Biting mechanism of poisonous snakes.pdfadityarao40181
 
Computed Fields and api Depends in the Odoo 17
Computed Fields and api Depends in the Odoo 17Computed Fields and api Depends in the Odoo 17
Computed Fields and api Depends in the Odoo 17Celine George
 
Solving Puzzles Benefits Everyone (English).pptx
Solving Puzzles Benefits Everyone (English).pptxSolving Puzzles Benefits Everyone (English).pptx
Solving Puzzles Benefits Everyone (English).pptxOH TEIK BIN
 
18-04-UA_REPORT_MEDIALITERAСY_INDEX-DM_23-1-final-eng.pdf
18-04-UA_REPORT_MEDIALITERAСY_INDEX-DM_23-1-final-eng.pdf18-04-UA_REPORT_MEDIALITERAСY_INDEX-DM_23-1-final-eng.pdf
18-04-UA_REPORT_MEDIALITERAСY_INDEX-DM_23-1-final-eng.pdfssuser54595a
 
Crayon Activity Handout For the Crayon A
Crayon Activity Handout For the Crayon ACrayon Activity Handout For the Crayon A
Crayon Activity Handout For the Crayon AUnboundStockton
 

Recently uploaded (20)

Roles & Responsibilities in Pharmacovigilance
Roles & Responsibilities in PharmacovigilanceRoles & Responsibilities in Pharmacovigilance
Roles & Responsibilities in Pharmacovigilance
 
Incoming and Outgoing Shipments in 1 STEP Using Odoo 17
Incoming and Outgoing Shipments in 1 STEP Using Odoo 17Incoming and Outgoing Shipments in 1 STEP Using Odoo 17
Incoming and Outgoing Shipments in 1 STEP Using Odoo 17
 
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdfFraming an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
Framing an Appropriate Research Question 6b9b26d93da94caf993c038d9efcdedb.pdf
 
Model Call Girl in Tilak Nagar Delhi reach out to us at 🔝9953056974🔝
Model Call Girl in Tilak Nagar Delhi reach out to us at 🔝9953056974🔝Model Call Girl in Tilak Nagar Delhi reach out to us at 🔝9953056974🔝
Model Call Girl in Tilak Nagar Delhi reach out to us at 🔝9953056974🔝
 
Full Stack Web Development Course for Beginners
Full Stack Web Development Course  for BeginnersFull Stack Web Development Course  for Beginners
Full Stack Web Development Course for Beginners
 
MARGINALIZATION (Different learners in Marginalized Group
MARGINALIZATION (Different learners in Marginalized GroupMARGINALIZATION (Different learners in Marginalized Group
MARGINALIZATION (Different learners in Marginalized Group
 
Painted Grey Ware.pptx, PGW Culture of India
Painted Grey Ware.pptx, PGW Culture of IndiaPainted Grey Ware.pptx, PGW Culture of India
Painted Grey Ware.pptx, PGW Culture of India
 
EPANDING THE CONTENT OF AN OUTLINE using notes.pptx
EPANDING THE CONTENT OF AN OUTLINE using notes.pptxEPANDING THE CONTENT OF AN OUTLINE using notes.pptx
EPANDING THE CONTENT OF AN OUTLINE using notes.pptx
 
Capitol Tech U Doctoral Presentation - April 2024.pptx
Capitol Tech U Doctoral Presentation - April 2024.pptxCapitol Tech U Doctoral Presentation - April 2024.pptx
Capitol Tech U Doctoral Presentation - April 2024.pptx
 
OS-operating systems- ch04 (Threads) ...
OS-operating systems- ch04 (Threads) ...OS-operating systems- ch04 (Threads) ...
OS-operating systems- ch04 (Threads) ...
 
Organic Name Reactions for the students and aspirants of Chemistry12th.pptx
Organic Name Reactions  for the students and aspirants of Chemistry12th.pptxOrganic Name Reactions  for the students and aspirants of Chemistry12th.pptx
Organic Name Reactions for the students and aspirants of Chemistry12th.pptx
 
CARE OF CHILD IN INCUBATOR..........pptx
CARE OF CHILD IN INCUBATOR..........pptxCARE OF CHILD IN INCUBATOR..........pptx
CARE OF CHILD IN INCUBATOR..........pptx
 
Pharmacognosy Flower 3. Compositae 2023.pdf
Pharmacognosy Flower 3. Compositae 2023.pdfPharmacognosy Flower 3. Compositae 2023.pdf
Pharmacognosy Flower 3. Compositae 2023.pdf
 
Hierarchy of management that covers different levels of management
Hierarchy of management that covers different levels of managementHierarchy of management that covers different levels of management
Hierarchy of management that covers different levels of management
 
ECONOMIC CONTEXT - PAPER 1 Q3: NEWSPAPERS.pptx
ECONOMIC CONTEXT - PAPER 1 Q3: NEWSPAPERS.pptxECONOMIC CONTEXT - PAPER 1 Q3: NEWSPAPERS.pptx
ECONOMIC CONTEXT - PAPER 1 Q3: NEWSPAPERS.pptx
 
Biting mechanism of poisonous snakes.pdf
Biting mechanism of poisonous snakes.pdfBiting mechanism of poisonous snakes.pdf
Biting mechanism of poisonous snakes.pdf
 
Computed Fields and api Depends in the Odoo 17
Computed Fields and api Depends in the Odoo 17Computed Fields and api Depends in the Odoo 17
Computed Fields and api Depends in the Odoo 17
 
Solving Puzzles Benefits Everyone (English).pptx
Solving Puzzles Benefits Everyone (English).pptxSolving Puzzles Benefits Everyone (English).pptx
Solving Puzzles Benefits Everyone (English).pptx
 
18-04-UA_REPORT_MEDIALITERAСY_INDEX-DM_23-1-final-eng.pdf
18-04-UA_REPORT_MEDIALITERAСY_INDEX-DM_23-1-final-eng.pdf18-04-UA_REPORT_MEDIALITERAСY_INDEX-DM_23-1-final-eng.pdf
18-04-UA_REPORT_MEDIALITERAСY_INDEX-DM_23-1-final-eng.pdf
 
Crayon Activity Handout For the Crayon A
Crayon Activity Handout For the Crayon ACrayon Activity Handout For the Crayon A
Crayon Activity Handout For the Crayon A
 

AST Parser for C-like Language

  • 1. project4-ast/.DS_Store project4-ast/ast.c #include "symbolTable.h" #include "ast.h" extern SymbolTableStackEntryPtr symbolStackTop; extern int scopeDepth; //creates a new expression node AstNodePtr new_ExprNode(ExpKind kind) { } //creates a new statement node AstNodePtr new_StmtNode(StmtKind kind) { } //creates a new type node for entry into symbol table Type* new_type(TypeKind kind) { }
  • 2. project4-ast/ast.h #include "globals.h" #include "symbolTable.h" typedef enum {STMT, EXPRESSION, FORMALVAR, METHOD} NodeKind; typedef enum {IF_THEN_ELSE_STMT, WHILE_STMT, RETURN_STMT, COMPOUND_STMT, EXPRESSION_STMT } StmtKind; typedef enum {VAR_EXP, ARRAY_EXP, ASSI_EXP, ADD_EXP, SUB_EXP, MULT_EXP, DIV_EXP, GT_EXP, LT_EXP, GE_EXP, LE_EXP, EQ_EXP, NE_EXP, CALL_EXP, CONST_EXP }ExpKind; typedef struct node { NodeKind nKind; StmtKind sKind; ExpKind eKind; struct node *children[MAXCHILDREN]; // max needed for if_then_else struct node *sibling; //List of statements, formal argument Type *nType; //node's type for typechecking expressions for HW5 ElementPtr nSymbolPtr; // for variable and array expressions int nValue; // for integer constants SymbolTablePtr nSymbolTabPtr; //For a block, its symbol table char *fname; // for a call expr, the name of the called function int nLinenumber; }AstNode;
  • 3. typedef AstNode *AstNodePtr; AstNodePtr new_ExprNode(ExpKind kind) ; AstNodePtr new_StmtNode(StmtKind kind); Type* new_type(TypeKind kind); project4-ast/CCOM4087-Proyect4-AST.pdf mailto:[email protected] mailto:[email protected] mailto:[email protected] mailto:[email protected] mailto:[email protected] mailto:[email protected] mailto:[email protected] mailto:[email protected] mailto:[email protected] mailto:[email protected] http://piazza.com http://piazza.com
  • 4. http://piazza.com project4-ast/cmlexer.l /**********************/ /* C header files */ /**********************/ %{ #include "globals.h" #include "ast.h" #include "cmparser.tab.h" char tokenString[TOKENMAX]; int printoutScan = 0; %} %option yylineno
  • 5. /**********************/ /* start your regular definitions here */ /**********************/ NUMBER [0-9]+ IDENTIFIER [a-zA-Z_$][a-zA-Z_$0-9]* WHITESPACE [ \t\r\n]+ /* start your token specifications here */ /* Token names must come from cmparser.tab.h */ %% auto { return TOK_ERROR; } continue { return TOK_ERROR; } goto { return TOK_ERROR; } break { return TOK_ERROR; } default { return TOK_ERROR; } long { return TOK_ERROR; } case { return TOK_ERROR; } do { return TOK_ERROR; } short { return TOK_ERROR; } char { return TOK_ERROR; } double { return TOK_ERROR; } register { return TOK_ERROR; } const { return TOK_ERROR; } for { return TOK_ERROR; } switch { return TOK_ERROR; } byte { return TOK_ERROR; } float { return TOK_ERROR; } "~" { return TOK_ERROR; } "&" { return TOK_ERROR; } "+=" { return TOK_ERROR; } "^=" { return TOK_ERROR; }
  • 6. "?" { return TOK_ERROR; } "|" { return TOK_ERROR; } "-=" { return TOK_ERROR; } "%=" { return TOK_ERROR; } ":" { return TOK_ERROR; } "^" { return TOK_ERROR; } "*=" { return TOK_ERROR; } "<<=" { return TOK_ERROR; } "++" { return TOK_ERROR; } "<<" { return TOK_ERROR; } "/=" { return TOK_ERROR; } ">>=" { return TOK_ERROR; } "--" { return TOK_ERROR; } ">>" { return TOK_ERROR; } "&=" { return TOK_ERROR; } ">>>=" { return TOK_ERROR; } "!" { return TOK_ERROR; } ">>>" { return TOK_ERROR; } "|=" { return TOK_ERROR; } else { return TOK_ELSE; } if { return TOK_IF; } return { return TOK_RETURN; } void { return TOK_VOID; } int { return TOK_INT; } while { return TOK_WHILE; } "+" { return TOK_PLUS; } "-" { return TOK_MINUS; } "*" { return TOK_MULT; } "/" { return TOK_DIV; } "<" { return TOK_LT; } "<=" { return TOK_LE; } ">" { return TOK_GT; } ">=" { return TOK_GE; } "==" { return TOK_EQ; } "!=" { return TOK_NE; } "=" { return TOK_ASSIGN; }
  • 7. ";" { return TOK_SEMI; } "," { return TOK_COMMA; } "(" { return TOK_LPAREN; } ")" { return TOK_RPAREN; } "[" { return TOK_LSQ; } "]" { return TOK_RSQ; } "{" { return TOK_LBRACE; } "}" { return TOK_RBRACE; } {NUMBER} { yylval.iVal = atoi(yytext); return TOK_NUM; } {IDENTIFIER} {yylval.cVal = strdup(yytext); return TOK_ID; } {WHITESPACE} { } "//".* { } "/*"([^*]|\*+[^*/])*("*"+)("/") {} . { return TOK_ERROR; } %% /**********************/ /* C support functions */ /**********************/ void printToken(int token, char *str) { /* Print the line number, token name and matched lexeme -- one per line without any additional characters exactly as below */ /* Example 13:TOK_INT: 37*/ switch(token) { case TOK_ELSE: fprintf(yyout,"%d : TOK_ELSE : %s\n",yylineno,str); break; case TOK_IF:
  • 8. fprintf(yyout,"%d : TOK_IF : %s\n",yylineno,str); break; case TOK_RETURN: fprintf(yyout,"%d : TOK_RETURN : %s\n",yylineno,str); break; case TOK_VOID: fprintf(yyout,"%d : TOK_VOID : %s\n",yylineno,str); break; case TOK_INT: fprintf(yyout,"%d : TOK_INT : %s\n",yylineno,str); break; case TOK_WHILE: fprintf(yyout,"%d : TOK_WHILE : %s\n",yylineno,str); break; case TOK_ID: fprintf(yyout,"%d : TOK_ID : %s\n",yylineno,str); break; case TOK_NUM: fprintf(yyout,"%d : TOK_NUM : %s\n",yylineno,str); break; case TOK_PLUS: fprintf(yyout,"%d : TOK_PLUS : %s\n",yylineno,str); break; case TOK_MINUS: fprintf(yyout,"%d : TOK_MINUS : %s\n",yylineno,str); break; case TOK_MULT: fprintf(yyout,"%d : TOK_MULT : %s\n",yylineno,str); break; case TOK_DIV: fprintf(yyout,"%d : TOK_DIV : %s\n",yylineno,str);
  • 9. break; case TOK_LT: fprintf(yyout,"%d : TOK_LT : %s\n",yylineno,str); break; case TOK_LE: fprintf(yyout,"%d : TOK_LE : %s\n",yylineno,str); break; case TOK_GT: fprintf(yyout,"%d : TOK_GT : %s\n",yylineno,str); break; case TOK_GE: fprintf(yyout,"%d : TOK_GE : %s\n",yylineno,str); break; case TOK_EQ: fprintf(yyout,"%d : TOK_EQ : %s\n",yylineno,str); break; case TOK_NE: fprintf(yyout,"%d : TOK_NE : %s\n",yylineno,str); break; case TOK_ASSIGN: fprintf(yyout,"%d : TOK_ASSIGN : %s\n",yylineno,str); break; case TOK_SEMI: fprintf(yyout,"%d : TOK_SEMI : %s\n",yylineno,str); break; case TOK_COMMA: fprintf(yyout,"%d : TOK_COMMA : %s\n",yylineno,str); break; case TOK_LPAREN: fprintf(yyout,"%d : TOK_LPAREN : %s\n",yylineno,str); break; case TOK_RPAREN: fprintf(yyout,"%d : TOK_RPAREN :
  • 10. %s\n",yylineno,str); break; case TOK_LSQ: fprintf(yyout,"%d : TOK_LSQ : %s\n",yylineno,str); break; case TOK_RSQ: fprintf(yyout,"%d : TOK_RSQ : %s\n",yylineno,str); break; case TOK_LBRACE: fprintf(yyout,"%d : TOK_LBRACE : %s\n",yylineno,str); break; case TOK_RBRACE: fprintf(yyout,"%d : TOK_RBRACE : %s\n",yylineno,str); break; case TOK_ERROR: fprintf(yyout,"%d : TOK_ERROR : %s\n",yylineno,str); break; } } int gettok(void){ int currentToken; currentToken=yylex(); if (currentToken == 0) { // means EOF} return 0; } strncpy(tokenString, yytext, TOKENMAX); if (printoutScan) { printToken(currentToken,tokenString); } return currentToken;
  • 11. } int initLex(int argc, char **argv){ if ( argc > 1 ) yyin = fopen( argv[1], "r" ); else yyin = stdin; // while (gettok() !=0) ; //gettok returns 0 on EOF // return; } project4-ast/cmparser.y %{ #include <stdio.h> #include <string.h> #include <stdlib.h> #include "ast.h" #include "symbolTable.h" #include "util.h" /* other external function prototypes */ extern int yylex(); extern int initLex(int , char **); /* external global variables */ extern int yydebug; extern int yylineno;
  • 12. extern SymbolTableStackEntryPtr symbolStackTop; extern int scopeDepth; /* function prototypes */ void yyerror(const char *); /* global variables */ AstNodePtr program; %} /* YYSTYPE */ %union { AstNodePtr nodePtr; int iVal; char *cVal; Type *type; } /* terminals */ %token TOK_ELSE TOK_IF TOK_RETURN TOK_VOID TOK_INT TOK_WHILE %token TOK_PLUS TOK_MINUS TOK_MULT TOK_DIV TOK_LT TOK_LE TOK_GT TOK_GE TOK_EQ TOK_NE TOK_ASSIGN TOK_SEMI TOK_COMMA %token TOK_LPAREN TOK_RPAREN TOK_LSQ TOK_RSQ TOK_LBRACE TOK_RBRACE TOK_ERROR %token <cVal> TOK_ID %token <iVal> TOK_NUM
  • 13. %type <nodePtr> Declarations Functions %type <type> Type_Specifier %type <nodePtr> Compound_Stmt Statements Statement %type <nodePtr> Expr_Statement If_Else_Statement Selection_Stmt Iteration_Stmt Return_Stmt %type <nodePtr> Expression Simple_Expression Additive_Expression Factor Var Call %type <nodePtr> Args Args_List /* associativity and precedence */ %nonassoc TOK_IF %nonassoc TOK_ELSE %right TOK_ASSIGN %left TOK_EQ TOK_NE %nonassoc TOK_LT TOK_GT TOK_LE TOK_GE %left TOK_PLUS TOK_SUB %left TOK_MULT TOK_DIV %nonassoc error %% Start : Declarations { } ; Declarations : Functions { program = $1; } | Var_Declaration Declarations { } ; Functions : Fun_Declaration {
  • 14. } | Fun_Declaration Functions { } ; Var_Declaration : Type_Specifier TOK_ID TOK_SEMI { } | Type_Specifier TOK_ID TOK_LSQ TOK_NUM TOK_RSQ TOK_SEMI { } ; Fun_Declaration : Type_Specifier TOK_ID TOK_LPAREN Params TOK_RPAREN Compound_Stmt { } ; Params : Param_List { } | TOK_VOID { } ; Param_List : Param_List TOK_COMMA Param { } | Param { } ;
  • 15. Param : Type_Specifier TOK_ID { } | Type_Specifier TOK_ID TOK_LSQ TOK_RSQ { } ; Type_Specifier : TOK_INT { } | TOK_VOID { } ; Compound_Stmt : TOK_LBRACE Statements TOK_RBRACE { } | TOK_LBRACE Local_Declarations Statements TOK_RBRACE { } ; Local_Declarations : Var_Declaration Local_Declarations { } | Var_Declaration { } ; Statements : Statement Statements { }
  • 16. | { } ; Statement : Expr_Statement { } | Compound_Stmt { } | Selection_Stmt { } | Iteration_Stmt { } | Return_Stmt { } ; Expr_Statement : Expression TOK_SEMI { } | TOK_SEMI { } ; Selection_Stmt : If_Else_Statement %prec TOK_IF { } | If_Else_Statement TOK_ELSE Statement { }
  • 17. ; If_Else_Statement : TOK_IF TOK_LPAREN Expression TOK_RPAREN Statement { } ; Iteration_Stmt : TOK_WHILE TOK_LPAREN Expression TOK_RPAREN Statement { } ; Return_Stmt : TOK_RETURN Expression TOK_SEMI { } | TOK_RETURN TOK_SEMI { } ; Expression : Var TOK_ASSIGN Expression { } | Simple_Expression { } ; Var : TOK_ID { } | TOK_ID TOK_LSQ Expression TOK_RSQ { }
  • 18. ; Simple_Expression : Additive_Expression TOK_GT Additive_Expression { } | Additive_Expression TOK_LT Additive_Expression { } | Additive_Expression TOK_GE Additive_Expression { } | Additive_Expression TOK_LE Additive_Expression { } | Additive_Expression TOK_EQ Additive_Expression { } | Additive_Expression TOK_NE Additive_Expression { } | Additive_Expression { } ; Additive_Expression : Additive_Expression TOK_PLUS Term { } | Additive_Expression TOK_MINUS Term {
  • 19. } | Term { } ; Term : Term TOK_MULT Factor { } | Term TOK_DIV Factor { } | Factor { } ; Factor : TOK_LPAREN Expression TOK_RPAREN { } | Var { } | Call { } | TOK_NUM { } ; Call : TOK_ID TOK_LPAREN Args TOK_RPAREN { } ;
  • 20. Args : Args_List { } | { } ; Args_List : Args_List TOK_COMMA Expression { } | Expression { } ; %% void yyerror (char const *s) { fprintf (stderr, "Line %d: %s\n", yylineno, s); } int main(int argc, char **argv){ initLex(argc,argv); #ifdef YYLLEXER while (gettok() !=0) ; //gettok returns 0 on EOF return; #else yyparse(); // print_Ast(); #endif }
  • 21. project4-ast/globals.h #define TRUE 1 #define FALSE 0 #define TOKENMAX 512 #define MAXHASHSIZE 32 #define MAXCHILDREN 3 project4-ast/Makefile # Makefile for cminus CC=gcc LEX=flex YACC=bison MV=/bin/mv RM=/bin/rm CP=/bin/cp LIBRARIES= -ll CFLAGS=-g YFLAGS=-d PROGRAM=cmparser OBJECTS=cmparser.o cmlexer.o symbolTable.o ast.o util.o SOURCES=cmlexer.l cmparser.y symbolTable.c ast.c util.c CTEMPS=cmlexer.c cmparser.c cmparser.tab.h cmparser.tab.c cmparser.output
  • 22. $(PROGRAM): $(OBJECTS) $(CC) $(CFLAGS) -o $(PROGRAM) $(OBJECTS) $(LIBRARIES) util.o: util.c $(CC) $(CFLAGS) -c util.c ast.o: ast.c $(CC) $(CFLAGS) -c ast.c symbolTable.o: symbolTable.c $(CC) $(CFLAGS) -c symbolTable.c cmparser.o: cmparser.c $(CC) $(CFLAGS) -c cmparser.c cmparser.c: cmparser.y $(YACC) cmparser.y -o cmparser.c cmlexer.o: cmlexer.c cmparser.tab.h globals.h $(CC) $(CFLAGS) -c cmlexer.c cmparser.tab.h: cmparser.y $(YACC) -d cmparser.y cmlexer.c: cmlexer.l $(LEX) -ocmlexer.c cmlexer.l clean: $(RM) $(PROGRAM) $(OBJECTS) $(CTEMPS) submit: mkdir submit cp $(SOURCES) submit turnin -ccs473 -past submit
  • 23. project4-ast/symbolTable.c #include "symbolTable.h" // Top should point to the top of the scope stack, // which is the most recent scope pushed SymbolTableStackEntryPtr symbolStackTop; int scopeDepth; /* global function prototypes */ //allocate the global scope entry and symbol table --and set scopeDepth to 0 // The global scope remains till the end of the program int initSymbolTable() { //initialize a stack int i=0; symbolStackTop = (SymbolTableStackEntryPtr) malloc (sizeof(struct symbolTableStackEntry)); if (symbolStackTop == NULL) return 0; symbolStackTop -> symbolTablePtr = (SymbolTable*) malloc (sizeof(SymbolTable)); for(i=0; i<MAXHASHSIZE; i++) symbolStackTop -> symbolTablePtr->hashTable[i] = NULL; symbolStackTop->prevScope = NULL; scopeDepth = 0; //printf("n Initialized Stack with Global Hash Table"); return 1; }
  • 24. // Look up a given entry ElementPtr symLookup(char *name) { char *a = name; SymbolTableStackEntryPtr iter = symbolStackTop; int ascii = 0,depth = scopeDepth ; while(*a!='\0') { ascii+= (int) *a; a++; } while(iter) { ElementPtr symelement = iter->symbolTablePtr->hashTable[ascii%16]; while(symelement) { if(strcmp(symelement->id, name) == 0) { //printf("\n Found symbol %s at scope depth %d",name, depth); return symelement; } else symelement = symelement->next; } iter = iter->prevScope; //printf("\n Did not find symbol %s at scope depth %d",name, depth); depth--; } return NULL; }
  • 25. // Insert an element with a specified type in a particular line number // initialize the scope depth of the entry from the global var scopeDepth //symInsert("counter", typ, 10); ElementPtr symInsert(char *name, Type *type, int line) { ElementPtr new_element = (ElementPtr) malloc (sizeof(Element)); char *a = name; int ascii = 0 ; while(*a!='\0') { ascii+= (int) *a; a++; } new_element->key = ascii%16; new_element->id = strdup(name); new_element->linenumber = line; new_element->scope = scopeDepth; new_element->stype = type; new_element->snode = NULL; if(!symbolStackTop->symbolTablePtr->hashTable[new_element->key]) // if the first element of hash table for this ascii is null { new_element->next = NULL; symbolStackTop->symbolTablePtr->hashTable[new_element->key] = new_element; } else { new_element->next = symbolStackTop-
  • 26. >symbolTablePtr->hashTable[new_element->key]; symbolStackTop->symbolTablePtr- >hashTable[new_element->key] = new_element; } //printf("n Inserted %s at scope depth %d with ascii %d ",new_element->id,scopeDepth,new_element->key); return symbolStackTop->symbolTablePtr- >hashTable[new_element->key]; } //push a new entry to the symbol stack // This should modify the variable top and change the scope depth int enterScope() { int i; SymbolTableStackEntryPtr newSymbolStackEntry = (SymbolTableStackEntryPtr) malloc (sizeof(struct symbolTableStackEntry)); if (newSymbolStackEntry == NULL) return 0; newSymbolStackEntry -> symbolTablePtr = (SymbolTable*) malloc (sizeof(SymbolTable)); for(i=0; i<MAXHASHSIZE; i++) newSymbolStackEntry -> symbolTablePtr- >hashTable[i] = NULL; newSymbolStackEntry->prevScope = symbolStackTop; symbolStackTop = newSymbolStackEntry; scopeDepth++; //printf("n New Scope"); return 1; }
  • 27. //pop an entry off the symbol stack // This should modify top and change scope depth void leaveScope() { if (!symbolStackTop) printf("\n Error .. cannot leave global scope"); else if(!symbolStackTop->prevScope) symbolStackTop = NULL; else symbolStackTop = symbolStackTop->prevScope; scopeDepth--; } // Do not modify this function void printElement(ElementPtr symelement) { if (symelement != NULL) { printf("\nLine %d: %s", symelement->linenumber,symelement->id); } else printf("Wrong call! symbol table entry NULL"); } //should traverse through the entire symbol table and print it // must use the printElement function given above void printSymbolTable() { int i; SymbolTableStackEntryPtr SymbolStackEntry = symbolStackTop; while(SymbolStackEntry) { for(i=0; i<MAXHASHSIZE;i++) { ElementPtr symelement = SymbolStackEntry-
  • 28. >symbolTablePtr->hashTable[i]; while(symelement) { printElement(symelement); symelement = symelement->next; } } SymbolStackEntry = SymbolStackEntry->prevScope; } } project4-ast/symbolTable.h #ifndef _SYMBOL_TABLE #define _SYMBOL_TABLE #include <stdio.h> #include <string.h> #include <stdlib.h> #include "globals.h"
  • 29. /* data structure of one element in the symbol table */ typedef enum {VOID, INT, ARRAY, FUNCTION} TypeKind; typedef struct type{ TypeKind kind; /* one from the enum above */ int dimension; /* for arrays */ struct type *function; /*function argument and return types */ /* Ignore this field until HW5 */ }Type; typedef Type *TypePtr; typedef struct element { int key; char * id; int linenumber; int scope; /* scope depth at declaration */ Type *stype; /* pointer to the type infomation */ struct element *next; /* pointer to the next symbol with the same hash table index */ struct node *snode; /* (New) AST Node for method declartion */ } Element; typedef Element *ElementPtr; typedef ElementPtr HashTableEntry; /* data structure of the symbol table */ typedef struct symbolTable {
  • 30. HashTableEntry hashTable[MAXHASHSIZE]; /* hash table */ } SymbolTable; typedef SymbolTable *SymbolTablePtr; typedef struct symbolTableStackEntry { SymbolTablePtr symbolTablePtr; struct symbolTableStackEntry *prevScope; } SymbolTableStackEntry; typedef SymbolTableStackEntry *SymbolTableStackEntryPtr; int initSymbolTable() ; ElementPtr symLookup(char *); ElementPtr symInsert(char *, Type *, int ); int enterScope(); void leaveScope(); #endif project4-ast/util.c /* Original Code: Silvano Bonacia, Fall 2006 */ /* Modifications and changes, Prof. Venkat, Spring 08 */ #include "ast.h" #include "util.h" extern AstNode *program; extern SymbolTableStackEntryPtr symbolStackTop; extern void printSymbolTable();
  • 31. #define PAD_STR " " int indLevel = 0; void pad (int level) { int i; for(i=0;i<level;i++) printf("%s", PAD_STR); } /* * Print the parameters of the method (parameters being in the * current scope) */ void printMethodParams (SymbolTableStackEntryPtr scope) { int i; SymbolTableStackEntryPtr SymbolStackEntry = scope; for(i=0; i<MAXHASHSIZE;i++) { ElementPtr symelement = SymbolStackEntry- >symbolTablePtr->hashTable[i]; while(symelement) { switch(symelement->stype->kind) { case INT : printf("int %s", symelement->id); break; case ARRAY : printf("int %s[]", symelement->id); break; case VOID : printf("void"); break; case FUNCTION : break; } symelement = symelement->next;
  • 32. if(symelement) printf(", "); } } } /* * Print the variable declarations of the scope. * Return the number of variables */ int printVarDeclarations (SymbolTablePtr scope) { int noVar = 0; int i; for(i=0; i<MAXHASHSIZE;i++) { ElementPtr symelement = scope->hashTable[i]; while(symelement) { pad(indLevel); switch(symelement->stype->kind) { case INT : printf("int %s;n", symelement->id); noVar++; break; case ARRAY : printf("int %s[%d];n", symelement->id, symelement->stype->dimension); noVar++; break; case VOID: break; case FUNCTION : break; } symelement = symelement->next; }
  • 33. } return noVar; } //traverses through the entire program and symbol table and prints it //the output must match the program with the exception of white spaces //do a diff -w to ignre white spaces void printType(TypePtr type, int dummy){ switch(type->kind) { case INT : printf("int"); break; case ARRAY : printf("int"); break; case VOID: printf("void"); break; case FUNCTION : printType(type->function, 0); break; } } void print_Expression(AstNodePtr expr, int endWithSemi) { if(expr == NULL) { return; } switch(expr->eKind) { case VAR_EXP : printf("%s", expr->nSymbolPtr->id); break; case ARRAY_EXP :
  • 34. printf("%s[", expr->nSymbolPtr->id); print_Expression(expr->children[0], 0); printf("]"); break; case ASSI_EXP : print_Expression(expr->children[0], 0); printf(" = "); print_Expression(expr->children[1], 0); break; case ADD_EXP : print_Expression(expr->children[0], 0); printf(" + "); print_Expression(expr->children[1], 0); break; case SUB_EXP : print_Expression(expr->children[0], 0); printf(" - "); print_Expression(expr->children[1], 0); break; case MULT_EXP : print_Expression(expr->children[0], 0); printf(" * "); print_Expression(expr->children[1], 0); break; case DIV_EXP : print_Expression(expr->children[0], 0); printf(" / "); print_Expression(expr->children[1], 0); break; case GT_EXP : print_Expression(expr->children[0], 0); printf(" > "); print_Expression(expr->children[1], 0); break; case LT_EXP : print_Expression(expr->children[0], 0);
  • 35. printf(" < "); print_Expression(expr->children[1], 0); break; case GE_EXP : print_Expression(expr->children[0], 0); printf(" >= "); print_Expression(expr->children[1], 0); break; case LE_EXP : print_Expression(expr->children[0], 0); printf(" <= "); print_Expression(expr->children[1], 0); break; case EQ_EXP : print_Expression(expr->children[0], 0); printf(" == "); print_Expression(expr->children[1], 0); break; case NE_EXP : print_Expression(expr->children[0], 0); printf(" != "); print_Expression(expr->children[1], 0); break; case CALL_EXP : printf("%s(", expr->fname); if(expr->children[0] != NULL) { print_Expression(expr->children[0], 0); AstNodePtr ptr = expr->children[0]->sibling; while(ptr != NULL) { printf(", "); print_Expression(ptr, 0); ptr = ptr->sibling; } } printf(")"); break;
  • 36. case CONST_EXP : printf("%d", expr->nValue); break; } if(endWithSemi == 1) printf(";n"); } void print_Statement(AstNodePtr stmt) { if(stmt == NULL) return; pad(indLevel); switch(stmt->sKind) { case IF_THEN_ELSE_STMT : printf("if("); print_Expression(stmt->children[0], 0); printf(")n"); if(stmt->children[1] != NULL) { if(stmt->children[1]->sKind != COMPOUND_STMT) { indLevel++; print_Statement(stmt->children[1]); indLevel--; } else print_Statement(stmt->children[1]); } else { pad(indLevel+1); printf(";n"); } if(stmt->children[2] != NULL) { pad(indLevel); printf("elsen"); if(stmt->children[2]->sKind != COMPOUND_STMT) { indLevel++; print_Statement(stmt->children[2]); indLevel--; } else print_Statement(stmt->children[2]);
  • 37. } break; case WHILE_STMT : printf("while("); print_Expression(stmt->children[0], 0); printf(")n"); if(stmt->children[1] != NULL) { if(stmt->children[1]->sKind != COMPOUND_STMT) { indLevel++; print_Statement(stmt->children[1]); indLevel--; } else print_Statement(stmt->children[1]); } else { pad(indLevel+1); printf(";n"); } break; case RETURN_STMT : printf("return "); if(stmt->children[0] == NULL) printf(";n"); else print_Expression(stmt->children[0], 1); break; case COMPOUND_STMT : printf("{n"); indLevel++; if(stmt->nSymbolTabPtr != NULL) { if(printVarDeclarations(stmt->nSymbolTabPtr) > 0) printf("n"); } if(stmt->children[0] != NULL) print_Statement(stmt->children[0]); // Print the first statement else { pad(indLevel);
  • 38. printf(";n"); } indLevel--; pad(indLevel); printf("}n"); break; case EXPRESSION_STMT : print_Expression(stmt->children[0], 1); break; } //printf("sibling : %dn", stmt->sibling); print_Statement(stmt->sibling); // Print the next statement } void print_Ast_Recursion(AstNodePtr root) { //printf("print_Ast_Recursion: root = %dn", root); /* * End the recursion */ if(root == NULL) return; switch(root->nKind) { case METHOD : //printf("root->nKind = METHODn"); printType(root->nType, 0); printf(" %s(", root->nSymbolPtr->id); if(root->children[0] == NULL) printf("void"); else print_Ast_Recursion(root->children[0]); // print the parameters of the method printf(")n"); print_Ast_Recursion(root->children[1]); // print the body of the method printf("n");
  • 39. print_Ast_Recursion(root->sibling); // print the next method break; case FORMALVAR : //printf("root->nKind = FORMALVARn"); printType(root->nSymbolPtr->stype, 0); // print the type printf(" %s", root->nSymbolPtr->id); // print the name of the variable if(root->nSymbolPtr->stype->kind == ARRAY) printf("[]"); /* * Print the next parameter if there's one */ if(root->sibling != NULL) { printf(", "); print_Ast_Recursion(root->sibling); } break; case STMT : //printf("root->nKind = STMTn"); print_Statement(root); break; case EXPRESSION : //printf("root->nKind = EXPRESSIONn"); print_Expression(root, 1); // I don't think it ever gets here break; } } void print_Ast() { /* * First print the entire symbol table */ //printSymbolTable(); /* * Then print the program
  • 40. */ indLevel = 0; if(printVarDeclarations(symbolStackTop->symbolTablePtr) > 0) printf("n"); print_Ast_Recursion(program); } project4-ast/util.h extern void printNode(AstNodePtr, int); extern void printType(TypePtr, int); lex_yacc.pdf A GUIDE TO LEX & YACC BY THOMAS NIEMANN 2 PREFACE This document explains how to construct a compiler using lex and yacc. Lex and yacc are tools used to generate lexical analyzers and parsers. I assume
  • 41. you can program in C, and understand data structures such as linked-lists and trees. The introduction describes the basic building blocks of a compiler and explains the interaction between lex and yacc. The next two sections describe lex and yacc in more detail. With this background, we construct a sophisticated calculator. Conventional arithmetic operations and control statements, such as if-else and while, are implemented. With minor changes, we convert the calculator into a compiler for a stack-based machine. The remaining sections discuss issues that commonly arise in compiler writing. Source code for examples may be downloaded from the web site listed below. Permission to reproduce portions of this document is given provided the web site listed below is referenced, and no additional restrictions apply. Source code, when part of a software project, may be used freely without reference to the author. THOMAS NIEMANN Portland, Oregon [email protected] http://members.xoom.com/thomasn mailto:[email protected] http://members.xoom.com/thomasn 3
  • 42. CONTENTS 1. INTRODUCTION 4 2. LEX 6 2.1 Theory 6 2.2 Practice 7 3. YACC 12 3.1 Theory 12 3.2 Practice, Part I 14 3.3 Practice, Part II 17 4. CALCULATOR 20 4.1 Description 20 4.2 Include File 23 4.3 Lex Input 24 4.4 Yacc Input 25 4.5 Interpreter 29 4.6 Compiler 30 5. MORE LEX 32 5.1 Strings 32 5.2 Reserved Words 33 5.3 Debugging Lex 33 6. MORE YACC 35 6.1 Recursion 35 6.2 If-Else Ambiguity 35
  • 43. 6.3 Error Messages 36 6.4 Inherited Attributes 37 6.5 Embedded Actions 37 6.6 Debugging Yacc 38 7. BIBLIOGRAPHY 39 4 1. Introduction Until 1975, writing a compiler was a very time-consuming process. Then Lesk [1975] and Johnson [1975] published papers on lex and yacc. These utilities greatly simplify compiler writing. Implementation details for lex and yacc may be found in Aho [1986]. Lex and yacc are available from • Mortice Kern Systems (MKS), at http://www.mks.com, • GNU flex and bison, at http://www.gnu.org, • Ming, at http://agnes.dida.physik.uni- essen.de/~janjaap/mingw32, • Cygnus, at http://www.cygnus.com/misc/gnu-win32, and • me, at http://members.xoom.com/thomasn/y_gnu.zip (executables), and http://members.xoom.com/thomasn/y_gnus.zip (source for y_gnu.zip). The version from MKS is a high-quality commercial product that retails for about $300US. GNU software is free. Output from flex may be used in a commercial product,
  • 44. and, as of version 1.24, the same is true for bison. Ming and Cygnus are 32-bit Windows- 95/NT ports of the GNU software. My version is based on Ming’s, but is compiled with Visual C++ and includes a minor bug fix in the file handling routine. If you download my version, be sure to retain directory structure when you unzip. Lexical Analyzer Syntax Analyzer a = b + c * d id1 = id2 + id3 * id4 = + * id1 source code tokens syntax tree id2 id3 id4 load id3 mul id4
  • 45. add id2 store id1 Code Generator generated code Figure 1-1: Compilation Sequence http://members.xoom.com/thomasn/y_lex.pdf http://members.xoom.com/thomasn/y_yacc.pdf http://www.amazon.com/exec/obidos/ISBN=0201100886/none01 A/#Bibliography http://www.mks.com/ http://www.gnu.org/ http://agnes.dida.physik.uni-essen.de/~janjaap/mingw32 http://www.cygnus.com/misc/gnu-win32 http://members.xoom.com/thomasn/y_gnu.zip http://members.xoom.com/thomasn/y_gnus.zip 5 Lex generates C code for a lexical analyzer, or scanner. It uses patterns that match strings in the input and converts the strings to tokens. Tokens are numerical representations of strings, and simplify processing. This is illustrated in Figure 1-1. As lex finds identifiers in the input stream, it enters them in a symbol table. The symbol table may also contain other information such as data type (integer or real) and location of the variable in memory. All subsequent references to identifiers refer to the
  • 46. appropriate symbol table index. Yacc generates C code for a syntax analyzer, or parser. Yacc uses grammar rules that allow it to analyze tokens from lex and create a syntax tree. A syntax tree imposes a hierarchical structure on tokens. For example, operator precedence and associativity are apparent in the syntax tree. The next step, code generation, does a depth-first walk of the syntax tree to generate code. Some compilers produce machine code, while others, as shown above, output assembly. lex yacc cc bas.y bas.l lex.yy.c y.tab.c bas.exe source compiled output (yylex) (yyparse) y.tab.h
  • 47. Figure 1-2: Building a Compiler with Lex/Yacc Figure 1-2 illustrates the file naming conventions used by lex and yacc. We'll assume our goal is to write a BASIC compiler. First, we need to specify all pattern matching rules for lex (bas.l) and grammar rules for yacc (bas.y). Commands to create our compiler, bas.exe, are listed below: yacc –d bas.y # create y.tab.h, y.tab.c lex bas.l # create lex.yy.c cc lex.yy.c y.tab.c –obas.exe # compile/link Yacc reads the grammar descriptions in bas.y and generates a parser, function yyparse, in file y.tab.c. Included in file bas.y are token declarations. These are converted to constant definitions by yacc and placed in file y.tab.h. Lex reads the pattern descriptions in bas.l, includes file y.tab.h, and generates a lexical analyzer, function yylex, in file lex.yy.c. Finally, the lexer and parser are compiled and linked together to form the executable, bas.exe. From main, we call yyparse to run the compiler. Function yyparse automatically calls yylex to obtain each token. 6 2. Lex
  • 48. 2.1 Theory The first phase in a compiler reads the input source and converts strings in the source to tokens. Using regular expressions, we can specify patterns to lex that allow it to scan and match strings in the input. Each pattern in lex has an associated action. Typically an action returns a token, representing the matched string, for subsequent use by the parser. To begin with, however, we will simply print the matched string rather than return a token value. We may scan for identifiers using the regular expression letter(letter|digit)* This pattern matches a string of characters that begins with a single letter, and is followed by zero or more letters or digits. This example nicely illustrates operations allowed in regular expressions: • repetition, expressed by the “*” operator • alternation, expressed by the “|” operator • concatenation Any regular expression may be expressed as a finite state automaton (FSA). We can represent an FSA using states, and transitions between states. There is one start state, and one or more final or accepting states. 0 1 2 letter
  • 49. letter or digit otherstart Figure 2-1: Finite State Automaton In Figure 2-1, state 0 is the start state, and state 2 is the accepting state. As characters are read, we make a transition from one state to another. When the first letter is read, we transition to state 1. We remain in state 1 as more letters or digits are read. When we read a character other than a letter or digit, we transition to state 2, the accepting state. Any FSA may be expressed as a computer program. For example, our 3-state machine is easily programmed: 7 start: goto state0 state0: read c if c = letter goto state1 goto state0 state1: read c if c = letter goto state1 if c = digit goto state1 goto state2 state2: accept string This is the technique used by lex. Regular expressions are
  • 50. translated by lex to a computer program that mimics an FSA. Using the next input character, and current state, the next state is easily determined by indexing into a computer-generated state table. Now we can easily understand some of lex’s limitations. For example, lex cannot be used to recognize nested structures such as parentheses. Nested structures are handled by incorporating a stack. Whenever we encounter a “(”, we push it on the stack. When a “)” is encountered, we match it with the top of the stack, and pop the stack. Lex, however, only has states and transitions between states. Since it has no stack, it is not well suited for parsing nested structures. Yacc augments an FSA with a stack, and can process constructs such as parentheses with ease. The important thing is to use the right tool for the job. Lex is good at pattern matching. Yacc is appropriate for more challenging tasks. 2.2 Practice Metacharacter Matches . any character except newline \n newline * zero or more copies of preceding expression + one or more copies of preceding expression ? zero or one copy of preceding expression ^ beginning of line $ end of line a|b a or b (ab)+ one or more copies of ab (grouping) "a+b" literal “a+b” (C escapes still work)
  • 51. [] character class Table 2-1: Pattern Matching Primitives 8 Expression Matches abc abc abc* ab, abc, abcc, abccc, … abc+ abc, abcc, abccc, … a(bc)+ abc, abcbc, abcbcbc, … a(bc)? a, abc [abc] a, b, c [a-z] any letter, a through z [a\-z] a, -, z [-az] -, a, z [A-Za-z0-9]+ one or more alphanumeric characters [ \t\n]+ whitespace [^ab] anything except: a, b [a^b] a, ^, b [a|b] a, |, b a|b a or b Table 2-2: Pattern Matching Examples
  • 52. Regular expressions in lex are composed of metacharacters (Table 2-1). Pattern matching examples are shown in Table 2-2. Within a character class, normal operators lose their meaning. Two operators allowed in a character class are the hyphen (“-”) and circumflex (“^”). When used between two characters, the hyphen represents a range of characters. The circumflex, when used as the first character, negates the expression. If two patterns match the same string, the longest match wins. In case both matches are the same length, then the first pattern listed is used. Input to Lex is divided into three sections, with %% dividing the sections. This is best illustrated by example. The first example is the shortest possible lex file: %% Input is copied to output, one character at a time. The first %% is always required, as there must always be a rules section. However, if we don’t specify any rules, then the default action is to match everything and copy it to output. Defaults for input and output are stdin and stdout, respectively. Here is the same example, with defaults explicitly coded: ... definitions ... %% ... rules ... %%
  • 53. ... subroutines ... 9 %% /* match everything except newline */ . ECHO; /* match newline */ \n ECHO; %% int yywrap(void) { return 1; } int main(void) { yylex(); return 0; } Two patterns have been specified in the rules section. Each pattern must begin in column one. This is followed by whitespace (space, tab or newline), and an optional action associated with the pattern. The action may be a single C statement, or multiple C statements enclosed in braces. Anything not starting in column one is copied verbatim to the generated C file. We may take advantage of this behavior to
  • 54. specify comments in our lex file. In this example there are two patterns, “.” and “\n”, with an ECHO action associated for each pattern. Several macros and variables are predefined by lex. ECHO is a macro that writes code matched by the pattern. This is the default action for any unmatched strings. Typically, ECHO is defined as: #define ECHO fwrite(yytext, yyleng, 1, yyout) Variable yytext is a pointer to the matched string (NULL-terminated), and yyleng is the length of the matched string. Variable yyout is the output file, and defaults to stdout. Function yywrap is called by lex when input is exhausted. Return 1 if you are done, or 0 if more processing is required. Every C program requires a main function. In this case, we simply call yylex, the main entry-point for lex. Some implementations of lex include copies of main and yywrap in a library, eliminating the need to code them explicitly. This is why our first example, the shortest lex program, functioned properly. 10 name function int yylex(void) call to invoke lexer, returns token char *yytext pointer to matched string yyleng length of matched string yylval value associated with token int yywrap(void) wrapup, return 1 if done, 0 if not done
  • 55. FILE *yyout output file FILE *yyin input file INITIAL initial start condition BEGIN condition switch start condition ECHO write matched string Table 2-3: Lex Predefined Variables Here’s a program that does nothing at all. All input is matched, but no action is associated with any pattern, so there will be no output. %% . n The following example prepends line numbers to each line in a file. Some implementations of lex predefine and calculate yylineno. The input file for lex is yyin, and defaults to stdin. %{ int yylineno; %} %% ^(.*)n printf("%4dt%s", ++yylineno, yytext); %% int main(int argc, char *argv[]) { yyin = fopen(argv[1], "r"); yylex(); fclose(yyin); }
  • 56. The definitions section is composed of substitutions, code, and start states. Code in the definitions section is simply copied as-is to the top of the generated C file, and must be bracketed with “%{“ and “%}” markers. Substitutions simplify pattern-matching rules. For example, we may define digits and letters: 11 digit [0-9] letter [A-Za-z] %{ int count; %} %% /* match identifier */ {letter}({letter}|{digit})* count++; %% int main(void) { yylex(); printf("number of identifiers = %dn", count); return 0; } Whitespace must separate the defining term and the associated expression. References to substitutions in the rules section are surrounded by braces ({letter}) to
  • 57. distinguish them from literals. When we have a match in the rules section, the associated C code is executed. Here is a scanner that counts the number of characters, words, and lines in a file (similar to Unix wc): %{ int nchar, nword, nline; %} %% n { nline++; nchar++; } [^ tn]+ { nword++, nchar += yyleng; } . { nchar++; } %% int main(void) { yylex(); printf("%dt%dt%dn", nchar, nword, nline); return 0; } 12 3. Yacc 3.1 Theory Grammars for yacc are described using a variant of Backus Naur Form (BNF). This technique was pioneered by John Backus and Peter Naur, and used to describe ALGOL60. A BNF grammar can be used to express context-free languages. Most
  • 58. constructs in modern programming languages can be represented in BNF. For example, the grammar for an expression that multiplies and adds numbers is 1 E -> E + E 2 E -> E * E 3 E -> id Three productions have been specified. Terms that appear on the left-hand side (lhs) of a production, such as E (expression) are nonterminals. Terms such as id (identifier) are terminals (tokens returned by lex) and only appear on the right- hand side (rhs) of a production. This grammar specifies that an expression may be the sum of two expressions, the product of two expressions, or an identifier. We can use this grammar to generate expressions: E -> E * E (r2) -> E * z (r3) -> E + E * z (r1) -> E + y * z (r3) -> x + y * z (r3) At each step we expanded a term, replacing the lhs of a production with the corresponding rhs. The numbers on the right indicate which rule applied. To parse an expression, we actually need to do the reverse operation. Instead of starting with a single nonterminal (start symbol) and generating an expression from a grammar, we need to reduce an expression to a single nonterminal. This is known as
  • 59. bottom-up or shift-reduce parsing, and uses a stack for storing terms. Here is the same derivation, but in reverse order: 13 1 . x + y * z shift 2 x . + y * z reduce(r3) 3 E . + y * z shift 4 E + . y * z shift 5 E + y . * z reduce(r3) 6 E + E . * z shift 7 E + E * . z shift 8 E + E * z . reduce(r3) 9 E + E * E . reduce(r2) emit multiply 10 E + E . reduce(r1) emit add 11 E . accept Terms to the left of the dot are on the stack, while remaining input is to the right of the dot. We start by shifting tokens onto the stack. When the top of the stack matches the rhs of a production, we replace the matched tokens on the stack with the lhs of the production. Conceptually, the matched tokens of the rhs are popped off the stack, and the lhs of the production is pushed on the stack. The matched tokens are known as a handle, and we are reducing the handle to the lhs of the production. This process continues until we have shifted all input to the stack, and only the starting nonterminal remains on the stack. In step 1 we shift the x to the stack. Step 2 applies rule r3
  • 60. to the stack, changing x to E. We continue shifting and reducing, until a single nonterminal, the start symbol, remains in the stack. In step 9, when we reduce rule r2, we emit the multiply instruction. Similarly, the add instruction is emitted in step 10. Thus, multiply has a higher precedence than addition. Consider, however, the shift at step 6. Instead of shifting, we could have reduced, applying rule r1. This would result in addition having a higher precedence than multiplication. This is known as a shift-reduce conflict. Our grammar is ambiguous, as there is more than one possible derivation that will yield the expression. In this case, operator precedence is affected. As another example, associativity in the rule E -> E + E is ambiguous, for we may recurse on the left or the right. To remedy the situation, we could rewrite the grammar, or supply yacc with directives that indicate which operator has precedence. The latter method is simpler, and will be demonstrated in the practice section. The following grammar has a reduce-reduce conflict. With an id on the stack, we may reduce to T, or reduce to E. E -> T E -> id
  • 61. T -> id Yacc takes a default action when there is a conflict. For shift- reduce conflicts, yacc will shift. For reduce-reduce conflicts, it will use the first rule in the listing. It also issues a warning message whenever a conflict exists. The warnings may be suppressed by making 14 the grammar unambiguous. Several methods for removing ambiguity will be presented in subsequent sections. 3.2 Practice, Part I Input to yacc is divided into three sections. The definitions section consists of token declarations, and C code bracketed by “%{“ and “%}”. The BNF grammar is placed in the rules section, and user subroutines are added in the subroutines section. This is best illustrated by constructing a small calculator that can add and subtract numbers. We’ll begin by examining the linkage between lex and yacc. Here is the definitions section for the yacc input file: %token INTEGER This definition declares an INTEGER token. When we run yacc, it generates a parser in
  • 62. file y.tab.c, and also creates an include file, y.tab.h: #ifndef YYSTYPE #define YYSTYPE int #endif #define INTEGER 258 extern YYSTYPE yylval; Lex includes this file and utilizes the definitions for token values. To obtain tokens, yacc calls yylex. Function yylex has a return type of int, and returns the token value. Values associated with the token are returned by lex in variable yylval. For example, [0-9]+ { yylval = atoi(yytext); return INTEGER; } would store the value of the integer in yylval, and return token INTEGER to yacc. The type of yylval is determined by YYSTYPE. Since the default type is integer, this works well in this case. Token values 0-255 are reserved for character values. For example, if you had a rule such as [-+] return *yytext; /* return operator */ ... definitions ... %% ... rules ... %% ... subroutines ...
  • 63. 15 the character value for minus or plus is returned. Note that we placed the minus sign first so that it wouldn’t be mistaken for a range designator. Generated token values typically start around 258, as lex reserves several values for end-of-file and error processing. Here is the complete lex input specification for our calculator: %{ #include "y.tab.h" %} %% [0-9]+ { yylval = atoi(yytext); return INTEGER; } [-+n] return *yytext; [ t] ; /* skip whitespace */ . yyerror("invalid character"); %% int yywrap(void) { return 1;
  • 64. } Internally, yacc maintains two stacks in memory; a parse stack and a value stack. The parse stack contains terminals and nonterminals, and represents the current parsing state. The value stack is an array of YYSTYPE elements, and associates a value with each element in the parse stack. For example, when lex returns an INTEGER token, yacc shifts this token to the parse stack. At the same time, the corresponding yylval is shifted to the value stack. The parse and value stacks are always synchronized, so finding a value related to a token on the stack is easily accomplished. Here is the yacc input specification for our calculator: 16 %token INTEGER %% program: program expr 'n' { printf("%dn", $2); } | ; expr: INTEGER { $$ = $1; } | expr '+' expr { $$ = $1 + $3; } | expr '-' expr { $$ = $1 - $3; } ;
  • 65. %% int yyerror(char *s) { fprintf(stderr, "%sn", s); return 0; } int main(void) { yyparse(); return 0; } The rules section resembles the BNF grammar discussed earlier. The left-hand side of a production, or nonterminal, is entered left-justified, followed by a colon. This is followed by the right-hand side of the production. Actions associated with a rule are entered in braces. By utilizing left-recursion, we have specified that a program consists of zero or more expressions. Each expression terminates with a newline. When a newline is detected, we print the value of the expression. When we apply the rule expr: expr '+' expr { $$ = $1 + $3; } we replace the right-hand side of the production in the parse stack with the left-hand side of the same production. In this case, we pop “expr '+' expr” and push “expr”. We have reduced the stack by popping three terms off the stack, and
  • 66. pushing back one term. We may reference positions in the value stack in our C code by specifying “$1” for the first term on the right-hand side of the production, “$2” for the second, and so on. “$$” designates the top of the stack after reduction has taken place. The above action adds the value associated with two expressions, pops three terms off the value stack, and pushes back a single sum. Thus, the parse and value stacks remain synchronized. 17 Numeric values are initially entered on the stack when we reduce from INTEGER to expr. After INTEGER is shifted to the stack, we apply the rule expr: INTEGER { $$ = $1; } The INTEGER token is popped off the parse stack, followed by a push of expr. For the value stack, we pop the integer value off the stack, and then push it back on again. In other words, we do nothing. In fact, this is the default action, and need not be specified. Finally, when a newline is encountered, the value associated with expr is printed. In the event of syntax errors, yacc calls the user-supplied function yyerror. If you need to modify the interface to yyerror, you can alter the canned file that yacc includes to fit your needs. The last function in our yacc specification is
  • 67. main … in case you were wondering where it was. This example still has an ambiguous grammar. Yacc will issue shift-reduce warnings, but will still process the grammar using shift as the default operation. 3.3 Practice, Part II In this section we will extend the calculator from the previous section to incorporate some new functionality. New features include arithmetic operators multiply, and divide. Parentheses may be used to over-ride operator precedence, and single-character variables may be specified in assignment statements. The following illustrates sample input and calculator output: user: 3 * (4 + 5) calc: 27 user: x = 3 * (5 + 4) user: y = 5 user: x calc: 27 user: y calc: 5 user: x + 2*y calc: 37 The lexical analyzer returns VARIABLE and INTEGER tokens. For variables, yylval specifies an index to sym, our symbol table. For this program, sym merely holds the value of the associated variable. When INTEGER tokens are returned, yylval contains the number scanned. Here is the input specification for lex:
  • 68. 18 %{ #include "y.tab.h" %} %% /* variables */ [a-z] { yylval = *yytext - 'a'; return VARIABLE; } /* integers */ [0-9]+ { yylval = atoi(yytext); return INTEGER; } /* operators */ [-+()=/*n] { return *yytext; } /* skip whitespace */ [ t] ; /* anything else is an error */ . yyerror("invalid character");
  • 69. %% int yywrap(void) { return 1; } The input specification for yacc follows. The tokens for INTEGER and VARIABLE are utilized by yacc to create #defines in y.tab.h for use in lex. This is followed by definitions for the arithmetic operators. We may specify %left, for left-associative, or %right, for right associative. The last definition listed has the highest precedence. Thus, multiplication and division have higher precedence than addition and subtraction. All four operators are left-associative. Using this simple technique, we are able to disambiguate our grammar. %token INTEGER VARIABLE %left '+' '-' %left '*' '/' %{ int sym[26]; %} 19 %%
  • 70. program: program statement 'n' | ; statement: expr { printf("%dn", $1); } | VARIABLE '=' expr { sym[$1] = $3; } ; expr: INTEGER | VARIABLE { $$ = sym[$1]; } | expr '+' expr { $$ = $1 + $3; } | expr '-' expr { $$ = $1 - $3; } | expr '*' expr { $$ = $1 * $3; } | expr '/' expr { $$ = $1 / $3; } | '(' expr ')' { $$ = $2; } ; %% int yyerror(char *s) { fprintf(stderr, "%sn", s); return 0; } int main(void) { yyparse(); return 0; }
  • 71. 20 4. Calculator 4.1 Description This version of the calculator is substantially more complex than previous versions. Major changes include control constructs such as if-else and while. In addition, a syntax tree is constructed during parsing. After parsing, we walk the syntax tree to produce output. Two versions of the tree walk routine are supplied: • an interpreter that executes statements during the tree walk, and • a compiler that generates code for a hypothetical stack-based machine. To make things more concrete, here is a sample program, x = 0; while (x < 3) { print x; x = x + 1; } with output for the interpretive version, 0 1 2 and output for the compiler version.
  • 72. push 0 pop x L000: push x push 3 compLT jz L001 push x print push x push 1 add pop x jmp L000 L001: 21 The include file contains declarations for the syntax tree and symbol table. The symbol table, sym, allows for single-character variable names. A node in the syntax tree may hold a constant (conNodeType), an identifier (idNodeType), or an internal node with an operator (oprNodeType). Union nodeType encapsulates all three variants, and nodeType.type is used to determine which structure we have. The lex input file contains patterns for VARIABLE and INTEGER tokens. In addition, tokens are defined for 2-character operators such as EQ and NE.
  • 73. Single-character operators are simply returned as themselves. The yacc input file defines YYSTYPE, the type of yylval, as %union { int iValue; /* integer value */ char sIndex; /* symbol table index */ nodeType *nPtr; /* node pointer */ }; This causes the following to be generated in y.tab.h: typedef union { int iValue; /* integer value */ char sIndex; /* symbol table index */ nodeType *nPtr; /* node pointer */ } YYSTYPE; extern YYSTYPE yylval; Constants, variables, and nodes can be represented by yylval in the parser’s value stack. Notice the type definitions %token <iValue> INTEGER %type <nPtr> expr This binds expr to nPtr, and INTEGER to iValue in the YYSTYPE union. This is required so that yacc can generate the correct code. For example, the rule expr: INTEGER { $$ = con($1); }
  • 74. should generate the following code. Note that yyvsp[0] addresses the top of the value stack, or the value associated with INTEGER. yylval.nPtr = con(yyvsp[0].iValue); 22 The unary minus operator is given higher priority than binary operators as follows: %left GE LE EQ NE '>' '<' %left '+' '-' %left '*' '/' %nonassoc UMINUS The %nonassoc indicates no associativity is implied. It is frequently used in conjunction with %prec to specify precedence of a rule. Thus, we have expr: '-' expr %prec UMINUS { $$ = node(UMINUS, 1, $2); } indicating that the precedence of the rule is the same as the precedence of token UMINUS. And, as defined above, UMINUS has higher precedence than the other operators. A similar technique is used to remove ambiguity associated with the if- else statement (see If-Else Ambiguity, p. 35). The syntax tree is constructed bottom-up, allocating the leaf nodes when variables and integers are reduced. When operators are encountered, a node is allocated and
  • 75. pointers to previously allocated nodes are entered as operands. As statements are reduced, ex is called to do a depth-first walk of the syntax tree. Since the tree was constructed bottom-up, a depth-first walk visits nodes in the order that they were originally allocated. This results in operators being applied in the order that they were encountered during parsing. Two versions of ex, an interpretive version, and a compiler version, are included. 23 4.2 Include File typedef enum { typeCon, typeId, typeOpr } nodeEnum; /* constants */ typedef struct { nodeEnum type; /* type of node */ int value; /* value of constant */ } conNodeType; /* identifiers */ typedef struct { nodeEnum type; /* type of node */ int i; /* subscript to ident array */ } idNodeType;
  • 76. /* operators */ typedef struct { nodeEnum type; /* type of node */ int oper; /* operator */ int nops; /* number of operands */ union nodeTypeTag *op[1]; /* operands (expandable) */ } oprNodeType; typedef union nodeTypeTag { nodeEnum type; /* type of node */ conNodeType con; /* constants */ idNodeType id; /* identifiers */ oprNodeType opr; /* operators */ } nodeType; extern int sym[26]; 24 4.3 Lex Input %{ #include <stdlib.h> #include "calc3.h" #include "y.tab.h" %} %% [a-z] { yylval.sIndex = *yytext - 'a';
  • 77. return VARIABLE; } [0-9]+ { yylval.iValue = atoi(yytext); return INTEGER; } [-()<>=+*/;{}.] { return *yytext; } ">=" return GE; "<=" return LE; "==" return EQ; "!=" return NE; "while" return WHILE; "if" return IF; "else" return ELSE; "print" return PRINT; [ \t\n]+ ; /* ignore whitespace */ . yyerror("Unknown character"); %% int yywrap(void) { return 1; } 25
  • 78. 4.4 Yacc Input %{ #include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include "calc3.h" /* prototypes */ nodeType *opr(int oper, int nops, ...); nodeType *id(int i); nodeType *con(int value); void freeNode(nodeType *p); void yyerror(char *s); int sym[26]; /* symbol table */ %} %union { int iValue; /* integer value */ char sIndex; /* symbol table index */ nodeType *nPtr; /* node pointer */ }; %token <iValue> INTEGER %token <sIndex> VARIABLE %token WHILE IF PRINT %nonassoc IFX %nonassoc ELSE %left GE LE EQ NE '>' '<' %left '+' '-' %left '*' '/' %nonassoc UMINUS
  • 79. %type <nPtr> stmt expr stmt_list %% 26 program: function '.' { exit(0); } ; function: function stmt { ex($2); freeNode($2); } | /* NULL */ ; stmt: ';' { $$ = opr(';', 2, NULL, NULL); } | expr ';' { $$ = $1; } | PRINT expr ';' { $$ = opr(PRINT, 1, $2); } | VARIABLE '=' expr ';' { $$ = opr('=', 2, id($1), $3); } | WHILE '(' expr ')' stmt { $$ = opr(WHILE, 2, $3, $5); } | IF '(' expr ')' stmt %prec IFX { $$ = opr(IF, 2, $3, $5); } | IF '(' expr ')' stmt ELSE stmt { $$ = opr(IF, 3, $3, $5, $7); } | '{' stmt_list '}' { $$ = $2; } ;
  • 80. stmt_list: stmt { $$ = $1; } | stmt_list stmt { $$ = opr(';', 2, $1, $2); } ; expr: INTEGER { $$ = con($1); } | VARIABLE { $$ = id($1); } | '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); } | expr '+' expr { $$ = opr('+', 2, $1, $3); } | expr '-' expr { $$ = opr('-', 2, $1, $3); } | expr '*' expr { $$ = opr('*', 2, $1, $3); } | expr '/' expr { $$ = opr('/', 2, $1, $3); } | expr '<' expr { $$ = opr('<', 2, $1, $3); } | expr '>' expr { $$ = opr('>', 2, $1, $3); } | expr GE expr { $$ = opr(GE, 2, $1, $3); } | expr LE expr { $$ = opr(LE, 2, $1, $3); } | expr NE expr { $$ = opr(NE, 2, $1, $3); } | expr EQ expr { $$ = opr(EQ, 2, $1, $3); } | '(' expr ')' { $$ = $2; } ; %% 27 nodeType *con(int value) { nodeType *p; /* allocate node */ if ((p = malloc(sizeof(conNodeType))) == NULL)
  • 81. yyerror("out of memory"); /* copy information */ p->type = typeCon; p->con.value = value; return p; } nodeType *id(int i) { nodeType *p; /* allocate node */ if ((p = malloc(sizeof(idNodeType))) == NULL) yyerror("out of memory"); /* copy information */ p->type = typeId; p->id.i = i; return p; } nodeType *opr(int oper, int nops, ...) { va_list ap; nodeType *p; size_t size; int i; /* allocate node */ size = sizeof(oprNodeType) + (nops - 1) * sizeof(nodeType*); if ((p = malloc(size)) == NULL) yyerror("out of memory");
  • 82. /* copy information */ p->type = typeOpr; p->opr.oper = oper; p->opr.nops = nops; va_start(ap, nops); for (i = 0; i < nops; i++) p->opr.op[i] = va_arg(ap, nodeType*); va_end(ap); return p; } 28 void freeNode(nodeType *p) { int i; if (!p) return; if (p->type == typeOpr) { for (i = 0; i < p->opr.nops; i++) freeNode(p->opr.op[i]); } free (p); } void yyerror(char *s) { fprintf(stdout, "%sn", s); }
  • 83. int main(void) { yyparse(); return 0; } 29 4.5 Interpreter #include <stdio.h> #include "calc3.h" #include "y.tab.h" int ex(nodeType *p) { if (!p) return 0; switch(p->type) { case typeCon: return p->con.value; case typeId: return sym[p->id.i]; case typeOpr: switch(p->opr.oper) { case WHILE: while(ex(p->opr.op[0])) ex(p->opr.op[1]); return 0; case IF: if (ex(p->opr.op[0])) ex(p->opr.op[1]); else if (p->opr.nops > 2) ex(p->opr.op[2]);
  • 84. return 0; case PRINT: printf("%dn", ex(p->opr.op[0])); return 0; case ';': ex(p->opr.op[0]); return ex(p->opr.op[1]); case '=': return sym[p->opr.op[0]->id.i] = ex(p->opr.op[1]); case UMINUS: return -ex(p->opr.op[0]); case '+': return ex(p->opr.op[0]) + ex(p->opr.op[1]); case '-': return ex(p->opr.op[0]) - ex(p->opr.op[1]); case '*': return ex(p->opr.op[0]) * ex(p->opr.op[1]); case '/': return ex(p->opr.op[0]) / ex(p->opr.op[1]); case '<': return ex(p->opr.op[0]) < ex(p->opr.op[1]); case '>': return ex(p->opr.op[0]) > ex(p->opr.op[1]); case GE: return ex(p->opr.op[0]) >= ex(p->opr.op[1]); case LE: return ex(p->opr.op[0]) <= ex(p->opr.op[1]); case NE: return ex(p->opr.op[0]) != ex(p->opr.op[1]); case EQ: return ex(p->opr.op[0]) == ex(p->opr.op[1]); } } } 30 4.6 Compiler #include <stdio.h> #include "calc3.h" #include "y.tab.h"
  • 85. static int lbl; int ex(nodeType *p) { int lbl1, lbl2; if (!p) return 0; switch(p->type) { case typeCon: printf("tpusht%dn", p->con.value); break; case typeId: printf("tpusht%cn", p->id.i + 'a'); break; case typeOpr: switch(p->opr.oper) { case WHILE: printf("L%03d:n", lbl1 = lbl++); ex(p->opr.op[0]); printf("tjztL%03dn", lbl2 = lbl++); ex(p->opr.op[1]); printf("tjmptL%03dn", lbl1); printf("L%03d:n", lbl2); break; case IF: ex(p->opr.op[0]); if (p->opr.nops > 2) { /* if else */ printf("tjztL%03dn", lbl1 = lbl++); ex(p->opr.op[1]); printf("tjmptL%03dn", lbl2 = lbl++);
  • 86. printf("L%03d:n", lbl1); ex(p->opr.op[2]); printf("L%03d:n", lbl2); } else { /* if */ printf("tjztL%03dn", lbl1 = lbl++); ex(p->opr.op[1]); printf("L%03d:n", lbl1); } break; 31 case PRINT: ex(p->opr.op[0]); printf("tprintn"); break; case '=': ex(p->opr.op[1]); printf("tpopt%cn", p->opr.op[0]->id.i + 'a'); break; case UMINUS: ex(p->opr.op[0]); printf("tnegn"); break; default: ex(p->opr.op[0]); ex(p->opr.op[1]); switch(p->opr.oper) {
  • 87. case '+': printf("taddn"); break; case '-': printf("tsubn"); break; case '*': printf("tmuln"); break; case '/': printf("tdivn"); break; case '<': printf("tcompLTn"); break; case '>': printf("tcompGTn"); break; case GE: printf("tcompGEn"); break; case LE: printf("tcompLEn"); break; case NE: printf("tcompNEn"); break; case EQ: printf("tcompEQn"); break; } } } } 32 5. More Lex 5.1 Strings Quoted strings frequently appear in programming languages. Here is one way to match a string in lex: %{ char *yylval; #include <string.h> %} %% "[^"n]*["n] {
  • 88. yylval = strdup(yytext+1); if (yylval[yyleng-2] != '"') warning("improperly terminated string"); else yylval[yyleng-2] = 0; printf("found '%s'\n", yylval); } The above example ensures that strings don’t cross line boundaries, and removes enclosing quotes. If we wish to add escape sequences, such as \n or \", start states simplify matters: %{ char buf[100]; char *s; %} %x STRING %% \" { BEGIN STRING; s = buf; } <STRING>\\n { *s++ = '\n'; } <STRING>\\t { *s++ = '\t'; } <STRING>\\\" { *s++ = '\"'; } <STRING>\" { *s = 0; BEGIN 0; printf("found '%s'\n", buf); }
  • 89. <STRING>n { printf("invalid string"); exit(1); } <STRING>. { *s++ = *yytext; } Exclusive start state STRING is defined in the definition section. When the scanner detects a quote, the BEGIN macro shifts lex into the STRING state. Lex stays in the STRING state, recognizing only patterns that begin with <STRING>, until another BEGIN 33 is executed. Thus, we have a mini-environment for scanning strings. When the trailing quote is recognized, we switch back to state 0, the initial state. 5.2 Reserved Words If your program has a large collection of reserved words, it is more efficient to let lex simply match a string, and determine in your own code whether it is a variable or reserved word. For example, instead of coding "if" return IF; "then" return THEN; "else" return ELSE; {letter}({letter}|{digit})* { yylval.id = symLookup(yytext); return IDENTIFIER; } where symLookup returns an index into the symbol table, it is
  • 90. better to detect reserved words and identifiers simultaneously, as follows: {letter}({letter}|{digit})* { int i; if ((i = resWord(yytext)) != 0) return (i); yylval.id = symLookup(yytext); return (IDENTIFIER); } This technique significantly reduces the number of states required, and results in smaller scanner tables. 5.3 Debugging Lex Lex has facilities that enable debugging. This feature may vary with different versions of lex, so you should consult documentation for details. The code generated by lex in file lex.yy.c includes debugging statements that are enabled by specifying command-line option “-d”. Debug output may be toggled on and off by setting yy_flex_debug. Output includes the rule applied and corresponding matched text. If you’re running lex and yacc together, specify the following in your yacc input file: extern int yy_flex_debug; int main(void) { yy_flex_debug = 1; yyparse();
  • 91. } 34 Alternatively, you may write your own debug code by defining functions that display information for the token value, and each variant of the yylval union. This is illustrated in the following example. When DEBUG is defined, the debug functions take effect, and a trace of tokens and associated values is displayed. %union { int ivalue; ... }; %{ #ifdef DEBUG int dbgToken(int tok, char *s) { printf("token %sn", s); return tok; } int dbgTokenIvalue(int tok, char *s) { printf("token %s (%d)n", s, yylval.ivalue); return tok; } #define RETURN(x) return dbgToken(x, #x)
  • 92. #define RETURN_ivalue(x) return dbgTokenIvalue(x, #x) #else #define RETURN(x) return(x) #define RETURN_ivalue(x) return(x) #endif %} %% [0-9]+ { yylval.ivalue = atoi(yytext); RETURN_ivalue(INTEGER); } "if" RETURN(IF); "else" RETURN(ELSE); 35 6. More Yacc 6.1 Recursion When specifying a list, we may do so using left recursion, list: item | list ',' item ; or right recursion:
  • 93. list: item | item ',' list If right recursion is used, all items on the list are pushed on the stack. After the last item is pushed, we start reducing. With left recursion, we never have more than three terms on the stack, since we reduce as we go along. For this reason, it is advantageous to use left recursion. 6.2 If-Else Ambiguity A shift-reduce conflict that frequently occurs involves the if- else construct. Assume we have the following rules: stmt: IF expr stmt | IF expr stmt ELSE stmt ... and the following state: IF expr stmt IF expr stmt . ELSE stmt We need to decide if we should shift the ELSE, or reduce the IF expr stmt at the top of the stack. If we shift, then we have IF expr stmt IF expr stmt . ELSE stmt IF expr stmt IF expr stmt ELSE . stmt IF expr stmt IF expr stmt ELSE stmt . IF expr stmt stmt . where the second ELSE is paired with the second IF. If we
  • 94. reduce, we have 36 IF expr stmt IF expr stmt . ELSE stmt IF expr stmt stmt . ELSE stmt IF expr stmt . ELSE stmt IF expr stmt ELSE . stmt IF expr stmt ELSE stmt . where the second ELSE is paired with the first IF. Modern programming languages pair an ELSE with the most recent unpaired IF, so the former behavior is expected. This works well with yacc, since default behavior, when a shift- reduce conflict is encountered, is to shift. Although yacc does the right thing, it also issues a shift-reduce warning message. To remove the message, give IF-ELSE a higher precedence than the simple IF statement: %nonassoc IFX %nonassoc ELSE stmt: IF expr stmt %prec IFX | IF expr stmt ELSE stmt 6.3 Error Messages A nice compiler gives the user meaningful error messages. For example, not much information is conveyed by the following message:
  • 95. syntax error If we track the line number in lex, then we can at least give the user a line number: void yyerror(char *s) { fprintf(stderr, "line %d: %s\n", yylineno, s); } When yacc discovers a parsing error, the default action is to call yyerror, and then return from yyparse with a return value of one. A more graceful action flushes the input stream to a statement delimiter, and continues to scan: stmt: ';' | expr ';' | PRINT expr ';' | VARIABLE '=' expr ';' | WHILE '(' expr ')' stmt | IF '(' expr ')' stmt %prec IFX | IF '(' expr ')' stmt ELSE stmt | '{' stmt_list '}' | error ';' | error '}' ; 37 The error token is a special feature of yacc that will match all input until the token
  • 96. following error is found. For this example, when yacc detects an error in a statement it will call yyerror, flush input up to the next semicolon or brace, and resume scanning. 6.4 Inherited Attributes The examples so far have used synthesized attributes. At any point in a syntax tree we can determine the attributes of a node based on the attributes of its children. Consider the rule expr: expr '+' expr { $$ = $1 + $3; } Since we are parsing bottom-up, the values of both operands are available, and we can determine the value associated with the left-hand side. An inherited attribute of a node depends on the value of a parent or sibling node. The following grammar defines a C variable declaration: decl: type varlist type: INT | FLOAT varlist: VAR { setType($1, $0); } | varlist ',' VAR { setType($3, $0); } Here is a sample parse: . INT VAR INT . VAR type . VAR type VAR . type varlist . decl .
  • 97. When we reduce VAR to varlist, we should annotate the symbol table with the type of the variable. However, the type is buried in the stack. This problem is resolved by indexing back into the stack. Recall that $1 designates the first term on the right-hand side. We can index backwards, using $0, $-1, and so on. In this case, $0 will do just fine. If you need to specify a token type, the syntax is $<tokentype>0, angle brackets included. In this particular example, care must be taken to ensure that type always precedes varlist. 6.5 Embedded Actions Rules in yacc may contain embedded actions: list: item1 { do_item1($1); } item2 { do_item2($3); } item3 Note that the actions take a slot in the stack, so do_item2 must use $3 to reference item2. Actually, this grammar is transformed by yacc into the following: 38 list: item1 _rule01 item2 _rule02 item3 _rule01: { do_item1($0); } _rule02: { do_item2($0); } 6.6 Debugging Yacc Yacc has facilities that enable debugging. This feature may vary with different versions
  • 98. of yacc, so you should consult documentation for details. The code generated by yacc in file y.tab.c includes debugging statements that are enabled by defining YYDEBUG and setting it to a non-zero value. This may also be done by specifying command-line option “-t”. With YYDEBUG properly set, debug output may be toggled on and off by setting yydebug. Output includes tokens scanned and shift/reduce actions. %{ #define YYDEBUG 1 %} %% ... %% int main(void) { #if YYDEBUG yydebug = 1; #endif yyparse(); } In addition, you can dump the parse states by specifying command-line option "-v". States are dumped to file y.output, and are often useful when debugging a grammar. Alternatively, you can write your own debug code by defining a TRACE macro, as illustrated below. When DEBUG is defined, a trace of reductions, by line number, is displayed.
  • 99. %{ #ifdef DEBUG #define TRACE printf("reduce at line %d\n", __LINE__); #else #define TRACE #endif %} %% statement_list: statement { TRACE $$ = $1; } | statement_list statement { TRACE $$ = newNode(';', 2, $1, $2); } ; 39 7. Bibliography Aho, Alfred V., Ravi Sethi and Jeffrey D. Ullman [1986]. Compilers: Principles, Techniques, and Tools. Addison-Wesley, Reading, Massachusetts. Gardner, Jim, Chris Retterath and Eric Gisin [1988]. MKS Lex & Yacc. Mortice Kern Systems Inc., Waterloo, Ontario, Canada. Johnson, Stephen C. [1975]. Yacc: Yet Another Compiler Compiler. Computing Science
  • 100. Technical Report No. 32, Bell Laboratories, Murray Hill, New Jersey. Lesk, M. E. and E. Schmidt [1975]. Lex – A Lexical Analyzer Generator. Computing Science Technical Report No. 39, Bell Laboratories, Murray Hill, New Jersey. Levine, John R., Tony Mason and Doug Brown [1992]. Lex & Yacc. O’Reilly & Associates, Inc. Sebastopol, California. http://www.amazon.com/exec/obidos/ISBN=0201100886/none01 A/ http://www.amazon.com/exec/obidos/ISBN=0201100886/none01 A/ http://members.xoom.com/thomasn/y_yacc.pdf http://members.xoom.com/thomasn/y_lex.pdf http://www.amazon.com/exec/obidos/ISBN=1565920007/none01 A/PrefaceIntroductionLexTheoryPracticeYaccTheoryPractice, Part IPractice, Part IICalculatorDescriptionInclude FileLex InputYacc InputInterpreterCompilerMore LexStringsReserved WordsDebugging LexMore YaccRecursionIf-Else AmbiguityError MessagesInherited AttributesEmbedded ActionsDebugging YaccBibliography cmparser.y %{ #include <stdio.h> #include <string.h> #include <stdlib.h> #include "ast.h" #include "symbolTable.h" #include "util.h"
  • 101. /* other external function prototypes */ extern int yylex(); extern int initLex(int , char **); extern AstNodePtr new_Node(NodeKind kind); /* external global variables */ extern int yydebug; extern int yylineno; extern SymbolTableStackEntryPtr symbolStackTop; extern int scopeDepth; /* function prototypes */ void yyerror(const char *); /* global variables */ AstNodePtr program; FILE *outfile; %} /* YYSTYPE */ %union { AstNodePtr nodePtr; int iVal; char *cVal; Type *type; } /* terminals */
  • 102. %token TOK_ELSE TOK_IF TOK_RETURN TOK_VOID TOK_INT TOK_WHILE %token TOK_PLUS TOK_MINUS TOK_MULT TOK_DIV TOK_LT TOK_LE TOK_GT TOK_GE TOK_EQ TOK_NE TOK_ASSIGN TOK_SEMI TOK_COMMA %token TOK_LPAREN TOK_RPAREN TOK_LSQ TOK_RSQ TOK_LBRACE TOK_RBRACE TOK_ERROR %token <cVal> TOK_ID %token <iVal> TOK_NUM %type <nodePtr> Declarations Functions Fun_Declaration %type <type> Type_Specifier %type <nodePtr> Compound_Stmt Statements Statement %type <nodePtr> Expr_Statement If_Else_Statement Selection_Stmt Iteration_Stmt Return_Stmt %type <nodePtr> Expression Simple_Expression Additive_Expression Factor Var Call Term %type <nodePtr> Args Args_List %type <nodePtr> Params Param_List Param /* associativity and precedence */ %nonassoc TOK_IF %nonassoc TOK_ELSE %right TOK_ASSIGN %left TOK_EQ TOK_NE %nonassoc TOK_LT TOK_GT TOK_LE TOK_GE %left TOK_PLUS TOK_SUB %left TOK_MULT TOK_DIV %nonassoc error %% Start : Declarations {}
  • 103. ; Declarations : Functions { program = $1;} | Var_Declaration Declarations { } ; Functions : Fun_Declaration { $$ = $1 } | Fun_Declaration Functions { $1->sibling = $2; $$ = $1 } ; Var_Declaration : Type_Specifier TOK_ID TOK_SEMI { if ($1->kind != INT){ yyerror("non-int variable declaredn"); yyerror($2); } ElementPtr symElement = symLookup($2); if(!symElement || scopeDepth != symElement->scope) symInsert($2,$1,yylineno); else { yyerror("Redeclaration of variable"); yyerror($2); free($1); } } | Type_Specifier TOK_ID TOK_LSQ TOK_NUM TOK_RSQ TOK_SEMI { if ($1->kind
  • 104. != INT){ yyerror("non- int variable declaredn"); yyerror($2); } ElementPtr symElement = symLookup($2); if(!symElement || scopeDepth != symElement->scope) { /* reuse Type for storing dimension */ $1->kind = ARRAY; $1- >dimension = $4; $1->function = NULL; symInsert($2,$1,yylineno); } else { yyerror("Redeclaration of variable"); yyerror($2); free($1); } } ; Fun_Declaration : Type_Specifier TOK_ID TOK_LPAREN { $<nodePtr>$ = new_Node(METHOD); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->nType = $1;
  • 105. ElementPtr symElement = symLookup($2); if(!symElement || scopeDepth != symElement->scope) { $<nodePtr>$- >nSymbolPtr = symInsert($2,$1,yylineno); $<nodePtr>$- >nSymbolPtr->stype->function = (Type*) malloc(sizeof(Type)); $<nodePtr>$- >nSymbolPtr->stype->function->kind = $1->kind; $<nodePtr>$- >nSymbolPtr->snode = $<nodePtr>$; $1->kind = FUNCTION; enterScope(); } else { yyerror("Redeclaration of function"); yyerror($2); free($1); } } Params TOK_RPAREN { $<nodePtr>4->children[0] = $5; } Compound_Stmt { $<nodePtr>4->children[1] = $8; $$ = $<nodePtr>4; leaveScope();
  • 106. } ; Params : Param_List {$$ = $1} | TOK_VOID {$$=NULL;} ; Param_List : Param_List TOK_COMMA Param { $<nodePtr>$ = $1; while($<nodePtr>$->sibling) $<nodePtr>$ = $<nodePtr>$->sibling; $<nodePtr>$->sibling = $3; $3->sibling = NULL; $$ = $1; } | Param { $1->sibling = NULL; $$ = $1; } ; Param : Type_Specifier TOK_ID { ElementPtr symElement = symLookup($2); if(!symElement || scopeDepth != symElement->scope) { $$ = new_Node(FORMALVAR); $$->nLinenumber = yylineno; if ($1->kind != INT){ yyerror("non-int variable declaredn"); } else{
  • 107. $$->nSymbolPtr = symInsert($2,$1,yylineno); $$->nType = $1; } } else { yyerror("Redeclaration of variable"); yyerror($2); free($1); } } | Type_Specifier TOK_ID TOK_LSQ TOK_RSQ { ElementPtr symElement = symLookup($2); if(!symElement || scopeDepth != symElement->scope) { Type *typ; if ($1->kind != INT){ yyerror("non-int array declaredn"); } typ = new_type(ARRAY); $$ = new_Node(FORMALVAR); $$->nLinenumber = yylineno; $$->nSymbolPtr = symInsert($2,typ,yylineno); $$->nType = typ; free($1); } else
  • 108. { yyerror("Redeclaration of variablen"); yyerror($2); free($1); } } ; Type_Specifier : TOK_INT { $$ = (Type*) new_type(INT); } | TOK_VOID { $$ = (Type*) new_type(VOID); } ; Compound_Stmt : TOK_LBRACE { enterScope(); $<nodePtr>$ = new_StmtNode(COMPOUND_STMT); $<nodePtr>$->nSymbolTabPtr = symbolStackTop->symbolTablePtr; $<nodePtr>$->nLinenumber = yylineno; } Statements { $<nodePtr>2->children[0] = $3; } // shud point to first statement TOK_RBRACE { leaveScope(); $$ = $<nodePtr>2; } | TOK_LBRACE { enterScope(); $<nodePtr>$ =
  • 109. new_StmtNode(COMPOUND_STMT); $<nodePtr>$->nSymbolTabPtr = symbolStackTop->symbolTablePtr; $<nodePtr>$->nLinenumber = yylineno; } Local_Declarations Statements { $<nodePtr>2- >children[0] = $4;} // shud point to first statement TOK_RBRACE { leaveScope(); $$ = $<nodePtr>2; } ; Local_Declarations : Var_Declaration Local_Declarations {} | Var_Declaration {} ; Statements : Statement Statements { $1->sibling = $2; $$ = $1; } | {$$=NULL} ; Statement : Expr_Statement { $$ = $1; } | Compound_Stmt { $$ = $1; } | Selection_Stmt { $$ = $1; } | Iteration_Stmt { $$ = $1; } | Return_Stmt { $$ = $1; }
  • 110. ; Expr_Statement : Expression TOK_SEMI { $$ = new_StmtNode(EXPRESSION_STMT); $$->nLinenumber = yylineno; $$->children[0] = $1; } | TOK_SEMI { $$ = new_StmtNode(EXPRESSION_STMT) ; $$->nLinenumber = yylineno; $$->children[0] = NULL; } ; Selection_Stmt : If_Else_Statement %prec TOK_IF { $$ = $1; } | If_Else_Statement TOK_ELSE Statement { $$ = $1; $$->children[2]= $3; } ; If_Else_Statement : TOK_IF TOK_LPAREN Expression { $<nodePtr>$ = new_StmtNode(IF_THEN_ELSE_STMT); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $3; } TOK_RPAREN Statement { $<nodePtr>4->children[1] = $6;
  • 111. $$ = $<nodePtr>4; } ; Iteration_Stmt : TOK_WHILE TOK_LPAREN Expression { $<nodePtr>$ = new_StmtNode(WHILE_STMT); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $3; } TOK_RPAREN Statement { $<nodePtr>4->children[1] = $6; $$ = $<nodePtr>4; $<nodePtr>$->nLinenumber = yylineno; } ; Return_Stmt : TOK_RETURN Expression TOK_SEMI { $$ = new_StmtNode(RETURN_STMT); $$->nLinenumber = yylineno; $$->children[0] = $2; } | TOK_RETURN TOK_SEMI { $$ = new_StmtNode(RETURN_STMT); $$->nLinenumber = yylineno; $$->children[0] = NULL; } ;
  • 112. Expression : Var TOK_ASSIGN { $<nodePtr>$ = new_ExprNode(ASSI_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Expression { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Simple_Expression { $$ = $1; } ; Var : TOK_ID { $$ = new_ExprNode(VAR_EXP); $$->nLinenumber = yylineno; $$->nSymbolPtr = symLookup($1); if(!$$->nSymbolPtr) { yyerror("Variable must be declared before use"); yyerror($1); free($$); } } | TOK_ID TOK_LSQ Expression TOK_RSQ { $$ = new_ExprNode(ARRAY_EXP); $$->nLinenumber = yylineno; $$->nSymbolPtr = symLookup($1); if(!$$->nSymbolPtr) {
  • 113. yyerror("Variable must be declared before use"); yyerror($1); free($$); } else $$->children[0] = $3; } ; Simple_Expression : Additive_Expression TOK_GT { $<nodePtr>$ = new_ExprNode(GT_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Additive_Expression { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Additive_Expression TOK_LT { $<nodePtr>$ = new_ExprNode(LT_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Additive_Expression { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Additive_Expression TOK_GE {
  • 114. $<nodePtr>$ = new_ExprNode(GE_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Additive_Expression { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Additive_Expression TOK_LE { $<nodePtr>$ = new_ExprNode(LE_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Additive_Expression { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Additive_Expression TOK_EQ { $<nodePtr>$ = new_ExprNode(EQ_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Additive_Expression { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3;
  • 115. } | Additive_Expression TOK_NE { $<nodePtr>$ = new_ExprNode(NE_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Additive_Expression { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Additive_Expression { $$ = $1 } ; Additive_Expression : Additive_Expression TOK_PLUS { $<nodePtr>$ = new_ExprNode(ADD_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Term { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Additive_Expression TOK_MINUS { $<nodePtr>$ = new_ExprNode(SUB_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] =
  • 116. $1; } Term { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Term { $$ = $1; } ; Term : Term TOK_MULT { $<nodePtr>$ = new_ExprNode(MULT_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Factor { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Term TOK_DIV { $<nodePtr>$ = new_ExprNode(DIV_EXP); $<nodePtr>$->nLinenumber = yylineno; $<nodePtr>$->children[0] = $1; } Factor { $<nodePtr>3->children[1] = $4; $$ = $<nodePtr>3; } | Factor { $$ = $1; } ;
  • 117. Factor : TOK_LPAREN Expression TOK_RPAREN { $$ = $2; } | Var { $$ = $1 } | Call { $$ = $1 } | TOK_NUM { $$ = new_ExprNode(CONST_EXP); $$->nLinenumber = yylineno; $$->nValue = $1; } ; Call : TOK_ID TOK_LPAREN Args TOK_RPAREN { $$= new_ExprNode(CALL_EXP); $$->nLinenumber = yylineno; $$->nSymbolPtr = symLookup($1); if(!$$->nSymbolPtr) { printf("function %s not found, forward referencen", $1); $$->fname = strdup($1); } $$->children[0] = $3; } ; Args : Args_List {$$=$1;} | { $$ = NULL; } ; Args_List : Args_List TOK_COMMA Expression { $<nodePtr>$ = $1; while($<nodePtr>$->sibling)
  • 118. $<nodePtr>$ = $<nodePtr>$->sibling; $<nodePtr>$->sibling = $3; $3->sibling = NULL; $$ = $1; } | Expression { $1->sibling = NULL; $$ = $1; } ; %% void yyerror (char const *s) { fprintf (stderr, "Error at line %d: %sn", yylineno, s); exit(1); } int main(int argc, char **argv){ initLex(argc,argv); #ifdef YYLLEXER while (gettok() !=0) ; //gettok returns 0 on EOF return; #else initSymbolTable(); yyparse(); #endif }
  • 119. ast.c #include "symbolTable.h" #include "ast.h" extern SymbolTableStackEntryPtr symbolStackTop; extern int scopeDepth; //creates a new expression node AstNodePtr new_ExprNode(ExpKind kind) { AstNodePtr node = (AstNodePtr) malloc(sizeof(AstNode)); node->nKind = EXPRESSION; node->eKind = kind; return node; } //creates a new statement node AstNodePtr new_StmtNode(StmtKind kind) { AstNodePtr node = (AstNodePtr) malloc(sizeof(AstNode)); node->nKind = STMT; node->sKind = kind; return node; } AstNodePtr new_Node(NodeKind kind) { AstNodePtr node = (AstNodePtr) malloc(sizeof(AstNode)); node->nKind = kind; return node; } //creates a new type node for entry into symbol table Type* new_type(TypeKind kind) {
  • 120. Type *typ1 = (Type *)malloc(sizeof(Type)); typ1->kind = kind; return typ1; }