The document describes a C program that lexically analyzes code and outputs token information. It currently identifies identifiers, operators, and parentheses. To also output keywords, the code would need to check for keyword matches after identifying identifiers and output the token as KEYWORD instead of IDENT if there is a match.
I have a c lexer program that outputs the specific type of token as .pdf
1. I have a c lexer program that outputs the specific type of token as a string using the expression
The existing code outputs identifiers, operators, and parentheses. What modifications need to
be made to the existing code to output keywords as well?
Example:
Next token is: KEYWORD, Next lexeme is int
Next token is: IDENT, Next lexeme is sum
Next token is: EQUAL_OP, Next lexeme is =
Next token is: INTEGER, Next lexeme is 3
Etc...
Code:
#include
#include
#include
/* Global declarations */
/* Variables - shared scanner state used by the functions below */
/* Class of nextChar as set by getChar: LETTER, DIGIT, UNKNOWN, or EOF at
   end of input. */
int charClass;
/* Text of the token being built; addChar appends to it and keeps it
   NUL-terminated. */
char lexeme[100];
/* Most recent character read from in_fp by getChar. */
char nextChar;
/* Number of characters currently stored in lexeme; lex resets it to 0 at
   the start of every token. */
int lexLen;
/* Code of the most recently recognized token; lex stores it here and also
   returns it. */
int nextToken;
/* Input stream that getChar reads from (opened outside the code visible
   here). */
FILE* in_fp;
/* Function declarations */
/* Append nextChar to lexeme. */
void addChar();
/* Read the next input character into nextChar and classify it. */
void getChar();
/* Skip whitespace by calling getChar until nextChar is not a space. */
void getNonBlank();
/* Scan one token, print it, and return its code. */
int lex();
/* NOTE(review): the definition of lookup is not fully visible here. lex
   calls it on an UNKNOWN-class character and then inspects nextToken, so it
   presumably maps operators and parentheses to token codes - confirm. */
int lookup(char ch);
/* Character classes - values stored in charClass */
#define LETTER 0
#define DIGIT 1
/* Any character that is neither a letter nor a digit. */
#define UNKNOWN 99
/* Token codes - values stored in nextToken */
/* NOTE(review): lex also uses LEFT_PAREN, RIGHT_PAREN, ADD_OP, SUB_OP and
   other codes that are not defined in the lines visible here; this list
   appears to be cut off - confirm against the full source. */
#define INT_LIT 10
#define IDENT 11
#define ASSIGN_OP 20
break;
default:
addChar();
nextToken = EOF;
break;
}
return nextToken;
}
/*
 * addChar - append nextChar to the lexeme being built.
 *
 * Stores nextChar at index lexLen of the global lexeme buffer, advances
 * lexLen, and keeps the buffer NUL-terminated. If the buffer is already
 * full the character is dropped and an error line is printed instead.
 */
void addChar() {
    /* lexeme is 100 bytes: at most 99 characters plus the terminating NUL,
       so 98 is the last index that may still receive a character. */
    if (lexLen <= 98) {
        lexeme[lexLen++] = nextChar;
        lexeme[lexLen] = 0;
    }
    else {
        /* The message must end with a real newline escape, not a literal
           "n", so that following output starts on its own line. */
        printf("Error - lexeme is too long \n");
    }
}
/*
 * getChar - read the next character of input and determine its class.
 *
 * Reads one character from in_fp, stores it in nextChar, and sets
 * charClass to LETTER, DIGIT, or UNKNOWN. At end of input (or on a read
 * error) charClass is set to EOF.
 */
void getChar() {
    /* getc() returns an int so that EOF can be told apart from every valid
       byte. Comparing after narrowing to char either never matches EOF
       (unsigned char) or mistakes byte 0xFF for end of input (signed char),
       so the test is done on the int value. */
    int c = getc(in_fp);

    nextChar = (char)c;

    if (c == EOF) {
        charClass = EOF;
    }
    /* c is an unsigned-char value here, which is what <ctype.h> requires. */
    else if (isalpha(c)) {
        charClass = LETTER;
    }
    else if (isdigit(c)) {
        charClass = DIGIT;
    }
    else {
        charClass = UNKNOWN;
    }
}
/* getNonBlank - a function to call getChar until it returns a non-whitespace character */
7. void getNonBlank() {
while (isspace(nextChar)) {
getChar();
}
}
int lex() {
lexLen = 0;
getNonBlank();
switch (charClass) {
/* Parse identifiers */
case LETTER:
addChar();
getChar();
while (charClass == LETTER || charClass == DIGIT) {
addChar();
getChar();
}
nextToken = IDENT;
printf("Next token is: IDENT, Next lexeme is %sn", lexeme);
break;
/* Parse integer literals */
case DIGIT:
addChar();
getChar();
while (charClass == DIGIT) {
addChar();
getChar();
}
nextToken = INT_LIT;
printf("Next token is: INT_LIT, Next lexeme is %sn", lexeme);
break;
/* Parentheses and operators */
case UNKNOWN:
lookup(nextChar);
8. getChar();
printf("Next token is: ");
switch (nextToken) {
case LEFT_PAREN:
printf("LEFT_PAREN");
break;
case RIGHT_PAREN:
printf("RIGHT_PAREN");
break;
case ADD_OP:
printf("ADD_OP");
break;
case SUB_OP:
printf("SUB_OP");
break;
case MULT_OP:
printf("MULT_OP");
break;
case DIV_OP:
printf("DIV_OP");
break;
case LESSER_OP:
printf("LESSER_OP");
break;
case GREATER_OP:
printf("GREATER_OP");
break;
case EQUAL_OP:
printf("EQUAL_OP");
break;
case NOT_EQUAL_OP:
printf("NOT_EQUAL_OP");
break;
case LESSER_EQUAL_OP:
printf("LESSER_EQUAL_OP");
break;
9. case GREATER_EQUAL_OP:
printf("GREATER_EQUAL_OP");
break;
case AND_OP:
printf("AND_OP");
break;
case OR_OP:
printf("OR_OP");
break;
case NOT_OP:
printf("NOT_OP");
break;
default:
printf("UNKNOWN");
break;
}
printf(", Next lexeme is %sn", lexeme);
break;
/* End of input */
case EOF:
nextToken = EOF;
lexeme[0] = 'E';
lexeme[1] = 'O';
lexeme[2] = 'F';
lexeme[3] = 0;
printf("Next token is: EOFn");
break;
}
return nextToken;
}