import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * The token set, expressed as an enum type. Each constant is bound to the list of
 * lexemes (literal words) that are valid for that token.
 */
public enum TOKEN {
    ARTICLE("a", "the"),            // the articles
    NOUN("dog", "cat", "rat"),      // the nouns
    VERB("loves", "hates", "eats"), // the verbs
    UNKNOWN();                      // owns no lexemes; keeps unmatched input "type-safe"

    // Every lexeme that maps to this token (empty for UNKNOWN).
    private final List<String> lexemeList;

    // Binds the constant to the lexemes listed in its declaration.
    TOKEN(String... tokenStrings) {
        lexemeList = new ArrayList<>(Arrays.asList(tokenStrings));
    }

    /**
     * Looks up the token that a lexeme belongs to.
     *
     * The constants are scanned in declaration order and the first one whose lexeme list
     * holds an element equal to {@code str} wins. The comparison is exact, so it is
     * case-sensitive.
     *
     * @param str the lexeme to classify
     * @return the matching token, or {@code UNKNOWN} when no constant lists {@code str}
     */
    public static TOKEN fromLexeme(String str) {
        TOKEN[] candidates = values();
        int index = 0;
        while (index < candidates.length) {
            TOKEN candidate = candidates[index++];
            if (candidate.lexemeList.indexOf(str) >= 0) {
                return candidate;
            }
        }
        // Nothing claimed the lexeme.
        return UNKNOWN;
    }
}
/**
* Programming Languages: Implementation and Design.
*
* A Simple Compiler Adapted from Sebesta (2010) by Josh Dehlinger further modified by Adam Conover
* (2012-2015)
*
* A simple compiler used for the simple English grammar in Section 2.2 of Adam Brooks Webber's
* "Modern Programming Languages" book. Parts of this code were adapted from Robert Sebesta's
* "Concepts of Programming Languages".
*
* This compiler assumes that the source file containing the sentences to parse is provided as the
* first runtime argument. Within the source file, the compiler assumes that each sentence to parse
* is provided on its own line.
*
* NOTE: A "real" compiler would more likely treat an entire file as a single stream of input,
* rather than each line being an independent input stream.
*/
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
public class Compiler {
/**
 * Program entry point. The first command-line argument is taken to be the name of the
 * source file that is to be "compiled".
 *
 * @param args command-line arguments; {@code args[0]} is the path of the file to read
 * @throws IOException if the file cannot be opened, or if an I/O error propagates out of
 *         {@code processFile}
 */
public static void main(String[] args) throws IOException {
    //args = new String[]{"<some hard coded path for testing>"};
    //args = new String[]{"D:\\Version_Controlled\\_SVN_\\Newton\\COS420\\Java\\ParserSample\\input.txt"};

    // Nothing to do without a filename: report it and leave.
    if (args.length == 0) {
        System.out.println("Need a filename!");
        return;
    }

    // try-with-resources closes the reader on every exit path. The lexer is created only
    // after the file has been opened successfully, then lexing and parsing begin.
    try (BufferedReader reader = new BufferedReader(new FileReader(args[0]))) {
        processFile(new LexicalAnalyzer(), reader);
    }
}
/**
* Reads each line of the input file and invokes th.
1. import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * The token set as an enum type. Each token is bound to the list of lexemes that are
 * valid for it.
 */
public enum TOKEN {
    ARTICLE("a", "the"), // a list of articles
    NOUN("dog", "cat", "rat"), // a list of nouns
    VERB("loves", "hates", "eats"), // a list of verbs
    UNKNOWN(); // keep our lexemes "type-safe"!

    // The lexemes under this token; assigned once, in the constructor.
    private final List<String> lexemeList;

    // Construct the token with the list of lexemes.
    private TOKEN(String... tokenStrings) {
        lexemeList = new ArrayList<>(tokenStrings.length);
        lexemeList.addAll(Arrays.asList(tokenStrings));
    }

    /**
     * Gets a token from a lexeme.
     *
     * Tokens are searched in declaration order; the match is exact and therefore
     * case-sensitive.
     *
     * @param str the lexeme to look up
     * @return the first token whose lexeme list contains {@code str}, or {@code UNKNOWN}
     *         if nothing matches
     */
    public static TOKEN fromLexeme(String str) {
        // Search through the lexemes looking for a match.
        for (TOKEN t : TOKEN.values()) {
            if (t.lexemeList.contains(str)) {
                return t;
            }
        }
        // If nothing matches then return UNKNOWN.
        return UNKNOWN;
    }
}
/**
* Programming Languages: Implementation and Design.
*
* A Simple Compiler Adapted from Sebesta (2010) by Josh Dehlinger further modified by Adam Conover
* (2012-2015)
*
* A simple compiler used for the simple English grammar in Section 2.2 of Adam Brooks Webber's
* "Modern Programming Languages" book. Parts of this code were adapted from Robert Sebesta's
* "Concepts of Programming Languages".
*
* This compiler assumes that the source file containing the sentences to parse is provided as the
* first runtime argument. Within the source file, the compiler assumes that each sentence to parse
* is provided on its own line.
*
* NOTE: A "real" compiler would more likely treat an entire file as a single stream of input,
* rather than each line being an independent input stream.
*/
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
public class Compiler {
/**
* It is assumed that the first argument provided is the name
of the source file that is to be
* "compiled".
*/
5. public static void main(String[] args) throws IOException {
//args = new String[]{"<some hard coded path for
testing>"};
//args = new
String[]{"D:Version_Controlled_SVN_NewtonCOS420J
avaParserSampleinput.txt"};
if (args.length < 1) {
System.out.println("Need a filename!");
} else {
// Java 7 "try-with-resource" to create the file input
buffer.
try (BufferedReader br = new BufferedReader(new
FileReader(args[0]))) {
// Create the new lexer.
LexicalAnalyzer lexer = new LexicalAnalyzer();
// Start lexing and parsing.
processFile(lexer, br);
}
}
6. }
/**
* Reads each line of the input file and invokes the lexer and
parser for each.
*/
static void processFile(LexicalAnalyzer lexer,
BufferedReader br) throws IOException {
String sourceLine;
// Read each line in the source file to be compiled as a
unique sentence
// to check against the grammar.
while ((sourceLine = br.readLine()) != null) {
// Ignore empty lines and comments.
if (sourceLine.trim().length() <= 0) {
continue;
}
if (sourceLine.trim().startsWith("#")) {
7. System.out.println("Comment: " +
sourceLine.substring(1).trim());
continue;
}
// Create a new syntax analyzer over the provided lexer.
SyntaxAnalyzer parser = new SyntaxAnalyzer(lexer);
// Parse the given sentence against the given grammar.
We assume that the
// sentence, <S>, production is the start state.
try {
// Start the lexer...
lexer.start(sourceLine);
// Start the parser...
parser.analyze();
// No exceptions, so we must be good!
8. System.out.printf("The sentence '%s' follows the
BNF grammar.%n", sourceLine);
} catch (ParseException error) {
// If a syntax error was found, print that the sentence
does not follow the grammar.
System.out.printf("SYNTAX ERROR while
processing: '%s'%n", sourceLine);
System.out.printf("ERROR MSG: %s%n",
error.getErrMsg());
}
System.out.println("------------------------------------------
-----------------");
}
}
}
Assume we start with a simple "sentence" grammar as follows:
<S> ::= <NP><V><NP>
<NP> ::= <A> <N>
<V> ::= loves | hates | eats
<A> ::= a | the
<N> ::= dog | cat | rat
Part A:
Show the BNF above with the following additional grammar
elements:
· Adjectives: (0 or more Adjectives separated by commas may
precede any Noun. A comma may or may not be preceded by a
space.)
· furry
· fast
· slow
· delicious
· Adverbs: (0 or 1 Adverb may precede any Verb)
· quickly
· secretly
· Conjunctions: (0 or more may appear in any sentence)
· and
· or
· Sentence terminator (The terminator may or may not be
preceded by a space.)
· . (a single period)
· ! (a single exclamation mark)
·
Part B:
Show/Draw the syntax diagrams for each of the grammar
elements above.
Part C:
Show the parse trees (which can be generated in ANTLRWorks)
for each of the following sentences:
Examples of SYNTACTICALLY VALID Input Strings:
a dog loves the cat.
the cat eats the slow rat and the slow , furry dog secretly hates
the cat and a dog loves the rat !
Examples of SYNTACTICALLY INVALID Input Strings (where
do they fail?):
a dog barks at the cat.
the fast furry cat eats quickly
NOTE: You can generate the full parse trees from
ANTLRWorks (as can be done with the attached sample for the
base grammar) or simply draw out the corresponding ASTs
(Abstract Syntax Trees) "by hand" on paper or with a simple
drawing tool. The point of this is to have something that you
can then verify against the parse trees generated by your own
code (in the next part).
Part D:
Modify the (attached) sample code to accept valid sentences
based upon the newly defined grammar above. The parser
should also "reject" invalid sentences with a descriptive error
message. Note that the program should still accept a filename
from the "Command Line" as illustrated in the example. Please
no HARD-CODED file paths in the source.