我挑战自己的问题的细节是,我想实现一个词法分析器,并且假设解析一个输入文本文件,如果它在语法中,则返回 true。如果它不在输出内部,则应返回 False。我的代码中有一个小错误,我似乎无法弄清楚。
Lexer.java
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Lexer {
private static final String[] TOKEN_PATTERNS = {
"if", "while", "int|float|boolean|char|String", "else",
"true|false", "\\d+\\.\\d+", "\\{", "\\}", "\\(", "\\)",
";", ",", "==", "\\+", "-", "\\*", "/", "%",
">=", "<=", ">", "<", "=", "!=", "&&", "\\|\\|", "\\\\d+", "\\\\w+"
};
public List<Token> tokenize(String input) {
List<Token> tokens = new ArrayList<>();
int currentPosition = 0;
while (currentPosition < input.length()) {
Token token = null;
for (int i = 0; i < TOKEN_PATTERNS.length; i++) {
Pattern pattern = Pattern.compile("^" + TOKEN_PATTERNS[i]);
Matcher matcher = pattern.matcher(input.substring(currentPosition));
if (matcher.find()) {
token = new Token(Token.TokenType.values()[i], matcher.group());
currentPosition += matcher.end();
break;
}
}
if (token == null) {
currentPosition++;
} else {
tokens.add(token);
}
}
return tokens;
}
}
Token.java
public class Token {
public enum TokenType {
IF, WHILE, DATATYPE, Else, BOOLEAN, FLOAT_LIT,
LBRACE, RBRACE, LPAREN, RPAREN,
SEMICOLON, COMMA, EQ,
PLUS, MINUS, MULT, DIV, MOD,
GTE, LTE, GT, LT, ASSIGN, NEQ, AND, OR, INT_LIT, ID;
}
private TokenType type;
private String value;
public Token(TokenType type, String value) {
this.type = type;
this.value = value;
}
public TokenType getType() {
return type;
}
public String getValue() {
return value;
}
public String toString() {
return "[" + type + ", " + value + "]";
}
}
解析器.java
import java.text.ParseException;
import java.util.List;
public class Parser {
private List<Token> tokens;
private int currentTokenIndex;
public Parser(List<Token> tokens) {
this.tokens = tokens;
this.currentTokenIndex = 0;
}
private boolean match(Token.TokenType type){
if (currentTokenIndex < tokens.size() && tokens.get(currentTokenIndex).getType()==type){
currentTokenIndex++;
return true;
}
return false;
}
public boolean parse() {
try {
stmt();
return true;
} catch (Exception e) {
return false;
}
}
//currentTokenIndex --> 0
// tokens.get(currentTokenIndex).getValue() --> if
private void stmt() throws ParseException {
System.out.println("Processing statement at position " + currentTokenIndex + ": " + tokens.get(currentTokenIndex).getValue());
Token token = tokens.get(currentTokenIndex);
System.out.println(token);
if (token.getType() == Token.TokenType.IF) {
parseIfStmt();
} else if (token.getType() == Token.TokenType.LBRACE) {
parseBlock();
} else if (token.getType() == Token.TokenType.DATATYPE) {
parseAssign();
} else if (token.getType() == Token.TokenType.ID) {
parseDeclare();
} else if (token.getType() == Token.TokenType.WHILE) {
parseWhileLoop();
} else {
throw new ParseException("Invalid statement at position " + currentTokenIndex, currentTokenIndex);
}
}
private void parsestmtlist() {
}
private void parseWhileLoop() {
// TODO Auto-generated method stub
}
private void parseDeclare() {
// TODO Auto-generated method stub
}
private void parseAssign() {
// TODO Auto-generated method stub
}
private void parseBlock() throws ParseException {
}
private void parseIfStmt() throws ParseException {
match(Token.TokenType.IF);
System.out.println("Got a IF Match");
System.out.println("Processing statement at position " + currentTokenIndex + ": " + tokens.get(currentTokenIndex).getValue());
if(match(Token.TokenType.LPAREN)) {
System.out.println("Got a LPAREN Match At " + (currentTokenIndex - 1) + ": " + tokens.get(currentTokenIndex -1).getValue());
parseBoolExpr();
System.out.println("Processing statement at position " + currentTokenIndex + ": " + tokens.get(currentTokenIndex).getValue());
if(match(Token.TokenType.RPAREN)) {
System.out.println("Got a RPAREN Match At " + (currentTokenIndex - 1) + ": " + tokens.get(currentTokenIndex -1).getValue());
parseBlock();
if(match(Token.TokenType.Else)) {
System.out.println("Got a Else Match At " + (currentTokenIndex - 1) + ": " + tokens.get(currentTokenIndex -1).getValue());
parseBlock();
}
}
else {
throw new ParseException("Invalid statement at position " + currentTokenIndex, currentTokenIndex);
}
}
}
private void parseBoolExpr()throws ParseException {
parseBTerm();
Token token = tokens.get(currentTokenIndex);
if(match(Token.TokenType.LTE) || match(Token.TokenType.GTE) || match(Token.TokenType.GT) || match(Token.TokenType.LT) ) {
System.out.println("Got a " + token.getType() + " Match At " + (currentTokenIndex - 1) + ": " + tokens.get(currentTokenIndex -1).getValue());
}else {
throw new ParseException("Invalid statement at position " + currentTokenIndex, currentTokenIndex);
}
parseBTerm();
}
private void parseBTerm() throws ParseException {
parseBAnd();
Token token = tokens.get(currentTokenIndex);
if(match(Token.TokenType.EQ) || match(Token.TokenType.NEQ) ) {
System.out.println("Got a " + token.getType() + " Match At " + (currentTokenIndex - 1) + ": " + tokens.get(currentTokenIndex -1).getValue());
}else {
throw new ParseException("Invalid statement at position " + currentTokenIndex, currentTokenIndex);
}
parseBAnd();
}
private void parseBAnd() throws ParseException {
parseBOr();
Token token = tokens.get(currentTokenIndex);
if(match(Token.TokenType.AND)) {
System.out.println("Got a " + token.getType() + " Match At " + (currentTokenIndex - 1) + ": " + tokens.get(currentTokenIndex -1).getValue());
}else {
throw new ParseException("Invalid statement at position " + currentTokenIndex, currentTokenIndex);
}
parseBOr();
}
private void parseBOr() {
// TODO Auto-generated method stub
}
private void parsefact() throws ParseException {
}
private void parseTerm() throws ParseException {
}
private void parseExpr() throws ParseException{
}
}
Main.java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
public class Main {
public static void main(String[] args) throws IOException {
String input = Files.readString(Paths.get("C:/Users/imomi/Documents/Input.txt"));
Lexer lexer = new Lexer();
List<Token> tokens = lexer.tokenize(input);
Parser parser = new Parser(tokens);
boolean result = parser.parse();
System.out.println("Parsing result: " + (result ? "Success" : "Failure"));
}
}
语法也是
Design a recursive decent algorithm for the following Grammar that takes in a list of tokens and returns true if the token list is in the language:
<STMT> --> <IF_STMT> | <BLOCK> | <ASSIGN> | <DECLARE> |<WHILE_LOOP>
<STMT_LIST> --> { <STMT> `;` }
<WHILE_LOOP> --> `while` `(` <BOOL_EXPR> `)` <BLOCK>
<IF_STMT> --> `if` `(` <BOOL_EXPR> `)` <BLOCK> [ `else` <BLOCK> ]
<BLOCK> --> `{` <STMT_LIST> `}`
<DECLARE> --> `DataType` ID {`,` ID }
<ASSIGN> --> ID `=` <EXPR>
<EXPR> --> <TERM> {(`+`|`-`) <TERM>}
<TERM> --> <FACT> {(`*`|`/`|`%`) <FACT>}
<FACT> --> ID | INT_LIT | FLOAT_LIT | `(` <EXPR> `)`
<BOOL_EXPR> --> <BTERM> {(`>`|`<`|`>=`|`<=`) <BTERM>}
<BTERM> --> <BAND> {(`==`|`!=`) <BAND>}
<BAND> --> <BOR> {`&&` <BOR>}
<BOR> --> <EXPR> {`&&` <EXPR>}
我注意到,每当我从 parseBAnd() 中删除 else 块时,我都会得到一个成功的输出(见下面的输出)
Processing statement at position 0: if
[IF, if]
Got a IF Match
Processing statement at position 1: (
Got a LPAREN Match At 1: (
Got a AND Match At 2: &&
Got a EQ Match At 3: ==
Got a LTE Match At 4: <=
Got a NEQ Match At 5: !=
Got a AND Match At 6: &&
Processing statement at position 7: )
Got a RPAREN Match At 7: )
Got a Else Match At 8: else
Parsing result: Success
根据语法,这是正确的。但我问自己如果它不是一个 AND 操作数怎么办,所以我包括了
throw new ParseException("Invalid statement at position " + currentTokenIndex, currentTokenIndex);
}
但是无论何时,我运行包含这个 else 块的输出我都会收到
的输出Processing statement at position 0: if
[IF, if]
Got a IF Match
Processing statement at position 1: (
Got a LPAREN Match At 1: (
Got a AND Match At 2: &&
Got a EQ Match At 3: ==
Parsing result: Failure
我能做些什么来修复这个错误?因为我需要在匹配不正确时抛出异常,但即使输入文本文件输入正确也会导致代码失败。