我目前正在第一次尝试制作自己的小编程语言。截至目前,我正在创建基本的词法分析器。我试图在我的代码中允许浮点值,但是可惜,它被分为标识符和浮点值(参见下面的输出)
问题:如何修复我的词法分析器以正确处理浮动?
代码:
function lexer(input) {
const tokens = [];
const keywords = new Set(['let', 'var', 'const', 'def', 'float', 'floater', 'double', 'int', 'integer', 'bool', 'boolean', 'string', 'char']);
const alphaNumericRegex = /[a-zA-Z0-9]/;
const digitRegex = /\d/;
let current = 0;
let length = input.length;
while (current < length) {
let char = input[current];
if (char === ' ' || char === '\n') {
current++;
continue;
}
if (alphaNumericRegex.test(char)) {
let wordStart = current;
while (alphaNumericRegex.test(char)) {
char = input[++current];
}
let word = input.slice(wordStart, current);
if (keywords.has(word)) {
tokens.push({ type: 'keyword', value: word });
} else {
tokens.push({ type: 'identifier', value: word });
}
continue;
}
if (digitRegex.test(char) || char === '.') {
let numStart = current;
let hasDecimal = false;
while (digitRegex.test(char) || (!hasDecimal && char === '.')) {
if (char === '.') {
hasDecimal = true;
}
char = input[++current];
}
let numStr = input.slice(numStart, current);
let num = parseFloat(numStr);
if (isNaN(num)) {
throw new SyntaxError("Invalid number");
}
if (Number.isInteger(num)) {
tokens.push({ type: 'number', value: num });
} else {
tokens.push({ type: 'number', value: parseFloat(numStr) });
}
continue;
}
if (char === '"') {
let strStart = ++current;
while (input[current] !== '"') {
if (++current >= length) throw new SyntaxError("Unterminated string literal");
}
let str = input.slice(strStart, current++);
tokens.push({ type: 'string', value: str });
continue;
}
if (char === "'") {
let charValue = input[++current];
if (input[++current] === "'") {
tokens.push({ type: 'char', value: charValue });
current++;
} else {
throw new SyntaxError("Invalid character literal");
}
continue;
}
if (char === '=') {
tokens.push({ type: 'assign' });
current++;
continue;
}
if (char === ';') {
tokens.push({ type: 'semicolon' });
current++;
continue;
}
if (char === '.') {
tokens.push({ type: 'dot' });
current++;
continue;
}
current++;
}
return tokens;
}
const code = `let value = 7.24;
var count = 5;
const pi = 3.14;
bool isTrue = true;
string message = "Hello";
char initial = 'A';`;
console.log(JSON.stringify(lexer(code), null, 2));
我对 JavaScript 还有些陌生,对词法分析器也完全陌生,所以我尝试从 ChatGPT 和 AskCodi 等人工智能工具获得帮助。他们试图解决问题,而我根据他们的建议所做的任何更改都没有什么区别。
我所做的就是将 alphaNumeric 从
/[a-zA-Z0-9]/
更改为 /[a-zA-Z0-9-.]/
,试图在解析数字时包含点,很有趣,它起作用了:D
function lexer(input) {
const tokens = [];
const keywords = new Set(['let', 'var', 'const', 'def', 'float', 'floater', 'double', 'int', 'integer', 'bool', 'boolean', 'string', 'char']);
const alphaNumericRegex = /[a-zA-Z0-9-.]/;
const digitRegex = /\d/;
let current = 0;
let length = input.length;
while (current < length) {
let char = input[current];
if (char === ' ' || char === '\n') {
current++;
continue;
}
if (alphaNumericRegex.test(char)) {
let wordStart = current;
while (alphaNumericRegex.test(char)) {
char = input[++current];
}
let word = input.slice(wordStart, current);
if (keywords.has(word)) {
tokens.push({ type: 'keyword', value: word });
} else {
tokens.push({ type: 'identifier', value: word });
}
continue;
}
if (digitRegex.test(char) || char === '.') {
let numStart = current;
let hasDecimal = false;
while (digitRegex.test(char) || (!hasDecimal && char === '.')) {
if (char === '.') {
hasDecimal = true;
}
char = input[++current];
}
let numStr = input.slice(numStart, current);
let num = parseFloat(numStr);
if (isNaN(num)) {
throw new SyntaxError("Invalid number");
}
if (Number.isInteger(num)) {
tokens.push({ type: 'number', value: num });
} else {
tokens.push({ type: 'number', value: parseFloat(numStr) });
}
continue;
}
if (char === '"') {
let strStart = ++current;
while (input[current] !== '"') {
if (++current >= length) throw new SyntaxError("Unterminated string literal");
}
let str = input.slice(strStart, current++);
tokens.push({ type: 'string', value: str });
continue;
}
if (char === "'") {
let charValue = input[++current];
if (input[++current] === "'") {
tokens.push({ type: 'char', value: charValue });
current++;
} else {
throw new SyntaxError("Invalid character literal");
}
continue;
}
if (char === '=') {
tokens.push({ type: 'assign' });
current++;
continue;
}
if (char === ';') {
tokens.push({ type: 'semicolon' });
current++;
continue;
}
if (char === '.') {
tokens.push({ type: 'dot' });
current++;
continue;
}
current++;
}
return tokens;
}
const code = `let value = 7.24;
var count = 5;
const pi = 3.14;
bool isTrue = true;
string message = "Hello";
char initial = 'A';`;
console.log(JSON.stringify(lexer(code), null, 2));