为什么我的 JavaScript 词法分析器将我的浮点数拆分为标识符和浮点数?

问题描述 投票:0回答:1

我目前正在第一次尝试制作自己的小编程语言。截至目前,我正在创建基本的词法分析器。我试图在我的代码中允许浮点值,但是可惜,它被分为标识符和浮点值(参见下面的输出)

问题:如何修复我的词法分析器以正确处理浮动?

输出: 令值 = 7.24 常量 pi = 3.14

代码:

function lexer(input) {
    const tokens = [];
    const keywords = new Set(['let', 'var', 'const', 'def', 'float', 'floater', 'double', 'int', 'integer', 'bool', 'boolean', 'string', 'char']);
    const alphaNumericRegex = /[a-zA-Z0-9]/;
    const digitRegex = /\d/;

    let current = 0;
    let length = input.length;

    while (current < length) {
        let char = input[current];

        if (char === ' ' || char === '\n') {
            current++;
            continue;
        }

        if (alphaNumericRegex.test(char)) {
            let wordStart = current;
            while (alphaNumericRegex.test(char)) {
                char = input[++current];
            }
            let word = input.slice(wordStart, current);

            if (keywords.has(word)) {
                tokens.push({ type: 'keyword', value: word });
            } else {
                tokens.push({ type: 'identifier', value: word });
            }

            continue;
        }

        if (digitRegex.test(char) || char === '.') {
            let numStart = current;
            let hasDecimal = false;
        
            while (digitRegex.test(char) || (!hasDecimal && char === '.')) {
                if (char === '.') {
                    hasDecimal = true;
                }
                char = input[++current];
            }
        
            let numStr = input.slice(numStart, current);
            let num = parseFloat(numStr);
        
            if (isNaN(num)) {
                throw new SyntaxError("Invalid number");
            }
        
            if (Number.isInteger(num)) {
                tokens.push({ type: 'number', value: num });
            } else {
                tokens.push({ type: 'number', value: parseFloat(numStr) });
            }
        
            continue;
        }

        if (char === '"') {
            let strStart = ++current;
            while (input[current] !== '"') {
                if (++current >= length) throw new SyntaxError("Unterminated string literal");
            }
            let str = input.slice(strStart, current++);
            tokens.push({ type: 'string', value: str });
            continue;
        }

        if (char === "'") {
            let charValue = input[++current];
            if (input[++current] === "'") {
                tokens.push({ type: 'char', value: charValue });
                current++;
            } else {
                throw new SyntaxError("Invalid character literal");
            }
            continue;
        }

        if (char === '=') {
            tokens.push({ type: 'assign' });
            current++;
            continue;
        }

        if (char === ';') {
            tokens.push({ type: 'semicolon' });
            current++;
            continue;
        }

        if (char === '.') {
            tokens.push({ type: 'dot' });
            current++;
            continue;
        }

        current++;
    }

    return tokens;
}

const code = `let value = 7.24;
var count = 5;
const pi = 3.14;
bool isTrue = true;
string message = "Hello";
char initial = 'A';`;

console.log(JSON.stringify(lexer(code), null, 2));

我对 JavaScript 还有些陌生,对词法分析器也完全陌生,所以我尝试从 ChatGPT 和 AskCodi 等人工智能工具获得帮助。他们试图解决问题,而我根据他们的建议所做的任何更改都没有什么区别。

javascript lexer
1个回答
0
投票

我所做的就是将 alphaNumeric 从

/[a-zA-Z0-9]/
更改为
/[a-zA-Z0-9-.]/
,试图在解析数字时包含点,很有趣,它起作用了:D

function lexer(input) {
    const tokens = [];
    const keywords = new Set(['let', 'var', 'const', 'def', 'float', 'floater', 'double', 'int', 'integer', 'bool', 'boolean', 'string', 'char']);
    const alphaNumericRegex = /[a-zA-Z0-9-.]/;
    const digitRegex = /\d/;

    let current = 0;
    let length = input.length;

    while (current < length) {
        let char = input[current];

        if (char === ' ' || char === '\n') {
            current++;
            continue;
        }

        if (alphaNumericRegex.test(char)) {
            let wordStart = current;
            while (alphaNumericRegex.test(char)) {
                char = input[++current];
            }
            let word = input.slice(wordStart, current);

            if (keywords.has(word)) {
                tokens.push({ type: 'keyword', value: word });
            } else {
                tokens.push({ type: 'identifier', value: word });
            }

            continue;
        }

        if (digitRegex.test(char) || char === '.') {
            let numStart = current;
            let hasDecimal = false;
        
            while (digitRegex.test(char) || (!hasDecimal && char === '.')) {
                if (char === '.') {
                    hasDecimal = true;
                }
                char = input[++current];
            }
        
            let numStr = input.slice(numStart, current);
            let num = parseFloat(numStr);
        
            if (isNaN(num)) {
                throw new SyntaxError("Invalid number");
            }
        
            if (Number.isInteger(num)) {
                tokens.push({ type: 'number', value: num });
            } else {
                tokens.push({ type: 'number', value: parseFloat(numStr) });
            }
        
            continue;
        }

        if (char === '"') {
            let strStart = ++current;
            while (input[current] !== '"') {
                if (++current >= length) throw new SyntaxError("Unterminated string literal");
            }
            let str = input.slice(strStart, current++);
            tokens.push({ type: 'string', value: str });
            continue;
        }

        if (char === "'") {
            let charValue = input[++current];
            if (input[++current] === "'") {
                tokens.push({ type: 'char', value: charValue });
                current++;
            } else {
                throw new SyntaxError("Invalid character literal");
            }
            continue;
        }

        if (char === '=') {
            tokens.push({ type: 'assign' });
            current++;
            continue;
        }

        if (char === ';') {
            tokens.push({ type: 'semicolon' });
            current++;
            continue;
        }

        if (char === '.') {
            tokens.push({ type: 'dot' });
            current++;
            continue;
        }

        current++;
    }

    return tokens;
}

const code = `let value = 7.24;
var count = 5;
const pi = 3.14;
bool isTrue = true;
string message = "Hello";
char initial = 'A';`;

console.log(JSON.stringify(lexer(code), null, 2));

© www.soinside.com 2019 - 2024. All rights reserved.