当输入文本文件的内容未按语法规定的顺序排列时,PLY(lex/yacc)报告语法错误

问题描述 投票:0回答:1

以下代码在文本文件按语法中的顺序排列(即先 Introduction、后 Information)时运行良好;但如果 Information 出现在 Introduction 之前,就会报错。使用 lex/yacc 该如何解决这个问题?先行致谢。

import ply.lex as lex

# Token names used by the lexer rules and the grammar below (PLY requires
# this list).
tokens = [
    # keywords
    'CheckupInformation', 'Introduction', 'Information', 'perfect', 'sick',
    # punctuation
    'LPAREN', 'RPAREN',
    # generic tokens
    'CHAR', 'NUMBER',
]
# Keyword rules. Each docstring is the token regex read by PLY, so it must
# remain the first statement of its function. PLY tries function rules in
# definition order before any string rule, so without a guard a keyword would
# also match the start of a longer identifier ("perfectly" -> perfect + "ly").
# The negative lookahead rejects a match that is followed by another
# identifier character, leaving such words to the generic CHAR rule.
def t_CheckupInformation(t):
    r'CheckupInformation(?![a-zA-Z0-9_\-])'
    return t


def t_Introduction(t):
    r'Introduction(?![a-zA-Z0-9_\-])'
    return t


def t_Information(t):
    r'Information(?![a-zA-Z0-9_\-])'
    return t


def t_perfect(t):
    r'perfect(?![a-zA-Z0-9_\-])'
    return t


def t_sick(t):
    r'sick(?![a-zA-Z0-9_\-])'
    return t



# Tokens defined as plain regex strings.
t_LPAREN  = r'\('
t_RPAREN  = r'\)'
# Generic identifier: a letter or underscore, then letters, digits, '_' or '-'.
t_CHAR = r'[a-zA-Z_][a-zA-Z0-9_\-]*'
# Characters skipped between tokens: space and tab.
t_ignore = " \t"
# Function-based token rules follow (t_newline, further down, is the rule that tracks line numbers).

# Integer literal: optional sign, then digits that may be grouped with single
# underscores (e.g. 1_000). The docstring is the token regex read by PLY, so
# it must remain the first statement. At least one digit is required, so a
# lone '_', '+' or '-' is no longer taken as a number.
def t_NUMBER(t):
    r'[+-]?[0-9]+(?:_[0-9]+)*'
    # lineno is left untouched: a number never spans a newline, and line
    # counting belongs to t_newline.
    try:
        t.value = int(t.value)
    except ValueError:
        # int() rejected the text; report it and substitute 0 so lexing goes on.
        print("Integer value too large %s" % t.value)
        t.value = 0
    return t
# Comment rule: a ';' and the rest of that line are discarded (returning None
# makes PLY drop the match). The docstring is the token regex read by PLY.
def t_SEMICOLON(t):
    r'\;.*'
    # No lineno update: '.' does not match a newline, so the comment never
    # crosses a line boundary and the line counter must not move.
    return None
# Newline rule: consumes a run of line breaks and advances the lexer's line
# counter by the number of characters matched. Nothing is returned, so no
# token is produced. The docstring is the token regex read by PLY.
def t_newline(t):
    r'\n+'
    matched = t.value
    t.lexer.lineno = t.lexer.lineno + len(matched)
# Lexer error hook: report the first character that no rule matched, then
# advance the lexer by one character so scanning can resume.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)

 # Build the lexer
# lex.lex() picks up `tokens` and the t_* rules defined in this module.
lexer = lex.lex()
# Plain container for the parse results; defined before the grammar rules
# that use it.
class stat:
    # statement starts as an empty string; intro and body start as separate
    # empty lists for each instance.
    def __init__(self):
        self.statement = ""
        self.intro, self.body = [], []


# Module-level result holder: p_Intro and p_Body append to P.intro and P.body
# while parsing, and the script at the bottom prints them.
P=stat()
# Top-level rule; the docstring is the grammar production read by PLY.
# The result is a 5-tuple holding the values of the five right-hand-side
# symbols in order.
def p_stat(p):
    'Stat : LPAREN CheckupInformation statIntro statBody RPAREN'
    p[0] = tuple(p[position] for position in range(1, 6))

# Introduction entries; the docstring is the grammar read by PLY.
# p[0] becomes the Name of the entry just matched, or None for the empty
# alternative. Matched names are also collected in the module-level P.intro.
def p_Intro(p):
    '''statIntro : LPAREN Introduction Name RPAREN
                 | statIntro LPAREN Introduction Name RPAREN
                 | empty'''

    # len(p) counts p[0] too: 5 for the first alternative, 6 for the
    # recursive one, 2 for "empty".
    if len(p) == 5:
        p[0] = p[3]
    elif len(p) == 6:
        p[0] = p[4]
    else:
        p[0] = None
    # Skip the empty alternative so that no None placeholder lands in P.intro
    # (it would otherwise be printed as "(Introduction None)").
    if p[0] is not None:
        P.intro.append(p[0])

# Name is a single CHAR token; the production takes over that token's value.
# The docstring is the grammar production read by PLY.
def p_Name(p):
    'Name : CHAR'
    char_value = p[1]
    p[0] = char_value



# Information entries; the docstring is the grammar read by PLY.
# p[0] becomes the bodyinfo value of the entry just matched, and that value
# is also appended to the module-level P.body.
def p_Body(p):
    '''statBody : LPAREN Information bodyinfo RPAREN
                | statBody LPAREN Information bodyinfo RPAREN'''
    # len(p) counts p[0] too: 5 for the first alternative, 6 for the
    # recursive one; the table gives the position of bodyinfo in each.
    bodyinfo_index = {5: 3, 6: 4}.get(len(p))
    if bodyinfo_index is not None:
        p[0] = p[bodyinfo_index]
    P.body.append(p[0])
# One parenthesised (identifier status) pair; the docstring is the grammar
# read by PLY. The result is the 2-tuple (CHAR value, status keyword value).
def p_bodyinfo(p):
    '''bodyinfo : LPAREN CHAR perfect RPAREN
                | LPAREN CHAR sick RPAREN'''
    identifier = p[2]
    status = p[3]
    p[0] = (identifier, status)


# Empty production; the docstring is the grammar read by PLY. It only prints
# a trace line each time the parser reduces by it and leaves p[0] untouched.
def p_empty(p):
    'empty :  '
    print("This function is called")
# Parser error hook. PLY passes the offending token, or None when the input
# ended before the grammar was satisfied, so the None case is reported
# separately instead of failing on p.value.
def p_error(p):
    if p is None:
        print("Syntax error: unexpected end of input!")
        return
    print("Syntax error in input '%s'!" % p.value)

import ply.yacc as yacc
parser = yacc.yacc()
import sys
if len(sys.argv) < 2 :
    sys.exit("Usage: %s <filename>" % sys.argv[0])
# Read the whole input up front; the with-block closes the file even if
# read() raises.
with open(sys.argv[1]) as fp:
    contents = fp.read()
result = parser.parse(contents)

# Print what the grammar actions collected in P, using the input file's layout.
print("(CheckupInformation")
for name in P.intro:
    # A None entry comes from the empty statIntro alternative, not from a
    # real Introduction line, so it is not printed.
    if name is not None:
        print("    (Introduction %s)" % name)
for info in P.body:
    # Each entry is the (identifier, status) pair built by p_bodyinfo.
    print("    (Information( %s %s))" % info)
print(")")

该代码适用于file1且无法处理file2。

错误:Syntax error in input '(Introduction'!(CheckupInformation (Introduction None) (Information(Anonymous1 perfect)))

File1:

(CheckupInformation
  (Introduction John)
  (Introduction Patt)
  (Information(Anonymous1 perfect))
  (Information(Anonymous2 sick))
)

File2:

(CheckupInformation

  (Information(Anonymous1 perfect))
  (Information(Anonymous2 sick))
  (Introduction John)
  (Introduction Patt)
)
python syntax-error yacc context-free-grammar ply
1个回答
0
投票

这可能不是您想要的答案,但我发现仅改动代码中的一两行无法解决问题。下面的代码还远称不上完美,但我认为它针对您的问题采用了合理的方法。我尽量为它添加了有用的注释。请仔细阅读,尝试理解我为什么这样做,并在必要时查阅 Ply 手册(代码注释中给出了一些链接,但手册里还有许多有用的背景信息,我没有逐一引用)。

祝你好运。

import ply.lex as lex

# Reserved-word table (pattern from the Ply manual,
# https://www.dabeaz.com/ply/ply.html#ply_nn6): maps each keyword's text to
# its token type, which is the keyword prefixed with 'TK_'.
reserved = {
    keyword: 'TK_' + keyword
    for keyword in (
        'CheckupInformation',
        'Introduction',
        'Information',
        'perfect',
        'sick',
    )
}

# Full token list: the two generic tokens followed by one token per keyword.
# The identifier token is named WORD rather than CHAR because CHAR suggests a
# single character.
tokens = ['NUMBER', 'WORD']
tokens.extend(reserved.values())

# Identifier rule that also recognises keywords: the matched text is looked
# up in `reserved`; a hit yields that keyword's token type, anything else is
# a WORD. The docstring is the token regex read by PLY.
def t_WORD(t):
    r'[a-zA-Z_][a-zA-Z0-9_-]*'
    text = t.value
    t.type = reserved[text] if text in reserved else 'WORD'
    return t

# Single-character literal tokens: '(' and ')' are written directly in the
# grammar rules below.
# See the Ply manual: https://www.dabeaz.com/ply/ply.html#ply_nn11
literals = '()'

# Characters skipped between tokens: space, tab and newline.
# See the Ply manual: https://www.dabeaz.com/ply/ply.html#ply_nn8
t_ignore = ' \t\n'
# Discarded comment: a ';' and the rest of that line.
t_ignore_COMMENT = r'\;.*'

# Integer literal: optional leading sign, then digits that may be grouped
# with single underscores (e.g. 1_000). At least one digit is required and an
# underscore must sit between digits, so text such as "+_", "1_" or "1__2"
# is not taken as a whole number. The docstring is the token regex read by
# PLY and must remain the first statement.
def t_NUMBER(t):
    r'[+-]?[0-9]+(?:_[0-9]+)*'
    try:
        t.value = int(t.value)
    except ValueError:
        # int() rejected the text; report it and substitute 0 so lexing goes on.
        print("Integer value too large %s" % t.value)
        t.value = 0
    return t

# Lexer error hook: report the first unmatched character together with its
# line number, then skip one character so scanning can continue.
# lineno_for_token is attached to lex.Lexer further down in this module.
def t_error(t):
    # The message previously ended with a stray apostrophe after the line number.
    print("Illegal character '%s' at line %d" % (
        t.value[0], t.lexer.lineno_for_token(t)))
    t.lexer.skip(1)

# Build the lexer
lexer = lex.lex()

# Ply tracks the character index automatically as lexer.lexpos, and every
# token it produces has a lexpos attribute, so the lineno attribute does not
# need to be repurposed as a character counter; it should hold the line
# number of the token, as its name indicates.
# Line numbers are only needed for error messages here, so they are computed
# on demand: 1 plus the number of newlines in the input before the token's
# lex position.
# NOTE(review): this patches the Lexer class, so every Lexer instance gets
# the method; attaching it only to the lexer built above may be enough.
lex.Lexer.lineno_for_token = lambda self, t: 1 + self.lexdata.count('\n', 0, t.lexpos)

# Holds one parsed top-level description. The class name is capitalised and
# the class derives from object.
class Stat(object):
    # Names of the attributes that are filled from the components dictionary
    components = {'intro', 'body'}

    def __init__(self, component_dict):
        self.statement = ""  # not used elsewhere in the visible code
        # Every known component becomes an attribute; one that is missing
        # from the dictionary defaults to an empty tuple.
        for name in self.components:
            value = component_dict[name] if name in component_dict else ()
            setattr(self, name, value)
        # Report dictionary keys that do not name a known component.
        unknown_keys = [key for key in component_dict
                        if key not in self.components]
        for key in unknown_keys:
            print("Warning! Ignoring " + key
                  + " because it is not in Stat.components")

    # Text form: the keyword followed by the repr of intro and of body
    def __repr__(self):
        parts = (self.intro, self.body)
        return '(CheckupInformation %r %r)' % parts

# Top-level rule: instead of filling a global object, the components
# dictionary is wrapped in a Stat object, which becomes the value of this
# production. The docstring is the grammar read by PLY.
def p_stat(p):
    """ stat : '(' TK_CheckupInformation components ')' """
    component_dict = p[3]
    p[0] = Stat(component_dict)

# We allow all components to be optional and order independent here. We
# also allow them all to be repeated. But that could be made more precise.

# components is a dictionary whose values are lists; the empty production
# starts it off as a fresh, empty dictionary. The docstring is the grammar
# read by PLY.
def p_components_empty(p):
    """ components : """
    p[0] = dict()

# Adds one component to the dictionary built so far; the same dictionary
# object is passed on as the result. The docstring is the grammar read by PLY.
def p_components_append(p):
    """ components : components component """
    p[0] = p[1]
    # The component arrives as a (key, value) pair.
    key, value = p[2]
    # The first value for a key starts a new list; later ones are appended.
    p[0].setdefault(key, []).append(value)

# A component is exactly one statIntro or statBody (a single element, not a
# list). Both alternatives pass the (key, value) tuple of the matched symbol
# through unchanged. The docstring is the grammar read by PLY.
def p_component(p):
    """ component : statIntro
                  | statBody
    """
    pair = p[1]
    p[0] = pair

# One Introduction entry: yields the pair ('intro', <value of WORD>).
# The docstring is the grammar read by PLY.
def p_statIntro(p):
    """statIntro : '(' TK_Introduction WORD ')' """
    name = p[3]
    p[0] = ('intro', name)

# One Information entry: yields the pair ('body', <value of bodyinfo>).
# The docstring is the grammar read by PLY.
def p_statBody(p):
    """statBody : '(' TK_Information bodyinfo ')' """
    info = p[3]
    p[0] = ('body', info)

# bodyinfo is a tuple of (identifier, status): the value of WORD followed by
# the value of the status keyword. The docstring is the grammar read by PLY.
def p_bodyinfo(p):
    """bodyinfo : '(' WORD TK_perfect ')'
                | '(' WORD TK_sick ')'
    """
    identifier = p[2]
    status = p[3]
    p[0] = (identifier, status)

# Parser error hook. PLY passes the offending token, or None when the input
# ended before the grammar was satisfied; the None case has no value or
# position to report, so it gets its own message.
def p_error(p):
    if p is None:
        print("Syntax error: unexpected end of input!")
        return
    print("Syntax error in input '%s'! at line %d" % (
        p.value, p.lexer.lineno_for_token(p)))

import ply.yacc as yacc

# Build the parser from the p_* rule functions defined in this module.
parser = yacc.yacc()

# Only do this if we're called from the command line
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2 :
        sys.exit("Usage: %s <filename>" % sys.argv[0])

    # Parse the file named on the command line; the with-block closes it.
    with open(sys.argv[1]) as fp:
        stat = parser.parse(fp.read())

    # Nothing is printed when parsing did not produce a Stat object.
    if stat is not None:
        print("(CheckupInformation")
        for name in stat.intro:
            print("    (Introduction %s)" % name)
        for info in stat.body:
            # Each entry is the (identifier, status) pair built by p_bodyinfo.
            print("    (Information( %s %s))" % info)
        print(")")
    
© www.soinside.com 2019 - 2024. All rights reserved.