如何使用此CFG随机生成字符串?

问题描述 投票:-3回答:1

我有一段描述上下文无关文法(CFG)的代码,想用它随机生成符合该文法的字符串,例如:

“John thinks that Mary hates every green cat”(即“约翰认为玛丽讨厌每只绿猫”)

但是我当前的输出是:

[['_S', ['_NP _VP']], ['_NP', ['_Det _Adj _N', '_Det _N', '_Adj _PropN', '_PropN']], ['_VP', ['_Vi', '_Vt _NP', '_Vc _Comp _S']]]
[['_Det', ['the', 'a', 'some', 'any', 'every']], ['_Adj', ['green', 'young', 'tired', 'confused']], ['_N', ['dog', 'cat']], ['_PropN', ['John', 'Mary']], ['_Vi', ['sleeps', 'walks']], ['_Vt', ['loves', 'hates']], ['_Vc', ['says', 'thinks', 'believes']], ['_Comp', ['that']]]

请帮助!

import random


# Phrase-structure rules, one production per line: "LHS → alt | alt | ...".
psg_rules_str = (
    "S → NP VP\n"
    "NP → Det Adj N | Det N | Adj PropN | PropN\n"
    "VP → Vi | Vt NP | Vc Comp S"
)

# Lexicon in the same notation: each category lists its candidate words.
terminals_str = (
    "Det → the | a | some | any | every\n"
    "Adj → green | young | tired | confused\n"
    "N → dog | cat\n"
    "PropN → John | Mary\n"
    "Vi → sleeps | walks\n"
    "Vt → loves | hates\n"
    "Vc → says | thinks | believes\n"
    "Comp → that"
)

# Convert each production into [tag, alternatives]. Every grammar symbol is
# prefixed with "_" so that symbols can be told apart from plain words.
psg_rules_list = []
for production in psg_rules_str.split("\n"):
    head, body = production.split("→")
    alternatives = [
        "_" + " _".join(alt.strip().split(" "))
        for alt in body.split("|")
    ]
    psg_rules_list.append(["_" + head.strip(), alternatives])
print(psg_rules_list)
# [['_S', ['_NP _VP']], ['_NP', ['_Det _Adj _N', '_Det _N', '_Adj _PropN', '_PropN']], ['_VP', ['_Vi', '_Vt _NP', '_Vc _Comp _S']]]

# Convert each lexicon line into [tag, words]; only the category is prefixed,
# the words themselves are kept as written.
terminals_list = []
for entry in terminals_str.split("\n"):
    category, words = entry.split("→")
    terminals_list.append(
        ["_" + category.strip(), [word.strip() for word in words.split("|")]]
    )
print(terminals_list)
# [['_Det', ['the', 'a', 'some', 'any', 'every']], ['_Adj', ['green', 'young', 'tired', 'confused']], ['_N', ['dog', 'cat']], ['_PropN', ['John', 'Mary']], ['_Vi', ['sleeps', 'walks']], ['_Vt', ['loves', 'hates']], ['_Vc', ['says', 'thinks', 'believes']], ['_Comp', ['that']]]

def reachTerminals(from_nts, with_rules, with_ts):
    """Unfinished attempt at expanding grammar symbols into a sentence.

    from_nts   -- space-separated start symbol(s); normalised below.
    with_rules -- sequence of [tag, alternatives] entries for non-terminals.
    with_ts    -- sequence of [tag, words] entries for terminals.

    NOTE: as written, the loop body never rewrites ``from_nts``, so once the
    loop is entered ``nts_todo`` is recomputed to the same value forever and
    the function does not terminate. There is also no ``return`` statement,
    so the function yields None when the loop is never entered.
    """
    # Drop any underscores, trim, re-prefix every space-separated symbol with
    # "_" and upper-case the result (e.g. "s" becomes "_S").
    from_nts = str.upper("_" + from_nts.replace("_", "").strip().replace(" ", " _"))
    rule_tags = [a[0] for a in with_rules]
    # Collected but not used anywhere else in this function body.
    ts_tags = [a[0] for a in with_ts]
    # Rule tags that occur in from_nts (substring test, not whole-symbol test).
    nts_todo = [a for a in rule_tags if a in from_nts]
    while nts_todo != list():
        tag = nts_todo[0]
        wr_index = rule_tags.index(tag)
        # Alternatives for the chosen tag; looked up but never applied.
        repl_choices = with_rules[wr_index][1]

        # from_nts has not changed, so this yields the same list as before.
        nts_todo = [a for a in rule_tags if a in from_nts]


# Request a derivation that starts from the "s" symbol, using the rule and
# lexicon tables built earlier in this script.
sentence = reachTerminals(
    from_nts="s",
    with_rules=psg_rules_list,
    with_ts=terminals_list,
)
python context-free-grammar
1个回答
0
投票

您的程序已经接近可以运行了。下面是补全 reachTerminals 函数的一种方法:

import random

# Phrase-structure rules, one production per line: "LHS → alt | alt | ...".
psg_rules_str = (
    "S → NP VP\n"
    "NP → Det Adj N | Det N | Adj PropN | PropN\n"
    "VP → Vi | Vt NP | Vc Comp S"
)

# Lexicon in the same notation: each category lists its candidate words.
terminals_str = (
    "Det → the | a | some | any | every\n"
    "Adj → green | young | tired | confused\n"
    "N → dog | cat\n"
    "PropN → John | Mary\n"
    "Vi → sleeps | walks\n"
    "Vt → loves | hates\n"
    "Vc → says | thinks | believes\n"
    "Comp → that"
)

# Build [tag, alternatives] pairs. Every grammar symbol receives a "_" prefix
# so that symbols can be distinguished from ordinary words.
psg_rules_list = []
for rule_line in psg_rules_str.split("\n"):
    lhs, rhs = rule_line.split("→")
    expansions = []
    for option in rhs.split("|"):
        symbols = option.strip().split(" ")
        expansions.append("_" + " _".join(symbols))
    psg_rules_list.append(["_" + lhs.strip(), expansions])

# Build [tag, words] pairs; only the category name is prefixed.
terminals_list = []
for lexicon_line in terminals_str.split("\n"):
    lhs, rhs = lexicon_line.split("→")
    vocabulary = [word.strip() for word in rhs.split("|")]
    terminals_list.append(["_" + lhs.strip(), vocabulary])

def reachTerminals(from_nts, with_rules, with_ts):
    """Randomly expand grammar symbols until only words remain.

    Args:
        from_nts: Space-separated start symbol(s). Underscores are removed,
            every symbol is re-prefixed with "_" and the whole string is
            upper-cased, so "s" selects the tag "_S". Because of the
            upper-casing, only all-uppercase tags can be named here.
        with_rules: Sequence of ``[tag, alternatives]`` entries for
            non-terminals; each alternative is a space-separated string of
            symbols. If a tag is listed twice, the first entry is used.
        with_ts: Sequence of ``[tag, words]`` entries for terminals. If a
            tag is listed twice, the first entry is used.

    Returns:
        The generated words joined by single spaces. Symbols that appear in
        neither table are emitted unchanged.

    Symbols are matched as whole whitespace-separated tokens rather than as
    substrings, so a tag that is a prefix of another tag (for example "_N"
    and "_NP") can never corrupt the longer one.

    Expansion stops only when no expandable symbol is left; a grammar whose
    rules always re-introduce a non-terminal will not terminate.
    """
    start = str.upper("_" + from_nts.replace("_", "").strip().replace(" ", " _"))

    # setdefault keeps the first entry for a duplicated tag.
    rules = {}
    for entry in with_rules:
        rules.setdefault(entry[0], entry[1])
    lexicon = {}
    for entry in with_ts:
        lexicon.setdefault(entry[0], entry[1])

    words = []
    # Stack of symbols still to process; reversed so the leftmost is on top.
    pending = start.split()[::-1]
    while pending:
        symbol = pending.pop()
        if symbol in rules:
            expansion = random.choice(rules[symbol])
            pending.extend(reversed(expansion.split()))
        elif symbol in lexicon:
            words.append(random.choice(lexicon[symbol]))
        else:
            words.append(symbol)

    return " ".join(words)


# Produce one random sentence starting from the "s" symbol and print it.
print(
    reachTerminals(
        from_nts="s",
        with_rules=psg_rules_list,
        with_ts=terminals_list,
    )
)

您要用到的重要工具是 random.choice 函数,以及 str.replace 函数的第三个参数(count),该参数可以限定只替换第一次出现的子字符串。我还没有对代码进行彻底的测试,但它似乎可以正常工作。输出示例:

green John loves some confused dog

Mary says that the tired dog says that some green cat hates some cat

© www.soinside.com 2019 - 2024. All rights reserved.