我尝试将各种输入字符串解析为对象列表,其中一些输入可能包含标点符号。
但我一直没能实现“持续消耗字符,直到遇到某个字符串块为止”之类的解析。
以下测试输入字符串可以正常解析:
// Sample inputs that parse successfully: the free text around the {{...}} objects
// contains only letters and digits.
var inputs = new List<string>
{
    "prefix{{test='5'}}middle{{'test string'}}{{'test1'}}postfix",
    "{{test='5'}}middle{{'test string'}}{{'test1'}}postfix",
    "{{test='5'}}middle{{'test string'}}{{'test1'}}",
};
然而,上面这些输入中的文本部分都只包含字母和数字。所以下面这个输入无法解析:
// Sample input whose free text contains punctuation and a lone closing brace.
string notWorkingInput = ".?n6y{{test='5'}}+*854d{{'test string'}}{{'test1'}}ret0}";
notWorkingInput
的预期结果是这样的:
Name = ".?n6y"
Name = "test", Value="5"
Name = "+*854d"
Name = "test string"
Name = "test1"
Name = "ret0}"
我使用Pidgin解析库。
完整的工作代码:
/// <summary>
/// Pidgin grammar that splits an input string into a sequence of <see cref="ITest"/> items:
/// quoted-string objects (<c>{{'text'}}</c>), name/value objects (<c>{{name=value}}</c>)
/// and the free text found between them.
/// </summary>
public class TestParser {
    private static readonly Parser<char, string> ObjectStart = String("{{");
    private static readonly Parser<char, string> ObjectEnd = String("}}");
    private static readonly Parser<char, string> Equal = String("=");

    // Wraps a parser so that it backtracks on failure and skips any whitespace that follows it.
    private static Parser<char, T> Tok<T>(Parser<char, T> token) => Try(token).Before(SkipWhitespaces);
    private static Parser<char, string> Tok(string token) => Tok(String(token));

    // Double-quoted literal; a doubled quote ("") inside the literal stands for one quote character.
    private static readonly Parser<char, string> StringLiteralDouble = Tok(Try(String("\"\"")).ThenReturn('"')
        .Or(Token(c => c != '"'))
        .ManyString()
        .Between(Char('"')));

    // Single-quoted literal; a doubled quote ('') inside the literal stands for one quote character.
    private static readonly Parser<char, string> StringLiteralSingle = Tok(Try(String("''")).ThenReturn('\'')
        .Or(Token(c => c != '\''))
        .ManyString()
        .Between(Char('\'')));

    private static readonly Parser<char, string> StringLiteral = Tok(OneOf(StringLiteralDouble, StringLiteralSingle));

    // {{ 'text' }} -> Test1 carrying the literal's content.
    private static readonly Parser<char, ITest> Test1Statement = Tok(StringLiteral.Between(Whitespaces).Between(ObjectStart, ObjectEnd))
        .Select<ITest>(s => new Test1(s));

    // {{name=value}} -> Test2; the value is tried as Num, Real, a string literal, then letters/digits.
    private static readonly Parser<char, ITest> Test2Statement = Tok(Letter.ManyString()).Before(Tok(Equal))
        .Then(OneOf(Num.Select<dynamic>(s => s), Real.Select<dynamic>(s => s), StringLiteral.Select<dynamic>(s => s), LetterOrDigit.ManyString().Select<dynamic>(s => s)), (s, v) => new Test2(s, v)).Between(ObjectStart, ObjectEnd)
        .Select<ITest>(s => s);

    // Free text between objects: one or more characters, stopping right before the next "{{"
    // or at end of input. Punctuation and lone braces are therefore part of the text.
    // The inner Try keeps a partial "{" match from consuming input; the outer Try rewinds
    // the "{{" that Not consumed when the text has reached an object start.
    private static readonly Parser<char, ITest> TextStatement = Try(Not(Try(ObjectStart))).Then(Any)
        .AtLeastOnceString()
        .Select<ITest>(s => new Text(s));

    private static readonly Parser<char, ITest> Statement = OneOf(Test1Statement, Test2Statement, TextStatement);
    private static readonly Parser<char, IEnumerable<ITest>> Statements = Statement.Many();

    /// <summary>Runs the grammar over <paramref name="input"/>.</summary>
    /// <param name="input">The text to parse.</param>
    /// <returns>The Pidgin parse result holding the parsed items on success.</returns>
    public static Result<char, IEnumerable<ITest>> Parse(string input) => Statements.Parse(input);
}
/// <summary>Common shape of every item produced by the parser.</summary>
public interface ITest {
/// <summary>Name (or text content) of the parsed item.</summary>
string Name { get; }
}
/// <summary>Parsed item that carries only a name.</summary>
public class Test1 : ITest {
    /// <summary>Creates the item with the given name.</summary>
    public Test1(string name) => Name = name;

    /// <summary>Name supplied at construction.</summary>
    public string Name { get; }
}
/// <summary>Parsed item that carries a name together with a dynamically typed value.</summary>
public class Test2 : ITest {
    /// <summary>Creates the item from a name and its value.</summary>
    public Test2(string name, dynamic value) {
        this.Name = name;
        this.Value = value;
    }

    /// <summary>Name supplied at construction.</summary>
    public string Name { get; }

    /// <summary>Value supplied at construction.</summary>
    public dynamic Value { get; }
}
/// <summary>Parsed item that carries a run of text in <see cref="Name"/>.</summary>
public class Text : ITest {
    /// <summary>Creates the item with the given text.</summary>
    public Text(string name) => Name = name;

    /// <summary>Text supplied at construction.</summary>
    public string Name { get; }
}
更新:
我测试了
AnyCharExcept('{', '}')
。它确实可以工作,但现在的问题是无法处理单个花括号。
更新 2:
如果可能的话,能在库中添加类似
AnyExcept
的解析器也会很好。
更新 3:
该库的作者建议我使用
Any.Until(Lookahead(String("{{")).Or(End))
但它不起作用。
我写了下面这个自定义解析器,为我解决了这个问题:
/// <summary>
/// Parser that consumes characters up to, but not including, the first position at which any
/// of the configured terminator strings begins (or up to the end of input) and returns the
/// consumed characters as a string. It fails, without consuming input, when it cannot consume
/// at least one character. An empty terminator matches at every position, so the parser
/// then always fails.
/// </summary>
internal class AnyStringExcept : Parser<char, string>
{
    private readonly string[] terminators;

    /// <summary>Creates the parser.</summary>
    /// <param name="terminators">Strings that end the consumed text; they are not consumed.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="terminators"/> is null.</exception>
    public AnyStringExcept(params string[] terminators)
    {
        // Fail fast here instead of with a NullReferenceException in the middle of a parse.
        this.terminators = terminators ?? throw new System.ArgumentNullException(nameof(terminators));
    }

    /// <summary>
    /// Consumes input until a terminator starts at the current position or the input ends.
    /// </summary>
    /// <returns>True when at least one character was consumed; otherwise false, with the error recorded on <paramref name="state"/>.</returns>
    public override bool TryParse(ref ParseState<char> state, ref PooledList<Expected<char>> expecteds, out string result)
    {
        var sb = new StringBuilder();
        while (state.HasCurrent && !IsAtTerminator(ref state))
        {
            sb.Append(state.Current);
            state.Advance();
        }

        if (sb.Length == 0)
        {
            // A failing parser must record its error on the state; otherwise the reported
            // parse error is unset or left over from an earlier parser.
            var atEnd = !state.HasCurrent;
            var unexpected = atEnd ? Maybe.Nothing<char>() : Maybe.Just(state.Current);
            state.SetError(unexpected, atEnd, state.Location, "expected at least one character before a terminator");
            result = "";
            return false;
        }

        result = sb.ToString();
        return true;
    }

    // Returns true when one of the terminators starts at the current input position.
    // Compares characters in place instead of allocating a string for every candidate.
    private bool IsAtTerminator(ref ParseState<char> state)
    {
        foreach (var terminator in terminators)
        {
            var window = state.LookAhead(terminator.Length);
            if (window.Length != terminator.Length)
            {
                // Fewer characters remain than the terminator needs.
                continue;
            }

            var matches = true;
            for (var i = 0; i < terminator.Length; i++)
            {
                if (window[i] != terminator[i])
                {
                    matches = false;
                    break;
                }
            }

            if (matches)
            {
                return true;
            }
        }

        return false;
    }
}
我是这样用的:
// Raw text runs up to, but not including, the next "<ls:" or "</ls:".
AnyStringExcept RAW_TEXT = new AnyStringExcept("<ls:", "</ls:");
这使用 Pidgin 3.2.1。希望对您有所帮助。