我有一个XML文件,其中多次包含以下字符串。
<Name Area="" Title="@(String) - @(I am) - @(looking) - @(for)" ...</Name>
我想做的是用regex搜索这些字符串,剪切掉@(),然后把剩下的保存在一个新的Textfile中。我已经找到了与该字符串相匹配的模式,并计算了该模式与该字符串相匹配的频率,但我在选择和处理它们时失败了。
我是在Windows Form-App中做的。
编辑(回复 Morten Bork):我是在一个 Windows Forms 应用程序中完成所有这些工作的。
回复 Morten Bork:到目前为止,我唯一能做到的就是让用户选择要处理的文件以及新文件的保存位置,这部分工作正常。我只是在如何使用 regex 这一点上卡住了。
using System;
using System.IO;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Diagnostics;
using System.Text.RegularExpressions;
namespace Movitool
{
    /// <summary>
    /// Main window. Lets the user pick a .movalr input file and writes a copy of it,
    /// with every "@(...)" marker reduced to the text inside the parentheses, to a
    /// text file chosen through a save dialog.
    /// </summary>
    public partial class Form1 : Form
    {
        // Matches one "@(...)" marker; group 1 captures the text between the parentheses.
        // The parentheses are escaped so they are matched literally instead of opening a group.
        private static readonly Regex MarkerPattern = new Regex(@"@\(([^)]*)\)");

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>File paths shared between the two button handlers.</summary>
        public static class Globals
        {
            /// <summary>Path of the input file selected by the user.</summary>
            public static string AlrFile = string.Empty;

            /// <summary>Path of the output text file selected by the user.</summary>
            public static string AlrFileNeu = string.Empty;
        }

        /// <summary>
        /// Shows an open-file dialog and, if confirmed, stores the chosen path in
        /// <see cref="Globals.AlrFile"/> and displays it in the path text box.
        /// </summary>
        private void btnPfadAuswahl_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog fileDialog1 = new OpenFileDialog())
            {
                fileDialog1.Filter = "movalr files (*.movalr)|*.movalr";
                fileDialog1.RestoreDirectory = true;
                // Resolve the user's real Documents folder instead of a relative "Dokumente" path.
                fileDialog1.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
                if (fileDialog1.ShowDialog() == DialogResult.OK)
                {
                    Globals.AlrFile = fileDialog1.FileName;
                    txtPfad.Text = Globals.AlrFile;
                }
            }
        }

        /// <summary>
        /// Asks for an output path, then writes every line of the input file to it with
        /// all "@(...)" markers replaced by their inner text. Does nothing if no input
        /// file is available or the save dialog is cancelled.
        /// </summary>
        private void btnStartAlr_Click(object sender, EventArgs e)
        {
            // File.Exists is false for an empty path, so this also covers "nothing selected yet".
            if (!File.Exists(Globals.AlrFile))
            {
                lblStatus.Text = "Bitte zuerst eine Eingabedatei angeben.";
                return;
            }

            using (SaveFileDialog fileDialog2 = new SaveFileDialog())
            {
                fileDialog2.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
                fileDialog2.Title = "Save File";
                fileDialog2.Filter = "txt files (*.txt)|*.txt";
                if (fileDialog2.ShowDialog() != DialogResult.OK)
                {
                    // Cancelled: without a target path there is nothing to write.
                    return;
                }

                Globals.AlrFileNeu = fileDialog2.FileName;
            }

            // Lines are streamed lazily and written in one pass, so the output file is
            // opened once. An existing file is replaced rather than appended to.
            IEnumerable<string> cleanedLines = File.ReadLines(Globals.AlrFile)
                .Select(line => MarkerPattern.Replace(line, "$1"));
            File.WriteAllLines(Globals.AlrFileNeu, cleanedLines);

            lblStatus.Text = "Fertig!";
        }
    }
}
尝试以下。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Reads a text file, strips the "@(" and ")" wrapper from every "@(...)" marker,
    /// and prints the result to the console.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>Entry point; the command-line arguments are not used.</summary>
        static void Main(string[] args)
        {
            string input = File.ReadAllText(FILENAME);

            // 'start' matches "@(", 'middle' the text up to the next ")", 'end' the ")".
            // Only 'middle' is kept by the replacement.
            string pattern = @"(?'start'@\()(?'middle'[^\)]+)(?'end'\))";
            string output = Regex.Replace(input, pattern, "${middle}");

            // Emit the result; previously it was computed and then discarded.
            Console.WriteLine(output);
        }
    }
}
下面是使用你的数据得到的输出:
<?xml version="1.0" encoding="ISO-8859-1" ?>
<Alarms>
<AlarmList>
<Alarm>
<Name Device="" Variable="" Area="1" ThresholdExclusive="0" Enabled="1" OnQualityGood="1" VariableDuration="0" EnableVariable="" EnableDispMsg="" Hysteresis="0" EventsCache="1024"></Name>
<ThresholdList>
<Threshold>
<Name Area="" Title="My - Searched - String" Help="" DurationFormat="" ReadAccessLevel="4294901760" WriteAccessLevel="4294901760">On</Name>
<Execution Condition="2" Threshold="1" ThresholdVar="" ThresholdLow="0" ThresholdVarLow="" VariableStatus="" Severity="10" SeverityVar="" SecDelay="0" RunCommandAtServer="0"/>
<Commands/>
<CommandsOn/>
<CommandsAck/>
<CommandsReset/>
<CommandsOff/>
<Style BackColor="4294967295" TextColor="65535" BlinkBackColor="4294967295" BlinkTextColor="4294967295" Print="1" Log="1" BlinkOnNewAlarm="0" VarTimeStamp="0" SupportAck="0" SupportReset="0" SupportResetConditionOn="0" BmpFile="" SndFile="" BeepEnabled="0" SpeechEnabled="0" RepeatSpeechEverySec="0" EnableSpeechVariable="" PlaysoundContinuosly="0" CommentOnAck="0"/>
<Recipient Attachment="" DispatchingText=""/>
<SendEmail SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendVoice SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendSMS SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendFax SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendAdminAlert SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendMessenger SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<ScriptCode StartSel="0" SelLength="0" OutStatusBar="1" OutLog="1" OutPrinter="1">
</ScriptCode>
</Threshold>
</ThresholdList>
</Alarm>
</AlarmList>
</Alarms>
下面是使用 LINQ to XML 只修改 Title 属性的版本。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication1
{
    /// <summary>
    /// Loads an XML document and prints the Title attribute of every Name element
    /// that has one, with each "@(...)" marker replaced by its inner text.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>Entry point; the command-line arguments are not used.</summary>
        static void Main(string[] args)
        {
            XDocument document = XDocument.Load(FILENAME);

            // Only the 'middle' group (the text inside the parentheses) survives the replacement.
            Regex marker = new Regex(@"(?'start'@\()(?'middle'[^\)]+)(?'end'\))");

            // Project each Name element to its Title attribute and drop the elements without one.
            IEnumerable<XAttribute> titleAttributes = document
                .Descendants("Name")
                .Select(nameElement => nameElement.Attribute("Title"))
                .Where(attribute => attribute != null);

            foreach (XAttribute titleAttribute in titleAttributes)
            {
                string cleaned = marker.Replace(titleAttribute.Value, "${middle}");
                Console.WriteLine("Title : " + cleaned);
            }

            // Wait for Enter before the program exits.
            Console.ReadLine();
        }
    }
}
回答jdweng的最后一条评论。
是的,我已经尝试了你的代码。我也试着把它和 Tim 的答案结合起来,他建议使用 XDocument。下面是我现在正在尝试的代码:
// Walks the XML file and appends the value of EVERY attribute of every element to the
// output file, one value per line, with "@(...)" markers reduced to their inner text.
// NOTE(review): the values are not filtered by attribute name, so attributes other than
// Title are written as well.
lblMatches.Text = "";

// The pattern does not change between attributes, so it is declared once up front.
string markerPattern = @"(?'start'@\()(?'middle'[^\)]+)(?'end'\))";

// 'using' closes the reader and the writer even if reading or writing throws.
using (XmlReader xr = new XmlTextReader(Globals.AlrFile))
using (StreamWriter writer = File.AppendText(Globals.AlrFileNeu))
{
    while (xr.Read())
    {
        if (xr.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        // MoveToNextAttribute returns false when the element has no (more) attributes,
        // so a separate AttributeCount check is not needed.
        while (xr.MoveToNextAttribute())
        {
            string output = Regex.Replace(xr.Value, markerPattern, "${middle}");
            writer.WriteLine(output);
        }
    }
}
但我的xml输入是这样的:
<?xml version="1.0" encoding="ISO-8859-1" ?>
<Alarms>
<AlarmList>
<Alarm>
<Name Device="" Variable="" Area="1" ThresholdExclusive="0" Enabled="1" OnQualityGood="1" VariableDuration="0" EnableVariable="" EnableDispMsg="" Hysteresis="0" EventsCache="1024"></Name>
<ThresholdList>
<Threshold>
<Name Area="" Title="@(My) - @(Searched) - @(String)" Help="" DurationFormat="" ReadAccessLevel="4294901760" WriteAccessLevel="4294901760">On</Name>
<Execution Condition="2" Threshold="1" ThresholdVar="" ThresholdLow="0" ThresholdVarLow="" VariableStatus="" Severity="10" SeverityVar="" SecDelay="0" RunCommandAtServer="0"/>
<Commands/>
<CommandsOn/>
<CommandsAck/>
<CommandsReset/>
<CommandsOff/>
<Style BackColor="4294967295" TextColor="65535" BlinkBackColor="4294967295" BlinkTextColor="4294967295" Print="1" Log="1" BlinkOnNewAlarm="0" VarTimeStamp="0" SupportAck="0" SupportReset="0" SupportResetConditionOn="0" BmpFile="" SndFile="" BeepEnabled="0" SpeechEnabled="0" RepeatSpeechEverySec="0" EnableSpeechVariable="" PlaysoundContinuosly="0" CommentOnAck="0"/>
<Recipient Attachment="" DispatchingText=""/>
<SendEmail SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendVoice SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendSMS SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendFax SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendAdminAlert SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<SendMessenger SendON="0" SendACK="0" SendRESET="0" SendOFF="0"/>
<ScriptCode StartSel="0" SelLength="0" OutStatusBar="1" OutLog="1" OutPrinter="1">
</ScriptCode>
</Threshold>
</ThresholdList>
</Alarm>
</AlarmList>
</Alarms>
而我在新的文本文件中得到的输出是这样的:
1
0
1
1
0
0
1024
My - Searched - String
4294901760
4294901760
2
1
0
10
0
0
4294967295
65535
4294967295
4294967295
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
而我的问题是:我只需要 Title 属性中的文本,并且我有多个 Alarm 元素需要处理。