我有一个相当大的CSV数据集,大约13.5MB,大约有120,000行和13列。下面的代码是我现有的当前解决方案。
private IEnumerator readDataset()
{
starsRead = 0;
var totalLines = File.ReadLines(path).Count();
totalStars = totalLines - 1;
string firstLine = File.ReadLines(path).First();
int columnCount = firstLine.Count(f => f == ',');
string[,] datasetTable = new string[totalStars, columnCount];
int lineLength;
char bufferChar;
var bufferString = new StringBuilder();
int column;
int row;
using (FileStream fs = File.Open(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
using (BufferedStream bs = new BufferedStream(fs))
using (StreamReader sr = new StreamReader(bs))
{
string line = sr.ReadLine();
while ((line = sr.ReadLine()) != null)
{
row = 0;
column = 0;
lineLength = line.Length;
for (int i = 0; i < lineLength; i++)
{
bufferChar = line[i];
if (bufferChar == ',')
{
datasetTable[row, column] = bufferString.ToString();
column++;
}
else
{
bufferString.Append(bufferChar);
}
}
row++;
starsRead++;
yield return null;
}
}
}
幸运的是,当我通过Unity协程运行此程序时,程序没有冻结,但是此当前解决方案需要近30分钟才能读取整个CSV文件。
还有其他方法可以做到这一点吗?我正在尝试将解析时间控制在1分钟以内。
您可能遇到内存问题。在代码运行时打开任务管理器,以查看您是否已达到最大内存量。
尝试以下操作:
private void readDataset()
{
List<List<string>> datasetTable = new List<List<string>>(); ;
using (StreamReader sr = new StreamReader(path))
{
string line = sr.ReadLine(); //skip header row
while ((line = sr.ReadLine()) != null)
{
datasetTable.Add(line.Split(new char[] { ',' }).ToList());
}
}
}