对于我编写的大多数内容,我通常不关心内存使用情况,但是,我有一个使用 EF8 在 .Net8.02 中编写的控制台应用程序,每次调用 EF 事务时都会泄漏 2MB。我尝试了所有可以想象的 GC(垃圾收集器)安排,试图强制 .Net 在调用之间释放内存,但泄漏仍然存在。我还在各种地方放置了代码片段来跟踪内存使用情况,所有这些都返回到 EF。 2MB 可能看起来不多,但是,每天,这个程序会读取大约 5,000 个日志文件(或更多),并且该程序会使机器(虚拟机或真实机器)崩溃 - 我用 GC 编写了一个内存监视器来查看可用内存,然后停止处理并等待它恢复正常(它永远不会恢复!)。
无论如何,这就是问题所在的区域(LogFileProcessor.cs)。 如果有人对如何释放 EF 占用的内存有任何想法,我将非常感激(在我返回并使用 ADO.Net Core 之前(它没有相同的问题 - 去图)
public partial class LogFileProcessor(LogDbContext dbContext, ILogger<LogFileProcessor> logger)
{
private readonly LogDbContext _dbContext = dbContext;
private readonly ILogger<LogFileProcessor> _logger = logger;
private List<LogEntry> _logEntries = new List<LogEntry>();
private string[] _lines = [];
public async Task<bool> ProcessLogFileAsync(string filePath)
{
bool result = false;
if (!File.Exists(filePath))
{
_logger.LogError("File not found: {filePath}", filePath);
return result;
}
string fileName = Path.GetFileName(filePath);
if (await LogAlreadyProcessedAsync(fileName))
{
_logger.LogInformation("Log file already processed: {fileName}", fileName);
return result;
}
string fileNameNoExt = Path.GetFileNameWithoutExtension(filePath);
DateTime fileDate = File.GetLastWriteTime(filePath);
string fileHash = ProgramBase.ComputeSha256Hash(filePath);
int logFileId = ExtractLogFileId(fileNameNoExt);
string fileType = ExtractFileType(fileNameNoExt);
using var transaction = await _dbContext.Database.BeginTransactionAsync();
try
{
var parsedLog = new ParsedLog
{
FileName = fileName,
LogType = fileType,
LogFileId = logFileId,
DateParsed = DateTime.UtcNow,
FileDate = fileDate,
FileHash = fileHash
};
await _dbContext.ParsedLogs.AddAsync(parsedLog);
await _dbContext.SaveChangesAsync();
int parsedLogId = parsedLog.Id; //retrieve new Id (identity) from ParsedLogs table
//_lines = await File.ReadLinesAsync(filePath).ToArray(); //not really needed, but if user sets log file size really large, this is better for resources
_lines = await File.ReadAllLinesAsync(filePath);
int lineNum = 0;
foreach (var line in _lines)
{
var entry = ParseLine(line, parsedLogId, lineNum);
if (entry != null)
{
_logEntries.Add(entry);
}
else
{
throw new Exception($"Unable to parse or convert line {lineNum}");
}
lineNum += 1;
}
await _dbContext.LogEntries.AddRangeAsync(_logEntries);
await _dbContext.SaveChangesAsync();
await transaction.CommitAsync();
_logger.LogInformation("Log file: {fileName} processed and data committed to the database.", fileName);
await transaction.DisposeAsync();
result = true;
}
catch (Exception ex)
{
await transaction.RollbackAsync();
_logger.LogError("Error processing log file: {fileName} {ex.Message}", fileName, ex.Message);
await transaction.DisposeAsync();
result = false;
}
finally
{
_logEntries.Clear();
_lines = [];
// Force garbage collection - naturally, this doesn't work, UGH!
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
}
return result;
}
private async Task<bool> LogAlreadyProcessedAsync(string fileName)
{
return await _dbContext.ParsedLogs.AsNoTracking().AnyAsync(l => l.FileName == fileName);
}
private static string ExtractFileType(string fileNameNoExt)
{
var match = FileTypeRegex().Match(fileNameNoExt);
return match.Success ? match.Groups[1].Value : "unknown";
}
private static int ExtractLogFileId(string fileNameNoExt)
{
var match = FileIdRegex().Match(fileNameNoExt);
return match.Success ? int.Parse(match.Groups[1].Value) : 0;
}
private static LogEntry? ParseLine(string line, int parsedLogId, int lineNum)
{
var parts = line.Split("->", StringSplitOptions.TrimEntries);
if (parts.Length < 2) return null;
var dateTimePart = parts[0].Trim();
string ipPart = string.Empty;
string statusAndRestPart;
// Check if the IP address is present
if (parts.Length == 3)
{
ipPart = parts[1].Trim();
statusAndRestPart = parts[2].Trim();
}
else
{
// Assume the IP address is missing and adjust accordingly
statusAndRestPart = parts[1].Trim();
}
var statusPart = statusAndRestPart.Split(':', StringSplitOptions.TrimEntries)[0];
var actionDetailsPart = ActionDetailsRegex().Match(statusAndRestPart);
string action = actionDetailsPart.Groups[1].Value.Trim();
string details = actionDetailsPart.Groups.Count > 2 ? actionDetailsPart.Groups[2].Value.Trim() : string.Empty;
return new LogEntry
{
ParsedLogId = parsedLogId,
LineNum = lineNum,
EntryDate = DateTime.ParseExact(dateTimePart, "ddd, dd MMM yyyy HH:mm:ss", CultureInfo.InvariantCulture),
IPaddress = ipPart,
Status = statusPart,
Action = action,
Details = details
};
}
// generates all regexes at compile time
[GeneratedRegex(@"^(.*?)_\d+$")]
private static partial Regex FileTypeRegex();
[GeneratedRegex(@"_([0-9]+)$")]
private static partial Regex FileIdRegex();
[GeneratedRegex(@"Action=\[(.*?)\](?:, Details=\[(.*?)\])?", RegexOptions.Compiled)]
private static partial Regex ActionDetailsRegex();
}
程序.cs文件:
namespace LogParserApp;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Configuration;
using System;
using System.IO;
using Microsoft.EntityFrameworkCore;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
internal partial class Program : ProgramBase
{
public static async Task Main(string[] args)
{
var settings = ParseArguments(args);
if (!settings.TryGetValue("filetype", out List<string>? value) || value.Count == 0)
{
Console.WriteLine("Please specify at least one filetype using '-filetype \"smtp, pop3\"'.");
return;
}
var host = CreateHostBuilder(args).Build();
// Access the configuration and the LogFileProcessor service
var config = host.Services.GetRequiredService<IConfiguration>();
string? folderPath = settings.TryGetValue("folderpath", out List<string>? value1) && value1.Count > 0 ? value1[0]
: config["LogFileSettings:FolderPath"];
string? archivePath = settings.TryGetValue("archivepath", out List<string>? value2) && value2.Count > 0 ? value2[0]
: config["LogFileSettings:ArchivePath"];
var logFileProcessor = host.Services.GetRequiredService<LogFileProcessor>();
string postProcess = settings.TryGetValue("postprocess", out List<string>? value3) && value3.Count > 0 ? value3[0].ToLower() : "keep";
foreach (var fileType in value)
{
var logFiles = Directory.GetFiles(folderPath ?? "C:\\logs", $"{fileType}_*.txt")
.Select(file => new
{
FileName = file,
OrderKey = int.Parse(OrderKeyRegex().Match(Path.GetFileName(file)).Groups[1].Value)
})
.OrderBy(f => f.OrderKey)
.Select(f => f.FileName);
//long memOffset = GC.GetTotalMemory(forceFullCollection: true); //for tracking memory
foreach (var file in logFiles)
{
// EnsureAvailableMemory(); //to keep program from crashing, no joy
//long startMem = GC.GetTotalMemory(forceFullCollection: true); //for tracking memory
Console.WriteLine($"Processing file: {file}");
var processSuccess = (await logFileProcessor.ProcessLogFileAsync(file));
if (processSuccess)
{
switch (postProcess)
{
case "archive":
string targetPath = Path.Combine(archivePath ?? "C:\\logs\\archive", Path.GetFileName(file));
File.Move(file, targetPath);
Console.WriteLine($"Archived file to: {targetPath}");
break;
case "delete":
File.Delete(file);
Console.WriteLine($"Deleted file: {file}");
break;
case "keep":
// Nothing to do, may add something later to keep, but rename, or what-have-you
break;
}
}
else
{
Console.WriteLine($"Processing failed for file: {file}, skipping post-processing steps.");
}
GC.Collect(0, GCCollectionMode.Forced);
//long endMem = GC.GetTotalMemory(forceFullCollection: true); //for tracking memory
//Console.WriteLine($"Memory Utilized: {(endMem - startMem) / 1048576M:N2} MB"); //for tracking memory
//Console.WriteLine($"Running Memory: {(endMem - memOffset) / 1048576M:N2} MB"); //for tracking memory
}
}
await host.RunAsync();
}
static IHostBuilder CreateHostBuilder(string[] args) =>
Host.CreateDefaultBuilder(args)
.ConfigureAppConfiguration((hostingContext, config) =>
{
config.SetBasePath(Directory.GetCurrentDirectory());
config.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true);
})
.ConfigureServices((hostContext, services) =>
{
services.AddDbContext<LogDbContext>(options =>
options.UseSqlServer(hostContext.Configuration.GetConnectionString("DefaultConnection")));
services.AddScoped<LogFileProcessor>();
services.AddLogging();
services.AddSingleton<IConfiguration>(hostContext.Configuration);
})
.ConfigureLogging(logging => {
logging.ClearProviders();
logging.AddConsole();
logging.AddFilter("Microsoft.EntityFrameworkCore.Database.Command", LogLevel.Warning);
});
// generates a regex at compile time
[GeneratedRegex(@"^.*?_(\d+)\.txt$")]
private static partial Regex OrderKeyRegex();
// this doesn't help - garbage collection never actually occurs, so it stays at 1GB & tries again indefinitely
public static void EnsureAvailableMemory()
{
const long maxAllowedMemory = 1_073_741_824; // Set threshold to 1 GB
while (true)
{
long memoryUsed = GC.GetTotalMemory(false);
Console.WriteLine($"Memory used: {memoryUsed} bytes");
if (memoryUsed < maxAllowedMemory)
{
break;
}
Console.WriteLine("Memory usage is too high, forcing garbage collection.");
GC.Collect();
GC.WaitForPendingFinalizers();
Console.WriteLine("Garbage collection complete, pausing for a few seconds...");
Thread.Sleep(5000); // Wait 5 seconds before checking again
}
}
}
注意最后一个方法(我已经用 GC 尝试了各种方法,但没有什么乐趣)。
实体(可能不会有帮助,但它们就在这里)
public class ParsedLog
{
public int Id { get; set; }
public string FileName { get; set; } = string.Empty;
public string LogType { get; set; } = string.Empty;
public int LogFileId { get; set; }
public DateTime DateParsed { get; set; }
public DateTime FileDate { get; set; }
public string? FileHash { get; set; } // SHA-256 hash of the file
}
public class LogEntry
{
public long Id { get; set; }
public int ParsedLogId { get; set; }
public int LineNum { get; set; }
public DateTime EntryDate { get; set; }
public string IPaddress { get; set; } = string.Empty;
public string Status { get; set; } = string.Empty;
public string Action { get; set; } = string.Empty;
public string Details { get; set; } = string.Empty;
}
public class LogDbContext(DbContextOptions<LogDbContext> options) : DbContext(options)
{
public DbSet<LogEntry> LogEntries { get; set; }
public DbSet<ParsedLog> ParsedLogs { get; set; }
}
我计划进行重构以显着提高速度(预先哈希文件、使用跨度、批量插入等),但是在处理数千个文件时内存问题非常严重。
以下是一些输出,演示内存一次高达 1GB 1-2MB
PS D:\Projects\LogParserApp> dotnet run -filetype "smtp" -postprocess "archive"
Processing file: D:\EmailLogs\smtp_0.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_0.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_0.txt
Memory Utilized: 12.27 MB
Running Memory: 12.49 MB
Processing file: D:\EmailLogs\smtp_1.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_1.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_1.txt
Memory Utilized: 2.78 MB
Running Memory: 15.27 MB
Processing file: D:\EmailLogs\smtp_2.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_2.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_2.txt
Memory Utilized: 2.48 MB
Running Memory: 17.74 MB
Processing file: D:\EmailLogs\smtp_3.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_3.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_3.txt
Memory Utilized: 3.28 MB
Running Memory: 21.03 MB
Processing file: D:\EmailLogs\smtp_4.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_4.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_4.txt
Memory Utilized: 2.28 MB
Running Memory: 23.31 MB
Processing file: D:\EmailLogs\smtp_5.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_5.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_5.txt
Memory Utilized: 2.55 MB
Running Memory: 25.86 MB
...
...
...
Processing file: D:\EmailLogs\smtp_370.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_370.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_370.txt
Memory Utilized: 2.36 MB
Running Memory: 999.33 MB
Processing file: D:\EmailLogs\smtp_371.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_371.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_371.txt
Memory Utilized: 2.59 MB
Running Memory: 1,001.92 MB
Processing file: D:\EmailLogs\smtp_372.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_372.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_372.txt
Memory Utilized: 2.24 MB
Running Memory: 1,004.16 MB
这适用于 373 个文件 - 想象一下 10,000 个文件。 :)
看起来您对每个日志文件使用相同的 LogFileProcessor 实例(并且通过扩展,相同的上下文)。除非您另有说明,否则添加到上下文的任何实体都将继续被跟踪,因此处理的文件越多,它将跟踪的实体就越多,从而导致更高的内存使用量。
您可以尝试在成功处理每个文件后清除上下文。只需在 ProcessLogFileAsync 方法末尾调用
_dbContext.ChangeTracker.Clear()
即可。
另外,请谨慎在代码中直接调用 GC.Collect,除非您真的真的知道自己在做什么。