关于 .Net Entity Framework 版本 8(C# 控制台应用程序 .Net8.02),GC 未修复内存泄漏

问题描述 投票:0回答:1

对于我编写的大多数内容,我通常不关心内存使用情况,但是,我有一个使用 EF8 在 .Net8.02 中编写的控制台应用程序,每次调用 EF 事务时都会泄漏 2MB。我尝试了所有可以想象的 GC(垃圾收集器)安排,试图强制 .Net 在调用之间释放内存,但泄漏仍然存在。我还在各种地方放置了代码片段来跟踪内存使用情况,所有这些都返回到 EF。 2MB 可能看起来不多,但是,每天,这个程序会读取大约 5,000 个日志文件(或更多),并且该程序会使机器(虚拟机或真实机器)崩溃 - 我用 GC 编写了一个内存监视器来查看可用内存,然后停止处理并等待它恢复正常(它永远不会恢复!)。

无论如何,这就是问题所在的区域(LogFileProcessor.cs)。 如果有人对如何释放 EF 占用的内存有任何想法,我将非常感激(在我返回并使用 ADO.Net Core 之前(它没有相同的问题 - 去图)

 public partial class LogFileProcessor(LogDbContext dbContext, ILogger<LogFileProcessor> logger)
 {
     private readonly LogDbContext _dbContext = dbContext;
     private readonly ILogger<LogFileProcessor> _logger = logger;

     private List<LogEntry> _logEntries = new List<LogEntry>();
     private string[] _lines = [];

     public async Task<bool> ProcessLogFileAsync(string filePath)
     {
         bool result = false;

         if (!File.Exists(filePath))
         {
             _logger.LogError("File not found: {filePath}", filePath);
             return result;
         }

         string fileName = Path.GetFileName(filePath);

         if (await LogAlreadyProcessedAsync(fileName))
         {
             _logger.LogInformation("Log file already processed: {fileName}", fileName);
             return result;
         }

         string fileNameNoExt = Path.GetFileNameWithoutExtension(filePath);
         DateTime fileDate = File.GetLastWriteTime(filePath);
         string fileHash = ProgramBase.ComputeSha256Hash(filePath);
         int logFileId = ExtractLogFileId(fileNameNoExt);
         string fileType = ExtractFileType(fileNameNoExt);

         using var transaction = await _dbContext.Database.BeginTransactionAsync();
         try
         {
             var parsedLog = new ParsedLog
             {
                 FileName = fileName,
                 LogType = fileType,
                 LogFileId = logFileId,
                 DateParsed = DateTime.UtcNow,
                 FileDate = fileDate,
                 FileHash = fileHash
             };

             await _dbContext.ParsedLogs.AddAsync(parsedLog);
             await _dbContext.SaveChangesAsync();

             int parsedLogId = parsedLog.Id; //retrieve new Id (identity) from ParsedLogs table

             //_lines = await File.ReadLinesAsync(filePath).ToArray();   //not really needed, but if user sets log file size really large, this is better for resources
             _lines = await File.ReadAllLinesAsync(filePath);
             int lineNum = 0;
             foreach (var line in _lines)
             {
                 var entry = ParseLine(line, parsedLogId, lineNum);
                 if (entry != null)
                 {
                     _logEntries.Add(entry);
                 }
                 else
                 {
                     throw new Exception($"Unable to parse or convert line {lineNum}");
                 }
                 lineNum += 1;
             }

             await _dbContext.LogEntries.AddRangeAsync(_logEntries);
             await _dbContext.SaveChangesAsync();

             await transaction.CommitAsync();
             _logger.LogInformation("Log file: {fileName} processed and data committed to the database.", fileName);
             await transaction.DisposeAsync();
             result = true;
         }
         catch (Exception ex)
         {
             await transaction.RollbackAsync();
             _logger.LogError("Error processing log file: {fileName} {ex.Message}", fileName, ex.Message);
             await transaction.DisposeAsync();
             result = false;
         }
         finally
         {
             _logEntries.Clear();
             _lines = [];

             // Force garbage collection - naturally, this doesn't work, UGH!
             GC.Collect();
             GC.WaitForPendingFinalizers();
             GC.Collect();
         }
         return result;
     }



     private async Task<bool> LogAlreadyProcessedAsync(string fileName)
     {
         return await _dbContext.ParsedLogs.AsNoTracking().AnyAsync(l => l.FileName == fileName);
     }

     private static string ExtractFileType(string fileNameNoExt)
     {
         var match = FileTypeRegex().Match(fileNameNoExt);
         return match.Success ? match.Groups[1].Value : "unknown";
     }

     private static int ExtractLogFileId(string fileNameNoExt)
     {
         var match = FileIdRegex().Match(fileNameNoExt);
         return match.Success ? int.Parse(match.Groups[1].Value) : 0;
     }


     private static LogEntry? ParseLine(string line, int parsedLogId, int lineNum)
     {
         var parts = line.Split("->", StringSplitOptions.TrimEntries);
         if (parts.Length < 2) return null;

         var dateTimePart = parts[0].Trim();
         string ipPart = string.Empty;
         string statusAndRestPart;

         // Check if the IP address is present
         if (parts.Length == 3)
         {
             ipPart = parts[1].Trim();
             statusAndRestPart = parts[2].Trim();
         }
         else
         {
             // Assume the IP address is missing and adjust accordingly
             statusAndRestPart = parts[1].Trim();
         }

         var statusPart = statusAndRestPart.Split(':', StringSplitOptions.TrimEntries)[0];
         var actionDetailsPart = ActionDetailsRegex().Match(statusAndRestPart);

         string action = actionDetailsPart.Groups[1].Value.Trim();
         string details = actionDetailsPart.Groups.Count > 2 ? actionDetailsPart.Groups[2].Value.Trim() : string.Empty;

         return new LogEntry
         {
             ParsedLogId = parsedLogId,
             LineNum = lineNum,
             EntryDate = DateTime.ParseExact(dateTimePart, "ddd, dd MMM yyyy HH:mm:ss", CultureInfo.InvariantCulture),
             IPaddress = ipPart,
             Status = statusPart,
             Action = action,
             Details = details
         };
     }

     // generates all regexes at compile time
     [GeneratedRegex(@"^(.*?)_\d+$")]
     private static partial Regex FileTypeRegex();

     [GeneratedRegex(@"_([0-9]+)$")]
     private static partial Regex FileIdRegex();

     [GeneratedRegex(@"Action=\[(.*?)\](?:, Details=\[(.*?)\])?", RegexOptions.Compiled)]
     private static partial Regex ActionDetailsRegex();
 }

程序.cs文件:

    namespace LogParserApp;

using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Configuration;
using System;
using System.IO;
using Microsoft.EntityFrameworkCore;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;

internal partial class Program : ProgramBase
{
    public static async Task Main(string[] args)
    {
        var settings = ParseArguments(args);

        if (!settings.TryGetValue("filetype", out List<string>? value) || value.Count == 0)
        {
            Console.WriteLine("Please specify at least one filetype using '-filetype \"smtp, pop3\"'.");
            return;
        }

        var host = CreateHostBuilder(args).Build();

        // Access the configuration and the LogFileProcessor service
        var config = host.Services.GetRequiredService<IConfiguration>();

        string? folderPath = settings.TryGetValue("folderpath", out List<string>? value1) && value1.Count > 0 ? value1[0]
                              : config["LogFileSettings:FolderPath"];

        string? archivePath = settings.TryGetValue("archivepath", out List<string>? value2) && value2.Count > 0 ? value2[0]
                              : config["LogFileSettings:ArchivePath"];

        var logFileProcessor = host.Services.GetRequiredService<LogFileProcessor>();

        string postProcess = settings.TryGetValue("postprocess", out List<string>? value3) && value3.Count > 0 ? value3[0].ToLower() : "keep";


        foreach (var fileType in value)
        {
            var logFiles = Directory.GetFiles(folderPath ?? "C:\\logs", $"{fileType}_*.txt")
                .Select(file => new
                {
                    FileName = file,
                    OrderKey = int.Parse(OrderKeyRegex().Match(Path.GetFileName(file)).Groups[1].Value)
                })
                .OrderBy(f => f.OrderKey)
                .Select(f => f.FileName);

            //long memOffset = GC.GetTotalMemory(forceFullCollection: true); //for tracking memory

            foreach (var file in logFiles)
            {
                // EnsureAvailableMemory(); //to keep program from crashing, no joy

                //long startMem = GC.GetTotalMemory(forceFullCollection: true);  //for tracking memory

                Console.WriteLine($"Processing file: {file}");
                var processSuccess = (await logFileProcessor.ProcessLogFileAsync(file));

                if (processSuccess)
                {
                    switch (postProcess)
                    {
                        case "archive":
                            string targetPath = Path.Combine(archivePath ?? "C:\\logs\\archive", Path.GetFileName(file));
                            File.Move(file, targetPath);
                            Console.WriteLine($"Archived file to: {targetPath}");
                            break;
                        case "delete":
                            File.Delete(file);
                            Console.WriteLine($"Deleted file: {file}");
                            break;
                        case "keep":
                            // Nothing to do, may add something later to keep, but rename, or what-have-you
                            break;
                    }
                }
                else
                {
                    Console.WriteLine($"Processing failed for file: {file}, skipping post-processing steps.");
                }

                GC.Collect(0, GCCollectionMode.Forced);

                //long endMem = GC.GetTotalMemory(forceFullCollection: true);  //for tracking memory
                //Console.WriteLine($"Memory Utilized: {(endMem - startMem) / 1048576M:N2} MB");  //for tracking memory
                //Console.WriteLine($"Running Memory: {(endMem - memOffset) / 1048576M:N2} MB");  //for tracking memory

            }
        }

        await host.RunAsync();
    }

    static IHostBuilder CreateHostBuilder(string[] args) =>
    Host.CreateDefaultBuilder(args)
        .ConfigureAppConfiguration((hostingContext, config) =>
        {
            config.SetBasePath(Directory.GetCurrentDirectory());
            config.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true);
        })
        .ConfigureServices((hostContext, services) =>
        {
            services.AddDbContext<LogDbContext>(options =>
                options.UseSqlServer(hostContext.Configuration.GetConnectionString("DefaultConnection")));

            services.AddScoped<LogFileProcessor>();
            services.AddLogging(); 

            services.AddSingleton<IConfiguration>(hostContext.Configuration);
        })
        .ConfigureLogging(logging => {
            logging.ClearProviders();
            logging.AddConsole();
            logging.AddFilter("Microsoft.EntityFrameworkCore.Database.Command", LogLevel.Warning);
        });


    // generates a regex at compile time
    [GeneratedRegex(@"^.*?_(\d+)\.txt$")]
    private static partial Regex OrderKeyRegex();


    // this doesn't help - garbage collection never actually occurs, so it stays at 1GB & tries again indefinitely
    public static void EnsureAvailableMemory()
    {
        const long maxAllowedMemory = 1_073_741_824; // Set threshold to 1 GB

        while (true)
        {
            long memoryUsed = GC.GetTotalMemory(false);
            Console.WriteLine($"Memory used: {memoryUsed} bytes");

            if (memoryUsed < maxAllowedMemory)
            {
                break;
            }

            Console.WriteLine("Memory usage is too high, forcing garbage collection.");
            GC.Collect();
            GC.WaitForPendingFinalizers();
            Console.WriteLine("Garbage collection complete, pausing for a few seconds...");
            Thread.Sleep(5000); // Wait 5 seconds before checking again
        }
    }

}

注意最后一个方法(我已经用 GC 尝试了各种方法,但没有什么乐趣)。

实体(可能不会有帮助,但它们就在这里)

public class ParsedLog
{
    public int Id { get; set; }
    public string FileName { get; set; } = string.Empty;
    public string LogType { get; set; } = string.Empty;
    public int LogFileId { get; set; }
    public DateTime DateParsed { get; set; }
    public DateTime FileDate { get; set; }
    public string? FileHash { get; set; }  // SHA-256 hash of the file
}

public class LogEntry
{
    public long Id { get; set; }
    public int ParsedLogId { get; set; }
    public int LineNum { get; set; }
    public DateTime EntryDate { get; set; }
    public string IPaddress { get; set; } = string.Empty;
    public string Status { get; set; } = string.Empty;
    public string Action { get; set; } = string.Empty;
    public string Details { get; set; } = string.Empty;

}


public class LogDbContext(DbContextOptions<LogDbContext> options) : DbContext(options)
{
    public DbSet<LogEntry> LogEntries { get; set; }
    public DbSet<ParsedLog> ParsedLogs { get; set; }
}

我计划进行重构以显着提高速度(预先哈希文件、使用跨度、批量插入等),但是在处理数千个文件时内存问题非常严重。

以下是一些输出,演示内存一次高达 1GB 1-2MB

    PS D:\Projects\LogParserApp> dotnet run -filetype "smtp" -postprocess "archive"

Processing file: D:\EmailLogs\smtp_0.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_0.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_0.txt
Memory Utilized: 12.27 MB
Running Memory: 12.49 MB
Processing file: D:\EmailLogs\smtp_1.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_1.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_1.txt
Memory Utilized: 2.78 MB
Running Memory: 15.27 MB
Processing file: D:\EmailLogs\smtp_2.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_2.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_2.txt
Memory Utilized: 2.48 MB
Running Memory: 17.74 MB
Processing file: D:\EmailLogs\smtp_3.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_3.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_3.txt
Memory Utilized: 3.28 MB
Running Memory: 21.03 MB
Processing file: D:\EmailLogs\smtp_4.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_4.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_4.txt
Memory Utilized: 2.28 MB
Running Memory: 23.31 MB
Processing file: D:\EmailLogs\smtp_5.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_5.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_5.txt
Memory Utilized: 2.55 MB
Running Memory: 25.86 MB
...
...
...
Processing file: D:\EmailLogs\smtp_370.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_370.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_370.txt
Memory Utilized: 2.36 MB
Running Memory: 999.33 MB
Processing file: D:\EmailLogs\smtp_371.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_371.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_371.txt
Memory Utilized: 2.59 MB
Running Memory: 1,001.92 MB
Processing file: D:\EmailLogs\smtp_372.txt
info: LogParserApp.LogFileProcessor[0]
      Log file: smtp_372.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_372.txt
Memory Utilized: 2.24 MB
Running Memory: 1,004.16 MB

这适用于 373 个文件 - 想象一下 10,000 个文件。 :)

c# memory-leaks garbage-collection .net-8.0 entity-framework-8
1个回答
0
投票

看起来您对每个日志文件使用相同的 LogFileProcessor 实例(并且通过扩展,相同的上下文)。除非您另有说明,否则添加到上下文的任何实体都将继续被跟踪,因此处理的文件越多,它将跟踪的实体就越多,从而导致更高的内存使用量。

您可以尝试在成功处理每个文件后清除上下文。只需在 ProcessLogFileAsync 方法末尾调用

_dbContext.ChangeTracker.Clear()
即可。

另外,请谨慎在代码中直接调用 GC.Collect,除非您真的真的知道自己在做什么。

© www.soinside.com 2019 - 2024. All rights reserved.