.NET HttpClient、JsonSerializer 中内存泄漏或误用 Stream?

问题描述 投票:0回答:1

我在一个空的 ASP.NET Core 8 Minimal API 项目中有一个基本的后台服务类。

应用程序启动只是:

builder.Services.AddHttpClient();
builder.Services.AddHostedService<SteamAppListDumpService>();

后台类用于保存 Steam API 端点的快照,所有基本内容:

/// <summary>
/// Hosted background service that repeatedly downloads the paged Steam app list and
/// writes the combined result as a gzip-compressed JSON snapshot under
/// <c>Config.DumpDataPath</c>.
/// </summary>
public class SteamAppListDumpService : BackgroundService
{
    /// <summary>Pause between the end of one snapshot run and the start of the next.</summary>
    private static readonly TimeSpan RepeatDelay = TimeSpan.FromMinutes(30);
    private readonly IHttpClientFactory _httpClientFactory;

    private string GetSteamKey() => "...";

    /// <summary>
    /// Builds the GetAppList request URL, appending <c>last_appid</c> when a paging
    /// cursor from the previous page is supplied.
    /// </summary>
    private string GetAppListUrl(int? lastAppId = null)
    {
        return $"https://api.steampowered.com/IStoreService/GetAppList/v1/?key={GetSteamKey()}" +
            (lastAppId.HasValue ? $"&last_appid={lastAppId}" : "");
    }

    public SteamAppListDumpService(IHttpClientFactory httpClientFactory)
    {
        _httpClientFactory = httpClientFactory;
    }

    /// <summary>
    /// Runs one dump, waits <see cref="RepeatDelay"/>, and repeats until the host
    /// signals <paramref name="stoppingToken"/>.
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        while (!stoppingToken.IsCancellationRequested)
        {
            // Flow the token so a shutdown request also aborts an in-flight download.
            await DumpAppList(stoppingToken);
            await Task.Delay(RepeatDelay, stoppingToken);
        }
    }

    // Property names mirror the JSON field names of the API response.
    public record SteamApiGetAppListApp(int appid, string name, int last_modified, int price_change_number);
    public record SteamApiGetAppListResponse(List<SteamApiGetAppListApp> apps, bool have_more_results, int last_appid);
    public record SteamApiGetAppListOuterResponse(SteamApiGetAppListResponse response);

    /// <summary>
    /// Downloads every page of the app list and writes the combined list to a new
    /// timestamped <c>.json.gz</c> file. Failures are logged and swallowed so that a
    /// single bad run does not terminate the service.
    /// </summary>
    /// <param name="cancellationToken">Aborts the HTTP requests when signalled.</param>
    /// <exception cref="OperationCanceledException">
    /// Propagated only when <paramref name="cancellationToken"/> itself was signalled.
    /// </exception>
    protected async Task DumpAppList(CancellationToken cancellationToken = default)
    {
        try
        {
            var httpClient = _httpClientFactory.CreateClient();
            var appList = new List<SteamApiGetAppListApp>();
            int? lastAppId = null;
            do
            {
                using var response = await httpClient.GetAsync(GetAppListUrl(lastAppId), cancellationToken);
                if (!response.IsSuccessStatusCode)
                {
                    throw new HttpRequestException($"API Returned Invalid Status Code: {response.StatusCode}", null, response.StatusCode);
                }

                // NOTE(review): this string (and contentBytes below) is a single large
                // contiguous allocation per page/run; streaming APIs would avoid it.
                var responseString = await response.Content.ReadAsStringAsync(cancellationToken);
                var responseObject = JsonSerializer.Deserialize<SteamApiGetAppListOuterResponse>(responseString)?.response
                    ?? throw new JsonException("App list payload did not contain a 'response' object.");

                // A page without an "apps" array deserializes to null; treat it as empty.
                if (responseObject.apps is not null)
                {
                    appList.AddRange(responseObject.apps);
                }
                lastAppId = responseObject.have_more_results ? responseObject.last_appid : null;

            } while (lastAppId != null);

            var contentBytes = JsonSerializer.SerializeToUtf8Bytes(appList);

            // Invariant culture keeps the file name stable regardless of the server's calendar settings.
            var fileName = DateTime.UtcNow.ToString("yyyy-MM-dd__HH-mm-ss", System.Globalization.CultureInfo.InvariantCulture) + ".json.gz";

            // File.Create truncates, unlike File.OpenWrite, so stale bytes can never trail the archive.
            await using var output = File.Create(Path.Combine(Config.DumpDataPath, fileName));
            await using var gz = new GZipStream(output, CompressionMode.Compress);
            // Deliberately not cancellable: aborting mid-write would leave a truncated archive.
            await gz.WriteAsync(contentBytes);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Requested shutdown is not an error; let the caller observe the cancellation.
            throw;
        }
        catch (Exception ex)
        {
            // Best effort: keep the service alive, but record what actually went wrong.
            Trace.TraceError("Steam app list dump failed: {0}", ex);
        }
    }
}

API 总共返回约 16 MB 的数据,然后每 30 分钟将其压缩/保存到 4 MB 的文件,仅此而已。在两次运行之间,当垃圾收集器运行时,我预计内存消耗会下降到几乎为零,但它会随着时间的推移而增加,例如,它在我的 PC 上运行了 2 小时,消耗了 700MB 内存。在我的服务器上,它已经运行了 24 小时,现在消耗 2.5 GB 内存。

据我所知,所有流都已正确释放(dispose),`HttpClient` 是通过推荐的 `IHttpClientFactory` 创建的。有谁知道为什么这个基本功能即使在垃圾回收之后也会消耗这么多内存?我尝试在 Visual Studio 中查看托管内存转储,但没有找到太多有用的信息。这是否表明其中某个类(例如 `HttpClient` / `SerializeToUtf8Bytes`)存在内存泄漏,还是我遗漏了什么?

`responseString` 和 `contentBytes` 通常约为 2 MB。

c# httpclient system.text.json background-service jsonserializer
1个回答
0
投票

每当您分配大小 >= 85,000 字节的连续内存块时,它都会被分配到大对象堆(Large Object Heap,LOH)。与常规堆不同,除非您“手动”要求,否则大对象堆不会被压缩,因此它可能因碎片化而不断增长,看起来就像发生了内存泄漏。请参阅《为什么要使用大对象堆以及我们为什么关心?》。

由于您的 `responseString` 和 `contentBytes` 通常约为 2 MB,我建议您重写代码以消除它们。改为使用相关的内置 API,直接以流的方式从服务器异步读取并反序列化,再直接以流的方式写入 JSON 文件,如下所示:
// Buffer size in bytes, used both for the output FileStream and for the BufferedStream placed in front of the GZipStream.
const int BufferSize = 16384;
// Passed as the useAsync argument of the FileStream constructor; see
// https://learn.microsoft.com/en-us/dotnet/api/system.io.filestream.-ctor?view=net-5.0#System_IO_FileStream__ctor_System_String_System_IO_FileMode_System_IO_FileAccess_System_IO_FileShare_System_Int32_System_Boolean_
const bool UseAsyncFileStreams = true;

/// <summary>
/// Downloads every page of the app list and streams the combined list, gzip-compressed,
/// into a new timestamped <c>.json.gz</c> file under <c>Config.DumpDataPath</c>, without
/// materializing the response text or the serialized output as one in-memory block.
/// Failures are logged and swallowed so that one bad run does not end the caller's loop.
/// </summary>
/// <param name="cancellationToken">Aborts the HTTP requests when signalled.</param>
/// <exception cref="OperationCanceledException">
/// Propagated only when <paramref name="cancellationToken"/> itself was signalled.
/// </exception>
protected async Task DumpAppList(CancellationToken cancellationToken = default)
{
    try
    {
        var httpClient = _httpClientFactory.CreateClient();
        var appList = new List<SteamApiGetAppListApp>();
        int? lastAppId = null;
        do
        {
            // Deserialize straight from the response stream via HttpClientJsonExtensions.GetFromJsonAsync(),
            // with no intermediate string.
            // https://learn.microsoft.com/en-us/dotnet/api/system.net.http.json.httpclientjsonextensions.getfromjsonasync
            // If you need customized error handling see
            // https://stackoverflow.com/questions/65383186/using-httpclient-getfromjsonasync-how-to-handle-httprequestexception-based-on
            var outerResponse = await httpClient.GetFromJsonAsync<SteamApiGetAppListOuterResponse>(GetAppListUrl(lastAppId), cancellationToken);

            // A literal "null" body or a missing "response" member would otherwise surface as a NullReferenceException.
            var responseObject = outerResponse?.response
                ?? throw new JsonException("App list payload did not contain a 'response' object.");

            // A page without an "apps" array deserializes to null; treat it as empty.
            if (responseObject.apps is not null)
            {
                appList.AddRange(responseObject.apps);
            }
            lastAppId = responseObject.have_more_results ? responseObject.last_appid : null;

        } while (lastAppId != null);

        // Invariant culture keeps the file name stable regardless of the server's calendar settings.
        var fileName = DateTime.UtcNow.ToString("yyyy-MM-dd__HH-mm-ss", System.Globalization.CultureInfo.InvariantCulture) + ".json.gz";

        await using var output = new FileStream(Path.Combine(Config.DumpDataPath, fileName),
                                                FileMode.Create, FileAccess.Write, FileShare.None, bufferSize: BufferSize, useAsync: UseAsyncFileStreams);
        await using var gz = new GZipStream(output, CompressionMode.Compress);
        // See https://faithlife.codes/blog/2012/06/always-wrap-gzipstream-with-bufferedstream/ for a discussion of buffer sizes vs compression ratios.
        // Declared last so it is disposed (and therefore flushed into gz) first.
        await using var buffer = new BufferedStream(gz, BufferSize);
        // Serialize directly to the buffered, compressed output stream without an intermediate in-memory array.
        // Deliberately not cancellable: aborting mid-write would leave a truncated archive.
        await JsonSerializer.SerializeAsync(buffer, appList);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Requested cancellation is not an error; let the caller observe it.
        throw;
    }
    catch (Exception ex)
    {
        // Best effort: keep the caller's loop alive, but record what actually went wrong.
        Trace.TraceError("Steam app list dump failed: {0}", ex);
    }
}

备注:

© www.soinside.com 2019 - 2024. All rights reserved.