如何使用 LINQ 按时间差删除重复项

问题描述 投票:0回答:1

我有一个 IEnumerable,其元素的类定义如下:

/// <summary>
/// One checkup entry, identified by a subject/doctor pair and a timestamp.
/// </summary>
class Checkup
{
    /// <summary>Identifier of the subject of this entry.</summary>
    public Guid SubjectGuid { get; set; }

    /// <summary>Identifier of the doctor of this entry.</summary>
    public Guid DoctorGuid { get; set; }

    /// <summary>Date and time of this entry.</summary>
    public DateTime Date { get; set; }
}

例如我有以下数据集:

SubjectGuid                          DoctorGuid                           Date
2b9dd19f-9ce4-4a0a-832c-e941f5dc0234 6cfbdc40-d30d-4b49-9f02-ca116bcffee0 03.03.2024 11:04:46.644
2b9dd19f-9ce4-4a0a-832c-e941f5dc0234 6cfbdc40-d30d-4b49-9f02-ca116bcffee0 03.03.2024 11:12:27.369
2b9dd19f-9ce4-4a0a-832c-e941f5dc0234 6cfbdc40-d30d-4b49-9f02-ca116bcffee0 03.03.2024 11:30:36.564
bf4e7572-2328-4b19-9a9e-a67aa2f24fba b8dedde9-a397-4469-9c1b-22af4194f35f 03.03.2024 16:24:46.935
bf4e7572-2328-4b19-9a9e-a67aa2f24fba b8dedde9-a397-4469-9c1b-22af4194f35f 03.03.2024 16:39:27.853
bf4e7572-2328-4b19-9a9e-a67aa2f24fba b8dedde9-a397-4469-9c1b-22af4194f35f 03.03.2024 16:59:27.853
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:00:00.000
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:21:00.203
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:50:33.906

如果日期之间的差异小于或等于二十分钟,而SubjectGuid和DoctorGuid相同,我想删除重复项:

SubjectGuid                          DoctorGuid                           Date
2b9dd19f-9ce4-4a0a-832c-e941f5dc0234 6cfbdc40-d30d-4b49-9f02-ca116bcffee0 03.03.2024 11:04:46.644
2b9dd19f-9ce4-4a0a-832c-e941f5dc0234 6cfbdc40-d30d-4b49-9f02-ca116bcffee0 03.03.2024 11:30:36.564
bf4e7572-2328-4b19-9a9e-a67aa2f24fba b8dedde9-a397-4469-9c1b-22af4194f35f 03.03.2024 16:59:27.853
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:00:00.000
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:21:00.203
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:50:33.906

这是我尝试获得所需结果的代码(LINQPad),但它无法按我想要的方式工作:

// LINQPad entry point: builds the sample checkups, dumps them, then dumps the result of
// de-duplicating them by (SubjectGuid, DoctorGuid, 20-minute bucket index).
void Main()
{
    // Small factory so that every sample row reads as "subject, doctor, date".
    Checkup NewCheckup(string subject, string doctor, string date) => new Checkup
    {
        SubjectGuid = Guid.Parse(subject),
        DoctorGuid = Guid.Parse(doctor),
        Date = DateTime.Parse(date)
    };

    var checkups = new List<Checkup>
    {
        NewCheckup("2b9dd19f-9ce4-4a0a-832c-e941f5dc0234", "6cfbdc40-d30d-4b49-9f02-ca116bcffee0", "03.03.2024 11:04:46.644"),
        NewCheckup("2b9dd19f-9ce4-4a0a-832c-e941f5dc0234", "6cfbdc40-d30d-4b49-9f02-ca116bcffee0", "03.03.2024 11:12:27.369"),
        NewCheckup("2b9dd19f-9ce4-4a0a-832c-e941f5dc0234", "6cfbdc40-d30d-4b49-9f02-ca116bcffee0", "03.03.2024 11:30:36.564"),
        NewCheckup("bf4e7572-2328-4b19-9a9e-a67aa2f24fba", "b8dedde9-a397-4469-9c1b-22af4194f35f", "03.03.2024 16:24:46.935"),
        NewCheckup("bf4e7572-2328-4b19-9a9e-a67aa2f24fba", "b8dedde9-a397-4469-9c1b-22af4194f35f", "03.03.2024 16:39:27.853"),
        NewCheckup("bf4e7572-2328-4b19-9a9e-a67aa2f24fba", "b8dedde9-a397-4469-9c1b-22af4194f35f", "03.03.2024 16:59:27.853"),
        NewCheckup("efea799a-c755-4f4f-ad80-8ac63fd35d07", "b8e88718-55f0-4700-9900-09e8d440345b", "03.03.2024 19:00:00.000"),
        NewCheckup("efea799a-c755-4f4f-ad80-8ac63fd35d07", "b8e88718-55f0-4700-9900-09e8d440345b", "03.03.2024 19:21:00.203"),
        NewCheckup("efea799a-c755-4f4f-ad80-8ac63fd35d07", "b8e88718-55f0-4700-9900-09e8d440345b", "03.03.2024 19:50:33.906"),
    };
    checkups.Dump();

    // Width of one time bucket, expressed in ticks.
    long bucketTicks = TimeSpan.FromMinutes(20).Ticks;

    // Entries sharing subject, doctor and bucket index form one group; the latest entry
    // of each group is kept. Bucket boundaries are fixed multiples of 20 minutes, so two
    // entries less than 20 minutes apart can still end up in different buckets.
    var checkupsNoDuplicates = checkups
        .GroupBy(c => new { c.SubjectGuid, c.DoctorGuid, Time = c.Date.Ticks / bucketTicks })
        .Select(g => g.OrderByDescending(sg => sg.Date).First());
    checkupsNoDuplicates.Dump();
}

/// <summary>
/// Element type of the sample list built in Main: a subject/doctor pair plus a timestamp.
/// </summary>
class Checkup
{
    /// <summary>Subject identifier; part of the grouping key.</summary>
    public Guid SubjectGuid { get; set; }
    /// <summary>Doctor identifier; part of the grouping key.</summary>
    public Guid DoctorGuid { get; set; }
    /// <summary>Timestamp used for the 20-minute bucketing and for ordering within a group.</summary>
    public DateTime Date { get; set; }
}

此代码产生以下结果:

SubjectGuid                          DoctorGuid                           Date
2b9dd19f-9ce4-4a0a-832c-e941f5dc0234 6cfbdc40-d30d-4b49-9f02-ca116bcffee0 03.03.2024 11:12:27
2b9dd19f-9ce4-4a0a-832c-e941f5dc0234 6cfbdc40-d30d-4b49-9f02-ca116bcffee0 03.03.2024 11:30:36
bf4e7572-2328-4b19-9a9e-a67aa2f24fba b8dedde9-a397-4469-9c1b-22af4194f35f 03.03.2024 16:39:27
bf4e7572-2328-4b19-9a9e-a67aa2f24fba b8dedde9-a397-4469-9c1b-22af4194f35f 03.03.2024 16:59:27
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:00:00
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:21:00
efea799a-c755-4f4f-ad80-8ac63fd35d07 b8e88718-55f0-4700-9900-09e8d440345b 03.03.2024 19:50:33

对 Ticks 做整除并不总是有效。例如,下面这段代码中的 Ticks 除法:

// Shows that two timestamps less than 20 minutes apart can still get
// different results from integer division of Ticks by a 20-minute span.
void Main()
{
    // Number of ticks in one 20-minute bucket.
    long bucketTicks = TimeSpan.FromMinutes(20).Ticks;

    foreach (var text in new[] { "03.01.2024 01:27:44.907", "03.01.2024 01:41:55.088" })
    {
        var parsed = DateTime.Parse(text);

        // First the timestamp itself, then the index of the bucket it falls into.
        Console.WriteLine(parsed.ToString("MM.dd.yyyy hh:mm:ss.fff"));
        Console.WriteLine(parsed.Ticks / bucketTicks);
    }
}

产生结果:

01.03.2024 01:27:44.907
53199868
01.03.2024 01:41:55.088
53199869
c# .net linq linqpad drop-duplicates
1个回答
0
投票

您可以利用 Aggregate,它执行的是归约(reduce)操作。

我们开始吧:

// Walks the checkups in chronological order and builds the list of entries to keep.
List<Checkup> result = checkups
    .OrderBy(c => c.Date)   // oldest first
    .Aggregate<Checkup, List<Checkup>>(
        [],
        (accumulate, item) =>
        {
            // Most recently kept entry with the same (SubjectGuid, DoctorGuid) pair, if any.
            Checkup? lastKept = accumulate.LastOrDefault(
                kept => kept.SubjectGuid == item.SubjectGuid && kept.DoctorGuid == item.DoctorGuid);

            // Keep the item when its pair has no kept entry yet, or when more than
            // 20 minutes separate it from the last kept entry of that pair.
            if (lastKept is null || item.Date - lastKept.Date > TimeSpan.FromMinutes(20))
            {
                accumulate.Add(item);
            }

            // The same list instance is handed to the next iteration.
            return accumulate;
        });

首先是排序(升序);如果能确保输入数据已经按时间排好序,这一步是可选的,可以省略。

然后是聚合块。

聚合函数将预期结果的初始值作为第一个参数,在我们的例子中是一个空的 List<Checkup>。

核心函数是它的第二个参数,它对“累加器”(accumulate)和当前项进行操作。在迭代过程中,累加器代表当前步骤的结果。该函数计算出处理完当前项之后的结果,这个结果将作为下一次迭代的累加器。

迭代结束后,累加器中的内容就是实际结果。

在我们的例子中,核心做了什么?

  1. 它在累加器中找到同一 (SubjectGuid, DoctorGuid) 对的最后一个条目。

  2. 如果没有,则将当前项添加到累加器中。

  3. 如果有,则检查是否已经过去了足够的时间(超过 20 分钟),如果是则添加当前项。

© www.soinside.com 2019 - 2024. All rights reserved.