我有 2 种生物标志物(“分析物”)的值,在 2 个不同时期使用 2 种不同的“方法”进行测量(从 2022 年 6 月 3 日到 2022 年 6 月 6 日的 4 天内使用“旧”方法,以及“新”方法)从2023年6月1日到2023年6月7日超过7天)。
问题: 如何从最早的共同周期开始,考虑月日(无论年份),按分析物计算新列中“旧”和“新”方法的值的完全相同(最大)数量?
例如,在下面所需的输出数据中,新的“计数”列显示:
第一个分析物“chol”的新旧方法的 n=16 个值,并且
第二个分析物“ldh”的新旧方法的 n=18 个值。
输入数据:
dat0 <-
structure(list(analyte = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L), .Label = c("amy", "auu", "chol", "ggt",
"iron", "ldh", "pal", "prot", "trig", "uree", "ureeu", "uric"
), class = "factor"), method = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("new", "old"), class = "factor"),
date = structure(c(1654214400, 1654214400, 1654214400, 1654214400,
1654214400, 1654300800, 1654300800, 1654300800, 1654300800,
1654387200, 1654387200, 1654387200, 1654473600, 1654473600,
1654473600, 1654473600, 1685577600, 1685577600, 1685577600,
1685577600, 1685577600, 1685577600, 1685664000, 1685664000,
1685664000, 1685664000, 1685664000, 1685664000, 1685750400,
1685750400, 1685750400, 1685750400, 1685750400, 1685836800,
1685836800, 1685836800, 1685836800, 1685923200, 1685923200,
1685923200, 1685923200, 1685923200, 1685923200, 1686009600,
1686009600, 1686009600, 1686009600, 1686009600, 1686009600,
1686096000, 1686096000, 1686096000, 1686096000, 1686096000,
1686096000, 1654214400, 1654214400, 1654214400, 1654214400,
1654214400, 1654300800, 1654300800, 1654300800, 1654300800,
1654300800, 1654300800, 1654387200, 1654387200, 1654387200,
1654387200, 1654387200, 1654473600, 1654473600, 1654473600,
1654473600, 1654473600, 1654473600, 1654473600, 1654473600,
1685577600, 1685577600, 1685577600, 1685577600, 1685577600,
1685664000, 1685664000, 1685664000, 1685664000, 1685664000,
1685750400, 1685750400, 1685750400, 1685836800, 1685836800,
1685836800, 1685923200, 1685923200, 1685923200, 1685923200,
1686009600, 1686009600, 1686009600, 1686009600, 1686096000,
1686096000, 1686096000, 1686096000), tzone = "UTC", class = c("POSIXct",
"POSIXt")), value = c(2.84, 2.79, 2.82, 2.84, 2.84, 2.81,
2.8, 2.83, 2.82, 2.82, 2.8, 2.82, 2.81, 2.79, 2.82, 2.84,
2.74, 2.76, 2.77, 2.79, 2.76, 2.81, 2.73, 2.75, 2.77, 2.78,
2.76, 2.86, 2.73, 2.78, 2.84, 2.82, 2.8, 2.74, 2.76, 2.79,
2.82, 2.8, 2.76, 2.77, 2.83, 2.85, 2.83, 2.78, 2.74, 2.8,
2.8, 2.79, 2.85, 2.75, 2.76, 2.78, 2.82, 2.76, 2.86, 121,
119, 113, 117, 119, 123, 120, 117, 118, 125, 121, 121, 124,
123, 117, 115, 113, 120, 113, 113, 115, 117, 119, 115, 111,
111, 113, 116, 115, 111, 115, 112, 118, 115, 113, 106, 118,
113, 113, 113, 110, 114, 113, 117, 111, 111, 115, 120, 112,
113, 114, 108)), row.names = c(NA, -107L), class = c("tbl_df",
"tbl", "data.frame"))
所需输出数据:
dat1 <-
structure(list(analyte = c("chol", "chol", "chol", "chol", "chol",
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol",
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol",
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol",
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol",
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol",
"chol", "chol", "chol", "chol", "chol", "chol", "chol", "chol",
"chol", "chol", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh",
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh",
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh",
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh",
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh",
"ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh", "ldh"
), method = c("old", "old", "old", "old", "old", "old", "old",
"old", "old", "old", "old", "old", "old", "old", "old", "old",
"new", "new", "new", "new", "new", "new", "new", "new", "new",
"new", "new", "new", "new", "new", "new", "new", "new", "new",
"new", "new", "new", "new", "new", "new", "new", "new", "new",
"new", "new", "new", "new", "new", "new", "new", "new", "new",
"new", "new", "new", "old", "old", "old", "old", "old", "old",
"old", "old", "old", "old", "old", "old", "old", "old", "old",
"old", "old", "old", "old", "old", "old", "old", "old", "old",
"new", "new", "new", "new", "new", "new", "new", "new", "new",
"new", "new", "new", "new", "new", "new", "new", "new", "new",
"new", "new", "new", "new", "new", "new", "new", "new", "new",
"new"), date = structure(c(1654214400, 1654214400, 1654214400,
1654214400, 1654214400, 1654300800, 1654300800, 1654300800, 1654300800,
1654387200, 1654387200, 1654387200, 1654473600, 1654473600, 1654473600,
1654473600, 1685577600, 1685577600, 1685577600, 1685577600, 1685577600,
1685577600, 1685664000, 1685664000, 1685664000, 1685664000, 1685664000,
1685664000, 1685750400, 1685750400, 1685750400, 1685750400, 1685750400,
1685836800, 1685836800, 1685836800, 1685836800, 1685923200, 1685923200,
1685923200, 1685923200, 1685923200, 1685923200, 1686009600, 1686009600,
1686009600, 1686009600, 1686009600, 1686009600, 1686096000, 1686096000,
1686096000, 1686096000, 1686096000, 1686096000, 1654214400, 1654214400,
1654214400, 1654214400, 1654214400, 1654300800, 1654300800, 1654300800,
1654300800, 1654300800, 1654300800, 1654387200, 1654387200, 1654387200,
1654387200, 1654387200, 1654473600, 1654473600, 1654473600, 1654473600,
1654473600, 1654473600, 1654473600, 1654473600, 1685577600, 1685577600,
1685577600, 1685577600, 1685577600, 1685664000, 1685664000, 1685664000,
1685664000, 1685664000, 1685750400, 1685750400, 1685750400, 1685836800,
1685836800, 1685836800, 1685923200, 1685923200, 1685923200, 1685923200,
1686009600, 1686009600, 1686009600, 1686009600, 1686096000, 1686096000,
1686096000, 1686096000), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
value = c(2.84, 2.79, 2.82, 2.84, 2.84, 2.81, 2.8, 2.83,
2.82, 2.82, 2.8, 2.82, 2.81, 2.79, 2.82, 2.84, 2.74, 2.76,
2.77, 2.79, 2.76, 2.81, 2.73, 2.75, 2.77, 2.78, 2.76, 2.86,
2.73, 2.78, 2.84, 2.82, 2.8, 2.74, 2.76, 2.79, 2.82, 2.8,
2.76, 2.77, 2.83, 2.85, 2.83, 2.78, 2.74, 2.8, 2.8, 2.79,
2.85, 2.75, 2.76, 2.78, 2.82, 2.76, 2.86, 121, 119, 113,
117, 119, 123, 120, 117, 118, 125, 121, 121, 124, 123, 117,
115, 113, 120, 113, 113, 115, 117, 119, 115, 111, 111, 113,
116, 115, 111, 115, 112, 118, 115, 113, 106, 118, 113, 113,
113, 110, 114, 113, 117, 111, 111, 115, 120, 112, 113, 114,
108), count = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -107L))
感谢您的帮助
我不是 100% 确定,但我认为这可能有助于前进。我的输出有点不同,但这可能是合理的。
首先,您需要将日期分开以考虑
month
和 day
而不考虑 year
。您可以使用 arrange
和 analyte
对每个 month
的数据进行排序或 day
。一年中存在重叠的第一天(给定日期“新”和“旧”都可用,并且 analyte
)将在名为 TRUE
的新列中用 overlap
表示。
将为每个 count
和
method
(其中存在
analyte
)计算A
overlap
。
下一部分,您可以通过
arrange
和 analyte
count
,以及 filter
或保留对给定 count
和 analyte
两种方法都可用的行。
我相信输出的差异在于,以下仅包含两种方法的“ldh”的 14 次测试,即使一种方法于 6 月 5 日结束,另一种方法于 6 月 6 日结束。如果您希望它们以相同的结束日期继续,则此可以修改。
library(tidyverse)
dat0 |>
separate(date, c('year', 'month', 'day'), sep = "-", remove = FALSE, convert = TRUE) |>
arrange(analyte, month, day) |>
mutate(overlap = all(c("old", "new") %in% method), .by = c(analyte, month, day)) |>
mutate(count = cumsum(overlap), .by = c(analyte, method)) |>
arrange(analyte, count) |>
filter(overlap & all(c("old", "new") %in% method), .by = c(analyte, count)) |>
arrange(analyte, method, date)
输出
analyte method date year month day value overlap count
<fct> <fct> <dttm> <int> <int> <int> <dbl> <lgl> <int>
1 chol new 2023-06-03 00:00:00 2023 6 3 2.73 TRUE 1
2 chol new 2023-06-03 00:00:00 2023 6 3 2.78 TRUE 2
3 chol new 2023-06-03 00:00:00 2023 6 3 2.84 TRUE 3
4 chol new 2023-06-03 00:00:00 2023 6 3 2.82 TRUE 4
5 chol new 2023-06-03 00:00:00 2023 6 3 2.8 TRUE 5
6 chol new 2023-06-04 00:00:00 2023 6 4 2.74 TRUE 6
7 chol new 2023-06-04 00:00:00 2023 6 4 2.76 TRUE 7
8 chol new 2023-06-04 00:00:00 2023 6 4 2.79 TRUE 8
9 chol new 2023-06-04 00:00:00 2023 6 4 2.82 TRUE 9
10 chol new 2023-06-05 00:00:00 2023 6 5 2.8 TRUE 10
11 chol new 2023-06-05 00:00:00 2023 6 5 2.76 TRUE 11
12 chol new 2023-06-05 00:00:00 2023 6 5 2.77 TRUE 12
13 chol new 2023-06-05 00:00:00 2023 6 5 2.83 TRUE 13
14 chol new 2023-06-05 00:00:00 2023 6 5 2.85 TRUE 14
15 chol new 2023-06-05 00:00:00 2023 6 5 2.83 TRUE 15
16 chol new 2023-06-06 00:00:00 2023 6 6 2.78 TRUE 16
17 chol old 2022-06-03 00:00:00 2022 6 3 2.84 TRUE 1
18 chol old 2022-06-03 00:00:00 2022 6 3 2.79 TRUE 2
19 chol old 2022-06-03 00:00:00 2022 6 3 2.82 TRUE 3
20 chol old 2022-06-03 00:00:00 2022 6 3 2.84 TRUE 4
21 chol old 2022-06-03 00:00:00 2022 6 3 2.84 TRUE 5
22 chol old 2022-06-04 00:00:00 2022 6 4 2.81 TRUE 6
23 chol old 2022-06-04 00:00:00 2022 6 4 2.8 TRUE 7
24 chol old 2022-06-04 00:00:00 2022 6 4 2.83 TRUE 8
25 chol old 2022-06-04 00:00:00 2022 6 4 2.82 TRUE 9
26 chol old 2022-06-05 00:00:00 2022 6 5 2.82 TRUE 10
27 chol old 2022-06-05 00:00:00 2022 6 5 2.8 TRUE 11
28 chol old 2022-06-05 00:00:00 2022 6 5 2.82 TRUE 12
29 chol old 2022-06-06 00:00:00 2022 6 6 2.81 TRUE 13
30 chol old 2022-06-06 00:00:00 2022 6 6 2.79 TRUE 14
31 chol old 2022-06-06 00:00:00 2022 6 6 2.82 TRUE 15
32 chol old 2022-06-06 00:00:00 2022 6 6 2.84 TRUE 16
33 ldh new 2023-06-03 00:00:00 2023 6 3 113 TRUE 1
34 ldh new 2023-06-03 00:00:00 2023 6 3 106 TRUE 2
35 ldh new 2023-06-03 00:00:00 2023 6 3 118 TRUE 3
36 ldh new 2023-06-04 00:00:00 2023 6 4 113 TRUE 4
37 ldh new 2023-06-04 00:00:00 2023 6 4 113 TRUE 5
38 ldh new 2023-06-04 00:00:00 2023 6 4 113 TRUE 6
39 ldh new 2023-06-05 00:00:00 2023 6 5 110 TRUE 7
40 ldh new 2023-06-05 00:00:00 2023 6 5 114 TRUE 8
41 ldh new 2023-06-05 00:00:00 2023 6 5 113 TRUE 9
42 ldh new 2023-06-05 00:00:00 2023 6 5 117 TRUE 10
43 ldh new 2023-06-06 00:00:00 2023 6 6 111 TRUE 11
44 ldh new 2023-06-06 00:00:00 2023 6 6 111 TRUE 12
45 ldh new 2023-06-06 00:00:00 2023 6 6 115 TRUE 13
46 ldh new 2023-06-06 00:00:00 2023 6 6 120 TRUE 14
47 ldh old 2022-06-03 00:00:00 2022 6 3 121 TRUE 1
48 ldh old 2022-06-03 00:00:00 2022 6 3 119 TRUE 2
49 ldh old 2022-06-03 00:00:00 2022 6 3 113 TRUE 3
50 ldh old 2022-06-03 00:00:00 2022 6 3 117 TRUE 4
51 ldh old 2022-06-03 00:00:00 2022 6 3 119 TRUE 5
52 ldh old 2022-06-04 00:00:00 2022 6 4 123 TRUE 6
53 ldh old 2022-06-04 00:00:00 2022 6 4 120 TRUE 7
54 ldh old 2022-06-04 00:00:00 2022 6 4 117 TRUE 8
55 ldh old 2022-06-04 00:00:00 2022 6 4 118 TRUE 9
56 ldh old 2022-06-04 00:00:00 2022 6 4 125 TRUE 10
57 ldh old 2022-06-04 00:00:00 2022 6 4 121 TRUE 11
58 ldh old 2022-06-05 00:00:00 2022 6 5 121 TRUE 12
59 ldh old 2022-06-05 00:00:00 2022 6 5 124 TRUE 13
60 ldh old 2022-06-05 00:00:00 2022 6 5 123 TRUE 14