我使用下面的 R 代码计算了我的数据(.txt 文件)中每 15 行的平均值。
# Split the `Date/Time` string at the "T" into separate Date and Time columns.
x <- df %>% separate(`Date/Time`, into = c("Date", "Time"), sep = "T")
# Average every consecutive run of 15 rows: `gl(n(), 15, n())` labels rows
# 1-15 as group 1, rows 16-30 as group 2, and so on.
# NOTE: Date and Time are still character columns at this point, so mean()
# yields NA (with a warning) for them.
mn <- x %>%
  group_by(group = as.integer(gl(n(), 15, n()))) %>%
  summarise_all(mean)
# Export the averaged table `mn`. The original passed `min`, which is the
# base R function rather than the result computed above.
write.csv(mn, 'C:/Users/Alexia/Desktop/Test/15row.csv')
我成功得到了输出,但日期和时间两列的结果都是 NA。我期望的输出应如下所示:
Date Time Col1 Col2 Col3....
2021-01-01 00:00:00 12 36 56
2021-01-01 00:15:00 34 54 43
2021-01-01 00:30:00 24 23 21
2021-01-01 00:45:00 12 36 56
2021-01-01 01:00:00 34 54 43
2021-01-01 01:15:00 24 23 21
2021-01-01 01:30:00 12 36 43
2021-01-01 01:45:00 12 36 34
2021-01-01 02:00:00 12 36 34
.
.
.
我的输入数据(.txt)每分钟一条记录,其中日期和时间的格式如下:
Date/Time Col1 Col2 Col3....
2021-01-01T00:00:00 20 12 34...
2021-01-01T00:01:00 .....
2021-01-01T00:02:00 .....
2021-01-01T00:03:00 .....
2021-01-01T01:04:00 .....
2021-01-01T01:05:00 .....
2021-01-01T01:05:00 .....
2021-01-01T01:07:00 .....
2021-01-01T02:08:00 .....
dput(df)的输出如下:
structure(list(`Date/Time` = c("2021-03-01T00:01:00", "2021-03-01T00:02:00",
"2021-03-01T00:03:00", "2021-03-01T00:04:00", "2021-03-01T00:05:00",
"2021-03-01T00:06:00", "2021-03-01T00:07:00", "2021-03-01T00:08:00",
"2021-03-01T00:09:00", "2021-03-01T00:10:00", "2021-03-01T00:11:00",
"2021-03-01T00:12:00", "2021-03-01T00:13:00", "2021-03-01T00:14:00",
"2021-03-01T00:15:00", "2021-03-01T00:16:00", "2021-03-01T00:17:00",
"2021-03-01T00:18:00", "2021-03-01T00:19:00", "2021-03-01T00:20:00",
"2021-03-01T00:21:00", "2021-03-01T00:22:00", "2021-03-01T00:23:00",
"2021-03-01T00:24:00", "2021-03-01T00:25:00", "2021-03-01T00:26:00",
"2021-03-01T00:27:00", "2021-03-01T00:28:00", "2021-03-01T00:29:00",
"2021-03-01T00:30:00"), `XY [XY]` = c(0.990641, 0.990641,
0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641,
0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641,
0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641,
0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641),
`C1 [CC]` = c(257L, 257L, 257L, 257L, 257L, 257L, 257L,
257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L,
257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L,
257L, 257L, 257L), Cc = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `C2 [C2]` = c(285L,
285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L,
285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L,
285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L), Dc = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
`C3 [C2]` = c(255L, 255L, 255L, 255L, 255L, 255L, 255L,
255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L,
255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L,
255L, 255L, 255L), Ac = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), C4 = c(0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735, 0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735, 0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735, 0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735, 0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735), `C5 [h]` = c(1013L,
1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L), `C6 [%]` = c(43L, 43L, 43L, 43L, 43L, 43L,
43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L,
43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L
), `C7 [E2]` = c(390L, 390L, 390L, 390L, 390L, 390L,
390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L,
390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L,
390L, 390L, 390L, 390L), Jc = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `D [S]` = c(62.3716,
62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716,
62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716,
62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716,
62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716,
62.3716), `Sw [S2]` = c(1392.95, 1392.95, 1392.95, 1392.95,
1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95,
1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95,
1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95,
1392.95, 1392.95, 1392.95, 1392.95, 1392.95), `SW [Q2]` =
c(389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164,
389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164,
389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164,
389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164,
389.164), `OA [H2]` = c(646.61, 646.61, 646.61, 646.61,
646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61,
646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61,
646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61,
646.61, 646.61), `T2 [C]` = c(3.7, 3.7, 3.7, 3.7, 3.7,
3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7,
3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7,
3.7), Lc = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-30L))
问题在于您的数据类型:您需要告诉 R 您正在处理的是日期和时间,否则它会把这些列当作字符向量。对字符向量取平均值会得到 NA。
尝试:
library(dplyr)
library(tidyr)
library(lubridate)

# Split the `Date/Time` string at the "T" into separate Date and Time columns.
x <- df %>% separate(`Date/Time`, into = c("Date", "Time"), sep = "T")

# The result is named `mn` rather than `min` so it does not mask base::min().
mn <- x %>%
  as_tibble() %>%
  mutate(
    # Convert Date column into the Date datatype
    Date = lubridate::ymd(Date),
    # Convert Time column into the Period datatype (HMS). Then,
    # change this to number of seconds
    Time = period_to_seconds(hms(Time))
  ) %>%
  # Label each consecutive run of 15 rows as one group (rows 1-15 -> 1,
  # rows 16-30 -> 2, ...). Without this step summarise() would collapse the
  # whole table into a single row instead of one row per 15-row block.
  group_by(group = as.integer(gl(n(), 15, n()))) %>%
  # Average every non-grouping column within each block; mean() is defined
  # for both the Date column and the numeric seconds column.
  summarise(across(everything(), mean)) %>%
  # Convert Time column from number of seconds
  # back into the Period datatype (HMS). Omit this line
  # if you'd prefer to have the average in seconds
  mutate(Time = seconds_to_period(Time))

write.csv(mn, 'C:/Users/Alexia/Desktop/Test/15row.csv')