我有两个数据框。 DF1 具有包括毫秒在内的时间间隔,以及相应的值(行为)。 DF2有一列时间(包括毫秒)。
DF1:
START STOP behavior
1 2023-07-20 14:07:39.00 2023-07-20 14:07:39.14 dig
2 2023-07-20 14:07:39.27 2023-07-20 14:07:39.90 pig
DF2:
x time label
133202 20/07/2023 12:07:39.020 2023-07-20 12:07:39 NA
133203 20/07/2023 12:07:39.040 2023-07-20 12:07:39 NA
133204 20/07/2023 12:07:39.060 2023-07-20 12:07:39 NA
133205 20/07/2023 12:07:39.080 2023-07-20 12:07:39 NA
133206 20/07/2023 12:07:39.100 2023-07-20 12:07:39 NA
133207 20/07/2023 12:07:39.120 2023-07-20 12:07:39 NA
133208 20/07/2023 12:07:39.140 2023-07-20 12:07:39 NA
133209 20/07/2023 12:07:39.160 2023-07-20 12:07:39 NA
133210 20/07/2023 12:07:39.180 2023-07-20 12:07:39 NA
133211 20/07/2023 12:07:39.200 2023-07-20 12:07:39 NA
133212 20/07/2023 12:07:39.220 2023-07-20 12:07:39 NA
133213 20/07/2023 12:07:39.240 2023-07-20 12:07:39 NA
133214 20/07/2023 12:07:39.260 2023-07-20 12:07:39 NA
133215 20/07/2023 12:07:39.280 2023-07-20 12:07:39 NA
133216 20/07/2023 12:07:39.300 2023-07-20 12:07:39 NA
133217 20/07/2023 12:07:39.320 2023-07-20 12:07:39 NA
133218 20/07/2023 12:07:39.340 2023-07-20 12:07:39 NA
133219 20/07/2023 12:07:39.360 2023-07-20 12:07:39 NA
133220 20/07/2023 12:07:39.380 2023-07-20 12:07:39 NA
133221 20/07/2023 12:07:39.400 2023-07-20 12:07:39 NA
我想生成一个 DF2 的数据帧,但如果时间发生在 DF1 中的时间间隔(包括毫秒)内,则具有与 DF1 中的行为相对应的列。
所需输出:
x time label
133202 20/07/2023 12:07:39.020 2023-07-20 12:07:39 dig
133203 20/07/2023 12:07:39.040 2023-07-20 12:07:39 dig
133204 20/07/2023 12:07:39.060 2023-07-20 12:07:39 dig
133205 20/07/2023 12:07:39.080 2023-07-20 12:07:39 dig
133206 20/07/2023 12:07:39.100 2023-07-20 12:07:39 dig
133207 20/07/2023 12:07:39.120 2023-07-20 12:07:39 dig
133208 20/07/2023 12:07:39.140 2023-07-20 12:07:39 dig
133209 20/07/2023 12:07:39.160 2023-07-20 12:07:39 <NA>
133210 20/07/2023 12:07:39.180 2023-07-20 12:07:39 <NA>
133211 20/07/2023 12:07:39.200 2023-07-20 12:07:39 <NA>
133212 20/07/2023 12:07:39.220 2023-07-20 12:07:39 <NA>
133213 20/07/2023 12:07:39.240 2023-07-20 12:07:39 <NA>
133214 20/07/2023 12:07:39.260 2023-07-20 12:07:39 <NA>
133215 20/07/2023 12:07:39.280 2023-07-20 12:07:39 pig
133216 20/07/2023 12:07:39.300 2023-07-20 12:07:39 pig
133217 20/07/2023 12:07:39.320 2023-07-20 12:07:39 pig
133218 20/07/2023 12:07:39.340 2023-07-20 12:07:39 pig
133219 20/07/2023 12:07:39.360 2023-07-20 12:07:39 pig
133220 20/07/2023 12:07:39.380 2023-07-20 12:07:39 pig
133221 20/07/2023 12:07:39.400 2023-07-20 12:07:39 pig
以下是数据输入:
DF1
# New data frame for testing.
# The vectors must be named START / STOP / behavior: those are the names used
# on the right-hand sides below and in data.frame(), and they become the
# column names of DF1.
# Note that format() returns character strings, so after this block the
# START and STOP columns of DF1 are character, not POSIXct.
START <- c("2023-07-20 14:07:39.01", "2023-07-20 14:07:39.29")
START <- as.POSIXct(START, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
START <- format(START, "%Y-%m-%d %H:%M:%OS3")
STOP <- c("2023-07-20 14:07:39.14", "2023-07-20 14:07:39.90")
STOP <- as.POSIXct(STOP, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
STOP <- format(STOP, "%Y-%m-%d %H:%M:%OS2")
behavior <- c("dig", "pig")
DF1 <- data.frame(START, STOP, behavior)
DF1$START <- as.POSIXct(DF1$START, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
DF1$STOP <- as.POSIXct(DF1$STOP, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
DF1$START <- format(DF1$START, "%Y-%m-%d %H:%M:%OS2")
DF1$STOP <- format(DF1$STOP, "%Y-%m-%d %H:%M:%OS2")
DF2(x 是以毫秒表示的时间,通过以下方式生成):
format(df2$x, "%Y-%m-%d %H:%M:%OS2")
# dput() representation of DF2 (20 rows, row names 133202:133221):
#   x     - timestamp as a character string, dd/mm/yyyy HH:MM:SS.mmm
#   time  - POSIXct with tzone "UTC", stored as seconds with a fractional part
#   label - all NA; this is the column to be filled from DF1's behavior
# NOTE(review): the expression is not assigned to a name here; assign it
# (e.g. DF2 <- structure(...)) before running snippets that refer to it.
structure(list(x = c("20/07/2023 12:07:39.020", "20/07/2023 12:07:39.040",
"20/07/2023 12:07:39.060", "20/07/2023 12:07:39.080", "20/07/2023 12:07:39.100",
"20/07/2023 12:07:39.120", "20/07/2023 12:07:39.140", "20/07/2023 12:07:39.160",
"20/07/2023 12:07:39.180", "20/07/2023 12:07:39.200", "20/07/2023 12:07:39.220",
"20/07/2023 12:07:39.240", "20/07/2023 12:07:39.260", "20/07/2023 12:07:39.280",
"20/07/2023 12:07:39.300", "20/07/2023 12:07:39.320", "20/07/2023 12:07:39.340",
"20/07/2023 12:07:39.360", "20/07/2023 12:07:39.380", "20/07/2023 12:07:39.400"
), time = structure(c(1689854859.02, 1689854859.04, 1689854859.06,
1689854859.08, 1689854859.1, 1689854859.12, 1689854859.14, 1689854859.16,
1689854859.18, 1689854859.2, 1689854859.22, 1689854859.24, 1689854859.26,
1689854859.28, 1689854859.3, 1689854859.32, 1689854859.34, 1689854859.36,
1689854859.38, 1689854859.4), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
label = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA)), row.names = 133202:133221, class = "data.frame")
我想制作这个:
# Desired result, shown as the dput() of df2 after labelling: identical to
# the input data frame except that `label` holds "dig" for the first seven
# rows, NA for the next six, and "pig" for the last seven.
dput(df2)
structure(list(x = c("20/07/2023 12:07:39.020", "20/07/2023 12:07:39.040",
"20/07/2023 12:07:39.060", "20/07/2023 12:07:39.080", "20/07/2023 12:07:39.100",
"20/07/2023 12:07:39.120", "20/07/2023 12:07:39.140", "20/07/2023 12:07:39.160",
"20/07/2023 12:07:39.180", "20/07/2023 12:07:39.200", "20/07/2023 12:07:39.220",
"20/07/2023 12:07:39.240", "20/07/2023 12:07:39.260", "20/07/2023 12:07:39.280",
"20/07/2023 12:07:39.300", "20/07/2023 12:07:39.320", "20/07/2023 12:07:39.340",
"20/07/2023 12:07:39.360", "20/07/2023 12:07:39.380", "20/07/2023 12:07:39.400"
), time = structure(c(1689854859.02, 1689854859.04, 1689854859.06,
1689854859.08, 1689854859.1, 1689854859.12, 1689854859.14, 1689854859.16,
1689854859.18, 1689854859.2, 1689854859.22, 1689854859.24, 1689854859.26,
1689854859.28, 1689854859.3, 1689854859.32, 1689854859.34, 1689854859.36,
1689854859.38, 1689854859.4), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
label = c("dig", "dig", "dig", "dig", "dig", "dig", "dig",
NA, NA, NA, NA, NA, NA, "pig", "pig", "pig", "pig", "pig",
"pig", "pig")), row.names = 133202:133221, class = "data.frame")
我可以使用以下代码产生所需的结果:
# Label each row of df2 with the behavior of the df1 interval that contains
# its timestamp (START and STOP both inclusive). Rows of df2 that fall in no
# interval keep their existing label.
#
# NOTE(review): the comparison relies on df1$START / df1$STOP being directly
# comparable with df2$time - ideally all three are POSIXct in the same time
# zone. Confirm this for the full df1 before relying on the result.

# Fail early with a readable message instead of the cryptic
# "replacement has length zero" that a missing column would trigger.
required_cols <- c("START", "STOP", "behavior")
missing_cols <- setdiff(required_cols, names(df1))
if (length(missing_cols) > 0) {
  stop(
    "df1 is missing column(s): ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# One vectorised comparison per interval rather than one scalar iteration per
# df2 row. Intervals are visited in reverse so that, if intervals overlap,
# the first matching row of df1 determines the label.
for (j in rev(seq_len(nrow(df1)))) {
  in_interval <- which(
    df2$time >= df1$START[j] & df2$time <= df1$STOP[j]
  )
  if (length(in_interval) > 0) {
    df2$label[in_interval] <- df1$behavior[j]
  }
}
使用 DF1 的虚拟数据。
当我尝试使用完整的 DF1 运行时,它似乎找不到时间间隔(抛出错误 "Error in df2$label[i] <- behavior_val : replacement has length zero"),尽管 str() 的输出完全相同。我想不出原因。我还注意到,只有当 DF1 的列是以下格式时,这似乎才有效:
format(DF1$STOP, "%Y-%m-%d %H:%M:%OS2")
而当它们被格式化为 POSIXct 时则不行,这让我认为这是解决一个我以为很简单的问题的非常低效的方法!
我还认为这是一个混乱的解决方案,并且 for 循环需要一段时间才能运行。有没有更好的方法来获得我想要的结果,并且仍然考虑时间间隔和数据中的毫秒?
我开始研究 data.table 解决方案,但听起来这对于以毫秒为单位的时间间隔来说不太好?
请帮忙!
提前非常感谢!
如果我理解正确的话,您希望根据 `x` 是否落入由 `START` 和 `STOP` 定义的区间,将 behavior 合并到 `DF2` 中?
下面,我假设 `DF1` 中的时间不是 14 点而是 12 点:
library(tidyverse)
# Parse the interval bounds (year-month-day strings) into date-times.
DF1 <- mutate(
  as_tibble(DF1),
  START = ymd_hms(START),
  STOP = ymd_hms(STOP)
)

# Parse the day/month/year timestamp strings in `x` into date-times.
DF2 <- mutate(as_tibble(DF2), x = dmy_hms(x))

# Keep every row of DF2 and attach the behavior of the DF1 interval whose
# [START, STOP] range contains `x`; unmatched rows get NA.
left_join(DF2, DF1, by = join_by(between(x, START, STOP))) |>
  select(x, time, behavior)
#> # A tibble: 20 × 3
#> x time behavior
#> <dttm> <dttm> <chr>
#> 1 2023-07-20 12:07:39 2023-07-20 12:07:39 dig
#> 2 2023-07-20 12:07:39 2023-07-20 12:07:39 dig
#> 3 2023-07-20 12:07:39 2023-07-20 12:07:39 dig
#> 4 2023-07-20 12:07:39 2023-07-20 12:07:39 dig
#> 5 2023-07-20 12:07:39 2023-07-20 12:07:39 dig
#> 6 2023-07-20 12:07:39 2023-07-20 12:07:39 dig
#> 7 2023-07-20 12:07:39 2023-07-20 12:07:39 dig
#> 8 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>
#> 9 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>
#> 10 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>
#> 11 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>
#> 12 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>
#> 13 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>
#> 14 2023-07-20 12:07:39 2023-07-20 12:07:39 pig
#> 15 2023-07-20 12:07:39 2023-07-20 12:07:39 pig
#> 16 2023-07-20 12:07:39 2023-07-20 12:07:39 pig
#> 17 2023-07-20 12:07:39 2023-07-20 12:07:39 pig
#> 18 2023-07-20 12:07:39 2023-07-20 12:07:39 pig
#> 19 2023-07-20 12:07:39 2023-07-20 12:07:39 pig
#> 20 2023-07-20 12:07:39 2023-07-20 12:07:39 pig
创建于 2023-12-08,使用 reprex v2.0.2