如果时间落在给定的时间间隔内(精确到毫秒),如何在 R 中用特定值标记行

问题描述 投票:0回答:1

我有两个数据框。 DF1 具有包括毫秒在内的时间间隔,以及相应的值(行为)。 DF2有一列时间(包括毫秒)。

DF1:

                   START                   STOP behavior
1 2023-07-20 14:07:39.00 2023-07-20 14:07:39.14      dig
2 2023-07-20 14:07:39.27 2023-07-20 14:07:39.90      pig

DF2:

                             x                time label
133202 20/07/2023 12:07:39.020 2023-07-20 12:07:39    NA
133203 20/07/2023 12:07:39.040 2023-07-20 12:07:39    NA
133204 20/07/2023 12:07:39.060 2023-07-20 12:07:39    NA
133205 20/07/2023 12:07:39.080 2023-07-20 12:07:39    NA
133206 20/07/2023 12:07:39.100 2023-07-20 12:07:39    NA
133207 20/07/2023 12:07:39.120 2023-07-20 12:07:39    NA
133208 20/07/2023 12:07:39.140 2023-07-20 12:07:39    NA
133209 20/07/2023 12:07:39.160 2023-07-20 12:07:39    NA
133210 20/07/2023 12:07:39.180 2023-07-20 12:07:39    NA
133211 20/07/2023 12:07:39.200 2023-07-20 12:07:39    NA
133212 20/07/2023 12:07:39.220 2023-07-20 12:07:39    NA
133213 20/07/2023 12:07:39.240 2023-07-20 12:07:39    NA
133214 20/07/2023 12:07:39.260 2023-07-20 12:07:39    NA
133215 20/07/2023 12:07:39.280 2023-07-20 12:07:39    NA
133216 20/07/2023 12:07:39.300 2023-07-20 12:07:39    NA
133217 20/07/2023 12:07:39.320 2023-07-20 12:07:39    NA
133218 20/07/2023 12:07:39.340 2023-07-20 12:07:39    NA
133219 20/07/2023 12:07:39.360 2023-07-20 12:07:39    NA
133220 20/07/2023 12:07:39.380 2023-07-20 12:07:39    NA
133221 20/07/2023 12:07:39.400 2023-07-20 12:07:39    NA

我想生成一个与 DF2 相同的数据框,但当其中的时间落在 DF1 的某个时间间隔(精确到毫秒)内时,label 列取 DF1 中对应的行为。

所需输出:

                            x                time label
133202 20/07/2023 12:07:39.020 2023-07-20 12:07:39   dig
133203 20/07/2023 12:07:39.040 2023-07-20 12:07:39   dig
133204 20/07/2023 12:07:39.060 2023-07-20 12:07:39   dig
133205 20/07/2023 12:07:39.080 2023-07-20 12:07:39   dig
133206 20/07/2023 12:07:39.100 2023-07-20 12:07:39   dig
133207 20/07/2023 12:07:39.120 2023-07-20 12:07:39   dig
133208 20/07/2023 12:07:39.140 2023-07-20 12:07:39   dig
133209 20/07/2023 12:07:39.160 2023-07-20 12:07:39  <NA>
133210 20/07/2023 12:07:39.180 2023-07-20 12:07:39  <NA>
133211 20/07/2023 12:07:39.200 2023-07-20 12:07:39  <NA>
133212 20/07/2023 12:07:39.220 2023-07-20 12:07:39  <NA>
133213 20/07/2023 12:07:39.240 2023-07-20 12:07:39  <NA>
133214 20/07/2023 12:07:39.260 2023-07-20 12:07:39  <NA>
133215 20/07/2023 12:07:39.280 2023-07-20 12:07:39   pig
133216 20/07/2023 12:07:39.300 2023-07-20 12:07:39   pig
133217 20/07/2023 12:07:39.320 2023-07-20 12:07:39   pig
133218 20/07/2023 12:07:39.340 2023-07-20 12:07:39   pig
133219 20/07/2023 12:07:39.360 2023-07-20 12:07:39   pig
133220 20/07/2023 12:07:39.380 2023-07-20 12:07:39   pig
133221 20/07/2023 12:07:39.400 2023-07-20 12:07:39   pig

以下是数据输入:

DF1

# New data frame for testing: one row per labelled time interval.
#
# Builds START, STOP and behavior, combines them into DF1, and leaves
# DF1$START / DF1$STOP as character timestamps with two decimal places of
# seconds. Every parse/format step of the original snippet is kept so the
# resulting values are unchanged; only the object names are repaired so the
# code actually runs.
#
# NOTE(review): format() with %OSn truncates fractional seconds instead of
# rounding, so each text round-trip below can shift a value by one unit in
# the last printed digit -- confirm this is acceptable for real data.

# Interval start times: parse as UTC, then render with three decimals.
START <- c("2023-07-20 14:07:39.01", "2023-07-20 14:07:39.29")
START <- as.POSIXct(START, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
START <- format(START, "%Y-%m-%d %H:%M:%OS3")

# Interval end times: parse as UTC, then render with two decimals.
STOP <- c("2023-07-20 14:07:39.14", "2023-07-20 14:07:39.90")
STOP <- as.POSIXct(STOP, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
STOP <- format(STOP, "%Y-%m-%d %H:%M:%OS2")

# Behavior attached to each interval, in the same order as START/STOP.
behavior <- c("dig", "pig")

DF1 <- data.frame(START, STOP, behavior)

# Re-parse the text columns as UTC date-times ...
DF1$START <- as.POSIXct(DF1$START, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")
DF1$STOP <- as.POSIXct(DF1$STOP, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")

# ... and store them back as text with two decimal places of seconds.
DF1$START <- format(DF1$START, "%Y-%m-%d %H:%M:%OS2")
DF1$STOP <- format(DF1$STOP, "%Y-%m-%d %H:%M:%OS2")

DF2(其中 x 是包含毫秒的时间,可用下面的格式显示):

# Render df2$x with a date-time format string whose seconds field (%OS2)
# carries two decimal places.
# NOTE(review): in the dput shown below x is a character vector, so this
# format string presumably only takes effect when x is a date-time -- confirm.
format(df2$x, "%Y-%m-%d %H:%M:%OS2")

# dput() literal of the input data frame: 20 rows (row names 133202:133221)
# with three columns:
#   x     - character timestamps in day/month/year order with milliseconds
#   time  - POSIXct values (seconds since the epoch, tzone "UTC")
#   label - all NA, to be filled in from the interval table
structure(list(x = c("20/07/2023 12:07:39.020", "20/07/2023 12:07:39.040", 
"20/07/2023 12:07:39.060", "20/07/2023 12:07:39.080", "20/07/2023 12:07:39.100", 
"20/07/2023 12:07:39.120", "20/07/2023 12:07:39.140", "20/07/2023 12:07:39.160", 
"20/07/2023 12:07:39.180", "20/07/2023 12:07:39.200", "20/07/2023 12:07:39.220", 
"20/07/2023 12:07:39.240", "20/07/2023 12:07:39.260", "20/07/2023 12:07:39.280", 
"20/07/2023 12:07:39.300", "20/07/2023 12:07:39.320", "20/07/2023 12:07:39.340", 
"20/07/2023 12:07:39.360", "20/07/2023 12:07:39.380", "20/07/2023 12:07:39.400"
), time = structure(c(1689854859.02, 1689854859.04, 1689854859.06, 
1689854859.08, 1689854859.1, 1689854859.12, 1689854859.14, 1689854859.16, 
1689854859.18, 1689854859.2, 1689854859.22, 1689854859.24, 1689854859.26, 
1689854859.28, 1689854859.3, 1689854859.32, 1689854859.34, 1689854859.36, 
1689854859.38, 1689854859.4), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    label = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA)), row.names = 133202:133221, class = "data.frame")

我想制作这个:

 dput(df2)
# dput() literal of the desired result: the same x and time values as the
# input, with label set to "dig" for the first 7 rows, NA for the next 6,
# and "pig" for the last 7.
structure(list(x = c("20/07/2023 12:07:39.020", "20/07/2023 12:07:39.040", 
"20/07/2023 12:07:39.060", "20/07/2023 12:07:39.080", "20/07/2023 12:07:39.100", 
"20/07/2023 12:07:39.120", "20/07/2023 12:07:39.140", "20/07/2023 12:07:39.160", 
"20/07/2023 12:07:39.180", "20/07/2023 12:07:39.200", "20/07/2023 12:07:39.220", 
"20/07/2023 12:07:39.240", "20/07/2023 12:07:39.260", "20/07/2023 12:07:39.280", 
"20/07/2023 12:07:39.300", "20/07/2023 12:07:39.320", "20/07/2023 12:07:39.340", 
"20/07/2023 12:07:39.360", "20/07/2023 12:07:39.380", "20/07/2023 12:07:39.400"
), time = structure(c(1689854859.02, 1689854859.04, 1689854859.06, 
1689854859.08, 1689854859.1, 1689854859.12, 1689854859.14, 1689854859.16, 
1689854859.18, 1689854859.2, 1689854859.22, 1689854859.24, 1689854859.26, 
1689854859.28, 1689854859.3, 1689854859.32, 1689854859.34, 1689854859.36, 
1689854859.38, 1689854859.4), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    label = c("dig", "dig", "dig", "dig", "dig", "dig", "dig", 
    NA, NA, NA, NA, NA, NA, "pig", "pig", "pig", "pig", "pig", 
    "pig", "pig")), row.names = 133202:133221, class = "data.frame")

我可以使用以下代码产生所需的结果:

# Label each row of df2 with the behavior of the df1 interval that contains
# its timestamp (both bounds inclusive). Rows that fall outside every
# interval keep whatever label they already had.
#
# Reads:  df1$START, df1$STOP, df1$behavior, df2$time
# Writes: df2$label

# Fail early with a readable message when a column is missing or misnamed;
# otherwise e.g. df1$behavior is NULL and the assignment further down stops
# with the opaque "replacement has length zero".
stopifnot(
  all(c("START", "STOP", "behavior") %in% names(df1)),
  "time" %in% names(df2)
)

# Iterate over intervals rather than rows: each pass is a single vectorized
# comparison against all of df2$time. seq_len() keeps the loop from running
# when df1 has zero rows. The intervals are visited in reverse so that, when
# intervals overlap, the first matching row of df1 is the one that sticks.
for (j in rev(seq_len(nrow(df1)))) {
  # which() drops NA comparisons, so rows with a missing time are skipped.
  inside <- which(df2$time >= df1$START[j] & df2$time <= df1$STOP[j])

  # Only assign when something matched, so an all-NA label column is left
  # untouched (and keeps its type) if no interval applies.
  if (length(inside) > 0) {
    df2$label[inside] <- df1$behavior[j]
  }
}

使用 DF1 的虚拟数据。

当我尝试使用完整的 DF1 运行时,它似乎找不到时间间隔(抛出错误“Error in df2$label[i] <- behavior_val : replacement has length zero”),尽管 str() 的输出完全相同。我不明白为什么。我还注意到,这似乎只有在 DF1 的列是下面这种格式时才有效:

format(DF1$STOP, "%Y-%m-%d %H:%M:%OS2") 
而当它们被格式化为 POSIXct 时则不行,这让我觉得这并不是解决这个我原以为很简单的问题的有效方法!

我还认为这是一个混乱的解决方案,并且 for 循环需要一段时间才能运行。有没有更好的方法来获得我想要的结果,并且仍然考虑时间间隔和数据中的毫秒?

我开始研究 data.table 解决方案,但听起来这对于以毫秒为单位的时间间隔来说不太好?

请帮忙!

提前非常感谢!

r time merge data.table posix
1个回答
0
投票

如果我理解正确的话,您希望根据 `x` 是否落入由 `START` 和 `STOP` 定义的区间,将行为(behavior)合并到 `DF2` 中?

下面,我假设 `DF1` 中的时间不是 14 时而是 12 时:

library(tidyverse)

# Parse the interval bounds in DF1 (year-month-day order) into date-times.
DF1 <- DF1 |>
  as_tibble() |>
  mutate(across(c(START, STOP), ymd_hms))

# Parse the day-month-year timestamps in DF2$x into date-times.
DF2 <- DF2 |>
  as_tibble() |>
  mutate(x = dmy_hms(x))

# Keep every row of DF2 and attach the DF1 row whose [START, STOP] range
# contains x; rows with no containing interval get NA for behavior.
left_join(DF2, DF1, by = join_by(between(x, START, STOP))) |>
  select(x, time, behavior)
#> # A tibble: 20 × 3
#>    x                   time                behavior
#>    <dttm>              <dttm>              <chr>   
#>  1 2023-07-20 12:07:39 2023-07-20 12:07:39 dig     
#>  2 2023-07-20 12:07:39 2023-07-20 12:07:39 dig     
#>  3 2023-07-20 12:07:39 2023-07-20 12:07:39 dig     
#>  4 2023-07-20 12:07:39 2023-07-20 12:07:39 dig     
#>  5 2023-07-20 12:07:39 2023-07-20 12:07:39 dig     
#>  6 2023-07-20 12:07:39 2023-07-20 12:07:39 dig     
#>  7 2023-07-20 12:07:39 2023-07-20 12:07:39 dig     
#>  8 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>    
#>  9 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>    
#> 10 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>    
#> 11 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>    
#> 12 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>    
#> 13 2023-07-20 12:07:39 2023-07-20 12:07:39 <NA>    
#> 14 2023-07-20 12:07:39 2023-07-20 12:07:39 pig     
#> 15 2023-07-20 12:07:39 2023-07-20 12:07:39 pig     
#> 16 2023-07-20 12:07:39 2023-07-20 12:07:39 pig     
#> 17 2023-07-20 12:07:39 2023-07-20 12:07:39 pig     
#> 18 2023-07-20 12:07:39 2023-07-20 12:07:39 pig     
#> 19 2023-07-20 12:07:39 2023-07-20 12:07:39 pig     
#> 20 2023-07-20 12:07:39 2023-07-20 12:07:39 pig

创建于 2023-12-08,使用 reprex v2.0.2

© www.soinside.com 2019 - 2024. All rights reserved.