我需要将两个数据集连接在一起:
>allsecsHR
timestamp HRbpm
2023-03-22 09:04:53 101
2023-03-22 09:04:54 124
2023-03-22 09:04:55 103
2023-03-22 09:04:56 111
2023-03-22 09:04:57 112
2023-03-22 09:04:58 143
2023-03-22 09:04:59 109
2023-03-22 09:05:00 129
2023-03-22 09:05:01 122
2023-03-22 09:05:02 125
2023-03-22 09:05:03 110
>bhr
Behaviour BhrTimeStart BhrTimeEnd
Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
Forage 2023-03-22 09:05:03 2023-03-22 09:05:10
最后我想要一个数据集,其中每行代表一秒,以及每秒执行的行为
我尝试在 mySQL 和 R 中使用 sqldf 和 powerjoin 执行此操作,但运行 2 小时后它不断失去与 sql 服务器的连接,或者在 R 中它无法完成并崩溃或返回
Error: vector memory exhausted (limit reached?)
。我真的很感谢一些帮助来找到有效的方法来做到这一点!
我认为 data.table 解决方案可能是最快的,但我不知道如何去做,一个整洁的解决方案也很棒!
到目前为止我已经尝试过:
mySQL/sqldf
library(sqldf)
# Attach the behaviour bout that is in progress at each heart-rate sample.
# The bout is matched as a half-open interval [BhrTimeStart, BhrTimeEnd):
# `between` is inclusive at both ends, so a sample taken exactly on a bout
# boundary (e.g. 09:04:58) would match two bouts and be duplicated.
sqldf("select * from allsecsHR
left join bhr
on allsecsHR.timestamp >= bhr.BhrTimeStart
and allsecsHR.timestamp < bhr.BhrTimeEnd")
powerjoin
library(powerjoin)
# Fuzzy left join: keep every heart-rate sample and attach the bout whose
# half-open interval [BhrTimeStart, BhrTimeEnd) contains it. A bout with a
# missing end time is treated as still running.
# The start comparison is `>=` (not `>`) so that a sample taken exactly at
# a bout's start second is assigned to that bout instead of being dropped.
power_left_join(
  allsecsHR, bhr,
  by = ~ .x$timestamp >= .y$BhrTimeStart &
    (.x$timestamp < .y$BhrTimeEnd | is.na(.y$BhrTimeEnd)),
  keep = "left")
我想要的输出表是这样的:
timestamp HRbpm Bhr BhrTimeStart BhrTimeEnd
2023-03-22 09:04:53 101 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
2023-03-22 09:04:54 124 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
2023-03-22 09:04:55 103 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
2023-03-22 09:04:56 111 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
2023-03-22 09:04:57 112 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
2023-03-22 09:04:58 143 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
2023-03-22 09:04:59 109 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
2023-03-22 09:05:00 129 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
2023-03-22 09:05:01 122 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
2023-03-22 09:05:02 125 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
2023-03-22 09:05:03 110 Forage 2023-03-22 09:05:03 2023-03-22 09:05:10
如果
allsecsHR
中的所有时间戳在bhr
中都有对应的区间:
library(data.table)
# Rolling join: for every heart-rate sample, pick the bout whose start is the
# latest one at or before the sample's timestamp (roll = TRUE carries the last
# matching bout forward).
# In a join the key column of `bhr` is overwritten with the values from
# `allsecsHR`, so the real bout start is read through the `x.` prefix and the
# sample time through the `i.` prefix. This also avoids adding a helper
# column to `allsecsHR` by reference.
# NOTE(review): the roll never looks at BhrTimeEnd, so a sample later than the
# last bout's end would still be assigned to that bout; hence the premise
# above that every timestamp is covered by some interval.
bhr[allsecsHR,
    .(timestamp = i.timestamp, HRbpm, Bhr,
      BhrTimeStart = x.BhrTimeStart, BhrTimeEnd),
    on = .(BhrTimeStart = timestamp), roll = TRUE]
#> timestamp HRbpm Bhr BhrTimeStart BhrTimeEnd
#> 1: 2023-03-22 09:04:53 101 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
#> 2: 2023-03-22 09:04:54 124 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
#> 3: 2023-03-22 09:04:55 103 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
#> 4: 2023-03-22 09:04:56 111 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
#> 5: 2023-03-22 09:04:57 112 Forage 2023-03-22 09:04:53 2023-03-22 09:04:58
#> 6: 2023-03-22 09:04:58 143 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
#> 7: 2023-03-22 09:04:59 109 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
#> 8: 2023-03-22 09:05:00 129 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
#> 9: 2023-03-22 09:05:01 122 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
#> 10: 2023-03-22 09:05:02 125 Vigilance 2023-03-22 09:04:58 2023-03-22 09:05:03
#> 11: 2023-03-22 09:05:03 110 Forage 2023-03-22 09:05:03 2023-03-22 09:05:10
数据:
# Reproducible copies of the two example tables (dput()-style output).
# Each structure() call is now closed before the next assignment starts; the
# original had the closing parenthesis of the first call misplaced after the
# second one, so the snippet did not parse.

# Heart-rate samples, one row per second. `ts` is a copy of `timestamp`.
allsecHR <- structure(
  list(
    timestamp = structure(
      c(1679490293, 1679490294, 1679490295, 1679490296, 1679490297,
        1679490298, 1679490299, 1679490300, 1679490301, 1679490302,
        1679490303),
      class = c("POSIXct", "POSIXt"), tzone = ""
    ),
    HRbpm = c(101L, 124L, 103L, 111L, 112L, 143L, 109L, 129L, 122L, 125L,
              110L),
    ts = structure(
      c(1679490293, 1679490294, 1679490295, 1679490296, 1679490297,
        1679490298, 1679490299, 1679490300, 1679490301, 1679490302,
        1679490303),
      class = c("POSIXct", "POSIXt"), tzone = ""
    )
  ),
  row.names = c(NA, -11L),
  class = c("data.table", "data.frame")
)
# The join code refers to this table as `allsecsHR`; expose it under that
# name as well so the example runs as written.
allsecsHR <- allsecHR

# Behaviour bouts with their start and end times.
bhr <- structure(
  list(
    Bhr = c("Forage", "Vigilance", "Forage"),
    BhrTimeStart = structure(
      c(1679490293, 1679490298, 1679490303),
      class = c("POSIXct", "POSIXt"), tzone = ""
    ),
    BhrTimeEnd = structure(
      c(1679490298, 1679490303, 1679490310),
      class = c("POSIXct", "POSIXt"), tzone = ""
    )
  ),
  row.names = c(NA, -3L),
  class = c("data.table", "data.frame")
)
可能有一个更好的解决方案,无需扩展,但是像这样的东西会起作用吗?
library(tidyverse)

# Heart-rate samples, one row per second.
df1 <- tibble(
  timestamp = ymd_hms("2023-03-22 09:04:53") + dseconds(0:3),
  HRbpm = c(101, 124, 103, 111)
)
df1

# Behaviour bouts with their start and end times.
df2 <- tibble(
  Behaviour = c("Forage", "Vigilance", "Forage"),
  BhrTimeStart = ymd_hms(c("2023-03-22 09:04:53",
                           "2023-03-22 09:04:58",
                           "2023-03-22 09:05:03")),
  BhrTimeEnd = ymd_hms(c("2023-03-22 09:04:58",
                         "2023-03-22 09:05:03",
                         "2023-03-22 09:05:10"))
)

# Bout length in seconds.
# The units are requested explicitly: subtracting two date-times lets R pick
# the unit (secs/mins/hours/...) from the data, which silently yields minutes
# or hours once the bouts get longer.
# Bouts are treated as half-open, [start, end), so the second on which one
# bout ends and the next begins belongs to the next bout only. Counting the
# end second as well would list that second twice and duplicate rows in the
# join below.
df2$size <- as.double(
  difftime(df2$BhrTimeEnd, df2$BhrTimeStart, units = "secs")
)

# Expand every bout to one row per second: repeat the bout's start time and
# add the offsets 0, 1, ..., size - 1. Working from the start times keeps
# their time zone, so the result lines up with `df1$timestamp`.
df3 <- tibble(
  behaviour = rep(df2$Behaviour, df2$size),
  timestamp = rep(df2$BhrTimeStart, df2$size) + (sequence(df2$size) - 1)
)
df3$BhrTimeStart <- rep(df2$BhrTimeStart, df2$size)
df3$BhrTimeEnd <- rep(df2$BhrTimeEnd, df2$size)

# Plain equality join on the per-second timestamp.
df1 %>%
  left_join(df3, by = "timestamp")
您可以将
df1
和 df2
替换为您的数据集。
这可能有用:
library(dplyr)
library(lubridate)
# Example heart-rate series: eleven consecutive seconds on 2023-03-22 plus one
# sample from 2023-07-22 that lies outside every behaviour interval.
allsecsHR <- local({
  clock <- c(
    "09:04:53", "09:04:54", "09:04:55", "09:04:56", "09:04:57", "09:04:58",
    "09:04:59", "09:05:00", "09:05:01", "09:05:02", "09:05:03"
  )
  stamps <- c(paste("2023-03-22", clock), "2023-07-22 09:05:03")
  data.frame(
    timestamp = ymd_hms(stamps),
    HRbpm = c(101, 124, 103, 111, 112, 143, 109, 129, 122, 125, 110, 202)
  )
})
# Example behaviour bouts: three back-to-back intervals, each starting at the
# second on which the previous one ends.
bhr <- local({
  edges <- ymd_hms(
    paste("2023-03-22", c("09:04:53", "09:04:58", "09:05:03", "09:05:10"))
  )
  data.frame(
    Behaviour = c("Forage", "Vigilance", "Keks"),
    BhrTimeStart = edges[1:3],
    BhrTimeEnd = edges[2:4]
  )
})
# Look up the behaviour in progress at a single point in time.
#
# Intervals are half-open, [BhrTimeStart, BhrTimeEnd): a timestamp equal to a
# start time matches that row, a timestamp equal to an end time does not.
# When several rows match, the first one wins.
#
# @param timestamp A single date-time to classify.
# @param intervals Data frame with columns `Behaviour`, `BhrTimeStart` and
#   `BhrTimeEnd`. Defaults to the `bhr` object visible when the function is
#   called, which is what the function always used.
# @return A length-one character vector: the matching `Behaviour`, or
#   `NA_character_` when no interval contains `timestamp`. Returning a typed
#   NA keeps the result type the same whether or not a match is found.
check_within_interval <- function(timestamp, intervals = bhr) {
  hit <- which(
    timestamp >= intervals$BhrTimeStart & timestamp < intervals$BhrTimeEnd
  )
  if (length(hit) == 0L) {
    return(NA_character_)
  }
  # as.character() so a factor `Behaviour` column yields its label.
  as.character(intervals$Behaviour[hit[1L]])
}
# Label every heart-rate sample with the behaviour in progress at that second.
# vapply() fixes the result to a character vector (sapply() changes its return
# type with the input, e.g. it returns an empty list for zero rows).
# as.character() turns an untyped NA from the helper into NA_character_ so the
# declared result type always holds.
allsecsHR$Behaviour <- vapply(
  allsecsHR$timestamp,
  function(ts) as.character(check_within_interval(ts)),
  character(1)
)
print(allsecsHR)
给我以下输出:
> print(allsecsHR)
timestamp HRbpm Behaviour
1 2023-03-22 09:04:53 101 Forage
2 2023-03-22 09:04:54 124 Forage
3 2023-03-22 09:04:55 103 Forage
4 2023-03-22 09:04:56 111 Forage
5 2023-03-22 09:04:57 112 Forage
6 2023-03-22 09:04:58 143 Vigilance
7 2023-03-22 09:04:59 109 Vigilance
8 2023-03-22 09:05:00 129 Vigilance
9 2023-03-22 09:05:01 122 Vigilance
10 2023-03-22 09:05:02 125 Vigilance
11 2023-03-22 09:05:03 110 Keks
12 2023-07-22 09:05:03 202 <NA>
请注意,我在 allsecsHR 中添加了一个未被 bhr 中任何区间覆盖的时间戳,以展示对 NA 的处理。
Keks 是另一种行为,用于展示对 `<` 边界值(即区间结束时间)的处理。