library(lubridate)
library(dplyr)
library(suncalc)
这是我数据的前 6 行的列表。我只为我正在尝试做的事情拉了必要的专栏。
structure(list(Date.of.Capture = structure(c(18383, 18393, 18395,
18395, 18402, 18815), class = "Date"), Month = c(5L, 5L, 5L,
5L, 5L, 7L), Day = c(1L, 11L, 13L, 13L, 20L, 7L), Year = c(2020L,
2020L, 2020L, 2020L, 2020L, 2021L), Time.of.Capture = c("6:24",
"6:27", "8:55", "8:55", "20:22", "6:26"), Time = structure(c(1588314240,
1589178420, 1589360100, 1589360100, 1590006120, 1625639160), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), ToD = c(NA, NA, NA, NA, "Daytime",
NA), lat = c(40.75336, 40.75336, 40.75336, 40.75336, 40.75336,
40.75336), lon = c(-111.624088, -111.624088, -111.624088, -111.624088,
-111.624088, -111.624088), sunriseEnd = structure(c(1588336111,
1589199430, 1589372111, 1589372111, 1589976545, 1625659614), class = c("POSIXct",
"POSIXt"), tzone = "MST"), sunsetStart = structure(c(1588386056,
1589250673, 1589423593, 1589423593, 1590028795, 1625713092), class = c("POSIXct",
"POSIXt"), tzone = "MST"), dawn = structure(c(1588334143, 1589197398,
1589370065, 1589370065, 1589974454, 1625657442), class = c("POSIXct",
"POSIXt"), tzone = "MST"), dusk = structure(c(1588388024, 1589252706,
1589425639, 1589425639, 1590030886, 1625715264), class = c("POSIXct",
"POSIXt"), tzone = "MST")), class = "data.frame", row.names = c(NA,
6L))
这是我尝试运行的代码,用于从“时间”列中提取信息并将其与所有条目的“黄昏”、“黎明”、“sunsetStart”和“sunriseEnd”列进行比较,然后将它们放入分类变量中基于条目出现的“时间”。例如,我的代码尝试将“时间”列中大于“黎明”列的每个值和小于“sunriseEnd”列的每个值都放在“ToD”列中作为“黎明”(如果不正确,则继续所有其他论点)。
md$ToD<-NA
for(i in 1: nrow(md)){
if (md$Time[i] > md$dawn[i] & md$Time[i] < md$sunriseEnd[i]){
md$ToD[i] <- "Dawn"
} else if (md$Time[i] > md$sunsetStart[i] & md$Time[i] < md$dusk[i]){
md$ToD[i] <- "Dusk"
} else if (md$Time[i] > md$dusk[i] & md$Time[i] < md$dawn[i]){
md$ToD[i] <- "Nighttime"
} else if (md$Time[i] > md$sunriseEnd[i] & md$Time[i] < md$sunsetStart[i]){
md$ToD[i] <- "Daytime"
}
}
unique(md$ToD)
下面是我用来以 suncalc 可以使用它们的方式格式化我的列的代码,然后创建一个单独的数据框,我在其中加入了主“md”数据框。
md$Time <- paste0(md$Year, "-", md$Month, "-", md$Day, " ", md$Time.of.Capture,":00")
md$Time <- ymd_hms(md$Time)
md$Date.of.Capture <- paste0(md$Year, "-", md$Month, "-", md$Day)
md$Date.of.Capture <- as.Date(md$Date.of.Capture, format = "%Y-%m-%d", tz = "MST")
timesofday <- getSunlightTimes(date = md$Date.of.Capture,
lat = 40.753360, lon = -111.624088,
tz="MST", keep=c("sunriseEnd", "sunsetStart", "dawn", "dusk"))
md <- left_join(md, timesofday, by = c("Date.of.Capture" = "date"))
我原以为这些逻辑论证会奏效,但由于某些原因它们没有奏效。这可能是格式化代码的编写方式,因为问题似乎在数据集中。
{dplyr} 和
findInterval
提供了一种简洁的方法来对一天中的时间进行分类,给定您的示例数据框md:
library(dplyr)
ToD_names <- c('dawn', 'daytime', 'dusk', 'nighttime')
md |>
rowwise() |>
mutate(ToD = findInterval(Time, c(-Inf, dawn, sunriseEnd, sunsetStart, dusk)),
ToD = ToD_names[1 + ToD]
)