有没有办法只计算工作时间的“任务时间”?工作时间周一至周五早八点至晚五点。示例:
使用
datediff()
:
预期结果:
示例任务时间:
# df %>%
# select(v_v_initiated,v_v_complete)
v_v_initiated v_v_complete
1 2020-04-23 14:13:52.0000000 2020-04-23 16:04:28.0000000
2 2020-11-10 11:48:53.0000000 2020-11-10 13:12:31.0000000
3 2020-10-20 16:03:39.0000000 2020-10-20 16:25:16.0000000
4 2020-04-02 13:43:54.0000000 2020-04-02 14:14:45.0000000
5 2020-07-09 08:52:54.0000000 2020-07-23 09:18:29.0000000
6 2020-06-09 14:56:33.0000000 2020-06-10 07:44:17.0000000
7 2020-09-17 15:11:39.0000000 2020-09-17 15:13:41.0000000
8 2020-10-28 14:08:20.0000000 2020-10-28 14:07:35.0000000
9 2020-04-21 12:55:36.0000000 2020-04-27 12:56:17.0000000
10 2020-11-06 11:02:03.0000000 2020-11-06 11:02:30.0000000
11 2020-02-17 12:29:21.0000000 2020-02-18 12:52:23.0000000
12 2020-08-25 15:25:46.0000000 2020-08-26 10:18:26.0000000
13 2020-02-19 15:05:28.0000000 2020-02-20 09:43:48.0000000
14 2020-09-23 21:19:41.0000000 2020-09-24 14:52:21.0000000
15 2020-07-01 14:20:11.0000000 2020-07-01 14:20:59.0000000
16 2020-05-01 15:22:58.0000000 2020-05-01 16:32:35.0000000
17 2020-06-29 13:10:58.0000000 2020-06-30 13:53:29.0000000
18 2020-06-16 12:56:54.0000000 2020-06-16 14:27:15.0000000
19 2020-03-27 11:02:29.0000000 2020-03-30 15:18:51.0000000
20 2020-04-08 07:38:01.0000000 2020-04-08 07:52:35.0000000
21 2020-07-30 09:32:42.0000000 2020-07-30 10:32:28.0000000
22 2020-06-17 14:03:31.0000000 2020-07-10 15:38:03.0000000
23 2020-04-24 10:41:27.0000000 2020-04-29 13:07:05.0000000
24 2020-08-26 10:41:10.0000000 2020-08-26 12:55:23.0000000
25 2020-10-26 18:11:16.0000000 2020-10-27 16:10:39.0000000
26 2020-01-08 11:12:49.0000000 2020-01-09 09:18:37.0000000
27 2020-04-17 11:40:10.0000000 2020-04-17 15:51:21.0000000
28 2020-02-11 10:38:21.0000000 2020-02-11 10:33:54.0000000
29 2020-03-23 12:10:21.0000000 2020-03-23 12:33:06.0000000
30 2020-06-02 12:44:00.0000000 2020-06-03 08:28:05.0000000
31 2020-04-13 09:30:31.0000000 2020-04-13 13:16:55.0000000
32 2020-04-07 17:36:02.0000000 2020-04-07 17:36:44.0000000
33 2020-01-15 12:24:42.0000000 2020-01-15 12:25:00.0000000
34 2020-08-18 08:55:58.0000000 2020-08-18 09:02:34.0000000
35 2020-07-06 14:10:23.0000000 2020-07-07 10:28:05.0000000
36 2020-03-25 15:03:20.0000000 2020-03-31 14:17:43.0000000
37 2020-01-29 12:58:33.0000000 2020-02-14 09:53:06.0000000
38 2020-02-07 15:11:21.0000000 2020-02-10 09:13:53.0000000
39 2020-07-27 17:51:13.0000000 2020-07-29 11:52:51.0000000
40 2020-09-02 11:43:02.0000000 2020-09-02 13:10:46.0000000
41 2020-07-22 11:04:50.0000000 2020-07-22 11:12:34.0000000
42 2020-06-29 13:57:17.0000000 2020-06-30 07:34:55.0000000
43 2020-07-21 10:46:58.0000000 2020-07-21 16:15:59.0000000
44 2020-05-27 07:38:46.0000000 2020-05-27 07:51:24.0000000
45 2020-07-14 10:33:49.0000000 2020-07-14 11:38:28.0000000
46 2020-06-04 16:59:09.0000000 2020-06-09 10:49:20.0000000
您可以采用另一个函数来计算某个时间间隔的营业时间(例如this。
首先,创建从开始到结束的日期序列,并按仅包含工作日进行过滤。
接下来,使用感兴趣的营业时间创建时间间隔(在本例中为“08:00”到“17:00”)。
确定每天的工作时间与您的时间有多少重叠。这样,如果时间从“09:05”开始,则该时间将用作一天的开始,而不是“08:00”。
最后,总结时间间隔,并确定工作日数(假设每天 9 小时)以及剩余的小时和分钟。
如果您想将此函数应用于数据框中的行,您可以使用
mapply
,如下所示:
df$business_hours <- mapply(calc_bus_hours, df$start_date, df$end_date)
希望这对您有帮助。
library(lubridate)
library(dplyr)
calc_bus_hours <- function(start, end) {
my_dates <- seq.Date(as.Date(start), as.Date(end), by = "day")
my_dates <- my_dates[!weekdays(my_dates) %in% c("Saturday", "Sunday")]
my_intervals <- interval(ymd_hm(paste(my_dates, "08:00"), tz = "UTC"), ymd_hm(paste(my_dates, "17:00"), tz = "UTC"))
int_start(my_intervals[1]) <- pmax(pmin(start, int_end(my_intervals[1])), int_start(my_intervals[1]))
int_end(my_intervals[length(my_intervals)]) <- pmax(pmin(end, int_end(my_intervals[length(my_intervals)])),
int_start(my_intervals[length(my_intervals)]))
total_time <- sum(time_length(my_intervals, "minutes"))
total_days <- total_time %/% (9 * 60)
total_hours <- total_time %% (9 * 60) %/% 60
total_minutes <- total_time - (total_days * 9 * 60) - (total_hours * 60)
paste(total_days, "days,", total_hours, "hours,", total_minutes, "minutes")
}
calc_bus_hours(as.POSIXct("11/4/2020 9:05", format = "%m/%d/%Y %H:%M", tz = "UTC"),
as.POSIXct("11/9/2020 11:25", format = "%m/%d/%Y %H:%M", tz = "UTC"))
[1] "3 days, 2 hours, 20 minutes"
编辑:正如@DPH所提到的,假期和部分假期更加复杂。
您可以创建假期数据框并指示开放时间,允许部分假期(例如平安夜从上午 8:00 到中午)。
这是一个修改后的函数,应该给出可比较的结果。
library(lubridate)
library(dplyr)
holiday_df <- data.frame(
date = as.Date(c("2020-12-24", "2020-12-25", "2020-12-31", "2020-01-01")),
start = c("08:00", "08:00", "08:00", "08:00"),
end = c("12:00", "08:00", "08:00", "08:00")
)
calc_bus_hours <- function(start, end) {
my_dates <- seq.Date(as.Date(start), as.Date(end), by = "day")
my_dates_df <- data.frame(
date = my_dates[!weekdays(my_dates) %in% c("Saturday", "Sunday")],
start = "08:00",
end = "17:00"
)
all_dates <- union_all(
inner_join(my_dates_df["date"], holiday_df),
anti_join(my_dates_df, holiday_df["date"])
) %>%
arrange(date)
my_intervals <- interval(ymd_hm(paste(all_dates$date, all_dates$start), tz = "UTC"),
ymd_hm(paste(all_dates$date, all_dates$end), tz = "UTC"))
int_start(my_intervals[1]) <- pmax(pmin(start, int_end(my_intervals[1])), int_start(my_intervals[1]))
int_end(my_intervals[length(my_intervals)]) <- pmax(pmin(end, int_end(my_intervals[length(my_intervals)])),
int_start(my_intervals[length(my_intervals)]))
total_time <- sum(time_length(my_intervals, "minutes"))
total_days <- total_time %/% (9 * 60)
total_hours <- total_time %% (9 * 60) %/% 60
total_minutes <- total_time - (total_days * 9 * 60) - (total_hours * 60)
paste(total_days, "days,", total_hours, "hours,", total_minutes, "minutes")
}