在 R 数据框中计算访客持续时间:如何计算代理时间、房间时间和等待时间?

问题描述 投票:0回答:2

我有一个 R 数据框,其中包含来自机构的数据。访客 (ID) 进入代理机构 (CREATE),然后被叫到不同的房间 (CALL),并离开这些房间 (DISCHARGE)。现在,我想计算某人在机构中的总体停留时间、他们在一个房间中的停留时间以及他们在两个房间之间等待的时间。我创建了一个示例 data.frame,其中最后三列已经包含我最终想要实现的所需结果。这些列在实际数据框中不存在。

对于 `diff_since_create`,只需在每个 ID 内计算每一行自 CREATE 事件以来经过的时间(分钟)。

变量 `time_in_room` 应表示同一 ID 自上一次“CALL”事件以来经过的分钟数,即访客在房间内停留的时间。在“CALL”事件所在的行,该时间从 0 开始计时,到“DISCHARGE”事件为止。

变量 `time_waiting` 应表示同一 ID 自上一次“DISCHARGE”事件以来经过的分钟数,即访客(通过 ID 标识)在房间外等待的时间。

“CALL”和“DISCHARGE”事件之间,或者“DISCHARGE”和“CALL”事件之间可以出现多行。

有人知道如何用 R 计算最后 3 列吗?我将非常感谢您的帮助!

# Example data in dput() form: 18 event rows for two visitors (id 1 and 2).
# `time` is POSIXct, `action` is one of create / call / work / discharge, and
# `room` is empty on the "create" rows.
# The last three columns (diff_since_create, time_in_room, time_waiting) are the
# asker's hand-written expected results, not computed values; note that
# time_in_room is a character column that uses the string "na" for missing.
# The answers below refer to this object as `df`.
structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), time = structure(c(1713164400, 
1713164700, 1713165000, 1713165600, 1713165720, 1713165780, 1713166200, 
1713166500, 1713167100, 1713164400, 1713164700, 1713165000, 1713165600, 
1713166020, 1713166200, 1713166500, 1713166620, 1713167100), class = c("POSIXct", 
"POSIXt"), tzone = ""), action = c("create", "call", "discharge", 
"call", "work", "work", "discharge", "call", "discharge", "create", 
"call", "discharge", "call", "work", "discharge", "call", "work", 
"discharge"), room = c("", "room 1", "room 1", "room 2", "room 2", 
"room 2", "room 2", "room 3", "room 3", "", "room 1", "room 1", 
"room 2", "room 2", "room 2", "room 3", "room 3", "room 3"), 
    diff_since_create = c(0L, 5L, 10L, 20L, 22L, 23L, 30L, 35L, 
    45L, 0L, 5L, 10L, 20L, 20L, 30L, 35L, 37L, 45L), time_in_room = c("na", 
    "0", "5", "0", "2", "3", "10", "0", "10", "na", "0", "5", 
    "0", "7", "10", "0", "2", "10"), time_waiting = c(0L, 5L, 
    0L, 10L, 0L, 0L, 0L, 5L, 0L, 0L, 5L, 0L, 10L, 0L, 0L, 5L, 
    0L, 0L)), row.names = c(NA, -18L), class = "data.frame")
r dataframe dplyr time lubridate
2个回答
1
投票
# Compute the three durations (in minutes) and place each result next to the
# asker's expected column for easy comparison.
# Assumes dplyr (>= 1.1, for `.by`) is attached; tidyr and lubridate names are
# namespace-qualified so they do not need to be attached.
df |>
  # dsc: minutes elapsed since the earliest event of this id.
  mutate(dsc = (time - min(time)) / lubridate::dminutes(), .by = id,
         .after = diff_since_create) |>
  # tir: minutes since the earliest event of this id/room pair; NA on the
  # "create" row.
  # NOTE(review): grouping by room means a second visit to the same room would
  # be measured from the first visit's start -- confirm rooms are not revisited.
  mutate(tir = if_else(action == "create", NA_real_,
                       (time - min(time)) / lubridate::dminutes()),
         .after = time_in_room,
         .by = c(id, room)) |>
  # Record the timestamp of each "discharge"/"create" row, then carry it down
  # within each id so later rows can see the most recent one.
  mutate(last_disch = if_else(action %in% c("discharge", "create"), time,
                              lubridate::NA_POSIXct_)) |>
  group_by(id) |>
  tidyr::fill(last_disch) |>
  ungroup() |>
  # tw: waiting time is reported only on "call" rows; all other rows get 0.
  mutate(tw = if_else(action == "call",
                      (time - last_disch) / lubridate::dminutes(), 0),
         .after = time_waiting) |>
  select(-last_disch)

结果(这大部分匹配。我认为第 14 行 diff_since_create 应该是 27。)

# A tibble: 18 × 10
      id time                action    room     diff_since_create   dsc time_in_room   tir time_waiting    tw
   <int> <dttm>              <chr>     <chr>                <int> <dbl> <chr>        <dbl>        <int> <dbl>
 1     1 2024-04-15 00:00:00 create    ""                       0     0 na              NA            0     0
 2     1 2024-04-15 00:05:00 call      "room 1"                 5     5 0                0            5     5
 3     1 2024-04-15 00:10:00 discharge "room 1"                10    10 5                5            0     0
 4     1 2024-04-15 00:20:00 call      "room 2"                20    20 0                0           10    10
 5     1 2024-04-15 00:22:00 work      "room 2"                22    22 2                2            0     0
 6     1 2024-04-15 00:23:00 work      "room 2"                23    23 3                3            0     0
 7     1 2024-04-15 00:30:00 discharge "room 2"                30    30 10              10            0     0
 8     1 2024-04-15 00:35:00 call      "room 3"                35    35 0                0            5     5
 9     1 2024-04-15 00:45:00 discharge "room 3"                45    45 10              10            0     0
10     2 2024-04-15 00:00:00 create    ""                       0     0 na              NA            0     0
11     2 2024-04-15 00:05:00 call      "room 1"                 5     5 0                0            5     5
12     2 2024-04-15 00:10:00 discharge "room 1"                10    10 5                5            0     0
13     2 2024-04-15 00:20:00 call      "room 2"                20    20 0                0           10    10
14     2 2024-04-15 00:27:00 work      "room 2"                20    27 7                7            0     0
15     2 2024-04-15 00:30:00 discharge "room 2"                30    30 10              10            0     0
16     2 2024-04-15 00:35:00 call      "room 3"                35    35 0                0            5     5
17     2 2024-04-15 00:37:00 work      "room 3"                37    37 2                2            0     0
18     2 2024-04-15 00:45:00 discharge "room 3"                45    45 10              10            0     0

0
投票

另一种略有不同的 `tidyverse` 做法,使用 `dplyr::lag`:

library(tidyverse)
conflicted::conflict_prefer("lag", winner = "dplyr") # or just `dplyr::lag`
# ----------------

# Derive time_in_room and time_waiting as lubridate Durations.
df %>%
  as_tibble() %>%
  mutate(
    .by = id,
    # Running count of "call" events per id: rows sharing a visit_id belong to
    # the same room visit (the "create" row gets visit_id 0).
    visit_id = cumsum(if_else(action == "call", 1, 0)),
    # Overwritten here but not kept by the final select().
    diff_since_create = time - min(time),
    # Non-NA only on the row where visit_id increments (a new "call"); the
    # value is the gap to the immediately preceding row of the same id.
    # NOTE(review): if other rows sit between a "discharge" and the next
    # "call", this is not the time since the discharge -- confirm on real data.
    time_waiting = if_else(
      visit_id == lag(visit_id) + 1,
      time - lag(time),
      as.duration(NA)
    )
  ) %>%
  # Time since the first row of the current visit (0 on the "call" row itself).
  mutate(
    .by = c(id, visit_id),
    time_in_room = as.duration(time - min(time))
  ) %>%
  select(id:room, time_in_room, time_waiting)

输出:

# A tibble: 18 × 6
      id time                action    room     time_in_room       time_waiting      
   <int> <dttm>              <chr>     <chr>    <Duration>         <Duration>        
 1     1 2024-04-15 04:00:00 create    ""       0s                 NA                
 2     1 2024-04-15 04:05:00 call      "room 1" 0s                 300s (~5 minutes) 
 3     1 2024-04-15 04:10:00 discharge "room 1" 300s (~5 minutes)  NA                
 4     1 2024-04-15 04:20:00 call      "room 2" 0s                 600s (~10 minutes)
 5     1 2024-04-15 04:22:00 work      "room 2" 120s (~2 minutes)  NA                
 6     1 2024-04-15 04:23:00 work      "room 2" 180s (~3 minutes)  NA                
 7     1 2024-04-15 04:30:00 discharge "room 2" 600s (~10 minutes) NA                
 8     1 2024-04-15 04:35:00 call      "room 3" 0s                 300s (~5 minutes) 
 9     1 2024-04-15 04:45:00 discharge "room 3" 600s (~10 minutes) NA                
10     2 2024-04-15 04:00:00 create    ""       0s                 NA                
11     2 2024-04-15 04:05:00 call      "room 1" 0s                 300s (~5 minutes) 
12     2 2024-04-15 04:10:00 discharge "room 1" 300s (~5 minutes)  NA                
13     2 2024-04-15 04:20:00 call      "room 2" 0s                 600s (~10 minutes)
14     2 2024-04-15 04:27:00 work      "room 2" 420s (~7 minutes)  NA                
15     2 2024-04-15 04:30:00 discharge "room 2" 600s (~10 minutes) NA                
16     2 2024-04-15 04:35:00 call      "room 3" 0s                 300s (~5 minutes) 
17     2 2024-04-15 04:37:00 work      "room 3" 120s (~2 minutes)  NA                
18     2 2024-04-15 04:45:00 discharge "room 3" 600s (~10 minutes) NA 

或者,像 @Jon Spring 的做法那样,以分钟为单位表示:

> mutate(df, across(where(is.duration), \(x) x/dminutes()))
# A tibble: 18 × 6
      id time                action    room     time_in_room time_waiting
   <int> <dttm>              <chr>     <chr>           <dbl>        <dbl>
 1     1 2024-04-15 04:00:00 create    ""                  0           NA
 2     1 2024-04-15 04:05:00 call      "room 1"            0            5
 3     1 2024-04-15 04:10:00 discharge "room 1"            5           NA
 4     1 2024-04-15 04:20:00 call      "room 2"            0           10
 5     1 2024-04-15 04:22:00 work      "room 2"            2           NA
 6     1 2024-04-15 04:23:00 work      "room 2"            3           NA
 7     1 2024-04-15 04:30:00 discharge "room 2"           10           NA
 8     1 2024-04-15 04:35:00 call      "room 3"            0            5
 9     1 2024-04-15 04:45:00 discharge "room 3"           10           NA
10     2 2024-04-15 04:00:00 create    ""                  0           NA
11     2 2024-04-15 04:05:00 call      "room 1"            0            5
12     2 2024-04-15 04:10:00 discharge "room 1"            5           NA
13     2 2024-04-15 04:20:00 call      "room 2"            0           10
14     2 2024-04-15 04:27:00 work      "room 2"            7           NA
15     2 2024-04-15 04:30:00 discharge "room 2"           10           NA
16     2 2024-04-15 04:35:00 call      "room 3"            0            5
17     2 2024-04-15 04:37:00 work      "room 3"            2           NA
18     2 2024-04-15 04:45:00 discharge "room 3"           10           NA
© www.soinside.com 2019 - 2024. All rights reserved.