使用 lapply 和多个 DF 列表作为函数的输入

问题描述 投票:0回答:1

我有 3 名受试者接受了干预 - 所以我有他们所有人的治疗前和治疗后 DF。最终的6个数据集如下:

df_pre_sub101, df_post_sub101, df_pre_sub202, df_post_sub202, df_pre_sub303, df_post_sub303 

下面是我当前的代码,其中所有数据集名称都是手动输入的。当我有更多受试者时,我希望自动化此过程,这样就不必为每个新数据集手动重复所有这些步骤。这是代码:

# Load every subject's pre- and post-treatment CSV into its own data frame.
# Subject 101
df_pre_sub101  <- read_csv("sub101_pre.csv")
df_post_sub101 <- read_csv("sub101_post.csv")
# Subject 202
df_pre_sub202  <- read_csv("sub202_pre.csv")
df_post_sub202 <- read_csv("sub202_post.csv")
# Subject 303
df_pre_sub303  <- read_csv("sub303_pre.csv")
df_post_sub303 <- read_csv("sub303_post.csv")

# Keep only the "State" and "MEP AMP (mV)" columns of every data frame and
# rename the amplitude column to mepAMP_pre (pre files) or mepAMP_post
# (post files).
df_pre_sub101 <- df_pre_sub101[, c("State", "MEP AMP (mV)")] %>%
  rename("mepAMP_pre" = "MEP AMP (mV)")
df_post_sub101 <- df_post_sub101[, c("State", "MEP AMP (mV)")] %>%
  rename("mepAMP_post" = "MEP AMP (mV)")

df_pre_sub202 <- df_pre_sub202[, c("State", "MEP AMP (mV)")] %>%
  rename("mepAMP_pre" = "MEP AMP (mV)")
df_post_sub202 <- df_post_sub202[, c("State", "MEP AMP (mV)")] %>%
  rename("mepAMP_post" = "MEP AMP (mV)")

df_pre_sub303 <- df_pre_sub303[, c("State", "MEP AMP (mV)")] %>%
  rename("mepAMP_pre" = "MEP AMP (mV)")
# The post data must be named mepAMP_post (not mepAMP_pre): the combine step
# reads df_post_sub303$mepAMP_post, which would otherwise be NULL and the post
# column would be silently missing for this subject.
df_post_sub303 <- df_post_sub303[, c("State", "MEP AMP (mV)")] %>%
  rename("mepAMP_post" = "MEP AMP (mV)")

# For each subject, build one data frame holding the subject ID, the pre
# columns and the post amplitude column (subID first, mepAMP_post last).
df_sub101 <- cbind(
  subID = 101,
  df_pre_sub101,
  mepAMP_post = df_post_sub101$mepAMP_post
)

df_sub202 <- cbind(
  subID = 202,
  df_pre_sub202,
  mepAMP_post = df_post_sub202$mepAMP_post
)

df_sub303 <- cbind(
  subID = 303,
  df_pre_sub303,
  mepAMP_post = df_post_sub303$mepAMP_post
)

# Stack the per-subject data frames into a single data frame.
df_all <- bind_rows(df_sub101, df_sub202, df_sub303)

有人告诉我,我可以尝试使用

lapply
,而不是 for 循环。这是我到目前为止所拥有的:

# Load every subject's pre- and post-treatment CSV into its own data frame.
# Subject 101
df_pre_sub101  <- read_csv("sub101_pre.csv")
df_post_sub101 <- read_csv("sub101_post.csv")
# Subject 202
df_pre_sub202  <- read_csv("sub202_pre.csv")
df_post_sub202 <- read_csv("sub202_post.csv")
# Subject 303
df_pre_sub303  <- read_csv("sub303_pre.csv")
df_post_sub303 <- read_csv("sub303_post.csv")

# Collect the pre and post data frames into two parallel lists. Element i of
# each list must belong to the same subject, because the two lists are paired
# by position. The lists are built from the data frames read in just above
# (the dfRC* objects referenced previously are never created in this script).
df.list.pre <- list(df_pre_sub101, df_pre_sub202, df_pre_sub303)
df.list.post <- list(df_post_sub101, df_post_sub202, df_post_sub303)

# Build one subject's combined data frame from its pre and post data.
#
# x: pre-treatment data frame; must contain "State" and "MEP AMP (mV)".
# y: post-treatment data frame; must contain the same two columns. Its rows
#    are paired with those of x by position, not matched on State.
# Returns a data frame with the columns State, mepAMP_pre and mepAMP_post.
myfunction <- function(x, y) {
  keep <- c("State", "MEP AMP (mV)")
  # Select the two columns and rename the amplitude column in one step.
  df.pre <- setNames(x[, keep], c("State", "mepAMP_pre"))
  df.post <- setNames(y[, keep], c("State", "mepAMP_post"))
  # Return the result visibly: ending the function on an assignment would
  # return it invisibly, so a direct call would print nothing.
  cbind(df.pre, mepAMP_post = df.post$mepAMP_post)
}

# use lapply to feed in my pre and post DF lists
# NOTE(review): this call fails. The first parameter of lapply() is named `X`
# (upper case), so `x = df.list.pre` does not match it and is passed on through
# `...` instead, which leaves `X` missing. lapply() also iterates over a single
# list only: `y = df.list.post` would hand the whole post list to every call.
# Iterating over two lists in parallel is what Map() / mapply() are for.
result <- lapply(x = df.list.pre, FUN = myfunction, y = df.list.post)

但是,这给了我一个错误,即缺少“X”。当我分别执行 pre 和 post DF 时,我最初使上述代码部分工作(因此不尝试输入两个列表)。我在网上发现我可以使用

mapply
,但是,这个为我输出的结构很奇怪。我想获得以下数据帧作为上述数据的输出:

但是我没能做到

  1. 使用
    lapply
  2. 将我的函数传递到多个列表(前和后 DF)
  3. 合并我原始代码中的
    df_sub101 <- cbind(subID = 101, dfRC_sub101)
    。这会添加一列
    subID
    ,为全部 11 个 State 的行填入该受试者的 ID。
  4. 我不确定是否仍然需要包含
    df_all <- bind_rows(df_sub101, df_sub202, df_sub303)
    或者
    lapply
    是否会自动合并它所迭代的各个受试者的结果。

希望得到一些帮助。

可重现的数据集示例(全部遵循相同的结构):

df_pre_sub101

structure(list(State = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), 
    `Pulse Time (ms)` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), `MEP AMP (mV)` = c(0.197647215643277, 0.0954348832989732, 
    0.523944806307554, 0.625025630825096, 0.924895880371332, 
    0.898288637399673, 0.918388723582029, 1.43350288197398, 2.1276653945446, 
    1.6496229916811, 1.64261059297456), `MEP Max (mV)` = c(0.112193062901497, 
    0.0476949736475945, 0.337814380414784, 0.382357756296794, 
    0.551612275838852, 0.599131107330322, 0.5466592207551, 0.875852793455124, 
    1.34147183597088, 0.968559554219246, 0.960105763541328), 
    `MEP Min (mV)` = c(-0.0854541527417799, -0.0477399096513788, 
    -0.18613042589277, -0.242667874528302, -0.37328360453248, 
    -0.299157530069351, -0.371729502826929, -0.557650088518858, 
    -0.786193558573723, -0.681063437461853, -0.682504829433229
    ), `MEP Max T. (ms)` = c(158.066666666667, 158.866666666667, 
    159.7, 160.244444444444, 160, 160.64, 159.9, 160.74, 159.82, 
    161.28, 160.355555555556), `MEP Min T. (ms)` = c(165.066666666667, 
    166.866666666667, 164.4, 163.577777777778, 163.84, 164.5, 
    165.4, 165.18, 165.58, 166.74, 165.822222222222), `MEP Latency (ms)` = c(28.0666666666667, 
    28.8666666666667, 29.7, 30.2444444444444, 30, 30.64, 29.9, 
    30.74, 29.82, 31.28, 30.3555555555555), `# Trials` = c(3, 
    3, 4, 9, 10, 10, 10, 10, 10, 10, 9), `# Rejected` = c(7, 
    7, 6, 1, 0, 0, 0, 0, 0, 0, 1), `Rejected Trials` = c("8 22 34 57 73 76 96", 
    "3 14 20 42 45 67 77", "2 6 25 26 41 92", "38", NA, NA, NA, 
    NA, NA, NA, "1")), row.names = c(NA, -11L), spec = structure(list(
    cols = list(State = structure(list(), class = c("collector_double", 
    "collector")), `Pulse Time (ms)` = structure(list(), class = c("collector_logical", 
    "collector")), `MEP AMP (mV)` = structure(list(), class = c("collector_double", 
    "collector")), `MEP Max (mV)` = structure(list(), class = c("collector_double", 
    "collector")), `MEP Min (mV)` = structure(list(), class = c("collector_double", 
    "collector")), `MEP Max T. (ms)` = structure(list(), class = c("collector_double", 
    "collector")), `MEP Min T. (ms)` = structure(list(), class = c("collector_double", 
    "collector")), `MEP Latency (ms)` = structure(list(), class = c("collector_double", 
    "collector")), `# Trials` = structure(list(), class = c("collector_double", 
    "collector")), `# Rejected` = structure(list(), class = c("collector_double", 
    "collector")), `Rejected Trials` = structure(list(), class = c("collector_character", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x10d9c71f0>, class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"))
r function for-loop lapply mapply
1个回答
0
投票

实际上不需要

lapply
,因为
readr::read_csv
可以一次导入多个文件。为此,创建一个包含文件名的列表或向量,例如可以通过
list.files
来实现。然后您可以一次性读取所有文件并按行合并它们,其中通过
id
参数,您可以为每个文件添加一个标识符(即文件名)。之后,您可以清理文件标识符以获取受试者 ID 和治疗阶段(pre/post)。最后,选择您想要的列并将数据转换为宽格式。

下面的代码首先根据您提供的示例数据创建四个示例文件。

# Make example files. Save them in a temporary directory.
# File names: sub101_pre.csv, sub101_post.csv, sub202_pre.csv, sub202_post.csv
fns <- paste0(
  "sub", rep(c(101, 202), each = 2),
  "_", rep(c("pre", "post"), 2),
  ".csv"
)
path <- tempdir()
# `dat` is the example data frame defined in the data section of this answer.
# A for loop is used because only the side effect (writing the files) matters;
# lapply() would additionally print a list of NULLs.
for (fn in fns) {
  write.csv(dat, file.path(path, fn), row.names = FALSE)
}

library(readr)
library(dplyr, warn=FALSE)
library(tidyr)

# Get a list of the files to read. The pattern is anchored with `$` so that
# only names ending in ".csv" match (not e.g. "x.csv.bak").
files <- list.files(path, pattern = "\\.csv$", full.names = TRUE)

# Read all files in one call; `id = "file"` adds a column holding the path of
# the file each row came from.
read_csv(files, id = "file") |>
  # Clean file name: drop the directory and the trailing ".csv"
  mutate(file = sub("\\.csv$", "", basename(file))) |>
  # Separate into subject and treatment columns, e.g. "sub101_pre"
  separate(file, into = c("subject", "treatment"), sep = "_") |>
  # Select and rename
  select(subject, treatment, State, mepAMP = "MEP AMP (mV)") |>
  # One row per subject and State, with one amplitude column per treatment
  pivot_wider(
    names_from = treatment,
    values_from = mepAMP,
    names_prefix = "mepAMP_"
  )

#> # A tibble: 22 × 4
#>    subject State mepAMP_post mepAMP_pre
#>    <chr>   <dbl>       <dbl>      <dbl>
#>  1 sub101      1      0.198      0.198 
#>  2 sub101      2      0.0954     0.0954
#>  3 sub101      3      0.524      0.524 
#>  4 sub101      4      0.625      0.625 
#>  5 sub101      5      0.925      0.925 
#>  6 sub101      6      0.898      0.898 
#>  7 sub101      7      0.918      0.918 
#>  8 sub101      8      1.43       1.43  
#>  9 sub101      9      2.13       2.13  
#> 10 sub101     10      1.65       1.65  
#> # ℹ 12 more rows

数据

# Example data used to create the example CSV files in this answer.
# The literal looks like dput() output of a data frame read with readr: it
# carries a `spec` attribute (column collectors, delim = ",") and the class
# "spec_tbl_df". It has 11 rows (State 1-11); the analysis only uses the
# `State` and `MEP AMP (mV)` columns.
dat <- structure(list(
  State = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
  `Pulse Time (ms)` = c(
    NA, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA
  ), `MEP AMP (mV)` = c(
    0.197647215643277, 0.0954348832989732,
    0.523944806307554, 0.625025630825096, 0.924895880371332,
    0.898288637399673, 0.918388723582029, 1.43350288197398, 2.1276653945446,
    1.6496229916811, 1.64261059297456
  ), `MEP Max (mV)` = c(
    0.112193062901497,
    0.0476949736475945, 0.337814380414784, 0.382357756296794,
    0.551612275838852, 0.599131107330322, 0.5466592207551, 0.875852793455124,
    1.34147183597088, 0.968559554219246, 0.960105763541328
  ),
  `MEP Min (mV)` = c(
    -0.0854541527417799, -0.0477399096513788,
    -0.18613042589277, -0.242667874528302, -0.37328360453248,
    -0.299157530069351, -0.371729502826929, -0.557650088518858,
    -0.786193558573723, -0.681063437461853, -0.682504829433229
  ), `MEP Max T. (ms)` = c(
    158.066666666667, 158.866666666667,
    159.7, 160.244444444444, 160, 160.64, 159.9, 160.74, 159.82,
    161.28, 160.355555555556
  ), `MEP Min T. (ms)` = c(
    165.066666666667,
    166.866666666667, 164.4, 163.577777777778, 163.84, 164.5,
    165.4, 165.18, 165.58, 166.74, 165.822222222222
  ), `MEP Latency (ms)` = c(
    28.0666666666667,
    28.8666666666667, 29.7, 30.2444444444444, 30, 30.64, 29.9,
    30.74, 29.82, 31.28, 30.3555555555555
  ), `# Trials` = c(
    3,
    3, 4, 9, 10, 10, 10, 10, 10, 10, 9
  ), `# Rejected` = c(
    7,
    7, 6, 1, 0, 0, 0, 0, 0, 0, 1
  ), `Rejected Trials` = c(
    "8 22 34 57 73 76 96",
    "3 14 20 42 45 67 77", "2 6 25 26 41 92", "38", NA, NA, NA,
    NA, NA, NA, "1"
  )
), row.names = c(NA, -11L), spec = structure(list(
  cols = list(State = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `Pulse Time (ms)` = structure(list(), class = c(
    "collector_logical",
    "collector"
  )), `MEP AMP (mV)` = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `MEP Max (mV)` = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `MEP Min (mV)` = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `MEP Max T. (ms)` = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `MEP Min T. (ms)` = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `MEP Latency (ms)` = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `# Trials` = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `# Rejected` = structure(list(), class = c(
    "collector_double",
    "collector"
  )), `Rejected Trials` = structure(list(), class = c(
    "collector_character",
    "collector"
  ))), default = structure(list(), class = c(
    "collector_guess",
    "collector"
  )), delim = ","
), class = "col_spec"), class = c(
  "spec_tbl_df",
  "tbl_df", "tbl", "data.frame"
))
© www.soinside.com 2019 - 2024. All rights reserved.