使用通配符将多个.csv文件读入R的代码

问题描述 投票:-1回答:1

我们通过BioRad运行多个平板,每个样品运行3次(G1-1,G1-2和G2)。

该代码从软件中获取等位基因识别CSV文件并运行它。我想通过某种方式合并通配符来使代码更容易。

可以将工作目录设置为保存3次运行中3个文件的文件位置。 (每个文件分别以G1-1,G1-2或G2开头)。

我已经附上了我目前正在使用的代码(手动为每个文件名逐一调用read.csv并不方便)。

###Code for converting call to allele variants###

library(dplyr)
library(plyr)
library(tidyverse)
library(openxlsx)

# G1-1 ----

# Load the G1-1 allelic discrimination export written by the PCR software.
# The data frame is deliberately NOT attach()ed: every step below refers to
# its columns through the data frame itself, and attaching several exports
# that share column names would only make them mask one another.
APOL_1_Allelic_Discrimination_G1_1 <- read.csv(
  "admin_2019-03-17 08-04-00_BR007717_PLATE6_G1-1_SAMPLES41-80_3-17-2019 -  Allelic Discrimination Results_ADSheet.csv"
)

# Export columns that are not needed for genotype calling.
drops <- c("X", "Sample", "Type", "RFU1", "RFU2")
# drop = FALSE keeps a data frame even if only one column survives.
G1_1 <- APOL_1_Allelic_Discrimination_G1_1[
  , !(names(APOL_1_Allelic_Discrimination_G1_1) %in% drops),
  drop = FALSE
]

# Translate the software's Call into the two allele columns for this assay.
# dplyr:: is spelled out because plyr is attached after dplyr in this script
# and masks mutate(). Calls not listed below become NA.
G1_1 <- G1_1 %>%
  dplyr::mutate(
    G1_1_1 = dplyr::case_when(
      Call == "Allele 1" ~ "G1^{S342G}",
      Call == "Allele 2" ~ "+",
      Call == "Heterozygote" ~ "G1^{S342G}",
      Call == "No Call" ~ "Blank"
    ),
    G1_1_2 = dplyr::case_when(
      Call == "Allele 1" ~ "G1^{S342G}",
      Call == "Allele 2" ~ "+",
      Call == "Heterozygote" ~ "+",
      Call == "No Call" ~ "Blank"
    )
  )

# The raw call is no longer needed once the allele columns exist.
G1_1$Call <- NULL

# G1-2 ----

# Load the G1-2 allelic discrimination export written by the PCR software.
# The data frame is deliberately NOT attach()ed: every step below refers to
# its columns through the data frame itself, and attaching several exports
# that share column names would only make them mask one another.
APOL_1_Allelic_Discrimination_G1_2 <- read.csv(
  "admin_2019-03-17 04-59-11_BR007717_PLATE5_G1-2_SAMPLES41-80_3-17-2019 -  Allelic Discrimination Results_ADSheet.csv"
)

# Export columns that are not needed for genotype calling.
drops <- c("X", "Sample", "Type", "RFU1", "RFU2")
# drop = FALSE keeps a data frame even if only one column survives.
G1_2 <- APOL_1_Allelic_Discrimination_G1_2[
  , !(names(APOL_1_Allelic_Discrimination_G1_2) %in% drops),
  drop = FALSE
]

# Translate the software's Call into the two allele columns for this assay.
# Note the mapping is mirrored relative to G1-1: here "Allele 2" is the
# variant call. dplyr:: is spelled out because plyr is attached after dplyr
# in this script and masks mutate(). Calls not listed below become NA.
G1_2 <- G1_2 %>%
  dplyr::mutate(
    G1_2_1 = dplyr::case_when(
      Call == "Allele 1" ~ "+",
      Call == "Allele 2" ~ "G1^{I384M}",
      Call == "Heterozygote" ~ "G1^{I384M}",
      Call == "No Call" ~ "Blank"
    ),
    G1_2_2 = dplyr::case_when(
      Call == "Allele 1" ~ "+",
      Call == "Allele 2" ~ "G1^{I384M}",
      Call == "Heterozygote" ~ "+",
      Call == "No Call" ~ "Blank"
    )
  )

# The raw call is no longer needed once the allele columns exist.
G1_2$Call <- NULL

# G2 ----

# Load the G2 allelic discrimination export written by the PCR software.
# The data frame is deliberately NOT attach()ed: every step below refers to
# its columns through the data frame itself, and attaching several exports
# that share column names would only make them mask one another.
APOL_1_Allelic_Discrimination_G2 <- read.csv(
  "admin_2019-03-17 01-41-46_BR007717_PLATE4_G2_SAMPLES41-80_3-17-2019 -  Allelic Discrimination Results_ADSheet.csv"
)

# Export columns that are not needed for genotype calling.
drops <- c("X", "Sample", "Type", "RFU1", "RFU2")
# drop = FALSE keeps a data frame even if only one column survives.
G2 <- APOL_1_Allelic_Discrimination_G2[
  , !(names(APOL_1_Allelic_Discrimination_G2) %in% drops),
  drop = FALSE
]

# Translate the software's Call into the two allele columns for this assay.
# dplyr:: is spelled out because plyr is attached after dplyr in this script
# and masks mutate(). Calls not listed below become NA.
# NOTE(review): the Call column is left in place here, whereas the G1-1 and
# G1-2 sections remove it -- confirm whether that difference is intended.
G2 <- G2 %>%
  dplyr::mutate(
    G2_1 = dplyr::case_when(
      Call == "Allele 1" ~ "G2",
      Call == "Allele 2" ~ "+",
      Call == "Heterozygote" ~ "G2",
      Call == "No Call" ~ "Blank"
    ),
    G2_2 = dplyr::case_when(
      Call == "Allele 1" ~ "G2",
      Call == "Allele 2" ~ "+",
      Call == "Heterozygote" ~ "+",
      Call == "No Call" ~ "Blank"
    )
  )

最终数据帧结构

structure(list(Well = structure(1:10, .Label = c("A01", "A02", 
"A03", "A04", "A05", "A06", "A07", "A08", "A09", "A10", "A11", 
"A12", "B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", 
"B09", "B10", "B11", "B12", "C01", "C02", "C03", "C04", "C05", 
"C06", "C07", "C08", "C09", "C10", "C11", "C12", "D01", "D02", 
"D03", "D04", "D05", "D06", "D07", "D08", "D09", "D10", "D11", 
"D12", "E01", "E02", "E03", "E04", "E05", "E06", "E07", "E08", 
"E09", "E10", "E11", "E12", "F01", "F02", "F03", "F04", "F05", 
"F06", "F07", "F08", "F09", "F10", "F11", "F12", "G01", "G02", 
"G03", "G04", "G05", "G06", "G07", "G08", "G09", "G10", "G11", 
"G12", "H01", "H02", "H03", "H04", "H05", "H06", "H07", "H08", 
"H09", "H10", "H11", "H12"), class = "factor"), G1_1_1 = c("Blank", 
"Blank", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", "+", 
"+"), G1_1_2 = c("Blank", "Blank", "+", "+", "+", "+", "+", "+", 
"+", "+"), G1_2_1 = c("Blank", "Blank", "+", "+", "+", "+", "G1^{I384M}", 
"G1^{I384M}", "+", "+"), G1_2_2 = c("Blank", "Blank", "+", "+", 
"+", "+", "+", "+", "+", "+"), G2_1 = c("Blank", "Blank", "+", 
"+", "+", "+", "G2", "G2", "+", "+"), G2_2 = c("Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+"), `Final genotype of APOL1` = c("NA", 
"NA", "G0/G0", "G0/G0", "G0/G0", "G0/G0", "G1^{GM}/G2", "G1^{GM}/G2", 
"G0/G0", "G0/G0"), `no APOL1 Risk Alleles` = c(NA, NA, 1, 1, 
1, 1, NA, NA, 1, 1), `1 APOL1 Risk Alleles` = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_), `2 APOL1 Risk Alleles` = c(NA, NA, NA, NA, NA, NA, 
1, 1, NA, NA)), row.names = c(NA, 10L), class = "data.frame")
r wildcard user-friendly
1个回答
0
投票

您可以通过遍历ID(G1-1,G1-2和G2)来做到这一点:

library(tidyverse)
# Assay IDs as they appear inside the exported file names.
file_ids <- c("G1-1", "G1-2", "G2")
# Export columns that are not needed for genotype calling.
drops <- c("X", "Sample", "Type", "RFU1", "RFU2")

# Per-assay labels: what an "Allele 1" call and an "Allele 2" call mean, and
# which label is the variant (used for the first allele of a heterozygote;
# the second allele of a heterozygote is always "+").
call_labels <- list(
  "G1-1" = c(allele_1 = "G1^{S342G}", allele_2 = "+", variant = "G1^{S342G}"),
  "G1-2" = c(allele_1 = "+", allele_2 = "G1^{I384M}", variant = "G1^{I384M}"),
  "G2" = c(allele_1 = "G2", allele_2 = "+", variant = "G2")
)

# Read the single CSV export for one assay and convert its calls.
#
# id        Assay ID; must be a name of `call_labels` and occur in the file
#           name (it is used as a regular expression by list.files()).
# dir       Directory to search; defaults to the working directory.
# drop_cols Columns to discard if present.
#
# Returns a data frame without `drop_cols` and `Call`, plus two character
# columns `<id>_1` and `<id>_2` (with "-" in the ID replaced by "_").
# Stops if the ID is unknown or if not exactly one CSV file matches.
read_allele_calls <- function(id, dir = ".", drop_cols = drops) {
  lab <- call_labels[[id]]
  if (is.null(lab)) {
    stop("No call labels defined for assay '", id, "'", call. = FALSE)
  }

  f <- list.files(
    path = dir,
    pattern = paste0(id, ".*\\.csv$"),
    full.names = TRUE
  )
  if (length(f) != 1L) {
    stop(
      "Expected exactly one CSV file matching '", id, "' in '", dir,
      "', found ", length(f),
      call. = FALSE
    )
  }

  # "G1-1" -> "G1_1" so the new columns are syntactic names (G1_1_1, ...).
  prefix <- gsub("-", "_", id, fixed = TRUE)

  read.csv(f) %>%
    select(-any_of(drop_cols)) %>%
    mutate(
      "{prefix}_1" := case_when(
        Call == "Allele 1" ~ lab[["allele_1"]],
        Call == "Allele 2" ~ lab[["allele_2"]],
        Call == "Heterozygote" ~ lab[["variant"]],
        Call == "No Call" ~ "Blank"
      ),
      "{prefix}_2" := case_when(
        Call == "Allele 1" ~ lab[["allele_1"]],
        Call == "Allele 2" ~ lab[["allele_2"]],
        Call == "Heterozygote" ~ "+",
        Call == "No Call" ~ "Blank"
      )
    ) %>%
    select(-Call)
}

# One converted data frame per assay, named by assay ID.
all_results <- file_ids %>%
  set_names() %>%
  map(read_allele_calls)
© www.soinside.com 2019 - 2024. All rights reserved.