我的数据框中有一列,每个元素都是一个由分隔符连接的基因名字符串。我希望把它们合并成一个向量,然后找出其中的唯一值。数据如下所示:
# Keep only the rows whose Description contains "lipid".
lipids_tgtg <- filter(group.go.tgtg.BP.df, grepl("lipid", Description))
# Replace every "/" in geneID with "," (coercing the column to character first).
lipids_tgtg[["geneID"]] <- gsub("/", ",", as.character(lipids_tgtg[["geneID"]]))
geneID
Nucb2,Apod,Acox2,Acaa1b,Dgat2,Pex2,Gcdh,Ivd,Acat2,Eci3,Appl2,Cyp4v3,Hao2,Ppara,Hacl1,Plin5
Trib3,Snai1,Apod,Ormdl2,Pibf1,Dgat2,Pde3b,Brca1,Malrd1,Appl2,Pde8b,Sik1,Pik3ip1,Dkkl1,Plin5,Snai2
Nucb2,Rab38,Fdps,Abcg1,Dgat2,Scd1,Stard4,Sec14l2,Ccdc3,Aadac,Scarb1,Vav2,Pnpla2,Ppara,Nod2,Tgfb1,Plin5,Nr1d1,Igf2,Dab2
Trib3,Nucb2,Slc27a3,Rab38,Sel1l,Apod,Far1,Isyna1,Mvd,Ormdl2,Lipa,Pla2g5,Ptges,Pibf1,Fdps,Akr1b3,Lmf1,Chka,Gstm7,Abcg1,Snca,Cers3,Pdk3,Acox2,Acaa1b,Scd2,Piga,Alg12,Dgat2,Nus1,Scd1,Stard4,Eif2ak3,Fasn,Cat,Hmgcs1,Fads1,Fdft1,Ankrd26,Sacm1l,Erg28,Pi4k2a,Plpp6,Alg8,Sptssa,Pgp,Pex2,Mtmr4,Alg6,Agpat3,Gcdh,Brca1,Ivd,Crls1,Acat2,Dpagt1,Il6st,Aadac,Prkab2,Cers5,Akr1b8,Dhrs3,Gpd1,Eci3,Scarb1,Dgkh,Hmgcs2,Appl2,Vav2,Cyp2s1,Akr1c14,Pnpla2,Cyp4v3,Bco2,Pm20d1,Hao2,Plcg2,Plcb1,Ppara,Cyp3a13,Cyp2d26,Nod2,Hacd4,Gm2a,Sptssb,Edn2,Hacl1,Cyp2c65,Ang,Aldh1a1,Cyp2d22,Pck1,Smpdl3a,St3gal4,B3galt2,Ptgs1,Hpgd,Prdx6,Sik1,Agmo,Ces1d,Pik3r5,Rbp2,Tmem86a,Itgb8,Lrat,Cyp2d12,Pik3ip1,Tgfb1,Plin5,Lpl,Cyp2c66,Apoc1,Ces1f,Cwh43,Tbxas1,Ces1g,Acsm2
Trib3,Snai1,Nucb2,Slc27a3,Rab38,Pltp,Sel1l,Apod,Abca1,Far1,Isyna1,Nphp3,Mvd,Ormdl2,Scly,Lipa,Pla2g5,Ebp,Ptges,Pibf1,Fdps,Osbpl10,Akr1b3,Lmf1,Chka,Gstm7,Abcg1,Snca,Cers3,Pdk3,Hdlbp,Acox2,Acaa1b,Scd2,Piga,Alg12,Dgat2,Nus1,Scd1,Stard4,Eif2ak3,Fasn,Cat,Rorc,Sec14l2,Ccdc3,Hmgcs1,Fads1,Cyp2r1,Fdft1,Pde3b,Dhrs11,Ankrd26,Sacm1l,Erg28,Pi4k2a,Plpp6,Alg8,Dolpp1,Sptssa,Pgp,Pex2,Mtmr4,Alg6,Agpat3,Xbp1,Gcdh,Brca1,Ivd,Crls1,Acat2,Dpagt1,Il6st,Malrd1,Aadac,Prkab2,Hsd17b2,Cers5,Akr1b8,Dhrs3,Gpd1,Akr1c19,Eci3,Scarb1,Dgkh,Hmgcs2,Appl2,Vav2,Cyp2s1,Ugt1a7c,Akr1c14,Pnpla2,Cyp4v3,Bco2,Pm20d1,Hao2,Plcg2,Plcb1,Ppara,Cyp3a13,Cyp2d26,Plcb2,Nod2,Hacd4,Tm6sf2,Gm2a,Sptssb,Edn2,Hacl1,Pde8b,Tspo,Saa1,Cyp2c65,Ang,Hsd3b3,Aldh1a1,Il1rn,Plbd1,Hsd3b7,Cyp2d22,Pck1,Gdpd2,Smpdl3a,Shh,St3gal4,B3galt2,Ptgs1,Hpgd,Prdx6,Sik1,Agmo,Ces1d,Pik3r5,Rbp2,Cubn,Tmem86a,Plcd3,Cacna1h,Pnpla7,Itgb8,Lrat,Schip1,Sult1a1,Cyp2d12,Pik3ip1,Tgfb1,Dkkl1,Cyp27a1,Plin5,Nr1d2,Lpl,Cyp2c66,Apoc1,Ces1f,Cwh43,Nr1d1,Igf2,Rora,Dab2,Snai2,Lhcgr,Tbxas1,Ces1g,Acsm2
St3gal4
Acat2
Nucb2,Pltp,Abca1,Abcg1,Ghrl,Abca7,Acat2,Lrat,Dab2
Ikbke,Scarb1,Plin5,Lpl
Cry1,Itgav,Thbs1,Shh
Abcg1,Itgav,Pnpla2,Ppara
Nucb2,Pltp,Abca1,Ikbke,Abcg1,Ghrl,Cry1,Nus1,Abca7,Itgav,Acat2,Scarb1,Pnpla2,Thbs1,Ppara,Shh,Lrat,Plin5,Lpl,Dab2
Ormdl2
Nucb2,Pltp,Abca1,Ikbke,Abcg1,Ghrl,Abca7,Acat2,Scarb1,Lrat,Plin5,Lpl,Dab2
Abcg1,Cry1,Itgav,Pnpla2,Thbs1,Ppara,Shh
Nucb2,Pltp,Apod,Abca1,Atp8b2,Mfsd2a,Ttpa,Pla2g5,Osbpl10,Ikbke,Abcg1,Ghrl,Hdlbp,Cry1,Dgat2,Nus1,Stard4,Osbp,Enpp1,Gramd1a,Abca7,Got2,Selenom,Itgav,Acat2,Osbpl5,Abcb1a,Scarb1,Pnpla2,Thbs1,Ppara,Gm2a,Prelid2,Tspo,Shh,Slco2b1,Lrat,Slco3a1,Plin5,Lpl,Apoc1,Dab2,Abca8b,Abcc2,Lhcgr
Ikbke,Abcg1,Cry1,Dgat2,Stard4,Enpp1,Itgav,Scarb1,Pnpla2,Ppara,Gm2a,Plin5,Lpl
Ikbke,Abcg1,Itgav,Scarb1,Pnpla2,Ppara,Plin5,Lpl
Abca1,Atp8b2,Abca7,Abcb1a,Plscr4
使用 dplyr 的 pull() 将该列提取为向量,得到的是一个我无法展平的命名字符向量。如果我使用 unlist 或 reduce,它们返回的内容完全相同。
# Drop rows whose Count is not positive, then extract geneID as a vector.
lipids_tgtg_vec <- lipids_tgtg %>%
  filter(Count > 0) %>%
  pull(geneID)
[1] "Nucb2,Apod,Acox2,Acaa1b,Dgat2,Pex2,Gcdh,Ivd,Acat2,Eci3,Appl2,Cyp4v3,Hao2,Ppara,Hacl1,Plin5"
[2] "Trib3,Snai1,Apod,Ormdl2,Pibf1,Dgat2,Pde3b,Brca1,Malrd1,Appl2,Pde8b,Sik1,Pik3ip1,Dkkl1,Plin5,Snai2"
[3] "Nucb2,Rab38,Fdps,Abcg1,Dgat2,Scd1,Stard4,Sec14l2,Ccdc3,Aadac,Scarb1,Vav2,Pnpla2,Ppara,Nod2,Tgfb1,Plin5,Nr1d1,Igf2,Dab2"
[4] "Trib3,Nucb2,Slc27a3,Rab38,Sel1l,Apod,Far1,Isyna1,Mvd,Ormdl2,Lipa,Pla2g5,Ptges,Pibf1,Fdps,Akr1b3,Lmf1,Chka,Gstm7,Abcg1,Snca,Cers3,Pdk3,Acox2,Acaa1b,Scd2,Piga,Alg12,Dgat2,Nus1,Scd1,Stard4,Eif2ak3,Fasn,Cat,Hmgcs1,Fads1,Fdft1,Ankrd26,Sacm1l,Erg28,Pi4k2a,Plpp6,Alg8,Sptssa,Pgp,Pex2,Mtmr4,Alg6,Agpat3,Gcdh,Brca1,Ivd,Crls1,Acat2,Dpagt1,Il6st,Aadac,Prkab2,Cers5,Akr1b8,Dhrs3,Gpd1,Eci3,Scarb1,Dgkh,Hmgcs2,Appl2,Vav2,Cyp2s1,Akr1c14,Pnpla2,Cyp4v3,Bco2,Pm20d1,Hao2,Plcg2,Plcb1,Ppara,Cyp3a13,Cyp2d26,Nod2,Hacd4,Gm2a,Sptssb,Edn2,Hacl1,Cyp2c65,Ang,Aldh1a1,Cyp2d22,Pck1,Smpdl3a,St3gal4,B3galt2,Ptgs1,Hpgd,Prdx6,Sik1,Agmo,Ces1d,Pik3r5,Rbp2,Tmem86a,Itgb8,Lrat,Cyp2d12,Pik3ip1,Tgfb1,Plin5,Lpl,Cyp2c66,Apoc1,Ces1f,Cwh43,Tbxas1,Ces1g,Acsm2"
[5] "Trib3,Snai1,Nucb2,Slc27a3,Rab38,Pltp,Sel1l,Apod,Abca1,Far1,Isyna1,Nphp3,Mvd,Ormdl2,Scly,Lipa,Pla2g5,Ebp,Ptges,Pibf1,Fdps,Osbpl10,Akr1b3,Lmf1,Chka,Gstm7,Abcg1,Snca,Cers3,Pdk3,Hdlbp,Acox2,Acaa1b,Scd2,Piga,Alg12,Dgat2,Nus1,Scd1,Stard4,Eif2ak3,Fasn,Cat,Rorc,Sec14l2,Ccdc3,Hmgcs1,Fads1,Cyp2r1,Fdft1,Pde3b,Dhrs11,Ankrd26,Sacm1l,Erg28,Pi4k2a,Plpp6,Alg8,Dolpp1,Sptssa,Pgp,Pex2,Mtmr4,Alg6,Agpat3,Xbp1,Gcdh,Brca1,Ivd,Crls1,Acat2,Dpagt1,Il6st,Malrd1,Aadac,Prkab2,Hsd17b2,Cers5,Akr1b8,Dhrs3,Gpd1,Akr1c19,Eci3,Scarb1,Dgkh,Hmgcs2,Appl2,Vav2,Cyp2s1,Ugt1a7c,Akr1c14,Pnpla2,Cyp4v3,Bco2,Pm20d1,Hao2,Plcg2,Plcb1,Ppara,Cyp3a13,Cyp2d26,Plcb2,Nod2,Hacd4,Tm6sf2,Gm2a,Sptssb,Edn2,Hacl1,Pde8b,Tspo,Saa1,Cyp2c65,Ang,Hsd3b3,Aldh1a1,Il1rn,Plbd1,Hsd3b7,Cyp2d22,Pck1,Gdpd2,Smpdl3a,Shh,St3gal4,B3galt2,Ptgs1,Hpgd,Prdx6,Sik1,Agmo,Ces1d,Pik3r5,Rbp2,Cubn,Tmem86a,Plcd3,Cacna1h,Pnpla7,Itgb8,Lrat,Schip1,Sult1a1,Cyp2d12,Pik3ip1,Tgfb1,Dkkl1,Cyp27a1,Plin5,Nr1d2,Lpl,Cyp2c66,Apoc1,Ces1f,Cwh43,Nr1d1,Igf2,Rora,Dab2,Snai2,Lhcgr,Tbxas1,Ces1g,Acsm2"
[6] "St3gal4"
[7] "Acat2"
[8] "Nucb2,Pltp,Abca1,Abcg1,Ghrl,Abca7,Acat2,Lrat,Dab2"
[9] "Ikbke,Scarb1,Plin5,Lpl"
[10] "Cry1,Itgav,Thbs1,Shh"
[11] "Abcg1,Itgav,Pnpla2,Ppara"
[12] "Nucb2,Pltp,Abca1,Ikbke,Abcg1,Ghrl,Cry1,Nus1,Abca7,Itgav,Acat2,Scarb1,Pnpla2,Thbs1,Ppara,Shh,Lrat,Plin5,Lpl,Dab2"
[13] "Ormdl2"
[14] "Nucb2,Pltp,Abca1,Ikbke,Abcg1,Ghrl,Abca7,Acat2,Scarb1,Lrat,Plin5,Lpl,Dab2"
[15] "Abcg1,Cry1,Itgav,Pnpla2,Thbs1,Ppara,Shh"
[16] "Nucb2,Pltp,Apod,Abca1,Atp8b2,Mfsd2a,Ttpa,Pla2g5,Osbpl10,Ikbke,Abcg1,Ghrl,Hdlbp,Cry1,Dgat2,Nus1,Stard4,Osbp,Enpp1,Gramd1a,Abca7,Got2,Selenom,Itgav,Acat2,Osbpl5,Abcb1a,Scarb1,Pnpla2,Thbs1,Ppara,Gm2a,Prelid2,Tspo,Shh,Slco2b1,Lrat,Slco3a1,Plin5,Lpl,Apoc1,Dab2,Abca8b,Abcc2,Lhcgr"
[17] "Ikbke,Abcg1,Cry1,Dgat2,Stard4,Enpp1,Itgav,Scarb1,Pnpla2,Ppara,Gm2a,Plin5,Lpl"
[18] "Ikbke,Abcg1,Itgav,Scarb1,Pnpla2,Ppara,Plin5,Lpl"
[19] "Abca1,Atp8b2,Abca7,Abcb1a,Plscr4"
R 中有什么方法可以将此列表展平为单个向量,以便对其使用 unique 函数?
您可以使用 strsplit、unlist 和 unique 在该列中查找唯一值。
# Split each comma-separated geneID string into its pieces, flatten the
# per-row pieces into one character vector, and drop duplicates.
# as.character() guards against a factor column, which strsplit() rejects as
# non-character input; fixed = TRUE treats "," as a literal separator rather
# than a regular expression.
unique(unlist(strsplit(as.character(df$geneID), ",", fixed = TRUE)))
如果要对所有列分别执行此操作,则可以使用lapply
# For every column of df, split the comma-separated values and return the
# unique pieces; the result is a list with one character vector per column.
# Each column is coerced with as.character() first because strsplit() errors
# on non-character input (e.g. numeric or factor columns); fixed = TRUE
# treats "," as a literal separator.
lapply(df, function(x) {
  unique(unlist(strsplit(as.character(x), ",", fixed = TRUE)))
})
其中 df 是数据框的名称,geneID 是字符列。