我想删除同时满足以下两个条件的行:在指定的若干列中存在缺失值,并且 time 为 1。
这是我的数据(`dput()` 的输出,下文代码中称为 `long_data`):
structure(list(unqid = c(248, 248, 248, 248, 260, 260, 260, 260,
3245, 3245, 3245, 3245, 3356, 3356, 3356, 3356, 5777, 5777, 5777,
5777, 6670, 6670, 6670, 6670), time = c(1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4), risk_period = c("baseline",
"0 to 2", "2 to 6", "6 to 12", "baseline", "0 to 2", "2 to 6",
"6 to 12", "baseline", "0 to 2", "2 to 6", "6 to 12", "baseline",
"0 to 2", "2 to 6", "6 to 12", "baseline", "0 to 2", "2 to 6",
"6 to 12", "baseline", "0 to 2", "2 to 6", "6 to 12"), log_Cer.d18.0.24.1. =
c(1.75591357030424,
1.78808074202933, 1.78808074202933, 2.12541503739623, NA, 1.84784683417316,
1.84784683417316, 1.660523253172, 1.71686160700412, 1.53021484400339,
1.53021484400339, NA, 1.89959518683134, 2.13535376890766, 2.13535376890766,
1.85969746880244, 1.79719659193748, NA, NA, 1.58721201454406,
1.58269404870719, NA, NA, NA), log_Cer.d18.1.20.0. = c(2.0779936380825,
1.91571413701583, 1.91571413701583, 2.37155913815626, 1.90173659734545,
2.17760248999473, 2.17760248999473, 2.25151395301426, 1.92254402612409,
2.13414350086059, 2.13414350086059, NA, 2.06112457583167, 2.11854093530707,
2.11854093530707, 2.16685493654321, 1.78492915001842, NA, NA,
1.88865763010095, 1.90477840908931, NA, NA, NA), log_GlcCer..d18.1.18.0. =
c(1.37530467341568,
1.26149055730786, 1.26149055730786, 2.20534847316661, 1.37179016097532,
1.94465372915655, 1.94465372915655, 1.86255558272858, 1.39432569370228,
1.80008523512444, 1.80008523512444, NA, 1.59814912304543, 1.65836069384085,
1.65836069384085, 1.80053131813665, 1.30323667707535, NA, NA,
1.66851539788016, 1.79824483492343, NA, NA, NA), log_GlcCer..d18.1.20.0. =
c(1.68455467959852,
1.68084892938971, 1.68084892938971, 2.83694873855279, 1.98660174822612,
2.11687085647631, 2.11687085647631, 1.94521751150187, 1.94408113760572,
2.3144321228024, 2.3144321228024, NA, 1.86220914942921, 2.08792541332488,
2.08792541332488, 2.14127700040429, 1.25556058999258, NA, NA,
1.75839585512294, 1.85392719235767, NA, NA, NA), log_SM.d18.0.22.0. =
c(3.16608443039769,
2.81581789967824, 2.81581789967824, 3.57758543823053, 3.05566675337641,
2.71402626524511, 2.71402626524511, 2.74435051671931, 3.18268277797341,
3.06067406280674, 3.06067406280674, NA, 3.00579040289776, 3.43150364762063,
3.43150364762063, 2.92413057011273, 2.89178396996734, NA, NA,
2.65395668019336, 2.61724748884637, NA, NA, NA), log_SM.d18.1.18.0. =
c(4.12097226184649,
3.97496147376645, 3.97496147376645, 4.39933846293122, 3.95478500357647,
4.04453196517474, 4.04453196517474, 4.14121011613781, 3.89734856154778,
4.00561959288859, 4.00561959288859, NA, 4.01511036918758, 4.00986157943819,
4.00986157943819, 3.97023826969138, 3.86134148535091, NA, NA,
3.77303147344872, 4.06523949171878, NA, NA, NA), log_SM.d18.1.24.1. =
c(4.80118834282449,
4.58079754854406, 4.58079754854406, 5.29872341013633, 4.89353600842266,
5.01659126290913, 5.01659126290913, 5.01117232938486, 4.87122149340715,
4.81332745585807, 4.81332745585807, NA, 4.88235204875765, 4.92826803352429,
4.92826803352429, 4.78283431218245, 4.3613226254187, NA, NA,
4.58555011488179, 4.63520556565684, NA, NA, NA), cancer = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0)), row.names = c(917L, 918L, 919L, 920L, 3458L, 3459L, 3460L,
3461L, 4286L, 4287L, 4288L, 4289L, 4290L, 4291L, 4292L, 4293L,
4462L, 4463L, 4464L, 4465L, 4506L, 4507L, 4508L, 4509L), class = "data.frame")
我创建了一个识别神经酰胺列的对象:
# Locate the contiguous run of ceramide columns in `long_data` by name and
# collect their column names in `ceramides`.

## Find first ceramide column
# Note: in R, two "=" signs (`==`) are used to test for equality.
cer.start <- which(colnames(long_data) == "log_Cer.d18.0.24.1.")
cer.start
## Find last ceramide column
cer.stop <- which(colnames(long_data) == "log_SM.d18.1.24.1.")
cer.stop
# `which()` returns integer(0) when no column matches; fail with a clear
# message instead of the cryptic "argument of length 0" error from `:`.
if (length(cer.start) == 0L || length(cer.stop) == 0L) {
  stop("Boundary ceramide column not found in `long_data`.", call. = FALSE)
}
## Select all ceramide column names between the first and last ceramide
ceramides <- colnames(long_data)[cer.start:cer.stop]
然后我想删除所有在这些列中存在缺失值且 time 为 1 的行。例如,第 5 行是 time 为 1 的行,并且它在第一个神经酰胺列中的值为 `NA`。我想删除该行,因为它在神经酰胺列中有缺失值,同时又是 time 为 1 的行。
如果某一行只满足其中一个条件,我希望保留该行。
我已经尝试了很多方法,但仍然没有弄清楚该如何写。
您可以使用 dplyr 包中的 `filter` 和 `if_any`,指定条件:`time == 1` 并且(`&`)多列中存在 `NA` 值。然后使用取反运算符 `!`,保留不满足上述条件的行。
library(dplyr)
# Keep a row unless it is a time-1 row with an NA in any column from
# log_Cer.d18.0.24.1. through log_SM.d18.1.24.1. (De Morgan form of the
# negated conjunction).
df %>%
  filter(time != 1 | !if_any(log_Cer.d18.0.24.1.:log_SM.d18.1.24.1., is.na))
unqid time risk_period log_Cer.d18.0.24.1. log_Cer.d18.1.20.0. log_GlcCer..d18.1.18.0. log_GlcCer..d18.1.20.0. log_SM.d18.0.22.0. log_SM.d18.1.18.0. log_SM.d18.1.24.1. cancer
1 248 1 baseline 1.755914 2.077994 1.375305 1.684555 3.166084 4.120972 4.801188 0
2 248 2 0 to 2 1.788081 1.915714 1.261491 1.680849 2.815818 3.974961 4.580798 0
3 248 3 2 to 6 1.788081 1.915714 1.261491 1.680849 2.815818 3.974961 4.580798 0
4 248 4 6 to 12 2.125415 2.371559 2.205348 2.836949 3.577585 4.399338 5.298723 0
5 260 2 0 to 2 1.847847 2.177602 1.944654 2.116871 2.714026 4.044532 5.016591 0
6 260 3 2 to 6 1.847847 2.177602 1.944654 2.116871 2.714026 4.044532 5.016591 0
7 260 4 6 to 12 1.660523 2.251514 1.862556 1.945218 2.744351 4.141210 5.011172 0
8 3245 1 baseline 1.716862 1.922544 1.394326 1.944081 3.182683 3.897349 4.871221 0
9 3245 2 0 to 2 1.530215 2.134144 1.800085 2.314432 3.060674 4.005620 4.813327 0
10 3245 3 2 to 6 1.530215 2.134144 1.800085 2.314432 3.060674 4.005620 4.813327 0
11 3245 4 6 to 12 NA NA NA NA NA NA NA 0
12 3356 1 baseline 1.899595 2.061125 1.598149 1.862209 3.005790 4.015110 4.882352 0
13 3356 2 0 to 2 2.135354 2.118541 1.658361 2.087925 3.431504 4.009862 4.928268 0
14 3356 3 2 to 6 2.135354 2.118541 1.658361 2.087925 3.431504 4.009862 4.928268 0
15 3356 4 6 to 12 1.859697 2.166855 1.800531 2.141277 2.924131 3.970238 4.782834 0
16 5777 1 baseline 1.797197 1.784929 1.303237 1.255561 2.891784 3.861341 4.361323 0
17 5777 2 0 to 2 NA NA NA NA NA NA NA 0
18 5777 3 2 to 6 NA NA NA NA NA NA NA 0
19 5777 4 6 to 12 1.587212 1.888658 1.668515 1.758396 2.653957 3.773031 4.585550 1
20 6670 1 baseline 1.582694 1.904778 1.798245 1.853927 2.617247 4.065239 4.635206 0
21 6670 2 0 to 2 NA NA NA NA NA NA NA 0
22 6670 3 2 to 6 NA NA NA NA NA NA NA 0
23 6670 4 6 to 12 NA NA NA NA NA NA NA 0