如何删除在特定列范围内存在缺失数据、且仅限于特定行的那些行?

问题描述 投票:0回答:1

如果某一行中的缺失值同时满足以下两个条件,我想删除该行:

  1. 缺失值所在行的时间(time)为 1
  2. 缺失值位于某个“神经酰胺列”中 - 请参阅下面有关哪些列是神经酰胺列的更多信息

这是我的数据(`dput()` 的输出;下文问题中的代码将其称为 `long_data`,回答中的代码将其称为 `df`):

structure(list(unqid = c(248, 248, 248, 248, 260, 260, 260, 260, 
3245, 3245, 3245, 3245, 3356, 3356, 3356, 3356, 5777, 5777, 5777, 
5777, 6670, 6670, 6670, 6670), time = c(1, 2, 3, 4, 1, 2, 3, 
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4), risk_period = c("baseline", 
"0 to 2", "2 to 6", "6 to 12", "baseline", "0 to 2", "2 to 6", 
"6 to 12", "baseline", "0 to 2", "2 to 6", "6 to 12", "baseline", 
"0 to 2", "2 to 6", "6 to 12", "baseline", "0 to 2", "2 to 6", 
"6 to 12", "baseline", "0 to 2", "2 to 6", "6 to 12"), log_Cer.d18.0.24.1. = 
c(1.75591357030424, 
1.78808074202933, 1.78808074202933, 2.12541503739623, NA, 1.84784683417316, 
1.84784683417316, 1.660523253172, 1.71686160700412, 1.53021484400339, 
1.53021484400339, NA, 1.89959518683134, 2.13535376890766, 2.13535376890766, 
1.85969746880244, 1.79719659193748, NA, NA, 1.58721201454406, 
1.58269404870719, NA, NA, NA), log_Cer.d18.1.20.0. = c(2.0779936380825, 
1.91571413701583, 1.91571413701583, 2.37155913815626, 1.90173659734545, 
2.17760248999473, 2.17760248999473, 2.25151395301426, 1.92254402612409, 
2.13414350086059, 2.13414350086059, NA, 2.06112457583167, 2.11854093530707, 
2.11854093530707, 2.16685493654321, 1.78492915001842, NA, NA, 
1.88865763010095, 1.90477840908931, NA, NA, NA), log_GlcCer..d18.1.18.0. = 
c(1.37530467341568, 
1.26149055730786, 1.26149055730786, 2.20534847316661, 1.37179016097532, 
1.94465372915655, 1.94465372915655, 1.86255558272858, 1.39432569370228, 
1.80008523512444, 1.80008523512444, NA, 1.59814912304543, 1.65836069384085, 
1.65836069384085, 1.80053131813665, 1.30323667707535, NA, NA, 
1.66851539788016, 1.79824483492343, NA, NA, NA), log_GlcCer..d18.1.20.0. = 
c(1.68455467959852, 
1.68084892938971, 1.68084892938971, 2.83694873855279, 1.98660174822612, 
2.11687085647631, 2.11687085647631, 1.94521751150187, 1.94408113760572, 
2.3144321228024, 2.3144321228024, NA, 1.86220914942921, 2.08792541332488, 
2.08792541332488, 2.14127700040429, 1.25556058999258, NA, NA, 
1.75839585512294, 1.85392719235767, NA, NA, NA), log_SM.d18.0.22.0. = 
c(3.16608443039769, 
2.81581789967824, 2.81581789967824, 3.57758543823053, 3.05566675337641, 
2.71402626524511, 2.71402626524511, 2.74435051671931, 3.18268277797341, 
3.06067406280674, 3.06067406280674, NA, 3.00579040289776, 3.43150364762063, 
3.43150364762063, 2.92413057011273, 2.89178396996734, NA, NA, 
2.65395668019336, 2.61724748884637, NA, NA, NA), log_SM.d18.1.18.0. = 
c(4.12097226184649, 
3.97496147376645, 3.97496147376645, 4.39933846293122, 3.95478500357647, 
4.04453196517474, 4.04453196517474, 4.14121011613781, 3.89734856154778, 
4.00561959288859, 4.00561959288859, NA, 4.01511036918758, 4.00986157943819, 
4.00986157943819, 3.97023826969138, 3.86134148535091, NA, NA, 
3.77303147344872, 4.06523949171878, NA, NA, NA), log_SM.d18.1.24.1. = 
c(4.80118834282449, 
4.58079754854406, 4.58079754854406, 5.29872341013633, 4.89353600842266, 
5.01659126290913, 5.01659126290913, 5.01117232938486, 4.87122149340715, 
4.81332745585807, 4.81332745585807, NA, 4.88235204875765, 4.92826803352429, 
4.92826803352429, 4.78283431218245, 4.3613226254187, NA, NA, 
4.58555011488179, 4.63520556565684, NA, NA, NA), cancer = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
0, 0)), row.names = c(917L, 918L, 919L, 920L, 3458L, 3459L, 3460L, 
3461L, 4286L, 4287L, 4288L, 4289L, 4290L, 4291L, 4292L, 4293L, 
4462L, 4463L, 4464L, 4465L, 4506L, 4507L, 4508L, 4509L), class = "data.frame")

我创建了一个识别神经酰胺列的对象:

# Identify the ceramide columns of `long_data`: every column from the first
# ceramide column to the last one (inclusive), by position.

# Position of the first ceramide column. Note: in R, `==` is the equality
# test.
cer.start <- which(colnames(long_data) == "log_Cer.d18.0.24.1.")
cer.start # print for inspection

# Position of the last ceramide column.
cer.stop <- which(colnames(long_data) == "log_SM.d18.1.24.1.")
cer.stop # print for inspection

# Fail early if either anchor column is missing or duplicated; otherwise
# `cer.start:cer.stop` below fails with an obscure "argument of length 0"
# error or silently uses only the first match.
stopifnot(length(cer.start) == 1L, length(cer.stop) == 1L)

# Names of all ceramide columns between the first and last, inclusive.
ceramides <- colnames(long_data)[cer.start:cer.stop]

然后我想删除所有在这些列中存在缺失值且时间为 1 的行。例如,第 5 行是一个时间 1 的行,它的第一个神经酰胺列中有一个 `NA`。我想删除该行,因为缺失值位于神经酰胺列中,并且该行也被标识为时间 1 的行。

如果只满足其中一个条件,我希望保留该行。

我已经尝试了很多东西,但似乎仍然没有弄清楚如何写这个。

r dataframe subset
1个回答
0
投票

您可以将 `dplyr` 包中的 `filter` 与 `if_any` 一起使用,指定“多列中任意一列为 `NA`”并且(`&`)`time == 1` 的条件。然后使用否定号 `!`,保留不满足上述条件的行。

library(dplyr)

# Keep a row unless it is a time-1 row with an NA in any column of the
# ceramide range. This is the De Morgan form of
# !(time == 1 & <any NA in range>).
filter(
  df,
  time != 1 | !if_any(log_Cer.d18.0.24.1.:log_SM.d18.1.24.1., is.na)
)

   unqid time risk_period log_Cer.d18.0.24.1. log_Cer.d18.1.20.0. log_GlcCer..d18.1.18.0. log_GlcCer..d18.1.20.0. log_SM.d18.0.22.0. log_SM.d18.1.18.0. log_SM.d18.1.24.1. cancer
1    248    1    baseline            1.755914            2.077994                1.375305                1.684555           3.166084           4.120972           4.801188      0
2    248    2      0 to 2            1.788081            1.915714                1.261491                1.680849           2.815818           3.974961           4.580798      0
3    248    3      2 to 6            1.788081            1.915714                1.261491                1.680849           2.815818           3.974961           4.580798      0
4    248    4     6 to 12            2.125415            2.371559                2.205348                2.836949           3.577585           4.399338           5.298723      0
5    260    2      0 to 2            1.847847            2.177602                1.944654                2.116871           2.714026           4.044532           5.016591      0
6    260    3      2 to 6            1.847847            2.177602                1.944654                2.116871           2.714026           4.044532           5.016591      0
7    260    4     6 to 12            1.660523            2.251514                1.862556                1.945218           2.744351           4.141210           5.011172      0
8   3245    1    baseline            1.716862            1.922544                1.394326                1.944081           3.182683           3.897349           4.871221      0
9   3245    2      0 to 2            1.530215            2.134144                1.800085                2.314432           3.060674           4.005620           4.813327      0
10  3245    3      2 to 6            1.530215            2.134144                1.800085                2.314432           3.060674           4.005620           4.813327      0
11  3245    4     6 to 12                  NA                  NA                      NA                      NA                 NA                 NA                 NA      0
12  3356    1    baseline            1.899595            2.061125                1.598149                1.862209           3.005790           4.015110           4.882352      0
13  3356    2      0 to 2            2.135354            2.118541                1.658361                2.087925           3.431504           4.009862           4.928268      0
14  3356    3      2 to 6            2.135354            2.118541                1.658361                2.087925           3.431504           4.009862           4.928268      0
15  3356    4     6 to 12            1.859697            2.166855                1.800531                2.141277           2.924131           3.970238           4.782834      0
16  5777    1    baseline            1.797197            1.784929                1.303237                1.255561           2.891784           3.861341           4.361323      0
17  5777    2      0 to 2                  NA                  NA                      NA                      NA                 NA                 NA                 NA      0
18  5777    3      2 to 6                  NA                  NA                      NA                      NA                 NA                 NA                 NA      0
19  5777    4     6 to 12            1.587212            1.888658                1.668515                1.758396           2.653957           3.773031           4.585550      1
20  6670    1    baseline            1.582694            1.904778                1.798245                1.853927           2.617247           4.065239           4.635206      0
21  6670    2      0 to 2                  NA                  NA                      NA                      NA                 NA                 NA                 NA      0
22  6670    3      2 to 6                  NA                  NA                      NA                      NA                 NA                 NA                 NA      0
23  6670    4     6 to 12                  NA                  NA                      NA                      NA                 NA                 NA                 NA      0
© www.soinside.com 2019 - 2024. All rights reserved.