这是一些示例数据:
structure(list(kidney.patient_id = c(1210520, 1189592, 1113741,
1217475, 1116183, 1229628, 1232227, 1232227, 1234958, 1265879,
1265879, 1236909, 1225604, 1237043, 1134683, 1237661, 1243689,
1243689, 1238390, 1260140, 1260140, 1240335, 1243559, 1240345,
1199028, 1242663, 1262396, 1256453, 1256453, 1198017, 1198017,
1198017, 1256555, 1256555, 1256555, 1137461, 1257334, 1257334,
1257334, 1260237, 1257485, 1257485, 1259316, 1259526, 1259526,
1138909, 1259823, 1259823, 1259209, 1260139, 1260139, 1260252,
1261258, 1261490, 1261640, 1261640, 1260443, 1261876, 1261876,
1236380, 1264770, 1264770, 1255462, 1265305, 1119668, 1266433,
1266433, 1266488, 1266488, 1269529, 1234254, 1270266, 1264374,
1264374, 1270275, 1270275), GPplasma_1 = c(4.722702815, 15.13083745,
2.616373253, 4.252955558, NA, 6.903629211, 2.062762966, 3.419624322,
4.460253231, 3.031831908, 3.070273955, 4.134182102, 5.53518182,
4.303791081, NA, 5.477463863, 5.322218829, 4.81890102, 4.338491028,
3.193482542, 4.557574106, 7.128594477, 2.647212945, 4.429683178,
4.875371902, 5.45857832, 7.922327072, 4.592780644, 3.605821785,
15.43059051, 14.4597711, 20.56964006, 2.463418982, 4.055084911,
4.173100845, 4.513571383, 6.550947395, 6.011236511, 6.56060276,
3.636231263, 4.280688898, 4.289923476, 4.38805309, 6.468675975,
6.34707179, 4.015534735, 17.22025185, 17.71553649, 2.984821335,
5.450364231, 5.70397817, 3.89827858, 8.054153313, 6.606611288,
3.363318462, 4.029429195, 10.26347691, 4.727224345, 5.124302802,
5.266373298, 6.848505568, 5.815452181, NA, 4.602175244, 6.661954129,
3.973173983, 2.905282478, 7.701719764, 7.284489397, 10.0853342,
7.519244573, 7.403494987, 1.839398389, 1.999462021, 5.349686328,
5.83107762), GPplasma_2 = c(1.865474993, 3.752399819, 1.299474681,
2.613415886, NA, 1.90508529, 1.405896921, 1.684655204, 2.478138021,
1.67416249, 1.633127813, 2.093457686, 1.909552481, 1.472085097,
NA, 1.571601721, 2.491386859, 2.281934029, 1.826694551, 1.528981852,
1.804623961, 2.773202652, 1.238969156, 1.98003417, 2.387097222,
1.59402684, 1.735297778, 2.355554683, 2.148497334, 4.159371472,
3.956392954, 4.164449008, 1.334998955, 1.629317925, 1.643574159,
1.864193012, 2.435507497, 2.35148558, 2.354314184, 1.822684975,
1.869399242, 1.889904759, 1.972049233, 2.662837206, 2.636567204,
2.48517468, 2.682280154, 2.671669914, 1.67543787, 2.05021491,
2.083329058, 1.727642413, 1.568676328, 2.246848561, 1.89238424,
2.035203072, 2.469648522, 2.366984952, 2.492314171, 2.585416911,
1.640418622, 1.382270294, NA, 2.784100618, 2.304629494, 1.771888254,
1.436115299, 1.222992362, 1.171196521, 3.409253789, 3.369898261,
2.1975109, 1.320145884, 1.391797838, 2.394354109, 2.44141094),
GPplasma_3 = c(0.056947101, 0.091860594, 0.044099888, 0.083199037,
NA, 0.056608797, 0.062543745, 0.089386164, 0.133041879, 0.05714612,
0.051877582, 0.097120457, 0.046049278, 0.043849861, NA, 0.070350459,
0.086436795, 0.071717533, 0.078110246, 0.073707773, 0.104551601,
0.127522128, 0.061619945, 0.076656506, 0.083781629, 0.056360013,
0.081233466, 0.117488007, 0.091821172, 0.087216263, 0.090214425,
0.106586508, 0.030269588, 0.051738273, 0.049953194, 0.077081819,
0.080660377, 0.079810587, 0.083443572, 0.098767364, 0.05832075,
0.056078922, 0.047925894, 0.116819689, 0.099905753, 0.056593499,
0.120847446, 0.117375517, 0.094459333, 0.08888956, 0.087753712,
0.066038836, 0.044969318, 0.064810647, 0.076239052, 0.086890738,
0.09048183, 0.102430099, 0.10889413, 0.165653539, 0.088879992,
0.060500554, NA, 0.229193637, 0.070935001, 0.070830102, 0.050544554,
0.055000868, 0.057526685, 0.27278714, 0.094890354, 0.100144842,
0.060482814, 0.07092659, 0.105592792, 0.1213855), GPplasma_4 = c(1.931314234,
4.995124568, 1.85599831, 1.806933062, NA, 3.282886173, 2.636787514,
4.228448689, 3.183367167, 2.906265089, 2.698704251, 3.158662956,
2.607117343, 2.241219442, NA, 3.759888537, 3.757924945, 3.34566158,
2.742848766, 2.663967592, 3.462553089, 3.960217396, 2.739990463,
3.227070093, 2.460941401, 3.910451266, 6.23791327, 3.066762445,
2.512516039, 4.750037153, 4.582922445, 5.367717423, 0.984596367,
1.593822663, 1.302932408, 1.777561265, 4.289991994, 3.804580666,
3.876114488, 3.544584724, 1.990427636, 1.997468289, 1.886075788,
4.163803973, 4.149502354, 2.054525688, 4.686049342, 4.352709528,
2.906108408, 4.106283917, 4.064177063, 4.571100706, 2.961177976,
4.732168196, 2.605192407, 2.890409737, 4.225550802, 3.453415357,
3.426345796, 3.997078537, 2.930895705, 2.219162189, NA, 4.063481675,
3.803881348, 2.818737927, 1.915749039, 3.125193397, 2.721262009,
3.040619673, 4.499842325, 4.759111227, 1.157330385, 1.235923566,
3.197695689, 3.093271159)), row.names = c(NA, 76L), class = "data.frame")
我遇到的问题是:代码并没有计算平均值(并在计算时剔除缺失值),而是创建了一个名为 na.rm 的新列。代码可以运行,也没有报任何错误,但查看数据时,唯一的变化就是多了这个名为 na.rm 的新列。我哪里做错了?我尝试过改用其他管道运算符,也尝试过去掉 na.rm,看看这样能否计算出平均值。
这是我的代码:
#libraries that are loaded
library(dplyr)
library(tidyverse)
library(table1)
library(ggplot2)
library(anytime)
library(data.table)
library(plyr)
library(fastDummies)
library(reshape2)
# Replace each GPplasma_* value with its per-patient mean, ignoring NAs.
#
# Two things kept the original call from computing anything:
#   1. `na.rm = TRUE` was an argument of mutate(), not of mean(), so it was
#      treated as a request for a new column called `na.rm`.
#   2. plyr is attached after dplyr, so a bare `mutate()` resolves to
#      plyr::mutate(), which silently drops unnamed arguments such as the
#      across() call. The dplyr verbs are therefore namespace-qualified.
# A patient whose values are all NA in a column gets NaN for that column.
# NOTE(review): the sample dput names the id column `kidney.patient_id`;
# confirm that `patient_id` is the right grouping column in `kidney`.
kidney <- kidney |>
  dplyr::group_by(patient_id) |>
  dplyr::mutate(
    dplyr::across(GPplasma_1:GPplasma_4, \(x) mean(x, na.rm = TRUE))
  ) |>
  dplyr::ungroup()
使用 data.table 包是一种非常直接的方法;如果您的数据集很大,速度可能还会更快。
先将示例中的 structure(...) 对象赋值给 my_struc。
然后,下面的代码会在该 data.table 中新增一个名为 “avg” 的列:按患者 ID 分组,把所有 GPplasma 列的值合并在一起求平均值(计算时忽略 NA)。
library(data.table)
# Convert my_struc to a data.table by reference.
setDT(my_struc)
# Add `avg` by reference: for each patient, the mean of all GPplasma_* values
# pooled together, with NAs ignored. `.SDcols` limits `.SD` to the GPplasma
# columns so that any other column in the table cannot leak into the mean.
my_struc[,
  avg := mean(unlist(.SD), na.rm = TRUE),
  by = .(kidney.patient_id),
  .SDcols = patterns("^GPplasma_")
]
# Print the updated table.
my_struc