脚本创建一个名为 na.rm 的列,而不是对值进行平均

问题描述 投票:0回答:1

这是一些示例数据:

structure(list(kidney.patient_id = c(1210520, 1189592, 1113741, 
1217475, 1116183, 1229628, 1232227, 1232227, 1234958, 1265879, 
1265879, 1236909, 1225604, 1237043, 1134683, 1237661, 1243689, 
1243689, 1238390, 1260140, 1260140, 1240335, 1243559, 1240345, 
1199028, 1242663, 1262396, 1256453, 1256453, 1198017, 1198017, 
1198017, 1256555, 1256555, 1256555, 1137461, 1257334, 1257334, 
1257334, 1260237, 1257485, 1257485, 1259316, 1259526, 1259526, 
1138909, 1259823, 1259823, 1259209, 1260139, 1260139, 1260252, 
1261258, 1261490, 1261640, 1261640, 1260443, 1261876, 1261876, 
1236380, 1264770, 1264770, 1255462, 1265305, 1119668, 1266433, 
1266433, 1266488, 1266488, 1269529, 1234254, 1270266, 1264374, 
1264374, 1270275, 1270275), GPplasma_1 = c(4.722702815, 15.13083745, 
2.616373253, 4.252955558, NA, 6.903629211, 2.062762966, 3.419624322, 
4.460253231, 3.031831908, 3.070273955, 4.134182102, 5.53518182, 
4.303791081, NA, 5.477463863, 5.322218829, 4.81890102, 4.338491028, 
3.193482542, 4.557574106, 7.128594477, 2.647212945, 4.429683178, 
4.875371902, 5.45857832, 7.922327072, 4.592780644, 3.605821785, 
15.43059051, 14.4597711, 20.56964006, 2.463418982, 4.055084911, 
4.173100845, 4.513571383, 6.550947395, 6.011236511, 6.56060276, 
3.636231263, 4.280688898, 4.289923476, 4.38805309, 6.468675975, 
6.34707179, 4.015534735, 17.22025185, 17.71553649, 2.984821335, 
5.450364231, 5.70397817, 3.89827858, 8.054153313, 6.606611288, 
3.363318462, 4.029429195, 10.26347691, 4.727224345, 5.124302802, 
5.266373298, 6.848505568, 5.815452181, NA, 4.602175244, 6.661954129, 
3.973173983, 2.905282478, 7.701719764, 7.284489397, 10.0853342, 
7.519244573, 7.403494987, 1.839398389, 1.999462021, 5.349686328, 
5.83107762), GPplasma_2 = c(1.865474993, 3.752399819, 1.299474681, 
2.613415886, NA, 1.90508529, 1.405896921, 1.684655204, 2.478138021, 
1.67416249, 1.633127813, 2.093457686, 1.909552481, 1.472085097, 
NA, 1.571601721, 2.491386859, 2.281934029, 1.826694551, 1.528981852, 
1.804623961, 2.773202652, 1.238969156, 1.98003417, 2.387097222, 
1.59402684, 1.735297778, 2.355554683, 2.148497334, 4.159371472, 
3.956392954, 4.164449008, 1.334998955, 1.629317925, 1.643574159, 
1.864193012, 2.435507497, 2.35148558, 2.354314184, 1.822684975, 
1.869399242, 1.889904759, 1.972049233, 2.662837206, 2.636567204, 
2.48517468, 2.682280154, 2.671669914, 1.67543787, 2.05021491, 
2.083329058, 1.727642413, 1.568676328, 2.246848561, 1.89238424, 
2.035203072, 2.469648522, 2.366984952, 2.492314171, 2.585416911, 
1.640418622, 1.382270294, NA, 2.784100618, 2.304629494, 1.771888254, 
1.436115299, 1.222992362, 1.171196521, 3.409253789, 3.369898261, 
2.1975109, 1.320145884, 1.391797838, 2.394354109, 2.44141094), 
    GPplasma_3 = c(0.056947101, 0.091860594, 0.044099888, 0.083199037, 
    NA, 0.056608797, 0.062543745, 0.089386164, 0.133041879, 0.05714612, 
    0.051877582, 0.097120457, 0.046049278, 0.043849861, NA, 0.070350459, 
    0.086436795, 0.071717533, 0.078110246, 0.073707773, 0.104551601, 
    0.127522128, 0.061619945, 0.076656506, 0.083781629, 0.056360013, 
    0.081233466, 0.117488007, 0.091821172, 0.087216263, 0.090214425, 
    0.106586508, 0.030269588, 0.051738273, 0.049953194, 0.077081819, 
    0.080660377, 0.079810587, 0.083443572, 0.098767364, 0.05832075, 
    0.056078922, 0.047925894, 0.116819689, 0.099905753, 0.056593499, 
    0.120847446, 0.117375517, 0.094459333, 0.08888956, 0.087753712, 
    0.066038836, 0.044969318, 0.064810647, 0.076239052, 0.086890738, 
    0.09048183, 0.102430099, 0.10889413, 0.165653539, 0.088879992, 
    0.060500554, NA, 0.229193637, 0.070935001, 0.070830102, 0.050544554, 
    0.055000868, 0.057526685, 0.27278714, 0.094890354, 0.100144842, 
    0.060482814, 0.07092659, 0.105592792, 0.1213855), GPplasma_4 = c(1.931314234, 
    4.995124568, 1.85599831, 1.806933062, NA, 3.282886173, 2.636787514, 
    4.228448689, 3.183367167, 2.906265089, 2.698704251, 3.158662956, 
    2.607117343, 2.241219442, NA, 3.759888537, 3.757924945, 3.34566158, 
    2.742848766, 2.663967592, 3.462553089, 3.960217396, 2.739990463, 
    3.227070093, 2.460941401, 3.910451266, 6.23791327, 3.066762445, 
    2.512516039, 4.750037153, 4.582922445, 5.367717423, 0.984596367, 
    1.593822663, 1.302932408, 1.777561265, 4.289991994, 3.804580666, 
    3.876114488, 3.544584724, 1.990427636, 1.997468289, 1.886075788, 
    4.163803973, 4.149502354, 2.054525688, 4.686049342, 4.352709528, 
    2.906108408, 4.106283917, 4.064177063, 4.571100706, 2.961177976, 
    4.732168196, 2.605192407, 2.890409737, 4.225550802, 3.453415357, 
    3.426345796, 3.997078537, 2.930895705, 2.219162189, NA, 4.063481675, 
    3.803881348, 2.818737927, 1.915749039, 3.125193397, 2.721262009, 
    3.040619673, 4.499842325, 4.759111227, 1.157330385, 1.235923566, 
    3.197695689, 3.093271159)), row.names = c(NA, 76L), class = "data.frame")

我遇到的问题是代码不是计算平均值,而是创建一个名为 na.rm 的列,而不是从计算的平均值中删除缺失值。代码运行并且没有产生任何错误。但是当我查看数据时,我看到的唯一区别是存在新列(称为 na.rm)。我哪里错了?我尝试过使用其他管道。我尝试取出 na.rm 看看它是否会计算此时的平均值。

这是我的代码:

#libraries that are loaded
library(dplyr)
library(tidyverse)
library(table1)
library(ggplot2)
library(anytime)
library(data.table)
library(plyr)
library(fastDummies)
library(reshape2)

kidney = kidney %>%
  group_by(patient_id) |>
  mutate(across(GPplasma_1 : GPplasma_4, mean), na.rm=TRUE) |>
  ungroup()
r pipe mutate
1个回答
0
投票

使用数据表提供了一种非常直接的方法,如果您的数据集很大,可能会更快

将示例中的结构分配给

my_struc
然后,这将在数据表中创建一个名为“avg”的列,它是所有 gplasma 列的平均值(已删除 NA),按患者 ID 分组

library(data.table)
setDT(my_struc)
my_struc[,avg:=mean(unlist(.SD),na.rm=T),by=.(kidney.patient_id)]
my_struc
© www.soinside.com 2019 - 2024. All rights reserved.