我有一个来自具有三个参数的阶乘(双因素)实验的数据集。我正在尝试创建一个函数,用于导出一个包含主要因素和交互作用的均值、sd 和字母(来自
HSD.test
)的整洁表格。我将使用输出表在ggplot
中绘图。 这是我的csv
格式的数据。我的功能如下:
library(reshape2)
library(Rmisc)
library(agricolae)
library(dplyr)
library(stringr)
data<-read.csv(file = 'moistNcopy.csv', header = T, check.names = F)
make_Figure_table_2F<-function(MyDf){
orig.names<-names(MyDf[,c(1:2)])
colnames(MyDf)[c(1:2)] <- c('Factor_A', 'Factor_B')
dataMelt<-melt(MyDf)
x<-as.character(unique(dataMelt$variable)) # split out the levels of variables
# For individual factors # For 'Factor_A' $ 'B'
dataSE_A<-summarySE(dataMelt, measurevar = 'value', groupvars = c('variable', 'Factor_A'))
dataSE_B<-summarySE(dataMelt, measurevar = 'value', groupvars = c('variable', 'Factor_B'))
models<-sapply(x, function(my) {lm(value~Factor_A*Factor_B, data=dataMelt, variable==my)}, simplify=FALSE)
HSDlist_A = lapply(models, function(m) HSD.test((m), 'Factor_A', alpha = 0.05, group = TRUE, console = F, variable==m))
HSDlist_B = lapply(models, function(m) HSD.test((m), 'Factor_B', alpha = 0.05, group = TRUE, console = F, variable==m))
# For interaction #
int<-with(dataMelt, interaction(Factor_A, Factor_B, sep = "*", lex.order = TRUE))
dataSE_int<-summarySE(dataMelt, measurevar = 'value', groupvars = c('variable', 'int'))
models_int<-sapply(x, function(my) {lm(value~int, data=dataMelt, variable==my)}, simplify=FALSE)
HSDlist_int<-lapply(models_int, function(m) HSD.test((m), 'int', alpha = 0.05, group = TRUE, console = F, variable==m))
#----------------------------------------------------------------
ind_table<-function(HSDlist, dataSE){
myLetters<-lapply(lapply(HSDlist, `[[`, 'groups'), '[', 'groups')
myLetters<-Map(cbind, myLetters, my_row_names = lapply(myLetters, rownames))
myLetters<-lapply(myLetters, function(df) {df[order(df$my_row_names), ]})
myLetters<-lapply(myLetters, function(x) x[,1])
myLetters<-do.call(cbind, myLetters)
myLetters<-melt(myLetters)
colnames(myLetters)[3]<-'letters'
DataTable<-cbind(dataSE, myLetters$letters)
return(DataTable)
}
tableA<-ind_table(HSDlist_A, dataSE_A)
names(tableA)[2]<-'treatment'
tableA['ExpFactor']<-rep(orig.names[1], length(tableA$variable))
tableB<-ind_table(HSDlist_B, dataSE_B)
names(tableB)[2]<-'treatment'
tableB['ExpFactor']<-rep(orig.names[2], length(tableB$variable))
tableC<-ind_table(HSDlist_int, dataSE_int)
names(tableC)[2]<-'treatment'
tableC['ExpFactor']<-rep('int', length(tableC$variable))
MyFigureData<-bind_rows(tableA, tableB, tableC)
colnames(MyFigureData)[8]<-'letters'
write.table(MyFigureData, 'my_Figure_table_2F.csv', append = F, sep = ',', row.names = FALSE)
return(MyFigureData)
}
make_Figure_table_2F(data)
该函数抛出以下错误消息:
Using Factor_A, Factor_B as id variables Error in data.frame(..., check.names = FALSE) : arguments imply differing number of rows: 10, 36 In addition: Warning message: In cbind(int = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, : number of rows of result is not a multiple of vector length (arg 2)
但是,如果我手动运行每一行,我可以收到如下预期的输出:
head(MyFigureData)
variable treatment N value sd se ci letters ExpFactor
1 Parameter_1 M1 9 22.33333 3.774917 1.258306 2.901658 ab Moisture
2 Parameter_1 M2 9 24.44444 5.027701 1.675900 3.864633 ab Moisture
3 Parameter_1 M3 9 21.77778 4.841946 1.613982 3.721849 b Moisture
4 Parameter_1 M4 9 24.88889 4.621808 1.540603 3.552636 a Moisture
5 Parameter_2 M1 9 121.77778 8.671473 2.890491 6.665484 c Moisture
6 Parameter_2 M2 9 148.00000 18.668155 6.222718 14.349614 b Moisture
tail(MyFigureData)
variable treatment N value sd se ci letters ExpFactor
52 Parameter_3 M3*N1 3 67.00000 12.529964 7.234178 31.12616 ab int
53 Parameter_3 M3*N2 3 66.66667 8.736895 5.044249 21.70365 ab int
54 Parameter_3 M3*N3 3 74.00000 4.582576 2.645751 11.38375 a int
55 Parameter_3 M4*N1 3 76.33333 8.144528 4.702245 20.23213 a int
56 Parameter_3 M4*N2 3 64.00000 5.196152 3.000000 12.90796 ab int
57 Parameter_3 M4*N3 3 77.33333 4.041452 2.333333 10.03952 a int
我哪里弄错了?