我想跨数据帧中的列迭代一段代码,以创建一个新的结果矩阵。我陷入了如何使用数据框中某个列(站点)中的值迭代创建新对象和矩阵/以该值命名的新对象和矩阵,然后传递到以下代码行的问题。
我想要迭代的代码是:
SiteX_data = data %>% subset(Site == X) %>%select(x_coord, y_cood)
SiteX_dists <- as.matrix(dist(SiteX_data))
SiteX_dists.inv <- 1/SiteX_dists
diag(SiteX_dists.inv) <- 0
Resid_taxaX_siteX <- as.vector( Resid_data %>% subset(Site == X) %>% select(Abundance_TaxaX) )
Moran.I(Resid_taxaX_siteX, SiteX_dists.inv)
其中“X”指数据帧(数据)中的站点值:
样品 | 网站 | 治疗 | x_坐标 | y_坐标 | 丰度_Taxa_1 | ... | 丰度_分类_n |
---|---|---|---|---|---|---|---|
I1S1 | I1 | 内部 | 140.00 | -29.00 | 56 | ... | 0 |
O2S1 | 氧气 | 外面 | 141.00 | -28.00 | 3 | ... | 100 |
O2S2 | 氧气 | 外面 | 141.10 | -28.10 | 5 | ... | 4 |
“Y”指的是单独数据帧(Resid_data)中唯一的“Abundance_Taxa”列:
网站 | 丰度_Taxa_1 | ... | 丰度_分类_n |
---|---|---|---|
O1 | -0.5673 | --- | 1.1579 |
I1 | -0.6666 | --- | 1.2111 |
每个“站点”有多个“样本”。前四行代码旨在使用 x 和 y 坐标为每个站点创建距离矩阵,然后简单地反转距离。
第五行旨在为每个位点和分类单元创建 Resid_data 中值的向量。
最后一行代码使用类群/位点向量和之前创建的距离矩阵来计算每个位点和类群的 Moran's I。 我期望的结果是产生以下矩阵:
网站 | 治疗 | 莫兰I_Taxa_1 | ... | 莫兰I_Taxa_n |
---|---|---|---|---|
I1 | 内部 | 0.1 | ... | 0.2 |
氧气 | 外面 | 0.5 | ... | 0.01 |
我不知道如何写这个
Moran.I(Resid_taxa1_site1, Site1_dists.inv)
Moran.I(Resid_taxa2_site1, Site1_dists.inv)
直到该站点的所有残差值向量完成)。
站点值的数量相对较少(12),但我需要迭代的“Abundance_Taxa”列的数量很大(~2400)。我在网上找到的大部分内容都是编写非常简单的“for”循环。因此,我什至不知道从哪里开始。
我们首先定义两个辅助函数:
calculateWeights
对应于您提供的前四行代码,getResidualData
反映第五行。
library(dplyr)
library(tidyr)
library(ape)
calculateWeights <- function(df, site) {
df |>
filter(Site == site) |>
select(x_coord, y_coord) |>
dist() |>
as.matrix() |>
(\(.) 1 / replace(., . == 0, 1))()
}
getResidualData <- function(df, site, t) {
Resid_data |>
filter(Site == site) |>
select(ends_with(paste0("_", t))) |>
pull()
}
那么你想要的结果可以这样计算:
res <- lapply(unique(data$Site), function(site) {
weight <- calculateWeights(data, site)
resSite <- data.frame()
lapply(1:(sum(grepl(
'Taxa', colnames(data)
))), function(t) {
x <- getResidualData(Resid_data, site, t)
rbind(resSite, list(
site = site,
t = t,
val = Moran.I(x, weight)$observed
))
}) |> bind_rows()
}) |> bind_rows() |>
mutate(Treatment = case_match(substring(site, 1, 1), "I" ~ "Inside", "O" ~ "Outside")) |>
pivot_wider(names_from = t,
names_glue = "MoranI_Taxa_{t}",
values_from = val)
例如,这看起来像
> res
# A tibble: 2 × 12
site Treatment MoranI_taxa_1 MoranI_taxa_2 MoranI_taxa_3 MoranI_taxa_4
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 I1 Inside -0.393 -0.136 0.0111 0.0191
2 O2 Outside -0.116 0.262 -0.153 -0.639
# ℹ 6 more variables: MoranI_taxa_5 <dbl>, MoranI_taxa_6 <dbl>, MoranI_taxa_7 <dbl>,
# MoranI_taxa_8 <dbl>, MoranI_taxa_9 <dbl>, MoranI_taxa_10 <dbl>
使用样本数据
> dput(data)
structure(list(Sample = c("I1S1", "O2S1", "O2S2", "O2S3", "O2S4",
"I1S2", "I1S3", "I1S4"), Site = c("I1", "O2", "O2", "O2", "O2",
"I1", "I1", "I1"), Treatment = c("Inside", "Outside", "Outside",
"Outside", "Outside", "Inside", "Inside", "Inside"), x_coord = c(140,
141, 141.1, 139.9, 139.4, 141.2, 140.5, 139.8), y_coord = c(-29,
-28, -28.1, -28.5, -29.1, -28.9, -28.3, -29.2), Abundance_Taxa_1 = c(42L,
46L, 24L, 93L, 30L, 45L, 100L, 39L), Abundance_Taxa_2 = c(52L,
85L, 53L, 43L, 97L, 26L, 62L, 6L), Abundance_Taxa_3 = c(58L,
23L, 42L, 41L, 60L, 45L, 82L, 85L), Abundance_Taxa_4 = c(33L,
11L, 45L, 14L, 2L, 98L, 35L, 28L), Abundance_Taxa_5 = c(45L,
16L, 80L, 100L, 8L, 72L, 37L, 87L), Abundance_Taxa_6 = c(10L,
60L, 75L, 91L, 23L, 33L, 86L, 15L), Abundance_Taxa_7 = c(68L,
60L, 10L, 72L, 95L, 92L, 45L, 84L), Abundance_Taxa_8 = c(55L,
48L, 8L, 96L, 3L, 99L, 75L, 13L), Abundance_Taxa_9 = c(18L, 85L,
5L, 31L, 56L, 20L, 82L, 67L), Abundance_Taxa_10 = c(8L, 19L,
10L, 79L, 61L, 12L, 35L, 52L)), class = "data.frame", row.names = c(NA,
-8L))
> dput(Resid_data)
structure(list(Site = c("O2", "I1", "I1", "O2", "I1", "I1", "O2",
"O2"), Abundance_Taxa_1 = c(0.77, -0.68, -0.33, -0.19, -0.39,
0, -0.02, -0.59), Abundance_Taxa_2 = c(-0.45, 0.52, 0.66, -0.87,
-0.4, -0.1, -0.27, 0.5), Abundance_Taxa_3 = c(-0.27, 0.48, 0.68,
0.66, -0.31, 0.79, 0.55, -0.93), Abundance_Taxa_4 = c(0.58, -0.11,
0.89, -0.77, -0.64, -0.43, -0.18, -0.17), Abundance_Taxa_5 = c(-0.9,
-0.1, -0.3, 0.83, 0.05, 0.71, 0.17, 0.31), Abundance_Taxa_6 = c(0.58,
0.79, -0.66, 0.88, 0.97, -0.36, 0.75, 0.21), Abundance_Taxa_7 = c(-0.96,
-0.84, -0.58, 0.92, -0.68, -0.19, -0.34, -0.32), Abundance_Taxa_8 = c(-0.62,
0.97, -0.88, -0.68, 0.19, 0.77, 0.37, -0.23), Abundance_Taxa_9 = c(0.05,
-0.03, 0.23, -0.2, -0.99, -0.14, -0.1, -0.61), Abundance_Taxa_10 = c(0.94,
0.61, 0.76, 0.27, 0.17, -0.34, 0.7, -0.43)), class = "data.frame", row.names = c(NA,
-8L))