我有一个数据框 df:
# Literal representation of the example data frame (appears to be dput() output).
# Columns: `sample` (factor, levels A1/B1/C1/D2), `genotype` (factor with four
# levels) and `n` (integer count); 10 rows, classed as a grouped_df with
# vars = "sample".
# NOTE(review): the vars/indices/labels attributes look like an older dplyr
# grouped_df layout -- confirm it loads cleanly under the dplyr version in use.
structure(list(sample = structure(c(4L, 2L, 1L, 4L, 1L, 2L, 3L,
3L, 3L, 1L), .Label = c("A1", "B1", "C1", "D2"), class = "factor"),
genotype = structure(c(4L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 3L,
1L), .Label = c("germline_private", "germline_recurrent",
"somatic_normal", "somatic_tumour"), class = "factor"), n = c(5L,
4L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 1L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), vars = "sample", drop = TRUE, .Names = c("sample",
"genotype", "n"), indices = list(c(2L, 4L, 9L), c(1L, 5L), 6:8,
c(0L, 3L)), group_sizes = c(3L, 2L, 3L, 2L), biggest_group_size = 3L, labels = structure(list(
sample = structure(1:4, .Label = c("A1", "B1", "C1", "D2"
), class = "factor")), class = "data.frame", row.names = c(NA,
-4L), vars = "sample", drop = TRUE, .Names = "sample"))
head(df)
sample event_no genotype
A1 1 somatic_tumour
A1 2 germline_recurrent
A1 3 germline_recurrent
A1 4 somatic_tumour
A1 5 germline_recurrent
A1 6 germline_private
在这个例子中,我想统计每个样本中各基因型出现的次数,然后让样本按 somatic_tumour 事件的数量排序。
这是我目前的代码:
library(tidyverse)
# Tally the events for every sample/genotype pair, then list the largest
# counts first.
df <- df %>%
  group_by(sample, genotype) %>%
  tally() %>%
  arrange(desc(n))
然后我想绘制每个样本的这些计数,并按基因型分面(~genotype):
# Bar chart of the pre-computed counts `n` per sample, one panel per genotype.
# geom_col() draws bars whose heights are the y values as supplied, which is
# what geom_histogram(stat = "identity") was emulating; using it directly
# avoids the "ignoring unknown parameters" warning about the unused binning
# arguments.
p <- ggplot(df, aes(sample, n)) +
  geom_col() +
  facet_wrap(~genotype)
p
但是,我希望所有面板中的样本都按右下角面板(somatic_tumour)中的计数排序。
这是一种方法:把除 somatic_tumour 以外的所有 n 替换为 0,得到新列 new_n,然后按 new_n 和 n 这两列排序,即
library(tidyverse)
# Count events per sample/genotype, then sort so that somatic_tumour rows come
# first (largest count first) and the remaining rows follow by their own count.
df %>%
  group_by(sample, genotype) %>%
  tally() %>%
  # Helper sort key: the count for somatic_tumour rows, 0 for every other row.
  mutate(new_n = replace(n, genotype != "somatic_tumour", 0)) %>%
  arrange(desc(new_n), desc(n)) %>%
  # The helper key is only needed for sorting.
  select(-new_n)
结果如下:
# A tibble: 11 x 3
# Groups:   sample [4]
   sample genotype               n
   <fct>  <fct>              <int>
 1 A1     somatic_tumour         2
 2 B1     somatic_tumour         2
 3 D2     somatic_tumour         2
 4 B1     germline_recurrent     4
 5 A1     germline_recurrent     3
 6 D2     germline_recurrent     3
 7 C1     germline_private       2
 8 C1     germline_recurrent     2
 9 C1     somatic_normal         2
10 A1     germline_private       1
11 D2     somatic_normal         1
您还可以使用 left_join 为每个样本添加 somatic_tumour 的出现次数,然后根据 somatic_tumour 行的 n 列创建一个按该计数排列水平的因子,x 轴就会按相应顺序排列。
library(dplyr)
library(ggplot2)
# Plot the counts per sample, faceted by genotype, with the x axis ordered by
# each sample's somatic_tumour count (ascending).

# One row per sample that has a somatic_tumour count.
tumour_counts <- df %>%
  ungroup() %>%
  filter(genotype == "somatic_tumour") %>%
  select(sample, tumour_n = n)

df %>%
  ungroup() %>%
  left_join(tumour_counts, by = "sample") %>%
  mutate(
    # Samples with no somatic_tumour row get NA from the join; count them as 0
    # so they keep their own label instead of collapsing into an NA level.
    tumour_n = coalesce(tumour_n, 0L),
    # Reorder the factor levels by the somatic_tumour count; every sample
    # stays a valid level.
    sample = reorder(sample, tumour_n)
  ) %>%
  ggplot() +
  # geom_col() plots the supplied counts directly as bar heights.
  geom_col(aes(sample, n)) +
  facet_wrap(~genotype)
注意:如果因子水平只取自 somatic_tumour 行,那么没有 somatic_tumour 记录的样本(此数据中的 C1)不在水平之内,会显示为 NA 标签;这与样本量大小无关。