为每个组指定单独的色标(scale)后,用于排序数据的 factor() 不起作用

问题描述 投票:0回答:1

我最终想生成一张图,为三个组分别使用不同的色标(scale)。我还需要这些组(每个“样本”)在图上聚集在一起,也就是保持它们当前在 data.frame 中的排列顺序。我已经用 factor() 在一张图中做到了这一点,但那张图的三个组共用一个色标。[图:顺序正确、只有一个色标]

# Example data as a structure() literal: a data.frame with 52 rows.
#   sample       - character group name: "WAT" (16 rows), "BAT" (31 rows),
#                  "in_vitro" (5 rows), stored contiguously in that order
#   label        - factor with integer codes 1:52, so its 52 levels are
#                  already listed in row order
#   percent_ccgs - numeric; mapped to the x axis in the plots below
#   kuiper_q     - numeric; mapped to the fill colour in the plots below
cell_cycle_sample <- structure(list(sample = c("WAT", "WAT", "WAT", "WAT", "WAT", 
"WAT", "WAT", "WAT", "WAT", "WAT", "WAT", "WAT", "WAT", "WAT", 
"WAT", "WAT", "BAT", "BAT", "BAT", "BAT", "BAT", "BAT", "BAT", 
"BAT", "BAT", "BAT", "BAT", "BAT", "BAT", "BAT", "BAT", "BAT", 
"BAT", "BAT", "BAT", "BAT", "BAT", "BAT", "BAT", "BAT", "BAT", 
"BAT", "BAT", "BAT", "BAT", "BAT", "BAT", "in_vitro", "in_vitro", 
"in_vitro", "in_vitro", "in_vitro"), label = structure(1:52, .Label = c("KEGG_CELL_CYCLE(WAT)(123)", 
"REACTOME_MITOTIC_G2_G2_M_PHASES(WAT)(183)", "REACTOME_CELL_CYCLE_CHECKPOINTS(WAT)(286)", 
"REACTOME_M_PHASE(WAT)(390)", "REACTOME_NUCLEAR_ENVELOPE_NE_REASSEMBLY(WAT)(65)", 
"REACTOME_SEPARATION_OF_SISTER_CHROMATIDS(WAT)(187)", "REACTOME_CELL_CYCLE(WAT)(601)", 
"REACTOME_POSTMITOTIC_NUCLEAR_PORE_COMPLEX_NPC_REFORMATION(WAT)(22)", 
"REACTOME_MITOTIC_METAPHASE_AND_ANAPHASE(WAT)(225)", "HALLMARK_MITOTIC_SPINDLE(WAT)(199)", 
"REACTOME_RESOLUTION_OF_SISTER_CHROMATID_COHESION(WAT)(122)", 
"REACTOME_MITOTIC_SPINDLE_CHECKPOINT(WAT)(111)", "REACTOME_MITOTIC_PROMETAPHASE(WAT)(193)", 
"REACTOME_AMPLIFICATION_OF_SIGNAL_FROM_THE_KINETOCHORES(WAT)(94)", 
"HALLMARK_MYC_TARGETS_V1(WAT)(197)", "HALLMARK_G2M_CHECKPOINT(WAT)(195)", 
"REACTOME_APC_C_CDH1_MEDIATED_DEGRADATION_OF_CDC20_AND_OTHER_APC_C_CDH1_TARGETED_PROTEINS_IN_LATE_MITOSIS_EARLY_G1(BAT)(74)", 
"REACTOME_ASSEMBLY_OF_THE_PRE_REPLICATIVE_COMPLEX(BAT)(83)", 
"REACTOME_RECOGNITION_OF_DNA_DAMAGE_BY_PCNA_CONTAINING_REPLICATION_COMPLEX(BAT)(30)", 
"REACTOME_APC_C_MEDIATED_DEGRADATION_OF_CELL_CYCLE_PROTEINS(BAT)(87)", 
"REACTOME_CYCLIN_D_ASSOCIATED_EVENTS_IN_G1(BAT)(44)", "REACTOME_MITOTIC_SPINDLE_CHECKPOINT(BAT)(111)", 
"REACTOME_RESOLUTION_OF_SISTER_CHROMATID_COHESION(BAT)(122)", 
"REACTOME_G1_S_DNA_DAMAGE_CHECKPOINTS(BAT)(67)", "REACTOME_MITOTIC_PROPHASE(BAT)(121)", 
"REACTOME_DNA_REPLICATION_PRE_INITIATION(BAT)(98)", "REACTOME_G1_S_TRANSITION(BAT)(106)", 
"REACTOME_RECRUITMENT_OF_NUMA_TO_MITOTIC_CENTROSOMES(BAT)(88)", 
"REACTOME_MITOTIC_G1_PHASE_AND_G1_S_TRANSITION(BAT)(130)", "REACTOME_G2_M_DNA_DAMAGE_CHECKPOINT(BAT)(90)", 
"REACTOME_DNA_REPLICATION(BAT)(126)", "REACTOME_G2_M_CHECKPOINTS(BAT)(163)", 
"REACTOME_S_PHASE(BAT)(148)", "REACTOME_MITOTIC_G2_G2_M_PHASES(BAT)(183)", 
"REACTOME_CYCLIN_A_CDK2_ASSOCIATED_EVENTS_AT_S_PHASE_ENTRY(BAT)(73)", 
"REACTOME_MITOTIC_PROMETAPHASE(BAT)(193)", "REACTOME_MITOTIC_METAPHASE_AND_ANAPHASE(BAT)(225)", 
"KEGG_CELL_CYCLE(BAT)(123)", "REACTOME_TP53_REGULATES_TRANSCRIPTION_OF_DNA_REPAIR_GENES(BAT)(43)", 
"REACTOME_M_PHASE(BAT)(390)", "REACTOME_DNA_REPAIR(BAT)(299)", 
"REACTOME_CELL_CYCLE_CHECKPOINTS(BAT)(286)", "REACTOME_CELL_CYCLE(BAT)(601)", 
"HALLMARK_MYC_TARGETS_V1(BAT)(197)", "HALLMARK_MITOTIC_SPINDLE(BAT)(199)", 
"HALLMARK_G2M_CHECKPOINT(BAT)(195)", "HALLMARK_E2F_TARGETS(BAT)(199)", 
"REACTOME_CELLULAR_SENESCENCE(invitro)(95)", "REACTOME_MITOTIC_PROMETAPHASE(invitro)(193)", 
"REACTOME_AMPLIFICATION_OF_SIGNAL_FROM_THE_KINETOCHORES(invitro)(94)", 
"REACTOME_RESOLUTION_OF_SISTER_CHROMATID_COHESION(invitro)(122)", 
"REACTOME_MITOTIC_SPINDLE_CHECKPOINT(invitro)(111)"), class = "factor"), 
    percent_ccgs = c(26.8, 26.2, 20.6, 19, 29.2, 21.9, 20.5, 
    40.9, 22.7, 22.6, 23, 21.6, 21.8, 22.3, 27.9, 25.6, 33.8, 
    32.5, 33.3, 34.5, 38.6, 24.3, 21.3, 31.3, 20.7, 31.6, 34, 
    27.3, 31.5, 21.1, 29.4, 25.2, 32.4, 29.5, 37, 25.4, 27.6, 
    32.5, 37.2, 25.9, 21.7, 24.8, 24.8, 27.9, 29.6, 30.3, 26.1, 
    28.4, 16.1, 19.1, 16.4, 18.9), kuiper_q = c(0.09611, 0.08737, 
    0.0695, 0.04749, 0.0278, 0.01853, 0.0139, 0.00927, 0.00927, 
    0.0068, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 0.083925532, 
    0.067253165, 0.066876923, 0.066518421, 0.062635616, 0.0546, 
    0.054455882, 0.048555556, 0.042714286, 0.0406, 0.036672222, 
    0.03521875, 0.03442069, 0.03442069, 0.027906667, 0.025044444, 
    0.01932, 0.01932, 0.01932, 0.0161, 0.010733333, 0.007542857, 
    1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 
    0.0125, 0.0122, 0.0012, 8e-04, 2e-04)), row.names = c(NA, 
-52L), class = "data.frame")

这是只有一个比例的第一个图的代码

# Import the data from CSV.
# NOTE(review): the CSV is not part of the post; presumably it holds the same
# rows as the structure() literal shown earlier — confirm.
cell_cycle_sample <- read.csv("cell_cycle_sample.csv")
# Make label a factor whose levels follow the current row order.
# NOTE(review): this line reads and writes `cell_cycle_all`, which is not
# defined anywhere in the snippet; every other line uses `cell_cycle_sample`.
# Confirm which object was intended.
cell_cycle_all$label <- factor(cell_cycle_all$label, levels = cell_cycle_all$label)
# Make plot - one geom_col layer, so all groups share a single fill scale
# (fill is mapped to kuiper_q).
ggplot(cell_cycle_sample, aes(x=percent_ccgs, y=label, group=sample)) + 
  geom_col(position="dodge", width=0.4, linewidth=0.7, aes(fill = kuiper_q))

然后我生成了第二张图,其中三个组(样本)各自使用不同的色标。但问题是 y 轴上的标签现在按字母顺序排序,而不是按因子 label 的水平(levels)顺序排序

这是为每个组指定不同比例的代码,现在按字母顺序排列

# Import the data from CSV.
cell_cycle_sample <- read.csv("cell_cycle_sample.csv")
# Make label a factor whose levels follow the current row order (intended to
# fix the order of the labels on the y axis).
cell_cycle_sample$label <- factor(cell_cycle_sample$label, levels = cell_cycle_sample$label)
# Make plot - a separate fill scale for each group.
# Structure: three geom_col layers, each restricted to one sample through a
# `data = ~ subset(.x, ...)` formula, each followed by its own
# scale_fill_gradient(); ggnewscale::new_scale_fill() separates the scales.
# NOTE(review): `b` (used for `breaks` and `labels` in all three scales) is
# not defined anywhere in this snippet.
# NOTE(review): no layer uses the full data set; each layer only sees the
# labels of its own sample. The question reports that the y axis comes out in
# alphabetical order with this layout.
ggplot(cell_cycle_sample, aes(x=percent_ccgs, y=label, group=sample)) + 
  # Layer 1: rows with sample == "WAT".
  geom_col(position="dodge", width=0.4, linewidth=0.7, aes(fill = kuiper_q),
           data = ~ subset(.x, sample == "WAT")) +
  # NOTE(review): this scale is named "BAT" but follows the "WAT" layer —
  # confirm whether the names of the first two scales are swapped.
  scale_fill_gradient(
    name = "BAT", guide = guide_colorbar(order = 3),
    limits = c(0.0001, 0.1),
    low = "blue", high = "lightblue",
    breaks=b, labels = format(b)) +
  ggnewscale::new_scale_fill() +
  # Layer 2: rows with sample == "BAT".
  geom_col(position="dodge", width=0.4, linewidth=0.7, aes(fill = kuiper_q),
           data = ~ subset(.x, sample == "BAT")) +
  # NOTE(review): this scale is named "WAT" but follows the "BAT" layer.
  scale_fill_gradient(
    name = "WAT", guide = guide_colorbar(order = 2),
    limits = c(0.0001, 0.1),
    low = "green", high = "lightgreen",
    breaks=b, labels = format(b)) +
  ggnewscale::new_scale_fill() +
  # Layer 3: rows with sample == "in_vitro"; here the scale name matches.
  geom_col(position="dodge", width=0.4, linewidth=0.7, aes(fill = kuiper_q),
           data = ~ subset(.x, sample == "in_vitro")) +
  scale_fill_gradient(
    name = "in_vitro", guide = guide_colorbar(order = 1),
    limits = c(0.0001, 0.1),
    low = "red", high = "pink",
    breaks=b, labels = format(b))

我无法弄清楚:为什么在添加额外代码来指定三个单独的色标之后,factor() 就不再起作用了。

r bar-chart factors levels
1个回答
0
投票

我从未使用过 `ggnewscale`,但我发现了一个技巧:紧接在 `ggplot()` 之后,先添加一个使用完整数据集(而不是子集)的图层。我给这一层设置了 `alpha = 0`,但似乎并不需要,因为这一层会被其他图层覆盖。

# Start with one geom_col layer drawn from the full (unsubset) data set;
# alpha = 0 is set on this layer as a constant. The snippet is incomplete: it
# ends on `+`, and the per-sample layers and fill scales from the question are
# meant to follow.
ggplot(cell_cycle_sample, aes(x=percent_ccgs, y=label, group=sample)) + 
  geom_col(position="dodge", width=0.4, linewidth=0.7, aes(fill = kuiper_q), alpha = 0)+

补充说明:

  • 检查您的子集和色标名称,BAT 和 WAT 是否颠倒。
  • 您的示例中未定义 `b`,因此我无法重现色标上的数字。
  • 您的第二个代码块引用了 `cell_cycle_all`,而其他地方使用的都是 `cell_cycle_sample`。
  • 图表和示例难以阅读,容易让人困惑。我不太擅长因子,但有一些方法可以分别指定因子的取值(levels)和显示标签(labels)。长达 123 个字符的标签名称可能不是好的做法。您可以参考《R for Data Science》。
© www.soinside.com 2019 - 2024. All rights reserved.