为什么遗传匹配(MatchIt 中的 method = "genetic")可能会增加不平衡?

问题描述 投票:0回答:1

我使用 MatchIt 来使各研究组之间达到更好的平衡。大多数方法似乎都效果很好,减少了组间协变量的差异(我在这里将 `method = "optimal"` 作为效果良好的方法的示例)。然而,遗传匹配(`method = "genetic"`)却极大地增加了不平衡。还值得注意的是,遗传匹配的结果似乎与最近邻匹配的结果完全相同,在我看来,这表明某个地方出了问题。为什么会发生这种情况?又是怎么发生的?

library(MatchIt)

data <- data.frame(
  treatment_group = rep(
    rep(c(TRUE, FALSE), 88),
    c(104L, 3L, 6L, 2L, 1L, 2L, 1L, 1L, 1L, 6L, 2L, 1L, 1L, 1L, 1L,
      1L, 2L, 1L, 8L, 1L, 1L, 1L, 2L, 6L, 2L, 3L, 1L, 1L, 1L, 1L, 1L,
      3L, 1L, 1L, 1L, 5L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 5L, 3L, 1L,
      1L, 2L, 2L, 1L, 1L, 2L, 1L, 5L, 1L, 2L, 2L, 23L, 1L, 1L, 9L,
      1L, 2L, 2L, 2L, 2L, 6L, 1L, 1L, 1L, 5L, 1L, 5L, 1L, 2L, 2L, 4L,
      1L, 2L, 1L, 6L, 5L, 1L, 1L, 1L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L,
      6L, 2L, 6L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 1L, 2L, 1L,
      1L, 3L, 5L, 2L, 1L, 1L, 1L, 1L, 9L, 2L, 1L, 2L, 1L, 4L, 4L, 2L,
      1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 6L, 1L, 5L, 1L, 2L, 5L,
      2L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 5L, 7L, 1L, 1L,
      2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 4L, 3L, 1L, 1L, 5L, 1L, 1L, 2L,
      1L, 1L)
  ),
  x1 = c(
    5.5, 4.5, 4, 5.5, 2, 3, 5, 5, 3, 3.5, 4.5, 3, 3.5, 4, 4, 4.5, 5, 4.5, 4.5,
    2.5, 3, 5.5, 5.5, 5.5, 5.5, 4.5, 4.5, 5, 6, 3, 6, 2, 5, 4, 3.5, 3, 4, 4, 4,
    4.5, 6, 5, 3, 4, 5, 2, 1.5, 3.5, 2, 2.5, 4.5, 3, 2.5, 4, 5.5, 3, 4.5, 4, 2,
    3.5, 5.5, 5, 1, 5, 6, 3.5, 4, 4, 4.5, 2.5, 4.5, 3.5, 5, 3, 5.5, 4.5, 6, 4,
    3.5, 4.5, 5, 5, 6, 2.5, 4.5, 6, 6, 3.5, 2, 5, 3.5, 4, 4, 4, 6, 6, 5, 5, 4, 6,
    4, 5, 2.5, 3, 3, 4.5, 2.5, 3, 6, 3, 5, 5.5, 3.5, 5, 3.5, 4.5, 5, 5, 3.5, 5.5,
    4.5, 5, 5, 3.5, 5, 5, 4.5, 3, 3, 6, 5, 6, 3.5, 4.5, 4, 2.5, 4.5, 1, 5.5, 6,
    4.5, 5, 6, 4, 1.5, 4.5, 3.5, 2.5, 3, 4.5, 5, 5, 3, 4, 5, 4.5, 4.5, 4.5, 6, 5,
    4.5, 5, 4.5, 5, 6, 4, 5, 6, 5, 5.5, 6, 4.5, 4, 3, 6, 2.5, 5, 2, 4.5, 5.5, 4.5,
    3.5, 5, 4.5, 5.5, 4.5, 5.5, 4, 4, 5, 4.5, 2, 5, 5, 6, 2.5, 2.5, 3.5, 5, 2,
    3.5, 5.5, 5, 2.5, 6, 6, 6, 1.5, 6, 5, 6, 4, 3, 4.5, 1.5, 2, 3, 4, 3, 3, 4, 5,
    6, 6, 3.5, 3.5, 5, 5, 6, 4, 3, 6, 5, 6, 3, 4.5, 4, 4.5, 5, 5, 3, 1.5, 3.5, 6,
    5, 4, 6, 6, 4.5, 4, 5, 5.5, 3, 4, 3, 6, 2.5, 4.5, 3, 3, 3.5, 4, 4, 4, 5.5,
    4.5, 4.5, 4.5, 1.5, 6, 3.5, 3, 5, 3.5, 4.5, 3.5, 6, 3, 2, 5.5, 5, 5.5, 5, 5,
    5, 5, 3.5, 3, 3, 4, 4, 5, 5, 6, 5, 5, 3.5, 5, 5, 3.5, 1.5, 2.5, 4.5, 5, 3, 2,
    4, 4.5, 3.5, 4, 4.5, 3.5, 4, 5, 4, 6, 4, 5.5, 4.5, 5, 4.5, 4, 4.5, 4, 5, 5,
    2.5, 5, 3, 2, 4.5, 3, 4.5, 5, 4.5, 5, 5, 5, 3, 5.5, 5, 6, 2.5, 4.5, 3.5, 4, 5,
    1, 2.5, 2, 4, 1.5, 5, 5.5, 5, 5, 6, 5, 6, 2.5, 5.5, 2.5, 5, 5, 2.5, 6, 4, 5,
    3, 4.5, 5, 2.5, 5, 2, 4, 2.5, 5, 4, 5, 3.5, 4, 4.5, 3, 5, 2, 5, 5, 4.5, 6, 5,
    5, 4, 5, 3, 6, 6, 5.5, 5, 3, 5, 5.5, 4.5, 5, 3, 4, 3, 3.5, 3, 3.5, 4, 2.5, 4,
    3.5, 6, 2, 5.5, 3, 4.5, 5, 4.5, 3, 2.5, 5, 2, 5.5, 5, 5, 2, 6, 6, 3, 4, 1, 3,
    5, 3.5, 4.5, 4.5, 5.5, 3.5, 3, 3.5, 4.5, 3.5, 6, 4, 4, 4.5, 6, 4.5, 2, 2, 6,
    4, 4.5, 4, 5, 5, 3, 4, 4.5, 3.5, 2, 3.5, 5, 2, 5, 5, 2.5, 2.5, 5, 5, 4, 4, 3,
    5, 5, 5.5, 5.5, 4.5, 6, 5.5, 2.5, 2, 4.5, 3.5, 5, 6, 5, 5, 4.5, 4, 6, 3.5,
    1.5, 2.5, 5, 4.5, 2, 5
  ),
  x2 = c(
    3, 4, 2, 4, 2.5, 4, 4, 4, 2.5, 2.5, 2.5, 3, 3, 3, 2.5, 3.5, 4, 3.5, 2.5, 3, 2,
    4, 3.5, 1.5, 3, 4, 3.5, 3, 3.5, 3.5, 3.5, 1.5, 3.5, 3.5, 1.5, 2.5, 3.5, 3.5,
    3.5, 3.5, 4, 3.5, 3, 3, 2, 2.5, 4, 3.5, 3, 1.5, 2, 2, 2, 1.5, 4, 3, 4, 2, 3,
    1.5, 3, 3, 1, 2.5, 2, 2.5, 3.5, 3, 3, 3, 3, 3, 2, 4, 3.5, 3, 3, 2, 2, 2.5,
    2.5, 4, 2.5, 3, 3.5, 4, 3, 3.5, 2, 2.5, 3.5, 4, 2.5, 3, 4, 3, 3.5, 2, 1.5,
    3.5, 3, 3.5, 3.5, 4, 1.5, 3, 2, 2, 4, 3, 3.5, 4, 1.5, 2.5, 3.5, 3, 4, 2, 3,
    2.5, 3, 3, 4, 3, 4, 2, 3, 2.5, 2, 3.5, 2, 2.5, 3, 3, 3, 4, 3, 3.5, 2.5, 3.5,
    3.5, 3.5, 3, 3.5, 3, 2.5, 3.5, 2, 3, 2, 3, 2.5, 3.5, 3, 3, 3, 3, 3, 2.5, 3, 4,
    3, 3.5, 2, 3.5, 3, 3, 4, 2, 3.5, 3, 2, 3, 3, 2, 4, 3.5, 1.5, 3, 2, 3, 4, 3, 2,
    3.5, 3, 1.5, 2.5, 3.5, 2, 3, 3, 3, 3, 3, 2, 2, 2, 2, 3, 4, 4, 3, 3.5, 2.5, 4,
    4, 4, 3, 2, 4, 3.5, 4, 3, 2, 4, 2, 3, 2.5, 2.5, 3.5, 2, 3, 4, 3, 4, 4, 2.5, 2,
    3, 3, 4, 3, 4, 2.5, 3.5, 3.5, 3.5, 2, 3.5, 2.5, 2.5, 3, 2, 3, 4, 3, 3, 3, 2,
    4, 3.5, 3, 3, 2.5, 3, 3.5, 3, 3, 4, 2.5, 2, 4, 3, 3, 2, 2, 3, 3, 4, 1, 2.5, 4,
    1, 3, 3, 2, 4, 2.5, 3.5, 3, 2, 3, 2.5, 3, 3, 2, 2, 4, 1.5, 2.5, 2.5, 2, 3, 3,
    4, 3, 3, 3, 2.5, 3, 3, 3, 3.5, 2, 2.5, 3, 3.5, 3, 2, 3, 3.5, 3, 1.5, 3, 4, 3,
    3.5, 3.5, 3, 2.5, 3, 3, 3, 3.5, 3.5, 1.5, 3, 2.5, 3.5, 2, 2, 3.5, 2, 3.5, 3,
    3, 2, 3, 1.5, 3, 3, 4, 2, 2.5, 4, 2, 4, 3.5, 4, 2, 3, 3, 1.5, 3, 3, 3, 2, 3,
    3.5, 1.5, 2.5, 3.5, 3, 3.5, 3.5, 3, 3, 2, 2.5, 2, 1.5, 3.5, 1.5, 3, 2.5, 2.5,
    3, 2, 2, 2, 3.5, 4, 3.5, 2.5, 3.5, 3, 3, 4, 2.5, 2, 3, 3, 2, 4, 4, 2, 2.5, 1,
    3.5, 4, 3, 3, 4, 2.5, 2, 3, 1.5, 2.5, 4, 2, 3.5, 3, 4, 3, 3, 3, 3, 3, 2, 2, 2,
    3, 3, 4, 2, 3.5, 3, 3.5, 2, 2, 2, 3, 3, 3, 3, 3.5, 2, 3, 2, 4, 2, 3, 3, 2.5,
    2, 2.5, 4, 3, 2.5, 2, 3, 2.5, 2, 4, 3.5, 3, 2.5, 3, 2.5, 2.5, 3.5, 2.5, 2.5,
    2.5, 3.5, 4, 3, 2, 3, 3, 4, 2.5, 2.5, 4, 3, 1.5, 2, 2.5, 1.5, 3.5, 4, 3, 1.5,
    1.5, 3, 4, 3, 2.5, 4, 2, 2.5, 3, 2.5, 3, 3.5, 2, 3, 3, 2.5
  ),
  x3 = c(5L, 5L, 5L, 6L, 4L, 6L, 3L, 6L, 2L, 5L, 5L, 5L, 5L, 1L, 5L,
         6L, 6L, 3L, 4L, 6L, 3L, 6L, 3L, 1L, 5L, 6L, 6L, 3L, 6L, 3L, 6L,
         3L, 5L, 4L, 6L, 5L, 5L, 5L, 4L, 4L, 3L, 5L, 3L, 5L, 5L, 5L, 5L,
         5L, 6L, 3L, 5L, 3L, 3L, 2L, 5L, 3L, 5L, 3L, 5L, 4L, 5L, 5L, 1L,
         3L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 4L,
         4L, 5L, 4L, 6L, 6L, 5L, 6L, 6L, 5L, 1L, 6L, 5L, 4L, 5L, 5L, 5L,
         5L, 5L, 6L, 2L, 6L, 4L, 5L, 2L, 5L, 4L, 4L, 3L, 2L, 6L, 4L, 4L,
         6L, 2L, 4L, 3L, 6L, 4L, 5L, 5L, 5L, 3L, 6L, 5L, 5L, 6L, 5L, 5L,
         6L, 3L, 6L, 2L, 2L, 3L, 3L, 5L, 6L, 5L, 4L, 1L, 6L, 2L, 2L, 5L,
         2L, 3L, 5L, 1L, 3L, 5L, 4L, 5L, 2L, 5L, 4L, 5L, 3L, 5L, 4L, 6L,
         1L, 5L, 6L, 5L, 5L, 6L, 3L, 4L, 2L, 2L, 5L, 5L, 5L, 6L, 5L, 4L,
         1L, 3L, 3L, 4L, 6L, 5L, 4L, 4L, 3L, 5L, 4L, 3L, 3L, 1L, 3L, 6L,
         1L, 5L, 5L, 5L, 6L, 3L, 5L, 3L, 2L, 4L, 5L, 5L, 3L, 4L, 2L, 6L,
         4L, 6L, 4L, 6L, 5L, 4L, 5L, 5L, 2L, 4L, 4L, 2L, 3L, 5L, 4L, 2L,
         5L, 5L, 5L, 6L, 3L, 4L, 6L, 5L, 5L, 2L, 6L, 5L, 5L, 5L, 5L, 5L,
         3L, 5L, 3L, 5L, 5L, 5L, 6L, 5L, 3L, 3L, 5L, 5L, 6L, 3L, 5L, 4L,
         6L, 2L, 5L, 5L, 6L, 4L, 2L, 5L, 4L, 4L, 3L, 2L, 5L, 3L, 6L, 1L,
         4L, 5L, 3L, 3L, 5L, 6L, 5L, 3L, 6L, 5L, 4L, 3L, 4L, 4L, 5L, 3L,
         1L, 5L, 1L, 2L, 6L, 4L, 6L, 6L, 6L, 6L, 5L, 5L, 2L, 5L, 3L, 4L,
         6L, 3L, 5L, 5L, 4L, 5L, 5L, 3L, 4L, 4L, 5L, 5L, 6L, 2L, 3L, 6L,
         4L, 5L, 5L, 3L, 6L, 5L, 6L, 3L, 5L, 4L, 3L, 6L, 5L, 5L, 5L, 4L,
         5L, 6L, 4L, 4L, 4L, 5L, 5L, 3L, 3L, 5L, 4L, 5L, 6L, 2L, 2L, 3L,
         2L, 5L, 1L, 5L, 5L, 3L, 3L, 5L, 5L, 6L, 6L, 6L, 5L, 6L, 5L, 5L,
         5L, 2L, 5L, 5L, 3L, 5L, 5L, 2L, 5L, 6L, 3L, 3L, 4L, 4L, 2L, 3L,
         5L, 5L, 5L, 5L, 6L, 1L, 3L, 4L, 3L, 5L, 4L, 2L, 5L, 5L, 5L, 3L,
         6L, 5L, 6L, 6L, 6L, 3L, 5L, 2L, 5L, 4L, 4L, 3L, 2L, 5L, 6L, 2L,
         4L, 5L, 4L, 5L, 6L, 4L, 5L, 3L, 2L, 5L, 6L, 5L, 3L, 3L, 4L, 6L,
         3L, 1L, 1L, 5L, 5L, 1L, 5L, 5L, 3L, 4L, 6L, 5L, 3L, 6L, 6L, 5L,
         4L, 5L, 6L, 6L, 5L, 5L, 4L, 6L, 6L, 4L, 4L, 4L, 4L, 5L, 6L, 3L,
         5L, 6L, 5L, 3L, 5L, 5L, 5L, 5L, 6L, 5L, 4L, 1L, 5L, 5L, 5L, 4L,
         3L, 6L, 5L, 3L, 3L, 4L, 4L, 5L, 6L, 5L, 4L, 3L, 3L, 6L, 5L, 3L,
         1L, 4L, 3L, 2L, 3L)
)

# Fit three matching specifications with the same data and formula so that
# the resulting covariate balance can be compared method by method.

# Optimal matching: the reference case that improves balance on these data
# (see the summary comment below).
out_optimal <- matchit(
  data = data,
  formula = treatment_group ~ x1 + x2 + x3,
  method = "optimal"
)

# Nearest neighbor matching with all other arguments left at their defaults.
# On these data it makes balance on x1 and x2 much worse (see below).
out_nearest <- matchit(
  data = data,
  formula = treatment_group ~ x1 + x2 + x3,
  method = "nearest"
)

# Genetic matching with all other arguments left at their defaults.
# It yields the same matched-sample balance as method = "nearest" here,
# which is the behavior the question is asking about.
out_genetic <- matchit(
  data = data,
  formula = treatment_group ~ x1 + x2 + x3,
  method = "genetic"
)

# Balance before ("all data") and after ("matched data") matching for each
# method; the trailing comments record the standardized mean differences for
# x1, x2 and x3 that were observed when the example was run.
summary(out_optimal) # Std. Mean Diff.s go from -.05, -.05, +.01 (all data) to -.03, +.00, -.02 (matched data)
summary(out_nearest) # Std. Mean Diff.s go from -.05, -.05, +.01 (all data) to -.51, -.50, +.05 (matched data)
summary(out_genetic) # Std. Mean Diff.s go from -.05, -.05, +.01 (all data) to -.51, -.50, +.05 (matched data)
r matching matchit
1个回答
0
投票

当处理单元多于对照单元时,这些匹配方法效果不佳。如果将估计目标(estimand)更改为 ATC(这实际上相当于交换处理组和对照组),则所有方法都效果良好。这是因为最近邻匹配(以及遗传匹配,它本质上也是最近邻匹配)默认按倾向得分从大到小的顺序进行匹配。这意味着最难匹配的处理单元最先匹配,最容易匹配的处理单元最后才匹配;当处理单元多于对照单元时,对照单元会先被用完,最容易匹配的处理单元根本得不到匹配。

最优匹配(`method = "optimal"`),以及设置了 `m.order = "closest"` 的最近邻匹配,不会遇到此问题,因为最容易匹配的单元会最先匹配。当您设置 `estimand = "ATC"` 时,所有对照单元都会得到匹配,包括最容易匹配和最难匹配的对照单元。

或者,当处理单元多于对照单元时,您可以进行有放回匹配(适用于最近邻匹配或遗传匹配)。设置 `replace = TRUE` 即可实现这一点,并获得出色的平衡。

© www.soinside.com 2019 - 2024. All rights reserved.