我在 Stata 中有一个数据集,如下所示:
math_score | literacy_score | student_gender | Location |
---|---|---|---|
45.1 | 67.1 | 男孩 | 农村 |
32.6 | 45.2 | 女孩 | 城市 |
65.2 | 87.02 | 女孩 | 城市 |
34.2 | 66.7 | 男孩 | 农村 |
56.9 | 37.8 | 男孩 | 农村 |
79.3 | 45.8 | 男孩 | 城市 |
等等。实际数据集编码为 Girl==0、Boy==1、Rural==0 和 Urban==1
我想生成条形图,其中包含按性别和位置划分的识字得分平均值。除了条形图之外,我还想要置信区间,但数据没有下限值和上限值。
我确实有一段可以运行的代码,但它非常冗长,而且会生成额外的数据集,显得很杂乱。请问是否有更简单的方法可以做到这一点?谢谢。
// For each score variable, draw a bar chart of its group means with
// confidence-interval caps, grouped by student_gender and by Location,
// and export the chart as a JPG.
//
// Fixes relative to the earlier version:
//   - "foreach x of" needs a list type; "of varlist" is used here.
//   - the outer loop is now closed after the final restore.
//   - the title/ytitle conditions now test the names the loop actually uses.
foreach x of varlist math_score literacy_score {

    // One results file per grouping variable, named after that variable.
    foreach var in Location student_gender {
        preserve
        statsby mean_`x' =r(mean) upper=r(ub) lower=r(lb), by(`var') saving(`var', replace) : ci mean `x'
        restore
    }

    preserve

    // Stack the two results files: gender rows first, then location rows.
    use student_gender, clear
    append using Location

    // Rows 1-2 form group 1 and rows 3-4 form group 2.
    gen xgroup = ceil(_n/2)
    label def xgroup 1 student_gender 2 Location
    label val xgroup xgroup

    // Positions 1 2 4 5: the skipped position 3 leaves a gap between groups.
    gen xaxis = xgroup + _n - 1

    // Reuse the value labels of the grouping variables as x-axis labels.
    label def xaxis 1 "`: label (student_gender) 0'", add
    label def xaxis 2 "`: label (student_gender) 1'", add
    label def xaxis 4 "`: label (Location) 0'", add
    label def xaxis 5 "`: label (Location) 1'", add
    label val xaxis xaxis
    drop student_gender Location
    list

    // Text shown next to each bar: the mean to one decimal place.
    gen mylabel = string(mean_`x', "%9.1f")

    // Pick the title and y-axis title that belong to the current score.
    local title=""
    if `"`x'"' == "math_score" loc title `"title(Math Student Knowledge Score (0-100))"'
    if `"`x'"' == "literacy_score" loc title `"title(Literacy Student Knowledge Score (0-100))"'
    local ytitle=""
    if `"`x'"' == "math_score" loc ytitle `"ytitle(Mean Math Knowledge Score)"'
    if `"`x'"' == "literacy_score" loc ytitle `"ytitle(Mean Literacy Knowledge Score)"'

    // Bars for the means, an invisible scatter to carry the value labels,
    // and capped spikes for the confidence limits.
    twoway (bar mean_`x' xaxis, color(ltblue)) || scatter mean_`x' xaxis, msymbol(none) mlabel(mylabel) ///
        mlabposition(1) mlabs(small) || (rcap lower upper xaxis, color(black)) , xla(1/2 4/5 , valuelabel ///
        noticks labsize(small)) `ytitle' xmla("", tlc(none)) xsc(r(0 6)) xtitle("Student Features", size(medsmall)) legend(off) ///
        `title' ylab(0(25)100) name(`x'_2, replace) scheme(s2color8)
    graph export "$output/`x'_gen_loc.jpg" , as(jpg) width(4000) replace

    restore
}
这个问题是要显示两个变量的均值和置信区间,每个变量都按两个二元预测变量分组。从你的问题来看,你已经了解了大部分相关的用法。下面的代码展示了一些额外的小技巧。
// Collect the statistics left behind by ci means for each score, by
// female and schtyp, into a results file named after the score.
foreach score in math write {
    use https://stats.idre.ucla.edu/stat/stata/notes/hsb2, clear
    statsby , by(female schtyp) saving(`score', replace) : ci means `score'
}

// Stack the two results files and tag each row with its subject.
use math
generate which = "Mathematics"
append using write
replace which = "Writing" if which == ""

// Number the rows 1 to 4 repeatedly, so each subject gets positions 1-4.
egen xaxis = seq(), to(4)

// Two-line axis labels: each quoted word is drawn on its own line.
label define xaxis ///
    1 `" "male" "public" "' ///
    2 `" "male" "private" "' ///
    3 `" "female" "public" "' ///
    4 `" "female" "private" "'
label values xaxis xaxis

set scheme stcolor

// Diamonds mark the means and capped spikes the confidence limits,
// with one panel per subject.
twoway scatter mean xaxis, msymbol(D) ///
    || rcap ub lb xaxis, ///
    xlabel(1/4, valuelabel tlcolor(none)) ///
    xscale(range(0.5 4.5)) ///
    xtitle("") ///
    by(which, note("") legend(off)) ///
    ytitle(Score)