我已经使用rbind将两个数据集合并为一个828 x 5的数据框,称为vegetation。这是我当前数据框的一个示例:
site year sos eos vegetation
EPIC_alligatorriver.csv 2016 176 301
EPIC_alligatorriver.csv 2018 164 291
Landsat_alligatorriver.csv 2016 170 303
Pheno_alligatorriver.csv 2017 152 288
Landsat_NEON.BART42.csv 2017 115 290
Pheno_NEON.BART42.csv 2017 120 290
alligatorriver.csv NA NA NA deciduous broadleaf
NEON.BART42.csv NA NA NA mixed forest
这是我需要的
site year sos eos vegetation
EPIC_alligatorriver.csv 2016 176 301 deciduous broadleaf
EPIC_alligatorriver.csv 2018 164 291 deciduous broadleaf
Landsat_alligatorriver.csv 2016 170 303 deciduous broadleaf
Pheno_alligatorriver.csv 2017 152 288 deciduous broadleaf
Landsat_NEON.BART42.csv 2017 115 290 mixed forest
Pheno_NEON.BART42.csv 2017 120 290 mixed forest
alligatorriver.csv NA NA NA deciduous broadleaf
NEON.BART42.csv NA NA NA mixed forest
基本上,我需要通过根据站点名称匹配正确的植被类型来填写空白植被列。
我也可以删除末尾那些只含植被信息的行,并将所有植被数据保存在一个完全独立的两列数据框中
site vegetation
alligatorriver.csv deciduous broadleaf
konza.csv grassland
merbleue.csv wetland
NEON.BART42.csv mixed forest
如果这样更容易的话。共有7种植被类型、99个站点,数据涵盖三年(2016年、2017年、2018年)。非常感谢任何帮助!
df1 (partial): structure(list(site = c("EPIC_alligatorriver.csv", "EPIC_alligatorriver.csv",
"EPIC_alligatorriver.csv", "EPIC_arbutuslakeinlet.csv", "EPIC_arbutuslakeinlet.csv",
"EPIC_arbutuslakeinlet.csv", "EPIC_archboldavir.csv", "EPIC_archboldavir.csv",
"EPIC_archboldavir.csv", "EPIC_archboldavirx.csv"), year = c(2016L, 2017L, 2018L, 2016L, 2017L, 2018L, 2016L,
2017L, 2018L, 2016L), sos = c(117, 111, 122, 147, 145, 144, 98, 156,
114, 98), eos = c(294, 294, 274, 276, 271, 274, 315, 295, 307, 315), vegetation = c("", "", "", "", "", "", "", "", "", "")), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), class = "data.frame")
df2: structure(list(vegetation = structure(c(2L, 2L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 6L, 3L, 2L, 4L, 2L, 2L, 3L, 4L, 2L, 3L, 2L,
1L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 6L, 6L, 4L, 1L, 4L, 1L, 4L, 2L,
3L, 2L, 4L, 7L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 7L, 2L, 4L, 7L, 2L,
4L, 2L, 2L, 5L, 2L, 4L, 2L, 5L, 2L, 5L, 2L, 6L, 2L, 5L, 3L, 2L,
2L, 5L, 2L, 4L, 4L, 2L, 4L, 2L, 2L, 2L, 1L, 2L, 2L, 4L, 6L, 2L,
1L, 4L, 2L, 1L, 3L, 2L, 2L, 4L, 3L, 2L, 3L, 2L, 2L), .Label = c("agriculture",
"deciduous broadleaf", "evergreen needlefeaf", "grassland", "mixed forest",
"shrub", "wetland"), class = "factor"), site = structure(1:99, .Label = c("alligatorriver",
"arbutuslakeinlet", "archboldavir", "archboldavirx", "archboldpnot",
"archboldpnotx", "arsmnswanlake1", "ashburnham", "bartlettir",
"bostoncommon", "bullshoals", "burnssagebrush", "canadaOBS",
"caryinstitute", "cperuvb", "downerwoods", "dukehw", "goodnow",
"grandteton", "harvard", "harvardbarn", "harvardbarn2", "harvardfarmsouth",
"harvardhemlock", "harvardhemlock2", "harvardlph", "howland1",
"howland2", "hubbardbrook", "huyckpreserveny", "jerbajada", "jernort",
"kansas", "kelloggcorn", "kendall", "kingmanfarm", "konza", "lacclair",
"laclaflamme", "laurentides", "lethbridge", "lostcreek", "luckyhills",
"mandanh5", "mandani2", "mead1", "mead2", "mead3", "meadpasture",
"merbleue", "missouriozarks", "montebondonegrass", "montebondonepeat",
"morganmonroe", "nationalelkrefuge", "ncssm", "NEON.BART33",
"NEON.BART42", "NEON.DELA33", "NEON.DSNY33", "NEON.HARV33", "NEON.HARV42",
"NEON.JERC33", "NEON.JERC42", "NEON.LENO33", "NEON.ONAQ33", "NEON.ORNL33",
"NEON.ORNL42", "NEON.RMNP33", "NEON.SERC33", "NEON.TREE33", "NEON.TREE42",
"NEON.UNDE33", "NEON.WOOD33", "ninemileprairie", "northattkeboroma",
"oakville", "proctor", "queens", "readingma", "rosemountnprs",
"russellsage", "sanford", "sevilletagrass", "sevilletashrub",
"shalehillsczo", "southerngreatplains", "stjones", "sweetbriar",
"sweetbriargrass", "sylvania", "tonzi", "umichbiological2", "usgseros",
"usmpj", "uwmfieldsta", "warrenwilson", "willowcreek", "worcester"
), class = "factor")), class = "data.frame", row.names = c(NA,
-99L))
这可以使用match()来完成:先用sub()去掉第一个数据框中站点名称的前缀,然后在两个数据框的站点列之间进行匹配:
# Fill `vegetation` by looking up each row's site in df2.
# The key for df is the site with everything up to the last "_" removed
# (e.g. the "EPIC_" / "Landsat_" / "Pheno_" prefix). A trailing ".csv" is
# stripped on BOTH sides so the lookup succeeds whether df2$site holds
# "alligatorriver.csv" or the bare "alligatorriver". sub() and match() coerce
# factor columns to character; rows with no matching site get NA.
df$vegetation <- df2$vegetation[match(
  sub("\\.csv$", "", sub("^.*_", "", df$site)),
  sub("\\.csv$", "", df2$site)
)]
df
site year sos eos vegetation
1 EPIC_alligatorriver.csv 2016 176 301 deciduous broadleaf
2 EPIC_alligatorriver.csv 2018 164 291 deciduous broadleaf
3 Landsat_alligatorriver.csv 2016 170 303 deciduous broadleaf
4 Pheno_alligatorriver.csv 2017 152 288 deciduous broadleaf
5 Landsat_NEON.BART42.csv 2017 115 290 mixed forest
6 Pheno_NEON.BART42.csv 2017 120 290 mixed forest
数据:
# Example data frame: six rows of prefixed site file names with their year,
# sos and eos values. Note that `vegetation` is already populated here with
# the values shown in the printed result.
df <- data.frame(
  site = c(
    "EPIC_alligatorriver.csv", "EPIC_alligatorriver.csv",
    "Landsat_alligatorriver.csv", "Pheno_alligatorriver.csv",
    "Landsat_NEON.BART42.csv", "Pheno_NEON.BART42.csv"
  ),
  year = c(2016L, 2018L, 2016L, 2017L, 2017L, 2017L),
  sos = c(176L, 164L, 170L, 152L, 115L, 120L),
  eos = c(301L, 291L, 303L, 288L, 290L, 290L),
  vegetation = rep(
    c("deciduous broadleaf", "mixed forest"),
    times = c(4L, 2L)
  ),
  stringsAsFactors = FALSE
)
# Lookup table: one row per site file name with its vegetation type.
df2 <- data.frame(
  site = c(
    "alligatorriver.csv",
    "konza.csv",
    "merbleue.csv",
    "NEON.BART42.csv"
  ),
  vegetation = c(
    "deciduous broadleaf",
    "grassland",
    "wetland",
    "mixed forest"
  ),
  stringsAsFactors = FALSE
)