我有这个脚本,可以用 R 语言从 GBIF.org 下载某个物种的观测数据。然后,将数据绘制在欧洲地图上,以便制作物种分布图。
# PACKAGES
library(sf)
library(rgbif)
library(dismo)
library(tidyverse)
library(eurostat)
library(leaflet)
library(sf)
library(scales)
library(cowplot)
library(ggthemes)
library(ggplot2)
# GBIF EXAMPLE
# Get data from GBIF
data <- gbif("Psilocybe", "semilanceata", geo = TRUE)
# Filter out rows with missing coordinates
data_filtered <- data[complete.cases(data$lon, data$lat), ]
# Create a spatial dataframe
data_sf <- st_as_sf(data_filtered, coords = c("lon", "lat"), crs = 4326)
# MAP
# Get map data
get_eurostat_geospatial(resolution = 10,
nuts_level = 0,
year = 2016)
SHP_0 <- get_eurostat_geospatial(resolution = 10,
nuts_level = 0,
year = 2016)
# PLOT OBSERVATIONS + MAP
# Simple
species_plot <-ggplot() +
geom_sf() +
geom_sf(data = SHP_0, color = 'black', fill = 'grey90',linewidth = 0.20) +
scale_x_continuous(limits = c(-10, 30)) +
scale_y_continuous(limits = c(35, 70)) +
geom_sf(data = data_sf, color = "cornflowerblue", size = 1.2, alpha = 0.25) +
theme_void()
# Save the ggplot as a .jpg file with the species name
species_name <- "Psilocybe_semilanceata"
ggsave(paste0(species_name, ".jpg"), plot = species_plot, width = 10, height = 8, units = "in", dpi = 300)
我有一个包含数百个物种的完整数据集,我想为其制作分布图。因此,我加载了带有拉丁物种名称的数据集:
> str(bird_names)
chr [1:280] "Prunella modularis" "Myiopsitta monachus" ...
> head(bird_names)
[1] "Prunella modularis" "Myiopsitta monachus" "Pyrrhura perlata"
[4] "Tyto alba" "Panurus biarmicus" "Merops apiaster"
因此,我用 for 循环重写了第一个脚本,以从 GBIF.org 下载所有物种数据并绘制它:
library(sf)
library(rgbif)
library(dismo)
library(tidyverse)
library(eurostat)
library(leaflet)
library(sf)
library(scales)
library(cowplot)
library(ggthemes)
library(ggplot2)
# Create a directory for storing bird maps
dir.create("BIRD_MAPS", showWarnings = FALSE)
# Your existing packages and GBIF example...
# Loop through each bird species
for (species in bird_names) {
# Split the species name into genus and species
species_parts <- strsplit(species, " ")[[1]]
genus <- species_parts[1]
species_name <- species_parts[2]
# Get data from GBIF for the current species
data <- gbif(genus, species_name, geo = TRUE)
# Filter out rows with missing coordinates
data_filtered <- data[complete.cases(data$lon, data$lat), ]
# Create a spatial dataframe
data_sf <- st_as_sf(data_filtered, coords = c("lon", "lat"), crs = 4326)
# Get map data
SHP_0 <- get_eurostat_geospatial(resolution = 10, nuts_level = 0, year = 2016)
# Plot observations + map
species_plot <- ggplot() +
geom_sf() +
geom_sf(data = SHP_0, color = 'black', fill = 'grey90', linewidth = 0.20) +
scale_x_continuous(limits = c(-10, 30)) +
scale_y_continuous(limits = c(35, 70)) +
geom_sf(data = data_sf, color = "cornflowerblue", size = 1.2, alpha = 0.25) +
theme_void()
# Save the ggplot as a .jpg file with the species name in BIRD_MAPS directory
ggsave(paste0("BIRD_MAPS/", gsub(" ", "_", species), ".jpg"), plot = species_plot, width = 10, height = 8, units = "in", dpi = 300)
}
但是,我收到了这个错误:
2790578 records found
Error in gbif(genus, species_name, geo = TRUE) :
The number of records is larger than the maximum for download via this service (200,000)
有可能克服这个问题吗?我收到错误是否是因为脚本想要立即下载所有观察结果而不是一个物种的所有观察结果?我只想要欧洲的观察结果,这样这可能是过滤掉一些多余观察结果的一种方法。
正如
L Tyrone
提到的,您可以使用rgbif
包。下面是我的标准工作流程示例,用于获取物种的 GBIF 出现次数(包括同义词)。请随意使用它作为指南:
occ_count
:spc <- "Cynodon dactylon (L.) Pers."
output_file <- "cynodon_dactylon.csv"
if(!file.exists(output_file)) {
a <- rgbif::name_backbone(name = spc) |>
subset(select = c("usageKey", "species", "scientificName", "status")) |>
dplyr::mutate(acceptedKey = usageKey)
b <- rgbif::name_usage(key = a$usageKey, data = "synonyms")$data |>
subset(select = c("key", "species", "scientificName", "taxonomicStatus", "acceptedKey")) |>
dplyr::rename(c(usageKey = key, status = taxonomicStatus))
a <- a |>
rbind(b)
for (i in 1:nrow(a)) {
key <- as.integer(a[i,"usageKey"])
occCount <- rgbif::occ_count(taxonKey = key)
a[a$usageKey == key, "occCount"] <- occCount
message(i, " ", a[i, "scientificName"], ", count = ", occCount)
}
readr::write_csv(a, file = output_file)
} else {
a <- readr::read_csv(file = output_file, show_col_types = FALSE)
}
rgbif
使用GBIF API,因此您必须在GBIF上创建一个帐户。if(file.exists(output_file)) {
a <- readr::read_csv(file = output_file, show_col_types = FALSE)
b <- a |>
subset(occCount >= 1L)
for (i in 1:nrow(b)) {
if("occDownloadKey" %in% names(b) && is.na(b[i, "occDownloadKey"])) {
message(i, " ", b[i, "scientificName"])
calKey <- as.integer(b[i, "usageKey"])
res <- rgbif::occ_download(
rgbif::pred('taxonKey', calKey),
rgbif::pred('hasCoordinate', TRUE),
user = "your_user_name",
pwd = "your_password",
email = "your_email"
)
repeat {
ala <- rgbif::occ_download_meta(res)
message(paste(calKey, Sys.time(), ala$status, sep = " - "))
if (ala$status == "SUCCEEDED") {
Sys.sleep(3)
data <- rgbif::occ_download_get(key = ala$key, path = "data/gbif", overwrite = FALSE)
a[a$usageKey == calKey, "occDownloadKey"] <- ala$key
break
}
Sys.sleep(60)
}
} else {
message(i, " ", b[i, "scientificName"], " ", b[i, "occDownloadKey"])
}
}
readr::write_csv(a, file = output_file)
}
CoordinateCleaner
包来清理数据,例如:a <- readr::read_csv(file = output_file, show_col_types = FALSE)
b <- a |>
subset(!is.na(occDownloadKey))
d <- data.frame()
for (i in 1:nrow(b)) {
message(i, " of ", nrow(b), " - ", b[i, "scientificName"])
dd <- rgbif::occ_download_import(rgbif::as.download(paste0("data/gbif/",b[i,"occDownloadKey"],".zip"))) |>
subset(select = c(gbifID, species, decimalLongitude, decimalLatitude, countryCode, individualCount,
family, taxonRank, coordinateUncertaintyInMeters, year,
basisOfRecord, institutionCode, datasetName)) |>
subset(!is.na(decimalLongitude) & !is.na(decimalLatitude))
dd$gbifID <- as.character(dd$gbifID)
d <- dd |>
rbind(d)
}
d$countryCode <- countrycode::countrycode(d$countryCode, origin = 'iso2c', destination = 'iso3c')
d <- data.frame(d)
rf <- CoordinateCleaner::clean_coordinates(d,
lon = "decimalLongitude",
lat = "decimalLatitude",
countries = "countryCode",
species = "species",
tests = c("capitals", "centroids",
"equal", "gbif",
"institutions",
"outliers", "seas",
"zeros", "countries")
)
rf <- rf |>
sf::st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = "EPSG:4326") |>
subset(.summary == TRUE)