空间观测下载循环错误

Question

我有这个脚本，可以用 R 语言从 GBIF.org 下载某个物种的观测数据。然后，将数据绘制在欧洲地图上，以便制作物种分布图。

# PACKAGES

library(sf)
library(rgbif)
library(dismo)
library(tidyverse)
library(eurostat)
library(leaflet)
library(sf)
library(scales)
library(cowplot)
library(ggthemes)
library(ggplot2)

# GBIF EXAMPLE
  # Get data from GBIF
  data <- gbif("Psilocybe", "semilanceata", geo = TRUE)

  # Filter out rows with missing coordinates
  data_filtered <- data[complete.cases(data$lon, data$lat), ]

  # Create a spatial dataframe
  data_sf <- st_as_sf(data_filtered, coords = c("lon", "lat"), crs = 4326)


# MAP
  # Get map data
  get_eurostat_geospatial(resolution = 10, 
                        nuts_level = 0, 
                        year = 2016)

  SHP_0 <- get_eurostat_geospatial(resolution = 10, 
                                 nuts_level = 0, 
                                 year = 2016)
# PLOT OBSERVATIONS + MAP
# Simple  
  species_plot <-ggplot() +
    geom_sf() +
    geom_sf(data = SHP_0, color = 'black', fill = 'grey90',linewidth = 0.20) +
    scale_x_continuous(limits = c(-10, 30)) +
    scale_y_continuous(limits = c(35, 70)) + 
    geom_sf(data = data_sf, color = "cornflowerblue", size = 1.2, alpha = 0.25) +
    theme_void()
  # Save the ggplot as a .jpg file with the species name
  species_name <- "Psilocybe_semilanceata"
  ggsave(paste0(species_name, ".jpg"), plot = species_plot, width = 10, height = 8, units = "in", dpi = 300)

我有一个包含数百个物种的完整数据集，我想为其制作分布图。因此，我加载了带有拉丁物种名称的数据集：

> str(bird_names)
 chr [1:280] "Prunella modularis" "Myiopsitta monachus" ...
> head(bird_names)
[1] "Prunella modularis"  "Myiopsitta monachus" "Pyrrhura perlata"   
[4] "Tyto alba"           "Panurus biarmicus"   "Merops apiaster"

因此，我用 for 循环重写了第一个脚本，以从 GBIF.org 下载所有物种数据并绘制它：

library(sf)
library(rgbif)
library(dismo)
library(tidyverse)
library(eurostat)
library(leaflet)
library(sf)
library(scales)
library(cowplot)
library(ggthemes)
library(ggplot2)


# Create a directory for storing bird maps
dir.create("BIRD_MAPS", showWarnings = FALSE)

# Your existing packages and GBIF example...

# Loop through each bird species
for (species in bird_names) {
  # Split the species name into genus and species
  species_parts <- strsplit(species, " ")[[1]]
  genus <- species_parts[1]
  species_name <- species_parts[2]
  
  # Get data from GBIF for the current species
  data <- gbif(genus, species_name, geo = TRUE)
  
  # Filter out rows with missing coordinates
  data_filtered <- data[complete.cases(data$lon, data$lat), ]
  
  # Create a spatial dataframe
  data_sf <- st_as_sf(data_filtered, coords = c("lon", "lat"), crs = 4326)
  
  # Get map data
  SHP_0 <- get_eurostat_geospatial(resolution = 10, nuts_level = 0, year = 2016)
  
  # Plot observations + map
  species_plot <- ggplot() +
    geom_sf() +
    geom_sf(data = SHP_0, color = 'black', fill = 'grey90', linewidth = 0.20) +
    scale_x_continuous(limits = c(-10, 30)) +
    scale_y_continuous(limits = c(35, 70)) + 
    geom_sf(data = data_sf, color = "cornflowerblue", size = 1.2, alpha = 0.25) +
    theme_void()
  
  # Save the ggplot as a .jpg file with the species name in BIRD_MAPS directory
  ggsave(paste0("BIRD_MAPS/", gsub(" ", "_", species), ".jpg"), plot = species_plot, width = 10, height = 8, units = "in", dpi = 300)
}

但是，我收到了这个错误：

2790578 records found
Error in gbif(genus, species_name, geo = TRUE) : 
  The number of records is larger than the maximum for download via this service (200,000)

有可能克服这个问题吗？我收到错误是否是因为脚本想要立即下载所有观察结果而不是一个物种的所有观察结果？我只想要欧洲的观察结果，这样这可能是过滤掉一些多余观察结果的一种方法。

Answer 1

正如

L Tyrone

提到的，您可以使用

rgbif

包。下面是我的标准工作流程示例，用于获取物种的 GBIF 出现次数（包括同义词）。请随意使用它作为指南：

准备辅助数据文件：检查同义词并为它们获取
```
occ_count
```
：

spc <- "Cynodon dactylon (L.) Pers."
output_file <- "cynodon_dactylon.csv"

if(!file.exists(output_file)) {
  a <- rgbif::name_backbone(name = spc) |>
    subset(select = c("usageKey", "species", "scientificName", "status")) |>
    dplyr::mutate(acceptedKey = usageKey)
  
  b <- rgbif::name_usage(key = a$usageKey, data = "synonyms")$data |>
    subset(select = c("key", "species", "scientificName", "taxonomicStatus", "acceptedKey")) |>
    dplyr::rename(c(usageKey = key, status = taxonomicStatus))
  
  a <- a |>
    rbind(b)
  
  for (i in 1:nrow(a)) {
    key <- as.integer(a[i,"usageKey"])
    occCount <- rgbif::occ_count(taxonKey = key)
    a[a$usageKey == key, "occCount"] <- occCount
    message(i, " ", a[i, "scientificName"], ", count = ", occCount)
  }
  readr::write_csv(a, file = output_file)
} else {
  a <- readr::read_csv(file = output_file, show_col_types = FALSE)
}

现在，有了所有同义词及其出现次数，我们就可以下载 GBIF 数据集了。首先，我们仅过滤掉在 GBIF 中至少出现 1 次的物种/同义词。请注意，
```
rgbif
```
使用GBIF API，因此您必须在GBIF上创建一个帐户。

if(file.exists(output_file)) {
  a <- readr::read_csv(file = output_file, show_col_types = FALSE)
  b <- a |>
    subset(occCount >= 1L)
  
  for (i in 1:nrow(b)) {
    if("occDownloadKey" %in% names(b) && is.na(b[i, "occDownloadKey"])) {
      message(i, " ", b[i, "scientificName"])
      calKey <- as.integer(b[i, "usageKey"])
      res <- rgbif::occ_download(
        rgbif::pred('taxonKey', calKey),
        rgbif::pred('hasCoordinate', TRUE),
        user = "your_user_name",
        pwd = "your_password",
        email = "your_email"
      )
      repeat {
        ala <- rgbif::occ_download_meta(res)
        message(paste(calKey, Sys.time(), ala$status, sep = " - "))
        if (ala$status == "SUCCEEDED") {
          Sys.sleep(3)
          data <- rgbif::occ_download_get(key = ala$key, path = "data/gbif", overwrite = FALSE)
          a[a$usageKey == calKey, "occDownloadKey"] <- ala$key
          break
        }
        Sys.sleep(60)
      }
    } else {
      message(i, " ", b[i, "scientificName"], " ", b[i, "occDownloadKey"])
    }
  }
  readr::write_csv(a, file = output_file)
}

下载完所有数据（需要一段时间）后，我们可以进一步进行，我通常调用
```
CoordinateCleaner
```
包来清理数据，例如：

a <- readr::read_csv(file = output_file, show_col_types = FALSE)
b <- a |>
  subset(!is.na(occDownloadKey))

d <- data.frame()
for (i in 1:nrow(b)) {
  message(i, " of ", nrow(b), " - ", b[i, "scientificName"])
  dd <- rgbif::occ_download_import(rgbif::as.download(paste0("data/gbif/",b[i,"occDownloadKey"],".zip"))) |>
    subset(select = c(gbifID, species, decimalLongitude, decimalLatitude, countryCode, individualCount,
           family, taxonRank, coordinateUncertaintyInMeters, year,
           basisOfRecord, institutionCode, datasetName)) |>
    subset(!is.na(decimalLongitude) & !is.na(decimalLatitude))
  
  dd$gbifID <- as.character(dd$gbifID)
  d <- dd |>
    rbind(d)
}

d$countryCode <- countrycode::countrycode(d$countryCode, origin =  'iso2c', destination = 'iso3c')
d <- data.frame(d)

rf <- CoordinateCleaner::clean_coordinates(d,
                                           lon = "decimalLongitude",
                                           lat = "decimalLatitude",
                                           countries = "countryCode",
                                           species = "species",
                                           tests = c("capitals", "centroids", 
                                                     "equal", "gbif", 
                                                     "institutions", 
                                                     "outliers", "seas",
                                                     "zeros", "countries")
)

rf <- rf |>
  sf::st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = "EPSG:4326") |>
  subset(.summary == TRUE)

空间观测下载循环错误

问题描述投票：0回答：1

1个回答

最新问题

空间观测下载循环错误

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1