它有什么意思错误seq.int(0,TO0 - 从通过):在错误的标志“的” r中的说法

问题描述 投票:-1回答:1

下面用于网页代码中使用API​​调用报废网站。我只需要改变起始日期和结束日期得到的数据集,我想。此前它工作正常,奇妙尽的循环,但我没有对html_nodes一些修改后的()的一部分 - 尝试在网页中提取不同的部分,它保持返回我的同日的数据。错误警告“错误seq.int(0,TO0 - 从通过):在错误的标志‘的’r中的说法”。这里应该做些什么?

library(tidyverse)
library(readr)
library(tidyr)
library(dplyr)
library(xlsx)
library(beepr)

get_sounding_data <- function(region = c("naconf", "samer", "pac", "nz", "ant",
                                     "np", "europe", "africa", "seasia", "mideast"),
                          date,
                          from_hr = c("00", "12", "all"),
                          to_hr = c("00", "12", "all"),
                          station_number = 48615) {

  # we use these pkgs (the readr and dplyr dependencies removed)
  suppressPackageStartupMessages({
    require("xml2", quietly = TRUE)
    require("httr", quietly = TRUE)
    require("rvest", quietly = TRUE)
  })

  # validate region
  region <- match.arg(
    arg = region,
    choices = c(
      "naconf", "samer", "pac", "nz", "ant",
      "np", "europe", "africa", "seasia", "mideast"
    )
  )

  # validates the date for us if it's a character string
  date <- as.Date(date)

  # get year and month
  year <- as.integer(format(date, "%Y"))
  stopifnot(year %in% 1973:as.integer(format(Sys.Date(), "%Y")))

  year <- as.character(year)
  month <- format(date, "%m")

# we need these to translate day & *_hr to the param the app needs
c(
"0100", "0112", "0200", "0212", "0300", "0312", "0400", "0412",
"0500", "0512", "0600", "0612", "0700", "0712", "0800", "0812",
"0900", "0912", "1000", "1012", "1100", "1112", "1200", "1212",
"1300", "1312", "1400", "1412", "1500", "1512", "1600", "1612",
"1700", "1712", "1800", "1812", "1900", "1912", "2000", "2012",
"2100", "2112", "2200", "2212", "2300", "2312", "2400", "2412",
"2500", "2512", "2600", "2612", "2700", "2712", "2800", "2812",
"2900", "2912", "3000", "3012", "3100", "3112"
  ) -> hr_vals

c(
"01/00Z", "01/12Z", "02/00Z", "02/12Z", "03/00Z", "03/12Z", "04/00Z",
"04/12Z", "05/00Z", "05/12Z", "06/00Z", "06/12Z", "07/00Z", "07/12Z",
"08/00Z", "08/12Z", "09/00Z", "09/12Z", "10/00Z", "10/12Z", "11/00Z",
"11/12Z", "12/00Z", "12/12Z", "13/00Z", "13/12Z", "14/00Z", "14/12Z",
"15/00Z", "15/12Z", "16/00Z", "16/12Z", "17/00Z", "17/12Z", "18/00Z",
"18/12Z", "19/00Z", "19/12Z", "20/00Z", "20/12Z", "21/00Z", "21/12Z",
"22/00Z", "22/12Z", "23/00Z", "23/12Z", "24/00Z", "24/12Z", "25/00Z",
"25/12Z", "26/00Z", "26/12Z", "27/00Z", "27/12Z", "28/00Z", "28/12Z",
"29/00Z", "29/12Z", "30/00Z", "30/12Z", "31/00Z", "31/12Z"
  ) -> hr_inputs

  hr_trans <- stats::setNames(hr_vals, hr_inputs)

  o_from_hr <- from_hr <- as.character(tolower(from_hr))
  o_to_hr <- to_hr <- as.character(tolower(to_hr))

  if ((from_hr == "all") || (to_hr == "all")) {
  from_hr <- to_hr <- "all"
  } else {

  from_hr <- hr_trans[sprintf("%s/%02dZ", format(date, "%d"), as.integer(from_hr))]
match.arg(from_hr, hr_vals)

 to_hr <- hr_trans[sprintf("%s/%02dZ", format(date, "%d"), as.integer(to_hr))]
match.arg(to_hr, hr_vals)
 }

  # clean up the station number if it was entered as a double
  station_number <- as.character(as.integer(station_number))

# execute the API call
httr::GET(
url = "http://weather.uwyo.edu/cgi-bin/sounding",
query = list(
  region = region,
  TYPE = "TEXT:LIST",
  YEAR = year,
  MONTH = sprintf("%02d", as.integer(month)),
  FROM = from_hr,
  TO = to_hr,
  STNM = station_number
   )
  ) -> res

  # check for super bad errors (that we can't handle nicely)
  httr::stop_for_status(res)

  # get the page content
  doc <- httr::content(res, as="text")

  # if the site reports no data, issue a warning and return an empty data frame
  if (grepl("Can't get", doc)) {
    doc <- xml2::read_html(doc)
    msg <- rvest::html_nodes(doc, "body")
    msg <- rvest::html_text(msg, trim=TRUE)
    msg <- gsub("\n\n+.*$", "", msg)
    warning(msg)
    return(data.frame(stringsAsFactors=FALSE))
   }

  # if the site reports no data, issue a warning and return an empty data frame
  if (grepl("Can't get", doc)) {
    doc <- xml2::read_html(doc)
    msg <- rvest::html_nodes(doc, "body")
    msg <- rvest::html_text(msg, trim=TRUE)
    msg <- gsub("\n\n+.*$", "", msg)
    warning(msg)
    return(data.frame(stringsAsFactors=FALSE))
   }

  # turn it into something we can parse
  doc <- xml2::read_html(doc)

  raw_dat <- doc %>%
    html_nodes("h3+ pre") %>% 
    html_text()

  indices <- raw_dat %>% 
    str_split(pattern = "\n", simplify = T) %>% 
    map_chr(str_squish) %>% 
    tibble(x = .) %>% 
    separate(x, into = c("Station", "Value"), sep = ": ") %>% 
    filter(!is.na(Value))

  data <- tidyr::spread(indices, Station, Value)
  data
    }

startDate <- as.Date("01-11-1979", format="%d-%m-%y")
endDate <- as.Date("31-01-1980",format="%d-%m-%y")

days <- seq(startDate, endDate, "1 day")

lapply(days[1:92], function(day) {
  get_sounding_data(
    region = "seasia",
    date = day,
    from_hr = "00",
    to_hr = "00",
    station_number = "48615"
  )
}) -> soundings_48615

warnings()

new_df <- map(soundings_48615, . %>% mutate_all(parse_guess))
dat <- bind_rows(new_df)
dat <- dat %>% separate(col =`Observation time`, into = c('Date', 'time'), sep = '/')

dat$Date <- as.Date(dat$Date, format = "%y%m%d")

#save in text file
write.csv(dat, 'c:/Users/Hp/Documents/yr/climatology/yr_SoundingIndexLowerPart/1979.csv')

get_sounding_data <- NULL
beep()
r seq
1个回答
1
投票

看来你现在拥有的是错误,是基于日期格式。更具体的时间如下

as.Date("01-11-1979", format="%d-%m-%y")
as.Date("31-01-1980",format="%d-%m-%y")

输出

"2019-11-01"
"2019-01-31"

的r日期/时间格式为国际标准yyyy-mm-dd。这样的"2019-11-01" "2019-01-31"时间明智之后说到。因此这将导致如果它试图在同一时间进行迭代1积极的一天序列失败。该格式是这里的问题,这需要解决有一个非常简单的解决方案。在国际标准的日期格式总是工作,因为这些会被(几乎)所有的程序予以确认。

这样的改变,你的代码的日期序列部分

....
startDate <- as.Date("1979-11-01")
endDate <- as.Date("1980-01-31")

days <- seq(startDate, endDate, "1 day")
....

请注意格式更改。至于为什么它改变了1979至2019年,我不完全肯定,别人可能有这种怪异的行为更加复杂的答案。

© www.soinside.com 2019 - 2024. All rights reserved.