RSelenium:单击“下一页”按钮后网页一直处于加载状态

问题描述 投票:0回答:1

我是网络抓取的新手,想从 https://www.forwardpathway.com/us-college-database 抓取数据。我使用以下代码从表格中提取数据,但单击“下一页”按钮后,页面一直处于加载状态。谁能指出哪里出了问题?

library(RSelenium)
library(tidyverse)
library(netstat)
library(xml2)
library(data.table)
library(rvest)

## Show which chromedriver versions binman reports, to help pick `chromever`.
binman::list_versions("chromedriver")

## Start the Selenium driver for Chrome on a free local port.
## `verbose` is spelled out as FALSE because `F` is an ordinary variable
## that can be reassigned.
rs_driver_object <- rsDriver(
  browser = "chrome",
  chromever = "107.0.5304.62",
  verbose = FALSE,
  port = free_port()
)

## create the client
remDr <- rs_driver_object$client

## open the browser
## NOTE(review): rsDriver() may already have opened this client; if so, this
## call starts a second browser session -- confirm whether it is needed.
remDr$open()

remDr$navigate("https://www.forwardpathway.com/us-college-database")

## locate the table that stores the data
data_table <- remDr$findElement(using = "id", "table_1")

# Three alternative ways of triggering the pager's "next" control were tried;
# the page kept loading with each of them.

## Attempt 1: locate the element by id and issue a WebDriver click.
next_button <- remDr$findElement(using = "id", value = "table_1_next")
next_button$clickElement()

## Attempt 2: click the element from inside the page via JavaScript.
click_next_js <- "document.getElementById('table_1_next').click()"
remDr$executeScript(click_next_js)

## Attempt 3: locate the element again and send it the Enter key.
next_button <- remDr$findElement(using = "id", value = "table_1_next")
enter_key <- list(key = "enter")
next_button$sendKeysToElement(enter_key)

## Scrape every page of the paginated table and stack the pages in `all_data`.
##
## Each pass parses the HTML currently rendered in the browser, keeps the
## first table found in it, then tries to advance the pager. The loop ends
## when the "next" control is missing, is marked as disabled, or any
## WebDriver call fails.
page_tables <- list()
cond <- TRUE

while (cond) {
  # Read the whole page source from the driver and keep its first table.
  page_source <- remDr$getPageSource()[[1]]
  page <- read_html(page_source)
  page_tables[[length(page_tables) + 1L]] <- html_table(page)[[1]]

  # The value of tryCatch() decides whether to continue, so no `<<-` needed.
  cond <- tryCatch(
    {
      next_button <- remDr$findElement("id", "table_1_next")

      # NOTE(review): assumes a DataTables-style pager that keeps the button
      # in the DOM on the last page and adds a "disabled" class -- confirm
      # against the live page.
      button_class <- unlist(next_button$getElementAttribute("class"))
      is_disabled <- length(button_class) > 0 &&
        any(grepl("disabled", button_class, fixed = TRUE))

      if (is_disabled) {
        FALSE
      } else {
        next_button$sendKeysToElement(list(key = "enter"))
        TRUE
      }
    },
    error = function(e) {
      # Report why the loop stops instead of swallowing the error.
      message("Stopping pagination: ", conditionMessage(e))
      FALSE
    }
  )

  # Wait after the click so the next pass reads the newly loaded page
  # rather than the one that was just scraped.
  if (cond) {
    Sys.sleep(5)
  }
}

print("script complete")

# Bind once at the end instead of re-binding the growing result every pass.
all_data <- rbindlist(page_tables)
r web-scraping rvest
1个回答
0
投票

使用以下代码,我可以成功单击“下一页”按钮并翻到下一页。

library(RSelenium)

# Launch a standalone Firefox Selenium container, mapping host port 4446 to
# the container's port 4444, then connect a remote driver to it.
# NOTE(review): shell() exists only in R for Windows -- confirm the platform.
shell('docker run -d -p 4446:4444 selenium/standalone-firefox')
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4446L,
  browserName = "firefox"
)
remDr$open()
remDr$navigate("https://www.forwardpathway.com/us-college-database")

# Scroll the page down in 200 steps of 20 pixels, printing each step number.
scroll_page_stepwise <- function(driver) {
  for (step in 1:200) {
    print(step)
    scroll_call <- paste0("scroll(0,", step * 20, ")")
    driver$executeScript(scroll_call)
  }
}

# Scroll through the first page and take a screenshot of it.
scroll_page_stepwise(remDr)
remDr$screenshot(TRUE)

# Click the pager's "next" control, located by XPath on its id.
web_Obj_Next <- remDr$findElement('xpath', '//*[@id="table_1_next"]')
web_Obj_Next$clickElement()
remDr$screenshot(TRUE)

# Scroll through the new page and take a final screenshot.
scroll_page_stepwise(remDr)
remDr$screenshot(TRUE)

© www.soinside.com 2019 - 2024. All rights reserved.