我是网页抓取的新手,想从 https://www.forwardpathway.com/us-college-database 抓取数据。我使用以下代码提取表格中的数据,但单击“下一页”(Next)按钮之后,页面一直处于加载状态。谁能指出哪里出了问题?
library(RSelenium)
library(tidyverse)
library(netstat)
library(xml2)
library(data.table)
library(rvest)
# Scrape every page of the paginated table `#table_1` with RSelenium + rvest.
#
# Result: `all_data`, a data.table with the rows of all visited pages stacked.

# Show the chromedriver versions binman has available locally, so `chromever`
# below can be matched to one of them.
binman::list_versions("chromedriver")

rs_driver_object <- rsDriver(
  browser = "chrome",
  chromever = "107.0.5304.62",
  verbose = FALSE,
  port = free_port()
)

## create the client
# rsDriver() returns a client whose browser session is already open, so no
# extra remDr$open() call is made here (it would start a second browser).
remDr <- rs_driver_object$client
remDr$navigate("https://www.forwardpathway.com/us-college-database")

# NOTE: the exploratory "next button" clicks (clickElement(), a JavaScript
# click(), and sending the enter key) are intentionally not executed before
# the loop: each successful click advances the table by one page, so the
# first pages would never be scraped.

table_id <- "table_1"
next_button_id <- "table_1_next"

# One data frame per page; combined once after the loop instead of calling
# rbindlist() on every iteration.
pages <- list()

repeat {
  # Re-read the page source on every iteration and select the target table
  # by id, rather than relying on it being the first <table> in the document.
  page <- read_html(remDr$getPageSource()[[1]])
  pages[[length(pages) + 1L]] <- page %>%
    html_element(paste0("#", table_id)) %>%
    html_table()

  next_button <- tryCatch(
    remDr$findElement(using = "id", next_button_id),
    error = function(e) NULL
  )
  if (is.null(next_button)) {
    break
  }

  # The "next" element is still present on the last page, so looking it up
  # never fails there. Assumes DataTables-style pagination, which adds the
  # `disabled` class to the button on the last page -- TODO confirm on site.
  button_class <- unlist(next_button$getElementAttribute("class"))
  if (length(button_class) > 0 &&
      grepl("disabled", button_class[[1]], fixed = TRUE)) {
    break
  }

  # Bring the button into the viewport before clicking it.
  remDr$executeScript("arguments[0].scrollIntoView(true);", list(next_button))
  next_button$clickElement()

  # Give the table time to load the next page before reading it again.
  Sys.sleep(5)
}

print("script complete")
all_data <- rbindlist(pages, fill = TRUE)
使用以下代码,我已经能够单击“下一页”(Next)按钮并成功翻页。
library(RSelenium)
# Start a standalone Firefox Selenium server in Docker (container port 4444
# published on host port 4446) and connect a remote driver to it.
shell("docker run -d -p 4446:4444 selenium/standalone-firefox")
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4446L,
  browserName = "firefox"
)
remDr$open()
remDr$navigate("https://www.forwardpathway.com/us-college-database")

# Scroll the window down in 200 steps of 20 pixels, printing the step number
# before each scroll.
scroll_page_in_steps <- function(driver) {
  for (step in seq_len(200)) {
    print(step)
    driver$executeScript(paste0("scroll(0,", step * 20, ")"))
  }
}

# Scroll through the first page, then display a screenshot of it.
scroll_page_in_steps(remDr)
remDr$screenshot(TRUE)

# Click the "next" pagination button and display the result.
web_Obj_Next <- remDr$findElement("xpath", '//*[@id="table_1_next"]')
web_Obj_Next$clickElement()
remDr$screenshot(TRUE)

# Scroll through the new page and display a final screenshot.
scroll_page_in_steps(remDr)
remDr$screenshot(TRUE)