我想用硒从推特上抓取数据。我写了这段代码,但是
page_articles = driver.find_elements(By.XPATH,"//div[@data-testid='tweet']")
行给出了错误。如果这次我将“driver.find_element”更改为“driver.find_elements”search_box = driver.find_element(By.XPATH,"//input[@data-testid='SearchBox_Search_Input']")
行会出错。我会写这两个错误。有人可以帮助我吗?我快要疯了。
如果我写 driver_find_element(By.XPATH,......) --------------------------------- ------------------------------------------ NoSuchElementException Traceback(最后一次调用) 单元格输入 [1],第 60 行 57 滚动 = 真 59 滚动时: ---> 60 page_articles = driver.find_element(By.XPATH,"//div[@data-testid='tweet']") page_articles 中的文章为 61: 62 推文 = get_tweet_data(文章)
如果我写 driver_find_elements(By.XPATH, ......)-------------------------------- ------------------------------------------ NoSuchElementException Traceback(最后一次调用) 单元格输入 [2],第 46 行 43 log_in.点击() 45 睡眠(3) ---> 46 search_box = driver.find_element(By.XPATH,"//input[@data-testid='SearchBox_Search_Input']") 47 search_box.send_keys('Kemal Kılıçdaroğlu') 48 search_box.send_keys(键。返回)
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
from getpass import getpass
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Chrome
import csv
def get_tweet_data(article):
username = article.find_element(By.XPATH,".//span").text
nickname = article.find_element(By.XPATH,".//span[contains(text(), '@')]").text
try:
postdate = article.find_element(By.XPATH,".//time").get_attribute('datetime')
except NoSuchElementException:
return
tweetText = article.find_element(By.XPATH,".//div[@data-testid='tweetText']").text
reply_count = article.find_element(By.XPATH,".//div[@data-testid='reply']").text
retweet_count = article.find_element(By.XPATH,".//div[@data-testid='retweet']").text
like_count = article.find_element(By.XPATH,".//div[@data-testid='like']").text
tweet = (username, nickname, postdate, tweetText, reply_count, retweet_count, like_count)
return tweet
driver = webdriver.Chrome()
driver.get("https://twitter.com/login")
sleep(3)
username = driver.find_element(By.XPATH,"//input[@name='text']")
username.send_keys("dnmeztolga")
next_button = driver.find_element(By.XPATH,"//span[contains(text(),'İleri')]")
next_button.click()
my_pass = getpass()
sleep(3)
password = driver.find_element(By.XPATH,"//input[@name='password']")
password.send_keys(my_pass)
#password.send_keys(Keys.RETURN)
log_in = driver.find_element(By.XPATH,"//span[contains(text(),'Giriş yap')]")
log_in.click()
sleep(3)
search_box = driver.find_element(By.XPATH,"//input[@data-testid='SearchBox_Search_Input']")
search_box.send_keys('Kemal Kılıçdaroğlu')
search_box.send_keys(Keys.RETURN)
sleep(3)
people = driver.find_element(By.XPATH,"//span[contains(text(),'En Son')]")
people.click()
data = []
tweet_ids = set()
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True
while scrolling:
page_articles = driver.find_elements(By.XPATH,"//div[@data-testid='tweet']")
for article in page_articles:
tweet = get_tweet_data(article)
if tweet:
tweet_id = ''.join(tweet)
if tweet_id not in tweet_ids:
tweet_ids.add(tweet_id)
data.append(tweet)
scroll_attempt = 0
while True:
driver.execute_script('window.scrollTo(0,document.body.scrollHeight);')
sleep(3)
current_position = driver.execute_script("return window.pageYOffset;")
if last_position == current_position:
scroll_attempt += 1
if scroll_attemps >= 3:
scrolling = False
break
else:
sleep(2)
else:
last_position = current_position
break
我写了我期望的和我尝试的。我是这个平台的新手。任何人都可以帮助我吗?我会很感激。