使用 python 和 selenium 抓取 Twitter 数据

问题描述 投票:0回答:0

我写了这段代码。当我一步一步运行时它会起作用,但我无法自动化我想要的东西。我试着做得更好,但我无法摆脱我的工作。我想让我写的代码继续循环,直到我想要它为止。我尝试了不同的方法,我没有得到任何错误,但是我无法获取数据。我每次都以“csv”格式保存它,但是文件是空的。

import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
from getpass import getpass
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Chrome
import csv

driver = webdriver.Chrome()
driver.get("https://twitter.com/login")
sleep(3)
username = driver.find_element(By.XPATH,"//input[@name='text']")
username.send_keys("dnmeztolga")
next_button = driver.find_element(By.XPATH,"//span[contains(text(),'İleri')]")
next_button.click()
my_pass = getpass()
sleep(3)
password = driver.find_element(By.XPATH,"//input[@name='password']")
password.send_keys(my_pass)
#password.send_keys(Keys.RETURN)
log_in = driver.find_element(By.XPATH,"//span[contains(text(),'Giriş yap')]")
log_in.click()
search_box = driver.find_element(By.XPATH,"//input[@data-testid='SearchBox_Search_Input']")
search_box.send_keys('Kemal Kılıçdaroğlu')
search_box.send_keys(Keys.RETURN)
sleep(3)
people = driver.find_element(By.XPATH,"//span[contains(text(),'En Son')]")
people.click()
articles = driver.find_elements(By.XPATH,"//article[@data-testid='tweet']")
article = articles[0]
article.find_element(By.XPATH,".//span").text
article.find_element(By.XPATH,".//span[contains(text(), '@')]").text
article.find_element(By.XPATH,".//time").get_attribute('datetime')
article.find_element(By.XPATH,".//div[@data-testid='tweetText']").text
article.find_element(By.XPATH,".//div[@data-testid='like']").text
article.find_element(By.XPATH,".//div[@data-testid='reply']").text
article.find_element(By.XPATH,".//div[@data-testid='retweet']").text
def get_tweet_data(article):
    username = article.find_element(By.XPATH,".//span").text
    nickname = article.find_element(By.XPATH,".//span[contains(text(), '@')]").text
    try:  
        postdate = article.find_element(By.XPATH,".//time").get_attribute('datetime')
    except NoSuchElementException:
        return
    tweetText = article.find_element(By.XPATH,".//div[@data-testid='tweetText']").text
    reply_count = article.find_element(By.XPATH,".//div[@data-testid='reply']").text
    retweet_count = article.find_element(By.XPATH,".//div[@data-testid='retweet']").text
    like_count = article.find_element(By.XPATH,".//div[@data-testid='like']").text
    
    tweet = (username, nickname, postdate, tweetText, reply_count, retweet_count, like_count)
    return tweet
get_tweet_data(article)
tweet_data = []

for article in articles:
    data = get_tweet_data(article)
    if data:
        tweet_data.append(data)
tweet_data[0]
driver.execute_script('window.scrollTo(0,document.body.scrollHeight);')
selenium-webdriver web-scraping xpath twitter
© www.soinside.com 2019 - 2024. All rights reserved.