Python Selenium 在将 .click() 与 find_elements 一起使用时给我 StaleElementReferenceException

问题描述 投票:0回答:0

我正在尝试使用 find_elements 遍历每个帖子,第一次点击总是有效,但第二次点击总是失败,但我需要页面来遍历每个帖子。

我试过添加 Javascript 点击,但没有用,只是不知道如何继续。 它真正应该做的是浏览每篇文章,我也试过添加一个 WebDriverWait 脚本,或者只是在第二次点击前添加 10 秒,但仍然给我同样的错误。

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
import time
import requests
from selenium.common.exceptions import NoSuchElementException


# specify the URL you want to scrape
url = 'https://www.depop.com/_realvintage/'
# results = requests.get(url)

chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome("D:\\Selenium_python2\\chromedriver.exe", chrome_options=chrome_options)
# driver.maximize_window()

#game plan for this
# scroll down till encounters first sold listing //// 
# then stop scrolling //// 
# count all unsold listings, maybe count all elements until you get to 
# the total title number, then parse them all,
scroll_down = driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
total_titles = 0
ovr_listings = 0

driver.get(url)
time.sleep(1)
driver.find_element('xpath', '//*[@id="__next"]/div/div[2]/div[2]/button[2]').click()
time.sleep(1.5)

current_url = driver.current_url
response = requests.get(current_url)
soup = BeautifulSoup(response.text, 'html.parser')

#need a new code so that when the first sold liting appears on the page, the scroll_down_script() ends
def scroll_down_script():
    all_post = driver.find_elements('xpath', '//*[contains(@class, "styles__PrimaryProductImage-sc-__dbpyge-1 jUQFmU")]') 
    load_more_button = driver.find_element('xpath', '//*[contains(@class, "sc-gFGZVQ ewexUW")]')
    scroll_down = driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    while True:
        time.sleep(1)
        # Scroll down to the bottom of the screen
        scroll_down
        time.sleep(1)
        driver.execute_script("arguments[0].scrollIntoView(true)",load_more_button)
        time.sleep(1)
        load_more_button.click()
        try: 
            time.sleep(1)
            load_more_button.click()
        except:
            continue
        time.sleep(3)
        # Wait for the page to load
        try:
            # sold_listing = driver.find_element('xpath', '//*[contains(@class, "styles__SoldOverlay-sc-__sc-13q41bc-9 jMuEhs")]')
            # sold_listing
            # sold = driver.find_element('css selector', '[data-testid="product__sold"]')
            # sold = driver.find_element('xpath', '//*[contains(@class, "sc-papXJ kLPPHi")]')
            sold_text = driver.find_elements('xpath', '//*[contains(text(), "Sold")]')
            if len(sold_text) >= 3:
                print(len(sold_text))
                print(len(all_post))
                print('sold_listing found, stopping function')
                break
        except NoSuchElementException:
            print(len(sold_text))
            pass
count = 0

def each_post(): 
    global count  
    print("starting item count " + str(count))
    time.sleep(3)
    print('reading all posts')
    #all post doesnt work
    all_post = driver.find_elements('xpath', '//*[contains(@class, "styles__PrimaryProductImage-sc-__dbpyge-1 jUQFmU")]') 
    all_posts = all_post[count:]
    print('length of posts')
    print(len(all_posts))
    for post in all_posts:
        print('start post ' + str(count))
        try:
            driver.execute_script("arguments[0].scrollIntoView(true)",all_post[count])
        except:
            pass
        print('clicking good')
        time.sleep(6)
        try:
            # all_post[count].click()
            all_post[count].click()
            print('click 1')
        except:
            # driver.execute_script("arguments[0].click();", all_post[count])
            driver.execute_script("arguments[0].click();", all_post[count])

            print('click 2')
            all_post = driver.find_elements('xpath', '//*[contains(@class, "styles__PrimaryProductImage-sc-__dbpyge-1 jUQFmU")]')
        time.sleep(2)
        title_parsing()
        count += 1


all_titles = []
def hashtag_parser(word):
    global overall_title
    ind_title = []
    time.sleep(2)
    try:
        wordd = word.split("PLEASE READ CAREFULLY!! Some items may have unlisted markings, but for the most part all marking would be listed. These clothes are often old and used, so tend to not be in pristine condition. Most items that are listed here will by default be used, unless stated otherwise. Items have not been washed so we also highly recommend that you wash before putting it on.  All items have been handpicked by us. Thank you for choosing to support us at RealVintage._ and if there is anything we can do to make your experience better feel free to send us a message! We also give large discounts if purchasing stuff in bulk.")
    except:
        wordd = word
    for letter in wordd:
        if letter != '#' or ',' or '/n' or "PLEASE":
            ind_title.append(letter)
        else:
            overall_title = ''.join(ind_title)
            print(overall_title)

            if overall_title not in all_titles:
                all_titles.append(overall_title)
            break
    title =''.join(ind_title)
    item_title = title.split('#')[0].strip()
    print(item_title)

def title_parsing():
    global ovr_listings
    global count
    print('start title parsing')   
    time.sleep(2)   
    script_tag = driver.find_element('css selector', "[data-testid='meta-schema__json-ld']")
    script_content = script_tag.get_attribute("innerHTML")
    data = json.loads(script_content)
# Extract the name category
    item_title = data["description"]
    hashtag_parser(item_title)
    #price
    price = data['offers']['price']
    print(price)
    #images
    images = data['image']
    print(images)
    print(len(images))
    #size
    size_condition = driver.find_elements('xpath', '//*[contains(@class, "TableCell-sc-__sc-12y8so1-0 bWenjz")]')
    if len(size_condition) >= 3:
        try:
            size = size_condition[0]
            print(size.text)
        except:
            print('No size for ' + str(count))
        try:
            brand = size_condition[1]
            print(brand.text)
        except:
            print('No condition for ' + str(count))

        #condition
        try:
            condition = size_condition[2]
            print(condition.text)
        except:
            print('No condition for ' + str(count))      
    else:
        try:  
            size = size_condition[0]
            print(size.text)
        except:
            print('No size for ' + str(count))
        #condition
        try:
            condition = size_condition[1]
            print(condition.text)
        except:
            print('No condition for ' + str(count))    
    #styles
    try:
        style_color = driver.find_elements('xpath', '//*[contains(@class, "TableCell-sc-__sc-12y8so1-0 kRbwCZ")]')
        style = style_color[0]
        text_of_style = style.text
        style_list = text_of_style.split(', ')
        for styles in style_list:
            print(styles)
    except:
        print('no style for ' + str(count))
    #color
    try:
        color = style_color[1]
        text_of_color = color.text
        try:
            colors = text_of_color.split(", ")
            color1 = colors[0]
            color2 = colors[1]
            print(color1)
            print(color2)
        except:
            print('only 1 color')       
    except:
        print('no color for ' + str(count))
    category = driver.find_elements('css selector', 'span[itemprop="name"]')
    # specific category
    if len(category) >= 5:        
        try:            
            item_category = category[5]
            category_text = item_category.text
            print(category_text)
        except:
            print('No specific category for ' + str(count))        
        #category
        try:
            gen_category = category[4]
            gen_category_text = gen_category.text
            print(gen_category_text)
        except:
            print('No category for ' + str(count))
    else:
        try:
            item_category = category[3]
            category_text = item_category.text
            print(category_text)
        except:
            print('No specific category for ' + str(count))        
        #category
        try:
            gen_category = category[2]
            gen_category_text = gen_category.text
            print(gen_category_text)
        except:
            print('No category for ' + str(count))
    #likes
    all_text = driver.find_element('xpath', "/html/body").text
    match = re.search(r'\b\d{1,3}\b(?=\s+likes)', all_text)
    if match:
        likes = int(match.group())
        print(likes)
    else:
        print("No likes found.")
    driver.back()
    time.sleep(3)
    print('end title parsing')




# scroll_down_script()
each_post()

driver.find_element('xpath', '//*[@id="__next"]/div/div[2]/div[2]/button[2]').click()

time.sleep(1.5)

python selenium-webdriver e-commerce
© www.soinside.com 2019 - 2024. All rights reserved.