Python:无法正确计算 schedule 定时任务的剩余天数,也无法得到与之对应的 DataFrame

问题描述 投票:0回答:1
from time import sleep
import os
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import schedule
import time
import pandas as pd
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Browser setup: one undetected-chromedriver Chrome instance shared by every
# function in this script through the module-level name `driver`.
options = uc.ChromeOptions()
options.add_argument('--blink-settings=imagesEnabled=false') # skip image loading so pages load faster
options.add_argument('--disable-notifications')
# NOTE(review): presumably the value 2 means "block" for this Chrome preference - confirm.
prefs = {"profile.default_content_setting_values.notifications" : 2}
options.add_experimental_option("prefs",prefs)
driver = uc.Chrome(options=options)
from math import ceil

# Number of days the tracking should run.
tracking_days = 1
# Times of day ('HH:MM') at which a price sample is taken.
tracking_period = ['10:16','10:17','10:18']  # Add additional tracking times here
# Incremented by job() once per scheduled run, so despite its name it counts
# runs rather than whole days.
completed_days = 0
# job() appends one DataFrame here each time a day's runs are complete.
price_data = []
# [timestamp, price] rows appended by track_product(); kept at module level so
# job() can turn them into a DataFrame and then reset the list.
day_price_data = []

def get_price():
    """Read the product price from the page currently loaded in ``driver``.

    Waits up to 10 seconds for the element with id ``offering-price`` to
    become visible, then joins the text of its first and second child
    ``<span>`` with a decimal point and parses the result as a float.

    Returns:
        The price as a ``float``, or ``None`` when the element does not
        appear in time, a span is missing, or the text is not a number.
    """
    try:
        price_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, 'offering-price'))
        )
        # find_element_by_xpath() was removed in Selenium 4.3; calling it
        # raised AttributeError, which the old bare ``except`` swallowed, so
        # the function always returned None. find_element(By.XPATH, ...) is
        # the supported spelling.
        whole_part = price_element.find_element(By.XPATH, './span[1]').text
        fraction_part = price_element.find_element(By.XPATH, './span[2]').text
        return float(whole_part + '.' + fraction_part)
    except Exception as exc:
        # Stay best-effort (a failed sample must not stop the tracker), but
        # report the reason and let KeyboardInterrupt/SystemExit propagate.
        print(f"Could not read price: {exc!r}")
        return None

def get_time():
    """Return the current local date and time as a naive ``datetime``."""
    now = datetime.now()
    return now

def track_product():
    """Take one price sample if the current minute is a tracking time.

    Prints the current time, and when its ``HH:MM`` form is listed in
    ``tracking_period`` loads the product page, stores the result of
    ``get_price()`` in the global ``current_price`` and, if a price was
    obtained, appends ``[timestamp, price]`` to ``day_price_data``.
    """
    global current_price, day_price_data

    now = datetime.now()
    print(f"Time just before tracking_period: {now}")

    # Outside the configured minutes there is nothing to do.
    if now.strftime('%H:%M') not in tracking_period:
        return

    driver.get('https://www.hepsiburada.com/pinar-tam-yagli-sut-4x1-lt-pm-zypinar153100004')
    current_price = get_price()

    # get_price() signals failure with None; skip the sample in that case.
    if current_price is None:
        return

    day_price_data.append([get_time(), current_price])

def job():
    """Run one scheduled tracking step and keep the day bookkeeping.

    Calls ``track_product()``, increments the global run counter
    ``completed_days`` (which counts runs, not days), prints how many days
    remain, and - whenever the run just finished was the last one of a day -
    converts ``day_price_data`` into a DataFrame, appends it to
    ``price_data`` and resets ``day_price_data``.
    """
    global completed_days, day_price_data, price_data

    track_product()

    completed_days += 1

    runs_per_day = len(tracking_period)
    total_runs = tracking_days * runs_per_day

    # Computed before branching: it used to be assigned only in the ``else``
    # branch, so the very last run raised UnboundLocalError below and the
    # final day's prices never reached price_data.
    day_in_tracking_period = completed_days % runs_per_day

    if completed_days == total_runs:
        remaining_days = 0
        print(f"Remaining days: {remaining_days}")
        print("Product tracking finished.")
    else:
        # Same value as the previous
        # (tracking_days - completed_periods) * runs_per_day - day_in_tracking_period.
        remaining_periods = total_runs - completed_days
        remaining_days = ceil(remaining_periods / runs_per_day)

        print(f"Waiting for the next tracking period... Remaining days: {remaining_days}")

    # A remainder of 0 means the last run of the current day just completed.
    if day_in_tracking_period == 0:
        day_price_df = pd.DataFrame(day_price_data, columns=['date', 'price'])
        day_price_df['date'] = pd.to_datetime(day_price_df['date'])
        price_data.append(day_price_df)
        day_price_data = []


# Register job() once per configured time of day. An entry without a colon
# is treated as a full hour.
for time_str in tracking_period:
    if ':' in time_str:
        hour, minute = time_str.split(':')
    else:
        hour, minute = time_str, '00'
    print("time_str:", time_str)
    schedule.every().day.at(f"{hour}:{minute}").do(job)

print("Schedule started...")
# Loop until every scheduled run of every tracking day has happened. The
# earlier time-of-day check (break once the clock reached tracking_period[-1])
# left the loop before run_pending() could fire the last job of the day and
# also ended multi-day tracking after the first day, so termination now
# depends only on the run counter.
while completed_days < tracking_days * len(tracking_period):
    schedule.run_pending()
    time.sleep(1)

你好,朋友们!我正在尝试编写一段通用代码,用于在指定的日期和时间从电子商务网站获取产品价格,并将其收集到名为 price_data 的数据框列表中。但是,由于程序无法正确计算剩余天数,价格没有被收集到 price_data 中。为了不让代码过于复杂,我希望 tracking_period 固定由 3 个时间点组成(['hour1:min1', 'hour2:min2', 'hour3:min3'])。也就是说,程序每天只在这 3 个给定的时间点获取价格,且这些时间点不会改变。唯一可以更改的参数是天数,例如 tracking_days = 1/2/10/365 等。无论 tracking_days 取何值,程序都应该能够正确计算剩余天数,并将价格正确收集到 price_data 中。我怎样才能得到如下图所示的准确结果?(正确结果)

tracking_days = 1 时,不正确结果与正确结果的对比:

如果跟踪天数 = 2,我期望的正确结果:

python python-3.x pandas scheduled-tasks schedule
1个回答
0
投票

如果有什么遗漏/错误,需要改进,请添加评论。

from time import sleep
import os
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import schedule
import time
import pandas as pd
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Browser setup: one undetected-chromedriver Chrome instance shared by every
# function in this script through the module-level name `driver`.
options = uc.ChromeOptions()
options.add_argument('--blink-settings=imagesEnabled=false') # skip image loading so pages load faster
options.add_argument('--disable-notifications')
# NOTE(review): presumably the value 2 means "block" for this Chrome preference - confirm.
prefs = {"profile.default_content_setting_values.notifications" : 2}
options.add_experimental_option("prefs",prefs)
driver = uc.Chrome(options=options)




# Number of days the tracking should run.
tracking_days = 2
# Times of day ('HH:MM') at which a price sample is taken.
tracking_period = ['20:55', '20:58', '21:00']

# Whole days finished so far; job() increments it after the last run of a day.
completed_days = 0
# Runs executed within the current day; job() resets it to 0 when a day completes.
tracking_count = 0
# [timestamp, price] rows appended by job() across all days.
day_price_data = []
# Receives the DataFrame built from day_price_data once tracking has ended.
price_data = []

def get_price():
    """Read the product price from the page currently loaded in ``driver``.

    Waits up to 10 seconds for the element with id ``offering-price`` to
    become visible, then joins the text of its first and second child
    ``<span>`` with a decimal point and parses the result as a float.

    Returns:
        The price as a ``float``, or ``None`` when the element does not
        appear in time, a span is missing, or the text is not a number.
    """
    try:
        price_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, 'offering-price'))
        )
        # find_element_by_xpath() was removed in Selenium 4.3; calling it
        # raised AttributeError, which the old bare ``except`` swallowed, so
        # the function always returned None. find_element(By.XPATH, ...) is
        # the supported spelling.
        whole_part = price_element.find_element(By.XPATH, './span[1]').text
        fraction_part = price_element.find_element(By.XPATH, './span[2]').text
        return float(whole_part + '.' + fraction_part)
    except Exception as exc:
        # Stay best-effort (a failed sample must not stop the tracker), but
        # report the reason and let KeyboardInterrupt/SystemExit propagate.
        print(f"Could not read price: {exc!r}")
        return None

def get_time():
    """Return the current local date and time as a naive ``datetime``."""
    timestamp = datetime.now()
    return timestamp
    
    
def remaining_days_str():
    """Return, as a string, the tracking days left after the current one.

    The value is ``tracking_days - completed_days - 1``, read from the
    module-level counters at call time.
    """
    return str(tracking_days - completed_days - 1)


def job():
    """Take one scheduled price sample and update the day counters.

    When the current ``HH:MM`` is listed in ``tracking_period`` the product
    page is loaded and, if ``get_price()`` yields a value, a
    ``[timestamp, price]`` row is appended to ``day_price_data``. Every call
    increments ``tracking_count``; once it reaches ``len(tracking_period)``
    the day is counted in ``completed_days`` and the counter is reset.

    Returns:
        ``schedule.CancelJob`` once ``completed_days`` equals
        ``tracking_days``; otherwise ``None``.
    """
    global completed_days, tracking_count

    current_time = datetime.now()
    print(f"Time just before tracking_period: {current_time}")
    if current_time.strftime('%H:%M') in tracking_period:
        driver.get('https://www.hepsiburada.com/pinar-tam-yagli-sut-4x1-lt-pm-zypinar153100004')
        current_price = get_price()

        if current_price is not None:
            # Take the timestamp once so the time printed is exactly the
            # time stored; two separate get_time() calls gave two values.
            sample_time = get_time()
            day_price_data.append([sample_time, current_price])
            print(f"Price added at {sample_time}: {current_price}")

    tracking_count += 1
    # Printed before completed_days is updated, so the day in progress is
    # not counted as remaining.
    print(f"Remaining days: {remaining_days_str()}")

    if tracking_count == len(tracking_period):
        completed_days += 1
        tracking_count = 0
        print(f"Day {completed_days} completed.")

    if completed_days == tracking_days:
        print("Product tracking finished.")
        return schedule.CancelJob



# Register job() once per configured time of day. An entry without a colon
# is treated as a full hour.
for time_str in tracking_period:
    parts = time_str.split(':') if ':' in time_str else (time_str, '00')
    hour, minute = parts
    print("time_str:", time_str)
    at_time = f"{hour}:{minute}"
    schedule.every().day.at(at_time).do(job)

print("Schedule started...")
# Poll the scheduler until job() has counted every tracking day as completed.
while True:
    if completed_days >= tracking_days:
        break
    schedule.run_pending()
    print("Waiting for the next job...")
    time.sleep(50)

# Tracking is over: turn all collected [timestamp, price] rows into a single
# DataFrame and store it in price_data.
print("before df")
day_price_df = pd.DataFrame(day_price_data, columns=['date', 'price'])
day_price_df['date'] = pd.to_datetime(day_price_df['date'])
price_data.append(day_price_df)

1期2天:

3期2天:

另外,我还分别用 1 天、2 个和 3 个时间点运行了代码,结果都是正确的。

© www.soinside.com 2019 - 2024. All rights reserved.