from time import sleep
import os
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import schedule
import time
import pandas as pd
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Chrome launch configuration for the scraper: images are switched off so
# pages load faster, and notification prompts are suppressed both by a
# command-line flag and by a profile preference.
options = uc.ChromeOptions()
for chrome_flag in (
    '--blink-settings=imagesEnabled=false',
    '--disable-notifications',
):
    options.add_argument(chrome_flag)
prefs = {"profile.default_content_setting_values.notifications": 2}
options.add_experimental_option("prefs", prefs)
# Starting the driver launches the browser as a module-level side effect.
driver = uc.Chrome(options=options)
from math import ceil
# Number of days the tracker should run.
tracking_days = 1
# 'HH:MM' times of day at which a price sample is scheduled.
tracking_period = ['10:16','10:17','10:18'] # Add additional tracking times here
# Incremented once per job() run, so despite its name it counts finished
# tracking periods; the run ends at tracking_days * len(tracking_period).
completed_days = 0
# One DataFrame per completed day, appended by job().
price_data = []
# [timestamp, price] rows collected for the day in progress; kept at module
# level so track_product() and job() can share it.
day_price_data = [] # Move day_price_data outside of track_product function
def get_price():
    """Read the product price from the page currently loaded in ``driver``.

    Waits up to 10 seconds for the element with id ``offering-price`` to
    become visible, then joins the text of its first and second child
    ``<span>`` elements with a ``.`` and parses the result as a float.

    Returns:
        The parsed price as a ``float``, or ``None`` when the browser reports
        an error (timeout, missing element, ...) or the text is not numeric.
    """
    # Imported here so this function does not rely on the file's import header.
    from selenium.common.exceptions import WebDriverException

    try:
        price_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, 'offering-price'))
        )
        # find_element(By.XPATH, ...) is the locator API available in every
        # Selenium release; the find_element_by_xpath shortcut no longer
        # exists in current Selenium 4 releases and raised AttributeError,
        # which the former bare ``except`` turned into a silent ``None``.
        whole_part = price_element.find_element(By.XPATH, './span[1]').text
        fraction_part = price_element.find_element(By.XPATH, './span[2]').text
        return float(f"{whole_part}.{fraction_part}")
    except (WebDriverException, ValueError) as error:
        # Stay best-effort for scraping failures, but say why the sample was
        # lost; programming errors are no longer swallowed.
        print(f"Price lookup failed: {error!r}")
        return None
def get_time():
    """Return the current local date and time as a ``datetime`` object."""
    now = datetime.now()
    return now
def track_product():
    """Take one price sample if the current minute is a tracking time.

    Prints the current time, and when its ``HH:MM`` form is listed in
    ``tracking_period`` loads the product page, reads the price with
    ``get_price()`` and appends ``[timestamp, price]`` to ``day_price_data``.
    Nothing is appended when the price could not be read. The price is also
    stored in the module-level ``current_price``.
    """
    global current_price, day_price_data
    now = datetime.now()
    print(f"Time just before tracking_period: {now}")

    # Outside a tracking minute: nothing to sample.
    if now.strftime('%H:%M') not in tracking_period:
        return

    driver.get('https://www.hepsiburada.com/pinar-tam-yagli-sut-4x1-lt-pm-zypinar153100004')
    current_price = get_price()
    if current_price is None:
        return
    day_price_data.append([get_time(), current_price])
def job():
    """Run one scheduled tracking period and keep the day bookkeeping.

    Calls ``track_product()``, then advances ``completed_days`` (which counts
    finished tracking periods, one per call). Whenever the count reaches a
    multiple of ``len(tracking_period)`` a day is complete: the rows in
    ``day_price_data`` are turned into a DataFrame with columns ``date`` and
    ``price``, appended to ``price_data``, and ``day_price_data`` is reset.

    The flush happens for every completed day, including the last one;
    previously it was skipped on the final run, so the last day's prices
    (all prices when ``tracking_days`` is 1) never reached ``price_data``.
    """
    global completed_days, day_price_data, price_data
    track_product()
    completed_days += 1

    periods_per_day = len(tracking_period)
    total_periods = tracking_days * periods_per_day
    # Same value as the former (tracking_days - full_days) * n - partial form.
    remaining_periods = max(total_periods - completed_days, 0)
    # A partly finished day still counts as a remaining day.
    remaining_days = ceil(remaining_periods / periods_per_day)

    if completed_days % periods_per_day == 0:
        # Last period of a day: store the day's samples and start a new day.
        day_price_df = pd.DataFrame(day_price_data, columns=['date', 'price'])
        day_price_df['date'] = pd.to_datetime(day_price_df['date'])
        price_data.append(day_price_df)
        day_price_data = []

    if remaining_periods == 0:
        print(f"Remaining days: {remaining_days}")
        print("Product tracking finished.")
    else:
        print(f"Waiting for the next tracking period... Remaining days: {remaining_days}")
# Register job() once per configured time of day. An entry without a ':' is
# treated as a bare hour and scheduled at minute '00'.
for time_str in tracking_period:
    hour, minute = time_str.split(':') if ':' in time_str else (time_str, '00')
    print("time_str:", time_str)
    at_time = f"{hour}:{minute}"
    schedule.every().day.at(at_time).do(job)
print("Schedule started...")
# Poll the scheduler once a second until every scheduled run has happened
# (job() advances completed_days once per run, so the target is one run per
# tracking time per tracked day).
#
# There is intentionally no "current time >= last tracking time" exit here:
# that check fired in the same minute as the last job of the day, so the loop
# ended before run_pending() could execute it, and it also terminated
# multi-day runs at the end of the first day.
while completed_days < tracking_days * len(tracking_period):
    schedule.run_pending()
    time.sleep(1)
你好朋友,我正在尝试创建一段通用代码,它将在电子商务网站上获取产品价格,并在所需的日期和时间将其收集到名为 price_data 的数据框中。但是,我无法在 price_data 数据框中收集价格,因为我无法正确计算程序中剩余的天数。为了让代码不至于太复杂,我只想让 tracking_period 由 3 个固定的时间点组成 (['hour1:min1', 'hour2:min2', 'hour3:min3'])。所以程序只会在给定的这 3 个时间点获取价格,并且这些时间点不会改变。唯一可以更改的参数是天数,例如:tracking_days = 1/2/10/365 等。但是即使 tracking_days 发生变化,程序也应该能够正确计算剩余天数,并在 price_data 中正确收集价格。我怎样才能得到像下图这样的准确结果?(正确结果)
不正确和正确的结果对于跟踪天数 = 1 相比:
如果跟踪天数 = 2,我期望的正确结果:
如果有什么遗漏/错误,需要改进,请添加评论。
from time import sleep
import os
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import schedule
import time
import pandas as pd
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Browser setup: skip image loading for faster page loads and block
# notification prompts (via both a launch flag and a profile preference).
options = uc.ChromeOptions()
for launch_flag in (
    '--blink-settings=imagesEnabled=false',
    '--disable-notifications',
):
    options.add_argument(launch_flag)
prefs = {"profile.default_content_setting_values.notifications": 2}
options.add_experimental_option("prefs", prefs)
# Creating the driver starts the browser immediately.
driver = uc.Chrome(options=options)
# Number of days the tracker should run.
tracking_days = 2
# 'HH:MM' times of day at which a price sample is scheduled.
tracking_period = ['20:55', '20:58', '21:00']
# Days finished so far; job() adds one each time tracking_count reaches
# len(tracking_period).
completed_days = 0
# Periods counted within the current day; job() resets it to 0 at day end.
tracking_count = 0
# [timestamp, price] rows appended by job().
day_price_data = []
# Receives the DataFrame built from day_price_data once tracking has ended.
price_data = []
def get_price():
    """Read the product price from the page currently loaded in ``driver``.

    Waits up to 10 seconds for the element with id ``offering-price`` to
    become visible, then joins the text of its first and second child
    ``<span>`` elements with a ``.`` and parses the result as a float.

    Returns:
        The parsed price as a ``float``, or ``None`` when the browser reports
        an error (timeout, missing element, ...) or the text is not numeric.
    """
    # Imported here so this function does not rely on the file's import header.
    from selenium.common.exceptions import WebDriverException

    try:
        price_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, 'offering-price'))
        )
        # find_element(By.XPATH, ...) is the locator API available in every
        # Selenium release; the find_element_by_xpath shortcut no longer
        # exists in current Selenium 4 releases and raised AttributeError,
        # which the former bare ``except`` turned into a silent ``None``.
        whole_part = price_element.find_element(By.XPATH, './span[1]').text
        fraction_part = price_element.find_element(By.XPATH, './span[2]').text
        return float(f"{whole_part}.{fraction_part}")
    except (WebDriverException, ValueError) as error:
        # Stay best-effort for scraping failures, but say why the sample was
        # lost; programming errors are no longer swallowed.
        print(f"Price lookup failed: {error!r}")
        return None
def get_time():
    """Return the current local date and time as a ``datetime`` object."""
    now = datetime.now()
    return now
def remaining_days_str():
    """Return, as a string, how many days are left after the current one.

    Computed from the module-level counters as
    ``tracking_days - completed_days - 1``.
    """
    return str(tracking_days - completed_days - 1)
def job():
    """Run one scheduled tracking period and update the day counters.

    When the current ``HH:MM`` is listed in ``tracking_period`` the product
    page is loaded and, if ``get_price()`` returns a value, the row
    ``[timestamp, price]`` is appended to ``day_price_data``.

    Every call counts as one period. After ``len(tracking_period)`` periods
    ``completed_days`` is incremented and ``tracking_count`` is reset.

    Returns:
        ``schedule.CancelJob`` once ``completed_days`` has reached
        ``tracking_days``; otherwise ``None``.
    """
    global completed_days, tracking_count
    current_time = datetime.now()
    print(f"Time just before tracking_period: {current_time}")

    if current_time.strftime('%H:%M') in tracking_period:
        driver.get('https://www.hepsiburada.com/pinar-tam-yagli-sut-4x1-lt-pm-zypinar153100004')
        current_price = get_price()
        if current_price is not None:
            # Take the timestamp once so the logged time is exactly the one
            # stored alongside the price.
            sampled_at = get_time()
            day_price_data.append([sampled_at, current_price])
            print(f"Price added at {sampled_at}: {current_price}")

    # NOTE(review): the period is counted on every scheduled run, whether or
    # not a price was read, so one failed fetch cannot push the day boundary
    # into the next day. Confirm this matches the intended nesting.
    tracking_count += 1
    print(f"Remaining days: {remaining_days_str()}")

    if tracking_count < len(tracking_period):
        return None

    completed_days += 1
    tracking_count = 0
    print(f"Day {completed_days} completed.")
    if completed_days >= tracking_days:
        print("Product tracking finished.")
        return schedule.CancelJob
    return None
# Register job() once per configured time of day. An entry without a ':' is
# treated as a bare hour and scheduled at minute '00'.
for time_str in tracking_period:
    hour, minute = time_str.split(':') if ':' in time_str else (time_str, '00')
    print("time_str:", time_str)
    at_time = f"{hour}:{minute}"
    schedule.every().day.at(at_time).do(job)
print("Schedule started...")
# Poll the scheduler every 50 seconds until job() has counted
# tracking_days completed days.
while True:
    if completed_days >= tracking_days:
        break
    schedule.run_pending()
    print("Waiting for the next job...")
    time.sleep(50)

# Tracking is over: convert the collected [timestamp, price] rows into a
# DataFrame and store it in price_data.
print("before df")
day_price_df = pd.DataFrame(day_price_data, columns=['date', 'price'])
parsed_dates = pd.to_datetime(day_price_df['date'])
day_price_df['date'] = parsed_dates
price_data.append(day_price_df)
1期2天:
另外,我跑了1天、2、3期的代码。结果是正确的。