这是一条漫长的道路,但我有 4 个独立的 python 脚本,我想将它们组合成一个。
这是有效的,如下所列:
import gspread
import requests
from bs4 import BeautifulSoup
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
# Set up credentials and authorize the client
scope = ['https://spreadsheets.google.com/feeds',
'https://www.googleapis.com/auth/drive']
creds = {
"type": "service_account",
"project_id": "g-league-tracker-final",
"private_key_id": "1b8efa2e9cc9ff846ee358811687b98f0425d4ea",
"private_key": "-----BEGIN PRIVATE KEY-----\nMII\n-----END PRIVATE KEY-----\n",
"client_email": "g",
"client_id": "1",
"auth_uri": "https://acc",
"token_uri": "http",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/gleaguetracker%40g-league-tracker-final.iam.gserviceaccount.com"
}
client = gspread.authorize(creds)
print('STARTING G LEAGUE PROFILES')
# G Leauge PROFILES
gc = gspread.service_account(creds)
sh = gc.open_by_key('1DpasSS8yC1UX6WqAbkQ515BwEEjdDL-x74T0eTW8hLM')
worksheet = sh.worksheet('GLeague Profile Details')
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)
def get_links(url):
data = []
req_url = requests.get(url)
soup = BeautifulSoup(req_url.content, "html.parser")
for td in soup.find_all('td', {'data-th': 'Player'}):
a_tag = td.a
name = a_tag.text
player_url = a_tag['href']
pos = td.find_next_sibling('td').text
print(f"Getting {name}")
req_player_url = requests.get(
f"https://basketball.realgm.com{player_url}")
soup_player = BeautifulSoup(req_player_url.content, "html.parser")
div_profile_box = soup_player.find("div", class_="profile-box")
row = {"Name": name, "URL": player_url, "pos_option1": pos}
row['pos_option2'] = div_profile_box.h2.span.text
for p in div_profile_box.find_all("p"):
try:
key, value = p.get_text(strip=True).split(':', 1)
row[key.strip()] = value.strip()
except: # not all entries have values
pass
data.append(row)
return data
urls = [
'https://basketball.realgm.com/dleague/players/2022',
]
res = []
for url in urls:
print(f"Getting: {url}")
data = get_links(url)
res = [*res, *data]
if res != []:
header = list(res[0].keys())
values = [
header, *[[e[k] if e.get(k) else "" for k in header] for e in res]]
worksheet.append_rows(values, value_input_option="USER_ENTERED")
print('FINISHED G LEAGUE PROFILES')
print('STARTING INTERNATIONAL PROFILES')
# STARTING INTERNATIONAL PROFILES
worksheet2 = sh.worksheet('International Profile Details')
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)
def get_links2(url):
data = []
req_url = requests.get(url)
soup = BeautifulSoup(req_url.content, "html.parser")
for td in soup.select('td.nowrap'):
a_tag = td.a
if a_tag:
name = a_tag.text
player_url = a_tag['href']
pos = td.find_next_sibling('td').text
print(f"Getting {name}")
req_player_url = requests.get(
f"https://basketball.realgm.com{player_url}")
soup_player = BeautifulSoup(req_player_url.content, "html.parser")
div_profile_box = soup_player.find("div", class_="profile-box")
row = {"Name": name, "URL": player_url, "pos_option1": pos}
row['pos_option2'] = div_profile_box.h2.span.text if div_profile_box.h2.span else None
for p in div_profile_box.find_all("p"):
try:
key, value = p.get_text(strip=True).split(':', 1)
row[key.strip()] = value.strip()
except: # not all entries have values
pass
data.append(row)
return data
urls2 = ["https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/player/All/desc",
"https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/2"]
res2 = []
for url in urls2:
data = get_links2(url)
res2 = [*res2, *data]
# print(res2)
if res2 != []:
header = list(res2[0].keys())
values = [
header, *[[e[k] if e.get(k) else "" for k in header] for e in res2]]
worksheet2.append_rows(values, value_input_option="USER_ENTERED")
print('FINISHED INTERNATIONAL PROFILES')
print('STARTING G LEAGUE PROFILES')
# STARTING GLEAGUE STATS
worksheet_name1 = "All G League Stats"
worksheet1 = sh.worksheet(worksheet_name1)
url = 'https://basketball.realgm.com/dleague/stats/2023/Averages/Qualified/player/All/desc/1/Regular_Season'
res = []
for count in range(1, 99):
# pd.read_html accepts a URL too so no need to make a separate request
df_list = pd.read_html(f"{url}/{count}")
res.append(df_list[-1])
data = pd.concat(res)
# Convert the data to a list of lists
values = data.values.tolist()
# Add header row
header = data.columns.tolist()
values.insert(0, header)
# Write the data to the worksheet
worksheet1.clear() # Clear any existing data
worksheet1.append_rows(values, value_input_option="USER_ENTERED",
insert_data_option="INSERT_ROWS", table_range="B1")
print('FINISHED G LEAGUE STATS')
print('STARTING INTERNATIONAL STATS')
# STARTING INTERNATIONAL STATS
worksheet_name2 = "All International Stats"
worksheet2 = sh.worksheet(worksheet_name2)
url = 'https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/player/All/desc'
res = []
for count in range(1, 95):
# pd.read_html accepts a URL too so no need to make a separate request
df_list = pd.read_html(f"{url}/{count}")
res.append(df_list[-1])
data = pd.concat(res)
# Replace NaN values with an empty string
data = data.fillna("")
# Convert the data to a list of lists
values = data.values.tolist()
# Add header row
header = data.columns.tolist()
values.insert(0, header)
# Write the data to the worksheet
worksheet2.clear() # Clear any existing data
worksheet2.append_rows(values, value_input_option="USER_ENTERED",
insert_data_option="OVERWRITE", table_range="B1")
我想将它包装在一个简单的 GUI 中,以便我可以从我的桌面运行:
import tkinter as tk
import threading
def update_data():
# Set the status to "In progress"
status_label.config(text="In progress...")
root.update()
# Paste your code here
# Set the status to "Completed"
status_label.config(text="Completed")
root.update()
print("Data updated.")
# Create the main window
root = tk.Tk()
root.geometry("400x250")
root.title("G League & International Finder")
# Create the title label
title_label = tk.Label(root, text="G League & International Finder", font=("Helvetica", 16))
title_label.pack(pady=10)
# Create the update data button
update_data_button = tk.Button(root, text="Update Data", font=("Helvetica", 14), command=update_data)
update_data_button.pack(pady=20)
# Create the status label
status_label = tk.Label(root, text="", font=("Helvetica", 12))
status_label.pack(pady=10)
# Start the main loop
root.mainloop()
我在应该去的地方添加代码,但是收到各种错误,我认为代码应该是这样的:
import tkinter as tk
import threading
import gspread
import requests
from bs4 import BeautifulSoup
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
# Set up credentials and authorize the client
scope = ['https://spreadsheets.google.com/feeds',
'https://www.googleapis.com/auth/drive']
creds = {
"type": "service_account",
"project_id": "g",
"private_key_id": "1b8efa2e9c",
"private_key": "-----BEGIN PRIVATE KEY-----\\+DEpmj73dM8TUFEGuI7BSbW\ndCvEgLYRbFNE4d1AoGdxjpntne64DyzHwOKWVV0/aQKBgFZOTfyKxp16bThXmcDI\ntuZbLGK5PEP+OAsqM9lQ0DveaDXsl942LNHLKYj11+ZZ375DFmZeIHsFjcO73XuQ\nFRK9+zSsWL9PZWr18PwUUdqaLkMqh7EKoMHo2JcG9EOo6o4srdrtH8SFQoJ1Eklm\n7vzwtoJU0aGPoOqoJIxKH/z7\n-----END PRIVATE KEY-----\n",
"client_email": "",
"client_id": "",
"auth_uri": "",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/gleaguetracker%40g-league-tracker-final.iam.gserviceaccount.com"
}
client = gspread.authorize(creds)
print('STARTING G LEAGUE PROFILES')
# G Leauge PROFILES
gc = gspread.service_account(creds)
sh = gc.open_by_key('1DpasSS8yC1UX6WqAbkQ515BwEEjdDL-x74T0eTW8hLM')
def update_data():
# Set the status to "In progress"
status_label.config(text="In progress...")
root.update()
# Paste your code here
worksheet = sh.worksheet('GLeague Profile Details')
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)
def get_links(url):
data = []
req_url = requests.get(url)
soup = BeautifulSoup(req_url.content, "html.parser")
for td in soup.find_all('td', {'data-th': 'Player'}):
a_tag = td.a
name = a_tag.text
player_url = a_tag['href']
pos = td.find_next_sibling('td').text
print(f"Getting {name}")
req_player_url = requests.get(
f"https://basketball.realgm.com{player_url}")
soup_player = BeautifulSoup(req_player_url.content, "html.parser")
div_profile_box = soup_player.find("div", class_="profile-box")
row = {"Name": name, "URL": player_url, "pos_option1": pos}
row['pos_option2'] = div_profile_box.h2.span.text
for p in div_profile_box.find_all("p"):
try:
key, value = p.get_text(strip=True).split(':', 1)
row[key.strip()] = value.strip()
except: # not all entries have values
pass
data.append(row)
return data
urls = [
'https://basketball.realgm.com/dleague/players/2022',
]
res = []
for url in urls:
print(f"Getting: {url}")
data = get_links(url)
res = [*res, *data]
if res != []:
header = list(res[0].keys())
values = [
header, *[[e[k] if e.get(k) else "" for k in header] for e in res]]
worksheet.append_rows(values, value_input_option="USER_ENTERED")
print('FINISHED G LEAGUE PROFILES')
print('STARTING INTERNATIONAL PROFILES')
# STARTING INTERNATIONAL PROFILES
worksheet2 = sh.worksheet('International Profile Details')
# AddValue = ["Test", 25, "Test2"]
# worksheet.insert_row(AddValue, 3)
def get_links2(url):
data = []
req_url = requests.get(url)
soup = BeautifulSoup(req_url.content, "html.parser")
for td in soup.select('td.nowrap'):
a_tag = td.a
if a_tag:
name = a_tag.text
player_url = a_tag['href']
pos = td.find_next_sibling('td').text
print(f"Getting {name}")
req_player_url = requests.get(
f"https://basketball.realgm.com{player_url}")
soup_player = BeautifulSoup(req_player_url.content, "html.parser")
div_profile_box = soup_player.find("div", class_="profile-box")
row = {"Name": name, "URL": player_url, "pos_option1": pos}
row['pos_option2'] = div_profile_box.h2.span.text if div_profile_box.h2.span else None
for p in div_profile_box.find_all("p"):
try:
key, value = p.get_text(strip=True).split(':', 1)
row[key.strip()] = value.strip()
except: # not all entries have values
pass
data.append(row)
return data
urls2 = ["https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/player/All/desc",
"https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/2"]
res2 = []
for url in urls2:
data = get_links2(url)
res2 = [*res2, *data]
# print(res2)
if res2 != []:
header = list(res2[0].keys())
values = [
header, *[[e[k] if e.get(k) else "" for k in header] for e in res2]]
worksheet2.append_rows(values, value_input_option="USER_ENTERED")
print('FINISHED INTERNATIONAL PROFILES')
print('STARTING G LEAGUE PROFILES')
# STARTING GLEAGUE STATS
worksheet_name1 = "All G League Stats"
worksheet1 = sh.worksheet(worksheet_name1)
url = 'https://basketball.realgm.com/dleague/stats/2023/Averages/Qualified/player/All/desc/1/Regular_Season'
res = []
for count in range(1, 99):
# pd.read_html accepts a URL too so no need to make a separate request
df_list = pd.read_html(f"{url}/{count}")
res.append(df_list[-1])
data = pd.concat(res)
# Convert the data to a list of lists
values = data.values.tolist()
# Add header row
header = data.columns.tolist()
values.insert(0, header)
# Write the data to the worksheet
worksheet1.clear() # Clear any existing data
worksheet1.append_rows(values, value_input_option="USER_ENTERED",
insert_data_option="INSERT_ROWS", table_range="B1")
print('FINISHED G LEAGUE STATS')
print('STARTING INTERNATIONAL STATS')
# STARTING INTERNATIONAL STATS
worksheet_name2 = "All International Stats"
worksheet2 = sh.worksheet(worksheet_name2)
url = 'https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/player/All/desc'
res = []
for count in range(1, 95):
# pd.read_html accepts a URL too so no need to make a separate request
df_list = pd.read_html(f"{url}/{count}")
res.append(df_list[-1])
data = pd.concat(res)
# Replace NaN values with an empty string
data = data.fillna("")
# Convert the data to a list of lists
values = data.values.tolist()
# Add header row
header = data.columns.tolist()
values.insert(0, header)
# Write the data to the worksheet
worksheet2.clear() # Clear any existing data
worksheet2.append_rows(values, value_input_option="USER_ENTERED",
insert_data_option="OVERWRITE", table_range="B1")
# Set the status to "Completed"
status_label.config(text="Completed")
root.update()
print("Data updated.")
# Create the main window
root = tk.Tk()
root.geometry("400x250")
root.title("G League & International Finder")
# Create the title label
title_label = tk.Label(
root, text="G League & International Finder", font=("Helvetica", 16))
title_label.pack(pady=10)
# Create the update data button
update_data_button = tk.Button(root, text="Update Data", font=(
"Helvetica", 14), command=update_data)
update_data_button.pack(pady=20)
# Create the status label
status_label = tk.Label(root, text="", font=("Helvetica", 12))
status_label.pack(pady=10)
# Start the main loop
root.mainloop()
但是我收到了这个错误,这是来自凭据吗?我最初将它作为一个单独的文件,但需要将它添加到我的代码中,这样它们就在一起了。不确定这里出了什么问题,但如果有任何关于如何将其打包为 gui 的建议,我们将不胜感激。
Traceback (most recent call last):
File "c:\Users\AMadle\GLeagueFinal\gui.py", line 32, in <module>
client = gspread.authorize(creds)
File "C:\Python\python3.10.5\lib\site-packages\gspread\__init__.py", line 40, in authorize
client = client_class(auth=credentials)
File "C:\Python\python3.10.5\lib\site-packages\gspread\client.py", line 46, in __init__
self.auth = convert_credentials(auth)
File "C:\Python\python3.10.5\lib\site-packages\gspread\utils.py", line 67, in convert_credentials
module = credentials.__module__
AttributeError: 'dict' object has no attribute '__module__'. Did you mean: '__reduce__'?
PS C:\Users\AMadle\GLeagueFinal>
美国东部时间 3 月 4 日下午 4 点更新:
我错了——看起来错误肯定是因为将 CREDS 直接放入我的文件中。就像我提到的,它之前是一个单独的文件,将尝试解决这个问题,但如果有人有解决方案,我们将不胜感激。
虽然我花了一整天的时间尝试组合这些代码,但我了解到这不是必需的。我现在正在基于我的文件夹创建这个 gui - 这不是关于“如何将 creds.json 添加到 python 脚本”的直接解决方案,但它可以完成工作:
import os
import subprocess
import tkinter as tk
# Set the path to the folder containing your scripts
folder_path = r"C:\Users\AMadle\GLeagueFinal"
# Define a function to run each script
def run_script(script_name):
script_path = os.path.join(folder_path, script_name)
creds_path = os.path.join(folder_path, "creds.json")
subprocess.Popen(["python", script_path, creds_path])
# Create the GUI window
root = tk.Tk()
root.title("Script Runner")
# Define a function to run all scripts
def run_all():
for script_name in os.listdir(folder_path):
if script_name.endswith(".py"):
run_script(script_name)
# Create buttons for each script
for script_name in os.listdir(folder_path):
if script_name.endswith(".py"):
script_button = tk.Button(
root, text=script_name, command=lambda script_name=script_name: run_script(script_name))
script_button.pack()
# Create a button to run all scripts
all_button = tk.Button(root, text="Run All", command=run_all)
all_button.pack()
# Start the GUI
root.mainloop()