我在尝试用 BeautifulSoup 抓取 ESPN 网站时总是遇到麻烦,下面是我的代码。由于某种原因,我甚至无法提取到球队名称,更不用说每支球队的独赢盘赔率(money line)了。
import bs4
from bs4 import BeautifulSoup
import requests
import pandas as pd
def extract_money_lines(url):
    """Download a game page and pull out two team names and two money lines.

    The page at ``url`` is fetched with ``requests`` and parsed with
    BeautifulSoup's ``html.parser``. Team names are read from the first two
    ``<span class="rteQ">`` elements and money lines from the first two
    ``<div id="topOdd">`` elements; in both cases the first match is treated
    as the home side and the second as the away side.

    Returns:
        A 4-tuple ``(home_team, away_team, home_money_line,
        away_money_line)`` of whitespace-stripped strings, or four ``None``
        values when either lookup yields fewer than two elements.
    """
    not_found = (None, None, None, None)

    page = requests.get(url)
    soup = bs4.BeautifulSoup(page.text, 'html.parser')

    # Team names
    name_tags = soup.find_all('span', attrs={'class': 'rteQ'})
    if len(name_tags) < 2:
        return not_found

    # Money lines
    odds_tags = soup.find_all('div', attrs={'id': 'topOdd'})
    if len(odds_tags) < 2:
        return not_found

    home_team, away_team = (tag.text.strip() for tag in name_tags[:2])
    home_money_line, away_money_line = (
        tag.text.strip() for tag in odds_tags[:2]
    )
    return home_team, away_team, home_money_line, away_money_line
# Game page to scrape, identified by an example NBA game ID
game_id = "401585432"
url = f"https://www.espn.com/nba/game/_/gameId/{game_id}"

# Pull team names and money lines from that page
home_team, away_team, home_money_line, away_money_line = extract_money_lines(url)

# Assemble a one-row DataFrame; each value is wrapped in a single-item list
data = {
    column: [value]
    for column, value in (
        ('Home Team', home_team),
        ('Home Money Line', home_money_line),
        ('Away Team', away_team),
        ('Away Money Line', away_money_line),
    )
}
df = pd.DataFrame(data)
print(df)

# Optional CSV export, left disabled
#df.to_csv('nba_money_lines.csv', index=False)
#print("CSV file saved successfully.")
由于某种原因,我甚至无法提取到球队名称,更不用说每支球队的独赢盘赔率(money line)了。我已经比较仔细地研究过其他网络爬虫的写法,但我对此还很陌生,这或许就是我遇到麻烦的原因。非常感谢您的帮助。
您的初始问题是
teams = bs.find_all('span', {'class': 'rteQ'})
返回了 0 个对象。另外,我打开这个页面看了一下,没有在任何地方看到独赢盘赔率(money line)。
尽管如此,我个人会直接使用他们的 API。您可能需要翻看返回的数据才能找到需要的内容;比赛摘要(summary)中的赔率是空的,但在记分板(scoreboard)API 中可以获取到,您只需要传入一个日期即可。
import bs4
from bs4 import BeautifulSoup
import requests
import pandas as pd
def extract_money_lines(url, dateStr):
    """Query the scoreboard endpoint and flatten its events per competitor.

    Sends a GET request to ``url`` with a browser-like ``User-Agent`` header
    and a fixed set of NBA query parameters, with ``dateStr`` passed as the
    ``dates`` parameter. The JSON reply is expected to expose its events at
    ``['sports'][0]['leagues'][0]['events']``.

    Args:
        url: Endpoint to request.
        dateStr: Value sent as the ``dates`` query parameter.

    Returns:
        A DataFrame with one row per entry in each event's ``competitors``
        list, plus the event's ``odds``, ``odds.away.moneyLine`` and
        ``odds.home.moneyLine`` as metadata columns.
    """
    browser_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'}
    query = {
        'sport': 'basketball',
        'league': 'nba',
        'region': 'us',
        'lang': 'en',
        'contentorigin': 'espn',
        'buyWindow': '1m',
        'showAirings': 'buy,live,replay',
        'tz': 'America/New_York',
        'dates': dateStr,
    }

    reply = requests.get(url, headers=browser_headers, params=query)
    scoreboard = reply.json()
    events = scoreboard['sports'][0]['leagues'][0]['events']

    # errors='ignore' lets events that lack some of these meta keys through
    # instead of raising.
    meta_paths = [
        'odds',
        ['odds', 'away', 'moneyLine'],
        ['odds', 'home', 'moneyLine'],
    ]
    return pd.json_normalize(
        events,
        record_path=['competitors'],
        meta=meta_paths,
        errors='ignore',
    )
# Date whose scoreboard is requested (looks like YYYYMMDD -- confirm against the API)
dateStr = '20240301'
url = "https://site.web.api.espn.com/apis/v2/scoreboard/header"

# Fetch one row per competitor, including the money-line columns
df = extract_money_lines(url, dateStr)
输出(仅显示独赢盘赔率和球队):
# Show only the home/away flag, team name and the two money-line columns
print(df.loc[:, ['homeAway', 'displayName', 'odds.away.moneyLine', 'odds.home.moneyLine']])
homeAway displayName odds.away.moneyLine odds.home.moneyLine
0 away Cleveland Cavaliers -400 320
1 home Detroit Pistons -400 320
2 away Charlotte Hornets 450 -650
3 home Philadelphia 76ers 450 -650
4 away Dallas Mavericks 340 -440
5 home Boston Celtics 340 -440
6 away Golden State Warriors -140 120
7 home Toronto Raptors -140 120
8 away Portland Trail Blazers 120 -140
9 home Memphis Grizzlies 120 -140
10 away Sacramento Kings 200 -240
11 home Minnesota Timberwolves 200 -240
12 away Indiana Pacers 180 -210
13 home New Orleans Pelicans 180 -210
14 away Milwaukee Bucks -175 150
15 home Chicago Bulls -175 150
16 away Washington Wizards 800 -1400
17 home LA Clippers 800 -1400
重塑(将每场比赛的客队和主队两行合并为一行):
# Collapse every two consecutive rows of df into a single row per game.
# Assumes the rows alternate (away, home) for each game; an odd number of
# rows raises IndexError on the final, unpaired row.
row_pairs = (
    (df.iloc[start], df.iloc[start + 1])
    for start in range(0, len(df), 2)
)
reshaped_data = [
    {
        'away': f"{away_row['displayName']}",
        'home': f"{home_row['displayName']}",
        'odds.away.moneyLine': away_row['odds.away.moneyLine'],
        'odds.home.moneyLine': home_row['odds.home.moneyLine'],
    }
    for away_row, home_row in row_pairs
]

reshaped_df = pd.DataFrame(reshaped_data)
print(reshaped_df.to_string())
away home odds.away.moneyLine odds.home.moneyLine
0 Cleveland Cavaliers Detroit Pistons -400 320
1 Charlotte Hornets Philadelphia 76ers 450 -650
2 Dallas Mavericks Boston Celtics 340 -440
3 Golden State Warriors Toronto Raptors -140 120
4 Portland Trail Blazers Memphis Grizzlies 120 -140
5 Sacramento Kings Minnesota Timberwolves 200 -240
6 Indiana Pacers New Orleans Pelicans 180 -210
7 Milwaukee Bucks Chicago Bulls -175 150
8 Washington Wizards LA Clippers 800 -1400