网络抓取 ESPN 预测赔率和输赢盘

问题描述 投票:0回答:1

我在尝试用 BeautifulSoup 抓取 ESPN 网站时总是遇到麻烦,下面是我的代码。不知为何,我甚至连要提取的球队名称都取不到,更不用说每支球队的独赢盘赔率(money line)了。

import bs4
from bs4 import BeautifulSoup
import requests
import pandas as pd

def extract_money_lines(url):
    """Scrape two team names and their money lines from the page at ``url``.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. Team names are read from ``<span class="rteQ">``
    elements and money lines from ``<div id="topOdd">`` elements; in both
    cases the first match is treated as the home side and the second as the
    away side.

    Returns:
        A 4-tuple ``(home_team, away_team, home_money_line,
        away_money_line)`` of whitespace-stripped strings, or
        ``(None, None, None, None)`` when fewer than two matching elements
        of either kind are found.
    """
    not_found = (None, None, None, None)

    page = requests.get(url)
    soup = bs4.BeautifulSoup(page.text, 'html.parser')

    # Team names: both a home and an away entry are required.
    name_tags = soup.find_all('span', {'class': 'rteQ'})
    if len(name_tags) < 2:
        return not_found
    home_team, away_team = (tag.text.strip() for tag in name_tags[:2])

    # Money lines: same rule, first match is home and second is away.
    odds_tags = soup.find_all('div', {'id': 'topOdd'})
    if len(odds_tags) < 2:
        return not_found
    home_money_line, away_money_line = (tag.text.strip() for tag in odds_tags[:2])

    return home_team, away_team, home_money_line, away_money_line

# Example NBA game whose ESPN page is scraped
game_id = "401585432"
url = f"https://www.espn.com/nba/game/_/gameId/{game_id}"

# Pull both team names and both money lines from the game page
home_team, away_team, home_money_line, away_money_line = extract_money_lines(url)

# Build a one-row table; every value is wrapped in a list so that pandas
# creates exactly one row.
data = dict(zip(
    ['Home Team', 'Home Money Line', 'Away Team', 'Away Money Line'],
    [[home_team], [home_money_line], [away_team], [away_money_line]],
))
df = pd.DataFrame(data)

print(df)


# Save DataFrame to CSV
#df.to_csv('nba_money_lines.csv', index=False)
#print("CSV file saved successfully.")

不知为何,我甚至连要提取的球队名称都取不到,更不用说每支球队的独赢盘赔率(money line)了。我已经仔细研究过其他网络抓取示例,但我在这方面还是新手,这或许能解释我遇到的问题。非常感谢您的帮助。

python web-scraping beautifulsoup
1个回答
0
投票

您的初始问题是

teams = bs.find_all('span', {'class': 'rteQ'})
返回 0 个对象。另外,我刚打开了这个页面,没有看到任何地方显示独赢盘赔率(money line)。

尽管如此,我个人会直接使用他们的 API。您可能需要自行翻看返回的数据来找到所需内容;比赛摘要(summary)接口中的赔率是空的,但记分板(scoreboard)API 中有这些数据,您只需要传入一个日期即可。

import bs4
from bs4 import BeautifulSoup
import requests
import pandas as pd

def extract_money_lines(url, dateStr):
    """Return one row per competitor from a scoreboard JSON endpoint.

    Sends a GET request to ``url`` with a browser-like User-Agent and a fixed
    set of NBA query parameters, passing ``dateStr`` as the ``dates`` value
    (e.g. ``'20240301'``). The JSON reply is expected to hold the games under
    ``sports[0].leagues[0].events``.

    Returns:
        A DataFrame produced by ``pandas.json_normalize`` with one row per
        entry of each event's ``competitors`` list. Each row also carries
        the event's ``odds`` value and the ``odds.away.moneyLine`` /
        ``odds.home.moneyLine`` columns; missing meta keys are tolerated
        because ``errors='ignore'`` is passed.
    """
    query = {
        'sport': 'basketball',
        'league': 'nba',
        'region': 'us',
        'lang': 'en',
        'contentorigin': 'espn',
        'buyWindow': '1m',
        'showAirings': 'buy,live,replay',
        'tz': 'America/New_York',
        'dates': dateStr,
    }
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    }

    body = requests.get(url, headers=browser_headers, params=query).json()
    league = body['sports'][0]['leagues'][0]

    # Flatten each event's competitors into rows and attach the event-level
    # odds fields to every competitor row.
    return pd.json_normalize(
        league['events'],
        record_path=['competitors'],
        meta=['odds', ['odds', 'away', 'moneyLine'], ['odds', 'home', 'moneyLine']],
        errors='ignore',
    )


# Date passed as the `dates` query parameter, and the scoreboard-header endpoint
dateStr = '20240301'
url = f"https://site.web.api.espn.com/apis/v2/scoreboard/header"

# One row per competitor, with each game's money lines attached
df = extract_money_lines(url=url, dateStr=dateStr)

输出(仅显示独赢盘赔率和球队):

# Show only the home/away flag, the team name and the two money-line columns
print(df[['homeAway', 'displayName', 'odds.away.moneyLine', 'odds.home.moneyLine']])
   homeAway             displayName odds.away.moneyLine odds.home.moneyLine
0      away     Cleveland Cavaliers                -400                 320
1      home         Detroit Pistons                -400                 320
2      away       Charlotte Hornets                 450                -650
3      home      Philadelphia 76ers                 450                -650
4      away        Dallas Mavericks                 340                -440
5      home          Boston Celtics                 340                -440
6      away   Golden State Warriors                -140                 120
7      home         Toronto Raptors                -140                 120
8      away  Portland Trail Blazers                 120                -140
9      home       Memphis Grizzlies                 120                -140
10     away        Sacramento Kings                 200                -240
11     home  Minnesota Timberwolves                 200                -240
12     away          Indiana Pacers                 180                -210
13     home    New Orleans Pelicans                 180                -210
14     away         Milwaukee Bucks                -175                 150
15     home           Chicago Bulls                -175                 150
16     away      Washington Wizards                 800               -1400
17     home             LA Clippers                 800               -1400

重塑:

# Collapse the per-competitor rows into one row per game.
# Rows are consumed in consecutive pairs: the first row of each pair is read
# as the away side and the second as the home side.
reshaped_data = []

for start in range(0, len(df), 2):
    row_away, row_home = df.iloc[start], df.iloc[start + 1]
    reshaped_data.append({
        'away': f"{row_away['displayName']}",
        'home': f"{row_home['displayName']}",
        'odds.away.moneyLine': row_away['odds.away.moneyLine'],
        'odds.home.moneyLine': row_home['odds.home.moneyLine'],
    })

reshaped_df = pd.DataFrame(reshaped_data)
print(reshaped_df.to_string())
                     away                    home  odds.away.moneyLine  odds.home.moneyLine
0     Cleveland Cavaliers         Detroit Pistons                 -400                  320
1       Charlotte Hornets      Philadelphia 76ers                  450                 -650
2        Dallas Mavericks          Boston Celtics                  340                 -440
3   Golden State Warriors         Toronto Raptors                 -140                  120
4  Portland Trail Blazers       Memphis Grizzlies                  120                 -140
5        Sacramento Kings  Minnesota Timberwolves                  200                 -240
6          Indiana Pacers    New Orleans Pelicans                  180                 -210
7         Milwaukee Bucks           Chicago Bulls                 -175                  150
8      Washington Wizards             LA Clippers                  800                -1400
© www.soinside.com 2019 - 2024. All rights reserved.