如何在 BeautifulSoup 中找到列表中元素的父元素

问题描述 投票:0回答:3
import requests
from bs4 import BeautifulSoup

url = "https://www.hltv.org/stats/matches/mapstatsid/103093/furia-vs-chaos"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# A timeout keeps the script from waiting forever on an unresponsive server,
# and raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
#stat_tables = soup.find_all("table", class_="stats-table")


# Maps the sum of the two numbers in an img title (e.g. "1-3" -> 4) to that
# title string.
results = {}

all_results = soup.find_all("div", class_="round-history-half")
for partial_result in all_results:
    for result in partial_result.find_all("img"):
        # .get() tolerates <img> tags that have no title attribute at all;
        # missing and empty titles are both skipped.
        title = result.get("title")
        if not title:
            continue
        rounds_won = title.split("-")
        key = int(rounds_won[0]) + int(rounds_won[1])
        results[key] = title

for key in sorted(results):
    print(key, results[key])


此代码为我提供了比赛的所有分数。我想找到对应的父元素,比如对 print(results[4]) 这一项,看看是谁赢了这一回合。我不确定如何从列表中的元素查找它的父元素。

结果应该是

<img alt="FURIA" src="https://static.hltv.org/images/team/logo/8297" class="round-history-team" title="FURIA">

Furia
python html beautifulsoup parent
3个回答
0
投票

您可能希望更改数据结构以使其更易于使用。(编辑:已更新为新的数据结构)

import json
import requests
from bs4 import BeautifulSoup

url = 'https://www.hltv.org/stats/matches/mapstatsid/103093/furia-vs-chaos'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

# Bound the wait and fail loudly on an HTTP error rather than parsing an
# error page.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
#stat_tables = soup.find_all('table', class_='stats-table')


# Maps a 1-based position within a team row to {team title: score string}.
results = {}

match = soup.find('div', class_='standard-box round-history-con')
if match is None:
    # Without this guard the next line fails with an opaque AttributeError.
    raise RuntimeError('round-history container not found in the page')

teams = [team['title']
         for team in match.find_all('img', class_='round-history-team')]
rows = match.find_all('div', class_='round-history-team-row')

for row in rows:
    outcomes = row.find_all('img', class_='round-history-outcome')
    for idx, rnd in enumerate(outcomes, start=1):
        # .get() tolerates outcome images without a title attribute; missing
        # and empty titles are both skipped.
        title = rnd.get('title')
        if title:
            # Pair each team title with its part of the "a-b" score string.
            results[idx] = dict(zip(teams, title.split('-')))

# sort the results
sorted_results = {i: results[i] for i in sorted(results)}

print(json.dumps(sorted_results, indent=4))

输出:

{
    "1": {
        "FURIA": "0",
        "Chaos": "1"
    },
    "2": {
        "FURIA": "0",
        "Chaos": "2"
    },
    "3": {
        "FURIA": "0",
        "Chaos": "3"
    },
    "4": {
        "FURIA": "1",
        "Chaos": "3"
    },
    "5": {
        "FURIA": "2",
        "Chaos": "3"
    },
    "6": {
        "FURIA": "3",
        "Chaos": "3"
    },
    "7": {
        "FURIA": "4",
        "Chaos": "3"
    },
    "8": {
        "FURIA": "5",
        "Chaos": "3"
    },
    "9": {
        "FURIA": "5",
        "Chaos": "4"
    },
    "10": {
        "FURIA": "5",
        "Chaos": "5"
    },
    "11": {
        "FURIA": "5",
        "Chaos": "6"
    },
    "12": {
        "FURIA": "5",
        "Chaos": "7"
    },
    "13": {
        "FURIA": "6",
        "Chaos": "7"
    },
    "14": {
        "FURIA": "7",
        "Chaos": "7"
    },
    "15": {
        "FURIA": "8",
        "Chaos": "7"
    },
    "16": {
        "FURIA": "9",
        "Chaos": "7"
    },
    "17": {
        "FURIA": "10",
        "Chaos": "7"
    },
    "18": {
        "FURIA": "10",
        "Chaos": "8"
    },
    "19": {
        "FURIA": "10",
        "Chaos": "9"
    },
    "20": {
        "FURIA": "10",
        "Chaos": "10"
    },
    "21": {
        "FURIA": "11",
        "Chaos": "10"
    },
    "22": {
        "FURIA": "12",
        "Chaos": "10"
    },
    "23": {
        "FURIA": "13",
        "Chaos": "10"
    },
    "24": {
        "FURIA": "14",
        "Chaos": "10"
    },
    "25": {
        "FURIA": "15",
        "Chaos": "10"
    },
    "26": {
        "FURIA": "15",
        "Chaos": "11"
    },
    "27": {
        "FURIA": "16",
        "Chaos": "11"
    }
}

0
投票

使用以下代码:

# Walk the team rows: the team image inside each row supplies the title that
# is printed before the keys derived from that row's outcome images.
team_rows = soup.find_all("div", class_="round-history-team-row")

for team_row in team_rows:
    team_img = team_row.find('img', class_="round-history-team")
    parent_img_title = team_img['title']
    print("Title : " + parent_img_title)

    outcome_imgs = team_row.find_all("img", class_="round-history-outcome")
    for outcome in outcome_imgs:
        # Images with an empty title are skipped.
        if not outcome["title"]:
            continue
        parts = outcome["title"].split("-")
        key = int(parts[0]) + int(parts[1])
        print("Key %d" % key)
        results[key] = outcome["title"]

输出:

Title : FURIA
Key 4
Key 5
...
Title : Chaos
Key 1
Key 2

0
投票

尝试一下...

# Print the alt text of the first image on the left side when that side
# contains a "bold won" div; otherwise print the right side's.
left_won = team_left[0].find_all("div", class_="bold won")
winning_side = team_left if left_won else team_right
print(winning_side[0].find_all("img")[0].get("alt"))
© www.soinside.com 2019 - 2024. All rights reserved.