使用 Cheerio JS 在对象中存储特定值的数据

问题描述 投票:0回答:1

我正在使用 Node 和 Cheerio JS 练习网络抓取,我需要帮助。

我有下面这个示例表格,但不清楚该如何获取每场比赛(每个 tr)的主场赔率和客场赔率,把它们存入一个对象,再把这些对象存入该场比赛下名为 odds 的数组中。

      <table>
        <thead>
          <tr>
            <th>Match Date</th>
            <th>League</th>
            <th>Win (Home Team)</th>
            <th>Home Odds</th>
            <th>Radish (Standard)</th>
            <th>Away Odds</th>
            <th>Lose (Away Team)</th>
          </tr>
        </thead>
        <tbody>
          <tr>
            <td>04-24 19:00</td>
            <td>Ukraine D1</td>
            <td>Dnipro-1</td>
            <td>1.26</td>
            <td>5.11</td>
            <td>9.67</td>
            <td>Rukh Lviv</td>
          </tr>
          <tr>
            <td>04-24 19:00</td>
            <td>Ukraine D1</td>
            <td>Dnipro-1</td>
            <td>1.87</td>
            <td>2.75</td>
            <td>1.87</td>
            <td>Rukh Lviv</td>
          </tr>
          <tr>
            <td>04-24 20:00</td>
            <td>Ukraine D1</td>
            <td>FC Oleksandriya</td>
            <td>1.58</td>
            <td>3.59</td>
            <td>5.51</td>
            <td>FC Inhulets Petrove</td>
          </tr>
          <tr>
            <td>04-24 20:00</td>
            <td>Ukraine D1</td>
            <td>FC Oleksandriya</td>
            <td>1.82</td>
            <td>2.25</td>
            <td>1.92</td>
            <td>FC Inhulets Petrove</td>
          </tr>
        </tbody>
        <tbody>
          <tr>
            <td>04-24 21:00</td>
            <td>PBA</td>
            <td>Meralco Bolts</td>
            <td>1.26</td>
            <td>5.11</td>
            <td>9.67</td>
            <td>Barangay Ginebra</td>
          </tr>
          <tr>
            <td>04-24 21:00</td>
            <td>PBA</td>
            <td>Meralco Bolts</td>
            <td>1.87</td>
            <td>2.75</td>
            <td>1.87</td>
            <td>Barangay Ginebra</td>
          </tr>
          <tr>
            <td>04-24 22:00</td>
            <td>PBA</td>
            <td>San Miguel Beermen</td>
            <td>1.58</td>
            <td>3.59</td>
            <td>5.51</td>
            <td>Blackwater</td>
          </tr>
          <tr>
            <td>04-24 22:00</td>
            <td>PBA</td>
            <td>San Miguel Beermen</td>
            <td>1.82</td>
            <td>2.25</td>
            <td>1.92</td>
            <td>Blackwater</td>
          </tr>
        </tbody>
      </table>

这是我目前的代码和输出:

// Scrapes every table body row into `data`, one entry per <tr>.
// Assumes `$` is a Cheerio instance already loaded with the table HTML — it is not defined in this snippet.
const data = [];
// Matches every <tbody> that is a direct child of a <table>.
const $table = $("table > tbody");
// NOTE(review): a single array declared outside the row loop; every row appends to it (see below).
let odds = [];

$table.find("tr").each((i, row) => {
  const $trs = $(row);

  // Cells are read by position: first = date, 2nd = league, 3rd = home team, last = away team.
  const $date = $trs.find("td:first").text().trim();
  const $league_name = $trs.find("td:nth-child(2)").text().trim();
  const $home_team = $trs.find("td:nth-child(3)").text().trim();
  const $away_team = $trs.find("td:last").text().trim();

  // Odds come from the 4th (home) and 6th (away) cells; the 5th cell is not read.
  // The values are kept as trimmed text, not converted to numbers.
  const $home_odds = $trs.find("td:nth-child(4)").text().trim();
  const $away_odds = $trs.find("td:nth-child(6)").text().trim();

  // The empty object assigned here is immediately replaced by the literal below.
  let game_odds = {};

  game_odds = {
    home: $home_odds,
    away: $away_odds,
  };

  // Appends to the outer `odds` array, which therefore keeps growing across rows.
  odds.push(game_odds);

  // `odds` below is that same outer array for every entry, so each entry in `data`
  // references the full list of all rows' odds rather than only its own row's odds.
  data.push({
    date: $date,
    league_name: $league_name,
    home_team: $home_team,
    away_team: $away_team,
    odds,
  });
});

然后我遍历 data 数组中的数据来去除重复的比赛,如下所示:

// Collapses consecutive rows of `data` into one `finalData` entry per match by
// comparing each row with the previously seen league and team names.
// Presumably `data` is the array built by the scraping snippet — it is not defined here.
let finalData = [];
let previousLeague = null;
let previousHomeTeam = null;
let previousAwayTeam = null;

for (let i = 0; i < data.length; i++) {
  if (data[i].league_name === previousLeague) {
    if (
      data[i].home_team === previousHomeTeam &&
      data[i].away_team === previousAwayTeam
    ) {
      // Same match as the previously recorded one: this is where the row's odds were
      // meant to be added to the finalData entry. The branch currently does nothing.
    } else {
      // A different match within the same league: remember its teams and record it.
      // No `odds` property is copied into the pushed entry.
      previousHomeTeam = data[i].home_team;
      previousAwayTeam = data[i].away_team;

      finalData.push({
        date: data[i].date,
        league_name: data[i].league_name,
        home_team: data[i].home_team,
        away_team: data[i].away_team,
      });
    }
  } else {
    // League changed: only the league is remembered. Nothing is pushed for this row
    // and the previous team names are not reset, so the first row of each league
    // only appears in finalData if a later row for the same match follows it.
    previousLeague = data[i].league_name;
  }
}

输出看起来像这样

[
  {
    date: '04-24 19:00',
    league_name: 'Ukraine D1',
    home_team: 'Dnipro-1',
    away_team: 'Rukh Lviv',
    odds: [
        // This match's odds should be here
        {
            home: 123,
            away: 123,
        },
        {
            home: 123,
            away: 123,
        },
        // There should be two, since the table has two rows for the match between these two teams
    ]
  },
  {
    date: '04-24 20:00',
    league_name: 'Ukraine D1',
    home_team: 'FC Oleksandriya',
    away_team: 'FC Inhulets Petrove'
  },
  {
    date: '04-24 21:00',
    league_name: 'PBA',
    home_team: 'Meralco Bolts',
    away_team: 'Barangay Ginebra'
  },
  {
    date: '04-24 22:00',
    league_name: 'PBA',
    home_team: 'San Miguel Beermen',
    away_team: 'Blackwater'
  }
]

感谢您的帮助和建议!

javascript json loops cheerio
1个回答
0
投票

如果我没理解错的话,你想把联赛名称和主/客队都相同的连续行分为一组:

const cheerio = require("cheerio"); // 1.0.0-rc.12
// Print nested objects in full; the default inspect depth would collapse the
// objects inside each `odds` array to [Object].
require("util").inspect.defaultOptions.depth = null;

const html = `<Your table HTML>`;

const $ = cheerio.load(html);

// Property names for the <td> cells of a row, in column order.
const headers = [
  "date",
  "league_name",
  "home_team",
  "home",
  "radish",
  "away",
  "away_team",
];
// Only the odds columns are converted to numbers, so a numeric-looking team or
// league name stays a string.
const numericColumns = new Set(["home", "radish", "away"]);

/**
 * Converts cell text to a number when it is a finite numeric string.
 *
 * @param {string} text - Trimmed cell text.
 * @returns {number|string} The parsed number (including 0), or the original
 *   text when it is empty or not numeric.
 */
const toNumberOrText = text => {
  const value = Number(text);
  return text !== "" && Number.isFinite(value) ? value : text;
};

// One plain object per data row. Rows without <td> cells (e.g. the header row)
// are skipped, rather than assuming the first <tr> is always the header.
const data = [...$("tr")]
  .filter(row => $(row).find("td").length > 0)
  .map(row =>
    Object.fromEntries(
      [...$(row).find("td")].map((cell, i) => {
        const key = headers[i];
        const text = $(cell).text().trim();
        return [key, numericColumns.has(key) ? toNumberOrText(text) : text];
      })
    )
  );

// Consecutive rows that agree on all of these keys belong to the same match.
const groupOn = ["league_name", "home_team", "away_team"];
const result = [];
let current = null;

// `radish` is destructured only to keep it out of `match`; it is not used.
for (const {home, away, radish, ...match} of data) {
  // Start a new group whenever any grouping key differs from the current group.
  if (current === null || groupOn.some(k => match[k] !== current[k])) {
    current = {...match, odds: []};
    result.push(current);
  }

  current.odds.push({home, away});
}

console.log(result);

输出:

[
  {
    date: '04-24 19:00',
    league_name: 'Ukraine D1',
    home_team: 'Dnipro-1',
    away_team: 'Rukh Lviv',
    odds: [ { home: 1.26, away: 9.67 }, { home: 1.87, away: 1.87 } ]
  },
  {
    date: '04-24 20:00',
    league_name: 'Ukraine D1',
    home_team: 'FC Oleksandriya',
    away_team: 'FC Inhulets Petrove',
    odds: [ { home: 1.58, away: 5.51 }, { home: 1.82, away: 1.92 } ]
  },
  {
    date: '04-24 21:00',
    league_name: 'PBA',
    home_team: 'Meralco Bolts',
    away_team: 'Barangay Ginebra',
    odds: [ { home: 1.26, away: 9.67 }, { home: 1.87, away: 1.87 } ]
  },
  {
    date: '04-24 22:00',
    league_name: 'PBA',
    home_team: 'San Miguel Beermen',
    away_team: 'Blackwater',
    odds: [ { home: 1.58, away: 5.51 }, { home: 1.82, away: 1.92 } ]
  }
]
© www.soinside.com 2019 - 2024. All rights reserved.