我正在使用 Node 和 Cheerio JS 练习网络抓取,我需要帮助。
我有下面这个示例表格,但不知道该如何获取每场比赛各行 (tr) 的主场赔率和客场赔率,把每一行的赔率存入一个对象,再把这些对象放进一个名为 odds 的数组中。
<!--
  Sample odds table used by the scraping snippets below.
  - Seven columns per row: match date, league, home team, home odds,
    "Radish (Standard)" odds, away odds, away team.
  - Two <tbody> sections, one per league (Ukraine D1, PBA).
  - Each match appears as two consecutive rows that differ only in their odds.
-->
<table>
<thead>
<tr>
<th>Match Date</th>
<th>League</th>
<th>Win (Home Team)</th>
<th>Home Odds</th>
<th>Radish (Standard)</th>
<th>Away Odds</th>
<th>Lose (Away Team)</th>
</tr>
</thead>
<tbody>
<tr>
<td>04-24 19:00</td>
<td>Ukraine D1</td>
<td>Dnipro-1</td>
<td>1.26</td>
<td>5.11</td>
<td>9.67</td>
<td>Rukh Lviv</td>
</tr>
<tr>
<td>04-24 19:00</td>
<td>Ukraine D1</td>
<td>Dnipro-1</td>
<td>1.87</td>
<td>2.75</td>
<td>1.87</td>
<td>Rukh Lviv</td>
</tr>
<tr>
<td>04-24 20:00</td>
<td>Ukraine D1</td>
<td>FC Oleksandriya</td>
<td>1.58</td>
<td>3.59</td>
<td>5.51</td>
<td>FC Inhulets Petrove</td>
</tr>
<tr>
<td>04-24 20:00</td>
<td>Ukraine D1</td>
<td>FC Oleksandriya</td>
<td>1.82</td>
<td>2.25</td>
<td>1.92</td>
<td>FC Inhulets Petrove</td>
</tr>
</tbody>
<tbody>
<tr>
<td>04-24 21:00</td>
<td>PBA</td>
<td>Meralco Bolts</td>
<td>1.26</td>
<td>5.11</td>
<td>9.67</td>
<td>Barangay Ginebra</td>
</tr>
<tr>
<td>04-24 21:00</td>
<td>PBA</td>
<td>Meralco Bolts</td>
<td>1.87</td>
<td>2.75</td>
<td>1.87</td>
<td>Barangay Ginebra</td>
</tr>
<tr>
<td>04-24 22:00</td>
<td>PBA</td>
<td>San Miguel Beermen</td>
<td>1.58</td>
<td>3.59</td>
<td>5.51</td>
<td>Blackwater</td>
</tr>
<tr>
<td>04-24 22:00</td>
<td>PBA</td>
<td>San Miguel Beermen</td>
<td>1.82</td>
<td>2.25</td>
<td>1.92</td>
<td>Blackwater</td>
</tr>
</tbody>
</table>
下面是我目前写的代码和得到的输出:
// Walks every <tr> under "table > tbody" and pushes one entry per row into `data`.
// `$` is not defined in this snippet; presumably it is a loaded Cheerio instance.
const data = [];
const $table = $("table > tbody");
// NOTE(review): `odds` is created once, outside the row loop, so a single
// array instance is shared by every row processed below.
let odds = [];
$table.find("tr").each((i, row) => {
const $trs = $(row);
// Text of the fixed-position cells: 1 = date, 2 = league, 3 = home team, last = away team.
const $date = $trs.find("td:first").text().trim();
const $league_name = $trs.find("td:nth-child(2)").text().trim();
const $home_team = $trs.find("td:nth-child(3)").text().trim();
const $away_team = $trs.find("td:last").text().trim();
// Odds: cell 4 is the home odds, cell 6 the away odds (cell 5 is not read). Values stay strings.
const $home_odds = $trs.find("td:nth-child(4)").text().trim();
const $away_odds = $trs.find("td:nth-child(6)").text().trim();
let game_odds = {};
game_odds = {
home: $home_odds,
away: $away_odds,
};
// Appends this row's odds to the shared array declared above the loop.
odds.push(game_odds);
data.push({
date: $date,
league_name: $league_name,
home_team: $home_team,
away_team: $away_team,
// NOTE(review): this stores a reference to the shared `odds` array, not a
// per-match copy, so every entry in `data` ends up pointing at the same
// array containing the odds of all rows.
odds,
});
});
然后我像下面这样遍历 data 数组中的数据,以避免同一场比赛重复出现:
// Collapses consecutive rows of `data` that describe the same match into a
// single entry in `finalData`, tracking the previously seen league and teams.
let finalData = [];
let previousLeague = null;
let previousHomeTeam = null;
let previousAwayTeam = null;
for (let i = 0; i < data.length; i++) {
if (data[i].league_name === previousLeague) {
if (
data[i].home_team === previousHomeTeam &&
data[i].away_team === previousAwayTeam
) {
// Same match as the previously recorded one. The intent is to add this
// row's odds to that match's finalData entry, but this branch is empty,
// so nothing happens here.
} else {
// New match within the same league: remember its teams and record it.
// No `odds` property is copied into the pushed object.
previousHomeTeam = data[i].home_team;
previousAwayTeam = data[i].away_team;
finalData.push({
date: data[i].date,
league_name: data[i].league_name,
home_team: data[i].home_team,
away_team: data[i].away_team,
});
}
} else {
// League changed: only the tracked league is updated. This row itself is
// not pushed, so a match is recorded only when a later row of the same
// league reaches the branch above.
previousLeague = data[i].league_name;
}
}
输出看起来像这样
[
{
date: '04-24 19:00',
league_name: 'Ukraine D1',
home_team: 'Dnipro-1',
away_team: 'Rukh Lviv',
odds: [
// This match's odds should be here
{
home: 123,
away: 123,
},
{
home: 123,
away: 123,
},
// There should be two entries, since the table contains two rows for this pair of teams
]
},
{
date: '04-24 20:00',
league_name: 'Ukraine D1',
home_team: 'FC Oleksandriya',
away_team: 'FC Inhulets Petrove'
},
{
date: '04-24 21:00',
league_name: 'PBA',
home_team: 'Meralco Bolts',
away_team: 'Barangay Ginebra'
},
{
date: '04-24 22:00',
league_name: 'PBA',
home_team: 'San Miguel Beermen',
away_team: 'Blackwater'
}
]
感谢您的帮助和建议!
如果我没理解错的话,你想把联赛名称和主/客队都相同的连续行归为一组:
const cheerio = require("cheerio"); // 1.0.0-rc.12
// Make console.log print nested objects/arrays in full instead of truncating them.
require("util").inspect.defaultOptions.depth = null;
const html = `<Your table HTML>`;
const $ = cheerio.load(html);
// Object keys for the table columns, in column order.
const headers = [
  "date",
  "league_name",
  "home_team",
  "home",
  "radish",
  "away",
  "away_team",
];
// Only the odds columns are converted to numbers. Dates, league names and team
// names always stay strings, even when they happen to look numeric.
const numericColumns = new Set(["home", "radish", "away"]);
// One plain object per data row, keyed by `headers`.
// Rows without any <td> cell (e.g. the <th> header row) are skipped, so the
// result does not depend on the header being exactly the first <tr>.
const data = [...$("tr")]
  .filter(row => $(row).find("td").length > 0)
  .map(row =>
    Object.fromEntries(
      [...$(row).find("td")].map((cell, i) => {
        const text = $(cell).text().trim();
        // An explicit finiteness check (rather than `+text || text`) keeps a
        // legitimate 0 as a number and leaves non-numeric text untouched.
        const isNumber =
          numericColumns.has(headers[i]) &&
          text !== "" &&
          Number.isFinite(Number(text));
        return [headers[i], isNumber ? Number(text) : text];
      })
    )
  );
// Fields that identify a match. Consecutive rows that agree on all of them
// are merged into one entry whose `odds` array collects each row's odds.
const groupOn = ["league_name", "home_team", "away_team"];
const result = [];
// `home`, `radish` and `away` are pulled out of each row so that only the
// remaining fields end up on the grouped entry; `radish` is intentionally dropped.
for (const {home, radish, away, ...details} of data) {
  const current = result.at(-1);
  const isSameMatch =
    current !== undefined &&
    groupOn.every(key => details[key] === current[key]);
  if (isSameMatch) {
    current.odds.push({home, away});
  } else {
    // First row of a new run: start a new entry seeded with this row's odds.
    result.push({...details, odds: [{home, away}]});
  }
}
console.log(result);
输出:
[
{
date: '04-24 19:00',
league_name: 'Ukraine D1',
home_team: 'Dnipro-1',
away_team: 'Rukh Lviv',
odds: [ { home: 1.26, away: 9.67 }, { home: 1.87, away: 1.87 } ]
},
{
date: '04-24 20:00',
league_name: 'Ukraine D1',
home_team: 'FC Oleksandriya',
away_team: 'FC Inhulets Petrove',
odds: [ { home: 1.58, away: 5.51 }, { home: 1.82, away: 1.92 } ]
},
{
date: '04-24 21:00',
league_name: 'PBA',
home_team: 'Meralco Bolts',
away_team: 'Barangay Ginebra',
odds: [ { home: 1.26, away: 9.67 }, { home: 1.87, away: 1.87 } ]
},
{
date: '04-24 22:00',
league_name: 'PBA',
home_team: 'San Miguel Beermen',
away_team: 'Blackwater',
odds: [ { home: 1.58, away: 5.51 }, { home: 1.82, away: 1.92 } ]
}
]