谁能帮忙看一下下面这段抓取 skytrax 网站的代码?我不确定如何根据评分值把星级评分转换成数字表示。任何提示都将不胜感激。提前致谢。
library(rvest)
library(dplyr)
# Scrape one page of Emirates reviews from airlinequality.com (newest first,
# page size 100) into a data frame with one row per review.

# The sort / page-size options are a query string, so they must follow a "?".
link <- "https://www.airlinequality.com/airline-reviews/emirates/page/1/?sortby=post_date%3ADesc&pagesize=100"
page <- read_html(link)

# One node per review. Every field below is looked up inside its own review,
# so all columns have the same length even when a review lacks a field.
# NOTE(review): assumes each review is wrapped in <article itemprop="review">;
# confirm against the live page markup.
reviews <- page %>% html_nodes("article[itemprop='review']")

# Text of the first node matching `css` inside each review
# (NA when a review has no such node).
review_text <- function(css) {
  reviews %>%
    html_node(css) %>%
    html_text(trim = TRUE)
}

# Number of filled stars in the star cell matching `css` inside each review
# (NA when a review has no such rating). The page has no numeric value for
# these ratings: a filled star has the classes "star fill", an empty one
# only "star", so the rating is the count of filled stars.
review_stars <- function(css) {
  vapply(seq_along(reviews), function(i) {
    cell <- html_node(reviews[[i]], css)
    if (inherits(cell, "xml_missing")) {
      return(NA_integer_)
    }
    length(html_nodes(cell, ".star.fill"))
  }, integer(1))
}

# ".grippy-host" (a SelectorGadget artefact, not part of the page) has been
# dropped from the selectors.
pax_name <- review_text(".userStatusWrapper")
date <- review_text("time")
pax_rating <- review_text(".rating-10")
traveller_type <- review_text(".type_of_traveller+ .review-value")
trip_verified <- review_text("em")
crew_service <- review_stars(".cabin_staff_service+ .stars")
# Was "t.recommended", which selects a <t> element and therefore matched nothing.
pax_recommended <- review_text(".recommended+ .review-value")
food_drinks <- review_stars(".food_and_beverages+ .stars")
ground_service <- review_stars(".ground_service+ .stars")
travel_class <- review_text(".cabin_flown+ .review-value")

emirates_reviews <- data.frame(
  pax_name, date, pax_rating, traveller_type, trip_verified,
  crew_service, pax_recommended, food_drinks, ground_service, travel_class,
  stringsAsFactors = FALSE
)
在此页面上没有直接表示评分数值的 html 元素。相反,每颗星都是一个单独的 html 元素,其 class 为 "star"(空心)或 "star fill"(填充)。
所以看起来得到评分的唯一方法是统计评分表中每一行里填充星的个数。下面是对第一条评论的评分表执行此操作的示例。您仍然需要想办法把这些信息与评论的其余字段合并,但这里给出一种只获取每条评论评分的方法:
# Turn the star widgets of the first review into numeric ratings by counting
# the filled stars in each rating row of its ratings table.

# define target url
url <- "https://www.airlinequality.com/airline-reviews/emirates/?sortby=post_date%3ADesc&pagesize=100"
page_html <- url %>% read_html()

# Find the ratings table of every review
reviews <- page_html %>% html_nodes(".review-ratings")
stopifnot(length(reviews) > 0)
first_review <- reviews[1]

# Class of each individual star, in document order: the stars of the first
# rating row, then the stars of the second row, and so on.
stars <- first_review %>%
  html_nodes(".review-rating-stars.stars") %>%
  html_children() %>%
  html_attr("class")

# Every rating row is drawn with this many stars.
stars_per_row <- 5L
stopifnot(length(stars) %% stars_per_row == 0)

# One row per rating category, one column per star. `byrow = TRUE` is
# essential: matrix() fills column-wise by default, which would spread the
# stars of one category over several rows and give transposed counts.
star_classes <- matrix(stars, ncol = stars_per_row, byrow = TRUE)
stars_df <- as.data.frame(star_classes)

# The rating of a category is its number of filled stars.
stars_df$rating <- as.integer(
  rowSums(star_classes == "star fill", na.rm = TRUE)
)

# pull the review ratings table for this review
review_table <- first_review %>% html_table() %>% .[[1]]

# Attach the ratings to the table rows that actually contain a star cell;
# rows without one keep NA.
table_rows <- first_review %>% html_nodes("tr")
has_stars <- vapply(
  seq_along(table_rows),
  function(i) {
    length(html_nodes(table_rows[[i]], ".review-rating-stars.stars")) > 0
  },
  logical(1)
)
stopifnot(
  length(has_stars) == nrow(review_table),
  sum(has_stars) == nrow(stars_df)
)
review_table$rating <- NA_integer_
review_table$rating[has_stars] <- stars_df$rating
这是结果
> review_table
# A tibble: 5 × 3
X1 X2 rating
<chr> <int> <int>
1 Food & Beverages 12345 5
2 Inflight Entertainment 12345 5
3 Seat Comfort 12345 5
4 Staff Service 12345 1
5 Value for Money 12345 0