各位晚上好!
我正在尝试从雅虎财经 (https://finance.yahoo.com/quote/AMZN/financials) 抓取资产负债表、损益表和现金流量表。
这是我目前的代码:
# Load rvest (HTML scraping), stringr, and dplyr
library(rvest)
library(stringr)
library(dplyr)
# Yahoo Finance financials page for AMZN
url <- "https://finance.yahoo.com/quote/AMZN/financials"
# Position of the <script> element expected to hold the embedded JSON.
# NOTE(review): this index depends on the page layout and will break whenever
# Yahoo reorders or removes script tags.
script_index <- 48L
# Read the page and collect the text of every <script> element
scripts <- read_html(url) %>%
  html_nodes("script") %>%
  html_text()
# Fail with a clear message instead of silently continuing with NA
if (length(scripts) < script_index) {
  stop(
    "Expected at least ", script_index, " <script> elements but found ",
    length(scripts), "; the page layout has probably changed.",
    call. = FALSE
  )
}
BS <- scripts[script_index]
# Locate the first occurrence of "context" inside the script text
context_pos <- as.integer(regexpr("context", BS, fixed = TRUE))
if (is.na(context_pos) || context_pos < 0L) {
  stop(
    "No \"context\" key found in <script> element ", script_index,
    "; the embedded JSON is no longer where this code expects it.",
    call. = FALSE
  )
}
# Keep the text from two characters before "context" up to 12 characters
# before the end (presumably the JSON object without its surrounding
# JavaScript wrapper -- verify against the actual page source)
start <- context_pos - 2
end <- nchar(BS) - 12
BS <- substr(BS, start, end)
# Parse the extracted text as JSON and show the stores it contains
BS <- jsonlite::fromJSON(BS)
BS$context$dispatcher$stores
我是照着一个 YouTube 视频做的,但雅虎网站在 2023 年之后改版了,这段代码已经无法运行。
在视频中,此代码将获取所有资产负债表和现金流量数据。
有人可以帮助我吗?我知道已经有一些类似的问题,但由于网站在 2023 年之后改版,那些答案已经不再适用。
我已经能够使用以下代码提取表格:
# Drive Internet Explorer over COM and read the text of one page element
library(RDCOMClient)
url <- "https://finance.yahoo.com/quote/AMZN/financials"
IEApp <- COMCreate("InternetExplorer.Application")
# Make the browser window visible
IEApp[["Visible"]] <- TRUE
IEApp$Navigate(url)
# Fixed five-second pause before the document is accessed
Sys.sleep(5)
doc <- IEApp$document()
# Print the HTML of the page body
doc$body()$innerHTML()
# Element with id "Col1-1-Financials-Proxy"
# (presumably the container of the financials table -- confirm on the page)
web_Obj_Table <- doc$getElementByID("Col1-1-Financials-Proxy")
# Split the element's text on "\r\n" and keep only the non-empty pieces
raw_text <- web_Obj_Table$innerText()
pieces <- unlist(strsplit(raw_text, "\r\n"))
text_Table <- pieces[nzchar(pieces)]
text_Table
[1] "Show:" "Income Statement"
[3] "Balance Sheet" "Cash Flow"
[5] "Annual" "Quarterly"
[7] "Income Statement" "Expand All"
[9] "All numbers in thousands" "Breakdown"
[11] "ttm" "12/31/2023"
[13] "12/31/2022" "12/31/2021"
[15] "12/31/2020" "Total Revenue"
[17] "574,785,000" "574,785,000"
[19] "513,983,000" "469,822,000"
[21] "386,064,000" "Cost of Revenue"
[23] "480,980,000" "480,980,000"
[25] "446,343,000" "403,507,000"
[27] "334,564,000" "Gross Profit"
[29] "93,805,000" "93,805,000"
[31] "67,640,000" "66,315,000"
[33] "51,500,000" "Operating Expense"
[35] "56,953,000" "56,953,000"
[37] "55,392,000" "41,436,000"
[39] "28,601,000" "Operating Income"
[41] "36,852,000" "36,852,000"
[43] "12,248,000" "24,879,000"
[45] "22,899,000" "Net Non Operating Interest Income Expense"
[47] "-233,000" "-233,000"
[49] "-1,378,000" "-1,361,000"
[51] "-1,092,000" "Other Income Expense"
[53] "938,000" "938,000"
[55] "-16,806,000" "14,633,000"
[57] "2,371,000" "Pretax Income"
[59] "37,557,000" "37,557,000"
[61] "-5,936,000" "38,151,000"
[63] "24,178,000" "Tax Provision"
[65] "7,120,000" "7,120,000"
[67] "-3,217,000" "4,791,000"
[69] "2,863,000" "Earnings from Equity Interest Net of Tax"
[71] "-12,000" "-12,000"
[73] "-3,000" "4,000"
[75] "16,000" "Net Income Common Stockholders"
[77] "30,425,000" "30,425,000"
[79] "-2,722,000" "33,364,000"
[81] "21,331,000" "Diluted NI Available to Com Stockholders"
[83] "30,425,000" "30,425,000"
[85] "-2,722,000" "33,364,000"
[87] "21,331,000" "Basic EPS"
[89] "1.95" "2.95"
[91] "-0.27" "3.30"
[93] "2.13" "Diluted EPS"
[95] "1.91" "2.90"
[97] "-0.27" "3.24"
[99] "2.09" "Basic Average Shares"
[101] "10,270,000" "10,304,000"
[103] "10,189,000" "10,120,000"
[105] "10,000,000" "Diluted Average Shares"
[107] "10,394,500" "10,492,000"
[109] "10,189,000" "10,300,000"
[111] "10,200,000" "Total Operating Income as Reported"
[113] "36,852,000" "36,852,000"
[115] "12,248,000" "24,879,000"
[117] "22,899,000" "Total Expenses"
[119] "537,933,000" "537,933,000"
[121] "501,735,000" "444,943,000"
[123] "363,165,000" "Net Income from Continuing & Discontinued Operation"
[125] "30,425,000" "30,425,000"
[127] "-2,722,000" "33,364,000"
[129] "21,331,000" "Normalized Income"
[131] "29,521,380" "29,521,380"
[133] "10,128,140" "20,551,997"
[135] "19,189,626" "Interest Income"
[137] "2,949,000" "2,949,000"
[139] "989,000" "448,000"
[141] "555,000" "Interest Expense"
[143] "3,182,000" "3,182,000"
[145] "2,367,000" "1,809,000"
[147] "1,647,000" "Net Interest Income"
[149] "-233,000" "-233,000"
[151] "-1,378,000" "-1,361,000"
[153] "-1,092,000" "EBIT"
[155] "40,739,000" "40,739,000"
[157] "-3,569,000" "39,960,000"
[159] "25,825,000" "EBITDA"
[161] "89,402,000" "89,402,000"
[163] "38,352,000" "74,393,000"
[165] "51,076,000" "Reconciled Cost of Revenue"
[167] "480,980,000" "480,980,000"
[169] "446,343,000" "403,507,000"
[171] "334,564,000" "Reconciled Depreciation"
[173] "48,663,000" "48,663,000"
[175] "41,921,000" "34,433,000"
[177] "25,251,000" "Net Income from Continuing Operation Net Minority Interest"
[179] "30,425,000" "30,425,000"
[181] "-2,722,000" "33,364,000"
[183] "21,331,000" "Total Unusual Items Excluding Goodwill"
[185] "1,115,000" "1,115,000"
[187] "-16,266,000" "14,652,000"
[189] "2,429,000" "Total Unusual Items"
[191] "1,115,000" "1,115,000"
[193] "-16,266,000" "14,652,000"
[195] "2,429,000" "Normalized EBITDA"
[197] "88,287,000" "88,287,000"
[199] "54,618,000" "59,741,000"
[201] "48,647,000" "Tax Rate for Calcs"
[203] "0" "0"
[205] "0" "0"
[207] "0" "Tax Effect of Unusual Items"
[209] "211,380" "211,380"
[211] "-3,415,860" "1,839,997"
[213] "287,626"