我正在尝试下载雅虎财经的历史报价。在若干次请求之后,雅虎返回的不再是报价数据,而是一条错误字符串。
我读到过雅虎请求所用的 cookie 和 crumb 机制,但没有弄明白。
这是我的 C# 代码。
如何添加 cookie 和 crumb 请求?
// Builds the Yahoo Finance CSV download URL for `symbol` over the date range read from
// dtpUpdateFromDate / dtpUpdateToDate, then fetches it through GetWebContentAsync.
// NOTE(review): the rest of the body is elided ("...") in this snippet, so what happens
// to the downloaded data is not visible here.
async Task DownloadQuoteFromYahoo(string symbol)
{
// Date range for the historical data
// (presumably dtpUpdateFromDate / dtpUpdateToDate are date-picker controls -- confirm against the form).
DateTime startDate = dtpUpdateFromDate.Value;
DateTime endDate = dtpUpdateToDate.Value;
string interval = "1d"; //intervals: 1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo
// Set URL for API request: period1/period2 are the whole seconds elapsed since 1970-01-01.
// NOTE(review): the (int) casts cannot represent values past 2038-01-19 -- consider long.
string url = string.Format("https://query1.finance.yahoo.com/v7/finance/download/{0}?period1={1}&period2={2}&interval={3}&events=history",
symbol, (int)(startDate.Subtract(new DateTime(1970, 1, 1))).TotalSeconds, (int)(endDate.Subtract(new DateTime(1970, 1, 1))).TotalSeconds, interval);
// Create HttpClient with custom user-agent
// NOTE(review): this client is not passed to GetWebContentAsync below; whether the elided
// part of the body uses it cannot be told from here.
HttpClient httpClient = new HttpClient();
httpClient.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0");
// Read response data
var responseData = await GetWebContentAsync(url);
... }
下面是我获取 crumb 和 cookie 的函数(即下方代码中的 RevalidateCrumbAsync 和 RevalidateCookiesAsync)。
crumb 取自 https://finance.yahoo.com/quote/{symbol} 页面(使用给定的或默认的 symbol),cookie 则来自 https://login.yahoo.com/。
主要问题(正如我在消息中所说)是这段代码还没有经过实战检验,
因为即使不使用 crumb 或 cookie,我也还没有遇到过拿不到完整 csv 数据的情况。
等真正碰到服务器拒绝正常响应时,可能还需要做一些调整/改进。
using System;
using System.Threading.Tasks;
using System.Net.Http;
// Sample workload: daily ("1d") history for five tickers, all ending on 2019-12-31.
var rangeEnd = new DateTime(2019, 12, 31);
QuoteRequest[] requests =
{
    new("GOOG", new DateTime(2004, 1, 1), rangeEnd, "1d"),
    new("MSFT", new DateTime(2004, 1, 1), rangeEnd, "1d"),
    new("INTC", new DateTime(2004, 1, 1), rangeEnd, "1d"),
    new("AMZN", new DateTime(2004, 12, 1), rangeEnd, "1d"),
    new("IBM", new DateTime(2004, 12, 1), rangeEnd, "1d"),
};

// `cookie` is read by newConfiguredHttpClient, so it starts out empty and is only
// filled in after the crumb has been fetched (same request order as before).
string cookie = string.Empty;
string crumb = await RevalidateCrumbAsync();
cookie = await RevalidateCookiesAsync();

// Download each request in turn and report the size of the returned payload.
foreach (QuoteRequest request in requests)
{
    Console.WriteLine(request);

    var (ticker, rangeStart, rangeStop, step) = request;
    string csv = await DownloadQuoteFromYahooAsync(ticker, rangeStart, rangeStop, step, crumb);

    Console.WriteLine($"{csv.Length} bytes");
    Console.WriteLine();
}

// End of the top-level program; only local function declarations follow.
return;
// Creates a fresh HttpClient with a browser-like User-Agent and a 15 second timeout.
// When useCookie is true and the captured `cookie` string is non-empty, that string is
// sent as the Cookie header on every request made through the returned client.
HttpClient newConfiguredHttpClient(bool useCookie = false)
{
    var client = new HttpClient
    {
        Timeout = TimeSpan.FromSeconds(15),
    };

    var defaultHeaders = client.DefaultRequestHeaders;
    defaultHeaders.Add(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0");

    bool attachCookie = useCookie && cookie.Length > 0;
    if (attachCookie)
    {
        defaultHeaders.Add("Cookie", cookie);
    }

    return client;
}
// Fetches the Yahoo Finance quote page for `symbol` and extracts the value that follows
// the first `"crumb":"` marker in the returned HTML.
// Returns the crumb, or an empty string when the marker or its closing quote is not found.
async Task<string> RevalidateCrumbAsync(string symbol = "GOOG")
{
    const string marker = "\"crumb\":\"";

    var url = $"https://finance.yahoo.com/quote/{symbol}";

    // A new client is created per call, so release it (and the response) when done.
    using HttpClient httpClient = newConfiguredHttpClient();
    using HttpResponseMessage response = await httpClient.GetAsync(url);
    var html = await response.Content.ReadAsStringAsync();

    // Ordinal search: the marker is a literal token, not linguistic text.
    var markerPos = html.IndexOf(marker, StringComparison.Ordinal);
    if (markerPos < 0)
    {
        return "";
    }

    var valueStart = markerPos + marker.Length;
    var valueEnd = html.IndexOf('"', valueStart);
    if (valueEnd < 0)
    {
        return "";
    }

    // The value is lifted from JSON embedded in the page, where '/' can be written as the
    // escape sequence \u002F; decode it so the crumb is usable as a plain string.
    return html.Substring(valueStart, valueEnd - valueStart).Replace("\\u002F", "/");
}
// Requests https://login.yahoo.com/ and converts the Set-Cookie response headers into a
// single Cookie request-header value of the form "name1=value1; name2=value2".
// Returns an empty string when the response carries no Set-Cookie header.
async Task<string> RevalidateCookiesAsync()
{
    var url = "https://login.yahoo.com/";

    // A new client is created per call, so release it (and the response) when done.
    using HttpClient httpClient = newConfiguredHttpClient();
    using HttpResponseMessage response = await httpClient.GetAsync(url);

    if (!response.Headers.TryGetValues("Set-Cookie", out var setCookieValues))
    {
        return "";
    }

    var cookies = new System.Text.StringBuilder();
    foreach (var setCookie in setCookieValues)
    {
        if (cookies.Length > 0)
        {
            cookies.Append("; ");
        }

        // Keep only the leading "name=value" pair; anything after the first ';' is
        // cookie attributes that do not belong in a Cookie request header.
        var attributesStart = setCookie.IndexOf(';');
        cookies.Append(attributesStart > 0 ? setCookie.Substring(0, attributesStart) : setCookie);
    }

    return cookies.ToString();
}
// Downloads historical quotes for `symbol` from the Yahoo Finance v7 download endpoint.
//   startDate / endDate : range bounds, sent as whole seconds elapsed since 1970-01-01
//   interval            : value of the `interval` query parameter (e.g. "1d")
//   crumb               : optional crumb; appended as `&crumb=...` when non-empty
// Writes the request URL and the response status to the console and returns the response
// body as a string, whatever the status code was.
async Task<string> DownloadQuoteFromYahooAsync(string symbol, DateTime startDate, DateTime endDate, string interval, string crumb = "")
{
    // Set URL for API request.
    // long instead of int: a 32-bit value cannot hold second counts past 2038-01-19.
    var unixEpoch = new DateTime(1970, 1, 1);
    var period1 = (long)startDate.Subtract(unixEpoch).TotalSeconds;
    var period2 = (long)endDate.Subtract(unixEpoch).TotalSeconds;
    var url = $"https://query1.finance.yahoo.com/v7/finance/download/{symbol}?period1={period1}&period2={period2}&interval={interval}&events=history";
    if (!string.IsNullOrEmpty(crumb))
    {
        // Percent-encode the crumb so characters such as '/', '+' or '&' cannot corrupt the query string.
        url += "&crumb=" + Uri.EscapeDataString(crumb);
    }
    Console.WriteLine(url);

    // A new client is created per call, so release it (and the response) when done.
    using HttpClient httpClient = newConfiguredHttpClient(true);
    using HttpResponseMessage response = await httpClient.GetAsync(url).ConfigureAwait(false);
    Console.WriteLine($"status = {response.StatusCode}");
    return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
}
/// <summary>Parameters of one historical-quote download.</summary>
/// <param name="symbol">Ticker symbol to download.</param>
/// <param name="from">Start of the requested date range.</param>
/// <param name="to">End of the requested date range.</param>
/// <param name="interval">Sampling interval string, e.g. "1d".</param>
internal record QuoteRequest(
    string symbol,
    DateTime from,
    DateTime to,
    string interval);