我一直在使用 C# 开发浏览器,我需要从 URL 下载所有文件。问题是并非所有文件都已下载。
我怀疑我的功能实现可能存在问题。有人可以帮助我确定问题所在并提出解决方案吗?
这是我下载文件的代码片段:
/// <summary>
/// Downloads resources by URL into a local directory and remembers, per host
/// and URL path, which local file each resource was saved to.
/// </summary>
public class ResourceManager
{
    // A single shared client: creating a new HttpClient per request can
    // exhaust sockets under load, and HttpClient is safe to reuse.
    private static readonly HttpClient s_client = new();

    private readonly string _dataPath;

    // host -> (absolute URL path -> local file name)
    private readonly Dictionary<string, Dictionary<string, string>> _data = new();

    /// <summary>Creates a manager that stores downloaded files under <paramref name="dataPath"/>.</summary>
    /// <param name="dataPath">Directory that downloaded files are written into.</param>
    public ResourceManager(string dataPath)
    {
        _dataPath = dataPath;
    }

    /// <summary>
    /// Performs a GET on <paramref name="url"/> and writes the response body to <paramref name="path"/>.
    /// </summary>
    /// <param name="url">Address of the resource to fetch.</param>
    /// <param name="path">Local file that receives the response body.</param>
    /// <returns><c>true</c> when the server answered with a success status and the body
    /// was fully written; <c>false</c> for a non-success status.</returns>
    private static bool DownloadResource(string url, string path)
    {
        using var request = new HttpRequestMessage(HttpMethod.Get, url);
        using var response = s_client.Send(request);
        if (!response.IsSuccessStatusCode)
        {
            return false;
        }

        // Copy synchronously so the whole body is on disk before the streams are
        // disposed. The previous fire-and-forget CopyToAsync returned immediately,
        // and the file was closed while the copy was still in flight.
        using var body = response.Content.ReadAsStream();

        // FileMode.Create truncates an existing file, so a shorter new body does
        // not leave stale bytes from an earlier download at the end of the file.
        using var fs = new FileStream(path, FileMode.Create, FileAccess.Write);
        body.CopyTo(fs);
        return true;
    }

    /// <summary>
    /// Returns the local file for <paramref name="url"/>, downloading it first when it is
    /// not cached or the cached file no longer exists on disk.
    /// </summary>
    /// <param name="url">Absolute URL of the resource.</param>
    /// <param name="fileName">On success, the local file path; <c>null</c> on failure.</param>
    /// <returns><c>true</c> when the resource is available locally; otherwise <c>false</c>.</returns>
    public bool GetResource(string url, out string fileName)
    {
        var myUri = new Uri(url);
        var host = myUri.Host;
        // NOTE(review): AbsolutePath excludes the query string, so URLs that differ
        // only in their query share one cache entry and one file — confirm this is intended.
        var path = myUri.AbsolutePath;

        if (!_data.TryGetValue(host, out var files))
        {
            files = new Dictionary<string, string>();
            _data.Add(host, files);
        }

        files.TryGetValue(path, out fileName);
        if (!string.IsNullOrEmpty(fileName) && File.Exists(fileName))
        {
            return true;
        }

        fileName = Path.Combine(_dataPath, $"{host}__{Util.ComputeHash(path)}");
        if (DownloadResource(url, fileName))
        {
            // Indexer assignment instead of Add: the path may already be cached
            // (its file was deleted from disk), and Add would throw on the duplicate key.
            files[path] = fileName;
            return true;
        }

        fileName = null;
        return false;
    }
}
HttpClient 无法从 URL 获取所有资源。我建议您尝试 HTTrack(https://www.httrack.com/page/2/),它可以将网站资源抓取到本地文件夹,抓取后即可离线浏览网站。