如何捕获网络请求(XHR)?在 Chrome 中可以这样查看所需的 XHR 请求:打开 DevTools (F12) → 打开“网络”(Network)选项卡 → 选择过滤器“Fetch/XHR”。
我找到了使用 BrowserMob Proxy 捕获流量的解决方案。但我想使用 Selenium 4,它可以与 CDP(Chrome DevTools Protocol)一起使用。
我可以成功接收到 XHR 响应(状态码 200),但其中某些 XHR 响应的响应体无法通过 getResponseBody 获取。
namespace SeleniumCDP
{
using System;
using System.Collections.Concurrent;
using System.Linq;
using System.Threading.Tasks;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using OpenQA.Selenium.DevTools;
using DevToolsVer = OpenQA.Selenium.DevTools.V93;
/// <summary>
/// Drives Chrome through Selenium, listens to the DevTools Network domain and collects the
/// bodies of all XHR responses seen while the scripted actions run.
/// </summary>
/// <remarks>
/// The body of a response is requested only after BOTH Network.responseReceived and
/// Network.loadingFinished have been observed for the same request id. Asking for the body
/// as soon as responseReceived fires can fail because the body may not be fully loaded yet.
/// NOTE(review): bodies can still become unavailable when the page navigates away before the
/// fetch completes; such cases are reported as failures rather than hidden.
/// </remarks>
class Program
{
    // How long Main keeps listening after the scripted actions so in-flight XHRs can finish.
    private static readonly TimeSpan PendingGracePeriod = TimeSpan.FromSeconds(5);

    // Polling interval used while waiting for in-flight XHRs.
    private static readonly TimeSpan PendingPollInterval = TimeSpan.FromMilliseconds(100);

    private static IWebDriver Driver = null;
    private static IDevTools Tools = null;
    private static IDevToolsSession Session = null;
    private static DevToolsVer.DevToolsSessionDomains Domains = null;

    // One task per XHR response; each task resolves to the captured body or to a failure record.
    private static readonly ConcurrentBag<Task<Response>> CollectionXHR =
        new ConcurrentBag<Task<Response>>();

    // XHR responses whose headers arrived but whose body fetch has not been started yet.
    private static readonly ConcurrentDictionary<string, DevToolsVer.Network.ResponseReceivedEventArgs> PendingXHR =
        new ConcurrentDictionary<string, DevToolsVer.Network.ResponseReceivedEventArgs>();

    // Request ids for which loadingFinished was seen before (or without) a matching XHR response.
    // Entries for non-XHR resources are never removed; the set lives only as long as the process.
    private static readonly ConcurrentDictionary<string, bool> FinishedRequests =
        new ConcurrentDictionary<string, bool>();

    /// <summary>Result of trying to read one XHR response body.</summary>
    public struct Response
    {
        public string RequestId { get; set; }
        public string ResponseUrl { get; set; }
        public long ResponseStatus { get; set; }
        public bool ResponseBodySuccess { get; set; }
        // Holds the body on success, or "<exception type>: <message>" / a reason text on failure.
        public string ResponseBody { get; set; }
    }

    /// <summary>
    /// Starts Chrome, records XHR responses while <see cref="Instagram"/> runs, then prints a
    /// summary and the details of every response whose body could not be read.
    /// </summary>
    private static async Task Main()
    {
        Driver = new ChromeDriver();
        try
        {
            Driver.Manage().Timeouts().ImplicitWait = TimeSpan.FromSeconds(10);
            Driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(30);
            Driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(30);
            Driver.Manage().Window.Maximize();

            // Direct cast: fails loudly here instead of with a NullReferenceException one line later.
            Tools = (IDevTools)Driver;
            Session = Tools.GetDevToolsSession();
            Domains = Session.GetVersionSpecificDomains<DevToolsVer.DevToolsSessionDomains>();

            // Subscribe before enabling the domain so no early event is missed.
            Domains.Network.ResponseReceived += ResponseReceived;
            Domains.Network.LoadingFinished += LoadingFinished;
            await Domains.Network.Enable(new DevToolsVer.Network.EnableCommandSettings());

            // Some actions
            Instagram("username", "password"); // !!!

            // Give XHRs that are still loading a bounded amount of time to finish.
            await WaitForPendingAsync(PendingGracePeriod);

            // Disable receiving packs
            Domains.Network.ResponseReceived -= ResponseReceived;
            Domains.Network.LoadingFinished -= LoadingFinished;

            // Anything still pending never finished loading; record it instead of dropping it.
            foreach (var requestId in PendingXHR.Keys)
            {
                if (PendingXHR.TryRemove(requestId, out var unfinished))
                {
                    CollectionXHR.Add(Task.FromResult(
                        CreateResponse(unfinished, false, "Loading did not finish before capture stopped")));
                }
            }

            // Waiting for completion (the tasks never fault: failures are captured in Response).
            Response[] results = await Task.WhenAll(CollectionXHR.ToArray());

            // Number of failures
            var failed = results.Where(r => !r.ResponseBodySuccess).ToList();
            Console.WriteLine($"XHR responses = {results.Length} | failResponseBody = {failed.Count}");

            // Info
            var lines = failed.Select(r =>
                $"RequestId = {r.RequestId} | " +
                $"ResponseStatus = {r.ResponseStatus} | " +
                $"ResponseBodySuccess = {r.ResponseBodySuccess} | " +
                $"ResponseBody = {r.ResponseBody} \n");
            Console.Write(string.Concat(lines));
        }
        finally
        {
            // Always shut down the browser and the driver process, even when something above throws.
            Driver.Quit();
        }
    }

    /// <summary>
    /// Waits until no XHR is waiting for its loadingFinished event, or until
    /// <paramref name="timeout"/> elapses, whichever comes first.
    /// </summary>
    private static async Task WaitForPendingAsync(TimeSpan timeout)
    {
        DateTime deadline = DateTime.UtcNow + timeout;
        while (!PendingXHR.IsEmpty && DateTime.UtcNow < deadline)
        {
            await Task.Delay(PendingPollInterval);
        }
    }

    /// <summary>
    /// Network.responseReceived handler: remembers XHR responses so their body can be read
    /// once loading has finished. Other resource types are ignored.
    /// </summary>
    private static void ResponseReceived(object sender, DevToolsVer.Network.ResponseReceivedEventArgs e)
    {
        if (e.Type != DevToolsVer.Network.ResourceType.XHR)
        {
            return;
        }

        PendingXHR[e.RequestId] = e;

        // Covers the case where loadingFinished for this request was handled first.
        if (FinishedRequests.TryRemove(e.RequestId, out _))
        {
            StartBodyFetch(e.RequestId);
        }
    }

    /// <summary>
    /// Network.loadingFinished handler: starts the body fetch for the matching XHR, if any.
    /// </summary>
    private static void LoadingFinished(object sender, DevToolsVer.Network.LoadingFinishedEventArgs e)
    {
        // Written before the lookup so that ResponseReceived, running concurrently, sees it.
        FinishedRequests[e.RequestId] = true;
        StartBodyFetch(e.RequestId);
    }

    /// <summary>
    /// Starts reading the body for <paramref name="requestId"/> if an XHR response is pending
    /// for it. TryRemove guarantees the fetch is started at most once per request, no matter
    /// which handler gets here first.
    /// </summary>
    private static void StartBodyFetch(string requestId)
    {
        if (PendingXHR.TryRemove(requestId, out var response))
        {
            FinishedRequests.TryRemove(requestId, out _);
            CollectionXHR.Add(GetResponseBodyAsync(response));
        }
    }

    /// <summary>
    /// Requests the body of the given response through Network.getResponseBody.
    /// </summary>
    /// <returns>
    /// A <see cref="Response"/> with ResponseBodySuccess = true and the body on success;
    /// otherwise ResponseBodySuccess = false and the exception type and message in ResponseBody.
    /// This method does not throw.
    /// </returns>
    private static async Task<Response> GetResponseBodyAsync(DevToolsVer.Network.ResponseReceivedEventArgs e)
    {
        try
        {
            var cmd = new DevToolsVer.Network.GetResponseBodyCommandSettings
            {
                RequestId = e.RequestId
            };
            var data = await Domains.Network.GetResponseBody(cmd);
            return CreateResponse(e, true, data.Body);
        }
        catch (Exception ex)
        {
            // Deliberately broad: every failure is turned into a record so Main can report it.
            return CreateResponse(e, false, $"{ex.GetType()}: {ex.Message}");
        }
    }

    /// <summary>Builds a <see cref="Response"/> from the event data plus the fetch outcome.</summary>
    private static Response CreateResponse(
        DevToolsVer.Network.ResponseReceivedEventArgs e, bool success, string body)
    {
        return new Response
        {
            RequestId = e.RequestId,
            ResponseUrl = e.Response.Url,
            ResponseStatus = e.Response.Status,
            ResponseBodySuccess = success,
            ResponseBody = body
        };
    }

    /// <summary>
    /// Opens Instagram, logs in when the login form is present, then opens the direct inbox
    /// when its link is present. Missing elements are skipped silently.
    /// </summary>
    private static void Instagram(string username, string password)
    {
        // Go to Instagram
        Driver.Navigate().GoToUrl("https://www.instagram.com/");

        // Login
        {
            var byUsernameInput = By.XPath("//form[@id='loginForm']//input[@name='username']");
            var byPasswordInput = By.XPath("//form[@id='loginForm']//input[@name='password']");
            var byLoginButton = By.XPath("//form[@id='loginForm']//button[@type='submit']");
            if (Driver.FindElements(byLoginButton).Count > 0)
            {
                Driver.FindElement(byUsernameInput).SendKeys(username);
                Driver.FindElement(byPasswordInput).SendKeys(password);
                Driver.FindElement(byLoginButton).Click();
            }
        }

        // Go to direct
        {
            var byLink = By.XPath("//a[@href='/direct/inbox/']");
            if (Driver.FindElements(byLink).Count > 0)
            {
                Driver.FindElement(byLink).Click();
            }
        }
    }
}
}
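如果多次运行我的代码,您会发现 failResponseBody 大于 0。异常包含以下消息之一(例如):“No resource with given identifier found” 或 “No data found for resource with given identifier”。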
我做错了什么?
试试这个代码,它对我有用。
# Collect the bodies of Fetch/XHR responses recorded in Chrome's performance log.
import json

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
# Ask chromedriver to record DevTools events in the 'performance' log.
chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
driver = webdriver.Chrome(options=chrome_options)

# NOTE(review): nothing is loaded between creating the driver and reading the log,
# so the log will be empty as written. Navigate / interact with the page here first.

perfs = driver.get_log('performance')

# requestId -> result of Network.getResponseBody
responses = {}
# requestId -> error text for bodies that could not be read
failures = {}

for log_data in perfs:
    try:
        log = json.loads(log_data['message'])['message']
    except (KeyError, TypeError, ValueError):
        # Entry is not the expected JSON envelope; skip it instead of reusing a stale `log`.
        continue

    if log.get('method') != 'Network.responseReceived':
        continue

    params = log['params']
    # Keep only Fetch and XHR responses (the `!= ... or != ...` form is always true).
    if params['type'] not in ('Fetch', 'XHR'):
        continue

    request_id = params['requestId']
    try:
        response = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': request_id})
    except WebDriverException as exc:
        # The body may no longer be available for this request; record it and keep going.
        failures[request_id] = str(exc)
        continue
    responses[request_id] = response