如何捕获网络(XHR)? (硒4)

问题描述 投票:0回答:1

如何捕获网络(XHR)?您可以在 Chrome 中跟踪所需的包 (XHR)(打开 DevTools (F12) → 打开选项卡“网络”→ 选择过滤器“Fetch/XHR”)。

我找到了使用 BrowserMob Proxy 捕获流量的解决方案。但我想使用 Selenium 4,它可以与 CDP(Chrome DevTools Protocol)一起使用。

我可以成功接收 XHR 包(状态 200)。但我无法获取某些 XHR 包的主体 (getResponseBody)。

namespace SeleniumCDP
{
    using System;
    using System.Collections.Concurrent;
    using System.Linq;
    using System.Threading.Tasks;
    using OpenQA.Selenium;
    using OpenQA.Selenium.Chrome;
    using OpenQA.Selenium.DevTools;
    using DevToolsVer = OpenQA.Selenium.DevTools.V93;
 
 
    class Program
    {
        private static IWebDriver Driver = null;
        private static IDevTools Tools = null;
        private static IDevToolsSession Session = null;
        private static DevToolsVer.DevToolsSessionDomains Domains = null;
 
        private static ConcurrentBag<Task<Response>> CollectionXHR = null;
 
        public struct Response
        {
            public string RequestId { get; set; }
            public string ResponseUrl { get; set; }
            public long ResponseStatus { get; set; }
            public bool ResponseBodySuccess { get; set; }
            public string ResponseBody { get; set; }
        }
 
        private static async Task Main()
        {
            Driver = new ChromeDriver();
            Driver.Manage().Timeouts().ImplicitWait = new TimeSpan(0, 0, 10);
            Driver.Manage().Timeouts().AsynchronousJavaScript = new TimeSpan(0, 0, 30);
            Driver.Manage().Timeouts().PageLoad = new TimeSpan(0, 0, 30);
            Driver.Manage().Window.Maximize();
 
            Tools = Driver as IDevTools;
 
            Session = Tools.GetDevToolsSession();
 
            Domains = Session.GetVersionSpecificDomains<DevToolsVer.DevToolsSessionDomains>();
            await Domains.Network.Enable(new DevToolsVer.Network.EnableCommandSettings());
 
            // Create storage
            CollectionXHR = new ConcurrentBag<Task<Response>>();
 
            // Enable receiving packs
            Domains.Network.ResponseReceived += ResponseReceived;
 
            // Some actions
            Instagram("username", "password"); // !!!
 
            // Disable receiving packs
            Domains.Network.ResponseReceived -= ResponseReceived;
 
            // Waiting for completion
            Task.WaitAll(CollectionXHR.ToArray());
 
            // Number of failures
            var failResponseBody = CollectionXHR.Where(w => w.Result.ResponseBodySuccess == false).Count();
 
            // Info
            string log = string.Empty;
            foreach (var i in CollectionXHR.Where(w => w.Result.ResponseBodySuccess == false).ToList())
            {
                log += $"RequestId = {i.Result.RequestId} | "; ;
                log += $"ResponseStatus = {i.Result.ResponseStatus} | ";
                log += $"ResponseBodySuccess = {i.Result.ResponseBodySuccess} | ";
                log += $"ResponseBody = {i.Result.ResponseBody} \n";
            }
            
            
        }
 
        private static void ResponseReceived(object sender, DevToolsVer.Network.ResponseReceivedEventArgs e)
        {
            if (e.Type == DevToolsVer.Network.ResourceType.XHR)
            {
                CollectionXHR.Add(GetResponseBodyAsync(e));
            }
        }
 
        private static async Task<Response> GetResponseBodyAsync(DevToolsVer.Network.ResponseReceivedEventArgs e)
        {
            try
            {
                var cmd = new DevToolsVer.Network.GetResponseBodyCommandSettings();
                cmd.RequestId = e.RequestId;
 
                var data = await Domains.Network.GetResponseBody(cmd);
 
                return new Response()
                {
                    RequestId = e.RequestId,
                    ResponseUrl = e.Response.Url,
                    ResponseStatus = e.Response.Status,
                    ResponseBodySuccess = true,
                    ResponseBody = data.Body
                };
            }
            catch (Exception ex)
            {
                return new Response()
                {
                    RequestId = e.RequestId,
                    ResponseUrl = e.Response.Url,
                    ResponseStatus = e.Response.Status,
                    ResponseBodySuccess = false,
                    ResponseBody = $"{ex.GetType()}: {ex.Message}"
                };
            }
        }
 
        private static void Instagram(string username, string password)
        {
            // Go to Instagram
            Driver.Navigate().GoToUrl("https://www.instagram.com/");
 
            // Login
            {
                var byUsernameInput = By.XPath("//form[@id='loginForm']//input[@name='username']");
                var byPasswordInput = By.XPath("//form[@id='loginForm']//input[@name='password']");
                var byLoginButton = By.XPath("//form[@id='loginForm']//button[@type='submit']");
 
                if (Driver.FindElements(byLoginButton).Count > 0)
                {
                    Driver.FindElement(byUsernameInput).SendKeys(username);
                    Driver.FindElement(byPasswordInput).SendKeys(password);
                    Driver.FindElement(byLoginButton).Click();
                }
            }
 
            // Go to direct
            {
                var byLink = By.XPath("//a[@href='/direct/inbox/']");
 
                if (Driver.FindElements(byLink).Count > 0)
                {
                    Driver.FindElement(byLink).Click();
                }
            }
        }
    }
}

如果您多次运行我的代码,您会发现failResponseBody大于0。异常包含以下消息之一:

  • Network.getResponseBody:未找到具有给定标识符的资源
  • Network.getResponseBody:未找到具有给定标识符的资源的数据

我做错了什么?

selenium selenium-webdriver
1个回答
0
投票

试试这个代码,它对我有用。

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    chrome_options = Options()
    chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
    driver=webdriver.Chrome(options=chrome_options)
    perfs = driver.get_log('performance')
    for log_data in perfs:
        message_ = log_data['message']
        try:
            log_json = json.loads(message_)
            log = log_json['message']
        except:pass
        if log['method'] != 'Network.responseReceived':continue
        if log['params']['type']!='Fetch' or log['params']['type']!='XHR':continue
        response = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': log['params']['requestId']})
        
© www.soinside.com 2019 - 2024. All rights reserved.