尝试使用 Python 抓取 GraphQL 时不断收到错误 403

问题描述 投票:0回答:1

我正在尝试通过请求抓取一个使用 GraphQL 的网站。为此,我将链接复制为 cURL 并使用该值创建我的有效负载,但我不断收到以下错误。

requests.exceptions.HTTPError: 403 Client Error: Forbidden for url: https://www.vrbo.com/graphql

如果有帮助,这里是 cURL 值:

curl "https://www.vrbo.com/graphql" --compressed -X POST -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0" -H "Accept: */*" -H "Accept-Language: en-GB,en;q=0.5" -H "Accept-Encoding: gzip, deflate, br" -H "content-type: application/json" -H "client-info: shopping-pwa,unknown,unknown" -H "x-page-id: page.Hotel-Search,H,20" -H "x-enable-apq: true" -H "Origin: https://www.vrbo.com" -H "Connection: keep-alive" -H "Referer: https://www.vrbo.com/search?adults=1&amenities=&children=&d1=&d2=&destination=&latLong=45.31271^%^2C-72.50357&mapBounds=45.25277^%^2C-72.72626&mapBounds=45.37265^%^2C-72.28089&neighborhood=&poi=&pwaDialog=&regionId=&selected=&semdtl=&sort=RECOMMENDED&theme=&userIntent=" -H "Cookie: linfo=v.4,^|0^|0^|255^|1^|0^|^|^|^|^|^|^|^|1033^|0^|0^|^|0^|0^|0^|-1^|-1; CRQS=t^|9001\`s^|9001001\`l^|en_US\`c^|USD; currency=USD; cesc=^%^7B^%^22lpe^%^22^%^3A^%^5B^%^221cf5adc0-4b40-43fe-b99c-6c488aa39093^%^22^%^2C1698076047571^%^5D^%^2C^%^22marketingClick^%^22^%^3A^%^5B^%^22false^%^22^%^2C1698076047571^%^5D^%^2C^%^22lmc^%^22^%^3A^%^5B^%^22DIRECT.WEB^%^22^%^2C1698076047571^%^5D^%^2C^%^22hitNumber^%^22^%^3A^%^5B^%^2217^%^22^%^2C1698076047571^%^5D^%^2C^%^22amc^%^22^%^3A^%^5B^%^22DIRECT.WEB^%^22^%^2C1698076047571^%^5D^%^2C^%^22visitNumber^%^22^%^3A^%^5B^%^2235^%^22^%^2C1698074905441^%^5D^%^2C^%^22ape^%^22^%^3A^%^5B^%^221cf5adc0-4b40-43fe-b99c-6c488aa39093^%^22^%^2C1698076047571^%^5D^%^2C^%^22cidVisit^%^22^%^3A^%^5B^%^22Brand.DTI^%^22^%^2C1698076047571^%^5D^%^2C^%^22entryPage^%^22^%^3A^%^5B^%^22Homepage^%^22^%^2C1698076047571^%^5D^%^2C^%^22cid^%^22^%^3A^%^5B^%^22SEO.U.Dark^%^22^%^2C1697725122983^%^5D^%^7D; hav=07a07f5c-f8e3-54b7-c37c-8e42e59bed32; MC1=GUID=07a07f5cf8e354b7c37c8e42e59bed32; DUAID=07a07f5c-f8e3-54b7-c37c-8e42e59bed32; ha-device-id=07a07f5c-f8e3-54b7-c37c-8e42e59bed32; hav=07a07f5c-f8e3-54b7-c37c-8e42e59bed32; 
AMCV_C00802BE5330A8350A490D4C^%^40AdobeOrg=1585540135^%^7CMCIDTS^%^7C19654^%^7CMCMID^%^7C29243024125030748754161386097619444504^%^7CMCAID^%^7CNONE^%^7CMCOPTOUT-1698082145s^%^7CNONE^%^7CMCAAMLH-1698679745^%^7C7^%^7CMCAAMB-1698679745^%^7Cj8Odv6LonN4r3an7LhD3WZrU1bUpAkFkkiY1ncBR96t2PTI^%^7CvVersion^%^7C4.4.0; _gcl_au=1.1.1828779423.1692724837.798102264.1695756044.1695756070; _tq_id.TV-8181636390-1.1968=b4c5ffbf637f1046.1692724837.0.1695835932..; xdid=d436fb56-d0f3-4d1d-bac9-c170366ee40a^|1692724852^|vrbo.com; _cls_v=65c13a44-5f9d-46a0-ae29-45221189920a; OIP=ccpa^|0\`ts^|1695835928; EG_SESSIONTOKEN=VvLK-Bnf3Q2Y4LdZeP5ul6QX6E5eX1p7AAeluK6qnqQ:jhDtke_yq7eGLwpF7Q7g1loEpyt1aj5Z-1Cg5TIndfw; tracker_device=c2c39144-dda9-4088-86d2-d9383a85c3c8; _ga=GA1.2.241387532.1692724932; QuantumMetricUserID=d1adf7742b0fe3fad3a4e14d83f7c03e; OptanonConsent=isGpcEnabled=1&datestamp=Mon+Oct+23+2023+11^%^3A29^%^3A05+GMT-0400+(Eastern+Daylight+Saving+Time)&version=202306.2.0&browserGpcFlag=1&isIABGlobal=false&consentId=822217831&hosts=&interactionCount=2&landingPath=NotLandingPage&groups=C0001^%^3A1^%^2CSPD_BG^%^3A0^%^2CC0002^%^3A0^%^2CC0004^%^3A0&geolocation=CA^%^3BQC&AwaitingReconsent=false; OptanonAlertBoxClosed=2023-09-26T19:20:33.793Z; s_fid=19A615C344BCF324-16BD5FC410FA8AFC; aspp=v.1,0^|^|^|^|^|^|^|^|^|^|^|^|^|; ha-trip-prst=^%^7B^%^22arrival^%^22^%^3A^%^22^%^22^%^2C^%^22departure^%^22^%^3A^%^22^%^22^%^2C^%^22numberOfAdults^%^22^%^3A0^%^2C^%^22numberOfChildren^%^22^%^3A0^%^2C^%^22petIncluded^%^22^%^3Afalse^%^7D; tpid=v.1,9001; ensighten:source={""source"":null,""medium"":null,""lastAffiliate"":null,""sessionid"":""9b459e5d-892b-4acd-a9f6-ff7a51bab0c7""}; _uetvid=3551c180411011ee8dd137a02db34195; CRQSS=e^|1; iEAPID=1; HMS=8227bf9c-300e-4d02-aabf-8db090bdc916; has=27a5f01a-185a-ee37-0cc9-a178bfffbeb7; 
ak_bmsc=38F8C5582112F2A215557FC5294F6211~000000000000000000000000000000~YAAQPQp8aEn5jlyLAQAAbcsmXRXvh6mn5382/JbkLMBTsvKM3oKqYfZVO7Cr0ufDt9fD4CgxrUaiOljV38/RYQLAKW/QJiAL6nTjm7cLCQZtkGKwu9/oPWdbnKE/GrrS7f1jKYLn4p0tExykuWqRd6vgJlYvJ9+5toMorNSuCVitFKeZoAi8hUmgEjsMdjUpCJ04+Bn9gT92VrGUbO93UxvQJfF37Vj3qjOP/PcfWAffEN4gZKntbD8ID3kHA5BbD2mnZ5r8NCSSURW8UfS6SkrmMzy0AbVL2wy7oR+Z0+UFS+mQjU8h+tNBApugQXrZv1tRddFQWVNbtFjRBg2a3xIozDPBu9rW+ajaJ4jr/AzRFpzGQ8huoxnwj+7BvKEgwUQMyZEBFtif; s_ppv=^%^5B^%^5BB^%^5D^%^5D; s_ips=1; s_tp=604; bm_sv=E0AF85F6EF0621DE2AED7395E38A3762~YAAQPQp8aCPekFyLAQAATDk4XRUPvKmkEDYHNPgtINozGUFRcrIjok3QLEPen+TJxbh2jTQIiXQRBIWQhvgCS/K/3hfL883+s9o4B110HTpwGX1pbeh6DyMjkWNyxjv+9mwx3en3dk5dm9GFZcKsH1gYuPKCzHwDTAqKOYWQ2ilZZ852c/gjMEBSzlILRICgmplJoL5KGdax7eaV50LK8gf/fJ3xvKxvE8KiZTEGJgpH5EpJWFd/6IqNrNQSaM8=~1; JSESSIONID=AB584C9C9367BB26104F5286B60175EB; eg_ppid=3f9e106a-bc65-4993-8662-cbf6957f0493; session_id=8227bf9c-300e-4d02-aabf-8db090bdc916; page_name=page.Hotel-Search; AMCVS_C00802BE5330A8350A490D4C^%^40AdobeOrg=1; SheetBannerClose=yes; _dd_s=rum=0&expire=1698081797994" -H "Sec-Fetch-Dest: empty" -H "Sec-Fetch-Mode: cors" -H "Sec-Fetch-Site: same-origin" -H "TE: trailers" --data-raw 
"{""variables"":{""context"":{""siteId"":9001001,""locale"":""en_US"",""eapid"":1,""currency"":""USD"",""device"":{""type"":""DESKTOP""},""identity"":{""duaid"":""07a07f5c-f8e3-54b7-c37c-8e42e59bed32"",""expUserId"":""822217831"",""tuid"":""1042106065"",""authState"":""IDENTIFIED""},""privacyTrackingState"":""CAN_TRACK"",""debugContext"":{""abacusOverrides"":^[^],""alterMode"":""RELEASED""}},""criteria"":{""primary"":{""dateRange"":null,""destination"":{""regionName"":null,""regionId"":null,""coordinates"":{""latitude"":45.31271,""longitude"":-72.50357},""pinnedPropertyId"":null,""propertyIds"":null,""mapBounds"":^[{""latitude"":45.25277,""longitude"":-72.72626},{""latitude"":45.37265,""longitude"":-72.28089}^]},""rooms"":^[{""adults"":1,""children"":^[^]}^]},""secondary"":{""counts"":^[^],""booleans"":^[^],""selections"":^[{""id"":""sort"",""value"":""RECOMMENDED""},{""id"":""privacyTrackingState"",""value"":""CAN_NOT_TRACK""},{""id"":""useRewards"",""value"":""SHOP_WITHOUT_POINTS""},{""id"":""searchId"",""value"":""61cf7b54-2c37-4459-88e5-67159de8903f""}^],""ranges"":^[^]}},""destination"":{""regionName"":null,""regionId"":null,""coordinates"":null,""pinnedPropertyId"":null,""propertyIds"":null,""mapBounds"":null},""shoppingContext"":{""multiItem"":null},""returnPropertyType"":false,""includeDynamicMap"":true},""operationName"":""LodgingPwaPropertySearch"",""extensions"":{""persistedQuery"":{""sha256Hash"":""7f81c84fcfa2273784af9e801f999c515569f8bdeca7cc7fcbc669fb79192b9f"",""version"":1}}}"

这是我的代码(抱歉,请求头和 cookie 很长)。我正在尝试抓取的网站是 VRBO。

import json
import requests

# Cookie values exactly as pasted from the browser's "Copy as cURL (Windows)"
# export. That export escapes characters for cmd.exe with a caret ('^|', '^%^'),
# so the carets are NOT part of the real cookie values; they are stripped
# below when building `cookies`.
# NOTE(review): these values were captured from a single browser session and
# are presumably short-lived -- refresh them from a live session if the server
# keeps rejecting the request.
_raw_cookies = {
    'linfo': 'v.4,^|0^|0^|255^|1^|0^|^|^|^|^|^|^|^|1033^|0^|0^|^|0^|0^|0^|-1^|-1',
    'CRQS': 't^|9001`s^|9001001`l^|en_US`c^|USD',
    'currency': 'USD',
    'cesc': '^%^7B^%^22lpe^%^22^%^3A^%^5B^%^221cf5adc0-4b40-43fe-b99c-6c488aa39093^%^22^%^2C1698075034509^%^5D^%^2C^%^22marketingClick^%^22^%^3A^%^5B^%^22false^%^22^%^2C1698075034509^%^5D^%^2C^%^22lmc^%^22^%^3A^%^5B^%^22DIRECT.WEB^%^22^%^2C1698075034509^%^5D^%^2C^%^22hitNumber^%^22^%^3A^%^5B^%^2213^%^22^%^2C1698075034509^%^5D^%^2C^%^22amc^%^22^%^3A^%^5B^%^22DIRECT.WEB^%^22^%^2C1698075034509^%^5D^%^2C^%^22visitNumber^%^22^%^3A^%^5B^%^2235^%^22^%^2C1698074905441^%^5D^%^2C^%^22ape^%^22^%^3A^%^5B^%^221cf5adc0-4b40-43fe-b99c-6c488aa39093^%^22^%^2C1698075034509^%^5D^%^2C^%^22cidVisit^%^22^%^3A^%^5B^%^22Brand.DTI^%^22^%^2C1698075034509^%^5D^%^2C^%^22entryPage^%^22^%^3A^%^5B^%^22Homepage^%^22^%^2C1698075034509^%^5D^%^2C^%^22cid^%^22^%^3A^%^5B^%^22SEO.U.Dark^%^22^%^2C1697725122983^%^5D^%^7D',
    'hav': '07a07f5c-f8e3-54b7-c37c-8e42e59bed32',
    'MC1': 'GUID=07a07f5cf8e354b7c37c8e42e59bed32',
    'DUAID': '07a07f5c-f8e3-54b7-c37c-8e42e59bed32',
    'ha-device-id': '07a07f5c-f8e3-54b7-c37c-8e42e59bed32',
    'AMCV_C00802BE5330A8350A490D4C^%^40AdobeOrg': '1585540135^%^7CMCIDTS^%^7C19654^%^7CMCMID^%^7C29243024125030748754161386097619444504^%^7CMCAID^%^7CNONE^%^7CMCOPTOUT-1698082145s^%^7CNONE^%^7CMCAAMLH-1698679745^%^7C7^%^7CMCAAMB-1698679745^%^7Cj8Odv6LonN4r3an7LhD3WZrU1bUpAkFkkiY1ncBR96t2PTI^%^7CvVersion^%^7C4.4.0',
    '_gcl_au': '1.1.1828779423.1692724837.798102264.1695756044.1695756070',
    '_tq_id.TV-8181636390-1.1968': 'b4c5ffbf637f1046.1692724837.0.1695835932..',
    'xdid': 'd436fb56-d0f3-4d1d-bac9-c170366ee40a^|1692724852^|vrbo.com',
    '_cls_v': '65c13a44-5f9d-46a0-ae29-45221189920a',
    'OIP': 'ccpa^|0`ts^|1695835928',
    'EG_SESSIONTOKEN': 'VvLK-Bnf3Q2Y4LdZeP5ul6QX6E5eX1p7AAeluK6qnqQ:jhDtke_yq7eGLwpF7Q7g1loEpyt1aj5Z-1Cg5TIndfw',
    'tracker_device': 'c2c39144-dda9-4088-86d2-d9383a85c3c8',
    '_ga': 'GA1.2.241387532.1692724932',
    'QuantumMetricUserID': 'd1adf7742b0fe3fad3a4e14d83f7c03e',
    'OptanonConsent': 'isGpcEnabled=1&datestamp=Mon+Oct+23+2023+11^%^3A29^%^3A05+GMT-0400+(Eastern+Daylight+Saving+Time)&version=202306.2.0&browserGpcFlag=1&isIABGlobal=false&consentId=822217831&hosts=&interactionCount=2&landingPath=NotLandingPage&groups=C0001^%^3A1^%^2CSPD_BG^%^3A0^%^2CC0002^%^3A0^%^2CC0004^%^3A0&geolocation=CA^%^3BQC&AwaitingReconsent=false',
    'OptanonAlertBoxClosed': '2023-09-26T19:20:33.793Z',
    's_fid': '19A615C344BCF324-16BD5FC410FA8AFC',
    'aspp': 'v.1,0^|^|^|^|^|^|^|^|^|^|^|^|^|',
    'ha-trip-prst': '^%^7B^%^22arrival^%^22^%^3A^%^22^%^22^%^2C^%^22departure^%^22^%^3A^%^22^%^22^%^2C^%^22numberOfAdults^%^22^%^3A0^%^2C^%^22numberOfChildren^%^22^%^3A0^%^2C^%^22petIncluded^%^22^%^3Afalse^%^7D',
    'tpid': 'v.1,9001',
    # The cmd.exe export doubles every quote (""); the value the browser
    # actually sends is this JSON object, quotes included.
    'ensighten:source': '{"source":null,"medium":null,"lastAffiliate":null,"sessionid":"9b459e5d-892b-4acd-a9f6-ff7a51bab0c7"}',
    '_uetvid': '3551c180411011ee8dd137a02db34195',
    'CRQSS': 'e^|1',
    'iEAPID': '1',
    'HMS': '8227bf9c-300e-4d02-aabf-8db090bdc916',
    'has': '27a5f01a-185a-ee37-0cc9-a178bfffbeb7',
    'ak_bmsc': '38F8C5582112F2A215557FC5294F6211~000000000000000000000000000000~YAAQPQp8aEn5jlyLAQAAbcsmXRXvh6mn5382/JbkLMBTsvKM3oKqYfZVO7Cr0ufDt9fD4CgxrUaiOljV38/RYQLAKW/QJiAL6nTjm7cLCQZtkGKwu9/oPWdbnKE/GrrS7f1jKYLn4p0tExykuWqRd6vgJlYvJ9+5toMorNSuCVitFKeZoAi8hUmgEjsMdjUpCJ04+Bn9gT92VrGUbO93UxvQJfF37Vj3qjOP/PcfWAffEN4gZKntbD8ID3kHA5BbD2mnZ5r8NCSSURW8UfS6SkrmMzy0AbVL2wy7oR+Z0+UFS+mQjU8h+tNBApugQXrZv1tRddFQWVNbtFjRBg2a3xIozDPBu9rW+ajaJ4jr/AzRFpzGQ8huoxnwj+7BvKEgwUQMyZEBFtif',
    's_ppn': 'page.Hotel-Search',
    's_ppv': '^%^5B^%^5BB^%^5D^%^5D',
    's_ips': '1',
    's_tp': '604',
    '_dd_s': 'rum=0&expire=1698075971497',
    'AWSELB': 'D79B53F10ADCF9DDDF09C7B84896C09A6222EC2F5D7767E9135D856FC4ED805E4F688FFB957FB227E1C002DCAC7D058C176547AF19403D743048278F3BF55103EB1E8A0D14',
    'AWSELBCORS': 'D79B53F10ADCF9DDDF09C7B84896C09A6222EC2F5D7767E9135D856FC4ED805E4F688FFB957FB227E1C002DCAC7D058C176547AF19403D743048278F3BF55103EB1E8A0D14',
    'bm_sv': 'E0AF85F6EF0621DE2AED7395E38A3762~YAAQPQp8aGwij1yLAQAAqMMoXRXp0NbCR94zML8grMqZ/zVKCg7vhtNUah7+BBYP52wXtKyIL5gbXxgZ1JMqGtZypWWYAphkhgKQbKuQ3nMlfgAcVwA4TPnrZRN4iE3CDqP6iug2jWIqiGgZ0pezuL6w8f5rrUR0NG4u48UgAgFo0wdoVcsvjaHF74DLgNTGXus5+KYrACDZtDx9FxVdfAeQKCF/sY58IlDxln67QaRiMbhD13OR5BzGAUvCFe0=~1',
    'JSESSIONID': 'AB584C9C9367BB26104F5286B60175EB',
    'eg_ppid': '3f9e106a-bc65-4993-8662-cbf6957f0493',
    'session_id': '8227bf9c-300e-4d02-aabf-8db090bdc916',
    'page_name': 'page.Hotel-Search',
    'AMCVS_C00802BE5330A8350A490D4C^%^40AdobeOrg': '1',
    'SheetBannerClose': 'yes',
}

# Cookies to send with the request: the raw export with the cmd.exe caret
# escapes removed from both names and values (e.g. '^%^7B' -> '%7B',
# '^|' -> '|'). None of the captured values contains a literal caret.
cookies = {
    name.replace('^', ''): value.replace('^', '')
    for name, value in _raw_cookies.items()
}

# Request headers mirroring the browser request captured as cURL.
# Accept-Encoding is deliberately not set here so that `requests` only
# advertises encodings it is able to decode.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0',
    'Accept': '*/*',
    'Accept-Language': 'en-GB,en;q=0.5',
    'content-type': 'application/json',
    'client-info': 'shopping-pwa,unknown,unknown',
    'x-page-id': 'page.Hotel-Search,H,20',
    'x-enable-apq': 'true',
    'Origin': 'https://www.vrbo.com',
    'Connection': 'keep-alive',
    # The Windows cURL export escapes '%' as '^%^' for cmd.exe; the real URL
    # uses plain percent-encoding ('%2C' is an encoded comma).
    'Referer': 'https://www.vrbo.com/search?adults=1&amenities=&children=&d1=&d2=&destination=&latLong=45.29453%2C-72.61171&mapBounds=45.23457%2C-72.8344&mapBounds=45.35449%2C-72.38903&neighborhood=&poi=&pwaDialog=&regionId=&selected=&semdtl=&sort=RECOMMENDED&theme=&userIntent=',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
}

# GraphQL persisted-query payload for the LodgingPwaPropertySearch operation.
# Numeric fields (siteId, latitude/longitude, adults) are sent as JSON numbers,
# matching the body of the request captured from the browser, not as strings.
payload = {
    "variables": {
        "context": {
            "siteId": 9001001,
            "locale": "en_US",
            "eapid": 1,
            "currency": "USD",
            "device": {"type": "DESKTOP"},
            "identity": {
                "duaid": "07a07f5c-f8e3-54b7-c37c-8e42e59bed32",
                "expUserId": "822217831",
                "tuid": "1042106065",
                "authState": "IDENTIFIED",
            },
            "privacyTrackingState": "CAN_TRACK",
            "debugContext": {"abacusOverrides": [], "alterMode": "RELEASED"},
        },
        "criteria": {
            "primary": {
                "dateRange": None,
                "destination": {
                    "regionName": None,
                    "regionId": None,
                    "coordinates": {"latitude": 45.29453, "longitude": -72.61171},
                    "pinnedPropertyId": None,
                    "propertyIds": None,
                    "mapBounds": [
                        {"latitude": 45.23457, "longitude": -72.8344},
                        {"latitude": 45.35449, "longitude": -72.38903},
                    ],
                },
                "rooms": [{"adults": 1, "children": []}],
            },
            "secondary": {
                "counts": [],
                "booleans": [],
                "selections": [
                    {"id": "sort", "value": "RECOMMENDED"},
                    {"id": "privacyTrackingState", "value": "CAN_NOT_TRACK"},
                    {"id": "useRewards", "value": "SHOP_WITHOUT_POINTS"},
                    {"id": "searchId", "value": "9e9447d8-76ad-4de5-b273-9ac953081861"},
                ],
                "ranges": [],
            },
        },
        "destination": {
            "regionName": None,
            "regionId": None,
            "coordinates": None,
            "pinnedPropertyId": None,
            "propertyIds": None,
            "mapBounds": None,
        },
        "shoppingContext": {"multiItem": None},
        "returnPropertyType": False,
        "includeDynamicMap": True,
    },
    "operationName": "LodgingPwaPropertySearch",
    "extensions": {
        "persistedQuery": {
            "sha256Hash": "7f81c84fcfa2273784af9e801f999c515569f8bdeca7cc7fcbc669fb79192b9f",
            "version": 1,
        },
    },
}

# Proxy settings are defined but not passed to the request below; add
# `proxies=proxy` to the call to route traffic through them.
proxy = {'http': 'http://[email protected]:20000',
         'https': 'http://[email protected]:20000'
         }

# Pass the dict itself via `json=`: requests serialises it exactly once.
# Calling json.dumps() first and then using `json=` would encode the body
# twice, so the server would receive a JSON *string* instead of an object.
r = requests.post(
    'https://www.vrbo.com/graphql',
    json=payload,
    headers=headers,
    cookies=cookies,
    timeout=30,  # fail instead of hanging forever on an unresponsive server
)
r.raise_for_status()  # raises requests.exceptions.HTTPError on 4xx/5xx
print(r)
data = r.json()

我尝试过使用和不使用轮换代理,但始终收到相同的错误消息。有人尝试过抓取同一个网站吗?或者有人知道在请求 GraphQL 接口时如何解决这个特定的错误吗?

python web-scraping post python-requests
1个回答
0
投票

我想知道你是否解决了这个问题。我在抓取 Vrbo 时也遇到了困难。

© www.soinside.com 2019 - 2024. All rights reserved.