from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
my_url = 'https://www.eventbrite.com/d/malaysia--kuala-lumpur--85675181/all-
events/?page=1'
#opening connection , downloading page
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
# html parser
page_soup = soup(page_html, "html.parser")
# catch each events
card = page_soup.findAll("div",{"class":"eds-media-card-content__content"})
filename = "Data_Events.csv"
f = open(filename, "w")
headers = "events_name, events_dates, events_location, events_fees\n"
f.write(headers)
for activity in card :
event_activity = activity.findAll("div",{"class":"eds-event-
card__formatted-name--is-clamped"})
events_name = event_activity[0].text
event_date = activity.findAll("div",{"class":"eds-text-bs--fixed eds-
text-color--grey-600 eds-l-mar-top-1"})
events_dates = event_date[0].text
events_location = event_date[1].text
events_fees = event_date[2].text
print("events_name: " + events_name)
print("events_dates: " + events_dates)
print("events_location: " + events_location)
print("events_fees: " + events_fees)
f.write(events_name + "," + events_dates + "," + events_location + "," +
events_fees + "\n")
f.close()
嗨,我仍然是使用Python语言的初学者,我想知道如何应用一个函数,使该脚本能够将数据获取到网站的下一页?
我已经尝试做一个
for pages in page (1, 49)
my_url = 'https://www.eventbrite.com/d/malaysia--kuala-lumpur--85675181/all-
events/?page=1'
任何建议将不胜感激
import itertools
import requests
from bs4 import BeautifulSoup
def parse_page(url, page)
params = dict(page=page)
resp = requests.get(url, params=params) # will format `?page=#` to url
soup = BeautifulSoup(resp.text, 'html.parser')
... # parse data from page
url = 'https://www.eventbrite.com/d/malaysia--kuala-lumpur--85675181/all-events'
for page in itertools.count(start=1): # don't need to know total pages
try:
parse_page(url, page)
except Exception:
# `parse_url` was designed for a different page layout and will
# fail when no more pages to scrape, so we break here
break