这里是链接:https://www.118100.se/sok/foretag/?q=brf&loc=&ob=rel&p=0
def get_index_data(soup):
    """Print and return the hrefs of the listings found in ``soup``.

    For every ``<div class="Name">`` in the document, the first ``<a>``
    whose ``title`` attribute equals that div's text is looked up and its
    ``href`` is collected.

    Args:
        soup: A parsed document exposing BeautifulSoup's ``find_all`` and
            ``find`` methods.

    Returns:
        A list of href strings in document order; empty when nothing
        matches. The same list is printed, as the original function did.
    """
    links = []
    # find_all() returns a collection of tags, so attributes have to be read
    # from each element; calling .get() on the collection itself always
    # raises AttributeError, which the old bare ``except`` silently hid.
    for title in soup.find_all('div', {'class': 'Name'}):
        anchor = soup.find('a', {'title': title.text})
        if anchor is None:
            # No anchor carries this title; skip rather than fail.
            continue
        href = anchor.get('href')
        if href:
            links.append(href)
    print(links)
    return links
查找所有 class 为 `Name` 的 `div`(即 `class="Name"`)。这将为您提供所有标题名称。如果需要 `href`,则遍历所有 `titles`,并查找 `title` 属性等于 `title.text` 的 `a` 标签,再读取它的 `href`。
import requests
import bs4 as bs
# Fetch the first page of the "brf" company search and print an absolute
# link for every listing on it.
url = 'https://www.118100.se/sok/foretag/?q=brf&loc=&ob=rel&p=0'
# requests has no default timeout; without one the script can hang forever
# if the server never answers.
response = requests.get(url, timeout=10)
soup = bs.BeautifulSoup(response.text, 'lxml')
# Listing names are taken from the <div class="Name"> elements.
titles = soup.find_all('div', {'class': 'Name'})
for title in titles:
    # The listing's link is the <a> whose title attribute equals the div text.
    anchor = soup.find('a', {'title': title.text})
    href = anchor.get('href') if anchor is not None else None
    if not href:
        # find() returns None when no anchor matches; skip the entry instead
        # of crashing on None.get(...) or on str + None.
        continue
    # hrefs are concatenated onto the site origin to form the full URL.
    print('https://www.118100.se' + href)