我正在尝试抓取个人的电子邮件地址,但始终无法获取

问题描述 投票:-1回答:2
from bs4 import BeautifulSoup, Tag
import requests

# Listing pages of the directory; {page} is the 1-based page number.
# The original literal was split across two source lines and carried stray
# spaces plus a trailing "#038;..." fragment (presumably a mangled "&").
# URL fragments are never sent to the server, so only the query is kept.
LISTING_URL = ('https://www.indiancattle.com/directory-page/page/{page}/'
               '?ds&type=29&st&dis&dshid=1&dssearch=SEARCH')

# (output prefix, exact text of the <strong> label on the profile page),
# in the order the values are printed.
FIELDS = [
    ('TYPE', 'Type:'),
    ('ADDRESS', 'Address:'),
    ('DISTRICT', 'District:'),
    ('STATE', 'State:'),
    ('PIN', 'Pin Code:'),
    ('MOBILE', 'Mobile 1: '),
    ('MOBILE1', 'Mobile 2: '),
    ('REG', 'Registration Number:'),
    ('EXP', 'Years:'),
    # Was 'PersonalEmail:' (no space), which can never match the page label.
    # NOTE(review): the address itself may sit in a following <a> and may be
    # filled in client-side by JavaScript; in that case plain `requests`
    # cannot see it and this field stays "NA" -- confirm against the page.
    ('EMAIL', 'Personal Email:'),
]


def field_text(page, label):
    """Return the text right after the ``<strong>`` whose text is *label*.

    Returns the string ``"NA"`` when the label is not on the page, has no
    following sibling, or the sibling contains only whitespace.
    """
    marker = page.find('strong', string=label)
    if marker is None or marker.next_sibling is None:
        return 'NA'
    sibling = marker.next_sibling
    # The sibling is either a tag (take its text) or a bare text node.
    value = sibling.get_text() if isinstance(sibling, Tag) else str(sibling)
    value = value.strip()
    return value if value else 'NA'


for count in range(1, 3):
    listing = requests.get(LISTING_URL.format(page=count))
    listing_soup = BeautifulSoup(listing.text, 'lxml')

    # Visit every profile linked from THIS listing page. The original ran
    # this loop after the page loop, so only the last page was processed.
    for link in listing_soup.find_all('a', {"rel": "bookmark"}):
        href = link.get('href')
        if not href:
            continue
        detail = requests.get(href)
        detail_soup = BeautifulSoup(detail.text, 'lxml')

        heading = detail_soup.find('h1', class_='entry-title')
        # The original fallback was a bare string expression and printed
        # nothing; always emit a NAME line so records stay aligned.
        print("NAME:" + (heading.text if heading is not None else "NA"))

        for prefix, label in FIELDS:
            print(prefix + ":" + field_text(detail_soup, label))
python web-scraping beautifulsoup
2个回答
0
投票

电子邮件是通过 JavaScript 动态加载的,因此必须使用 Selenium。请参考下面的代码:

from bs4 import BeautifulSoup
from selenium import webdriver

# Load the profile in a real browser and parse the page source it reports.
driver = webdriver.Chrome()
try:
    driver.get('https://www.indiancattle.com/directory/dr-mandeep-tajinder-kaur/')
    soup = BeautifulSoup(driver.page_source, 'lxml')
finally:
    # Always shut the browser down; otherwise the Chrome/chromedriver
    # processes are left running after the script ends or fails.
    driver.quit()

# `string=`, `find_next` and `.next_element` are the current Beautiful Soup
# spellings of the legacy `text=`, `findNext` and `.next`.
Email = soup.find(string='Personal Email:').find_next('a').text
Address = soup.find(string='Address:').next_element
print('Email: {}\nAddress: {}'.format(Email, Address))

输出:

Email: [email protected]

0
投票

Selenium 支持 CSS 属性选择器(例如 [attribute^=value]),可以直接读取该邮箱地址:

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get('https://www.indiancattle.com/directory/dr-mandeep-tajinder-kaur/')
    # First element whose href starts with "mailto". The
    # find_element_by_css_selector helper was removed in Selenium 4;
    # find_element(By.CSS_SELECTOR, ...) works on old and new versions alike.
    email = driver.find_element(By.CSS_SELECTOR, '[href^=mailto]').text
finally:
    # Always close the browser so no Chrome/chromedriver process is leaked.
    driver.quit()
print(email)
© www.soinside.com 2019 - 2024. All rights reserved.