我正在尝试编写一段代码,用来返回给定网址中商品的价格。代码如下:
from bs4 import BeautifulSoup
import time
from smtplib import SMTP
import pandas as pd
import numpy as np
import requests
# Fetch a product page and print the element that holds its price table.
URL = 'https://www.bigbasket.com/pd/10000200/fresho-tomato-hybrid-1-kg/?nc=cl-prod-list&t_pg=&t_p=&t_s=cl-prod-list&t_pos=1&t_ch=desktop'
# The request is sent with a desktop-browser User-Agent string.
headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78'}

# requests has no default timeout; without one an unresponsive server would
# block this script indefinitely.
page = requests.get(URL, headers=headers, timeout=10)
# Stop on an HTTP 4xx/5xx response instead of parsing an error page.
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')
# find(id=...) returns the whole matching element (or None when nothing
# matches), so this prints the complete <div id="price"> markup rather than
# only the number inside it.
div = soup.find(id="price")
print(div)
输出:
<div id="price"><table><tbody class="_6eiYL" id="10000200"><tr class="_23Nyv"><td>MRP: </td><td class="_2ifWF">Rs <!-- -->37.50</td></tr><tr class="_157dw"><td> <!-- -->Price:</td><td class="IyLvo" data-qa="productPrice">Rs <!-- -->30</td></tr><tr class="_21awm"><td>You Save: </td><td class="IyLvo">20%</td></tr><tr><td></td> <td>(Inclusive of all taxes)</td></tr></tbody></table></div>
我希望输出的只是价格(即 37.50),而不是上面得到的整个 div 内容。
from bs4 import BeautifulSoup
import requests
import re
# Fetch a product page and print the MRP value from its price table.
URL = 'https://www.bigbasket.com/pd/10000200/fresho-tomato-hybrid-1-kg/?nc=cl-prod-list&t_pg=&t_p=&t_s=cl-prod-list&t_pos=1&t_ch=desktop'
# The request is sent with a desktop-browser User-Agent string.
headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 OPR/65.0.3467.78'}

# requests has no default timeout; without one an unresponsive server would
# block this script indefinitely.
page = requests.get(URL, headers=headers, timeout=10)
# Stop on an HTTP 4xx/5xx response instead of parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

# Each lookup returns None when nothing matches, so check every step and exit
# with a readable message instead of an AttributeError on None.
price_div = soup.find('div', id="price")
if price_div is None:
    raise SystemExit('No <div id="price"> element found in the page')

# Locate the label cell whose text contains "MRP:". `string=` is the current
# name of the filter argument that older Beautiful Soup releases called `text=`.
mrp_label = price_div.find_next('td', string=re.compile(r'MRP:'))
if mrp_label is None:
    raise SystemExit('No "MRP:" label cell found after the price element')

# The next <td> in document order is the cell holding the MRP value.
value_cell = mrp_label.find_next('td')
if value_cell is None or not value_cell.contents:
    raise SystemExit('No value cell found after the "MRP:" label')

# The cell's children are e.g. ['Rs ', <comment>, '37.50']; the number is the
# last child.
item = value_cell.contents
print(item[-1])