如何从各个 `data-stock=" "` 属性中获取值 12、9、8、9、6、24、15、11、11,并从下面的 HTML 代码中获取尺码选项的文本 36、37、38、39、40、41、42、43、44:
<div class="col-lg-10">
<select class="form-control m-select2 func_select2_manual" name="product_size_id">
<option data-height="40" data-length="245" data-stock="12" data-weight="500" data-width="110" value="656">
36
</option>
<option data-height="40" data-length="250" data-stock="9" data-weight="500" data-width="110" value="657">
37
</option>
<option data-height="40" data-length="255" data-stock="8" data-weight="500" data-width="110" value="658">
38
</option>
<option data-height="40" data-length="260" data-stock="9" data-weight="500" data-width="110" value="659">
39
</option>
<option data-height="40" data-length="265" data-stock="6" data-weight="500" data-width="110" value="660">
40
</option>
<option data-height="40" data-length="270" data-stock="24" data-weight="500" data-width="110" value="661">
41
</option>
<option data-height="40" data-length="275" data-stock="15" data-weight="500" data-width="110" value="662">
42
</option>
<option data-height="40" data-length="280" data-stock="11" data-weight="500" data-width="110" value="663">
43
</option>
<option data-height="40" data-length="285" data-stock="11" data-weight="500" data-width="110" value="664">
44
</option>
</select>
</div>
可以使用 xmltodict 解析 HTML:
import xmltodict
# Extract the data-stock attribute and the visible text of every <option>
# using xmltodict.
#
# Placeholder: replace the contents with the real markup. xmltodict.parse()
# receives this string directly, so it has to be well-formed XML.
html_string = '''
your_html_string
'''

# force_list keeps 'option' a list even when the <select> holds a single
# <option>; without it xmltodict returns one dict and the iteration below
# would walk over that dict's keys instead of over options.
parsed_html_dict = xmltodict.parse(html_string, force_list=('option',))

# The div -> select -> option path matches the example markup. The keys used
# here follow xmltodict's naming: '@<name>' for an attribute, '#text' for the
# element's text content.
result = [
    {
        'data-stock': option['@data-stock'],
        'text': option['#text'],
    }
    for option in parsed_html_dict['div']['select']['option']
]
print(result)
from bs4 import BeautifulSoup
# Collect the data-stock attribute and the label of every <option> with
# BeautifulSoup, print each pair, then print both collections comma-joined.
html_doc = """<div class="col-lg-10">
<div class="col-lg-10">
<select class="form-control m-select2 func_select2_manual" name="product_size_id">
<option data-height="40" data-length="245" data-stock="12" data-weight="500" data-width="110" value="656">36</option>
<option data-height="40" data-length="250" data-stock="9" data-weight="500" data-width="110" value="657">37</option>
<option data-height="40" data-length="255" data-stock="8" data-weight="500" data-width="110" value="658">38</option>
<option data-height="40" data-length="260" data-stock="9" data-weight="500" data-width="110" value="659">39</option>
</select>
</div>
"""
soup = BeautifulSoup(html_doc, 'html.parser')
options = soup.find_all('option')

option_data_stock_list = []
option_text_list = []
for option in options:
    # Default to '' so that an <option> without data-stock cannot put None
    # into the list and make the ', '.join() below raise TypeError.
    option_data_stock = option.attrs.get('data-stock', '')
    # strip() with no argument already removes surrounding newlines, so a
    # second strip('\n') is unnecessary.
    option_text = option.text.strip()
    option_data_stock_list.append(option_data_stock)
    option_text_list.append(option_text)
    print(option_data_stock, option_text)

print(', '.join(option_data_stock_list))
print(', '.join(option_text_list))
我使用 BeautifulSoup 解析了这段 HTML:
from bs4 import BeautifulSoup
# Print the data-stock attribute and the text of every <option> found in the
# sample markup, one pair per line.
html_doc = """<div class="col-lg-10">
<div class="col-lg-10">
<select class="form-control m-select2 func_select2_manual" name="product_size_id">
<option data-height="40" data-length="245" data-stock="12" data-weight="500" data-width="110" value="656">36</option>
<option data-height="40" data-length="250" data-stock="9" data-weight="500" data-width="110" value="657">37</option>
<option data-height="40" data-length="255" data-stock="8" data-weight="500" data-width="110" value="658">38</option>
<option data-height="40" data-length="260" data-stock="9" data-weight="500" data-width="110" value="659">39</option>
</select>
</div>
"""
soup = BeautifulSoup(html_doc, 'html.parser')
options = soup.find_all('option')

for option in options:
    # .get() yields None (printed as "None") when the attribute is absent.
    option_data_stock = option.attrs.get('data-stock')
    option_text = option.text
    print(option_data_stock, option_text)