这个网页上有一个表格。https:/www.avanza.semina-sidorkontooversikt.html
我试图用这个来填写和提交。
# -*- coding: utf-8 -*-
import cookielib
import urllib2
import mechanize
from bs4 import BeautifulSoup
from lxml import html
import urllib2, base64, lxml
import numpy as np
import unicodedata
import datetime
import re
import time
URL = "https://www.avanza.se/mina-sidor/kontooversikt.html"
br = mechanize.Browser()
cookiejar = cookielib.LWPCookieJar()
br.set_cookiejar( cookiejar )
# Browser options
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)
# Follows refresh 0 but not hangs on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
br.open(URL)
for f in br.forms():
if f.attrs['class'] == 'loginForm clearFix':
br.f = f
print f
break
br.f[ "j_username" ] = "user"
br.f[ "j_password" ] = "pass"
res = br.submit()
time.sleep(5)
br.open('https://www.avanza.se/handla/aktier.html/kop/5447/abb-ltd')
#Getting the response in beautifulsoup
soup = BeautifulSoup(br.response().read(), 'lxml')
print(soup.prettify().encode("utf-8"))
我的打印验证我得到了正确的形式。
<div class="section accordion-togglebar">
<a class="toggleBar whiteBG normalBlack noTopBorder ubuntu active" href="#" data-login-method="userCredentials">
<img src="/jmvc/avanzabank/images/inloggningsuppgifter_ic.svg" class="loginTypeIcon" alt="Inloggningsuppgifter" title="Inloggningsuppgifter">
Användarnamn & lösenord <span class="arrow fRight"></span>
</a>
<div class="toggleBarContent noPaddingTop ">
<div class="messageWrapper formLogin"></div>
<form autocomplete="off" class="loginForm clearFix" method="POST" action="/ab/noop">
<input placeholder="Användarnamn" type="text" name="j_username" autocapitalize="none">
<input placeholder="Lösenord" type="password" name="j_password">
<button class="focusBtn loginButton marginTop5px marginBottom15px errorToolTipPlacement fRight" type="submit" disabled="disabled">Logga in</button>
<a class="marginTop4px defaultSize plcLink" href="/glomt-uppgift.html">Glömt lösenord eller användarnamn?</a>
</form>
</div>
</div>
然而,当我试图提交我得到的。
Traceback (most recent call last):
File "scrap_mec.py", line 91, in <module>
res = br.submit()
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 541, in submit
return self.open(self.click(*args, **kwds))
File "/usr/lib/python2.7/dist-packages/mechanize/_mechanize.py", line 530, in click
request = self.form.click(*args, **kwds)
AttributeError: 'NoneType' object has no attribute 'click'
我想你还是要用 select_form
方法,以便浏览器知道你要提交的是哪一个表单
而不是。
res = br.submit()
你需要做的是:
在点击方法中,你必须将HTML中的name属性值传递给按钮,其中type = "submit".
request = form.click(name=<name>) # or form.click(id=<id>)
br.open(request)