爬虫----表单登录

1.post提交表单

# -*- coding:utf-8 -*-

import urllib, urllib2
import cookielib
import lxml.html

from lxml import etree

def getcookie():
    """Build a URL opener that keeps cookies between requests.

    Returns:
        A ``urllib2`` opener whose ``HTTPCookieProcessor`` is backed by a
        fresh in-memory ``cookielib.CookieJar``.
    """
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    return opener

def get_data(html):
    """Collect the named ``<input>`` fields found inside forms in *html*.

    Args:
        html: HTML source of the page containing the form(s).

    Returns:
        A dict mapping each input's ``name`` attribute to its ``value``
        attribute. Inputs without a ``name`` are skipped; inputs without a
        ``value`` attribute map to the empty string.
    """
    tree = lxml.html.fromstring(html)
    data = {}
    for field in tree.cssselect('form input'):
        name = field.get('name')
        if name:
            # Default to '' rather than None: a None value would be
            # url-encoded as the literal text "None" when the form is posted.
            data[name] = field.get('value', '')
    return data

def main():
    """Log in to Douban by POSTing its login form and print the final URL.

    The login page is fetched first so that the form's existing input fields
    can be submitted together with the credentials.
    """
    url = 'https://www.douban.com/accounts/login?source=main'

    # Use one cookie-aware opener for both requests so that any cookies set
    # while fetching the login page are sent back with the form submission.
    opener = getcookie()
    html = opener.open(url).read()

    data = get_data(html)
    data['login'] = 'login'
    # Placeholder credentials; replace with a real account before running.
    data['form_email'] = '1111111111@qq.com'
    data['form_password'] = 'admin'

    encode_data = urllib.urlencode(data)
    # Supplying a data argument makes urllib2 send the request as a POST.
    request = urllib2.Request(url, encode_data)
    response = opener.open(request)

    # The URL after redirects shows where the site sent us after the attempt.
    print(response.geturl())



# Run the login flow only when this file is executed as a script.
if __name__ == "__main__":
    main()

2.高级模块mechanize

# -*- coding:utf-8 -*-
import mechanize
import sys
# HACK: Python 2-only workaround that switches the process-wide default
# string encoding to UTF-8. reload(sys) is required first because
# sys.setdefaultencoding is normally removed from the sys namespace during
# interpreter start-up.
reload(sys)
sys.setdefaultencoding('utf-8')


def main():
    """Log in to Douban through a mechanize browser and print the final URL."""
    url = 'https://www.douban.com/accounts/login?source=main'
    br = mechanize.Browser()
    br.open(url)

    # Debugging aid: uncomment to list the forms mechanize found on the page.
    # for form in br.forms():
    #     print(form)

    # Select the first form on the page (assumed to be the login form).
    br.select_form(nr=0)
    # Placeholder credentials; replace with a real account before running.
    br['form_email'] = '1111111111@qq.com'
    br['form_password'] = 'admin'

    response = br.submit()
    # The URL after redirects shows where the site sent us after the attempt.
    print(response.geturl())


# Run the login flow only when this file is executed as a script.
if __name__ == '__main__':
    main()
原文地址:https://www.cnblogs.com/wozuilang-mdzz/p/9742620.html