Python 网页下载

以下代码可在 Python 2.7 版本下运行。

import urllib2

def getHtml(url):
    """Download the resource at ``url`` and return its raw body.

    The request is sent with a fixed desktop-Firefox ``User-Agent`` header.

    Args:
        url: Address to fetch; passed unchanged to ``urllib2.Request``.

    Returns:
        The result of ``response.read()`` on success, or ``None`` when a
        ``urllib2.URLError`` is raised (the error is printed, not re-raised).
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    response = None
    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        return response.read()
    except urllib2.URLError as e:
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement as the comma-separated form would.
        if hasattr(e, 'code'):
            print('Error code: %s' % (e.code,))
        elif hasattr(e, 'reason'):
            # This branch is only reached when the error has no ``code``
            # attribute, so ``reason`` is the attribute to report here.
            print('Reason: %s' % (e.reason,))
        return None
    finally:
        # ``response`` is still None if the request failed before opening.
        if response is not None:
            response.close()

def saveHtml(file_name, file_content):
    """Write ``file_content`` to ``<file_name>.html`` in binary mode.

    Every '/' in ``file_name`` is replaced with '_' before the ".html"
    suffix is appended, so a '/' in the name is not used as a path separator.

    Args:
        file_name: Base name of the output file, without the extension.
        file_content: Data to write to the file.

    Raises:
        TypeError: If ``file_content`` is None.
    """
    # Check before opening: "wb" creates or truncates the target file, so
    # failing afterwards would leave an empty file behind.
    if file_content is None:
        raise TypeError("file_content must not be None")

    target = file_name.replace('/', '_') + ".html"
    with open(target, "wb") as f:
        f.write(file_content)

# Download the Baidu home page, save it as "xxx.html", then echo it.
html = getHtml("https://www.baidu.com/")

# getHtml yields None when the download fails; skip saving and printing in
# that case instead of passing None on to saveHtml.
if html is not None:
    saveHtml("xxx", html)

    # show me------------------------------
    # Parenthesised single argument: same output under the Python 2 print
    # statement.
    print(html)