python写简单爬虫的五种方法 (转)

原文地址链接:  http://blog.sina.com.cn/s/blog_5e32cc130100fszx.html

2, 【用Python写爬虫】获取html的方法【二】:使用pycurl

# Pycurl参考地址:http://pycurl.sourceforge.net/
# Pycurl下载地址:http://pycurl.sourceforge.net/download/pycurl-7.18.1.tar.gz

#!/usr/bin/python
#coding:utf8

import pycurl
import StringIO

def getURLContent_pycurl(url):
    """Download *url* with pycurl and return the response body.

    HTTP redirects are followed, up to a maximum of 5 hops.

    Args:
        url: Address of the page to fetch.

    Returns:
        Everything libcurl handed to the write callback, as returned by
        the in-memory buffer's ``getvalue()``.

    Raises:
        pycurl.error: presumably propagated from ``perform()`` when the
            transfer fails -- confirm against the installed pycurl version.
    """
    # In-memory sink: libcurl calls b.write() for every chunk it receives.
    b = StringIO.StringIO()
    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.MAXREDIRS, 5)
        c.perform()
    finally:
        # Release the curl handle even when the transfer raises; the
        # original never closed it.
        c.close()
    return b.getvalue()

# Page to download for the demonstration.
url='http://www.baidu.com'

# Fetch the page body through getURLContent_pycurl.
content = getURLContent_pycurl(url)

# Python 2 print statement: write the downloaded content to stdout.
print content

3,【用Python写爬虫】获取html的方法【三】:使用cPAMIE
# cPAMIE下载:http://sourceforge.net/project/showfiles.php?group_id=103662

     1    #!/usr/bin/python
     2    #coding:utf8
     3    
     4    import cPAMIE
     5    
     6    def getURLContent_cPAMIE(url):
     7        g_ie = cPAMIE.PAMIE()
     8        g_ie = showDebugging = False
     9        g_ie.frameName = None
    10        g_ie.navigate(url)
    11        
    12        content = g_ie.pageGetText()
    13        g_ie.quit()
    14        return content
    15    
    16    url = 'http://www.baidu.com'
    17    
    18    content = getURLContent_cPAMIE(url)
    19    
    20    print content
原文地址:https://www.cnblogs.com/jackge/p/3089235.html