下载博客首页的博客列表

# Python 2 script: scrape article links from the first page of a Sina blog's
# article list and save each linked article's HTML into the blog/ directory.
import os
import time
import urllib

# Fetch the article-list page as one raw string.
url_con = urllib.urlopen('http://blog.sina.com.cn/s/articlelist_1193111400_0_1.html').read()
print('con %s' % url_con)

# Fixed-size table of article URLs; i counts how many slots were really filled.
url = ['']*40
i = 0

# Locate the first link: an '<a title=' tag, the 'href=' that follows it, and
# the '.html' that ends the link target.
title = url_con.find(r'<a title=')
print("title %s" % title)
href = url_con.find(r'href=', title)
print("href %s" % href)
html = url_con.find(r'.html', href)
print("html %s" % html)

# Walk the page link by link until a marker is missing or the table is full.
while title != -1 and href != -1 and html != -1 and i < 40:
    # href+6 skips 'href=' plus the opening quote; html+5 keeps the '.html'.
    url[i] = url_con[href+6:html+5]
    print(url[i])
    # Resume searching after the link that was just extracted.
    title = url_con.find(r'<a title=', html)
    href = url_con.find(r'href=', title)
    html = url_con.find(r'.html', href)
    i = i + 1

# The output directory must exist before files can be written into it.
if not os.path.isdir('blog'):
    os.makedirs('blog')

# Download only the i URLs that were actually found; the remaining slots are
# empty strings and cannot be opened.
for j in range(i):
    content = urllib.urlopen(url[j]).read()
    # The last 26 characters of the URL are used as the local file name.
    filename = url[j][-26:]
    # The with-block closes the file even if the write fails.
    with open(r'blog/' + filename, 'w') as f:
        f.write(content)
    # Pause between requests so the server is not hit in a tight loop.
    time.sleep(5)

以上代码先获取博客文章列表中的链接,再逐篇下载文章并保存到 blog/ 目录。

原文地址:https://www.cnblogs.com/y15821933792/p/7797197.html