python爬虫 爬取vamei的python快速教程

目标:爬取vamei的python快速教程

代码如下:

from bs4 import BeautifulSoup
import requests


def get_links(url='http://www.cnblogs.com/vamei/archive/2012/09/13/2682778.html',
              timeout=10):
    """Collect the links to every page that should be crawled.

    Downloads the index post at ``url`` and gathers the ``href`` of each
    anchor matched by ``#cnblogs_post_body > p > span > a``.

    Args:
        url: Address of the index page that lists the tutorial posts.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of href strings in document order. Anchors that carry no
        ``href`` attribute (or an empty one) are left out.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests may wait forever on an unresponsive server.
    web_data = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty link list.
    web_data.raise_for_status()
    soup = BeautifulSoup(web_data.text, 'lxml')

    anchors = soup.select('#cnblogs_post_body > p > span > a')
    # Tag.get returns None for a missing attribute; passing that on would
    # make the caller's requests.get(None) blow up.
    hrefs = (anchor.get('href') for anchor in anchors)
    return [href for href in hrefs if href]


def get_content(timeout=10):
    """Print the body text of every blog post returned by ``get_links()``.

    Each linked page is downloaded and parsed, and the text of every element
    matched by ``#topics > div`` is written to standard output.

    Args:
        timeout: Seconds to wait for each page before giving up.

    Raises:
        requests.RequestException: If a page request times out or fails.
    """
    for link in get_links():
        # A timeout keeps one stalled server from blocking the whole crawl.
        web_data = requests.get(link, timeout=timeout)
        soup = BeautifulSoup(web_data.text, 'lxml')
        for content in soup.select('#topics > div'):
            print(content.get_text())


# Script entry point: calling this at top level starts the crawl (and its
# network requests) as soon as this code is executed.
get_content()

接下来可以考虑把爬取的内容保存到本地文件

原文地址:https://www.cnblogs.com/november1943/p/5263568.html