BeautifulSoup库应用实例

获取博客园本人的积分排名数据:

1. 抓包获取积分排名数据返回接口:http://www.cnblogs.com/belle-ls/mvc/blog/sidecolumn.aspx?blogApp=belle-ls

2. 解析返回的数据,获取积分排名

3. 开启线程,循环打印积分排名信息

from bs4 import BeautifulSoup
import time
import requests
import threading


class BlogRankMonitor(object):
    """
    Fetch and print the score and rank of a cnblogs blog.

    The blog's sidebar endpoint is downloaded with ``requests`` and parsed
    with BeautifulSoup; the extracted values are kept on the instance as
    ``score`` and ``rank``.
    """

    # Seconds to wait for the HTTP response before giving up, so a stalled
    # connection cannot block a worker thread forever.
    REQUEST_TIMEOUT = 10

    # CSS classes of the sidebar elements that are searched for values.
    # NOTE(review): the sample output recorded in this file shows the rank
    # staying at 0, which suggests the rank text is not inside the `liScore`
    # element; presumably it lives in a sibling `liRank` element - confirm
    # against the live page. A class that is absent is simply skipped.
    VALUE_CLASSES = ("liScore", "liRank")

    def __init__(self, id):
        """
        :param id: blog name; it is substituted into both ``%s`` slots of
            the sidebar URL template.
        """
        self.urlBasic = 'http://www.cnblogs.com/%s/mvc/blog/sidecolumn.aspx?blogApp=%s'
        self.id = id
        # Both stay 0 until a fetch finds the corresponding text; afterwards
        # they hold the parsed value as a string (see get_nums).
        self.score = 0
        self.rank = 0

    def get_nums(self, blogs_des):
        """
        Extract the value from a "label - value" string.

        For example ``'积分 -    43'`` yields ``'43'``.

        :param blogs_des: text containing a ``-`` separator
        :return: the text between the first and second ``-`` with the
            surrounding whitespace stripped (a string, not an int)
        :raises IndexError: if the text contains no ``-``
        """
        return blogs_des.split('-')[1].strip()

    def get_blog_ranks(self):
        """
        Download the sidebar page, update ``score`` / ``rank`` and print them.

        Values that cannot be found in the page are left unchanged, so the
        printed line then shows the previously known values.

        :return: None
        """
        url = self.urlBasic % (self.id, self.id)
        res = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(res.text, "lxml")
        for css_class in self.VALUE_CLASSES:
            container = soup.find(class_=css_class)
            if container is None:
                # Element missing from the response: nothing to parse here.
                continue
            for child in container.children:
                text = child.string
                # `.string` is None for a tag with several children, and text
                # without a separator cannot be split by get_nums.
                if not text or '-' not in text:
                    continue
                if u'积分' in text:
                    self.score = self.get_nums(text)
                elif u'排名' in text:
                    self.rank = self.get_nums(text)
        # Report this instance's own values rather than a module-level global.
        print("积分:", self.score, "排名:", self.rank, "时间:", time.strftime("%Y-%m-%d %X", time.localtime()))

    def start_score_rank_thread(self):
        """
        Run :meth:`get_blog_ranks` once on a newly started thread.

        :return: the started ``threading.Thread``, so callers may ``join`` it
        """
        # Pass the bound method itself; calling it here would do the work on
        # the caller's thread and hand its None result to Thread as target.
        t = threading.Thread(target=self.get_blog_ranks)
        t.start()
        return t


if __name__ == '__main__':
    # Pause between two consecutive fetches, in seconds.
    poll_interval_seconds = 10
    blog = BlogRankMonitor('belle-ls')

    # Runs until the process is interrupted: trigger one fetch-and-print,
    # then wait for the next round.
    while True:
        blog.start_score_rank_thread()
        time.sleep(poll_interval_seconds)

"""
运行结果:
积分: 45 排名: 0 时间: 2019-02-12 15:57:22
积分: 45 排名: 0 时间: 2019-02-12 15:57:32
积分: 45 排名: 0 时间: 2019-02-12 15:57:42
积分: 45 排名: 0 时间: 2019-02-12 15:57:53
积分: 45 排名: 0 时间: 2019-02-12 15:58:03
....
"""

 

原文地址:https://www.cnblogs.com/belle-ls/p/10365557.html