# Simple wrapper function for scraping Tencent News (腾讯新闻) content

import urllib.request
import re


def get_new_qq(url, pat, file):
    """Fetch a page, extract every regex match, and save the matches to a file.

    Args:
        url: Address of the page to download; passed to
            ``urllib.request.urlopen``.
        pat: Regular expression applied to the decoded page text with
            ``findall``.
        file: Path of the output file. It is overwritten on every call and
            receives ``str()`` of the match list, encoded as UTF-8.

    Returns:
        The list produced by ``findall`` for *pat* on the page text.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        re.error: If *pat* is not a valid regular expression.
        OSError: If *file* cannot be opened for writing.
    """
    # Close the response deterministically instead of leaking the connection.
    with urllib.request.urlopen(url) as response:
        raw = response.read()

    # The page is decoded as GBK; undecodable bytes are dropped, not raised.
    # NOTE(review): assumes the target page is GBK-encoded -- confirm.
    data = raw.decode("gbk", "ignore").strip()
    res = re.compile(pat).findall(data)

    # Explicit UTF-8: the platform default encoding may be unable to
    # represent the Chinese text in the matches and would raise on write.
    with open(file, "w", encoding="utf-8") as f:
        f.write(str(res))
    return res


if __name__ == '__main__':
    # Demo run: scrape the link texts of the <li><a> entries on the Tencent
    # home page, save them one directory up, and echo them to stdout.
    headlines = get_new_qq(
        url="https://www.qq.com/?pgv_ref=1",
        pat="<li><a .*>(.*)</a></li>",
        file="../腾讯新闻.txt",
    )
    print(headlines)
# Original article (原文地址): https://www.cnblogs.com/CesareZhang/p/12175054.html