给cc爬取一下百度的榜单

给cc爬取一下百度的榜单

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.utils import formataddr

# Step 1: fetch the Baidu home page and parse its HTML.

# A desktop-browser User-Agent string is sent with the request.
headers = {
    "User-Agent":
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/78.0.3904.108 Safari/537.36'
}

# requests applies no timeout by default, so set one to keep the script from
# waiting forever on a stalled connection.
response = requests.get("https://www.baidu.com", headers=headers, timeout=10)
# Stop here on an HTTP error status instead of parsing an error page below.
response.raise_for_status()
html = response.content.decode("utf-8")
bs = BeautifulSoup(html, "html.parser")

# Step 2: collect the text and href of every link in the hot-search panel.

hot_wrapper = bs.find(id="hotsearch-content-wrapper")
if hot_wrapper is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the chained call.
    raise RuntimeError(
        'element with id "hotsearch-content-wrapper" not found in the page'
    )

six_hot_alink_list = hot_wrapper.find_all("a")
# Each entry is a [link text, href] pair.
my_hot_list = [[link.get_text(), link.get("href")] for link in six_hot_alink_list]


# Step 3: write the collected rows to an Excel workbook.

book = Workbook()
# index=0 inserts the "baidu" sheet at the first position.
sheet = book.create_sheet(title="baidu", index=0)

# One worksheet row per entry of my_hot_list.
for row in my_hot_list:
    sheet.append(row)

book.save(filename="baidu_hot_six.xlsx")


# Step 4: send the results by e-mail.

# SMTP server settings and message metadata read by sendQQ.
mail_host = 'smtp.qq.com'
mail_port = '465'
login_sender = 'XXX@qq.com'
# Placeholder credential -- presumably the mailbox's SMTP authorization code
# rather than the account password; confirm before use.
login_pass = 'XXX'
# NOTE: this rebinds the builtin name `str` for the rest of the module. The
# name is kept so existing references to it keep working.
str = "get_baidu_six_hot"
sendName = "XX@qq.com"
resName = "XXX@qq.com"
title = "get_baidu_six_hot"


def sendQQ(receivers):
    """Send the exported workbook and this script as e-mail attachments.

    Builds a multipart message carrying ``baidu_hot_six.xlsx`` and
    ``get_baidu_six_hot.py`` (both opened by relative path) and sends it
    using the module-level SMTP settings.

    Args:
        receivers: A recipient address, or a list of recipient addresses.

    Raises:
        OSError: If an attachment file cannot be opened or read; this
            happens before the SMTP ``try`` block and is not caught.

    Only ``smtplib.SMTPException`` is caught (and reported on stdout); any
    other exception propagates to the caller.
    """
    # Imported locally because the file-level imports do not include it.
    from email.mime.application import MIMEApplication

    # The module-level name `str` is rebound to a plain string in this file,
    # so the string type is taken from a literal instead.
    if isinstance(receivers, type("")):
        receivers = [receivers]

    # MIMEMultipart's first two positional arguments are the subtype and the
    # boundary; "mixed" is the subtype for a message with attachments.
    msg = MIMEMultipart('mixed')

    for attachment_name in ('baidu_hot_six.xlsx', 'get_baidu_six_hot.py'):
        # Read as binary and close the handle as soon as the bytes are loaded.
        with open(attachment_name, 'rb') as attachment_file:
            part = MIMEApplication(attachment_file.read())
        # add_header quotes the filename parameter correctly.
        part.add_header('Content-Disposition', 'attachment', filename=attachment_name)
        msg.attach(part)

    msg['From'] = formataddr((sendName, login_sender))
    msg['To'] = ", ".join(receivers)
    # Subject line of the e-mail.
    msg['Subject'] = title

    try:
        # The context manager closes the connection even if login or
        # sendmail fails.
        with smtplib.SMTP_SSL(mail_host, int(mail_port)) as server:
            server.login(login_sender, login_pass)
            server.sendmail(login_sender, receivers, msg.as_string())
            print("已发送到" + ", ".join(receivers) + "的邮箱中!")
    except smtplib.SMTPException as exc:
        # Include the exception so the cause of the failure is visible.
        print("发送邮箱失败!", exc)


# Run the mail step for the listed recipient addresses.
sendQQ(
    [
        'XXXX@qq.com',
        'XXX@qq.com',
    ]
)
原文地址:https://www.cnblogs.com/andy0816/p/14769259.html