python使用mysql

连接 MySQL 做了个小练习:爬取 http://wufazhuce.com 上的问题、描述和答案,并存到本地的数据库里。

数据表结构:

-- Storage for the crawled pages: the crawler script inserts one row per
-- question page it visits.
CREATE TABLE `questions` (
    `title`       VARCHAR(2000) DEFAULT NULL,  -- question title ('404' when the page has no question)
    `description` VARCHAR(200)  DEFAULT NULL,  -- question description
    `answers`     VARCHAR(2000) DEFAULT NULL,  -- answer text
    `url`         VARCHAR(2000) DEFAULT NULL,  -- URL of the crawled page
    `daynum`      VARCHAR(20)   DEFAULT NULL   -- page number used in the URL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
View Code

代码:

# author:

import pymysql.cursors
import requests
from bs4 import BeautifulSoup

# Crawl every question page on wufazhuce.com and store one row per page in the
# `questions` table: title, description, last answer, page URL and page number.
# Pages without a question block are recorded with '404' markers.
#
# NOTE(review): the connection credentials are hard-coded here; consider
# loading them from configuration or the environment instead.
con = pymysql.connect(host='192.168.86.130', user='root', password='letmein', db='0603simon', port=3306,
                      charset='utf8')

# The driver fills in the placeholders, so every value (including scraped
# text) is escaped consistently instead of being spliced into the statement.
sql = 'insert into questions (title,description,answers,url,daynum) values(%s,%s,%s,%s,%s)'

try:
    with con.cursor() as cur:
        for p_num in range(1, 1872):
            url = 'http://wufazhuce.com/question/%s' % p_num
            try:
                # A timeout keeps one stalled request from hanging the crawl.
                response = requests.get(url=url, timeout=10)
            except requests.RequestException as err:
                # Skip this page rather than aborting the remaining pages.
                print(err)
                continue
            response.encoding = response.apparent_encoding
            soup = BeautifulSoup(response.text, features="html.parser")
            tar = soup.find('div', class_='one-cuestion')
            if not tar:
                print('not tar')
                params = ('404', '404', '404', url, str(p_num))
            else:
                title_tag = tar.find('h4')
                title = title_tag.text.strip() if title_tag is not None else ''
                # The first 'cuestion-contenido' block is the description and
                # the last one is the answer that gets stored.
                ans = soup.find_all('div', class_='cuestion-contenido')
                desc = ans[0].text.strip() if ans else ''
                # With only the description block present there is no answer;
                # store an empty string instead of repeating the description.
                answer = ans[-1].text.strip() if len(ans) > 1 else ''
                # Trim to the column widths of the `questions` table
                # (answers are kept to 1800 characters, as before).
                params = (title[:2000], desc[:200], answer[:1800], url, str(p_num))

            # mogrify() returns the exact statement the driver will send.
            print(cur.mogrify(sql, params))
            result = cur.execute(sql, params)
            con.commit()
            print('执行结果:' + str(result))
finally:
    # Always release the connection, even when the crawl stops on an error.
    con.close()
View Code

邮箱的使用

# coding:utf-8
def get_question():
    """Fetch today's question from wufazhuce.com.

    The page number is derived from the current local date: page 2593 is
    taken to be 2019-09-08 and the number is advanced by one for every day
    elapsed since then.

    Returns:
        dict: ``{'title': ..., 'desc': ..., 'answer': ...}`` when the page
        contains a question. ``{'title': ''}`` when the page could not be
        downloaded or has no question block, so callers can test
        ``obj['title']`` and retry later.
    """
    import datetime

    import requests
    from bs4 import BeautifulSoup

    obj = {}
    first_num = 2593
    first_date = datetime.date(2019, 9, 8)
    # Calendar-date difference: whole days, independent of the time of day.
    cur_num = first_num + (datetime.date.today() - first_date).days
    print(cur_num)

    url = 'http://wufazhuce.com/question/%s' % cur_num
    try:
        # A timeout keeps a stalled request from blocking the caller forever.
        response = requests.get(url=url, timeout=10)
    except requests.RequestException as err:
        # Report "no question" instead of raising, so a polling caller can
        # simply try again later.
        print(err)
        obj['title'] = ''
        return obj
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, features="html.parser")
    tar = soup.find('div', class_='one-cuestion')

    if not tar:
        print('not tar')
        obj['title'] = ''
        return obj

    title_tag = tar.find('h4')
    # The first 'cuestion-contenido' block is the description; the last of
    # the remaining blocks is used as the answer ('' when there is none).
    blocks = soup.find_all('div', class_='cuestion-contenido')

    obj['title'] = title_tag.text.strip() if title_tag is not None else ''
    obj['desc'] = blocks[0].text.strip() if blocks else ''
    obj['answer'] = blocks[-1].text.strip() if len(blocks) > 1 else ''
    return obj


def send_email(title, desc, content):
    """Send the question as an HTML e-mail through the 163.com SMTP server.

    Args:
        title: Used as the mail subject.
        desc: Question description, shown first in the body.
        content: Answer text, shown after the description.

    Returns:
        None. Delivery is best effort: any error is printed, not raised,
        so a failure here never stops the caller.
    """
    import html
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    from email.utils import formataddr

    sender = 'xxxxx@163.com'  # sender mailbox account
    receive = 'xxxxx@qq.com'  # recipient mailbox account
    passwd = 'xxxxx'
    mailserver = 'smtp.163.com'
    port = 25

    try:
        msg = MIMEMultipart('related')
        msg['From'] = formataddr(["sender", sender])  # display name, address
        msg['To'] = formataddr(["receiver", receive])  # display name, address
        msg['Subject'] = title

        # The body is sent as HTML, so the scraped text is escaped and the
        # two parts are separated by a real <br/> tag.
        body = '''【描述】:%s<br/>
        【回答】:%s''' % (html.escape(str(desc), quote=False),
                      html.escape(str(content), quote=False))
        text = MIMEText(body, 'html', 'utf-8')
        msg.attach(text)

        # The context manager closes the connection even when login() or
        # sendmail() raises.
        with smtplib.SMTP(mailserver, port, timeout=30) as server:
            server.login(sender, passwd)
            server.sendmail(sender, receive, msg.as_string())
        print('success')
    except Exception as e:
        # Deliberately broad: this function must never raise into the caller.
        print(e)


def main_to():
    """Fetch today's question and mail it.

    Returns:
        int: 1 when a question with a non-empty title was found and handed
        to ``send_email``; 0 otherwise.
    """
    question = get_question()
    # Guard clause: nothing to send while the title is empty.
    if not question['title']:
        return 0
    send_email(question['title'], question['desc'], question['answer'])
    return 1


from threading import Timer
import time

# Delay in seconds passed to the one-shot Timer that calls delayrun.
timer_interval = 1


def delayrun():
    """Timer callback: print a marker line showing that it ran."""
    print('running')


# Wait three hours before doing anything.
first_time = 3 * 60 * 60
time.sleep(first_time)
print('先歇一下')

# One-shot timer that calls delayrun after timer_interval seconds.
t = Timer(timer_interval, delayrun)
t.start()

# Poll forever: after a successful run wait a day, otherwise retry in a minute.
wait_time = 60
while True:
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    res = main_to()
    wait_time = 60 * 60 * 24 if res else 60
    time.sleep(wait_time)
View Code
原文地址:https://www.cnblogs.com/Simonsun002/p/9152944.html