# 小说获取

import requests
import re
import pymysql
# 保存到数据库
# mysql=pymysql.connect(host='127.0.0.1',user='root',password='wbf980728',database='aaa',charset='utf8')#连接数据库,编码方式为utf8
# youbiao=mysql.cursor()#建立游标
# source = requests.get('https://www.23us.com/').content.decode('gbk') #获取页面源代码
# a=r'<li><p class="ul1">\[(.*?)\]《<a class="poptext" href=".*?" target="_blank">(.*?)</a>》</p><p class="ul2"><a href=".*?" target="_blank">(.*?)</a></p><p>(.*?)</p>(.*?)</li>'#用正则取出要获取的东西
# demo = re.compile(a)#将用正则取出的字符串编译为字节代码。
# lists = demo.findall(source)#以列表的形式返回能匹配的字符串
# # print(lists)
# for a,b,c,d,e in lists:
#     sql='insert into dingdian(leixing,shuming,zhangjie,zuozhe,shijian) values("{}","{}","{}","{}","{}")'.format(a,b,c,d,e)#sql语句
#     youbiao.execute(sql)#用sql语句将数据填入表中
#     mysql.commit()
# mysql.close()#保存


# 以文档形式保存
# a=requests.get('https://www.23us.com/html/78/78926/').content.decode('gbk')#decode('gbk')为编码方式
# b=r'<a href="(\d+\.html)">.*?</a>'
# c=re.compile(b)
# d=c.findall(a)
# for i in d[0:1]:
#     hrefs='https://www.23us.com/html/78/78926/'+i
#     a=requests.get(hrefs).content.decode('gbk')
#     b='<h1>(.*?)</h1>.*?<dd id="contents".*?>(.*?)</dd>'
#     b=b.replace('/<br />', "\n")
#     c=re.compile(b,re.S)
#     d=c.findall(a)
#     # print(d)
#     for a,b in d:
#         b=b.replace('&nbsp;&nbsp;&nbsp;&nbsp;','        ').replace('<br /><br />','\n')#替换
#         op=open(r'E:\text\1.text','w+')#写入
#         op.write(a+'\n'+b)
#         op.close()
#         # print(a,b)
# 原文地址:https://www.cnblogs.com/wbf980728/p/14308916.html