"""Scratch scraper for www.23us.com; all scraping logic is currently disabled.

Only the imports below execute. Two commented-out experiments are kept for
reference:

1. Fetch the site's front page, extract fields with a regex and insert them
   into the MySQL table ``dingdian``.
2. Fetch a chapter page of one novel and write its title and text to a
   local file.
"""

import re

import pymysql
import requests

# --- Save to the database ----------------------------------------------------
# NOTE(review): before re-enabling this section:
#   * move the connection credentials out of the source file;
#   * pass the values as query parameters (cursor.execute(sql, params))
#     instead of formatting them into the SQL string;
#   * `[(.*?)]` in the pattern is a character class as written -- presumably
#     literal brackets (`\[(.*?)\]`) were intended; confirm against the page.
#
# mysql=pymysql.connect(host='127.0.0.1',user='root',password='wbf980728',database='aaa',charset='utf8')#connect to the database, using utf8 encoding
# youbiao=mysql.cursor()#create a cursor
# source = requests.get('https://www.23us.com/').content.decode('gbk') #fetch the page source
# a='<li><p class="ul1">[(.*?)]《<a class="poptext" href=".*?" target="_blank">(.*?)</a>》</p><p class="ul2"><a href=".*?" target="_blank">(.*?)</a></p><p>(.*?)</p>(.*?)</li>'#regex that picks out the fields to extract
# demo = re.compile(a)#compile the regex string
# lists = demo.findall(source)#return every match as a list
# # print(lists)
# for a,b,c,d,e in lists:
#     sql='insert into dingdian(leixing,shuming,zhangjie,zuozhe,shijian) values("{}","{}","{}","{}","{}")'.format(a,b,c,d,e)#SQL statement
#     youbiao.execute(sql)#run the SQL statement to insert the row into the table
# mysql.commit()
# mysql.close()#save

# --- Save as a text document -------------------------------------------------
# NOTE(review): before re-enabling this section:
#   * `(d+.html)` matches a literal "d" -- presumably `(\d+\.html)` was
#     intended; confirm against the page markup;
#   * `b.replace('/<br />', " ")` is applied to the regex pattern string,
#     which does not contain that text, so it has no effect;
#   * 'E:\text\1.text' is not a raw string: `\t` is a tab and `\1` is an
#     octal escape, so use r'E:\text\1.text' (and a `with open(...)` block).
#
# a=requests.get('https://www.23us.com/html/78/78926/').content.decode('gbk')#decode('gbk') selects the encoding used to decode the page
# b='<a href="(d+.html)">.*?</a>'
# c=re.compile(b)
# d=c.findall(a)
# for i in d[0:1]:
#     hrefs='https://www.23us.com/html/78/78926/'+i
#     a=requests.get(hrefs).content.decode('gbk')
#     b='<h1>(.*?)</h1>.*?<dd id="contents".*?>(.*?)</dd>'
#     b=b.replace('/<br />', " ")
#     c=re.compile(b,re.S)
#     d=c.findall(a)
#     # print(d)
#     for a,b in d:
#         b=b.replace(' ',' ').replace('<br /><br />',' ')#replace
#         op=open('E:\text\1.text','w+')#write
#         op.write(a+' '+b)
#         op.close()
#         # print(a,b)