爬取金10网数据,并写入到Excel表格里面(用到 re、requests、xlwt)

import requests
import re
import xlwt

def Get_news():
    """Fetch the jin10.com home page and extract its flash-news entries.

    Returns:
        A list of ``(time, content)`` string tuples, one per regex match, in
        the order they appear in the page. The list is empty when nothing in
        the page matches the pattern.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
    """
    url = 'https://www.jin10.com/'
    # A timeout keeps the script from hanging forever on a stalled connection.
    html = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
    html.raise_for_status()
    # Decode using the encoding detected from the body itself.
    html.encoding = html.apparent_encoding
    # Group 1 is the timestamp, group 2 is the headline text.
    reg = re.compile(
        r'<div class="jin-flash_time">(.*?)</div></div>'
        r'<div class="jin-flash_b"><h4>(.*?)</h4></div></div>'
    )
    return reg.findall(html.text)

    
def excel_write(flash_news):
    """Write flash-news rows to the Excel file ``jin10.xls``.

    Creates a workbook with a single sheet named ``jin_new``, writes a header
    row, then writes one row per item: the item's first element goes to
    column 0 and its second element to column 1.

    The workbook is saved once, after all rows have been written, so the
    file (containing only the header row) is also produced when
    ``flash_news`` is empty.

    Args:
        flash_news: Iterable of indexable two-item records
            ``(time, content)``, such as the list returned by ``Get_news``.
    """
    newtable = 'jin10.xls'          # name of the Excel file to create
    wb = xlwt.Workbook(encoding='utf-8')
    ws = wb.add_sheet('jin_new')
    headDate = ['时间', '新闻内容']   # column headers: time, news content
    for colnum, title in enumerate(headDate):
        ws.write(0, colnum, title)

    # Data rows start at row 1 because row 0 holds the headers.
    for index, flash_new in enumerate(flash_news, start=1):
        for i in range(2):
            ws.write(index, i, flash_new[i])

    # Save once after the loop: saving inside it rewrote the whole file for
    # every row and skipped the save entirely when there were no rows.
    wb.save(newtable)
        
# Script entry: scrape the current flash news, then write it to the workbook.
latest_news = Get_news()
excel_write(latest_news)
View Code

目前还不是很完美:新闻内容里仍然带有<b></b>标签,空白的单元格也还没有处理,最关键的一点是还没有做到实时监控。

原文地址:https://www.cnblogs.com/114811yayi/p/6762741.html