# Python 读取本地txt文件生成excel

import os
import re
import logging
from bs4 import BeautifulSoup
from openpyxl import  Workbook
from openpyxl.utils import get_column_letter
import datetime

# Route INFO-and-above records to the file food.log. File mode 'a' appends to
# the existing log on every run ('w' would overwrite it instead); 'a' is also
# what logging uses when filemode is omitted.
logging.basicConfig(
    filename='food.log',
    filemode='a',
    level=logging.INFO,
    # Record layout: timestamp - source path[line:N] - level: message
    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
)

def Insert2Excel(allinfo, work_name='restaurantsinfo.xlsx'):
    """Write restaurant rows to an Excel workbook and report the outcome.

    Creates a new workbook whose active sheet is titled 'restaurants', writes
    the header row ``name, time, score``, sets every header column to width
    15, appends each row of *allinfo* and saves the workbook as *work_name*.

    Args:
        allinfo: Iterable of rows; each row is a sequence of cell values,
            expected in ``[name, time, score]`` order to match the header.
        work_name: Path of the .xlsx file to write. Defaults to
            'restaurantsinfo.xlsx', the name this function always used.

    Returns:
        'Insert Excel succcessfully!' once the workbook has been saved,
        otherwise 'Insert Excel failed!'. Failures are reported through the
        return value, never raised, and the traceback is written to the log.
    """
    table_title = ['name', 'time', 'score']
    try:
        wb = Workbook()
        ws = wb.active
        ws.title = 'restaurants'
        ws.append(table_title)
        # Only the header exists at this point, so max_column is the number
        # of header columns.
        for i in range(1, ws.max_column + 1):
            ws.column_dimensions[get_column_letter(i)].width = 15
        for info in allinfo:
            ws.append(info)
        wb.save(work_name)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate, and keep the cause
        # in the log instead of discarding it.
        logging.exception('Failed to write Excel file %s', work_name)
        return 'Insert Excel failed!'
    # NOTE: spelling of the message is kept as-is so existing output is unchanged.
    return 'Insert Excel succcessfully!'
def first_number(text, default=''):
    """Return the first run of decimal digits in *text*, or *default* if none."""
    # Raw string with an escaped \d: the pattern "d+" would match literal
    # 'd' characters instead of digits.
    match = re.search(r'\d+', text)
    return match.group() if match else default


def parse_restaurant(restaurant):
    """Extract ``[name, time, score]`` from one restaurant element.

    Args:
        restaurant: A BeautifulSoup element for a single restaurant entry.

    Returns:
        A three-item list ``[name, time, score]`` of strings, or None when the
        element has no name heading. When the element does not contain exactly
        two 'numbersChild___2qKMV' divs, score falls back to '0' and the time
        is read from the first div (empty string if there is none).
    """
    heading = restaurant.find('h6', attrs={'class': 'name___2epcT'})
    if heading is None:
        return None
    numbers = restaurant.find_all('div', attrs={'class': 'numbersChild___2qKMV'})
    if len(numbers) == 2:
        # Two entries: the first is the score, the second holds the time.
        score = numbers[0].get_text()
        minutes = first_number(numbers[1].get_text())
    else:
        # Otherwise the only usable entry is treated as the time.
        score = '0'
        minutes = first_number(numbers[0].get_text()) if numbers else ''
    return [heading.get_text(), minutes, score]


def main():
    """Parse the saved page food.txt and export its restaurants to Excel."""
    start = datetime.datetime.now().replace(microsecond=0)
    print('Start: ', start)
    # url = 'https://food.grab.com/sg/en/restaurants'
    # Read raw bytes and let BeautifulSoup work out the encoding.
    with open('food.txt', 'rb') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'html.parser')
    container = soup.find('div', attrs={'class': 'ant-row-flex RestaurantListRow___1SbZY'})
    if container is None:
        # Do not write a workbook when the expected list is absent.
        logging.error('Restaurant list container not found in food.txt')
        print('Restaurant list not found in food.txt')
    else:
        # Direct child elements only; iterating the container itself would
        # also yield text nodes, which cannot be searched like elements.
        cards = container.find_all(True, recursive=False)
        print(len(cards))
        allinfo = []
        for card in cards:
            row = parse_restaurant(card)
            if row is None:
                logging.warning('Skipping an element without a restaurant name')
                continue
            allinfo.append(row)
        print(Insert2Excel(allinfo))
    end = datetime.datetime.now().replace(microsecond=0)
    print('End:', end)
    print('Running time: %s Seconds' % (end - start))


if __name__ == '__main__':
    main()
# 原文地址:https://www.cnblogs.com/ouzai/p/13739321.html