Python 读取 zip 文件并转存为 Excel

import os
import re
import zipfile
import logging
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl.utils import get_column_letter

# Configure the root logger to write records to a file.
logging.basicConfig(
    # Target file for log records.
    filename='new.log',
    # 'a' appends to an existing log; 'w' would overwrite it on every run.
    filemode='a',
    # Minimum severity that gets recorded.
    level=logging.INFO,
    # Record layout: timestamp, source path and line number, level, message.
    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
)

def Readzip(file_name):
    """Collect the contents of the 'guokanzhiguang' members of a zip archive.

    Every member name in the archive is printed. Members whose name contains
    'guokanzhiguang' are read, and those with empty content are skipped.

    Args:
        file_name: Path of the zip archive to open (passed straight to
            ``zipfile.ZipFile``).

    Returns:
        A list of ``bytes`` objects, one per non-empty matching member, in
        the order reported by the archive. On any failure the string
        'Readzip Running Faild!!' is returned instead and the exception is
        logged.
    """
    guokanzhiguang_folder = 'guokanzhiguang'
    try:
        # The context manager closes the archive even if reading a member fails.
        with zipfile.ZipFile(file_name, 'r') as z:
            guokanzhiguang_list = []
            for filename in z.namelist():
                # Show each member name found in the archive.
                print(filename)
                if guokanzhiguang_folder not in filename:
                    continue
                content = z.read(filename)
                if content:
                    guokanzhiguang_list.append(content)
            return guokanzhiguang_list
    except Exception:
        # Keep the sentinel string callers already receive on failure, but
        # record the cause instead of discarding it.
        logging.exception('Readzip failed for %r', file_name)
        return 'Readzip Running Faild!!'

def getBookList(letter_lst):
    """Parse HTML documents and collect their book-entry ``div`` elements.

    Args:
        letter_lst: Iterable of HTML documents, one item per file.

    Returns:
        A list with one item per document: the result of ``find_all`` for
        ``div`` elements whose class is 'book-result-item-warp'. On failure
        the string 'getBookList Running Faild!!' is returned and the cause
        is logged.
    """
    # A bare str/bytes is not a collection of documents; iterating it would
    # process it element by element. Readzip returns such a string when it
    # fails, so treat this as a failure rather than parsing it.
    if isinstance(letter_lst, (str, bytes)):
        logging.error('getBookList expected a list of documents, got %r', letter_lst)
        return 'getBookList Running Faild!!'
    try:
        tag = []
        for html in letter_lst:
            soup = BeautifulSoup(html, 'html.parser')
            tag.append(soup.find_all('div', attrs={'class': "book-result-item-warp"}))
        return tag
    except Exception:
        # Keep the sentinel string for callers, but record what went wrong.
        logging.exception('getBookList failed')
        return 'getBookList Running Faild!!'

# Matches text enclosed in (), {} or [] so it can be removed from titles.
_BRACKETED_TEXT = re.compile(r"\(.*?\)|\{.*?}|\[.*?]")


def _parse_book_entry(bookinfo):
    """Extract the seven spreadsheet fields from one book-entry element."""
    center = bookinfo.find('div', attrs={'class': 'center'})
    allinfo = center.find_all('div', attrs={'class': "info"})

    # Journal title, with any bracketed text removed.
    title = _BRACKETED_TEXT.sub("", center.find("div", attrs={"class": "title"}).get_text())
    # Country. The fixed slice offsets below presumably drop a label prefix
    # of that length -- verify against the page markup.
    country = allinfo[0].get_text()
    # Factor: the 'ifs' and 'diff' spans joined by a single space.
    ifs = allinfo[1].find('span', class_='ifs').get_text()
    diff = allinfo[1].find('span', class_='diff').get_text()
    # Period.
    period = allinfo[2].get_text()
    # Ratio.
    ratio = allinfo[3].get_text()
    # Address: the href of the first link in the fifth info div.
    addre = allinfo[4].find('a').get('href')
    # Self-citation.
    cited_rate = allinfo[5].get_text()

    return [
        title,
        country[4:],
        ifs + " " + diff,
        period[4:],
        ratio[6:],
        addre,
        cited_rate[5:],
    ]


def getBookElementInfo(letter_lst):
    """Turn HTML documents into rows of book information.

    The documents are passed to ``getBookList`` and every book-entry element
    it returns is converted into a seven-item list: title, country, factor,
    period, ratio, address and self-citation.

    Args:
        letter_lst: Iterable of HTML documents, forwarded to ``getBookList``.

    Returns:
        A list of seven-item lists, one per book entry. An empty string is
        returned when ``getBookList`` yields nothing. On failure (including
        a failure reported by ``getBookList``) the string
        'getBookElementInfo Running Faild!!' is returned and the cause is
        logged.
    """
    try:
        alllist = getBookList(letter_lst)
        if isinstance(alllist, str):
            # getBookList reports failure with a message string; do not
            # treat its characters as a list of files.
            logging.error('getBookList reported a failure: %s', alllist)
            return 'getBookElementInfo Running Faild!!'
        if len(alllist) > 0:
            print('文件个数:%d' % len(alllist))
            alldetialbookinfolst = []
            for lst in alllist:
                for bookinfo in lst:
                    alldetialbookinfolst.append(_parse_book_entry(bookinfo))
            return alldetialbookinfolst
        print('txt文件不存在或内容为空!!!')
        return ''
    except Exception:
        # Keep the sentinel string for callers, but record what went wrong.
        logging.exception('getBookElementInfo failed')
        return 'getBookElementInfo Running Faild!!'

def Insert2Excel(bookinfo, work_name='gkbookinfolist.xlsx'):
    """Write book rows to a sheet named 'gk_sheet' in a new Excel workbook.

    The sheet starts with a fixed seven-column header row, every column is
    given a width of 15, and each item of ``bookinfo`` is appended as one
    row before the workbook is saved.

    Args:
        bookinfo: Iterable of rows to append below the header.
        work_name: Path of the workbook file to write. Defaults to
            'gkbookinfolist.xlsx'.

    Returns:
        'Insert Excel succcessfully!' when the workbook was saved, otherwise
        'Insert Excel failed!' (the cause is logged).
    """
    try:
        tableTitle = ['杂志', '国家', '因子', '周期', '占比', '地址', '自引']
        wb = Workbook()
        ws = wb.active
        ws.title = 'gk_sheet'
        ws.append(tableTitle)
        # Give every header column the same fixed width.
        for i in range(1, ws.max_column + 1):
            ws.column_dimensions[get_column_letter(i)].width = 15
        for info in bookinfo:
            ws.append(info)
        wb.save(work_name)
        return 'Insert Excel succcessfully!'
    except Exception:
        # Keep the status string for callers, but record what went wrong.
        logging.exception('Insert2Excel failed while writing %r', work_name)
        return 'Insert Excel failed!'

if __name__ == '__main__':
    # Working directory at launch; captured here but not used below.
    path = os.getcwd()
    # Pipeline: zip members -> parsed book rows -> Excel workbook.
    bookinfo = getBookElementInfo(Readzip('bookinfo.zip'))
    # Write the workbook and show the status message it returns.
    status = Insert2Excel(bookinfo)
    print(status)
原文地址:https://www.cnblogs.com/ouzai/p/13723707.html