python操作excel

关于excel,大部分操作应该主要用于数据分析,其中,最主流的方式应该是采用pandas库,这里主要介绍几种方式来读写excel

读取excel数据

处理一般的excel,尤其没有合并单元格之类的情况时,我们可以采用普通的方法

这里采用xlrd库

  • 无单元格合并的情况下

    import xlrd
    
    def parse_list(data):
        """
        parse a list
        :param data: list
        :return:
        """
        data_list = []
        for i in range(1, len(data)):
            data_dict = {}
            for index, key in enumerate(data[0]):
                data_dict[key] = data[i][index]
            data_list.append(data_dict)
        return data_list
    
    def read_excel(filename, sheet=None):
        """
        read excel and parse every line
    
        :param name: file of absolute path
        :return: some dict which is about everyline of list
        """
        workbook = xlrd.open_workbook(filename)
        total_data = []
        if sheet:
            sh = workbook.sheet_by_name(sheet)
            for row in range(0, sh.nrows):
                row_list = sh.row_values(row)
                total_data.append(row_list)
        return parse_list(total_data)
    

    上述方法主要针对特定的sheet,当然,根据上述方法也也可进行改变,读取所有的sheet

    关键方法在于read_excel,所以我们来改写该方法

    import xlrd
    def read_excel(filename):
        """
        read excel and parse every line
    
        :param name: file of absolute path
        :return: some dict which is about everyline of list
        """
        workbook = xlrd.open_workbook(filename)
        total_data = []
        name_sheets = workbook.sheet_names()
        for sheet in name_sheets:
            sh = workbook.sheet_by_name(sheet)
            for row in range(0, sh.nrows):
                row_list = sh.row_values(row)
                total_data.append(row_list)
        return parse_list(total_data)
    
  • 主要用于处理合并存在合并单元格的情况

    import xlrd
    
    
    def get_excel():
        data_list = []
        with xlrd.open_workbook(r'D:325.xlsx') as workbook:
            name_sheets = workbook.sheet_names()  # 获取Excel的sheet表列表,存储是sheet表名
            for index in name_sheets:  # for 循环读取每一个sheet表的内容
                apply_dic = []
                sheet_info = workbook.sheet_by_name(index)  # 根据表名获取表中的所有内容,sheet_info也是列表,列表中的值是每个单元格里值
                first_line = sheet_info.row_values(0)  # 获取首行,我这里的首行是表头,我打算用表头作为字典的key,每一行数据对应表头的value,每一行组成一个字典
                values_merge_cell = merge_cell(sheet_info)  # 这里是调用处理合并单元格的函数
                for i in range(1, sheet_info.nrows):  # 开始为组成字典准备数据
                    other_line = sheet_info.row_values(i)
                    for key in values_merge_cell.keys():
                        if key[0] == i:
                            other_line[key[1]] = values_merge_cell[key]
                    # print(other_line)
                    dic = list_dic(first_line, other_line)  # 调用组合字典的函数,传入key和value,字典生成
                    apply_dic.append(dic)
                data_list.append({'index': index, 'data': apply_dic})
        return data_list
    
    
    def list_dic(list1, list2):
        '''
        two lists merge a dict,a list as key,other list as value
        :param list1:key
        :param list2:value
        :return:dict
        '''
        dic = dict(map(lambda x, y: [x, y], list1, list2))
        return dic
    
    
    def merge_cell(sheet_info):
        '''
        #handle Merge transverse cells and handle Merge Vertical Cells, assign empty cells,
        :param rlow:row, include row exclusive of row_range
        :param rhigh:row_range
        :param clow:col, include col exclusive of col_range
        :param chigh:col_range
        :param sheet_info:object of sheet
        :return:dic contain all of empty cells value
        '''
        merge = {}
        merge_cells = sheet_info.merged_cells
        for (rlow, rhigh, clow, chigh) in merge_cells:
            value_mg_cell = sheet_info.cell_value(rlow, clow)
            if rhigh - rlow == 1:
                # Merge transverse cells
                for n in range(chigh - clow - 1):
                    merge[(rlow, clow + n + 1)] = value_mg_cell
            elif chigh - clow == 1:
                # Merge Vertical Cells
                for n in range(rhigh - rlow - 1):
                    merge[(rlow + n + 1, clow)] = value_mg_cell
        return merge
    
    
    if __name__ == '__main__':
        get_excel()
    
    

写excel

  • 生成xlsx,使用pandas

    import pandas as pd
    def write_excel(data, filename, keys=''):
        data_df = pd.DataFrame(data)
        if keys != '':
            data_df.columns = list(keys)
        writer = pd.ExcelWriter('{}.xlsx'.format(filename))
        data_df.to_excel(writer, index=None)
        writer.save()
    
  • 生成csv

    import csv
    def write_csv(name, data_list):
        fieldnames = data_list[0]
        print(fieldnames)
        with open(name + '.csv', mode='w', newline='', encoding='utf-8-sig') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()
            for data in data_list:
                writer.writerow(data)
    

    当然,可以通过json去生成csv

    import json
    def json_to_csv(name):
        with open(name) as f:
            a = json.loads(f.read())
            write_csv(name, a)
    

    这里依旧可以使用pandas

    def data_to_csv(data_list, csv_key, name):
        """
        save data to csv
        :param data_list: data
        :param csv_key: csv column
        :param name: file name
        :return:
        """
        final_data = []
        for data in data_list:
            sign_key = []
            for key, value in data.items():
                sign_key.append(value)
            final_data.append(sign_key)
        # 将总数据转化为data frame再输出
        df = pd.DataFrame(data=final_data,
                          columns=csv_key)
        df.to_csv(name + '.csv', index=False, encoding='utf-8_sig')
    
    
原文地址:https://www.cnblogs.com/ZSMblog/p/13154984.html