Python--pandas操作excel

pandas将excel内容生成在线的html:

#encoding:utf-8
import pandas as pd
import codecs
def test19():
    xd = pd.ExcelFile('b站.xls')
    df = xd.parse()
    with codecs.open('1.html', 'w', 'UTF-8') as html_file:
        html_file.write(df.to_html(header=True, index=False))

pandas将多个excel合并成一个excel并对数据进行去重

#encoding:utf-8
import os
import pandas as pd
def test20():
    # 两个excel合并，并去重
    current_path = os.path.dirname(os.path.realpath(__file__))
    files = os.listdir(os.path.join(current_path, "csv_file"))
    df1 = pd.read_excel(os.path.join(current_path, "csv_file", files[0]))
    for file in files[1:]:
        df2 = pd.read_excel(os.path.join(current_path, "csv_file", file))
        # 合并操作
        df1 = pd.concat([df1, df2], axis=0, ignore_index=True)
    df1 = df1.drop_duplicates()    # 去重
    # df1 = df1.reset_index(drop=True)    # 重新生成index
    df1.to_excel(os.path.join(current_path, "csv_file", "total.xlsx"), index=False)


test20()

根据列进行合并

#encoding:utf-8
import os
import numpy as np
import pandas as pd
def test21():
    # excel的列合并
    # 将除了第一个以外的excel中第几列添加到第一个excel中
    # 注意点: excel的行数保持一致
    current_path = os.path.dirname(os.path.realpath(__file__))
    files = os.listdir(os.path.join(current_path, "csv_file"))
    df = pd.read_excel(os.path.join(current_path, "csv_file", files[0]))
    for file in files[1:]:
        # usecols: 取第几列
        new_file = pd.read_excel(os.path.join(current_path, "csv_file", file),usecols=[1])
        df = np.hstack((df, new_file))
    df = pd.DataFrame(df)
    df.to_excel(os.path.join(current_path, "csv_file", "total.xlsx"), index=False)