python 从 txt 文件中提取数据保存到 xlsx 文件中

1、python 时间的转换和大小比较

2、写数据到 xlsx 中

'''
@description: 获取指定的文件中指定的数据
@param {*}
@return {*}
@author: wanghao
'''
'''
程序所在路径必须和文件所在路径在同一个文件夹
验证方法是：打开 vscode 的终端，输入 ls（Windows 的 cmd 下为 dir），程序的名字和想要提取文件的名字都能看到即表示在同一个文件夹里

本版本可以修改的内容为：提取文件的文件名，想要提取数据的列名，想要保存的文件名
其他部分不建议修改
'''

import xlwt
import openpyxl
import os
import time
import datetime
from xlwt.Worksheet import Worksheet

def readData(fileName):
    """Read a text file and return all of its lines.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list with one string per line; line endings are kept.
    """
    # The context manager closes the handle even if reading raises;
    # the previous version never closed the file.
    with open(fileName, 'r') as fopen:
        return fopen.readlines()

def getColNum(line, get_col):
    """Locate the requested column names in a comma-separated header line.

    Args:
        line: The header line; fields are separated by ',' and are
            compared after stripping surrounding whitespace.
        get_col: Column names to look up (any number of names).

    Returns:
        A list of zero-based field indices, one per name in ``get_col``
        that occurs in the header, ordered like ``get_col`` so that the
        indices line up with a header row built from ``get_col``. Names
        that are not present in ``line`` are left out. If a name occurs
        more than once in the header, its first occurrence is used.
    """
    first_index = {}
    for index, col in enumerate(line.split(',')):
        # setdefault keeps the first occurrence of a repeated header name.
        first_index.setdefault(col.strip(), index)

    return [first_index[name] for name in get_col if name in first_index]

if __name__ == '__main__':
    # Script entry point: copy the time column plus the selected data
    # columns from a comma-separated text file into an .xlsx workbook.

    # Read the raw text file.
    txtFileName = '1.txt'                    # input file name: edit as needed
    lines = readData(txtFileName)

    get_col = ['[OCS]d_ppm', '[CO2]d_ppm_sd']    # columns to extract: edit as needed
    get_col_num = []

    # Create the workbook and write the header row.
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = '数据处理'
    data = ['  Time'] + get_col
    ws.append(data)

    # Process the file line by line.
    for i, line in enumerate(lines):
        if i == 0:
            # The first line is not used.
            continue
        if i == 1:
            # The second line is the header that names the columns.
            get_col_num = getColNum(line, get_col)
            print(get_col_num)
            continue
        if i == 2:
            # NOTE(review): the original condition (`2 < i`) also skipped
            # this row; kept as-is -- confirm against the input format.
            continue
        if not line.strip():
            # A blank line has no data fields; indexing into it would raise
            # IndexError and abort the run before the workbook is saved.
            continue
        col_info = line.split(',')
        # First field is the time; the rest are the selected columns.
        ws.append([col_info[0]] + [col_info[k] for k in get_col_num])
    wb.save('113.xlsx')        # output file name: edit as needed

v2.0：添加时间的筛选

'''
@description: 获取指定的文件中指定的数据
@param {*}
@return {*}
@author: wanghao
'''
'''
程序所在路径必须和文件所在路径在同一个文件夹
验证方法是：打开 vscode 的终端，输入 ls（Windows 的 cmd 下为 dir），程序的名字和想要提取文件的名字都能看到即表示在同一个文件夹里

版本可以修改的东西为：
1、提取文件的文件名
2、想要提取数据的列名
3、想要保存的文件名
4、提取数据的起止时间段需要去修改
其他部分不建议修改
'''

import datetime
import os
import time

import openpyxl
import xlwt
from xlwt.Worksheet import Worksheet


# Read the given txt file.
def readData(fileName):
    """Read a text file and return all of its lines.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list with one string per line; line endings are kept.
    """
    # The context manager closes the handle even if reading raises;
    # the previous version never closed the file.
    with open(fileName, 'r') as fopen:
        return fopen.readlines()

# Convert a time string into a timestamp.
def get_time_stamp(valid_time):
    """Convert a ``'%m/%d/%Y %H:%M:%S.%f'`` time string to a Unix timestamp.

    Args:
        valid_time: Time string such as ``'06/24/2021 15:47:07.172'``.

    Returns:
        The timestamp as an int number of seconds, interpreting the time
        as local time. The fractional-second part of ``valid_time`` is
        parsed but discarded, so two times within the same second map to
        the same value.

    Raises:
        ValueError: If ``valid_time`` does not match the expected format.
    """
    parsed = datetime.datetime.strptime(valid_time, '%m/%d/%Y %H:%M:%S.%f')
    # timetuple() carries no sub-second field, which gives the same whole
    # second the earlier strftime/strptime round trip produced.
    return int(time.mktime(parsed.timetuple()))

# Find the field indices of the requested columns.
def getColNum(line, get_col):
    """Locate the requested column names in a comma-separated header line.

    Args:
        line: The header line; fields are separated by ',' and are
            compared after stripping surrounding whitespace.
        get_col: Column names to look up (any number of names).

    Returns:
        A list of zero-based field indices, one per name in ``get_col``
        that occurs in the header, ordered like ``get_col`` so that the
        indices line up with a header row built from ``get_col``. Names
        that are not present in ``line`` are left out. If a name occurs
        more than once in the header, its first occurrence is used.
    """
    first_index = {}
    for index, col in enumerate(line.split(',')):
        # setdefault keeps the first occurrence of a repeated header name.
        first_index.setdefault(col.strip(), index)

    return [first_index[name] for name in get_col if name in first_index]

if __name__ == '__main__':
    # Script entry point: copy the time column plus the selected data
    # columns from a comma-separated text file into an .xlsx workbook,
    # keeping only rows whose time lies inside [start_time, end_time].

    # Read the raw text file.
    txtFileName = '1.txt'                    # input file name: edit as needed
    lines = readData(txtFileName)

    get_col = ['[OCS]d_ppm', '[CO2]d_ppm_sd']    # columns to extract: edit as needed
    get_col_num = []

    # Create the workbook and write the header row.
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = '数据处理'
    data = ['  Time'] + get_col
    ws.append(data)

    start_time = '06/24/2021 15:47:07.172'    # time window to extract: edit as needed
    end_time = '06/24/2021 15:47:25.073'

    # Compare parsed datetimes rather than whole-second timestamps so the
    # millisecond part of the bounds is honoured; whole-second comparison
    # kept rows outside the window that shared a second with a bound.
    time_format = '%m/%d/%Y %H:%M:%S.%f'
    start_dt = datetime.datetime.strptime(start_time, time_format)
    end_dt = datetime.datetime.strptime(end_time, time_format)

    # Process the file line by line.
    for i, line in enumerate(lines):
        if i == 0:
            # The first line is not used.
            continue
        if i == 1:
            # The second line is the header that names the columns.
            get_col_num = getColNum(line, get_col)
            print(get_col_num)
            continue
        if i == 2:
            # NOTE(review): the original condition (`2 < i`) also skipped
            # this row; kept as-is -- confirm against the input format.
            continue
        if not line.strip():
            # A blank line has no time field to parse; skip it rather than
            # abort the run before the workbook is saved.
            continue
        col_info = line.split(',')
        cur_time = datetime.datetime.strptime(col_info[0].strip(), time_format)
        if cur_time < start_dt:
            continue
        if cur_time > end_dt:
            # Stop at the first row past the window, as before; this relies
            # on rows being in chronological order -- TODO confirm.
            break
        # First field is the time; the rest are the selected columns.
        ws.append([col_info[0]] + [col_info[k] for k in get_col_num])
    wb.save('114.xlsx')        # output file name: edit as needed