数据处理02

建立4个txt文件记录数据:james.txt、julie.txt、mikey.txt、sarah.txt
# Load each athlete's times: take the first line of the file, drop the
# surrounding whitespace/newline, and split it on commas into a list of strings.
with open('james.txt') as james_file:
    james = james_file.readline().strip().split(',')

with open('julie.txt') as julie_file:
    julie = julie_file.readline().strip().split(',')

with open('mikey.txt') as mikey_file:
    mikey = mikey_file.readline().strip().split(',')

with open('sarah.txt') as sarah_file:
    sarah = sarah_file.readline().strip().split(',')


def sanitize(time_string):
    """Return *time_string* with its '-' or ':' separator replaced by '.'.

    The dash is checked before the colon, so a string containing both is
    split on the dash. A string with neither separator is returned as is.
    Splitting must yield exactly two parts (minutes and seconds); otherwise
    the tuple unpacking raises ValueError.
    """
    for separator in ('-', ':'):
        if separator in time_string:
            # Break the string into its minutes and seconds parts.
            minutes, seconds = time_string.split(separator)
            return minutes + '.' + seconds
    # Nothing to clean up.
    return time_string

# Clean every recorded time, then rebind each name to the cleaned values
# sorted in ascending string order.
james = sorted(map(sanitize, james))
julie = sorted(map(sanitize, julie))
mikey = sorted(map(sanitize, mikey))
sarah = sorted(map(sanitize, sarah))

# Drop duplicate times while keeping the existing (already sorted) order.
# dict.fromkeys() keeps the first occurrence of each key in insertion order
# (Python 3.7+), which avoids the repeated linear "not in" scan of a list.
unique_james = list(dict.fromkeys(james))
# The slice [0:3] yields the first three entries: indices 0, 1 and 2
# (index 0 is included, index 3 is excluded).
print(unique_james[0:3])

unique_julie = list(dict.fromkeys(julie))
print(unique_julie[0:3])

unique_mikey = list(dict.fromkeys(mikey))
print(unique_mikey[0:3])

unique_sarah = list(dict.fromkeys(sarah))
print(unique_sarah[0:3])

输出

['2.01', '2.22', '2.34']
['2.11', '2.23', '2.59']
['2.22', '2.38', '2.49']
['2.18', '2.25', '2.39']

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------

 数据理解

# Author kevin_hou
def get_coach_data(filename):
    """Read the first line of *filename* and return its comma-separated fields.

    The line is stripped of surrounding whitespace before being split on
    commas. If the file cannot be opened or read (IOError), a
    'File error:' message is printed and None is returned.
    """
    try:
        with open(filename) as handle:
            first_line = handle.readline()
    except IOError as ioerr:
        print('File error:' + str(ioerr))
        return None
    return first_line.strip().split(',')
# sarah = get_coach_data('sarah.txt')
def sanitize(time_string):
    """Convert a 'mins-secs' or 'mins:secs' string to the 'mins.secs' form.

    Strings containing neither '-' nor ':' are returned unchanged. When both
    characters are present the dash is used as the separator. The split must
    produce exactly two parts; otherwise the unpacking raises ValueError.
    """
    if '-' not in time_string and ':' not in time_string:
        return time_string
    delimiter = '-' if '-' in time_string else ':'
    minute_part, second_part = time_string.split(delimiter)
    return '.'.join((minute_part, second_part))

# Load the four athletes' time lists, one file per athlete, in a single step.
james, julie, mikey, sarah = map(
    get_coach_data,
    ('james.txt', 'julie.txt', 'mikey.txt', 'sarah.txt'),
)

# with open('james.txt') as jaf:  #等价于 james = get_coach_data('james.txt')
#     data = jaf.readline()
# james = data.strip().split(',')
# with open('julie.txt') as juf:
#     data = juf.readline()
# julie = data.strip().split(',')
# with open('mikey.txt') as mif:
#     data = mif.readline()
# mikey = data.strip().split(',')
# with open('sarah.txt') as saf:
#     data = saf.readline()
# sarah = data.strip().split(',')

# For each athlete: clean the times, drop duplicates via a set, sort the
# result as strings, and print the first three entries.
for athlete_times in (james, julie, mikey, sarah):
    print(sorted({sanitize(t) for t in athlete_times})[0:3])

输出

['2.01', '2.22', '2.34']
['2.11', '2.23', '2.59']
['2.22', '2.38', '2.49']
['2.18', '2.25', '2.39']

  

  

原文地址:https://www.cnblogs.com/kevin-hou1991/p/13641207.html