源数据的换行符

from openpyxl import Workbook
import time
import sys
import os
import pprint
import re

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

db_db_code_dic = {}
fname = 'jmtool0315_public_tbl_area_type.csv'
fd = '%s\%s' % (curPath, fname)
with open(fd, 'r', encoding='utf-8') as fo:
    for i in fo:
        l = i.split(',')
        db_db_code_dic[l[1]] = l[2]
fo.close()

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

tag_jmtool_list = ['(', '(', '-']
tail_list = ['号楼', '栋', '幢', '单元']
for i in range(0, 30, 1):
    for tail in tail_list:
        str_ = '%s%s' % (i, tail)
        tag_jmtool_list.append(str_)

zh_num_list = ['零', '一', '二', '三', '四', '五', '六', '七', '八', '九']
zh_num_zhk_dic = {}
zh_num_numk_dic = {}
for i in range(0, 20, 1):
    if i < 10:
        zh_num = zh_num_list[i]
    elif i == 10:
        zh_num = '十'
    else:
        ii = i - 10
        zh_num = '%s%s' % ('十', zh_num_list[ii])
    zh_num_numk_dic[str(i)] = zh_num
    zh_num_zhk_dic[zh_num] = str(i)

for i in range(0, 30, 1):
    for tail in tail_list:
        str_ = '%s%s' % (i, tail)
        tag_jmtool_list.append(str_)

for k in zh_num_zhk_dic:
    for tail in tail_list:
        str_ = '%s%s' % (k, tail)
        tag_jmtool_list.append(str_)


def extract_name(name_):
    for i in tag_jmtool_list:
        name_ = name_.split(i)[0]
    # 润景怡园10号
    name_ = re.compile('d+').split(name_)[0]
    # '宝安智谷G座'
    name_ = re.compile('[a-zA-Z]+').split(name_)[0]
    return name_


def gen_bd_query_name(name_, type_):
    res_ = name_
    if type_ == '住宅小区':
        res_ = extract_name(name_)
    if type_ == '写字楼':
        # 海星城市广场B座
        name_ = re.compile('[a-zA-Z]+').split(name_)[0]
    res_ = res_.replace('·', '').replace('.', '').replace('(', '(').replace(')', ')').replace('?', '').replace('?',
                                                                                                               '').replace(
        ' ', '')
    return res_


fname = '采务.csv'
fd = '%s\%s' % (curPath, fname)
wb = Workbook()
worksheet = wb.active
file_title_l = 'id,area_code,uid, name,type_,city,district,address,street,submit_time'.split(',')
worksheet.append(file_title_l)

res_dic = {}

with open(fd, 'r', encoding='utf-8') as fc:
    str_ = ''
    for i in fc:
        if i.find('"id"') > -1:
            continue
        if i.find('"
') == -1:
            str_ = '%s%s' % (str_, i.replace('
', ''))
            continue
        else:
            if len(i.replace(' ', '')) < 6:
                continue
            else:
                str_ = '%s%s' % (str_, i.replace('
', ''))
        ii = str_.replace('
', '').replace('";', ';').split(';"')
        id, area_code, uid, name, db_code, city, district, address, street, submit_time = ii
        submit_time = submit_time.replace('"', '')
        type_ = db_db_code_dic[db_code]
        ll = id, area_code, uid, name, type_, city, district, address, street, submit_time
        worksheet.append(ll)
        str_ = ''

        name_reduction = gen_bd_query_name(name, type_)
        if city not in res_dic:
            res_dic[city] = {}
        if district not in res_dic[city]:
            res_dic[city][district] = {}
        if name_reduction not in res_dic[city][district]:
            res_dic[city][district][name_reduction] = []
        ll = id, area_code, type_, city, district, uid, name, address, street, name_reduction, submit_time
        res_dic[city][district][name_reduction].append(ll)

fc.close()
localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
file_name = '%s%s%s' % (fname, '-TO-EXCEL', localtime_)
file_name_save = '%s\%s%s' % (curPath, file_name, '.xlsx')
wb.save(file_name_save)
wb.close()

wb = Workbook()
worksheet = wb.active
file_title_str = 'id, area_code,type_, city, district, uid, name,address, street, name_reduction, submit_time'
file_title_l = file_title_str.replace(' ', '').split(',')
worksheet.append(file_title_l)
type_dic, uid_l = {}, []
for city in res_dic:
    for district in res_dic[city]:
        for name_reduction in res_dic[city][district]:
            for l in res_dic[city][district][name_reduction]:
                worksheet.append(l)
                type_, uid = l[2], l[5]
                if type_ not in type_dic:
                    type_dic[type_] = 0
                type_dic[type_] += 1
                if uid not in uid_l:
                    uid_l.append(uid)
localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
file_name = '%s%s%s' % (file_name, '-ORDERED', localtime_)
file_name_save = '%s\%s%s' % (curPath, file_name, '.xlsx')
wb.save(file_name_save)
wb.close()

sorted(type_dic.items(), key=lambda x: x[1])
for type_ in type_dic:
    print(type_, ';', type_dic[type_])

print(len(uid_l))

  

from openpyxl import Workbook
import time
import sys
import os
import pprint

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

db_code_dic = {}
fname = 'j_area_type.csv'
fd = '%s\%s' % (curPath, fname)
with open(fd, 'r', encoding='utf-8') as fo:
    for i in fo:
        l = i.split(',')
        db_code_dic[l[1]] = l[2]
fo.close()

fname = '采任务.csv'
fd = '%s\%s' % (curPath, fname)
wb = Workbook()
worksheet = wb.active
file_title_l = 'id, area_code, uid, name, type_, district, dname, address, street, submit_time'.split(',')
worksheet.append(file_title_l)
with open(fd, 'r', encoding='utf-8') as fc:
    str_ = ''
    for i in fc:
        if i.find('"id"') > -1:
            continue
        if i.find('"
') == -1:
            str_ = '%s%s' % (str_, i.replace('
', ''))
            continue
        else:
            if len(i.replace(' ', '')) < 6:
                continue
            else:
                str_ = '%s%s' % (str_, i.replace('
', ''))
        ii = str_.replace('
', '').replace('";', ';').split(';"')
        id, area_code, uid, name, ref_area_type_code, district, dname, address, street, submit_time = ii
        type_ = db_code_dic[ref_area_type_code]
        ll = id, area_code, uid, name, type_, district, dname, address, street, submit_time.replace('"','')
        worksheet.append(ll)
        str_ = ''
fc.close()
localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
file_name = '%s%s%s' % (fname, '-TO-EXCEL', localtime_)
file_name_save = '%s\%s%s' % (curPath, file_name, '.xlsx')
wb.save(file_name_save)
wb.close()

  

原文地址:https://www.cnblogs.com/rsapaper/p/7455011.html