python jieba分词

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import jieba
import jieba.analyse
import pymysql

# Tag library file: one entry per line, the tag is the first
# space-separated field of the line.
# NOTE(review): 'D:spidershoes.txt' is a drive-relative Windows path; it looks
# like backslashes may have been lost (e.g. D:\spider\shoes.txt) -- confirm.
TAG_LIBRARY_PATH = 'D:spidershoes.txt'
# Dictionary file handed to jieba.
JIEBA_DICT_PATH = './shoes.txt'
# Inclusive range of `shoes.id` values processed by default.
FIRST_ID = 1
LAST_ID = 100000

# Tokens that are only a fragment (or a synonym) of the tag that should be
# stored, mapped to the canonical tag text.
TAG_ALIASES = {
    'Massimo': 'Massimo Dutti',
    'WHAT': 'WHAT FOR',
    '男款': '男鞋',
    '女款': '女鞋',
    'Kiss': 'KissKitty',
    'URBAN': 'URBAN REVIVO',
    'Jimmy': 'Jimmy Choo',
    'Inking': 'Inking Pot',
    'Miss': 'Miss Sixty',
    'Martens': 'Dr.Martens',
}


def load_tag_library(path):
    """Return the set of tags listed in the tag library file at *path*.

    The tag is the text before the first space on each line.  Blank lines
    are skipped.  A set is returned so that the per-token membership test
    in the main loop is O(1) instead of a list scan.
    """
    with open(path, 'r', encoding='utf-8') as library:
        first_fields = (line.split(' ')[0] for line in library.read().splitlines())
        return {field for field in first_fields if field}


def canonical_tag(word):
    """Return the tag text to store for *word* (aliases expanded)."""
    return TAG_ALIASES.get(word, word)


def build_tag_string(words, known_tags):
    """Return the '|'-terminated tags for the *words* found in *known_tags*.

    Words keep their original order and duplicates are kept.  Every tag is
    followed by '|', so a non-empty result ends with a trailing '|'; the
    result is '' when no word is a known tag.
    """
    return ''.join(
        canonical_tag(word) + '|' for word in words if word in known_tags
    )


def main(first_id=FIRST_ID, last_id=LAST_ID):
    """Tokenise every shoe name with jieba and store its tags in MySQL.

    For each id in ``first_id..last_id`` (inclusive) the row's ``shoes_name``
    is read, cut in jieba's precise mode, filtered against the tag library
    and written back to the row's ``tag`` column.  Ids with no row (or an
    empty name) are skipped instead of aborting the whole run.
    """
    known_tags = load_tag_library(TAG_LIBRARY_PATH)

    # NOTE(review): set_dictionary replaces jieba's main dictionary; if the
    # intent is to add custom words on top of the default dictionary,
    # jieba.load_userdict is the documented call -- confirm.
    jieba.set_dictionary(JIEBA_DICT_PATH)

    select_sql = "select shoes_name from shoes where id = %s"
    update_sql = "update shoes set tag = %s where id = %s"

    coon = pymysql.connect(user='root', password='root', host='127.0.0.1', port=3306, database='bishe_shoes', use_unicode=True, charset="utf8")
    try:
        with coon.cursor() as cursor:
            for shoes_id in range(first_id, last_id + 1):
                print(shoes_id)
                # Read the product name; values are bound by the driver
                # rather than formatted into the SQL text.
                cursor.execute(select_sql, (shoes_id,))
                row = cursor.fetchone()
                if row is None or not row[0]:
                    # Gap in the id sequence or empty name: nothing to tag.
                    continue
                shoes_name = row[0]
                print(shoes_name)

                # Precise-mode segmentation of the product name.
                words = list(jieba.cut(shoes_name, cut_all=False))
                print(words)

                shoes_ku = build_tag_string(words, known_tags)
                print(shoes_ku)

                # Store the generated tags on the product row.
                params = (shoes_ku, shoes_id)
                print(cursor.mogrify(update_sql, params))
                cursor.execute(update_sql, params)
                # Commit per row so an interrupted run keeps finished rows.
                coon.commit()
    finally:
        coon.close()


if __name__ == "__main__":
    main()

运行结果:

原文地址:https://www.cnblogs.com/qilin20/p/12284570.html