#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Tag shoe products by segmenting their names against a tag vocabulary.

For every product id in ``FIRST_ID..LAST_ID`` the script reads
``shoes.shoes_name``, segments it with jieba (precise mode), keeps the
tokens that appear in the tag vocabulary file, rewrites a few tokens to
their canonical tag, and stores the result in ``shoes.tag`` as a
``|``-terminated list (e.g. ``"男鞋|Jimmy Choo|"``).
"""
from typing import Iterable, Set

import jieba
import jieba.analyse
import pymysql

# Tag vocabulary: one entry per line, the tag is the first space-separated
# field of the line.
# NOTE(review): this path has no separators after the drive letter, so on
# Windows it is relative to the current directory of drive D: -- it looks
# like backslashes were lost at some point; confirm the intended location.
TAG_FILE = 'D:spidershoes.txt'

# Dictionary file handed to jieba.
# NOTE(review): jieba.set_dictionary replaces jieba's main dictionary; if the
# intent was only to add words, jieba.load_userdict is the usual call --
# confirm before changing.
JIEBA_DICT = './shoes.txt'

# Inclusive range of product ids to process.
FIRST_ID = 1
LAST_ID = 100000

# Placeholders instead of string formatting, so quotes in a product name or
# tag can neither break the statement nor inject SQL.
SELECT_NAME_SQL = "select shoes_name from shoes where id = %s"
UPDATE_TAG_SQL = "update shoes set tag = %s where id = %s"

# Tokens that are rewritten to a canonical tag before being stored
# (presumably the first word of a multi-word brand, or a gender variant).
TAG_ALIASES = {
    'Massimo': 'Massimo Dutti',
    'WHAT': 'WHAT FOR',
    '男': '男鞋',
    '男款': '男鞋',
    '男鞋': '男鞋',
    '女': '女鞋',
    '女款': '女鞋',
    '女鞋': '女鞋',
    'Kiss': 'KissKitty',
    'URBAN': 'URBAN REVIVO',
    'Jimmy': 'Jimmy Choo',
    'Inking': 'Inking Pot',
    'Miss': 'Miss Sixty',
    'Martens': 'Dr.Martens',
}


def load_tags(path: str) -> Set[str]:
    """Return the tag vocabulary stored in the UTF-8 text file *path*.

    The tag is the first space-separated field of each line. A set is
    returned so that membership tests in the per-product loop are O(1).
    """
    with open(path, 'r', encoding='utf-8') as tag_file:
        lines = tag_file.read().splitlines()
    return {line.split(' ')[0] for line in lines}


def build_tag_string(words: Iterable[str], known_tags: Set[str]) -> str:
    """Return the ``|``-terminated tag list for the segmented *words*.

    Only words present in *known_tags* are kept, in their original order and
    without de-duplication; each kept word is mapped through ``TAG_ALIASES``
    and followed by ``'|'``. Returns ``''`` when nothing matches.
    """
    return ''.join(
        TAG_ALIASES.get(word, word) + '|'
        for word in words
        if word in known_tags
    )


def main() -> None:
    """Tag every product in the configured id range and save the result."""
    known_tags = load_tags(TAG_FILE)
    jieba.set_dictionary(JIEBA_DICT)

    coon = pymysql.connect(user='root', password='root', host='127.0.0.1',
                           port=3306, database='bishe_shoes',
                           use_unicode=True, charset="utf8")
    try:
        with coon.cursor() as cursor:
            for shoe_id in range(FIRST_ID, LAST_ID + 1):
                print(shoe_id)

                # Read the product name; ids with no row (or a NULL name)
                # are skipped instead of aborting the whole run.
                cursor.execute(SELECT_NAME_SQL, (shoe_id,))
                row = cursor.fetchone()
                if row is None or row[0] is None:
                    continue
                shoes_name = row[0]
                print(shoes_name)

                # Precise-mode segmentation of the product name.
                result = list(jieba.cut(shoes_name, cut_all=False))
                print(result)

                shoes_ku = build_tag_string(result, known_tags)
                print(shoes_ku)

                # Save the tags; mogrify shows the statement exactly as it
                # will be sent, which keeps the original debug output.
                params = (shoes_ku, shoe_id)
                print(cursor.mogrify(UPDATE_TAG_SQL, params))
                cursor.execute(UPDATE_TAG_SQL, params)
                coon.commit()
    finally:
        coon.close()


if __name__ == "__main__":
    main()
# Run output: