joke python

# -*- coding: utf-8 -*-
import pycurl
import re
import cStringIO
from pypinyin import lazy_pinyin

import sys, locale

# In-memory buffer that receives the raw response body from pycurl.
buf = cStringIO.StringIO()

# Page that is downloaded and scanned for city names.
GRAB_URL = 'http://www.gov.cn/test/2011-08/22/content_1930111.htm'

c = pycurl.Curl()
try:
    c.setopt(c.HTTPHEADER, ['Accept-Charset: UTF-8'])
    c.setopt(c.URL, GRAB_URL)
    c.setopt(c.WRITEFUNCTION, buf.write)
    c.perform()
finally:
    # Release the libcurl handle even when the transfer fails.
    c.close()

# The body is decoded as UTF-8; a different page encoding raises
# UnicodeDecodeError here.
grab_data = buf.getvalue().decode('utf-8')
# Pick out fragments made of three digits, one non-"0" character, "00", and
# then everything up to the last "<" on the line.  Presumably these are the
# city-level rows of the page, e.g. a code such as 440300 followed by the
# city name -- TODO confirm against the live page markup.
tmp_list = re.findall(r"\d{3}[^0]0{2}.*<", grab_data)
# Cities grouped by the pinyin of their last character (same ending).
shi_dic = {}
# Cities grouped by the pinyin of their first character (same beginning).
shi_dic_head = {}
for i in tmp_list:
    tmp = i.split(' ')
    if len(tmp) < 2:
        # No space-separated second field, so there is no name to read.
        continue
    city = tmp[1].split('<')
    shi_exist = city[0].find(u'市')
    if shi_exist <= 1:
        # Either there is no u'市' suffix (find returned -1) or fewer than
        # two characters precede it; such entries are skipped.
        continue
    # Drop the suffix: u'深圳市' -> u'深圳'.
    shi = city[0][:shi_exist]
    shi_pinyin = lazy_pinyin(shi)
    if not shi_pinyin:
        continue
    shi_dic.setdefault(shi_pinyin[-1], []).append(shi)
    shi_dic_head.setdefault(shi_pinyin[0], []).append(shi)


def letsgo():
    """Run the interactive prompt loop.

    Each round reads a name from stdin, converts it to pinyin and prints
    every city in ``shi_dic_head`` whose first syllable equals the last
    syllable of the input.  If nothing matches (or the input is empty) an
    error message is printed instead.  The loop repeats while the user
    answers ``go`` to the follow-up prompt and returns otherwise.
    """
    # A loop instead of self-recursion: the stack no longer grows with
    # every round the user plays.
    while True:
        info = raw_input(u'请输入,如“深圳”:'.encode('utf-8')).decode(
            sys.stdin.encoding or locale.getpreferredencoding(True))

        info_pinyin = lazy_pinyin(info)
        # Guard against an empty pinyin list (e.g. empty input) before
        # indexing the last syllable.
        if info_pinyin:
            matches = shi_dic_head.get(info_pinyin[-1], [])
        else:
            matches = []

        if matches:
            for i in matches:
                print(i)
        else:
            print("输入字符串非法")

        # Encode the prompt like the first one; Python 2's raw_input cannot
        # emit a non-ASCII unicode prompt through the default codec.
        die = raw_input(u'如继续,请输入go,如结束请输入其他'.encode('utf-8'))
        if die != 'go':
            print('程序已经退出')
            return


# Start the interactive prompt loop as soon as this module is executed.
letsgo()

  

原文地址:https://www.cnblogs.com/rsapaper/p/7090671.html