# Call the Baidu API to perform lexical analysis.

"""
import json
import urllib.request
import urllib.parse
import urllib.request
url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials"
values = {
 'host':'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials',
 'client_id':'',
 'client_secret' : ''
}
host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id= &client_secret='
request = urllib.request.Request(host)
request.add_header('Content-Type', 'application/json; charset=UTF-8')
response = urllib.request.urlopen(request)
content = response.read()
print(content)
"""
"""
for i in range(len(title_list)):
    if (i+1)%5==0:
        time.sleep(1)
"""
import urllib3
import json
import time
# Streams a text file through Baidu's NLP "lexer" endpoint in fixed-size
# chunks and appends every selected word to one file and its category label
# to another (one entry per line, same order in both files).

# Blank in this file; presumably to be filled with a token obtained from the
# OAuth endpoint before running -- TODO confirm.
access_token = ""
url = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer?access_token=' + access_token

INPUT_PATH = "C:/Users/Jack/Desktop/data.txt"
WORDS_PATH = "C:/Users/Jack/Desktop/result1.txt"
LABELS_PATH = "C:/Users/Jack/Desktop/result2.txt"
CHUNK_SIZE = 10000  # characters read from the input file per request

# Named-entity tags take precedence over part-of-speech tags when labelling.
NE_LABELS = {
    'ORG': "机构名",
    'PER': "人名",
    'LOC': "地名",
}
POS_LABELS = {
    'n': "普通名词",
    'f': "方位名词",
    's': "处所名词",
    'nr': "人名",
    'ns': "地名",
    'nt': "机构团体名",
    'nw': "作品名",
    'nz': "其他专名",
    'v': "普通动词",
    'vn': "名动词",
}


def label_for_item(item):
    """Return the category label for one lexer item, or None to skip it.

    Args:
        item: A dict from the response's ``items`` list; the keys
            ``byte_length``, ``ne`` and ``pos`` are read.

    Returns:
        The label string when ``byte_length`` exceeds 2 and either ``ne`` or
        ``pos`` is one of the tracked tags; otherwise None.
    """
    if item['byte_length'] <= 2:
        return None
    entity = item['ne']
    if entity in NE_LABELS:
        return NE_LABELS[entity]
    return POS_LABELS.get(item['pos'])


def analyze_chunk(http, text):
    """POST one chunk of text to the lexer endpoint and return its items.

    Args:
        http: The urllib3 pool manager used to send the request.
        text: The chunk of input text to analyse.

    Returns:
        The ``items`` list from the decoded JSON response.

    Raises:
        RuntimeError: If the decoded response has no ``items`` key.
    """
    # json.dumps escapes non-ASCII characters by default, so the payload is
    # plain ASCII and the GBK encode cannot fail on unusual characters.
    payload = json.dumps({'text': text}).encode('GBK')
    response = http.request('POST',
                            url,
                            body=payload,
                            headers={'Content-Type': 'application/json'})
    result = json.loads(str(response.data, 'GBK'))
    print("a", result)
    if 'items' not in result:
        # Fail with the full response instead of a bare KeyError.
        raise RuntimeError("lexer response has no 'items': %r" % (result,))
    return result['items']


def main():
    """Read the input file chunk by chunk and record the selected items."""
    http = urllib3.PoolManager()
    # The files are opened with the platform's default text encoding, as
    # before; the context manager closes all three even if a request fails.
    with open(INPUT_PATH, 'r') as source, \
            open(WORDS_PATH, 'a') as words_out, \
            open(LABELS_PATH, 'a') as labels_out:
        # iter() with a sentinel stops at the first empty read, so no request
        # is ever sent for the empty string returned at end of file.
        chunks = iter(lambda: source.read(CHUNK_SIZE), '')
        for request_number, chunk in enumerate(chunks, start=1):
            # Pause for a second on the 4th, 9th, 14th, ... request.
            if (request_number + 1) % 5 == 0:
                time.sleep(1)
            for item in analyze_chunk(http, chunk):
                label = label_for_item(item)
                if label is None:
                    continue
                words_out.write(item['item'] + "\n")
                labels_out.write(label + "\n")


if __name__ == "__main__":
    main()
# Original article: https://www.cnblogs.com/libin123/p/13215186.html