获取搜索引擎关键字建议

闲来无事,用Python写的小实例。

何为搜索引擎关键字建议呢?看图吧(原文此处为一张截图),红框中的内容就是搜索引擎自动提示的关键字搜索建议。

直接用浏览器的开发者工具抓取地址和返回的数据。

下面的三段代码依次对应百度、谷歌和360。

 1 #coding=utf8
 2 import cookielib
 3 import urllib
 4 import urllib2
 5 import json
 6 import re
 7 
 8 def getSuggestion(word):
 9     headers={'User-Agent':'Mozilla/4.0'}
10     url = "http://suggestion.baidu.com/su?wd="+word+"&p=3&cb=window.bdsug.sug&from=superpage&t=1392097137657"
11     req = urllib2.Request(url,None,headers)
12     res_data = urllib2.urlopen(req)
13     res = res_data.read()
14     #print res
15 
16     m=re.search('s:.*',res)
17     s=m.group(0)
18     s=s.replace('s:','').replace('});','')
19     s=s.decode('gbk')
20     sugs=json.loads(s)
21     return sugs
22 
# Interactive prompt: read keywords until an empty line is entered and print
# the suggestions returned by getSuggestion() for each of them.
while True:
    wd = raw_input('Input a keyword:')
    if not wd:
        # An empty input ends the session.
        break

    sugs = getSuggestion(wd)
    if not sugs:
        print('None')
        continue

    for sug in sugs:
        # Single-argument parenthesised print behaves like the print
        # statement under Python 2.
        print(sug)
35 #coding=utf8
36 import cookielib
37 import urllib
38 import urllib2
39 import json
40 import re
41 
def getSuggestion(word):
    """Fetch Google's keyword suggestions for ``word``.

    Python 2 code: relies on the ``urllib``, ``urllib2`` and ``json``
    modules imported at the top of this listing.

    Args:
        word: The search keyword as a byte string (e.g. the value returned
            by ``raw_input``).  It is percent-encoded before being placed
            in the query string; a non-ASCII ``unicode`` object should be
            encoded by the caller first.

    Returns:
        The element at index 1 of the decoded JSON response.  The loop
        that follows this function iterates it and prints ``sug[0]`` of
        every entry.

    Raises:
        urllib2.URLError: If the HTTP request fails.
        ValueError: If the response payload is not valid JSON.
    """
    # NOTE(review): no custom User-Agent is sent here.  The original built
    # a header dict but never passed it to urlopen, so the unused local
    # was dropped rather than silently changing what is sent to the server.
    # Percent-encode the keyword so spaces, '&', '#' and non-ASCII bytes
    # cannot break or truncate the query string.
    url = ("https://www.google.com.hk/complete/search?client=hp&q=" +
           urllib.quote(word))
    res_data = urllib2.urlopen(url)
    try:
        res = res_data.read()
    finally:
        # Release the connection even if reading the body fails.
        res_data.close()

    # Strip only the JSONP wrapper "window.google.ac.h( ... )".  Removing
    # every ')' from the body would also delete parentheses that belong to
    # the suggestion text itself.
    payload = res.strip()
    prefix = 'window.google.ac.h('
    if payload.startswith(prefix):
        payload = payload[len(prefix):].rstrip(';').rstrip()
        if payload.endswith(')'):
            payload = payload[:-1]

    sugs = json.loads(payload)
    return sugs[1]
52 
# Interactive prompt: read keywords until an empty line is entered and print
# the first field of every suggestion entry returned by getSuggestion().
while True:
    wd = raw_input('Input a keyword:')
    if not wd:
        # An empty input ends the session.
        break

    sugs = getSuggestion(wd)
    if not sugs:
        print('None')
        continue

    for sug in sugs:
        # Single-argument parenthesised print behaves like the print
        # statement under Python 2.
        print(sug[0])
65 #coding=utf8
66 import cookielib
67 import urllib
68 import urllib2
69 import json
70 import re
71 
def getSuggestion(word):
    """Fetch 360 Search's keyword suggestions for ``word``.

    Python 2 code: relies on the ``urllib``, ``urllib2`` and ``json``
    modules imported at the top of this listing.

    Args:
        word: The search keyword as a byte string (e.g. the value returned
            by ``raw_input``).  The request declares ``encodein=utf-8``,
            so the bytes are presumably expected to be UTF-8 -- confirm
            against the terminal encoding in use.  The value is
            percent-encoded before being placed in the query string.

    Returns:
        The value stored under the ``'result'`` key of the decoded JSON
        response.  The loop that follows this function iterates it and
        prints ``sug['word']`` of every entry.

    Raises:
        urllib2.URLError: If the HTTP request fails.
        UnicodeDecodeError: If the response body is not valid UTF-8.
        ValueError: If the response payload is not valid JSON.
        KeyError: If the decoded object has no ``'result'`` key.
    """
    headers = {'User-Agent': 'Mozilla/4.0'}
    # Percent-encode the keyword so spaces, '&', '#' and non-ASCII bytes
    # cannot break or truncate the query string.
    url = ("http://sug.so.360.cn/suggest?callback=suggest_so&encodein=utf-8"
           "&encodeout=utf-8&format=json&fields=word,obdata&word=" +
           urllib.quote(word))
    req = urllib2.Request(url, None, headers)
    res_data = urllib2.urlopen(req)
    try:
        res = res_data.read().decode('utf8')
    finally:
        # Release the connection even if reading the body fails.
        res_data.close()

    # Strip only the JSONP wrapper "suggest_so( ... );".  A blanket
    # str.replace would also remove "suggest_so(" or ");" occurring inside
    # the suggestion text itself.
    payload = res.strip()
    prefix = 'suggest_so('
    if payload.startswith(prefix):
        payload = payload[len(prefix):].rstrip(';').rstrip()
        if payload.endswith(')'):
            payload = payload[:-1]

    sugs = json.loads(payload)
    return sugs['result']
82 
# Interactive prompt: read keywords until an empty line is entered and print
# the 'word' field of every suggestion entry returned by getSuggestion().
while True:
    wd = raw_input('Input a keyword:')
    if not wd:
        # An empty input ends the session.
        break

    sugs = getSuggestion(wd)
    if not sugs:
        print('None')
        continue

    for sug in sugs:
        # Single-argument parenthesised print behaves like the print
        # statement under Python 2.
        print(sug['word'])

直接抓的网页数据,原理非常简单。 均未深入做编码处理,所以中文支持不好。

原文地址:https://www.cnblogs.com/tinker/p/3780812.html