爬取百度百科

 1 import urllib.request
 2 from bs4 import BeautifulSoup
 3 import re
 4 
 5 def main():
 6     response= urllib.request.urlopen('http://baike.baidu.com/view/284853.htm').read()
 7     soup = BeautifulSoup(response,'html.parser')#使用python默认的解析器
 8     for each in soup.find_all(href = re.compile('view')):
 9         print(each.text,'->',''.join(['http://baike.baidu.com/',each['href']]))#join函数明显比+提高
10 if __name__=='__main__':
11     main()
原文地址:https://www.cnblogs.com/themost/p/6701757.html