爬取 Google 的搜索结果并保存

demo:

#coding:utf-8
import requests
from bs4 import BeautifulSoup
import bs4
import re
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request is sent with a 30-second timeout. The body is decoded using
    the encoding that requests detects from the content
    (``apparent_encoding``) rather than the one declared in the headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (invalid URL, connection error, timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: request failures are reported to the caller as
        # an empty page. Only request-related errors are caught, so
        # KeyboardInterrupt and genuine programming errors still propagate.
        return ''
    r.encoding = r.apparent_encoding
    return r.text


def fillList(ulist, html):
    """Extract search results from ``html`` and append them to ``ulist``.

    Every ``<div class="g">`` element is treated as one result. The text of
    its first ``<cite>`` element and of its first ``<span class="st">``
    element are appended to ``ulist`` as a ``[cite, abstract]`` pair.
    Result blocks that lack either element are skipped. The accumulated
    list is printed once after all blocks have been processed.

    Args:
        ulist: List that receives the ``[cite, abstract]`` pairs; it is
            modified in place.
        html: Markup of the results page. An empty string is accepted and
            adds nothing to ``ulist``.
    """
    soup = BeautifulSoup(html, 'lxml')
    for node in soup.find_all('div', {'class': 'g'}):
        cite_node = node.find('cite')
        abstract_node = node.find('span', {'class': 'st'})
        # find() returns None when the element is absent; skip incomplete
        # result blocks instead of failing on ``None.text``.
        if cite_node is None or abstract_node is None:
            continue
        ulist.append([cite_node.text, abstract_node.text])
    print(ulist)


# Script driver: download one results page and collect its entries.
# The adjacent string literals below are concatenated into a single URL;
# the ``q`` / ``oq`` parameters hold the percent-encoded search phrase.
url = (
    "https://www.google.com.hk/search"
    "?safe=strict&source=hp&ei=mQltW6O1CLe60PEP-_eY-AQ"
    "&q=%E6%98%8E%E7%95%A5%E6%95%B0%E6%8D%AECTO"
    "&oq=%E6%98%8E%E7%95%A5%E6%95%B0%E6%8D%AECTO"
    "&gs_l=psy-ab.3...7917.11610.0.12024.14.12.0.0.0.0.896.1417.5-1j1.2.0"
    "....0...1c.1j4.64.psy-ab..12.2.1416...0j0i30k1j0i5i30k1.0.uovOOEULNls"
)
uinfo = list()  # filled in place by fillList with [cite, abstract] pairs
html = getHTMLText(url)
fillList(uinfo, html)
原文地址:https://www.cnblogs.com/elpsycongroo/p/9454551.html