Batch-checking Baidu indexing [Python version]

The script below (written for Python 2 and BeautifulSoup 3) reads URLs from list.txt, searches Baidu for each one, and writes the URL to check.txt when it appears to be indexed, or "None" when it does not.

import urllib2
from BeautifulSoup import BeautifulSoup
import random
import time

def checkIndex(url):
    # strip the scheme so we search for the bare domain/path
    url = url.replace('http://', '')
    baiduUrl = 'http://www.baidu.com/s?wd=' + url
    webPage = urllib2.urlopen(baiduUrl)
    webCont = webPage.read()
    # Baidu wraps matched keywords in <b>...</b>; remove them so the URL text is contiguous
    webCont = webCont.replace('<b>', '').replace('</b>', '')
    soup = BeautifulSoup(webCont)
    # the green result URL is shown in <span class="g">
    findlist = soup.find('span', {'class': 'g'})
    if findlist:
        for each in findlist:
            if url in unicode(each):
                return url
    # note: returning None only after the loop; the original bailed out on the first child
    return None

urllist = open('list.txt')
res = open('check.txt', 'w')

for eachurl in urllist.readlines():
    # strip the trailing newline, otherwise the URL never matches the result text
    indexurl = unicode(checkIndex(eachurl.strip())) + '\n'
    res.write(indexurl)
    # pause a random 1-20 seconds between queries so Baidu doesn't block us
    time.sleep(random.randint(1, 20))

urllist.close()
res.close()
print 'over!'
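
For reference, here is a rough Python 3 adaptation of the same idea (not part of the original post). It assumes the beautifulsoup4 package is installed and that Baidu still marks the result URL with <span class="g">; the selector and the search endpoint are carried over from the script above and may need updating against Baidu's current markup.

# A sketch only: Python 3 version of the same Baidu index check.
import random
import time
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup  # assumes `pip install beautifulsoup4`

def check_index(url):
    url = url.replace('http://', '')
    query = urllib.parse.quote(url)
    with urllib.request.urlopen('http://www.baidu.com/s?wd=' + query) as page:
        cont = page.read().decode('utf-8', errors='ignore')
    # same trick as above: drop the <b> tags Baidu inserts around matched keywords
    cont = cont.replace('<b>', '').replace('</b>', '')
    soup = BeautifulSoup(cont, 'html.parser')
    span = soup.find('span', {'class': 'g'})  # green result URL, per the original selector
    if span and url in span.get_text():
        return url
    return None

with open('list.txt') as urllist, open('check.txt', 'w') as res:
    for line in urllist:
        res.write('%s\n' % check_index(line.strip()))
        time.sleep(random.randint(1, 20))
print('over!')

As with the Python 2 script, list.txt holds one URL per line and each line of check.txt holds either that URL or None.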
Original article: https://www.cnblogs.com/alexkh/p/2865566.html