标题和价格的爬取,主要是遍历的方法


import re
import urllib2

def get_html(html=None):
    """Return the price strings found on the Taobao search page.

    Args:
        html: Optional page source to parse. When omitted (the default), the
            page is downloaded from http://uland.taobao.com/sem/tbsearch.

    Returns:
        A list with one captured string per match of the price pattern, in
        the order the matches appear in the page.
    """
    if html is None:
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
    # Non-greedy capture: a greedy ``(.*)`` runs to the LAST ``</strong>`` on
    # a line, which merges several prices into a single bogus match.
    reg = re.compile(r'target="_blank">&yen; <strong>(.*?)</strong>')
    return reg.findall(html)

def get_html1(html=None):
    """Return the title links found on the Taobao search page.

    Args:
        html: Optional page source to parse. When omitted (the default), the
            page is downloaded from http://uland.taobao.com/sem/tbsearch.

    Returns:
        A list of ``(title_attribute, link_text)`` tuples, one per matching
        ``class="title"`` anchor, in the order they appear in the page.
    """
    if html is None:
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
    # Both groups are non-greedy so that two anchors on the same line are
    # reported as two matches instead of one match spanning both.
    reg = re.compile(r' target="_blank" class="title" title="(.*?)">(.*?)</a>')
    return reg.findall(html)

# Scrape prices and titles, then append them to aaa.txt as
# "<price> <title> " records.
price = get_html()
title = get_html1()
# get_html1() yields (title attribute, link text) tuples; keep the link text.
titles = [match[-1] for match in title]

# Pair prices and titles by position. Writing the pairs directly (rather than
# collecting them in a dict keyed by price) keeps items that share a price and
# preserves page order; zip() stops at the shorter of the two lists.
with open('aaa.txt', 'a') as fd:
    for cost, name in zip(price, titles):
        fd.write(cost + ' ' + name + ' ')


# -*- coding: utf-8 -*-
import urllib2
import re

def geturl(html=None):
    """Return the ``title`` attribute of each title link on the search page.

    Args:
        html: Optional page source to parse. When omitted (the default), the
            page is downloaded from http://uland.taobao.com/sem/tbsearch.

    Returns:
        A list of title-attribute strings, one per matching ``class="title"``
        anchor, in the order they appear in the page.
    """
    if html is None:
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
    reg = re.compile(r' target="_blank" class="title" title="(.*?)">.*?</a>')
    return reg.findall(html)
def geturl1(html=None):
    """Return the text of each ``<strong>`` element on the search page.

    The caller pairs these values with titles as prices. Note that the
    pattern matches any ``<strong>`` element preceded by a space, so it
    presumably relies on the page using ``<strong>`` only for prices --
    TODO confirm against the live markup.

    Args:
        html: Optional page source to parse. When omitted (the default), the
            page is downloaded from http://uland.taobao.com/sem/tbsearch.

    Returns:
        A list of captured strings, in the order they appear in the page.
    """
    if html is None:
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
    reg = re.compile(r' <strong>(.*?)</strong>')
    return reg.findall(html)
# Write "<title> <price> " records to baobiao.txt, replacing any previous
# contents. The with-block closes the file even if a fetch or write raises.
with open('baobiao.txt', 'wb') as fd:
    # zip() pairs titles and prices by position and stops at the shorter list.
    for name, cost in zip(geturl(), geturl1()):
        fd.write(name + ' ' + cost + ' ')
原文地址:https://www.cnblogs.com/ZHANG576433951/p/6090038.html