# Scrape Beijing housing listings and save the listing images to a folder

#encoding:utf8
import requests
from lxml import etree
import re
import hashlib
# random is used to generate an arbitrary random number (for image file names)
import random
import time
# Download the listing page once and parse it a single time; both XPath
# queries below run against the same parsed tree.
yuan = requests.get('http://esf.fang.com/').text
page_tree = etree.HTML(yuan)
# <dd> elements: consumed by the listing loop further down the script.
demo = page_tree.xpath('//*/dd')
# onerror attributes of the thumbnail <img> tags; each one carries an image
# URL as the second argument of an imgiserror(this, '<url>') call.
demo1 = page_tree.xpath('//*/dt/a/img/@onerror')

# Compiled once, outside the loop. A raw string avoids the invalid "\(" and
# "\)" escape sequences that a plain string literal would contain.
image_url_pattern = re.compile(r"imgiserror\(this,'(.*?)'\)", re.S)

for onerror_attr in demo1:
    found = image_url_pattern.search(onerror_attr)
    if found is None:
        # Attribute is not in the expected imgiserror(...) form: there is no
        # URL to download, so skip it instead of failing with IndexError.
        continue
    image_url = found.group(1)
    image_bytes = requests.get(image_url).content

    # File name = random integer + current timestamp, to avoid collisions.
    # An MD5 hex digest of the URL (hashlib) is an interchangeable choice.
    zhi = str(random.choice(range(1, 10000000000000))) + str(time.time())

    # The with-block guarantees the file is flushed and closed per image.
    with open('F:\\10\\bsbs\\mai\\' + zhi + '.jpg', 'wb') as image_file:
        image_file.write(image_bytes)
    print(image_url)
# Print one line per listing: the text of the first link in p[1], the text
# fragments of p[2] joined with '|', the span text in p[3] and the link text
# in p[4]. Entries missing any of the four pieces are skipped.
for entry in demo:
    a = entry.xpath('p[1]/a/text()')
    b = entry.xpath('p[2]/text()')
    c = entry.xpath('p[3]/a/span/text()')
    d = entry.xpath('p[4]/a/text()')
    if a and b and c and d:
        # Join the whitespace-trimmed fragments with '|'. The result is kept
        # in its own name so the builtin `str` is not overwritten.
        joined = '|'.join(fragment.strip() for fragment in b)
        # strip('|') drops separators left at either end by empty fragments.
        print(a[0], joined.strip('|'), c[0], d[0])
# Original article: https://www.cnblogs.com/lianghongrui/p/6857170.html