# 爬虫,第七次实战xpath之(58二手房)

import requests
from lxml import etree

# Fetch the 58.com second-hand housing listing page at `url`, extract the
# title text of each listing with XPath, and write one title per line to
# title.txt.
url = 'https://sz.58.com/ershoufang/'
# The header name must be spelled exactly "User-Agent"; a misspelled key is
# sent as an unrelated header and requests keeps its default user agent.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36'}
# A timeout keeps the script from hanging forever on an unresponsive server.
page_text = requests.get(url=url, headers=headers, timeout=10).text
tree = etree.HTML(page_text)
# Each <li> under the "house-list-wrap" list is treated as one listing.
title_list = tree.xpath('//ul[@class="house-list-wrap"]/li')

# The context manager guarantees the file is flushed and closed, even if an
# exception is raised while processing a listing.
with open('title.txt', 'w', encoding='UTF-8') as fp:
    for title in title_list:
        texts = title.xpath('./div/h2/a/text()')
        if not texts:
            # Skip list items without a title link (e.g. ads or spacer
            # items) instead of failing with IndexError on [0].
            continue
        list1 = texts[0]
        fp.write(list1 + '\n')
        print('辛辛苦苦')
# 原文地址:https://www.cnblogs.com/sucanji/p/10850263.html