Python爬虫系列之爬取京东手机数据

Python抓取京东手机数据

作者:vpoet

mail:vpoet_sir@163.com

 1 #coding=utf-8
 2 
 3 import urllib2
 4 
 5 from lxml import etree
 6 
 7 import re
 8 
 9 
# Search-result URL template for the keyword "手机" (percent-encoded UTF-8).
# "%%" collapses to a literal "%" and "%s" receives the page index when the
# template is formatted with the "%" operator.
# NOTE(review): everything after "#" is a URL fragment, which clients normally
# do not send to the server, so "page=%s" may never reach JD and every
# iteration could fetch the same page -- confirm against the live site.
# NOTE(review): the run of spaces inside "click=3-   ...   655" looks like a
# line-wrap artefact from the original paste; it is kept unchanged here
# because the intended value cannot be verified from this file.
MAIN_URL = """http://search.jd.com/Search?keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&suggest=0#keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&qrst=1&ps=addr&rt=1&stop=1&sttr=1&cid3=655&click=3-                655&psort=3&page=%s"""

# Selects the <strong> element inside every "p-price" div of the product list.
# (The draft also carried a shorter alternative: //div[@class='p-price']/strong)
PRICE_XPATH = "//*[@id='plist']/ul[@class='list-h clearfix']/li/div/div[@class='p-price']/strong"

# TODO: the original draft left four more extractions commented out and
# unfinished. Their XPath expressions are recorded here for whoever
# completes the script:
#   phone names:   //div[@id='plist']/ul/li/div[@class='lh-wrap']/div[@class='p-name']/a/text()
#   picture nodes: //div[@class='lh-wrap']/div[@class='p-img']/a/img
#                  (the draft printed values()[3] of each node)
#   comment count: //div[@class='extra']/a/text()
#   good rating:   //div[@class='extra']/span[@class='reputation']/text()
# NOTE(review): the draft regexes for the last two read r'.{2}(d+).{3}' and
# r'((d{2})%.{2})'; the bare "d" was presumably meant to be "\d" -- confirm
# before reusing them.


def build_page_url(page):
    """Return the search URL with *page* substituted into the template."""
    return MAIN_URL % page


def format_price_line(price, position):
    """Return one output record: price, a tab, the 1-based position, a newline."""
    return price + '\t' + str(position) + '\n'


def number_prices(prices):
    """Return the output records for *prices*, numbered from 1.

    The draft incremented its counter outside the loop body, so every
    record would have carried the number 1; enumerate() numbers each item.
    """
    return [format_price_line(price, position)
            for position, price in enumerate(prices, 1)]


def fetch_html(url):
    """Download *url* and return the raw response body."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # The draft never closed the response; release the connection
        # even if read() raises.
        response.close()


def extract_prices(html):
    """Parse *html* and return the price string of every matched node."""
    tree = etree.HTML(html)
    # values() lists the element's attribute values; index 1 is the second
    # attribute. NOTE(review): presumably that is the price attribute of
    # JD's <strong> tag -- confirm against the current markup, since a node
    # with fewer than two attributes raises IndexError here.
    return [node.values()[1] for node in tree.xpath(PRICE_XPATH)]


def main(page_num=1):
    """Fetch *page_num* result pages and print each price with its position.

    Page indices run from 0 to page_num - 1, as in the draft, and the
    position counter restarts at 1 on every page.
    """
    for page in range(page_num):
        html = fetch_html(build_page_url(page))
        for line in number_prices(extract_prices(html)):
            # Each record already ends with "\n", so print() leaves a blank
            # line between records, matching the draft's output format.
            # A parenthesised single argument prints identically on Python 2.
            print(line)

    print("over")


if __name__ == '__main__':
    main()

这段代码还没写完,先保存在这里,有时间再完成。

原文地址:https://www.cnblogs.com/vpoet/p/4659586.html