Python crawler demo01

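The demo below requests the bj.xiaozhu.com listing page, pulls the lazy-loaded thumbnail URLs out of the HTML with BeautifulSoup, and streams each image to disk as 0.jpg, 1.jpg, and so on.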
import time

import requests
from bs4 import BeautifulSoup
from contextlib import closing

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}

# Sample direct image URL on the site (not used below)
# url = 'https://image.xiaozhustatic1.com/12/9,0,27,3473,1800,1200,d064ccfb.jpg'

# Listing page whose thumbnails will be downloaded
url2 = 'http://bj.xiaozhu.com/'


def get_img_src_list(url):
    """
    Collect the image URLs from the listing page.
    :param url: listing page URL
    :return: list of image URLs
    """
    res = requests.get(url, headers=headers)
    res_data = BeautifulSoup(res.text, 'lxml')
    imgs = res_data.select('#page_list > ul > li > a > img')
    # titles = res_data.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
    img_srcs = []
    for img in imgs:
        # The real image address is in the lazy-load attribute, not in src
        img_srcs.append(img.get('lazy_src'))
    return img_srcs


def downloadPic(url, pic_name):
    """
    Download one image and save it as <pic_name>.jpg.
    :param url: image URL
    :param pic_name: number used as the file name
    :return:
    """
    res = requests.get(url, headers=headers, stream=True)
    with closing(res) as r:
        with open('%d.jpg' % pic_name, 'wb') as f:
            # Write the body in 1 KB chunks so large images are not held in memory
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    print('Downloaded {}.jpg successfully!'.format(pic_name))


if __name__ == '__main__':
    img_srcs = get_img_src_list(url2)
    for i, src in enumerate(img_srcs):
        print(src)
        # time.sleep(1)  # uncomment to throttle requests
        downloadPic(src, i)
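The thumbnails on the listing page are lazy-loaded, which is why the demo reads the lazy_src attribute instead of src. If some entries were to lack that attribute, a slightly more defensive variant could fall back to src. This is only a sketch, and get_img_src_list_safe is a hypothetical name, not part of the original demo:

import requests
from bs4 import BeautifulSoup


def get_img_src_list_safe(url, headers):
    """Variant of get_img_src_list that tolerates entries without lazy_src."""
    res = requests.get(url, headers=headers)
    soup = BeautifulSoup(res.text, 'lxml')
    srcs = []
    for img in soup.select('#page_list > ul > li > a > img'):
        # Prefer the lazy-load attribute; fall back to src if it is missing
        src = img.get('lazy_src') or img.get('src')
        if src:
            srcs.append(src)
    return srcs

Filtering out None values also keeps downloadPic from being called with a missing URL.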
Original post: https://www.cnblogs.com/valorchang/p/11476835.html