第一个爬虫小程序

 1 import requests,os,urllib,urllib.request
 2 from bs4 import BeautifulSoup as be
 3 
 4 
 5 path = '/users/2018/desktop/'#设置路径
 6 new_file_name = 'tylor swift'#设置文件名
 7 urls = 'https://weheartit.com/inspirations/taylorswift?page='
 8 
 9 
10 
11 new_path = os.path.join(path,new_file_name)
12 if not os.path.isdir(new_path):
13         os.makedirs(new_path)
14 
15 #根据页码创建路径
def build_path(name, base=None):
    """Return the directory for one page, creating it if absent.

    Args:
        name: page identifier; str() of it becomes the sub-folder name.
        base: parent directory; defaults to the module-level ``new_path``
              (new optional parameter — existing callers are unaffected).

    Returns:
        The joined directory path as a string.
    """
    root = new_path if base is None else base
    paths = os.path.join(root, str(name))
    # exist_ok=True avoids the isdir-then-makedirs race of the original.
    os.makedirs(paths, exist_ok=True)
    return paths
21 
22 #图片写入路径
def img_file(url, page_num):
    """Download every thumbnail image found on one listing page.

    Args:
        url: full URL of the listing page to scrape.
        page_num: page number; used as the sub-directory name via build_path.

    Side effects:
        Writes sequentially numbered .jpg files under build_path(page_num)
        and prints the number of images saved.

    Raises:
        requests.HTTPError: if the listing page returns an HTTP error status.
    """
    # timeout keeps the scraper from hanging forever on a stalled connection
    web_data = requests.get(url, timeout=30)
    web_data.raise_for_status()  # fail loudly instead of parsing an error page
    soup = be(web_data.text, 'lxml')
    count = 0
    for link in soup.find_all("img", class_='entry-thumbnail'):
        img_addr = link.get('src')
        if not img_addr:
            # original passed None straight to requests.get and crashed;
            # skip <img> tags that have no src attribute
            continue
        count += 1
        img_content = requests.get(img_addr, timeout=30).content
        img_name = str(count) + '.jpg'
        with open(os.path.join(build_path(page_num), img_name), 'wb') as write_file:
            write_file.write(img_content)
    print(count)
36 
37 #获取不同页码
def img_files(start, end):
    """Scrape listing pages ``start`` through ``end - 1`` (half-open, like range).

    Builds each page URL from the module-level ``urls`` prefix and delegates
    the per-page download to ``img_file``.
    """
    for page in range(start, end):
        img_file(urls + str(page), page)
43 
44 
45 
# Scrape pages 1..9 (range is half-open). The guard keeps the download from
# firing as a side effect if this module is ever imported; behavior when run
# as a script is unchanged.
if __name__ == '__main__':
    img_files(1, 10)

 

原文地址:https://www.cnblogs.com/yangmingustb/p/8528067.html