多线程下载图片

  1. # -*- coding:utf8 -*-
  2. from bs4 import BeautifulSoup
  3. import os, sys, urllib2, urllib
  4. import thread, threading
  5. class downloader(threading.Thread):
  6. """docstring for downloader"""
  7. def __init__(self, url, name):
  8. threading.Thread.__init__(self)
  9. self.url = url
  10. self.name = name
  11. def run(self):
  12. print 'downling from %s' % self.url
  13. urllib.urlretrieve(self.url, self.name)
  14. threads=[]
  15. def page_loop(page=1):
  16. url = 'http://www.beautylegmm.com/Tiara/beautyleg-936.html?page=%s' % page
  17. content = urllib2.urlopen(url)
  18. soup = BeautifulSoup(content)
  19. my_girl = soup.find_all('img')
  20. global x
  21. # 加入结束检测,写的不好....
  22. if len(my_girl) <5:
  23. print '已经全部抓取完毕'
  24. sys.exit(0)
  25. print '开始抓取'
  26. for girl in my_girl:
  27. link = girl.get('src')
  28. if 'jpg' in link:
  29. flink = 'http://www.beautylegmm.com' + link
  30. print flink
  31. path = 'dbmeizi'+'/' + str(x) + flink[-4:]
  32. x = x + 1
  33. t = downloader(flink, path)
  34. threads.append(t)
  35. t.start()
  36. # content2 = urllib2.urlopen(flink).read()
  37. # with open('dbmeizi'+'/' + str(x) + flink[-4:],'wb') as code:
  38. # code.write(content2)
  39. page = int(page) + 1
  40. print '开始抓取下一页'
  41. print 'the %s page' % page
  42. page_loop(page)
# Global counter that page_loop() uses to number the saved image files;
# it is incremented once per downloaded image.
x = 1
# Start crawling from the default first page. This is module-level code, so
# it runs whenever the file is executed or imported.
page_loop()





原文地址:https://www.cnblogs.com/highroom/p/cbb0d977a78d35dac83bd56f5d08f61c.html