getjob

[op@TIM getpage]$ cat job.py

#coding: utf-8
#title..href...
import os
import time
import urllib.request

# Download a run of consecutive job pages from job.csdn.net and save each
# one as job/<jobID>.html, pausing between requests.
FIRST_JOB_ID = 83909   # jobID of the first page to fetch
PAGE_COUNT = 30        # number of consecutive jobIDs to fetch
DELAY_SECONDS = 4      # pause between requests
OUTPUT_DIR = 'job'     # directory that receives the saved .html files

# Create the output directory up front; otherwise the first open() below
# fails with FileNotFoundError when the directory does not exist yet.
os.makedirs(OUTPUT_DIR, exist_ok=True)

url = []
for j in range(PAGE_COUNT):
    page = FIRST_JOB_ID + j
    url.append('http://job.csdn.net/Job/Index?jobID=' + str(page))

    # Context managers close the HTTP response and the output file
    # deterministically instead of leaving that to garbage collection.
    with urllib.request.urlopen(url[j]) as response:
        content = response.read().decode('utf-8')
    out_path = os.path.join(OUTPUT_DIR, str(page) + '.html')
    with open(out_path, 'w', encoding='utf-8') as out_file:
        out_file.write(content)

    print('donwloading ', j, ' page:', url[j])
    time.sleep(DELAY_SECONDS)

# The original used `while ... else`; with no `break` in the loop that
# branch always ran, so a plain statement after the loop is equivalent.
print('download article finished')
原文地址:https://www.cnblogs.com/timssd/p/4399013.html