Python文件传参数和日志模块使用

  说明:下面的脚本涵盖三部分内容:通过配置文件传递参数、使用logging日志模块记录日志,以及使用多进程(multiprocessing.Pool)并发访问URL

  1 import requests
  2 import re
  3 import logging
  4 from logging.handlers import RotatingFileHandler
  5 import datetime
  6 import time
  7 import configparser
  8 import os
  9 import sys
 10 import getpass
 11 from multiprocessing import Pool
 12 from requests.packages.urllib3.exceptions import InsecureRequestWarning
 13 requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
 14 
 15 def get_parameter():
 16     confFile='multiSiteAccess.cnf'
 17     if os.path.exists(confFile) is False:
 18         print('配置文件:{}不存在!'.format(confFile))
 19         opt=getpass.getpass('输入回车键退出程序!')
 20         sys.exit(1)
 21     else:
 22         cf=configparser.ConfigParser(allow_no_value=True)
 23         cf.read(confFile,encoding='UTF-8')
 24         try:
 25             username=cf.get('Auth','username')
 26             password=cf.get('Auth','password')
 27             urlFile=cf.get('FilePath','urlFile')
 28             proxyFile=cf.get('FilePath','proxyFile')
 29             ProccessNum=int(cf.get('Proccess','ProccessNum'))
 30             logFile=cf.get('logRecord','logFile')
 31             logPrint=cf.get('logRecord','logPrint')
 32             logRotateType=cf.get('logRecord','logRotateType')
 33             return username,password,urlFile,proxyFile,ProccessNum,logFile,logPrint,logRotateType
 34         except Exception as e:
 35             print(e)
 36             opt = getpass.getpass('输入回车键退出程序!')
 37             sys.exit(2)
 38 
 39 def log_record_conf(logFile,logPrint,logRotateType):
 40     global logger
 41     logger=logging.getLogger()
 42     logger.setLevel(logging.DEBUG)
 43     console_handler=logging.StreamHandler()
 44     maxBytes=100*1024*1024
 45     interval=8
 46     if logRotateType.lower()=='size':
 47         file_handler=logging.handlers.RotatingFileHandler(filename=logFile,encoding='UTF-8',
 48                                                           maxBytes=maxBytes,backupCount=50,delay=False)
 49     elif logRotateType.lower()=='time':
 50         file_handler=logging.handlers.TimedRotatingFileHandler(filename=logFile,encoding='UTF-8',when='H',
 51                                                                interval=interval,delay=False,utc=False,atTime=datetime.time)
 52     else:
 53         file_handler=logging.FileHandler(filename=logFile,encoding='UTF-8',mode='a',delay=False)
 54     formatter=logging.Formatter('%(asctime)s - %(levelname)-7s - %(message)s')
 55     console_handler.setFormatter(formatter)
 56     file_handler.setFormatter(formatter)
 57     logging.addHandler(file_handler)
 58     if logPrint.lower()=='on':
 59         logger.addHandler(console_handler)
 60     return logger
 61 
 62 def get_urls(urlFile):
 63     with open(urlFile,'r',encoding='UTF-8') as rf:
 64         allUrl=rf.readlines()
 65     return allUrl
 66 
 67 def get_proxy_host(proxyFile):
 68     with open(proxyFile,'r',encoding='UTF-8') as rf:
 69         allProxy=rf.readlines()
 70     return allProxy
 71 
 72 def url_sample_disposal(urllink):
 73     urllink=urllink.strip('"')
 74     urllink=urllink.strip("'")
 75     pattern=r'^https?://.*'
 76     match=re.findall(pattern,urllink)
 77     if len(match) ==0:        # URL样本不是以http/https开头的样本
 78         urllink1='http://{}'.format(urllink)
 79         urllink2='https://{}'.format(urllink)
 80         match.append(urllink1)
 81         match.append(urllink2)
 82     return match
 83 
 84 def set_proxy(urlProxy,username,password):
 85     if not urlProxy:
 86         proxies={}
 87     else:
 88         http_proxy='http://{}:{}@{}'.format(username,password,urlProxy)
 89         https_proxy=http_proxy
 90         proxies={
 91             'http':http_proxy,
 92             'https':https_proxy
 93         }
 94     return proxies
 95 
 96 def http_request(urllink,proxy='',proxyHost=''):
 97     headers={
 98         'User-Agent':'curl/3.03',
 99         'Connection':'close'    # keep-alive
100     }
101     requests.adapters.DEFAULT_RETRIES=5
102     try:
103         r=requests.get(urllink,headers=headers,proxies=proxy,timeout=30,verify=False)
104         r.close()
105         urlresult='{}	{}	{}'.format(proxyHost,r.url,str(r.status_code))
106     except Exception as e:
107         urlresult='{}	{}	{}'.format(proxyHost,urllink,str(e))
108     finally:
109         return urlresult
110 
def writer_log(urlresult):
    """Pool callback: record one request result at INFO level.

    Uses the module-global ``logger`` set up by ``log_record_conf``.
    """
    logger.log(logging.INFO, urlresult)
113 
if __name__=='__main__':
    # Script entry point: read the configuration, set up logging, request every
    # URL sample through every proxy in a process pool, then log the timings.
    start_time=datetime.datetime.now()
    (username,password,urlFile,proxyFile,ProccessNum,
     logFile,logPrint,logRotateType)=get_parameter()
    logger=log_record_conf(logFile,logPrint,logRotateType)
    logger.info('{}URL测试开始{}'.format('='*15,'='*15))
    allUrl=get_urls(urlFile)
    allProxy=get_proxy_host(proxyFile)
    logger.debug('测试总URL数:{}'.format(len(allUrl)))
    logger.debug('测试进程数:{}'.format(ProccessNum))
    p=Pool(ProccessNum)
    # Keep every AsyncResult so that all tasks are awaited, not just the last
    # one submitted (which may finish before earlier, slower requests).
    results=[]
    for i in allUrl:
        fields=i.split()
        if not fields:
            # Blank line in the URL file: nothing to request.
            continue
        # The URL is the last whitespace-separated field of the line.
        urlLink=fields[-1]
        match=url_sample_disposal(urlLink)
        for urllink in match:
            urllink=urllink.strip()
            for proxyHost in allProxy:
                proxyHost=proxyHost.strip()
                proxies=set_proxy(proxyHost,username,password)
                results.append(p.apply_async(http_request,
                                             args=(urllink,proxies,proxyHost),
                                             callback=writer_log))
    p.close()
    # Block until each task has finished. With no tasks submitted this loop is
    # simply skipped, instead of polling an undefined result forever.
    for result in results:
        try:
            result.get()
        except Exception as e:
            logger.warning('进程异常:{}'.format(e))
    logger.debug('进程池调度结束')
    # All tasks are complete, so joining lets the workers exit cleanly and
    # no unfinished request is killed by terminate().
    p.join()
    end_time=datetime.datetime.now()
    logger.info('开始时间:{}'.format(start_time))
    logger.info('结束时间:{}'.format(end_time))
    logger.info('总耗时:{}'.format(end_time-start_time))
    logger.debug('日志文件:{}'.format(logFile))
    logger.info('{}URL测试结束{}'.format('=' * 15, '=' * 15))

原文地址:https://www.cnblogs.com/wlinuxtop/p/14433097.html