#!/bin/bash
### Batch-runner template: for every month between the start date (inclusive)
### and the end date (exclusive), generate one SQL file from the template and
### a driver script that executes them with bounded concurrency.

### Directory holding the SQL template (no need to modify)
cd /tmp/fix_data/tmp_wjj_20180322_01 || exit 1
### Template/script name
script=tmp_wjj_20180322_01
### Start date (inclusive of that month/day)
etl_dt_start='2017-09-01'
### End date (exclusive of that month/day)
etl_dt_end='2016-12-01'
### Concurrency (do not set higher than 10)
thread_num=3
### Number of Spark SQL shuffle tasks (raise if row count exceeds ~100 million)
partitions=150

### No need to modify below this line
thread_no=0
# NOTE(review): passing the password on the command line leaks it via `ps`;
# prefer a beeline credential file or prompt. Kept as-is for compatibility.
spark="beeline -u jdbc:hive2://ip:port -n username -p password --verbose=true"
hive="hive -v"
execute=${spark}

### Truncate the generated driver script
: > "run_sql_${script}"

### Walk backwards one month per iteration, from start (inclusive) down to
### end (exclusive). Lexicographic `>` is correct because the dates are
### zero-padded YYYY-MM-DD strings.
while [[ "${etl_dt_start}" > "${etl_dt_end}" ]]
do
    echo "${etl_dt_start}"
    etl_dt_start_str=$(echo "${etl_dt_start}" | sed 's/-/_/g')
    sql_file="${script}_${etl_dt_start_str}"

    ### Assemble the per-month SQL file in a single grouped redirection:
    ### start marker, session settings, the template with its placeholder
    ### date replaced by the current month, and a finish marker.
    {
        echo "select 'job_start' as flag,'${etl_dt_start}' as num,current_timestamp() as time;"
        echo "set spark.sql.shuffle.partitions=${partitions};"
        echo "use db_name;"
        echo "set mapred.job.queue.name=queue_name;"
        echo "set hive.exec.dynamic.partition=true;"
        echo "set hive.exec.dynamic.partition.mode=nonstrict;"
        sed "s/2017-10-01/${etl_dt_start}/g" "${script}"
        echo "select 'job_finish' as flag,'${etl_dt_start}' as num,current_timestamp() as time;"
    } > "${sql_file}"

    thread_no=$((thread_no + 1))
    echo "${execute} -f ${sql_file} > ${sql_file}_log 2>&1 &" >> "run_sql_${script}"
    ### Emit a barrier after every ${thread_num} jobs so that at most
    ### ${thread_num} of them run concurrently.
    if (( thread_no % thread_num == 0 ))
    then
        echo "wait" >> "run_sql_${script}"
    fi
    ### etl_dt_start=$(date -d "+1 days ${etl_dt_start}" +%Y-%m-%d)
    etl_dt_start=$(date -d "-1 months ${etl_dt_start}" +%Y-%m-%d)
done

### Bug fix: if the last batch is partial (job count not a multiple of
### thread_num), the loop above never emitted a trailing barrier, so the
### driver would exit while the final jobs were still running in the
### background. Emit the closing `wait` here.
if (( thread_no % thread_num != 0 ))
then
    echo "wait" >> "run_sql_${script}"
fi

### Run the generated driver
sh "run_sql_${script}"

### Inspect job timings
# grep -E "job_start|job_finish" ${script}_*_log | grep -v select