用 Python 编写循环执行 Hive 的脚本

需求:需要一个月的数据,但每次只跑一周的数据;先建表(create table if not exists),再用 insert into 的方式按周把数据追加进去:

 1 import os, sys
 2 reload(sys); sys.setdefaultencoding( "utf-8" )
 3 flag_server = int(os.popen('ifconfig | grep "inet addr:172" | wc -l').read().strip())
 4 dir_scripts = '/app/home/zhangb/' if flag_server else '/Users/zhangb/Desktop/'
 5 dir_data = '/app/home/zhangbo/' if flag_server else '/Users/zhangb/Desktop/'
 6 dir_server = '/app/home/'
 7 sys.path.append(dir_scripts)
 8 
 9 import datetime
10 import time
11 db_name = 'zhangb'
12 
def hive_day_cid(create_date, type11_duration):
    """Append per-provider, per-day cid counts for one date window to Hive.

    Builds a ``hive -e`` shell command that creates the table
    ``hive_day_cid_provider`` if it does not exist and inserts, for every
    (provider, day) pair in the window, the number of rows with a non-empty
    ``token_md5`` (``cnt_cid``) and the number of distinct ``token_md5``
    values (``dist_cid``) read from ``report_ods_mdp.upload_bi_type11``.

    The command is always printed; it is executed only when the module-level
    ``flag_server`` is truthy. The database comes from the module-level
    ``db_name``.

    Args:
        create_date: ``datetime.date`` that is the last day of the window
            (inclusive).
        type11_duration: Window length in days; the window starts
            ``type11_duration - 1`` days before ``create_date``.

    Returns:
        None.
    """
    # The original notes here said "join the raw table with the geohash table
    # to find the cid" / "build the geohash table"; the query below performs
    # no such join, so those notes look like leftovers from another script.
    print("# ---------------------------------------------------------------------------------- #")

    window_start = create_date - datetime.timedelta(days=type11_duration - 1)
    start_date_str = window_start.strftime("%Y%m%d")
    end_date_str = create_date.strftime("%Y%m%d")

    # `insert into` appends, so running the same window twice duplicates rows.
    hive_command = '''
hive -e " use %s;
    create table if not exists hive_day_cid_provider(provider string,day int, cnt_cid bigint,dist_cid bigint );

    insert into hive_day_cid_provider
    select a.provider,a.day,count(a.cid) as cnt_cid,count(distinct(a.cid)) as dist_cid from
    (select day,provider,token_md5 as cid  from  report_ods_mdp.upload_bi_type11
    where day >=%s and day <= %s  and length(token_md5)>0 and provider in ('gps','network','passive','none') ) a
    group by a.provider,a.day

;"
    ''' % (db_name, start_date_str, end_date_str)

    print(hive_command)
    if flag_server:
        status = os.system(hive_command)
        # Surface failures: a silently failed run would leave a gap for this
        # window in the accumulated table.
        if status != 0:
            print("# WARNING: hive command exited with status %s" % status)
    print("\n")


if __name__ == '__main__':
    # Driver: run hive_day_cid once per weekly period and report elapsed time.
    start = time.time()
    business_name = 'brand48'  # not referenced anywhere in the visible script

    # Length in days of every period; constant, so set once outside the loop.
    type11_duration = 7

    # ----------------------------------------
    # Each (month, day) pair is the LAST day of a period in 2016, e.g. the 7th
    # closes the period that covers the 1st through the 7th.
    # NOTE(review): (11, 21) is missing from the weekly sequence, so the week
    # of Nov 15-21 is never loaded -- confirm whether that is intentional.
    period_end_days = [(11, 7), (11, 14), (11, 28), (12, 5), (12, 12), (12, 19), (12, 26)]
    # Alternative list kept from the original:
    # for (i, j) in [(2, 7), (2, 14), (2, 21), (2, 28)]:
    for (i, j) in period_end_days:
        create_date = datetime.date(2016, i, j)
        hive_day_cid(create_date, type11_duration)
        print("\n")
        # Elapsed wall-clock time since the script started.
        print('# Time:  %s' % datetime.timedelta(seconds=(time.time() - start)))
        print('# the end')
        print('\n')

    # Disabled follow-up step (helper is not defined in the visible source):
    # hive_imei_time_list(create_date,type11_duration)
    print('# Time:  %s' % datetime.timedelta(seconds=(time.time() - start)))

    # Disabled follow-up step (module is not imported in the visible source):
    # Beintoo_day.hive_output(create_date, cnt_duration=7)
# ---------------------------------------------------------------------------
# Reference snippets (never executed): ways to generate the weekly dates
# instead of hard-coding (month, day) pairs.
# ---------------------------------------------------------------------------
#
# Weekly dates counted from a fixed start date:
#
#     for i in range(1, 30):
#         a = datetime.date(2016, 2, 23)
#         b = a + datetime.timedelta(7 * i)
#         print(b)
#
# === Handling a range that crosses a year boundary, method 1 ===
#
#     date_begin = datetime.date(2016, 12, 1)
#     # date_end = date_begin
#     date_end = datetime.date(2017, 1, 10)
#     for i in range(0, (date_end - date_begin).days + 1, 7):
#         create_date = date_begin + datetime.timedelta(days=i)
#
#         print(create_date)
#
# Method 2:
#
#     date_begin = datetime.date(2016, 12, 1)
#     # date_end = date_begin
#     date_end = datetime.date(2017, 1, 10)
#
#     while date_begin <= date_end:
#         print(date_begin)
#         date_begin = date_begin + datetime.timedelta(days=7)
原文地址:https://www.cnblogs.com/zhangbojiangfeng/p/6382555.html