MySQL和hive对比表结构脚本

 1 #!/bin/bash
 2 
 3 source /etc/profile
 4 
 5 runlog='/tmp/zewei/check_schema_log'
 6 hive_database_schema=/tmp/hive_database_schema/hive/
 7 mysql_database_schema=/tmp/hive_database_schema/mysql/
 8 
 9 > $runlog
10 #每天下午一点删除现有的表结构缓存文件.
11 #防止hive有变动.
12 if [ `date +%k` -eq 13 ] 
13 then
14     rm -rf /tmp/hive_database_schema/hive/*
15 fi
16 
17 while read DB; do
18     HiveDB=`echo $DB |awk '{print $1}'`
19     MysqlDB=`echo $DB |awk '{print $2}'`
20     MysqlHost=`echo $DB |awk '{print $3}'`
21     MysqlPort=`echo $DB |awk '{print $4}'`
22     PartTag=`echo $DB |awk '{print $5}'`
23 
24     connect_mysql="mysql -u TableSchemaCheck -pSchemacheck666 -h $MysqlHost -P $MysqlPort $MysqlDB -BNe"
25     #缓存文件夹不在就创建
26     ls $mysql_database_schema/$MysqlDB > /dev/null || mkdir -p $mysql_database_schema/$MysqlDB
27     ls $hive_database_schema/$HiveDB > /dev/null || mkdir -p $hive_database_schema/$HiveDB
28     
29     #通过part标记检查是否为分区库
30     if [ "$PartTag" == 'part' ]
31     then
32         table_list=`hive -S -e "use $HiveDB; show tables;" | grep "_part$"`
33     else
34         table_list=`hive -S -e "use $HiveDB; show tables;"`
35     fi
36 
37     #对获取到的tables进行循环检查
38     for table in $table_list
39     do
40         if [ "$PartTag" == 'part' ]
41         then
42             hive_table_name=$table
43             mysql_table_name=`echo $table | sed 's/_part$//'`
44         else
45             hive_table_name=$table
46             mysql_table_name=$table
47         fi
48 
49         #获取mysql的表结构
50         $connect_mysql "desc $mysql_table_name;"  > /tmp/mysql_column 
51         if [ $? -ne 0 ] 
52         then 
53             continue
54         else
55             #把大写转换为小写.因为hive里面的列名没有大小写之分,所以转换一下
56             cat /tmp/mysql_column |  awk '{print $1}' | tr '[A-Z]' '[a-z]' > $mysql_database_schema/$MysqlDB/$mysql_table_name
57         fi
58 
59         #如果没有hive的表结构缓存文件就去hive取...然后对分区字段进行删除,因为这对于MySQL的表结构来说是多余的
60         if ! ls $hive_database_schema/$HiveDB/$table > /dev/null
61         then
62             hive -S -e "use $HiveDB; desc ${hive_table_name};" | awk '{print $1 }' > /tmp/hive_column
63             part_column=`sed -n  '/#/,$p' /tmp/hive_column | egrep -v '#|^$'` 
64             for i in $part_column
65             do
66                 sed -i "/${i}/d" /tmp/hive_column 
67             done
68             egrep -v "#|^$" /tmp/hive_column | tee $hive_database_schema/$HiveDB/$table
69         fi
70 
71         #获取MD5
72         md5ForMysql=`md5sum $mysql_database_schema/$MysqlDB/$mysql_table_name | awk '{print $1}'`
73         md5ForHive=`md5sum $hive_database_schema/$HiveDB/$hive_table_name | awk '{print $1}'`
74 
75         #MD5不同就记录日志
76         if [ ! "$md5ForMysql"x == "$md5ForHive"x ]; then
77                 echo -e "HiveDB:	$HiveDB		table:	$hive_table_name		not equal to MysqlDB table:	$mysql_table_name" >> $runlog
78         fi
79     done
80 done < /root/script/DBlist
81 
82 #统计并报警.报警阈值在监控里面设置
83 err_line=`wc -l $runlog | awk '{print $1}'`
84 [报警]
原文地址:https://www.cnblogs.com/wayde-Z/p/6958050.html