-- Hive 存储格式及压缩

 1 -- 设置参数
 2 set hivevar:target_db_name=db_dw;
 3 use ${hivevar:target_db_name};
 4 
 5 -- 创建textfile表
 6 create table file_format_textfile 
 7 row format delimited fields terminated by '01'
 8 stored as textfile
 9 as
10 select * from smple_table;
11 
-- ORC tables, one per compression codec selected through the orc.compress
-- table property.
-- The ZLIB table is loaded from file_format_textfile. The other ORC tables
-- are copied from the ZLIB table, so it must be created first.
-- NOTE(review): the row format delimited clause is kept from the original.
-- It presumably has no influence on the ORC file layout -- confirm before
-- removing it.

-- ORC with ZLIB compression
create table file_format_orc_zlib
row format delimited fields terminated by '01'
stored as orc tblproperties ("orc.compress"="ZLIB")
as
select * from file_format_textfile
;

-- ORC with SNAPPY compression
create table file_format_orc_snappy
row format delimited fields terminated by '01'
stored as orc tblproperties ("orc.compress"="SNAPPY")
as
select * from file_format_orc_zlib
;

-- ORC with compression disabled
create table file_format_orc_none
row format delimited fields terminated by '01'
stored as orc tblproperties ("orc.compress"="NONE")
as
select * from file_format_orc_zlib
;

-- ORC with no orc.compress property, so whatever codec the cluster uses by
-- default applies
create table file_format_orc_default
row format delimited fields terminated by '01'
stored as orc
as
select * from file_format_orc_zlib
;

-- Parquet tables, one per compression codec, all copied from
-- file_format_orc_zlib.
-- Corrections relative to the original listing:
--   * the table property is parquet.compression. The original key
--     parquet.compress is not a Parquet writer setting, so the requested
--     codecs were presumably never applied.
--   * Parquet has no codec named ZLIB. GZIP is its deflate-based codec, so
--     the table still named file_format_parquet_zlib is written with GZIP.
--     The table name is unchanged so existing references keep working.
--   * the no-compression codec is spelled UNCOMPRESSED, not NONE.
-- NOTE(review): the row format delimited clause is kept from the original.
-- It presumably has no influence on the Parquet file layout -- confirm
-- before removing it.

-- Parquet with GZIP (deflate) compression
create table file_format_parquet_zlib
row format delimited fields terminated by '01'
stored as parquet tblproperties ("parquet.compression"="GZIP")
as
select * from file_format_orc_zlib
;

-- Parquet with SNAPPY compression
create table file_format_parquet_snappy
row format delimited fields terminated by '01'
stored as parquet tblproperties ("parquet.compression"="SNAPPY")
as
select * from file_format_orc_zlib
;

-- Parquet with compression disabled
create table file_format_parquet_none
row format delimited fields terminated by '01'
stored as parquet tblproperties ("parquet.compression"="UNCOMPRESSED")
as
select * from file_format_orc_zlib
;

-- Parquet with no compression property, so the cluster default applies
create table file_format_parquet_default
row format delimited fields terminated by '01'
stored as parquet
as
select * from file_format_orc_zlib
;

-- RCFile tables, one per compression codec, all copied from
-- file_format_orc_zlib.
-- The original listing passed tblproperties rcfile.compress and its author
-- noted that every table came out the same size, i.e. the property had no
-- effect. RCFile output compression is controlled by session settings
-- instead:
--   hive.exec.compress.output        switches output compression on or off
--   mapred.output.compression.codec  selects the Hadoop codec class
-- NOTE(review): on newer Hadoop the codec key is also known as
-- mapreduce.output.fileoutputformat.compress.codec -- confirm which key the
-- target cluster honours.
-- The default table is created first so that it still reflects the session
-- settings as they were before this block changed anything.
-- NOTE: this block leaves hive.exec.compress.output=false for the remainder
-- of the session.

-- RCFile with the untouched session settings
create table file_format_rcfile_default
row format delimited fields terminated by '01'
stored as rcfile
as
select * from file_format_orc_zlib
;

-- RCFile with zlib (deflate) compression via the Hadoop DefaultCodec
set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.DefaultCodec;
create table file_format_rcfile_zlib
row format delimited fields terminated by '01'
stored as rcfile
as
select * from file_format_orc_zlib
;

-- RCFile with SNAPPY compression
set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
create table file_format_rcfile_snappy
row format delimited fields terminated by '01'
stored as rcfile
as
select * from file_format_orc_zlib
;

-- RCFile with output compression switched off
set hive.exec.compress.output=false;
create table file_format_rcfile_none
row format delimited fields terminated by '01'
stored as rcfile
as
select * from file_format_orc_zlib
;
-- Show the on-disk size of each table directory so the storage formats and
-- compression codecs can be compared. The -s flag asks for one summary
-- total per path.
-- Each path is built from the hivevar target_db_name, which must already be
-- set in this session.
-- NOTE(review): assumes the warehouse root is /user/hive/warehouse --
-- confirm against hive.metastore.warehouse.dir on the target cluster.

-- Text baseline
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_textfile;

-- ORC variants
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_orc_zlib;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_orc_snappy;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_orc_none;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_orc_default;

-- Parquet variants
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_parquet_zlib;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_parquet_snappy;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_parquet_none;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_parquet_default;

-- RCFile variants
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_rcfile_zlib;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_rcfile_snappy;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_rcfile_none;
dfs -du -s /user/hive/warehouse/${hivevar:target_db_name}.db/file_format_rcfile_default;

-- 统计数据,原文件见文件中的附件

-- 原文地址:https://www.cnblogs.com/chenzechao/p/9447543.html