hive的map类型处理

https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-CollectionFunctions


--首先进行一些基础配置设置:

SET mapred.job.queue.name=queue3;
SET hbase.client.scanner.caching=5000;
SET hbase.zookeeper.quorum=datanode06,datanode07,datanode08;
SET zookeeper.znode.parent=/hbase;


--然后对hbase的表建立外部表:
CREATE EXTERNAL TABLE wizad_test_geely_20141017 (
rowkey string,
fixeddim_map map<STRING,STRING>,
social_map map<STRING,STRING>
) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,fixeddim:,social:")
TBLPROPERTIES ("hbase.table.name" = "wizad_mdm_task_geely_hk_20141017");




--查询时,需要用Lateral View语法将(map)值展开为一个新的虚拟表:
--lateral view首先为原始表的每行调用UDTF,UTDF会把一行拆分成一或者多行,lateral view再把结果组合,产生一个支持别名表的虚拟表。


SELECT social_key,count(DISTINCT rowkey) FROM wizad_test_geely_20141017
LATERAL VIEW explode(fixeddim_map) myTable1 AS fixeddim_key,fixeddim_value
LATERAL VIEW explode(social_map) myTable2 AS social_key,social_value
WHERE (social_key='sex_f' or social_key='sex_m') and (social_value>50)
GROUP BY social_key;






=========================================================================
CREATE EXTERNAL TABLE lxw2 
(
key string,
value map<STRING,STRING>

STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,fixeddim:")
TBLPROPERTIES ("hbase.table.name" = "wizad_mdm_task_geely_hk_20141014"); 
 


SELECT KEY,dim_name,dim_value FROM lxw2 
LATERAL VIEW explode(VALUE) myTable1 AS dim_name,dim_value
--WHERE KEY = '000000054153796'














hive支持8个层级的分隔符,依次是01,02,03...08(HIVE nested ARRAY in MAP data type)
map嵌套使用
map的key分隔符指定
hive> explain
    > create table t3(foo int,bar MAP<STRING, MAP<STRING,STRING>>)
    > row format delimited fields terminated by ' ' 
    > collection items terminated by ','
    > MAP KEYS TERMINATED BY ':'
> STORED AS TEXTFILE;


map元素用分隔符04(就是','),key通过分隔符05(就是':')指示;
map只有一个元素数据: key_out0:key_in0105val_in0104key_in0205val_in02
实际结构:{"key_out0":{"key_in01":"val_in01","key_in02":"val_in02"}}


外层map两个元素:1 key_out0:key_in0105val_in0104key_in0205val_in02,key_out1:key_in0105val_in0104key_in0205val_in02
结构: {"key_out0":{"key_in01":"val_in01","key_in02":"val_in02"},"key_out1":{"key_in01":"val_in01","key_in02":"val_in02"}}




map的使用
1建表:
hive> CREATE TABLE t3 (foo STRING, bar MAP<STRING,INT>)
    > ROW FORMAT DELIMITED
    > FIELDS TERMINATED BY '/t'
    > COLLECTION ITEMS TERMINATED BY ','
    > MAP KEYS TERMINATED BY ':'
    > STORED AS TEXTFILE;
2,load本地数据:路径默认
LOAD DATA LOCAL INPATH 'test.txt' OVERWRITE INTO TABLE t3;
3使用:
select foo,bar['a'] from t3 where id='1'; --使用下标访问map


select foo,size(bar) from t3 where id='1'; #查看map长度


select ip,uid from t3 where dt='20130101'  where array_contains(map_keys(gameinfo),'wow');#查看map的key,找出有玩wow游戏的记录
更多操作参考 https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-CollectionFunctions

原文地址:https://www.cnblogs.com/cl1024cl/p/6205404.html