hive解析json数组数据

/*
样本数据
60862 2227577 CN020004021585629448991 oe8GejqY8gkJwP8FX0149SltKi94 1585584000 [{"key":"key1","value":"营养健康"},{"key":"key2","value":null},{"key":"key3","value":"十分满意"},{"key":"key4","value":"十分满意"},{"key":"key5","value":null},{"key":"key6","value":null},{"key":"key7","value":"十分满意"},{"key":"key8","value":null},{"key":"key9","value":null},{"key":"key10","value":"十分满意"},{"key":"key11","value":null},{"key":"key12","value":null},{"key":"key13","value":null},{"key":"key14","value":null},{"key":"key15","value":null},{"key":"key16","value":null},{"key":"key17","value":null},{"key":"key18","value":null},{"key":"key19","value":null},{"key":"key20","value":null},{"key":"key21","value":null},{"key":"key22","value":null},{"key":"key23","value":null},{"key":"key24","value":null},{"key":"key25","value":null},{"key":"key26","value":null},{"key":"key27","value":null},{"key":"key28","value":null},{"key":"key29","value":"十分满意"},{"key":"key30","value":null},{"key":"key31","value":null},{"key":"key32","value":"十分满意"},{"key":"key33","value":null},{"key":"key34","value":null},{"key":"key35","value":"是"},{"key":"key36","value":null},{"key":"key37","value":null},{"key":"key38","value":null},{"key":"key39","value":null},{"key":"key40","value":null},{"key":"key41","value":null},{"key":"key42","value":null},{"key":"key43","value":null},{"key":"key44","value":null},{"key":"key45","value":null},{"key":"key46","value":"10 – 10, 一定会"},{"key":"key47","value":null},{"key":"key48","value":null},{"key":"key49","value":null},{"key":"key50","value":null},{"key":"key51","value":null},{"key":"key52","value":null},{"key":"key53","value":null},{"key":"key54","value":"否"},{"key":"key55","value":null},{"key":"key56","value":null},{"key":"key57","value":null},{"key":"key58","value":null},{"key":"key59","value":"1人"},{"key":"key60","value":null},{"key":"key61","value":"上班族"},{"key":"key62","value":null},{"key":"key63","value":"健康控脂"}] 2020-04-01 07:43:35 2020-04-02 15:05:33 MrTang 2020-04-01
*/
-- hive array类型测试
drop table test_json ;
CREATE external TABLE if not exists test_json
( id string ,
surveyId string COMMENT '评价ID',
tradeNo string COMMENT '订单号',
openId string ,
identifier string COMMENT '查询时间标识,表示同步何日的数据',
answer string COMMENT '问卷答案详情',
created string ,
extract_time string,
operator string
)
partitioned by (date string)
row format delimited
fields terminated by ' '
stored as textfile

-- ; #加载数据
alter table test_json add partition (date='2020-04-01')
location '/user/gbw/voucher_answer/2020/202004/2020-04-01'
-- 将数组json拆分后存入临时表 test_json_tmp
drop table test_json_tmp;
create table test_json_tmp
as
SELECT a.id
,a.surveyId -- '评价ID',
,a.tradeNo -- '订单号',
,a.openId
,a.identifier -- '查询时间标识,表示同步何日的数据',
,b.answer -- '问卷答案详情',
,a.created
-- extract_time ,
,'MrTang' as operator
from test_json a
lateral view explode(split(regexp_replace(regexp_replace(a.answer , '\]|\[' ,'') ,'\}\,\{','\}\;\{' ),'\;') ) b as answer
;
-- 解析 拆分数组json后的单个json数据
select tradeNo -- '订单号'
,answer
,get_json_object(answer,'$.key') as key
,get_json_object(answer,'$.value') as value
from test_json_tmp
where get_json_object(answer,'$.key')='key4' -- 友善度
and get_json_object(answer,'$.value')='十分满意'

原文地址:https://www.cnblogs.com/guweiwei/p/14976831.html