Creating partitioned tables in Hive

#Create a partitioned table
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date_id string)
row format delimited fields terminated by ','
stored as textfile
#The LOCATION clause is optional
LOCATION
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_table';
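A quick way to verify the resulting definition and storage location (a verification step, not part of the original notes):
describe formatted data_center.test_partition;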

#Add partitions
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-06') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06'
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-07') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07'
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-08') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08'


#Insert data (with gzip output compression enabled):
set hive.exec.compress.output=true; 
set mapred.output.compress=true; 
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; 
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec; 
SET mapred.output.compression.type=BLOCK;
insert overwrite table seqfile_table select * from textfile_table;
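seqfile_table and textfile_table above are placeholder names. A minimal sketch of what the SequenceFile target table could look like, assuming the same three columns as test_partition:
CREATE TABLE if not exists seqfile_table (
id int,
name string,
age int
)
row format delimited fields terminated by ','
stored as sequencefile;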

name:pass_address_id,type:string;name:order_id,type:string;name:address_type,type:string;name:receiver,type:string;name:receiver_tel,type:string;name:zip_code,type:string;name:province,type:string;name:city,type:string;name:district,type:string;name:address,type:string;name:x_coordinate,type:string;name:y_coordinate,type:string;name:del_flg,type:string;name:create_time,type:string;name:create_user,type:string;name:update_time,type:string;name:update_user,type:string;name:address_order,type:string;name:midway_order_status,type:string;name:street,type:string;name:order_type,type:string;name:out_order_id,type:string;name:poi_title,type:string

Copy a table structure
create table tmp.soon_tbl_address like default.soon_tbl_address;
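CREATE TABLE ... LIKE copies only the schema, not the data. To copy structure and data in one step a CTAS statement can be used instead (the _ctas name below is just an example; note that a plain CTAS result is not partitioned):
create table tmp.soon_tbl_address_ctas as select * from default.soon_tbl_address;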

#Copy data into partitions of tmp.soon_tbl_address (the queries below read from ods.soon_tbl_address)
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-05') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 3
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-06') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 6
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-07') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 9
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-08') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 12

#Column order of the target table, with the date_id partition column last:
pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update,date_id


#Example partition directory and data file locations on HDFS:
/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-02-08/000000_0

#Drop a partition and its data
ALTER TABLE soon_tbl_address DROP PARTITION (date_id='2017-06-07');

#Delete the data in a table while keeping the table
insert overwrite table tmp.soon_tbl_address select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address where 1=0
truncate table tmp.soon_tbl_address
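On a partitioned table, a single partition can also be truncated while the other partitions stay intact, for example:
truncate table tmp.soon_tbl_address partition (date_id='2017-06-07');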


#List files in HDFS
[ds@master ~]$ hadoop fs -ls /user/hive/warehouse/tmp.db/soon_tbl_address
17/06/09 16:02:20 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 5 items
drwxr-xr-x - ds supergroup 0 2017-06-09 11:07 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
drwxr-xr-x - ds supergroup 0 2017-06-09 14:23 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
drwxr-xr-x - ds supergroup 0 2017-06-09 14:56 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__b6a456c4_73b6_4fec_8dfa_ddbe38f55856
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08

#Delete an HDFS directory recursively
[ds@master ~]$ hadoop fs -rmr hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
rmr: DEPRECATED: Please use 'rm -r' instead.
17/06/09 16:03:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/06/09 16:03:25 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2' to trash at: hdfs://master:9000/user/ds/.Trash/Current/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
[ds@master ~]$


#Export data to a local directory
INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my' SELECT * FROM tmp.soon_tbl_address
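By default the exported files use Hive's \001 field delimiter. Since Hive 0.11 the output row format can be set explicitly, e.g.:
INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT * FROM tmp.soon_tbl_address;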

#Load a local .txt file (on Linux) into a specific table partition
load data LOCAL INPATH '/home/users/my/test_table.txt' OVERWRITE into table data_center.test_table PARTITION(date='2017-06-07')
load data LOCAL INPATH '/home/users/my/test_partition.txt' OVERWRITE into table tmp.temp_test_partition PARTITION(date='2017-06-13')

#Manually create a table
CREATE TABLE IF NOT EXISTS data_center.test_no_partition(
id int,
name string,
age int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
stored as textfile;

Create an empty table with the same structure
CREATE TABLE data_center.test_no_partition LIKE tmp.test_no_partition;


#Change a column's name and type
ALTER TABLE test_no_partition CHANGE ip id int;
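Other common schema changes (the remark column and the _bak table name below are only illustrative):
ALTER TABLE test_no_partition ADD COLUMNS (remark string);
ALTER TABLE test_no_partition RENAME TO test_no_partition_bak;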

#Drop the table and its data
drop table test_no_partition;

#Delete table data; the table structure remains
#(plain DELETE only works on transactional/ACID tables; otherwise use INSERT OVERWRITE or TRUNCATE)
delete from test_no_partition;
insert overwrite table test_no_partition select id,age,name from test_no_partition where id=3;

#Grant permissions on a Hive database directory
hdfs dfs -chmod 777 hdfs://master:9000/user/hive/warehouse/tmp.db
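To apply the change recursively, and to adjust the owner as well (ds:supergroup matches the directory listing above; adjust to your environment):
hdfs dfs -chmod -R 777 hdfs://master:9000/user/hive/warehouse/tmp.db
hdfs dfs -chown -R ds:supergroup hdfs://master:9000/user/hive/warehouse/tmp.db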


#Rename an HDFS directory
hadoop fs -mv hdfs://master:9000/user/hive/warehouse/ods.db/test_table__cb086c0f_88ee_4623_938c_311a1e717c8a hdfs://master:9000/user/hive/warehouse/ods.db/test_table

CREATE TABLE ods.test_table_tmp(
id int,
name_path string,
parent_path string
)
row format delimited fields terminated by ','
stored as textfile
#The LOCATION clause is optional
LOCATION
'hdfs://master:9000/user/hive/warehouse/ods.db/test_table_tmp'



#Example data file path inside the table directory:
hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a


#Export to a local file:
insert overwrite local directory '/home/ds/user/my' select * from ods.test_table;

#View the contents of an HDFS file
hdfs dfs -cat hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a


CREATE TABLE `tmp.temp_test_partition`(
`id` int, 
`name` string, 
`age` int, 
`date_id` string)
row format delimited fields terminated by ','
stored as textfile


#Full DDL of data_center.test_partition as stored in the metastore:
CREATE TABLE `data_center.test_partition`( 
`id` int, 
`name` string, 
`age` int) 
PARTITIONED BY ( `date_id` string) 
ROW FORMAT SERDE 
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
WITH SERDEPROPERTIES ( 'field.delim'=',', 'serialization.format'=',') 
STORED AS INPUTFORMAT 
'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' 
LOCATION 
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition' 
TBLPROPERTIES ( 'transient_lastDdlTime'='1497492633')

#Manually create a partitioned table
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile



#Modify a partition (change its location, rename it)
ALTER TABLE table_name PARTITION (dt='2008-08-08') SET LOCATION "new location";
ALTER TABLE data_center.test_partition PARTITION (date_id='2008-08-08') RENAME TO PARTITION (dt='20080808'); 
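If partition directories were created directly on HDFS rather than through ALTER TABLE ADD PARTITION, the metastore can be brought back in sync with:
MSCK REPAIR TABLE data_center.test_partition;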

set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
insert overwrite table data_center.test_partition partition (date_id) select * from data_center.test_partition where name in ('ccc','lisi')
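To check which partitions the dynamic-partition insert actually produced:
show partitions data_center.test_partition;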


#Incremental partition update
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
insert overwrite table data_center.test_partition partition(date_id) 
select tmp.id,tmp.name,tmp.age,tmp.date_id from tmp.temp_test_partition tmp 
union all select a.* from data_center.test_partition a
left outer join tmp.temp_test_partition b on a.id=b.id where b.id is null and a.date_id in ('2017-06-14','2017-06-15','2017-06-16')


#Local source file used in the load below:
/home/ds/users/wcy/mysql_to_hdfs_update.txt

CREATE TABLE zjs_cust_endurance_time_copy (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
row format delimited fields terminated by ','
stored as textfile

#Load local txt data into the table
load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_update.txt' 
OVERWRITE into table data_center.zjs_cust_endurance_time_copy
Existing data in the target Hive table:
106 2017-06-21 120001 1062.22 2017-05-27
107 2017-06-21 310001 1387.85 2017-05-27
108 2017-06-21 111111 100.0 2017-05-27

Data extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27

Data in the target table after execution:
108 2017-06-21 111111 100.0 2017-05-27
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27


#Same table defined without a leading id column; id was added manually as the last column
CREATE TABLE zjs_cust_endurance_time_copy (
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date,
id int 
)
row format delimited fields terminated by ','
stored as textfile

Existing data in the target Hive table:
2017-06-21 120001 1062.22 2017-05-27 106
2017-06-21 310001 1387.85 2017-05-27 107
2017-06-21 111111 100.0 2017-05-27 108

Data extracted from the MySQL table:
2017-04-01 120000 2062.22 2017-04-27 06:46:30.0 106 
2017-04-01 310000 2387.85 2017-04-27 06:46:30.0 107 
2017-04-01 320100 2027.64 2017-04-27 06:46:30.0 108 
2017-04-01 330100 3763.16 2017-04-27 06:46:30.0 109

Data in the target table after execution:
2017-04-01 120000 2062.22 2017-04-27 106
2017-04-01 310000 2387.85 2017-04-27 107
2017-04-01 320100 2027.64 2017-04-27 108
2017-04-01 330100 3763.16 2017-04-27 109


#Load local txt data into a partitioned table
CREATE TABLE data_center.zjs_cust_endurance_time_copy_part (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile

load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_partupdate.txt' 
OVERWRITE into table data_center.zjs_cust_endurance_time_copy_part PARTITION(date='2017-06-21')

Existing data in the target Hive table:
select * from data_center.zjs_cust_endurance_time_copy_part;
108 2017-06-21 111111 100.0 2017-05-27 2017-06-21
106 2017-04-01 222222 99.0 2017-04-27 2017-06-21

Data extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27 06:46:30.0
107 2017-04-01 310000 2387.85 2017-04-27 06:46:30.0
108 2017-04-01 320100 2027.64 2017-04-27 06:46:30.0

insert overwrite table data_center.zjs_cust_endurance_time_copy_part partition(date) 
select tmp.id,tmp.computer_time,tmp.user_city_code,tmp.avg_patient_time,tmp.update_time,tmp.date 
from tmp.temp_zjs_cust_endurance_time_copy_part tmp 
union all select a.* from data_center.zjs_cust_endurance_time_copy_part a 
left outer join tmp.temp_zjs_cust_endurance_time_copy_part b 
on a.id=b.id where b.id is null


#Download file data from HDFS to the local filesystem
hadoop fs -get <source path> <local directory>
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/word_category_weights/000000_0 /home/ds/mydata
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result/000000_0 /home/ds/mydata/zjs_user_result
Download the file to the local workstation (via sz)
sz /home/ds/mydata/000000_0
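The reverse direction, uploading a local file into HDFS, uses -put (the paths below simply reuse the ones above as an example):
hadoop fs -put /home/ds/mydata/000000_0 hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/word_category_weights/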


create table data_center.word_category_weights(
keyword string,
category_id string,
weight_auto double,
category_name string,
del int
)
row format delimited fields terminated by ','
stored as textfile;

load data LOCAL INPATH '/home/users/wcy/000000_0' 
OVERWRITE into table data_center.word_category_weights


load data LOCAL INPATH '/home/users/wcy/000000_0_more_data' 
OVERWRITE into table data_center.word_category_weights


load data LOCAL INPATH '/home/users/wcy/zjs_user_result' OVERWRITE into table data_center.zjs_user_result

load data LOCAL INPATH '/home/users/wcy/goodsinfo' OVERWRITE into table data_center.goodsinfo

#Start the Hive metastore and HiveServer2 service processes
hive --service metastore &
hive --service hiveserver2 &
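Once HiveServer2 is up, clients can connect with beeline (assuming the default HiveServer2 port 10000 on master and the ds user):
beeline -u jdbc:hive2://master:10000 -n ds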


#DataX job log excerpt: the writer repeatedly falls back to row-by-row commits because of "Unknown command"
UnstructuredStorageReaderUtil - CsvReader using default settings [{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":",","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"u0000","safetySwitch":true,"skipEmptyRecords":true,"textQualifier":""","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}], csvReaderConfig is [null]
2017-06-26 16:39:24.963 [0-0-0-writer] WARN CommonRdbmsWriter$Task - Rolling back this write and falling back to committing one row at a time, because: Unknown command
2017-06-26 16:39:44.629 [job-0] INFO StandAloneJobContainerCommunicator - Total 2560 records, 45917 bytes | Speed 4.48KB/s, 256 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.006s | All Task WaitReaderTime 0.138s | Percentage 0.00%
2017-06-26 16:39:45.393 [0-0-0-writer] WARN CommonRdbmsWriter$Task - Rolling back this write and falling back to committing one row at a time, because: Unknown command
2017-06-26 16:39:53.884 [0-0-0-writer] WARN CommonRdbmsWriter$Task - Rolling back this write and falling back to committing one row at a time, because: Unknown command
2017-06-26 16:39:54.630 [job-0] INFO StandAloneJobContainerCommunicator - Total 4608 records, 81924 bytes | Speed 3.52KB/s, 204 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 11.211s | All Task WaitReaderT

UnstructuredStorageReaderUtil - CsvReader using default settings [{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":",","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"u0000","safetySwitch":true,"skipEmptyRecords":true,"textQualifier":""","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}], csvReaderConfig is [null]
2017-06-27 10:37:49.949 [0-0-0-writer] WARN CommonRdbmsWriter$Task - Rolling back this write and falling back to committing one row at a time, because: Unknown command

#Remove a leftover .hive-staging directory:
hadoop fs -rm hdfs://SAD-HDP-003:9000/user/hive/warehouse/ods.db/soon_tbl_address_part/.hive-staging_hive_2017-06-23_18-23-08_102_5432451293100937443-1

CREATE TABLE goodsinfo(
create_time string,
deal_code bigint , 
store_code bigint, 
category_code1 bigint, 
category_code2 bigint, 
category_code3 bigint, 
category_code4 bigint, 
goods_code bigint, 
term_code int, 
term_name string, 
store_name string, 
goods_name string, 
deal_name string, 
goods_image string, 
goods_pv int, 
goods_uv int, 
goods_pv_total int, 
time_total int, 
jump2_pv int, 
collect_total int, 
add_total int, 
pay_total int, 
pay_amount decimal(10,2))
row format delimited fields terminated by ','
stored as textfile;

CREATE TABLE zjs_user_result_1(
compute_day string, 
member_id string, 
mobile string, 
uc_user_id String, 
real_name string, 
register_time string, 
order_city_code string, 
city_name string, 
manage_amount double, 
deduction double, 
freight double, 
actual_paid double, 
insurance_amount double, 
complete_orders String, 
deduction_all double, 
freight_all double, 
complete_orders_all String, 
last_complete_time string, 
order_time string, 
cancel_order_before String, 
cancel_order_after String, 
order_nums String, 
invite_peoples String, 
invite_peoples_all String, 
share_orders String, 
share_orders_all String)
row format delimited fields terminated by ','
stored as textfile;

Original article: https://www.cnblogs.com/ngy0217/p/10051380.html