Hive(6)-DML数据操作

一. 数据导入

1. 语法

load data [local] inpath 'path' [overwrite] into table table_name [partition (partcol1=val1,…)];

1). load data:表示加载数据

2). local:表示从本地加载数据到hive表；否则从HDFS加载数据到hive表

3). inpath:表示加载数据的路径

4). overwrite:表示覆盖表中已有数据，否则表示追加

5). into table:表示加载到哪张表

6). partition:表示上传到指定分区

2. 实操

1). 加载本地文件到hive

-- Create the student table; fields in the data file are tab-delimited.
create table student (
    id   string,
    name string
)
row format delimited
fields terminated by '	';

-- Load a file from the local filesystem into default.student
-- (rows are appended because overwrite is not specified).
load data local
    inpath '/opt/module/datas/student.txt'
    into table default.student;

2). 加载HDFS文件到hive中

-- Upload the local file to HDFS first (Hive comments use "--", not "#").
dfs -put /opt/module/datas/student.txt /user/nty/hive;

-- Load the HDFS file into default.student (no "local" keyword, so the path is an HDFS path).
load data inpath '/user/nty/hive/student.txt' into table default.student;

3). 加载数据覆盖表中已有的数据

-- Upload the local file to HDFS (Hive comments use "--", not "#").
-- The upload is needed again because "load data inpath" moves the HDFS source
-- file into the table's directory rather than copying it.
dfs -put /opt/module/datas/student.txt /user/nty/hive;

-- Load the HDFS file and replace whatever default.student currently contains.
load data inpath '/user/nty/hive/student.txt' overwrite into table default.student;

3. 通过查询语句向表中插入数据(Insert)

1). 创建一张分区表

-- Partitioned table keyed by month.
-- NOTE: an earlier step in this document also creates a table named student;
-- drop that table first, otherwise this statement fails because the name is taken.
create table student (
    id   int,
    name string
)
partitioned by (month string)
row format delimited
fields terminated by '	';

2). 基本插入数据

-- Insert two literal rows into partition month='201709'.
-- Fix: the second name was wrapped in typographic quotes, which are not valid
-- string delimiters; both literals now use plain single quotes.
insert into table student partition (month = '201709')
values
    (1, 'wangwu'),
    (2, 'zhaoliu');

3). 基本模式插入（根据单张表查询结果）

-- Replace the contents of partition 201708 with the rows read from partition 201709.
insert overwrite table student partition (month = '201708')
    select
        id,
        name
    from student
    where month = '201709';

insert into：以追加数据的方式插入到表或分区，原有数据不会删除

insert overwrite：会覆盖表或分区中已存在的数据

4). 多表（多分区）插入模式（一次查询源表，将结果同时写入多个表或分区）

-- Multi-insert: a single from clause feeds two insert branches,
-- each overwriting a different target partition.
from student
insert overwrite table student partition (month = '201707')
    select id, name
    where month = '201709'
insert overwrite table student partition (month = '201706')
    select id, name
    where month = '201709';

4. 查询语句中创建表并加载数据（As Select）

-- Create student3 from a query (only if it does not exist yet);
-- the rows returned by the query become the new table's contents.
create table if not exists student3 as
select
    id,
    name
from student;

5. 创建表时通过Location指定加载数据路径

1). 上传数据到hdfs上

 -- Create the HDFS directory that the table's location clause will point at.
 dfs -mkdir /student;

 -- Copy the local data file into that directory.
 dfs -put /opt/module/datas/student.txt /student;

2). 创建表，并指定在hdfs上的位置

-- External table whose data lives in the HDFS directory /student.
-- Fix: the location literal was missing its closing quote, which left the
-- string unterminated and made the statement unparseable.
create external table if not exists student5 (
    id   int,
    name string
)
row format delimited fields terminated by '	'
location '/student';

6. Import数据到指定表中

-- Import data from an export directory into partition 201709 of student2.
import table student2 partition (month = '201709')
    from '/user/hive/warehouse/export/student';

注意：先用export导出后，再将数据导入。

二. 数据导出

1. Insert导出

-- Write the query result to a directory on the local filesystem (no row format specified).
insert overwrite local directory '/opt/module/datas/export/student'
    select *
    from student;

-- Write the query result to a local directory as delimited text,
-- using the field delimiter given in the literal below.
insert overwrite local directory '/opt/module/datas/export/student1'
    row format delimited
    fields terminated by '	'
    select *
    from student;

-- Write the query result to an HDFS directory (the "local" keyword is omitted),
-- formatted as delimited text.
insert overwrite directory '/user/nty/student2'
    row format delimited
    fields terminated by '	'
    select *
    from student;

2. Hadoop命令导出到本地

-- Copy one data file of partition month=201709 from HDFS to a local file.
dfs -get /user/hive/warehouse/student/month=201709/000000_0 /opt/module/datas/export/student3.txt;

3. Hive Shell 命令导出

# Run the query non-interactively with hive -e and redirect its output to a local file.
bin/hive -e 'select * from default.student;' > /opt/module/datas/export/student4.txt;

4. Export导出到HDFS上

-- Export default.student to an HDFS directory; this directory is the one the
-- import statement in this document reads from.
export table default.student
    to '/user/hive/warehouse/export/student';

三. 清除数据(Truncate)

-- Remove all rows from student while keeping the table definition.
truncate table student;

注意：Truncate只能清空管理表（内部表）中的数据，不能清空外部表中的数据。