2.1-2.2 Hive 中数据库(Table、Database)基本操作

官网文档:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL

一、create table

1、官方字段

#
#

CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name    -- (Note: TEMPORARY available in Hive 0.14.0 and later)
  [(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
  [COMMENT table_comment]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [SKEWED BY (col_name, col_name, ...)                  -- (Note: Available in Hive 0.10.0 and later)]
     ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
     [STORED AS DIRECTORIES]
  [
   [ROW FORMAT row_format] 
   [STORED AS file_format]
     | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  -- (Note: Available in Hive 0.6.0 and later)
  ]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]   -- (Note: Available in Hive 0.6.0 and later)
  [AS select_statement];   -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
 
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
  LIKE existing_table_or_view_name
  [LOCATION hdfs_path];
 
data_type
  : primitive_type
  | array_type
  | map_type
  | struct_type
  | union_type  -- (Note: Available in Hive 0.7.0 and later)
 
primitive_type
  : TINYINT
  | SMALLINT
  | INT
  | BIGINT
  | BOOLEAN
  | FLOAT
  | DOUBLE
  | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
  | STRING
  | BINARY      -- (Note: Available in Hive 0.8.0 and later)
  | TIMESTAMP   -- (Note: Available in Hive 0.8.0 and later)
  | DECIMAL     -- (Note: Available in Hive 0.11.0 and later)
  | DECIMAL(precision, scale)  -- (Note: Available in Hive 0.13.0 and later)
  | DATE        -- (Note: Available in Hive 0.12.0 and later)
  | VARCHAR     -- (Note: Available in Hive 0.12.0 and later)
  | CHAR        -- (Note: Available in Hive 0.13.0 and later)
 
array_type
  : ARRAY < data_type >
 
map_type
  : MAP < primitive_type, data_type >
 
struct_type
  : STRUCT < col_name : data_type [COMMENT col_comment], ...>
 
union_type
   : UNIONTYPE < data_type, data_type, ... >  -- (Note: Available in Hive 0.7.0 and later)
 
row_format
  : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
        [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
        [NULL DEFINED AS char]   -- (Note: Available in Hive 0.13 and later)
  | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
 
file_format:
  : SEQUENCEFILE
  | TEXTFILE    -- (Default, depending on hive.default.fileformat configuration)
  | RCFILE      -- (Note: Available in Hive 0.6.0 and later)
  | ORC         -- (Note: Available in Hive 0.11.0 and later)
  | PARQUET     -- (Note: Available in Hive 0.13.0 and later)
  | AVRO        -- (Note: Available in Hive 0.14.0 and later)
  | JSONFILE    -- (Note: Available in Hive 4.0.0 and later)
  | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
 
constraint_specification:
  : [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
    [, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE


2、建表例子

例子1、2

##################栗子#####################

--------------------------------------------------------------------------------------------
create table IF NOT EXISTS default.bf_1og_20150913        #在default库下创建一个表,不存在则创建;
(
    ip string COMMENT 'remote ip address',         #COMMENT:字段注释
    user string, 
    req_url string COMMENT 'user request url'
)
COMMENT ' BeiFeng Web Access Logs'            #表注释
ROW FORMAT DELIMITED FIELDS TERMINATED BY‘ ’        #hive的数据存在hdfs上,此项指定数据文件中列之间的间隔符
STORED AS TEXTFILE                    #数据格式
LOCATION '/user/bf/hive/warehouse/bf_log_201501913'        #表的存储路径,可以自己指定




--------------------------------------------------------------------------------------------
create table IF NOT EXISTS default.bf_1og_20150913_sa 
AS select ip, req_url from default.bf_log_20150913;    #创建一个表,此表的字段来源于查询另外一个表


例子3

################################
create table IF NOT EXISTS default.bf_log_20150914
like default.bf_log_20150913            #根据另外一张表来创建表


二、演示

1、建表

#创建表
hive (default)> create table IF NOT EXISTS default.bf_1og_20150913(
              > ip string COMMENT 'remote ip address',
              > user string, 
              > req_url string COMMENT 'user request url')
              > COMMENT 'BeiFeng Web Access Logs'
              > ROW FORMAT DELIMITED FIELDS TERMINATED BY' '
              > STORED AS TEXTFILE;
OK
Time taken: 0.361 seconds

hive (default)> show tables;
OK
tab_name
bf_1og_20150913
bf_log
Time taken: 0.052 seconds, Fetched: 2 row(s)


2、导入数据

#########
hive (default)> load data local inpath '/opt/datas/bf-log.txt' into table default.bf_1og_20150913;
Copying data from file:/opt/datas/bf-log.txt
Copying file: file:/opt/datas/bf-log.txt
Loading data to table default.bf_1og_20150913
Table default.bf_1og_20150913 stats: [numFiles=1, numRows=0, totalSize=141, rawDataSize=0]
OK
Time taken: 0.36 seconds

#########
hive (default)> select * from default.bf_1og_20150913;
OK
bf_1og_20150913.ip    bf_1og_20150913.user    bf_1og_20150913.req_url
"27.38.5.159"    "-"    "31/Aug/2015:00:04:53
"27.38.5.159"    "-"    "31/Aug/2015:00:04:37
"27.38.5.159"    "-"    "31/Aug/2015:00:04:53
Time taken: 0.156 seconds, Fetched: 3 row(s)


3、第二种建表例子

#建表
hive (default)> create table IF NOT EXISTS default.bf_1og_20150913_sa AS select ip, req_url from default.bf_1og_20150913;


#
hive (default)> show tables;
OK
tab_name
bf_1og_20150913
bf_1og_20150913_sa


#
hive (default)> select * from default.bf_1og_20150913_sa;
OK
bf_1og_20150913_sa.ip    bf_1og_20150913_sa.req_url
"27.38.5.159"    "31/Aug/2015:00:04:53
"27.38.5.159"    "31/Aug/2015:00:04:37
"27.38.5.159"    "31/Aug/2015:00:04:53
Time taken: 0.028 seconds, Fetched: 3 row(s)


4、第三种建表例子

##
hive (default)> create table IF NOT EXISTS default.bf_log_20150914 like default.bf_1og_20150913;
OK
Time taken: 0.046 seconds

##
hive (default)> show tables;                                                                    
OK
tab_name
bf_1og_20150913
bf_1og_20150913_sa
bf_log
bf_log_20150914
Time taken: 0.013 seconds, Fetched: 4 row(s)


#这里是指copy表结构,不copy表数据
hive (default)> select * from default.bf_log_20150914;
OK
bf_log_20150914.ip    bf_log_20150914.user    bf_log_20150914.req_url
Time taken: 0.029 seconds


三、Create Database

DDL:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL

DML:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML

1、Create Database

CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
  [COMMENT database_comment]
  [LOCATION hdfs_path]
  [WITH DBPROPERTIES (property_name=property_value, ...)];

具体:

##
创建
create database db_name;

create database if not exists db_name;        #标准

#指定HDFS上的存储位置
create database if not exists db_name location ‘/user/root/hive/warehouse/db_name.db’;


##
查看
show databases;
show databases like 'db_hive*';

desc database extended db_name;


##
删除
drop database db_name;

drop database db_name cascade;

drop database if exists db_name;


##
Alter Database
ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...);   -- (Note: SCHEMA added in Hive 0.14.0)
 
ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role;   -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)
  
ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path; -- (Note: Hive 2.2.1, 2.4.0 and later)
原文地址:https://www.cnblogs.com/weiyiming007/p/10748998.html