1.7 hive基本操作

一、基本命令和设置

1、库和表的基本命令

[root@hadoop-senior hive-0.13.1]# bin/hive
Logging initialized using configuration in jar:file:/opt/modules/hive-0.13.1/lib/hive-common-0.13.1.jar!/hive-log4j.properties
hive> show databases;
OK
default
Time taken: 0.367 seconds, Fetched: 1 row(s)
hive> show databases;
OK
default
Time taken: 0.013 seconds, Fetched: 1 row(s)
hive> create database db_hive;
OK
Time taken: 0.155 seconds
hive> show databases;         
OK
db_hive
default
Time taken: 0.011 seconds, Fetched: 2 row(s)
hive> use db_hive;   
OK
Time taken: 0.017 seconds
hive> create table student(id int,name string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
OK
Time taken: 0.108 seconds
hive> show tables;
OK
student
Time taken: 0.016 seconds, Fetched: 1 row(s)
hive> desc student;
OK
id                      int                                         
name                    string                                      
Time taken: 0.098 seconds, Fetched: 2 row(s)

hive> desc formatted student;            #查看表结构详细信息
OK
# col_name                data_type               comment             
          
id                      int                                         
name                    string                                      
          
# Detailed Table Information          
Database:               db_hive                  
Owner:                  root                     
CreateTime:             Fri Apr 19 10:44:19 CST 2019     
LastAccessTime:         UNKNOWN                  
Protect Mode:           None                     
Retention:              0                        
Location:               hdfs://hadoop-senior.ibeifeng.com:8020/user/hive/warehouse/db_hive.db/student     #此表在HDFS上的存储位置
#在HDFS上按目录分层存储：数据库名.db/表名
Table Type:             MANAGED_TABLE          #表类型      
Table Parameters:          
    transient_lastDdlTime    1555641859          
          
# Storage Information          
SerDe Library:          org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe     
InputFormat:            org.apache.hadoop.mapred.TextInputFormat     
OutputFormat:           org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat     
Compressed:             No                       
Num Buckets:            -1                       
Bucket Columns:         []                       
Sort Columns:           []                       
Storage Desc Params:          
    field.delim             \t                  
    serialization.format    \t                  
Time taken: 0.074 seconds, Fetched: 28 row(s)

2、加载数据和使用函数

#加载数据到hive表中
hive> load data local inpath '/opt/datas/student.txt' into table db_hive.student;
Copying data from file:/opt/datas/student.txt
Copying file: file:/opt/datas/student.txt
Loading data to table db_hive.student
Table db_hive.student stats: [numFiles=1, numRows=0, totalSize=36, rawDataSize=0]
OK
Time taken: 0.36 seconds

hive> select * from student;
OK
1001    zhangsan
1002    lisi
1003    wangwu
Time taken: 0.16 seconds, Fetched: 3 row(s)


#######函数#######
#查看自带的函数
hive> show functions;

#查看一个函数的使用方法
hive> desc function 函数名;

#查看一个函数的详细使用方法
hive> desc function extended 函数名;


#upper函数的用法
hive> select id,upper(name) uname from db_hive.student;
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1554717689707_0004, Tracking URL = http://hadoop-senior.ibeifeng.com:8088/proxy/application_1554717689707_0004/
Kill Command = /opt/modules/hadoop-2.5.0/bin/hadoop job  -kill job_1554717689707_0004
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2019-04-19 11:09:47,786 Stage-1 map = 0%,  reduce = 0%
2019-04-19 11:09:51,931 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 1.29 sec
MapReduce Total cumulative CPU time: 1 seconds 290 msec
Ended Job = job_1554717689707_0004
MapReduce Jobs Launched: 
Job 0: Map: 1   Cumulative CPU: 1.29 sec   HDFS Read: 279 HDFS Write: 36 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 290 msec
OK
1001    ZHANGSAN
1002    LISI
1003    WANGWU
Time taken: 10.298 seconds, Fetched: 3 row(s)

3、让hive命令行能显示当前使用的库，并在查询时显示字段名

#修改配置文件 conf/hive-site.xml，添加（或修改）以下两个属性
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
        <description>Whether to print the names of the columns in query output.</description>
    </property>

    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
        <description>Whether to include the current database in the Hive prompt.</description>
    </property>


#退出hive命令行，重新进入查看
hive (default)>            #已经显示当前使用的库了

hive (default)> use db_hive;
OK
Time taken: 0.303 seconds
hive (db_hive)>

hive (db_hive)> select * from student;
OK
student.id    student.name        #这里也显示字段名了
1001    zhangsan
1002    lisi
1003    wangwu
Time taken: 0.035 seconds, Fetched: 3 row(s)