2.9-2.10 hive中常见查询

一、查询语句

https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Select

1、select语法

SELECT [ALL | DISTINCT] select_expr, select_expr, ...
  FROM table_reference
  [WHERE where_condition]
  [GROUP BY col_list]
  [ORDER BY col_list]
  [CLUSTER BY col_list
    | [DISTRIBUTE BY col_list] [SORT BY col_list]
  ]
 [LIMIT [offset,] rows]


########
select 
[ALL | DISTINCT]            #all:查所有;DISTINCT:去重
select_expr, select_expr        #要查的字段
FROM table_reference        #从哪张表
[WHERE where_condition]        #where条件
[LIMIT [offset,] rows]        #查询的条数,感觉有点像tain -n    
....

Queries

全表查询、指定字段查询

=、 >=、 <=、 between and、limit

(not)in/is(not)null

max/min/count/sum/avg

group by/having

join


2、全表查询、指定字段查询

全表查询

select * from table_name;


指定字段查询

select t.empno, t.ename, t.deptno from emp t;


3、=、 >=、 <=、 between and、limit

#between and
hive (default)> select t.empno, t.ename, t.deptno from emp t where t.sal between 800 and 1500;


#limit
hive (default)> select * from emp limit 3;


4、(not)in/is(not)null

##
hive (default)> select t.empno, t.ename, t.deptno from emp t where comm is null;


5、max/min/count/sum/avg

##
hive (default)> select count(*) cnt from emp;


##
hive (default)> select max(sal) max_sal from emp;


##
hive (default)> select sum(sal) from emp;


##
hive (default)> select avg(sal) from emp;


6、group by/having

############ group by ###########
##每个部门的平均工资
hive (default)> select t.deptno, avg(t.sal) avg_sal from emp t group by t.deptno;


##每个部门中每个岗位的最高薪水,要查的字段必须出现在group by中
hive (default)> select t.deptno, t.job, max(t.sal) avg_sal from emp t group by t.deptno, job;



########### having #############
where是针对单条记录进行筛选
having是针对分组结果进行筛选

#求每个部门的平均薪水大于2000
hive (default)> select deptno, avg(sal) avg_sal from emp group by deptno having avg_sal > 2000;




########### join #############
两个表进行连接
m n
m表中一条记录和n表中的一条记录组成一条记录

#等值join
join ... on
hive (default)> select e.empno, e.ename, e.deptno from emp e join dept d on e.deptno = d.deptno;

#左连接
left join
hive (default)> select e.empno, e.ename, d.deptno, d.dname from emp e left join dept d on e.deptno = d.deptno;

#右连接
right join

hive (default)>select e.empno, e.ename, e.deptno, d.dname from emp e right join dept d on e.deptno = d.deptno;

#全连接
hive (default)> select e.empno, e.ename, e.deptno, d.dname from emp e full join dept d on e.deptno = d.deptno; 
原文地址:https://www.cnblogs.com/weiyiming007/p/10760693.html