dep.txt
10 ACCOUNTING 170020 RESEARCH 180030 SALES 190040 OPERATIONS 1700
emp.txt
7369 SMITH CLERK 7902 1980-12-17 800.00 207499 ALLEN SALESMAN 7698 1981-2-20 1600.00 300.00 307521 WARD SALESMAN 7698 1981-2-22 1250.00 500.00 307566 JONES MANAGER 7839 1981-4-2 2975.00 207654 MARTIN SALESMAN 7698 1981-9-28 1250.00 1400.00 307698 BLAKE MANAGER 7839 1981-5-1 2850.00 307782 CLARK MANAGER 7839 1981-6-9 2450.00 107788 SCOTT ANALYST 7566 1987-4-19 3000.00 207839 KING PRESIDENT 1981-11-17 5000.00 107844 TURNER SALESMAN 7698 1981-9-8 1500.00 0.00 307876 ADAMS CLERK 7788 1987-5-23 1100.00 207900 JAMES CLERK 7698 1981-12-3 950.00 307902 FORD ANALYST 7566 1981-12-3 3000.00 207934 MILLER CLERK 7782 1982-1-23 1300.00 10查询搜索
hive (default)> select *from dept;
OK
dept.deptno dept.dname dept.loc
10 ACCOUNTING 1700
20 RESEARCH 1800
30 SALES 1900
40 OPERATIONS 1700
Time taken: 0.214 seconds, Fetched: 4 row(s)
创建部门表
create table if not exists dept(deptno int,dname string,loc int)row format delimited fields terminated by '\t';
创建员工表
create table if not exists emp(empno int,ename string,job string,mgr int,hiredate string,sal double,comm double,deptno int)row format delimited fields terminated by '\t';
加载数据
hive (default)> load data local inpath '/opt/module/data/dept.txt' into table dept;Loading data to table default.deptTable default.dept stats: [numFiles=1, totalSize=71]OKTime taken: 1.038 secondshive (default)> load data local inpath '/opt/module/data/emp.txt' into table emp;Loading data to table default.empTable default.emp stats: [numFiles=1, totalSize=656]OKTime taken: 0.318 seconds
设置别名
hive (default)> select ename as name,deptno dn from emp;OKname dnSMITH 20ALLEN 30WARD 30JONES 20MARTIN 30BLAKE 30CLARK 10SCOTT 20KING 10TURNER 30ADAMS 20JAMES 30FORD 20MILLER 10Time taken: 0.101 seconds, Fetched: 14 row(s)
总数量
hive (default)> select count(*) cnt from emp;cnt14
平均值
hive (default)> select avg(sal) avg_sal from emp;avg_sal2073.214285714286
工资总和
hive (default)> select sum(sal) sum_sal from emp;OKsum_sal29025.0
工资的最大值(max)
select sum(sal) sum_sal from emp;max_sal5000.0
工资的最小值(min)
select min(sal) min_sal from emp;min_sal800.0
limit,LIMIT子句用于限制返回的行数。
hive (default)> select * from emp limit 3;OKemp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 207499 ALLEN SALESMAN 7698 1981-2-20 1600.0 300.0 307521 WARD SALESMAN 7698 1981-2-22 1250.0 500.0 30Time taken: 0.079 seconds, Fetched: 3 row(s)
where
hive (default)> select * from emp where sal >1000;OKemp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno7499 ALLEN SALESMAN 7698 1981-2-20 1600.0 300.0 307521 WARD SALESMAN 7698 1981-2-22 1250.0 500.0 307566 JONES MANAGER 7839 1981-4-2 2975.0 NULL 207654 MARTIN SALESMAN 7698 1981-9-28 1250.0 1400.0 307698 BLAKE MANAGER 7839 1981-5-1 2850.0 NULL 307782 CLARK MANAGER 7839 1981-6-9 2450.0 NULL 107788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 207839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 107844 TURNER SALESMAN 7698 1981-9-8 1500.0 0.0 307876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 207902 FORD ANALYST 7566 1981-12-3 3000.0 NULL 207934 MILLER CLERK 7782 1982-1-23 1300.0 NULL 10Time taken: 0.137 seconds, Fetched: 12 row(s)
查询工资是1500或5000的员工信息
hive (default)> select * from emp where sal IN (3000, 5000);OKemp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno7788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 207839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 107902 FORD ANALYST 7566 1981-12-3 3000.0 NULL 20Time taken: 0.096 seconds, Fetched: 3 row(s)
RLIKE子句是Hive中这个功能的一个扩展,其可以通过Java的正则表达式这个更强大的语言来指定匹配条件。
hive (default)> select ename, sal from emp where sal RLIKE '[2]';OKename salWARD 1250.0JONES 2975.0MARTIN 1250.0BLAKE 2850.0CLARK 2450.0Time taken: 0.07 seconds, Fetched: 5 row(s)
GROUP BY语句通常会和聚合函数一起使用,按照一个或者多个列队结果进行分组,然后对每个组执行聚合操作。
hive (default)> select t.deptno, avg(t.sal) avg_sal from emp t group by t.deptno;t.deptno avg_sal10 2916.666666666666520 2175.030 1566.666666666666
select t.deptno, t.job, max(t.sal) max_sal from emp t group by t.deptno, t.job;
t.deptno t.job max_sal
10 CLERK 1300.0
10 MANAGER 2450.0
10 PRESIDENT 5000.0
20 ANALYST 3000.0
20 CLERK 1100.0
20 MANAGER 2975.0
30 CLERK 950.0
30 MANAGER 2850.0
30 SALESMAN 1600.
having与where不同点
(1)where后面不能写分组函数,而having后面可以使用分组函数。
(2)having只用于group by分组统计语句。
每个部门的平均薪水大于2000的部门
select deptno, avg(sal) avg_sal from emp group by deptno having avg_sal > 2000;
