dep.txt

    1. 10 ACCOUNTING 1700
    2. 20 RESEARCH 1800
    3. 30 SALES 1900
    4. 40 OPERATIONS 1700

    emp.txt

    1. 7369 SMITH CLERK 7902 1980-12-17 800.00 20
    2. 7499 ALLEN SALESMAN 7698 1981-2-20 1600.00 300.00 30
    3. 7521 WARD SALESMAN 7698 1981-2-22 1250.00 500.00 30
    4. 7566 JONES MANAGER 7839 1981-4-2 2975.00 20
    5. 7654 MARTIN SALESMAN 7698 1981-9-28 1250.00 1400.00 30
    6. 7698 BLAKE MANAGER 7839 1981-5-1 2850.00 30
    7. 7782 CLARK MANAGER 7839 1981-6-9 2450.00 10
    8. 7788 SCOTT ANALYST 7566 1987-4-19 3000.00 20
    9. 7839 KING PRESIDENT 1981-11-17 5000.00 10
    10. 7844 TURNER SALESMAN 7698 1981-9-8 1500.00 0.00 30
    11. 7876 ADAMS CLERK 7788 1987-5-23 1100.00 20
    12. 7900 JAMES CLERK 7698 1981-12-3 950.00 30
    13. 7902 FORD ANALYST 7566 1981-12-3 3000.00 20
    14. 7934 MILLER CLERK 7782 1982-1-23 1300.00 10
    15. 查询搜索

    hive (default)> select *from dept;
    OK
    dept.deptno dept.dname dept.loc
    10 ACCOUNTING 1700
    20 RESEARCH 1800
    30 SALES 1900
    40 OPERATIONS 1700
    Time taken: 0.214 seconds, Fetched: 4 row(s)

    创建部门表

    1. create table if not exists dept(
    2. deptno int,
    3. dname string,
    4. loc int
    5. )
    6. row format delimited fields terminated by '\t';

    创建员工表

    1. create table if not exists emp(
    2. empno int,
    3. ename string,
    4. job string,
    5. mgr int,
    6. hiredate string,
    7. sal double,
    8. comm double,
    9. deptno int)
    10. row format delimited fields terminated by '\t';

    加载数据

    1. hive (default)> load data local inpath '/opt/module/data/dept.txt' into table dept;
    2. Loading data to table default.dept
    3. Table default.dept stats: [numFiles=1, totalSize=71]
    4. OK
    5. Time taken: 1.038 seconds
    6. hive (default)> load data local inpath '/opt/module/data/emp.txt' into table emp;
    7. Loading data to table default.emp
    8. Table default.emp stats: [numFiles=1, totalSize=656]
    9. OK
    10. Time taken: 0.318 seconds

    设置别名

    1. hive (default)> select ename as name,deptno dn from emp;
    2. OK
    3. name dn
    4. SMITH 20
    5. ALLEN 30
    6. WARD 30
    7. JONES 20
    8. MARTIN 30
    9. BLAKE 30
    10. CLARK 10
    11. SCOTT 20
    12. KING 10
    13. TURNER 30
    14. ADAMS 20
    15. JAMES 30
    16. FORD 20
    17. MILLER 10
    18. Time taken: 0.101 seconds, Fetched: 14 row(s)

    总数量

    1. hive (default)> select count(*) cnt from emp;
    2. cnt
    3. 14

    平均值

    1. hive (default)> select avg(sal) avg_sal from emp;
    2. avg_sal
    3. 2073.214285714286

    工资总和

    1. hive (default)> select sum(sal) sum_sal from emp;
    2. OK
    3. sum_sal
    4. 29025.0

    工资的最大值(max)

    1. select sum(sal) sum_sal from emp;
    2. max_sal
    3. 5000.0

    工资的最小值(min)

    1. select min(sal) min_sal from emp;
    2. min_sal
    3. 800.0

    limit,LIMIT子句用于限制返回的行数。

    1. hive (default)> select * from emp limit 3;
    2. OK
    3. emp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno
    4. 7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 20
    5. 7499 ALLEN SALESMAN 7698 1981-2-20 1600.0 300.0 30
    6. 7521 WARD SALESMAN 7698 1981-2-22 1250.0 500.0 30
    7. Time taken: 0.079 seconds, Fetched: 3 row(s)

    where

    1. hive (default)> select * from emp where sal >1000;
    2. OK
    3. emp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno
    4. 7499 ALLEN SALESMAN 7698 1981-2-20 1600.0 300.0 30
    5. 7521 WARD SALESMAN 7698 1981-2-22 1250.0 500.0 30
    6. 7566 JONES MANAGER 7839 1981-4-2 2975.0 NULL 20
    7. 7654 MARTIN SALESMAN 7698 1981-9-28 1250.0 1400.0 30
    8. 7698 BLAKE MANAGER 7839 1981-5-1 2850.0 NULL 30
    9. 7782 CLARK MANAGER 7839 1981-6-9 2450.0 NULL 10
    10. 7788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 20
    11. 7839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 10
    12. 7844 TURNER SALESMAN 7698 1981-9-8 1500.0 0.0 30
    13. 7876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 20
    14. 7902 FORD ANALYST 7566 1981-12-3 3000.0 NULL 20
    15. 7934 MILLER CLERK 7782 1982-1-23 1300.0 NULL 10
    16. Time taken: 0.137 seconds, Fetched: 12 row(s)

    查询工资是1500或5000的员工信息

    1. hive (default)> select * from emp where sal IN (3000, 5000);
    2. OK
    3. emp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno
    4. 7788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 20
    5. 7839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 10
    6. 7902 FORD ANALYST 7566 1981-12-3 3000.0 NULL 20
    7. Time taken: 0.096 seconds, Fetched: 3 row(s)

    RLIKE子句是Hive中这个功能的一个扩展,其可以通过Java的正则表达式这个更强大的语言来指定匹配条件。

    1. hive (default)> select ename, sal from emp where sal RLIKE '[2]';
    2. OK
    3. ename sal
    4. WARD 1250.0
    5. JONES 2975.0
    6. MARTIN 1250.0
    7. BLAKE 2850.0
    8. CLARK 2450.0
    9. Time taken: 0.07 seconds, Fetched: 5 row(s)

    GROUP BY语句通常会和聚合函数一起使用,按照一个或者多个列队结果进行分组,然后对每个组执行聚合操作。

    1. hive (default)> select t.deptno, avg(t.sal) avg_sal from emp t group by t.deptno;
    2. t.deptno avg_sal
    3. 10 2916.6666666666665
    4. 20 2175.0
    5. 30 1566.666666666666
    1. select t.deptno, t.job, max(t.sal) max_sal from emp t group by t.deptno, t.job;

    t.deptno t.job max_sal
    10 CLERK 1300.0
    10 MANAGER 2450.0
    10 PRESIDENT 5000.0
    20 ANALYST 3000.0
    20 CLERK 1100.0
    20 MANAGER 2975.0
    30 CLERK 950.0
    30 MANAGER 2850.0
    30 SALESMAN 1600.

    having与where不同点
    (1)where后面不能写分组函数,而having后面可以使用分组函数。
    (2)having只用于group by分组统计语句。

    每个部门的平均薪水大于2000的部门

    1. select deptno, avg(sal) avg_sal from emp group by deptno having avg_sal > 2000;