
1. Preparation

  1. Hive runs on top of Hadoop, so a working Hadoop environment is required first; the earlier posts in this series cover how to set one up.
  2. Accessing Hive over JDBC may fail with a permission error, so we grant the root user proxy rights by adding the following to Hadoop's core-site.xml:

```xml
<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>
```

  3. JDBC access also requires HiveServer2 to be running. Either of these commands starts it:

```bash
hiveserver2 &
hive --service hiveserver2
```

     With both of them I had the problem of HiveServer2's output being printed to the console, so I use this command instead:

```bash
nohup hiveserver2 >> /usr/local/hive/log/hiveserver2.log 2>&1 &
```

     By default HiveServer2 listens on port 10000. Once it is up, the connection can be verified from the shell as shown below.
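A quick sanity check with Beeline, assuming the hadoop0 host and test database that application.yml uses later in this post:

```bash
# Should connect and drop into a JDBC shell if HiveServer2 is reachable
beeline -u jdbc:hive2://hadoop0:10000/test
```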

2. Add Dependencies and Write the Code

Since Hive is built on Hadoop, we continue from the earlier Hadoop + Spring Boot integration project and add Hive support to it.

pom.xml

```xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.7.0</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.zym</groupId>
    <artifactId>hadoop_springboot</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>hadoop_springboot</name>
    <description>Demo project for Spring Boot</description>
    <properties>
        <java.version>1.8</java.version>
        <hadoop.version>3.3.3</hadoop.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Hadoop -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-streaming</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>com.google.guava</groupId>
                    <artifactId>guava</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-distcp</artifactId>
            <version>${hadoop.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>${hadoop.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.2.1</version>
        </dependency>
        <!-- Hive JDBC driver -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>2.3.0</version>
        </dependency>
        <!-- Chinese tokenizer -->
        <dependency>
            <groupId>cn.bestwu</groupId>
            <artifactId>ik-analyzers</artifactId>
            <version>5.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.46</version>
        </dependency>
        <!-- needed by the MapReduce code -->
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
```

application.yml

```yaml
# tomcat thread = 200
server:
  tomcat:
    max-threads: 1000
  # edit tomcat port
  port: 8900
  # session time 30
  session-timeout: 60
spring:
  application:
    name: hadoop
  servlet:
    multipart:
      max-file-size: 50MB
      max-request-size: 50MB
  datasource:
    hive: # Hive data source
      url: jdbc:hive2://hadoop0:10000/test
      type: com.alibaba.druid.pool.DruidDataSource
      username: hive1
      password: Newpassword2022!!!
      driver-class-name: org.apache.hive.jdbc.HiveDriver
    common-config: # shared connection-pool settings applied to all data sources
      initialSize: 1
      minIdle: 1
      maxIdle: 5
      maxActive: 50
      maxWait: 10000
      timeBetweenEvictionRunsMillis: 10000
      minEvictableIdleTimeMillis: 300000
      validationQuery: select 'x'
      testWhileIdle: true
      testOnBorrow: false
      testOnReturn: false
      poolPreparedStatements: true
      maxOpenPreparedStatements: 20
      filters: stat
hdfs:
  path: hdfs://hadoop0:9000
  username: root
```

DataSourceCommonProperties.java

```java
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

// Binds the shared pool settings under spring.datasource.common-config in application.yml.
@Data
@NoArgsConstructor
@AllArgsConstructor
@Component
@ConfigurationProperties(prefix = "spring.datasource.common-config", ignoreUnknownFields = false)
public class DataSourceCommonProperties {
    private int initialSize = 10;
    private int minIdle;
    private int maxIdle;
    private int maxActive;
    private int maxWait;
    private int timeBetweenEvictionRunsMillis;
    private int minEvictableIdleTimeMillis;
    private String validationQuery;
    private boolean testWhileIdle;
    private boolean testOnBorrow;
    private boolean testOnReturn;
    private boolean poolPreparedStatements;
    private int maxOpenPreparedStatements;
    private String filters;
    private String mapperLocations;
    private String typeAliasPackage;
}
```

HiveDruidConfiguration.java

```java
import com.alibaba.druid.pool.DruidDataSource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import javax.sql.DataSource;
import java.sql.SQLException;

@Slf4j
@Configuration
@EnableConfigurationProperties({HiveJdbcProperties.class, DataSourceCommonProperties.class})
public class HiveDruidConfiguration {

    @Autowired
    private HiveJdbcProperties hiveJdbcProperties;

    @Autowired
    private DataSourceCommonProperties dataSourceCommonProperties;

    @Bean("hiveDruidDataSource") // register the bean
    @Qualifier("hiveDruidDataSource") // qualifier so it can be injected by name
    public DataSource dataSource() {
        DruidDataSource datasource = new DruidDataSource();
        // Hive-specific connection properties
        datasource.setUrl(hiveJdbcProperties.getUrl());
        datasource.setUsername(hiveJdbcProperties.getUsername());
        datasource.setPassword(hiveJdbcProperties.getPassword());
        datasource.setDriverClassName(hiveJdbcProperties.getDriverClassName());
        // shared pool properties
        datasource.setInitialSize(dataSourceCommonProperties.getInitialSize());
        datasource.setMinIdle(dataSourceCommonProperties.getMinIdle());
        datasource.setMaxActive(dataSourceCommonProperties.getMaxActive());
        datasource.setMaxWait(dataSourceCommonProperties.getMaxWait());
        datasource.setTimeBetweenEvictionRunsMillis(dataSourceCommonProperties.getTimeBetweenEvictionRunsMillis());
        datasource.setMinEvictableIdleTimeMillis(dataSourceCommonProperties.getMinEvictableIdleTimeMillis());
        datasource.setValidationQuery(dataSourceCommonProperties.getValidationQuery());
        datasource.setTestWhileIdle(dataSourceCommonProperties.isTestWhileIdle());
        datasource.setTestOnBorrow(dataSourceCommonProperties.isTestOnBorrow());
        datasource.setTestOnReturn(dataSourceCommonProperties.isTestOnReturn());
        datasource.setPoolPreparedStatements(dataSourceCommonProperties.isPoolPreparedStatements());
        try {
            datasource.setFilters(dataSourceCommonProperties.getFilters());
        } catch (SQLException e) {
            log.error("Druid configuration initialization filter error.", e);
        }
        return datasource;
    }
}
```

HiveJdbcConfiguration.java

```java
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.jdbc.core.JdbcTemplate;

import javax.sql.DataSource;

@Configuration
public class HiveJdbcConfiguration {
    @Bean("hiveJdbcTemplate")
    @Qualifier("hiveJdbcTemplate")
    public JdbcTemplate jdbcTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
        return new JdbcTemplate(dataSource);
    }
}
```

HiveJdbcProperties.java

```java
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

// Binds the spring.datasource.hive section of application.yml.
@Data
@NoArgsConstructor
@AllArgsConstructor
@Component
@ConfigurationProperties(prefix = "spring.datasource.hive", ignoreUnknownFields = false)
public class HiveJdbcProperties {
    private String url;
    private String type;
    private String username;
    private String password;
    private String driverClassName;
}
```

HiveController.java

```java
import com.zym.result.Result;
import com.zym.service.HiveService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

@RestController
@RequestMapping("/hadoop/hive")
@Slf4j
public class HiveController {

    @Autowired
    private HiveService hiveService;

    @GetMapping("/listAllTables")
    public Result listAllTables() {
        return new Result("SUCCESS", "Fetched all tables", hiveService.listAllTables());
    }

    @GetMapping("/selectFromTable")
    public Result selectFromTable(String tableName) {
        return new Result("SUCCESS", "Fetched table contents", hiveService.selectFromTable(tableName));
    }

    @GetMapping("/select")
    public Result select(String hql) {
        return new Result("SUCCESS", "Query result", hiveService.select(hql));
    }
}
```
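The com.zym.result.Result wrapper comes from the earlier posts in this series and is not shown here; a minimal hypothetical sketch that matches the three-argument constructor used above would look like this:

```java
import lombok.AllArgsConstructor;
import lombok.Data;

// Hypothetical minimal Result: a status code, a message, and the payload.
@Data
@AllArgsConstructor
public class Result {
    private String code;
    private String message;
    private Object data;
}
```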

HiveService.java

```java
import java.util.List;

public interface HiveService {
    Object select(String hql);
    List<String> listAllTables();
    List<String> describeTable(String tableName);
    List<String> selectFromTable(String tableName);
}
```

HiveServiceImpl.java

```java
import com.zym.service.HiveService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;

import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

@Slf4j
@Service
public class HiveServiceImpl implements HiveService {

    @Autowired
    @Qualifier("hiveJdbcTemplate")
    private JdbcTemplate hiveJdbcTemplate;

    @Autowired
    @Qualifier("hiveDruidDataSource")
    private DataSource hiveDruidDataSource;

    @Override
    public Object select(String hql) {
        return hiveJdbcTemplate.queryForList(hql);
    }

    @Override
    public List<String> listAllTables() {
        List<String> result = new ArrayList<>();
        String sql = "show tables";
        log.info("Running: " + sql);
        // try-with-resources so the pooled connection is returned to Druid
        try (Connection connection = hiveDruidDataSource.getConnection();
             Statement statement = connection.createStatement();
             ResultSet resultSet = statement.executeQuery(sql)) {
            while (resultSet.next()) {
                result.add(resultSet.getString(1));
            }
            return result;
        } catch (SQLException e) {
            log.error(e.getMessage(), e);
        }
        return Collections.emptyList();
    }

    @Override
    public List<String> describeTable(String tableName) {
        if (StringUtils.isEmpty(tableName)) {
            return Collections.emptyList();
        }
        List<String> result = new ArrayList<>();
        String sql = "describe " + tableName;
        log.info("Running: " + sql);
        try (Connection connection = hiveDruidDataSource.getConnection();
             Statement statement = connection.createStatement();
             ResultSet resultSet = statement.executeQuery(sql)) {
            while (resultSet.next()) {
                result.add(resultSet.getString(1));
            }
            return result;
        } catch (SQLException e) {
            log.error(e.getMessage(), e);
        }
        return Collections.emptyList();
    }

    @Override
    public List<String> selectFromTable(String tableName) {
        if (StringUtils.isEmpty(tableName)) {
            return Collections.emptyList();
        }
        List<String> result = new ArrayList<>();
        String sql = "select * from " + tableName;
        log.info("Running: " + sql);
        try (Connection connection = hiveDruidDataSource.getConnection();
             Statement statement = connection.createStatement();
             ResultSet resultSet = statement.executeQuery(sql)) {
            int columnCount = resultSet.getMetaData().getColumnCount();
            while (resultSet.next()) {
                // join all columns of the row with single spaces
                StringBuilder row = new StringBuilder();
                for (int i = 1; i <= columnCount; i++) {
                    row.append(resultSet.getString(i));
                    if (i < columnCount) {
                        row.append(" ");
                    }
                }
                log.info(row.toString());
                result.add(row.toString());
            }
            return result;
        } catch (SQLException e) {
            log.error(e.getMessage(), e);
        }
        return Collections.emptyList();
    }
}
```
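One caveat: select() runs whatever HQL it is given, and selectFromTable() concatenates the table name straight into the statement, so these endpoints should never face untrusted input. For value filters, JdbcTemplate can bind parameters instead of concatenating; a small sketch (the t_user table and age column are made up):

```java
// Values can be bound with '?' placeholders; identifiers such as
// table names cannot be bound and must be validated separately.
List<Map<String, Object>> rows = hiveJdbcTemplate.queryForList(
        "select * from t_user where age > ?", 18);
```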

3. Quick Test

Listing all tables in the database:
1.png
Fetching the rows of a table:
2.png
Running an ad-hoc HiveQL query:
3.png
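With the service listening on port 8900 as configured in application.yml, the same endpoints can also be exercised from the command line (t_user is a placeholder table name):

```bash
curl "http://localhost:8900/hadoop/hive/listAllTables"
curl "http://localhost:8900/hadoop/hive/selectFromTable?tableName=t_user"
curl "http://localhost:8900/hadoop/hive/select?hql=show%20tables"
```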

4. Problems Encountered

4.png
The console log alone doesn't reveal the cause, so let's check the log file at the custom output path we set earlier:
5.png
It is much the same as the console error and still hard to read; all it tells us is that the failure happened during the job stage, so let's check Hive's own log:

```bash
cat /tmp/root/hive.log
```

6.png
... an out-of-memory error. Fine.

Solution: configure the heap size in Hive's hive-env.sh configuration file.
7.png
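For reference, the change amounts to raising the heap setting in hive-env.sh; a sketch assuming a 2 GB heap (pick a value that fits your machine):

```bash
# /usr/local/hive/conf/hive-env.sh
# Heap size in MB for Hive daemons such as HiveServer2
export HADOOP_HEAPSIZE=2048
```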
Then restart HiveServer2 for the change to take effect.

There is one leftover question: during testing I found that, whether using Beeline or plain JDBC, I could connect without entering a username or password. I haven't found the cause yet; if anyone understands why, please share.

Update: after digging through some material online, this makes more sense. Hive ships without username/password authentication by default; to turn it on you have to write your own validation code as a custom authenticator and then add the username and password settings to the configuration file, as sketched below.
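A hedged sketch of what such a custom authenticator involves: implement Hive's PasswdAuthenticationProvider interface, put the compiled class on HiveServer2's classpath (e.g. a jar under $HIVE_HOME/lib), and point hive-site.xml at it. The class name and credentials below are made up:

```java
package com.zym.auth;

import javax.security.sasl.AuthenticationException;
import org.apache.hive.service.auth.PasswdAuthenticationProvider;

// Hypothetical authenticator: accept a single hard-coded account.
public class SampleAuthenticator implements PasswdAuthenticationProvider {
    @Override
    public void Authenticate(String user, String password) throws AuthenticationException {
        if (!"hive1".equals(user) || !"Newpassword2022!!!".equals(password)) {
            throw new AuthenticationException("Invalid user or password");
        }
    }
}
```

```xml
<!-- hive-site.xml -->
<property>
    <name>hive.server2.authentication</name>
    <value>CUSTOM</value>
</property>
<property>
    <name>hive.server2.custom.authentication.class</name>
    <value>com.zym.auth.SampleAuthenticator</value>
</property>
```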

References:
https://blog.csdn.net/Winter_chen001/article/details/110449266
https://blog.csdn.net/yamaxifeng_132/article/details/119869080