SpringBoot 2.X 集成 Hive-jdbc(正文示例依赖版本为 1.2.1,集成 3.1.1 时思路相同,仅需调整依赖版本与排除项)


最近公司有一个需求,需求的内容是根据用户页面选择的参数条件查询Hive,数据量大致在300万以内,要求3秒响应.使用的其它技术就不多说了,先说说SpringBoot集成Hive-jdbc吧.网上虽然有完整的集成方案,但是按照方案来实现时总会遇到各种各样的问题:一会是日志包冲突,一会是jetty依赖冲突,各种烦心的异常.这次趁着这个机会来说说我是怎么集成的.

先贴上我的pom.xml相关依赖:


org.projectlombok
lombok


com.alibaba
druid-spring-boot-starter
1.1.16


org.apache.hive
hive-jdbc
1.2.1


org.eclipse.jetty.aggregate
jetty-all


org.apache.hive
hive-shims


org.slf4j
slf4j-log4j12




org.springframework.boot
spring-boot-starter-jdbc

连接数据库肯定需要DataSource配置,我这里用的是阿里系的Druid连接池.

@Configuration
@ConfigurationProperties(prefix = "hive")
public class HiveDruidConfig {

    // Connection settings, bound from "hive.*" properties.
    private String url;
    private String user;
    private String password;
    private String driverClassName;

    // Druid pool tuning, also bound from "hive.*" properties.
    private int initialSize;
    private int minIdle;
    private int maxWait;
    private int timeBetweenEvictionRunsMillis;
    private int minEvictableIdleTimeMillis;
    private String validationQuery;
    private boolean testWhileIdle;
    private boolean testOnBorrow;
    private boolean testOnReturn;
    private boolean poolPreparedStatements;
    private int maxPoolPreparedStatementPerConnectionSize;

    /**
     * Builds the Druid connection pool that backs every Hive connection.
     *
     * @return a fully configured {@link DruidDataSource}
     */
    @Bean(name = "hiveDruidDataSource")
    @Qualifier("hiveDruidDataSource")
    public DruidDataSource dataSource() {
        final DruidDataSource ds = new DruidDataSource();

        // JDBC endpoint and credentials.
        ds.setUrl(url);
        ds.setUsername(user);
        ds.setPassword(password);
        ds.setDriverClassName(driverClassName);

        // Pool sizing and idle-connection eviction.
        ds.setInitialSize(initialSize);
        ds.setMinIdle(minIdle);
        ds.setMaxWait(maxWait);
        ds.setTimeBetweenEvictionRunsMillis(timeBetweenEvictionRunsMillis);
        ds.setMinEvictableIdleTimeMillis(minEvictableIdleTimeMillis);

        // Connection validation strategy.
        ds.setValidationQuery(validationQuery);
        ds.setTestWhileIdle(testWhileIdle);
        ds.setTestOnBorrow(testOnBorrow);
        ds.setTestOnReturn(testOnReturn);

        // Prepared-statement caching.
        ds.setPoolPreparedStatements(poolPreparedStatements);
        ds.setMaxPoolPreparedStatementPerConnectionSize(maxPoolPreparedStatementPerConnectionSize);
        return ds;
    }

    // Getters and setters for every bound property are omitted here.

    /**
     * JdbcTemplate wired to the Hive Druid pool declared above.
     *
     * @param dataSource the "hiveDruidDataSource" bean
     * @return a JdbcTemplate for running Hive statements
     */
    @Bean(name = "hiveDruidTemplate")
    public JdbcTemplate hiveDruidTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
        return new JdbcTemplate(dataSource);
    }
}

配置完成以后,我们就需要写工具类了
HiveRepository.java
@Service
public class HiveRepository {

    // NOTE(review): the config class exposes this bean under the name
    // "hiveDruidTemplate". Autowiring by type works while there is only one
    // JdbcTemplate bean; if a second data source is added, qualify this
    // injection point with @Qualifier("hiveDruidTemplate").
    @Autowired
    private JdbcTemplate hiveJdbcTemplate;

    /**
     * Creates the bus_receiver table in Hive if it does not already exist.
     * Runs once after dependency injection completes ({@code @PostConstruct}).
     */
    @PostConstruct
    public void createTable() {
        // DDL for a tab-delimited plain-text table.
        // StringBuilder: no synchronization needed for a method-local buffer.
        StringBuilder sql = new StringBuilder("create table IF NOT EXISTS ");
        sql.append("bus_receiver ");
        sql.append("(id BIGINT comment '主键ID' " +
        ",name STRING comment '姓名' " +
        ",address STRING comment '地址'" +
        ",en_name STRING comment '拼音名字'" +
        ",member_family INT comment '家庭成员'" +
        ",createDate DATE comment '创建时') "); // NOTE(review): column comment probably meant '创建时间' — confirm before changing the DDL
        sql.append(" ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'"); // field delimiter: tab
        sql.append(" STORED AS TEXTFILE"); // store as plain text
        hiveJdbcTemplate.execute(sql.toString());
    }

    /**
     * Bulk-loads a data file into the bus_receiver table via Hive's LOAD DATA.
     *
     * @param pathFile path (on HDFS) of the file to load
     */
    public void loadData(String pathFile) {
        // SECURITY: pathFile is concatenated into the statement. Hive's
        // LOAD DATA does not accept bind parameters, so this is only safe for
        // trusted, server-side paths — never pass raw user input here.
        String sql = "LOAD DATA INPATH '"+pathFile+"' INTO TABLE bus_receiver";
        hiveJdbcTemplate.execute(sql);
    }

    /**
     * Inserts a single record using a parameterized statement.
     *
     * @param busReceiverEntity entity carrying the column values
     */
    public void insert(BusReceiverEntity busReceiverEntity) {
        hiveJdbcTemplate.update(
            "insert into bus_receiver(id,name,address,en_name,member_family) values(?,?,?,?,?)",
            new PreparedStatementSetter() {
                @Override
                public void setValues(PreparedStatement ps) throws SQLException {
                    ps.setLong(1, busReceiverEntity.getId());
                    ps.setString(2, busReceiverEntity.getName());
                    ps.setString(3, busReceiverEntity.getAddress());
                    ps.setString(4, busReceiverEntity.getEnName());
                    ps.setInt(5, busReceiverEntity.getMemberFamily());
                }
            });
    }

    /**
     * Empties the bus_receiver table. Hive cannot DELETE from a
     * non-transactional table, so we overwrite it with an empty result set
     * ({@code where 1=0}) instead.
     */
    public void deleteAll() {
        String sql = "insert overwrite table bus_receiver select * from bus_receiver where 1=0";
        hiveJdbcTemplate.execute(sql);
    }
}
最后贴上配置文件(注意:HiveDruidConfig 还绑定了 user、password、minIdle 属性,配置中需一并给出):
```yaml
hive:
  url: jdbc:hive2://XXX:10000/test
  driver-class-name: org.apache.hive.jdbc.HiveDriver
  user: # 按实际环境填写连接用户名
  password: # 按实际环境填写连接密码
  filters: stat
  initialSize: 2
  minIdle: 1
  maxWait: 60000
  timeBetweenEvictionRunsMillis: 60000
  minEvictableIdleTimeMillis: 300000
  validationQuery: SELECT 1
  testWhileIdle: true
  testOnBorrow: false
  testOnReturn: false
  poolPreparedStatements: false
  maxPoolPreparedStatementPerConnectionSize: 200
```


需要注意的是,在启动项目的时候需要将servlet-api的jar包放到JAVA_HOME/jre/lib/ext目录下,否则可能因servlet-api版本冲突导致启动失败.

    以上方案并非完全原创.