Hive集成tez引擎


在使用Hive 2的时候,启动时会提示:Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.

tez引擎可以将多个有依赖的作业转换为一个作业,这样只需写一次HDFS,且中间节点较少,从而大大提升作业的计算性能。

#下载tez依赖包
https://archive.apache.org/dist/tez/0.9.1/

# Extract the Tez binary tarball into /opt/module (tar -C selects the target
# directory), then rename the extracted directory to the shorter tez-0.9.1.
tar -zxvf /opt/software/apache-tez-0.9.1-bin.tar.gz -C /opt/module
mv /opt/module/apache-tez-0.9.1-bin /opt/module/tez-0.9.1

# Upload the Tez tarball to HDFS; tez.lib.uris in tez-site.xml refers to
# /tez/apache-tez-0.9.1-bin.tar.gz on the default filesystem.
# -p keeps the step re-runnable when /tez already exists.
hadoop fs -mkdir -p /tez
# The source is a regular file, so it must not carry a trailing slash.
hadoop fs -put /opt/software/apache-tez-0.9.1-bin.tar.gz /tez

#进入hive配置文件目录
cd /opt/module/hive/conf

#配置tez-site.xml
vim tez-site.xml
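	<?xml version="1.0" encoding="UTF-8"?>
	<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
	<configuration>
		<property>
			<name>tez.lib.uris</name>
			<value>${fs.defaultFS}/tez/apache-tez-0.9.1-bin.tar.gz</value>
		</property>
		<property>
			<name>tez.use.cluster.hadoop-libs</name>
			<value>true</value>
		</property>
		<property>
			<name>tez.history.logging.service.class</name>
			<value>org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService</value>
		</property>
	</configuration>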

#配置hive-env.sh
mv hive-env.sh.template hive-env.sh
vim hive-env.sh
	# Set HADOOP_HOME to point to a specific hadoop install directory
	export HADOOP_HOME=/opt/module/hadoop-2.7.7
	# Hive Configuration Directory can be controlled by:
	export HIVE_CONF_DIR=/opt/module/hive/conf
	# Folder containing extra libraries required for hive compilation/execution can be controlled by:
	export TEZ_HOME=/opt/module/tez-0.9.1    # Tez extraction directory
	# Build TEZ_JARS as a list of ":"-prefixed jar paths: the jars directly under
	# TEZ_HOME first, then the jars under TEZ_HOME/lib. Globs are used instead of
	# parsing `ls` output so that only real *.jar files are picked up and paths
	# are never word-split.
	TEZ_JARS=""
	for jar in "$TEZ_HOME"/*.jar "$TEZ_HOME"/lib/*.jar; do
		# A glob with no match stays as the literal pattern; skip it.
		[ -e "$jar" ] || continue
		TEZ_JARS="$TEZ_JARS:$jar"
	done
	export TEZ_JARS
	# TEZ_JARS already starts with ":" (or is empty), so it is appended directly
	# after the hadoop-lzo jar.
	export HIVE_AUX_JARS_PATH="$HADOOP_HOME/share/hadoop/common/hadoop-lzo-0.4.21-SNAPSHOT.jar$TEZ_JARS"

#配置计算引擎
vim hive-site.xml
	<property>
		<name>hive.execution.engine</name>
		<value>tez</value>
	</property>

#启动时不报错就OK