在 WSL2 Ubuntu 20.04 上安装 Hive

下载

将 Hive 3.1.3 安装包下载下来并解压:

1
# Extract into ~/apps and rename to match the HIVE_HOME set in ~/.bashrc
# (~/apps/hive-3.1.3). No sudo: extracting under $HOME with sudo leaves
# the tree owned by root, which later breaks writes to $HIVE_HOME/log.
mkdir -p ~/apps
tar -zxvf apache-hive-3.1.3-bin.tar.gz -C ~/apps
mv ~/apps/apache-hive-3.1.3-bin ~/apps/hive-3.1.3

配置

在 ~/.bashrc 里添加 Hive 环境变量:

1
2
3
## Hive env — make the Hive launcher scripts available on PATH
export HIVE_HOME="$HOME/apps/hive-3.1.3"
export PATH="$PATH:$HIVE_HOME/bin"

mysql-connector-java-8.0.28.jar 放在 $HIVE_HOME/lib 下,

在 MySQL 中配置 Hive 元数据库

1
2
3
4
5
mysql -u root -p
mysql> CREATE DATABASE hive;
mysql> CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive';
-- Least privilege: the metastore user only needs its own database.
-- (The original GRANT ALL ON *.* ... WITH GRANT OPTION made 'hive' a full admin.)
mysql> GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'localhost';
mysql> FLUSH PRIVILEGES;

$HIVE_HOME/conf 下新建 hive-default.xml

1
# Generate hive-default.xml from the shipped template (run inside $HIVE_HOME/conf).
cp hive-default.xml.template hive-default.xml

新建 hive-site.xml, 并添加以下内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<!-- JDBC URL of the MySQL metastore database; createDatabaseIfNotExist lets
     the first connection create the "hive" schema if it is missing. -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<!-- MySQL Connector/J 8.x driver class (from the jar copied into $HIVE_HOME/lib). -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<!-- Credentials of the MySQL 'hive' user created earlier. -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
<description>password to use against metastore database</description>
</property>
<!-- Credentials presented by Thrift (Beeline) clients.
     NOTE(review): "herman" and "[password]" are machine-specific placeholders;
     replace them with your own OS user name and password. -->
<property>
<name>hive.server2.thrift.client.user</name>
<value>herman</value>
<description>Username to use against thrift client</description>
</property>
<property>
<name>hive.server2.thrift.client.password</name>
<value>[password]</value>
<description>Password to use against thrift client</description>
</property>
<!-- false: HiveServer2 executes operations as the server user rather than as
     the calling user (see description below). -->
<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
<description>Setting this property to true will have HiveServer2 execute Hive operations as the user making the calls to it.</description>
</property>
<!-- Remote metastore endpoint used by the Hive CLI and HiveServer2. -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://localhost:9083</value>
</property>
<!-- Disable metastore notification-event API authorization (single-node convenience). -->
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>
</configuration>

$HADOOP_HOME/etc/hadoop/core-site.xml 中添加下面内容:

1
2
3
4
5
6
7
8
<!-- Allow the OS user "hadoop" to impersonate other users (required for
     HiveServer2 connections through Hadoop).
     NOTE(review): the middle segment of hadoop.proxyuser.<user>.* must be the
     user that runs HiveServer2 — adjust if your daemon user differs. -->
<property> 
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>

启动 Hive CLI

启动 HADOOP 集群 start-dfs.sh, Hive 第一次启动前要先初始化 Hive 元数据库 Schema:

1
$HIVE_HOME/bin/schematool -dbType mysql -initSchema

启动 hive 验证是否安装成功:

1
2
hive>
hive>exit;

启动 Beeline

先启动 metastore 和 hiveserver2 服务:

1
2
nohup hive --service metastore 1>/dev/null 2>&1 &
nohup hive --service hiveserver2 1>/dev/null 2>&1 &

启动 Beeline 并连接 Hive:

1
2
3
4
5
6
beeline
beeline> !connect jdbc:hive2://localhost:10000
Connected to: Apache Hive (version 3.1.3)
Driver: Hive JDBC (version 2.3.9)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://localhost:10000>

Hive 启动脚本

sudo vim $HIVE_HOME/bin/hive.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
#
# Start/stop/status/restart the Hive metastore and HiveServer2 services.
# Usage: hive.sh <start|stop|status|restart>

# Fall back to the install path used in this guide if HIVE_HOME is unset.
: "${HIVE_HOME:=$HOME/apps/hive-3.1.3}"

mkdir -p "$HIVE_HOME/log"

# Print the running HiveMetaStore/HiveServer2 processes; non-zero if none.
status() {
  ps -ef | grep -v grep | grep -E 'HiveMetaStore|HiveServer2'
}

# Launch both services in the background, each with its OWN log file.
start() {
  status &>/dev/null && { echo "Stop HiveMetaStore or HiveServer2 first!"; exit 1; }
  nohup "$HIVE_HOME/bin/hive" --service metastore &> "$HIVE_HOME/log/metastore.log" &
  # BUG FIX: the original also wrote hiveserver2 output into metastore.log,
  # so the two services clobbered each other's log.
  nohup "$HIVE_HOME/bin/hive" --service hiveserver2 &> "$HIVE_HOME/log/hiveserver2.log" &
  sleep 1 && echo "HiveMetaStore & HiveServer2 started!"

  # Show the WSL2 eth0 address so remote Beeline clients know where to connect.
  ip addr | grep "eth0" | grep "inet" | cut -d " " -f 6 | cut -d / -f 1
}

# Terminate the services: try SIGTERM first, escalate to SIGKILL only if needed.
stop() {
  if status &>/dev/null; then
    status | awk '{print $2}' | xargs kill 2>/dev/null
    sleep 2
    status &>/dev/null && status | awk '{print $2}' | xargs kill -9
    echo "HiveMetaStore & HiveServer2 stopped!"
  else
    echo "HiveMetaStore & HiveServer2 not found!"
  fi
}

restart() {
  stop && start
}

# ${1-} avoids an unbound-variable error when no sub-command is given.
case "${1-}" in
  status)  status ;;
  start)   start ;;
  stop)    stop ;;
  restart) restart ;;
  *)       echo "Usage: $0 <start|stop|status|restart>" ;;
esac

sudo vim $HIVE_HOME/bin/hive-env.sh(注意: 文件名不能再用 hive.sh, 否则会覆盖上面的启动脚本, 导致最后一行的 hive.sh start 调用到自己)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash
#
# One-shot environment bootstrap for Hive on WSL2: make sure ssh, MySQL,
# HDFS and YARN are all running, then start the Hive services via hive.sh.
#
# NOTE(review): 'echo 1 | sudo -S' feeds the literal sudo password "1" on
# stdin — acceptable only on a throwaway sandbox; never hardcode a password
# in a real environment.

ssh_flag=$(echo 1 | sudo -S service ssh status | grep "Active" | awk '{print $2}')
mysql_flag=$(echo 1 | sudo -S service mysql status | grep "Active" | awk '{print $2}')
# jps prints "<pid> <class>", so field 2 is the daemon class name.
# (The original 'grep -v grep' was a no-op on jps output and is dropped.)
dfs_flag=$(jps | awk '{print $2}' | grep -E 'DataNode|NameNode|SecondaryNameNode' | sort | xargs)
yarn_flag=$(jps | awk '{print $2}' | grep -E 'ResourceManager|NodeManager' | sort | xargs)

# BUG FIX: the flags are now quoted — the original unquoted
# [ $ssh_flag == "inactive" ] was a test syntax error whenever the
# service query returned nothing, silently skipping the start.
if [ "$ssh_flag" = "inactive" ]; then
  echo 1 | sudo -S service ssh start
  echo "ssh service start successfully"
fi

if [ "$mysql_flag" = "inactive" ]; then
  echo 1 | sudo -S service mysql start
  echo "mysql service start successfully"
fi

# The sorted, space-joined class list must match exactly for a healthy HDFS.
if [ "$dfs_flag" != "DataNode NameNode SecondaryNameNode" ]; then
  start-dfs.sh
fi

if [ "$yarn_flag" != "NodeManager ResourceManager" ]; then
  start-yarn.sh
fi

hive.sh start

故障排错

1
# $HIVE_HOME/log is a DIRECTORY (created by hive.sh) — 'cat' on the directory
# itself fails; read the per-service log files inside it instead.
cat "$HIVE_HOME"/log/*.log