Skip to content

Instantly share code, notes, and snippets.

@kangvcar
Last active June 4, 2025 03:11
Show Gist options
  • Select an option

  • Save kangvcar/2aa0fc38b27694b656d9591e07ac56c2 to your computer and use it in GitHub Desktop.

Select an option

Save kangvcar/2aa0fc38b27694b656d9591e07ac56c2 to your computer and use it in GitHub Desktop.
#!/bin/bash
#################################################
# Hive 4.0.1 automated deployment script v1.0
#
# Features:
# 1. Downloads and installs Hive 4.0.1
# 2. Configures MariaDB as the Metastore backend
# 3. Initializes the schema and starts the services
# 4. Supports multi-node cluster deployment
# 5. Idempotent: safe to run repeatedly
#
# Prerequisites:
# - Hadoop cluster already deployed
# - Passwordless SSH between cluster nodes
# - sudo privileges on all nodes
#
# Usage:
# chmod +x deploy_hive.sh
# ./deploy_hive.sh
#
# Author: HE
# Version: 1.0
# Date: 20250604
#################################################
# ANSI color codes for log output; readonly because they are
# constants and are never reassigned anywhere in this script.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color
# Logging helpers: colored level tag, timestamp, message — all on one line.
_log_line() {
  local color=$1 tag=$2 msg=$3
  echo -e "${color}[${tag}]${NC} $(date '+%Y-%m-%d %H:%M:%S') - ${msg}"
}
log_info() { _log_line "$GREEN" INFO "$1"; }
log_warn() { _log_line "$YELLOW" WARN "$1"; }
log_error() { _log_line "$RED" ERROR "$1"; }
log_step() { _log_line "$BLUE" STEP "$1"; }
# Report a trapped failure (exit code + offending command) and abort.
handle_error() {
  local exit_code=$1
  local failed_cmd=$2
  log_error "脚本执行失败,退出码: $exit_code"
  log_error "失败的命令: $failed_cmd"
  exit 1
}
# Fail fast: stop on any unhandled error and route it through handle_error.
set -e
trap 'handle_error $? "$BASH_COMMAND"' ERR
# Decide how privileged commands are run.
# Sets the global SUDO_CMD: "" when already root, "sudo" when passwordless
# sudo is available; exits 1 when neither applies.
check_permissions() {
  SUDO_CMD=""
  if [[ $EUID -ne 0 ]]; then
    if sudo -n true 2>/dev/null; then
      SUDO_CMD="sudo"
    else
      log_error "需要root权限或sudo权限来执行此脚本"
      exit 1
    fi
  fi
}
# Interactively collect deployment parameters.
# Sets globals: MASTER_NODE, SLAVE_NODES (array, may be empty),
# MARIADB_PASSWORD. Exits 0 when the operator rejects the summary.
get_user_input() {
  echo -e "${BLUE}========================================${NC}"
  echo -e "${BLUE} Hive 4.0.1 集群部署脚本${NC}"
  echo -e "${BLUE}========================================${NC}"
  echo ""
  # Master node hostname (required).
  # read -r everywhere: without it, backslashes in input are interpreted.
  while true; do
    read -r -p "请输入主节点主机名 (如: master): " MASTER_NODE
    if [[ -n "$MASTER_NODE" ]]; then
      break
    else
      log_warn "主节点名称不能为空,请重新输入"
    fi
  done
  # Slave node hostnames (optional, space separated)
  read -r -p "请输入从节点主机名,用空格分隔 (如: slave1 slave2): " SLAVE_NODES_INPUT
  if [[ -n "$SLAVE_NODES_INPUT" ]]; then
    # intentional word-splitting: one array element per hostname
    SLAVE_NODES=($SLAVE_NODES_INPUT)
  else
    SLAVE_NODES=()
    log_warn "未输入从节点,将只在主节点部署"
  fi
  # MariaDB root password (required; -s suppresses terminal echo)
  while true; do
    read -r -s -p "请输入MariaDB root密码: " MARIADB_PASSWORD
    echo ""
    if [[ -n "$MARIADB_PASSWORD" ]]; then
      break
    else
      log_warn "密码不能为空,请重新输入"
    fi
  done
  # Show collected configuration and require explicit confirmation
  echo ""
  echo -e "${BLUE}========== 部署配置确认 ==========${NC}"
  echo "主节点: $MASTER_NODE"
  echo "从节点: ${SLAVE_NODES[*]:-无}"
  echo "MariaDB密码: ********"
  echo ""
  read -r -p "确认以上配置是否正确? (y/N): " CONFIRM
  if [[ ! "$CONFIRM" =~ ^[Yy]$ ]]; then
    log_info "取消部署,退出脚本"
    exit 0
  fi
}
# Verify Hadoop is installed and HDFS is reachable; abort otherwise.
check_hadoop() {
  log_step "检查Hadoop环境"
  # Guard clause: no hdfs binary means Hadoop is missing or not on PATH
  command -v hdfs &> /dev/null || {
    log_error "未找到Hadoop命令,请确保Hadoop已正确安装并配置环境变量"
    exit 1
  }
  # A failing dfsadmin report means the NameNode is not serving requests
  hdfs dfsadmin -report &> /dev/null || {
    log_error "HDFS服务未运行,请先启动Hadoop集群"
    exit 1
  }
  log_info "Hadoop环境检查通过"
}
# Verify each cluster node answers a single ping; abort on the first
# unreachable host.
check_connectivity() {
  log_step "检查节点网络连通性"
  # Master node first
  if ! ping -c 1 "$MASTER_NODE" &> /dev/null; then
    log_error "无法连接到主节点: $MASTER_NODE"
    exit 1
  fi
  # Then every slave node
  local node
  for node in "${SLAVE_NODES[@]}"; do
    ping -c 1 "$node" &> /dev/null || {
      log_error "无法连接到从节点: $node"
      exit 1
    }
  done
  log_info "所有节点网络连通性检查通过"
}
# Download, unpack and register Hive 4.0.1 under /opt/hive (idempotent).
install_hive() {
  log_step "下载和安装Hive 4.0.1"
  local tarball="apache-hive-4.0.1-bin.tar.gz"
  local url="https://dlcdn.apache.org/hive/hive-4.0.1/apache-hive-4.0.1-bin.tar.gz"
  $SUDO_CMD mkdir -p /opt/software
  cd /opt/software || exit 1
  if [[ ! -f "$tarball" ]]; then
    log_info "下载Hive 4.0.1..."
    # Download to a temp name and rename only on success: an interrupted
    # wget must not leave a partial file that later runs mistake for a
    # complete archive and skip re-downloading.
    $SUDO_CMD wget -O "${tarball}.part" "$url"
    $SUDO_CMD mv "${tarball}.part" "$tarball"
  else
    log_info "Hive安装包已存在,跳过下载"
  fi
  if [[ ! -d "/opt/hive" ]]; then
    log_info "解压并安装Hive..."
    # -v dropped: listing thousands of files only obscures the real logs
    $SUDO_CMD tar -zxf "$tarball" -C /opt/
    $SUDO_CMD mv /opt/apache-hive-4.0.1-bin /opt/hive
  else
    log_info "Hive已安装,跳过解压"
  fi
  # Persist HIVE_HOME in /etc/profile exactly once
  if ! grep -q "HIVE_HOME" /etc/profile; then
    log_info "配置Hive环境变量..."
    $SUDO_CMD tee -a /etc/profile > /dev/null << EOF
# Hive Environment
export HIVE_HOME=/opt/hive
export PATH=\$PATH:\$HIVE_HOME/bin
EOF
  else
    log_info "Hive环境变量已配置,跳过"
  fi
  # Also export into the current shell so later steps can call hive tools
  source /etc/profile
  export HIVE_HOME=/opt/hive
  export PATH=$PATH:$HIVE_HOME/bin
  log_info "Hive安装完成"
}
# Install MariaDB if missing, start/enable it, and apply the usual
# mysql_secure_installation hardening steps non-interactively.
setup_mariadb() {
  log_step "安装和配置MariaDB"
  # Install only when the mysql client binary is absent
  if ! command -v mysql &> /dev/null; then
    log_info "安装MariaDB..."
    $SUDO_CMD yum install mariadb mariadb-server -y
  else
    log_info "MariaDB已安装,跳过安装"
  fi
  log_info "启动MariaDB服务..."
  $SUDO_CMD systemctl start mariadb
  $SUDO_CMD systemctl enable mariadb
  if ! $SUDO_CMD systemctl is-active mariadb &> /dev/null; then
    log_error "MariaDB服务启动失败"
    exit 1
  fi
  log_info "配置MariaDB安全设置..."
  # Double every single quote so a quote character in the password cannot
  # break out of the SQL string literal below (SQL-injection safety).
  local pw_sql=${MARIADB_PASSWORD//\'/\'\'}
  $SUDO_CMD mysql -e "
UPDATE mysql.user SET Password=PASSWORD('$pw_sql') WHERE User='root';
DELETE FROM mysql.user WHERE User='';
DELETE FROM mysql.user WHERE User='root' AND Host NOT IN ('localhost', '127.0.0.1', '::1');
DROP DATABASE IF EXISTS test;
DELETE FROM mysql.db WHERE Db='test' OR Db='test\\_%';
FLUSH PRIVILEGES;
" 2>/dev/null || log_warn "MariaDB安全配置可能已完成"
  log_info "MariaDB配置完成"
}
# Create the Hive metastore database and grant access to root (idempotent).
# NOTE(review): granting to 'root'@'%' opens access from any host; consider
# restricting to the cluster subnet on hardened deployments.
setup_hive_database() {
  log_step "创建Hive数据库和用户权限"
  # Double single quotes so the password is a safe SQL string literal
  local pw_sql=${MARIADB_PASSWORD//\'/\'\'}
  mysql -uroot -p"$MARIADB_PASSWORD" << EOF 2>/dev/null || log_warn "数据库可能已存在"
CREATE DATABASE IF NOT EXISTS hive;
GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%' IDENTIFIED BY '$pw_sql';
GRANT ALL PRIVILEGES ON hive.* TO 'root'@'localhost' IDENTIFIED BY '$pw_sql';
FLUSH PRIVILEGES;
EOF
  log_info "Hive数据库配置完成"
}
# Fetch the MariaDB Connector/J jar into Hive's lib directory (idempotent).
download_mariadb_driver() {
  log_step "下载MariaDB JDBC驱动"
  local jar="/opt/hive/lib/mariadb-java-client-3.5.3.jar"
  if [[ ! -f "$jar" ]]; then
    log_info "下载MariaDB驱动..."
    # Download to a temp name and rename on success so an interrupted
    # download is never mistaken for the finished jar on a later run.
    $SUDO_CMD wget -O "${jar}.part" https://dlm.mariadb.com/4234102/Connectors/java/connector-java-3.5.3/mariadb-java-client-3.5.3.jar
    $SUDO_CMD mv "${jar}.part" "$jar"
  else
    log_info "MariaDB驱动已存在,跳过下载"
  fi
  log_info "MariaDB驱动配置完成"
}
# Generate /opt/hive/conf/hive-site.xml from the collected settings.
# NOTE(review): the metastore password is stored in plain text in this
# file; consider tightening its permissions on hardened deployments.
create_hive_config() {
  log_step "创建Hive配置文件"
  $SUDO_CMD mkdir -p /opt/hive/conf
  # XML-escape the password so characters like '&' or '<' cannot produce
  # an unparsable hive-site.xml. '&' must be replaced first, otherwise
  # the ampersands introduced by the other replacements get re-escaped.
  local pw_xml=$MARIADB_PASSWORD
  pw_xml=${pw_xml//&/&amp;}
  pw_xml=${pw_xml//</&lt;}
  pw_xml=${pw_xml//>/&gt;}
  $SUDO_CMD tee /opt/hive/conf/hive-site.xml > /dev/null << EOF
<configuration>
<!-- Metastore仓库配置 -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>Hive默认仓库位置,用于存储托管表数据</description>
</property>
<!-- MariaDB数据库连接配置 -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mariadb://$MASTER_NODE:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=utf8</value>
<description>连接到Hive Metastore数据库的JDBC URL,使用MariaDB作为后端数据库</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.mariadb.jdbc.Driver</value>
<description>连接Hive Metastore数据库的JDBC驱动类名(MariaDB Connector/J 3.5.3)</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>连接Hive Metastore数据库的用户名</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>$pw_xml</value>
<description>连接Hive Metastore数据库的密码</description>
</property>
<!-- Schema管理配置 -->
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
<description>当设置为true时,DataNucleus将自动创建不存在的表和列</description>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
<description>自动创建数据库schema</description>
</property>
<property>
<name>datanucleus.fixedDatastore</name>
<value>false</value>
<description>允许修改数据存储结构</description>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
<description>禁用schema验证,允许自动更新Metastore schema而无需手动干预</description>
</property>
<!-- Metastore服务配置 -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://$MASTER_NODE:9083</value>
<description>Metastore Thrift服务地址,用于远程连接</description>
</property>
<property>
<name>hive.metastore.client.connect.retry.delay</name>
<value>5</value>
<description>客户端连接Metastore失败后重试的延迟时间(秒)</description>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>1800</value>
<description>客户端socket超时时间(秒)</description>
</property>
<!-- HiveServer2配置 -->
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
<description>启用HiveServer2以提交查询的用户身份执行查询,而不是HiveServer2服务用户</description>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NONE</value>
<description>指定HiveServer2连接的认证模式</description>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>HiveServer2 Thrift服务端口</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>0.0.0.0</value>
<description>HiveServer2绑定所有网络接口</description>
</property>
<property>
<name>hive.server2.transport.mode</name>
<value>binary</value>
<description>HiveServer2传输模式</description>
</property>
</configuration>
EOF
  log_info "Hive配置文件创建完成"
}
# Copy the Hive installation and environment setup to each slave node.
# Requires passwordless SSH to each slave; no-op when SLAVE_NODES is empty.
deploy_to_slaves() {
if [[ ${#SLAVE_NODES[@]} -eq 0 ]]; then
log_info "无从节点,跳过从节点部署"
return
fi
log_step "部署Hive到从节点"
for slave in "${SLAVE_NODES[@]}"; do
log_info "部署到从节点: $slave"
# Copy the Hive install tree unless the slave already has it
if ! ssh "$slave" "test -d /opt/hive"; then
log_info "复制Hive到 $slave..."
# NOTE(review): local sudo does not raise privileges on the remote side;
# this fallback only helps when the local /opt/hive is unreadable — verify
scp -r /opt/hive "$slave":/opt/ 2>/dev/null || {
log_warn "使用sudo复制到 $slave..."
$SUDO_CMD scp -r /opt/hive "$slave":/opt/
}
else
log_info "$slave 上Hive已存在,跳过复制"
fi
# Append HIVE_HOME to the slave's /etc/profile exactly once
if ! ssh "$slave" "grep -q 'HIVE_HOME' /etc/profile"; then
log_info "在 $slave 上配置环境变量..."
# Quoted 'EOF' delimiter: $PATH/$HIVE_HOME expand on the slave, not here
ssh "$slave" "sudo tee -a /etc/profile > /dev/null" << 'EOF'
# Hive Environment
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
EOF
else
log_info "$slave 上环境变量已配置,跳过"
fi
done
log_info "从节点部署完成"
}
# Ensure the Hive warehouse directory exists in HDFS.
setup_hdfs_directories() {
  log_step "创建HDFS仓库目录"
  if hdfs dfs -test -d /user/hive/warehouse 2>/dev/null; then
    log_info "HDFS仓库目录已存在,跳过创建"
  else
    log_info "创建Hive仓库目录..."
    hdfs dfs -mkdir -p /user/hive/warehouse
    # world-writable so queries submitted by any user can write table data
    hdfs dfs -chmod 777 /user/hive/warehouse
  fi
  log_info "HDFS目录配置完成"
}
# Initialize the Hive metastore schema in MariaDB, exactly once.
initialize_schema() {
  log_step "初始化Metastore Schema"
  $SUDO_CMD mkdir -p /opt/hive/logs
  # The TBLS table only exists after a successful schematool run, so its
  # presence is the idempotency marker.
  if mysql -uroot -p"$MARIADB_PASSWORD" -e "USE hive; SHOW TABLES;" 2>/dev/null | grep -q "TBLS"; then
    log_info "Metastore Schema已初始化,跳过"
  else
    log_info "初始化Metastore Schema..."
    # Absolute path instead of 'cd /opt/hive/bin': the original cd
    # permanently changed the script's working directory for later steps.
    /opt/hive/bin/schematool -dbType mysql -initSchema
  fi
  log_info "Schema初始化完成"
}
# Start one Hive daemon in the background unless a matching process exists.
# $1: pgrep pattern  $2: display label  $3: hive --service name
# $4: log file path  $5: seconds to wait after launching
_start_hive_daemon() {
  local pattern=$1 label=$2 svc=$3 logfile=$4 wait_s=$5
  if pgrep -f "$pattern" > /dev/null; then
    log_info "${label}服务已运行,跳过启动"
  else
    log_info "启动${label}服务..."
    nohup /opt/hive/bin/hive --service "$svc" > "$logfile" 2>&1 &
    sleep "$wait_s"
  fi
}
# Launch the Metastore and HiveServer2 daemons (idempotent).
start_hive_services() {
  log_step "启动Hive服务"
  _start_hive_daemon "metastore" "Metastore" metastore /opt/hive/logs/metastore.log 10
  _start_hive_daemon "hiveserver2" "HiveServer2" hiveserver2 /opt/hive/logs/hiveserver2.log 20
  log_info "Hive服务启动完成"
}
# Report whether a TCP port is being listened on. $1: label  $2: port
_check_port() {
  if netstat -tlnp | grep -q ":$2"; then
    log_info "✓ $1服务端口$2正常"
  else
    log_warn "✗ $1服务端口$2未监听"
  fi
}
# Report whether a matching process is alive. $1: label  $2: pgrep pattern
_check_proc() {
  if pgrep -f "$2" > /dev/null; then
    log_info "✓ $1进程运行正常"
  else
    log_warn "✗ $1进程未运行"
  fi
}
# Post-deployment sanity checks: ports, processes, and a beeline query.
verify_deployment() {
  log_step "验证Hive部署"
  log_info "等待服务完全启动..."
  sleep 30
  log_info "检查服务端口..."
  _check_port "Metastore" 9083
  _check_port "HiveServer2" 10000
  log_info "检查Hive进程..."
  _check_proc "Metastore" metastore
  _check_proc "HiveServer2" hiveserver2
  log_info "测试Hive连接..."
  if timeout 30 /opt/hive/bin/beeline -u "jdbc:hive2://$MASTER_NODE:10000" -n root -e "show databases;" &>/dev/null; then
    log_info "✓ Hive连接测试成功"
  else
    log_warn "✗ Hive连接测试失败,可能需要等待更长时间"
  fi
  log_info "部署验证完成"
}
# Print a human-readable summary of the finished deployment.
show_deployment_summary() {
  printf '\n'
  printf '%b\n' "${GREEN}========================================${NC}"
  printf '%b\n' "${GREEN} Hive 4.0.1 部署完成${NC}"
  printf '%b\n' "${GREEN}========================================${NC}"
  # Plain body emitted as one heredoc; $vars expand as usual
  cat << EOF

部署信息:
- 主节点: $MASTER_NODE
- 从节点: ${SLAVE_NODES[*]:-无}
- Hive版本: 4.0.1
- Metastore: MariaDB

服务端口:
- Metastore: 9083
- HiveServer2: 10000
- MariaDB: 3306

连接命令:
beeline -u "jdbc:hive2://$MASTER_NODE:10000" -n root

日志位置:
- Metastore: /opt/hive/logs/metastore.log
- HiveServer2: /opt/hive/logs/hiveserver2.log

EOF
  printf '%b\n' "${YELLOW}注意:如果连接失败,请等待1-2分钟后重试${NC}"
  printf '\n'
}
# Main entry point: print the banner, then run every deployment phase
# in dependency order. Failures abort via the ERR trap / handle_error.
main() {
echo -e "${BLUE}"
# Quoted 'EOF' delimiter: banner is printed verbatim, no expansion
cat << 'EOF'
_ _ _ _____ _
| | | (_) | _ | | |
| |_| |___ _____ | |/' |_ _ _| |
| _ | \ \ / / _ \ | /| | | | | |
| | | | |\ V / __/ \ |_/ / |_| |_|
\_| |_/_| \_/ \___| \___/ \__,_(_)
EOF
echo -e "${NC}"
# Deployment phases, in order; each one is idempotent
check_permissions
get_user_input
check_hadoop
check_connectivity
install_hive
setup_mariadb
setup_hive_database
download_mariadb_driver
create_hive_config
deploy_to_slaves
setup_hdfs_directories
initialize_schema
start_hive_services
verify_deployment
show_deployment_summary
log_info "Hive 4.0.1 部署脚本执行完成!"
}
# Run the script
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment