Last active
June 4, 2025 03:11
-
-
Save kangvcar/2aa0fc38b27694b656d9591e07ac56c2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| ################################################# | |
| # Hive 4.0.1 自动部署脚本 v1.0 | |
| # | |
| # 功能说明: | |
| # 1. 自动下载和安装Hive 4.0.1 | |
| # 2. 配置MariaDB作为Metastore | |
| # 3. 初始化Schema并启动服务 | |
| # 4. 支持多节点集群部署 | |
| # 5. 具有幂等性,可重复执行 | |
| # | |
| # 使用前提: | |
| # - 已完成Hadoop集群部署 | |
| # - 集群节点之间可免密SSH | |
| # - 所有节点具有sudo权限 | |
| # | |
| # 使用方法: | |
| # chmod +x deploy_hive.sh | |
| # ./deploy_hive.sh | |
| # | |
| # 作者:HE | |
| # 版本:1.0 | |
| # 日期:20250604 | |
| ################################################# | |
# ANSI colour sequences for log output. They are stored as literal backslash
# escapes and only turned into control characters when printed with `echo -e`.
NC='\033[0m'        # reset to the terminal default (no colour)
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Logging helpers.
#######################################
# Print an informational message prefixed with a green [INFO] tag and a
# timestamp.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
log_info() {
  # Only the colour codes go through %b. The message uses %s so backslashes
  # in it (paths, command lines) are printed as-is instead of being expanded
  # the way `echo -e` would expand them.
  printf '%b[INFO]%b %s - %s\n' \
    "${GREEN}" "${NC}" "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}"
}
#######################################
# Print a warning message prefixed with a yellow [WARN] tag and a timestamp.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
log_warn() {
  # %b expands the colour escapes; %s keeps backslashes in the message
  # literal, which `echo -e` would have rewritten.
  printf '%b[WARN]%b %s - %s\n' \
    "${YELLOW}" "${NC}" "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}"
}
#######################################
# Print an error message prefixed with a red [ERROR] tag and a timestamp.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
log_error() {
  # The message may be a failed command line containing backslashes, so it is
  # printed with %s; only the colour codes are expanded through %b.
  printf '%b[ERROR]%b %s - %s\n' \
    "${RED}" "${NC}" "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}"
}
#######################################
# Print a step heading prefixed with a blue [STEP] tag and a timestamp.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
log_step() {
  # %b expands the colour escapes; %s keeps backslashes in the message
  # literal, which `echo -e` would have rewritten.
  printf '%b[STEP]%b %s - %s\n' \
    "${BLUE}" "${NC}" "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}"
}
# Error handler invoked from the ERR trap.
#######################################
# Report a failed command through log_error and terminate the script.
# Arguments: $1 - exit status of the failed command
#            $2 - text of the failed command
# Returns:   never returns; always exits with status 1
#######################################
handle_error() {
  local status=$1
  local failed_command=$2

  log_error "脚本执行失败,退出码: ${status}"
  log_error "失败的命令: ${failed_command}"
  exit 1
}
| # 设置错误处理 | |
| set -e | |
| trap 'handle_error $? "$BASH_COMMAND"' ERR | |
# Privilege check.
#######################################
# Decide how privileged commands must be prefixed.
# Globals:   SUDO_CMD (written) - empty when running as root, "sudo" when
#            password-less sudo is available
# Returns:   exits the script with status 1 when neither is available
#######################################
check_permissions() {
  # Already root: no prefix needed.
  if (( EUID == 0 )); then
    SUDO_CMD=""
    return
  fi

  # -n makes sudo fail instead of prompting for a password.
  if sudo -n true 2>/dev/null; then
    SUDO_CMD="sudo"
    return
  fi

  log_error "需要root权限或sudo权限来执行此脚本"
  exit 1
}
# Collect deployment settings interactively.
#######################################
# Prompt for the master host, the optional worker hosts and the MariaDB root
# password, show a summary and ask for confirmation.
# Globals:   MASTER_NODE, SLAVE_NODES_INPUT, SLAVE_NODES (array),
#            MARIADB_PASSWORD, CONFIRM (written); BLUE, NC (read)
# Inputs:    answers are read from stdin, one per line
# Returns:   0 when the summary is confirmed with y/Y; otherwise exits the
#            script with status 0
#######################################
get_user_input() {
  echo -e "${BLUE}========================================${NC}"
  echo -e "${BLUE} Hive 4.0.1 集群部署脚本${NC}"
  echo -e "${BLUE}========================================${NC}"
  echo ""

  # Master host name: keep asking until something is entered.
  while true; do
    read -r -p "请输入主节点主机名 (如: master): " MASTER_NODE
    if [[ -n "$MASTER_NODE" ]]; then
      break
    else
      log_warn "主节点名称不能为空,请重新输入"
    fi
  done

  # Worker host names, whitespace separated. `read -a` splits the line into
  # words without glob expansion. An unquoted array assignment would expand
  # an input such as `*` into the files of the current directory.
  read -r -p "请输入从节点主机名,用空格分隔 (如: slave1 slave2): " SLAVE_NODES_INPUT
  SLAVE_NODES=()
  if [[ -n "$SLAVE_NODES_INPUT" ]]; then
    read -r -a SLAVE_NODES <<< "$SLAVE_NODES_INPUT"
  else
    log_warn "未输入从节点,将只在主节点部署"
  fi

  # MariaDB root password. -r and an empty IFS keep backslashes and
  # surrounding whitespace intact, so the stored password is what was typed.
  while true; do
    IFS= read -r -s -p "请输入MariaDB root密码: " MARIADB_PASSWORD
    echo ""
    if [[ -n "$MARIADB_PASSWORD" ]]; then
      break
    else
      log_warn "密码不能为空,请重新输入"
    fi
  done

  # Show the collected settings (password masked) and ask for confirmation.
  echo ""
  echo -e "${BLUE}========== 部署配置确认 ==========${NC}"
  echo "主节点: $MASTER_NODE"
  echo "从节点: ${SLAVE_NODES[*]:-无}"
  echo "MariaDB密码: ********"
  echo ""
  read -r -p "确认以上配置是否正确? (y/N): " CONFIRM
  if [[ ! "$CONFIRM" =~ ^[Yy]$ ]]; then
    log_info "取消部署,退出脚本"
    exit 0
  fi
}
# Hadoop pre-flight check.
#######################################
# Verify that the `hdfs` command is available and that
# `hdfs dfsadmin -report` succeeds.
# Returns:   exits the script with status 1 when either check fails
#######################################
check_hadoop() {
  log_step "检查Hadoop环境"

  # The hdfs command must be resolvable through PATH.
  command -v hdfs &> /dev/null || {
    log_error "未找到Hadoop命令,请确保Hadoop已正确安装并配置环境变量"
    exit 1
  }

  # A successful dfsadmin report is taken as proof that HDFS is running.
  hdfs dfsadmin -report &> /dev/null || {
    log_error "HDFS服务未运行,请先启动Hadoop集群"
    exit 1
  }

  log_info "Hadoop环境检查通过"
}
# Network reachability check.
#######################################
# Send one ICMP echo request to the master and to every worker host.
# Globals:   MASTER_NODE, SLAVE_NODES (read)
# Returns:   exits the script with status 1 at the first host that does not
#            answer
#######################################
check_connectivity() {
  log_step "检查节点网络连通性"

  # Master host first.
  ping -c 1 "$MASTER_NODE" &> /dev/null || {
    log_error "无法连接到主节点: $MASTER_NODE"
    exit 1
  }

  # Then each worker; stop at the first one that is unreachable.
  for slave in "${SLAVE_NODES[@]}"; do
    ping -c 1 "$slave" &> /dev/null && continue
    log_error "无法连接到从节点: $slave"
    exit 1
  done

  log_info "所有节点网络连通性检查通过"
}
# Download and install Hive.
#######################################
# Download the Hive 4.0.1 binary tarball into /opt/software, unpack it to
# /opt/hive and register HIVE_HOME in /etc/profile. Each step is skipped when
# its result already exists, so the function can be re-run.
# Globals:   SUDO_CMD (read, intentionally unquoted so that an empty value
#            disappears); HIVE_HOME, PATH (exported)
# Side effects: changes the working directory to /opt/software
#######################################
install_hive() {
  local -r software_dir="/opt/software"
  local -r tarball="apache-hive-4.0.1-bin.tar.gz"

  log_step "下载和安装Hive 4.0.1"

  # Staging directory for downloaded packages.
  $SUDO_CMD mkdir -p "${software_dir}"
  cd "${software_dir}"

  if [[ ! -f "${tarball}" ]]; then
    log_info "下载Hive 4.0.1..."
    # Download under a temporary name and rename only after wget succeeded.
    # Otherwise an interrupted transfer leaves a truncated tarball that the
    # -f test above would accept as complete on the next run.
    $SUDO_CMD wget -O "${tarball}.part" \
      "https://dlcdn.apache.org/hive/hive-4.0.1/${tarball}"
    $SUDO_CMD mv -f -- "${tarball}.part" "${tarball}"
  else
    log_info "Hive安装包已存在,跳过下载"
  fi

  # Unpack only when /opt/hive is not there yet.
  if [[ ! -d "/opt/hive" ]]; then
    log_info "解压并安装Hive..."
    $SUDO_CMD tar -zxvf "${tarball}" -C /opt/
    $SUDO_CMD mv /opt/apache-hive-4.0.1-bin /opt/hive
  else
    log_info "Hive已安装,跳过解压"
  fi

  # Append the Hive environment to /etc/profile once. The quoted delimiter
  # keeps $PATH and $HIVE_HOME literal in the written file.
  if ! grep -q "HIVE_HOME" /etc/profile; then
    log_info "配置Hive环境变量..."
    $SUDO_CMD tee -a /etc/profile > /dev/null << 'EOF'
# Hive Environment
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
EOF
  else
    log_info "Hive环境变量已配置,跳过"
  fi

  # /etc/profile and the snippets it sources are not written with errexit in
  # mind. A benign non-zero status from any of them must not abort the
  # deployment. The explicit exports below provide what this script needs.
  # shellcheck disable=SC1091
  source /etc/profile || log_warn "加载 /etc/profile 返回非零状态,继续执行"
  export HIVE_HOME=/opt/hive
  export PATH=$PATH:$HIVE_HOME/bin

  log_info "Hive安装完成"
}
# Install and configure MariaDB.
#######################################
# Install MariaDB through yum when the mysql client is missing, start and
# enable the service, then apply the root password and basic hardening
# statements.
# Globals:   SUDO_CMD, MARIADB_PASSWORD (read)
# Returns:   exits the script with status 1 when the service is not active
#            after starting it
#######################################
setup_mariadb() {
  local sql_password

  log_step "安装和配置MariaDB"

  # Install only when no mysql client is on PATH.
  if ! command -v mysql &> /dev/null; then
    log_info "安装MariaDB..."
    $SUDO_CMD yum install mariadb mariadb-server -y
  else
    log_info "MariaDB已安装,跳过安装"
  fi

  log_info "启动MariaDB服务..."
  $SUDO_CMD systemctl start mariadb
  $SUDO_CMD systemctl enable mariadb

  if ! $SUDO_CMD systemctl is-active mariadb &> /dev/null; then
    log_error "MariaDB服务启动失败"
    exit 1
  fi

  # Escape the password for a single-quoted SQL string literal: backslashes
  # first (the default escape character), then single quotes. Unescaped, a
  # password containing either character would break the statement or inject
  # SQL.
  sql_password=${MARIADB_PASSWORD//\\/\\\\}
  sql_password=${sql_password//\'/\'\'}

  # Best effort: the password-less login used here fails once a root password
  # has been set by an earlier run, which is reported as a warning only.
  log_info "配置MariaDB安全设置..."
  $SUDO_CMD mysql -e "
UPDATE mysql.user SET Password=PASSWORD('${sql_password}') WHERE User='root';
DELETE FROM mysql.user WHERE User='';
DELETE FROM mysql.user WHERE User='root' AND Host NOT IN ('localhost', '127.0.0.1', '::1');
DROP DATABASE IF EXISTS test;
DELETE FROM mysql.db WHERE Db='test' OR Db='test\\_%';
FLUSH PRIVILEGES;
" 2>/dev/null || log_warn "MariaDB安全配置可能已完成"

  log_info "MariaDB配置完成"
}
# Create the Hive metastore database and grants.
#######################################
# Create the `hive` database if missing and grant root full access to it
# from any host and from localhost.
# Globals:   MARIADB_PASSWORD (read)
# Returns:   0; a failing mysql call is reported as a warning only
#######################################
setup_hive_database() {
  local sql_password

  log_step "创建Hive数据库和用户权限"

  # Escape the password for a single-quoted SQL string literal: backslashes
  # first (the default escape character), then single quotes.
  sql_password=${MARIADB_PASSWORD//\\/\\\\}
  sql_password=${sql_password//\'/\'\'}

  mysql -uroot -p"$MARIADB_PASSWORD" << EOF 2>/dev/null || log_warn "数据库可能已存在"
CREATE DATABASE IF NOT EXISTS hive;
GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%' IDENTIFIED BY '${sql_password}';
GRANT ALL PRIVILEGES ON hive.* TO 'root'@'localhost' IDENTIFIED BY '${sql_password}';
FLUSH PRIVILEGES;
EOF

  log_info "Hive数据库配置完成"
}
# Fetch the MariaDB JDBC driver.
#######################################
# Download MariaDB Connector/J 3.5.3 into /opt/hive/lib unless the jar is
# already there.
# Globals:   SUDO_CMD (read, intentionally unquoted)
#######################################
download_mariadb_driver() {
  local -r driver_jar="/opt/hive/lib/mariadb-java-client-3.5.3.jar"

  log_step "下载MariaDB JDBC驱动"

  if [[ ! -f "${driver_jar}" ]]; then
    log_info "下载MariaDB驱动..."
    # Download under a temporary name and rename only after wget succeeded,
    # so an interrupted transfer is never mistaken for a complete jar by the
    # -f test above on the next run.
    $SUDO_CMD wget -O "${driver_jar}.part" \
      https://dlm.mariadb.com/4234102/Connectors/java/connector-java-3.5.3/mariadb-java-client-3.5.3.jar
    $SUDO_CMD mv -f -- "${driver_jar}.part" "${driver_jar}"
  else
    log_info "MariaDB驱动已存在,跳过下载"
  fi

  log_info "MariaDB驱动配置完成"
}
# Generate hive-site.xml.

# Escape the XML special characters &, < and > in $1 and print the result.
_hive_xml_escape() {
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

#######################################
# Write /opt/hive/conf/hive-site.xml with the metastore database connection,
# schema handling, metastore service and HiveServer2 settings. The file is
# overwritten on every run.
# Globals:   SUDO_CMD, MASTER_NODE, MARIADB_PASSWORD (read)
#######################################
create_hive_config() {
  local xml_master xml_password

  log_step "创建Hive配置文件"

  # Values substituted into the document must be XML-escaped, otherwise a
  # password containing & or < produces an ill-formed file.
  xml_master=$(_hive_xml_escape "$MASTER_NODE")
  xml_password=$(_hive_xml_escape "$MARIADB_PASSWORD")

  $SUDO_CMD mkdir -p /opt/hive/conf

  # The query-string separators in the JDBC URL are written as &amp; because a
  # bare & is not allowed in XML text and makes the file unparseable.
  $SUDO_CMD tee /opt/hive/conf/hive-site.xml > /dev/null << EOF
<configuration>
<!-- Metastore仓库配置 -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>Hive默认仓库位置,用于存储托管表数据</description>
</property>
<!-- MariaDB数据库连接配置 -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mariadb://${xml_master}:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=utf8</value>
<description>连接到Hive Metastore数据库的JDBC URL,使用MariaDB作为后端数据库</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.mariadb.jdbc.Driver</value>
<description>连接Hive Metastore数据库的JDBC驱动类名(MariaDB Connector/J 3.5.3)</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>连接Hive Metastore数据库的用户名</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>${xml_password}</value>
<description>连接Hive Metastore数据库的密码</description>
</property>
<!-- Schema管理配置 -->
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
<description>当设置为true时,DataNucleus将自动创建不存在的表和列</description>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
<description>自动创建数据库schema</description>
</property>
<property>
<name>datanucleus.fixedDatastore</name>
<value>false</value>
<description>允许修改数据存储结构</description>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
<description>禁用schema验证,允许自动更新Metastore schema而无需手动干预</description>
</property>
<!-- Metastore服务配置 -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://${xml_master}:9083</value>
<description>Metastore Thrift服务地址,用于远程连接</description>
</property>
<property>
<name>hive.metastore.client.connect.retry.delay</name>
<value>5</value>
<description>客户端连接Metastore失败后重试的延迟时间(秒)</description>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>1800</value>
<description>客户端socket超时时间(秒)</description>
</property>
<!-- HiveServer2配置 -->
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
<description>启用HiveServer2以提交查询的用户身份执行查询,而不是HiveServer2服务用户</description>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NONE</value>
<description>指定HiveServer2连接的认证模式</description>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>HiveServer2 Thrift服务端口</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>0.0.0.0</value>
<description>HiveServer2绑定所有网络接口</description>
</property>
<property>
<name>hive.server2.transport.mode</name>
<value>binary</value>
<description>HiveServer2传输模式</description>
</property>
</configuration>
EOF

  log_info "Hive配置文件创建完成"
}
# Distribute Hive to the worker hosts.
#######################################
# For every worker host: copy /opt/hive over scp unless the directory already
# exists there, and append the Hive environment to the remote /etc/profile
# unless HIVE_HOME is already mentioned in it.
# Globals:   SLAVE_NODES, SUDO_CMD (read)
# Returns:   0 immediately when no worker hosts are configured
#######################################
deploy_to_slaves() {
  # Nothing to do for a single-node deployment.
  if (( ${#SLAVE_NODES[@]} == 0 )); then
    log_info "无从节点,跳过从节点部署"
    return
  fi

  log_step "部署Hive到从节点"

  for slave in "${SLAVE_NODES[@]}"; do
    log_info "部署到从节点: $slave"

    # Installation directory: copy it only when the remote one is missing.
    if ssh "$slave" "test -d /opt/hive"; then
      log_info "$slave 上Hive已存在,跳过复制"
    else
      log_info "复制Hive到 $slave..."
      # Try a plain copy first and retry with the sudo prefix if that fails.
      if ! scp -r /opt/hive "$slave":/opt/ 2>/dev/null; then
        log_warn "使用sudo复制到 $slave..."
        $SUDO_CMD scp -r /opt/hive "$slave":/opt/
      fi
    fi

    # Environment variables: append once. The quoted delimiter keeps $PATH
    # and $HIVE_HOME literal in the remote file.
    if ssh "$slave" "grep -q 'HIVE_HOME' /etc/profile"; then
      log_info "$slave 上环境变量已配置,跳过"
    else
      log_info "在 $slave 上配置环境变量..."
      ssh "$slave" "sudo tee -a /etc/profile > /dev/null" << 'EOF'
# Hive Environment
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
EOF
    fi
  done

  log_info "从节点部署完成"
}
# Prepare the warehouse directory in HDFS.
#######################################
# Create /user/hive/warehouse in HDFS with mode 777 unless the directory
# already exists.
#######################################
setup_hdfs_directories() {
  log_step "创建HDFS仓库目录"

  if hdfs dfs -test -d /user/hive/warehouse 2>/dev/null; then
    log_info "HDFS仓库目录已存在,跳过创建"
  else
    log_info "创建Hive仓库目录..."
    hdfs dfs -mkdir -p /user/hive/warehouse
    hdfs dfs -chmod 777 /user/hive/warehouse
  fi

  log_info "HDFS目录配置完成"
}
# Initialise the metastore schema.
#######################################
# Create /opt/hive/logs and run schematool -initSchema unless the `hive`
# database already lists a table whose name contains TBLS.
# Globals:   SUDO_CMD, MARIADB_PASSWORD (read)
# Side effects: changes the working directory to /opt/hive/bin when the
#            schema has to be initialised
#######################################
initialize_schema() {
  log_step "初始化Metastore Schema"

  # Log directory used by the Hive services.
  $SUDO_CMD mkdir -p /opt/hive/logs

  # A table listing that contains TBLS marks an initialised schema.
  if ! mysql -uroot -p"$MARIADB_PASSWORD" -e "USE hive; SHOW TABLES;" 2>/dev/null | grep -q "TBLS"; then
    log_info "初始化Metastore Schema..."
    cd /opt/hive/bin
    ./schematool -dbType mysql -initSchema
  else
    log_info "Metastore Schema已初始化,跳过"
  fi

  log_info "Schema初始化完成"
}
# Start the Hive services.
#######################################
# Start the metastore and then HiveServer2 in the background unless a process
# matching the service name is already running. Output goes to
# /opt/hive/logs/<service>.log; after each start the function sleeps
# (10 s for the metastore, 20 s for HiveServer2).
#######################################
start_hive_services() {
  local spec service label pause

  log_step "启动Hive服务"

  # Each entry is <service name>:<display name>:<seconds to sleep after start>.
  for spec in "metastore:Metastore:10" "hiveserver2:HiveServer2:20"; do
    IFS=: read -r service label pause <<< "$spec"

    if ! pgrep -f "$service" > /dev/null; then
      log_info "启动${label}服务..."
      nohup /opt/hive/bin/hive --service "$service" > "/opt/hive/logs/${service}.log" 2>&1 &
      sleep "$pause"
    else
      log_info "${label}服务已运行,跳过启动"
    fi
  done

  log_info "Hive服务启动完成"
}
# Verify the deployment.
#######################################
# After a 30 s pause, check that ports 9083 and 10000 appear in the netstat
# listing, that metastore and hiveserver2 processes exist, and that beeline
# can run `show databases;` within 30 s. Failed checks are logged as warnings
# and do not stop the script.
# Globals:   MASTER_NODE (read)
#######################################
verify_deployment() {
  local entry label port pattern

  log_step "验证Hive部署"

  # Give the freshly started services time to come up.
  log_info "等待服务完全启动..."
  sleep 30

  # Listening ports, as <display name>:<port>.
  log_info "检查服务端口..."
  for entry in "Metastore:9083" "HiveServer2:10000"; do
    label=${entry%%:*}
    port=${entry##*:}
    if netstat -tlnp | grep -q ":${port}"; then
      log_info "✓ ${label}服务端口${port}正常"
    else
      log_warn "✗ ${label}服务端口${port}未监听"
    fi
  done

  # Running processes, as <display name>:<pgrep pattern>.
  log_info "检查Hive进程..."
  for entry in "Metastore:metastore" "HiveServer2:hiveserver2"; do
    label=${entry%%:*}
    pattern=${entry##*:}
    if pgrep -f "$pattern" > /dev/null; then
      log_info "✓ ${label}进程运行正常"
    else
      log_warn "✗ ${label}进程未运行"
    fi
  done

  # End-to-end check through HiveServer2.
  log_info "测试Hive连接..."
  if timeout 30 /opt/hive/bin/beeline -u "jdbc:hive2://$MASTER_NODE:10000" -n root -e "show databases;" &>/dev/null; then
    log_info "✓ Hive连接测试成功"
  else
    log_warn "✗ Hive连接测试失败,可能需要等待更长时间"
  fi

  log_info "部署验证完成"
}
# Print the deployment summary.
#######################################
# Show the deployed hosts, service ports, the beeline connection command and
# the log file locations.
# Globals:   MASTER_NODE, SLAVE_NODES, GREEN, YELLOW, NC (read)
# Outputs:   the summary on stdout
#######################################
show_deployment_summary() {
  local -r rule="========================================"

  # Coloured banner.
  echo ""
  echo -e "${GREEN}${rule}${NC}"
  echo -e "${GREEN} Hive 4.0.1 部署完成${NC}"
  echo -e "${GREEN}${rule}${NC}"

  # Plain-text body; variables are expanded inside the here-document.
  cat << EOF

部署信息:
- 主节点: $MASTER_NODE
- 从节点: ${SLAVE_NODES[*]:-无}
- Hive版本: 4.0.1
- Metastore: MariaDB

服务端口:
- Metastore: 9083
- HiveServer2: 10000
- MariaDB: 3306

连接命令:
beeline -u "jdbc:hive2://$MASTER_NODE:10000" -n root

日志位置:
- Metastore: /opt/hive/logs/metastore.log
- HiveServer2: /opt/hive/logs/hiveserver2.log

EOF

  # Closing hint.
  echo -e "${YELLOW}注意:如果连接失败,请等待1-2分钟后重试${NC}"
  echo ""
}
# Main entry point.
#######################################
# Print the banner and run every deployment step in order.
# Globals:   BLUE, NC (read)
# Arguments: ignored
#######################################
main() {
  local step
  # Deployment steps in execution order.
  local -a steps=(
    check_permissions
    get_user_input
    check_hadoop
    check_connectivity
    install_hive
    setup_mariadb
    setup_hive_database
    download_mariadb_driver
    create_hive_config
    deploy_to_slaves
    setup_hdfs_directories
    initialize_schema
    start_hive_services
    verify_deployment
    show_deployment_summary
  )

  # Banner, printed in blue.
  echo -e "${BLUE}"
  cat << 'EOF'
_ _ _ _____ _
| | | (_) | _ | | |
| |_| |___ _____ | |/' |_ _ _| |
| _ | \ \ / / _ \ | /| | | | | |
| | | | |\ V / __/ \ |_/ / |_| |_|
\_| |_/_| \_/ \___| \___/ \__,_(_)
EOF
  echo -e "${NC}"

  for step in "${steps[@]}"; do
    "$step"
  done

  log_info "Hive 4.0.1 部署脚本执行完成!"
}
# Script entry point: hand all command-line arguments to main.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment