Basic environment configuration

Disable the firewall

# Disable the firewall
systemctl stop firewalld
systemctl disable firewalld
systemctl status firewalld

Disable SELinux

# Permanent disable: takes effect only after the OS is rebooted
sed -ri 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
# Temporary disable: takes effect immediately, no reboot required
setenforce 0

Configure hostnames

hostnamectl set-hostname master   # run on the master node
hostnamectl set-hostname slave1   # run on slave1
hostnamectl set-hostname slave2   # run on slave2

Set up /etc/hosts

## Add the following entries on all three nodes
vi /etc/hosts
192.168.100.45 master master.liguoli.cn
192.168.100.46 slave1 slave1.liguoli.cn
192.168.100.47 slave2 slave2.liguoli.cn
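
As an alternative to editing with vi on every host, here is a sketch that appends the same entries non-interactively (run once per node; the IPs are the ones used above):

cat >> /etc/hosts <<'EOF'
192.168.100.45 master master.liguoli.cn
192.168.100.46 slave1 slave1.liguoli.cn
192.168.100.47 slave2 slave2.liguoli.cn
EOF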

Configure passwordless SSH access

sudo yum -y install openssh-server openssh-clients
systemctl enable sshd.service
systemctl start sshd.service

Generate the SSH keys

## Remove any previous SSH configuration
rm -rf ~/.ssh

## Generate a key pair (press Enter at every prompt)
ssh-keygen

## Copy the public key to master (run on all three nodes)
ssh-copy-id master

## Distribute the collected keys (run on master)
scp -r ~/.ssh/authorized_keys slave1:~/.ssh/
scp -r ~/.ssh/authorized_keys slave2:~/.ssh/

## Verify passwordless login
ssh master
ssh slave1
ssh slave2

Software environment

JAVA

## Check the Java version
java -version

## Install Java
yum install java-1.8.0-openjdk* -y

## Locate the Java installation
find / -name 'java'

## Optional when Java was installed via yum
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.392.b08-2.el7_9.x86_64
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
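
The exact directory name under /usr/lib/jvm depends on the OpenJDK build that yum installed, so the path above is only an example. A sketch for resolving the real location on your machine (readlink follows the alternatives symlinks):

# prints something like .../java-1.8.0-openjdk-1.8.0.392.b08-2.el7_9.x86_64/jre/bin/java;
# JAVA_HOME is the directory above jre/bin (or bin)
readlink -f $(which java)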

Hadoop

## Download (place the tarball under /usr/local)
https://archive.apache.org/dist/hadoop/common/
https://archive.apache.org/dist/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz

cd /usr/local
tar -zxvf hadoop-3.3.6.tar.gz

## Configure environment variables (append to /etc/profile)

vi /etc/profile
export HADOOP_HOME=/usr/local/hadoop-3.3.6
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

## Apply the configuration
source /etc/profile
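
A quick sanity check that the variables took effect (a minimal sketch):

echo $HADOOP_HOME   # should print /usr/local/hadoop-3.3.6
hadoop version      # should report Hadoop 3.3.6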

Modify the Hadoop configuration files

Modify core-site.xml

vi /usr/local/hadoop-3.3.6/etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop-3.3.6/tmp</value>
        <description>Abase for other temporary directories.</description>
    </property>
    <property> 
        <name>hadoop.http.staticuser.user</name> 
        <value>root</value> 
    </property> 
    <!-- Hive integration -->
    <property> 
        <name>hadoop.proxyuser.root.hosts</name> 
        <value>*</value> 
    </property> 
    <property> 
        <name>hadoop.proxyuser.root.groups</name> 
        <value>*</value> 
    </property>
</configuration>
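
Since hadoop.tmp.dir points at /usr/local/hadoop-3.3.6/tmp, it does no harm to create that directory up front (a sketch; the format step below creates its own subdirectories anyway):

mkdir -p /usr/local/hadoop-3.3.6/tmp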

Modify hadoop-env.sh

vi /usr/local/hadoop-3.3.6/etc/hadoop/hadoop-env.sh

## Set JAVA_HOME (use the path found above)
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.392.b08-2.el7_9.x86_64

Modify hdfs-site.xml

vi /usr/local/hadoop-3.3.6/etc/hadoop/hdfs-site.xml

<configuration>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>master:50090</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop-3.3.6/tmp/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop-3.3.6/tmp/dfs/data</value>
  </property>
</configuration>

Modify mapred-site.xml

vi /usr/local/hadoop-3.3.6/etc/hadoop/mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
</configuration>

Modify the workers file

vi /usr/local/hadoop-3.3.6/etc/hadoop/workers
## Delete the existing content, keep only the worker nodes

slave1
slave2

Modify yarn-site.xml

vi /usr/local/hadoop-3.3.6/etc/hadoop/yarn-site.xml

<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>

Configure hadoop-env.sh (daemon users when running as root)

vi /usr/local/hadoop-3.3.6/etc/hadoop/hadoop-env.sh 

export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root

Distribute the configuration to all nodes

## Run on the master node

scp -r /etc/profile root@slave1:/etc/profile
scp -r /etc/profile root@slave2:/etc/profile

scp -r /usr/local/hadoop-3.3.6 root@slave1:/usr/local
scp -r /usr/local/hadoop-3.3.6 root@slave2:/usr/local

## Apply the configuration on each slave

ssh slave1
source /etc/profile
exit

ssh slave2
source /etc/profile
exit
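
To confirm the distribution worked, the same checks can be run over SSH without logging in interactively; note that the slaves need the OpenJDK package installed as well (a sketch; source /etc/profile is needed because non-interactive shells do not load it):

ssh slave1 "source /etc/profile && java -version && hadoop version | head -1"
ssh slave2 "source /etc/profile && java -version && hadoop version | head -1"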

Start the cluster (on the master node)

Format the NameNode (first run only)

cd /usr/local/hadoop-3.3.6
hdfs namenode -format

### Seeing "common.Storage: Storage directory /usr/local/hadoop-3.3.6/tmp/dfs/name has been successfully formatted." in the output means the format succeeded
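
If the format ever has to be repeated later, clear the old storage directories on every node first, otherwise DataNodes may refuse to join because of mismatched cluster IDs. A cautionary sketch, not part of the normal first run:

# only when re-formatting: stop the cluster, then on master AND both slaves:
stop-all.sh
rm -rf /usr/local/hadoop-3.3.6/tmp/dfs
# finally re-run the format on master
hdfs namenode -format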

Start

cd /usr/local/hadoop-3.3.6
start-dfs.sh 
start-yarn.sh  
# or
start-all.sh

## Verify

## On master:
jps
# Expected processes:
# ResourceManager
# NameNode
# SecondaryNameNode

## On slave1:
jps
# Expected processes:
# NodeManager
# DataNode

## On slave2:
jps
# Expected processes:
# NodeManager
# DataNode

# NameNode web UI
http://master:9870
# YARN ResourceManager web UI
http://master:8088/
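
Beyond jps and the web UIs, a small HDFS round trip confirms that the DataNodes registered and that files can be written; a minimal sketch (the /smoke-test path is only an example):

# both DataNodes should be listed as live
hdfs dfsadmin -report
# write, read back, and clean up a small file
hdfs dfs -mkdir -p /smoke-test
hdfs dfs -put /etc/hosts /smoke-test/
hdfs dfs -cat /smoke-test/hosts
hdfs dfs -rm -r /smoke-test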

MySQL installation

http://www.liguoli.cn/view/my/32.html

## After installing MySQL, create an account that can connect remotely
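
The hive-site.xml below connects to MySQL as root/root from the master host, so MySQL needs an account that accepts remote connections with those credentials. A sketch for MySQL 5.x (the password must match javax.jdo.option.ConnectionPassword in hive-site.xml; adjust to your own policy):

mysql -uroot -p
# then, inside the mysql client:
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION;
FLUSH PRIVILEGES;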

Hive remote mode

Installation

## Download
https://mirrors.tuna.tsinghua.edu.cn/apache/hive/    
https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz

## Install
cd /usr/local
tar -zxvf apache-hive-3.1.3-bin.tar.gz

## Resolve the Guava version mismatch between Hive and Hadoop
rm -rf /usr/local/apache-hive-3.1.3-bin/lib/guava-19.0.jar
cp /usr/local/hadoop-3.3.6/share/hadoop/common/lib/guava-27.0-jre.jar /usr/local/apache-hive-3.1.3-bin/lib/

Modify the configuration files

cd /usr/local/apache-hive-3.1.3-bin/conf
cp hive-env.sh.template hive-env.sh

vi hive-env.sh
export HADOOP_HOME=/usr/local/hadoop-3.3.6
export HIVE_CONF_DIR=/usr/local/apache-hive-3.1.3-bin/conf
export HIVE_AUX_JARS_PATH=/usr/local/apache-hive-3.1.3-bin/lib

## Create hive-site.xml
vi hive-site.xml
<configuration>
    <!-- MySQL connection settings for the metastore -->
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://master:3306/hive3?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>root</value>
    </property>

    <!-- Host that HiveServer2 binds to -->
    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>master</value>
    </property>

    <!-- Metastore address for remote-mode deployment -->
    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://master:9083</value>
    </property>

    <!-- Disable metastore event notification API authorization -->
    <property>
        <name>hive.metastore.event.db.notification.api.auth</name>
        <value>false</value>
    </property>
</configuration>

MySQL JDBC driver

## Download the driver
https://downloads.mysql.com/archives/c-j/
## Download mysql-connector-java-5.1.49.tar.gz
## Copy the MySQL JDBC driver into the Hive lib directory
mysql-connector-java-5.1.49.jar
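
A sketch of that copy, assuming the tarball was downloaded to /usr/local and unpacks into a directory of the same name:

cd /usr/local
tar -zxvf mysql-connector-java-5.1.49.tar.gz
cp mysql-connector-java-5.1.49/mysql-connector-java-5.1.49.jar /usr/local/apache-hive-3.1.3-bin/lib/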

Initialize the metastore schema

cd /usr/local/apache-hive-3.1.3-bin/bin 
./schematool -initSchema -dbType mysql -verbose
# on success, 74 tables are created in the MySQL hive3 database
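
To confirm the schema really landed in MySQL, the tables can be listed directly (a sketch; hive3 is the database name from the JDBC URL above):

mysql -uroot -p -e "USE hive3; SHOW TABLES;"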

Create the Hive storage directories on HDFS

hadoop fs -mkdir -p /tmp
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -chmod g+w /tmp
hadoop fs -chmod g+w /user/hive/warehouse

Start Hive

1. Start the metastore service
# foreground start (stop with Ctrl+C)
/usr/local/apache-hive-3.1.3-bin/bin/hive --service metastore
# foreground start with DEBUG logging on the console
/usr/local/apache-hive-3.1.3-bin/bin/hive --service metastore --hiveconf hive.root.logger=DEBUG,console
# background start (keeps running after the shell exits); stop it with jps + kill -9
nohup /usr/local/apache-hive-3.1.3-bin/bin/hive --service metastore &
2. Start the hiveserver2 service
nohup /usr/local/apache-hive-3.1.3-bin/bin/hive --service hiveserver2 &
# Note: hiveserver2 takes a while to come up; connecting with beeline immediately after starting it may fail
3. Connect with the beeline client
## Copy the Hive installation from master to the beeline client machine (slave2)
scp -r /usr/local/apache-hive-3.1.3-bin/ root@slave2:/usr/local/
## Connect
ssh slave2
/usr/local/apache-hive-3.1.3-bin/bin/beeline
beeline> ! connect jdbc:hive2://master:10000
# enter root as the username, then press Enter for an empty password
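
Once connected, a couple of statements confirm that beeline, hiveserver2, the metastore, and MySQL are wired together. The same check can also be run non-interactively with beeline's -u/-n/-e options (a sketch; the database name demo is only an example):

/usr/local/apache-hive-3.1.3-bin/bin/beeline -u jdbc:hive2://master:10000 -n root -e "show databases;"
/usr/local/apache-hive-3.1.3-bin/bin/beeline -u jdbc:hive2://master:10000 -n root -e "create database demo; show databases;"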