hadoop集群搭建
LiuSw Lv6

hadoop集群搭建

集群列表

服务器 地址 角色 备注
hadoop1 192.168.11.81 namenode datanode 32G 12C 800G
hadoop2 192.168.11.82 secondarynamenode datanode 32G 12C 800G
hadoop3 192.168.11.83 datanode 32G 12C 800G
mysql8 192.168.11.72 元数据数据库

一.基础环境设置

关闭防火墙

1
2
systemctl stop firewalld
systemctl disable firewalld

关闭selinux

1
2
sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
setenforce 0

创建用户和组

1
2
groupadd hadoop
useradd -g hadoop hadoop

添加hosts

1
2
3
4
vi /etc/hosts
192.168.11.81 hadoop1
192.168.11.82 hadoop2
192.168.11.83 hadoop3

开启ssh免密登录

1
2
3
4
ssh-keygen -t rsa
ssh-copy-id hadoop1
ssh-copy-id hadoop2
ssh-copy-id hadoop3

安装jdk1.8

1
2
3
4
5
6
7
8
9
10
11
12
tar -xzvf jdk-8u271-linux-x64.tar.gz -C /
cat >>/etc/profile <<'EOF'
# java1.8
export JAVA_HOME=/jdk1.8.0_271
export JRE_HOME=/jdk1.8.0_271/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
EOF
# 加载环境变量
source /etc/profile
# 验证java
java -version

注:其它节点也需要安装jdk

二.安装hadoop

下载hadoop包

1
2
3
http://archive.apache.org/dist/hadoop/core/

https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop

上传解压hadoop包

1
tar -zxvf hadoop-2.6.4.tar.gz -C /data

添加环境变量

1
2
3
4
5
6
7
cat >>/etc/profile <<'EOF'
# hadoop
export HADOOP_HOME=/data/hadoop-2.6.4
export PATH=$HADOOP_HOME/bin:$PATH
EOF

source /etc/profile

配置hadoop参数

配置文件放在$HADOOP_HOME/etc/hadoop/下

1
2
3
4
5
6
cd $HADOOP_HOME/etc/hadoop/
vi hadoop-env.sh
export JAVA_HOME=/jdk1.8.0_271

vi yarn-env.sh
export JAVA_HOME=/jdk1.8.0_271

配置工作节点

1
2
3
4
vi slaves
hadoop1
hadoop2
hadoop3

配置hadoop参数

1
vi core-site.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>fs.defaultFS</name>

<value>hdfs://hadoop1:9000</value>
</property>

<property>
<name>hadoop.tmp.dir</name>
<value>/dfs/data/tmp</value>
</property>

<!-- HUE配置,未使用HUE可忽略 -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>

</configuration>
1
vi hdfs-site.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/dfs/nn/</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/dfs/dn/</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>hadoop1</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop2:50090</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
1
2
cp mapred-site.xml.template mapred-site.xml
vi mapred-site.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<final>true</final>
</property>
<property>
<name>mapreduce.jobtracker.http.address</name>
<value>hadoop3:50030</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop3:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop3:19888</value>
</property>
<property>
<name>mapred.job.tracker</name>
<value>hadoop3:9001</value>
</property>
</configuration>
1
vi yarn-site.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->


<configuration>

<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop1</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>hadoop1:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hadoop1:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hadoop1:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hadoop1:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>hadoop1:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>

注:以上配置需要复制到其它节点中

三.验证安装

格式化文件系统

注意:namenode第一次启动前需要格式化文件系统,格式化后切勿再次格式化!!!

namenode格式化文件系统

1
hadoop namenode -format

若没有hadoop命令可去$HADOOP_HOME/bin中寻找

启动服务

启动namenode进程

1
2
cd $HADOOP_HOME/sbin
hadoop-daemon.sh start namenode

启动datanode进程

1
2
cd $HADOOP_HOME/sbin
hadoop-daemon.sh start datanode

或者一次性启动

1
2
cd $HADOOP_HOME/sbin
./start-all.sh

检查节点配置情况

1
2
3
4
5
6
7
[root@hadoop1 ~]# jps

7616 NodeManager
7074 DataNode
7331 ResourceManager
13703 Jps
6939 NameNode

关闭服务

$HADOOP_HOME/sbin有相应的停止脚本

1
2
3
# 停止所有服务
cd $HADOOP_HOME/sbin
./stop-all.sh

四.web可视化

各个服务启动成功后会有相应的web界面

1
2
3
4
5
# 资源管理(YARN ResourceManager)
http://192.168.11.81:8088

# HDFS管理(NameNode Web UI)
http://192.168.11.81:50070

五.集群调优

 评论