Hadoop Cluster Setup and Tuning
LiuSw

Hadoop cluster setup

Cluster inventory

Server   Address         Role                          Notes
hadoop1  192.168.11.81   namenode, datanode            32G RAM / 12 cores / 800G disk
hadoop2  192.168.11.82   secondarynamenode, datanode   32G RAM / 12 cores / 800G disk
hadoop3  192.168.11.83   datanode                      32G RAM / 12 cores / 800G disk
mysql8   192.168.11.72   metadata database

1. Basic Environment Setup

Disable the firewall

systemctl stop firewalld
systemctl disable firewalld

Disable SELinux

sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
setenforce 0

Create the user and group

groupadd hadoop
useradd -g hadoop hadoop

Add hosts entries

vi /etc/hosts
192.168.11.81 hadoop1
192.168.11.82 hadoop2
192.168.11.83 hadoop3

Set up passwordless SSH

ssh-keygen -t rsa
ssh-copy-id hadoop1
ssh-copy-id hadoop2
ssh-copy-id hadoop3
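
To confirm that passwordless login actually works to every node, a quick loop such as the following can be run from hadoop1 (a minimal check; it assumes the hosts entries above are in place):

# Each command should print the remote hostname without asking for a password
for h in hadoop1 hadoop2 hadoop3; do
    ssh -o BatchMode=yes "$h" hostname
done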

Install JDK 1.8

tar -xzvf jdk-8u271-linux-x64.tar.gz -C /
cat >>/etc/profile <<'EOF'
# java1.8
export JAVA_HOME=/jdk1.8.0_271
export JRE_HOME=/jdk1.8.0_271/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
EOF
# Load the environment variables
source /etc/profile
# Verify Java
java -version

Note: the JDK must also be installed on the other nodes.
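
To save repeating the manual steps, the JDK tarball can be pushed to the remaining nodes over SSH. A rough sketch (it assumes the same paths as above, passwordless SSH as root, and that copying /etc/profile wholesale is acceptable because the nodes are otherwise identical):

for h in hadoop2 hadoop3; do
    scp jdk-8u271-linux-x64.tar.gz "$h":/tmp/
    ssh "$h" "tar -xzvf /tmp/jdk-8u271-linux-x64.tar.gz -C /"
    # Carry the JAVA_HOME/PATH entries added above over to the other node
    scp /etc/profile "$h":/etc/profile
done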

2. Install Hadoop

Download the Hadoop package

http://archive.apache.org/dist/hadoop/core/

https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop

Upload and extract the Hadoop package

tar -zxvf hadoop-2.6.4.tar.gz -C /data

Add environment variables

cat >>/etc/profile <<'EOF'
# hadoop
export HADOOP_HOME=/data/hadoop-2.6.4
export PATH=$HADOOP_HOME/bin:$PATH
EOF

source /etc/profile
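
A quick sanity check that the variables took effect (assuming the paths above):

# Both should resolve to the /data/hadoop-2.6.4 installation
which hadoop
hadoop version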

Configure Hadoop parameters

The configuration files live under $HADOOP_HOME/etc/hadoop/.

cd $HADOOP_HOME/etc/hadoop/
vi hadoop-env.sh
export JAVA_HOME=/jdk1.8.0_271

vi yarn-env.sh
export JAVA_HOME=/jdk1.8.0_271

Configure the worker nodes

vi slaves
hadoop1
hadoop2
hadoop3

Edit the configuration files

vi core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>fs.defaultFS</name>

<value>hdfs://hadoop1:9000</value>
</property>

<property>
<name>hadoop.tmp.dir</name>
<value>/dfs/data/tmp</value>
</property>

<!-- HUE proxy-user settings; omit if HUE is not used -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>

</configuration>
vi hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/dfs/nn/</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/dfs/dn/</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>hadoop1</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop2:50090</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
cp mapred-site.xml.template mapred-site.xml
vi mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<final>true</final>
</property>
<property>
<name>mapreduce.jobtracker.http.address</name>
<value>hadoop3:50030</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop3:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop3:19888</value>
</property>
<property>
<name>mapred.job.tracker</name>
<value>hadoop3:9001</value>
</property>
</configuration>
vi yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->


<configuration>

<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop1</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>hadoop1:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hadoop1:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hadoop1:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hadoop1:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>hadoop1:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>

Note: the configuration above must be copied to all other nodes.
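
One way to do this is to copy the whole Hadoop directory plus the profile changes to each node, for example (a sketch that assumes identical paths everywhere and passwordless SSH as root; scp -r works as well if rsync is not installed):

for h in hadoop2 hadoop3; do
    rsync -a /data/hadoop-2.6.4/ "$h":/data/hadoop-2.6.4/
    scp /etc/profile "$h":/etc/profile
done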

3. Verify the Installation

Format the file system

Note: the NameNode only needs to format the file system the very first time; never format it again afterwards!

Format the file system on the NameNode

hadoop namenode -format

If the hadoop command is not on the PATH, it can be found in $HADOOP_HOME/bin.

Sync NameNode metadata (run on the other NameNode; only needed for HA standby NameNodes)

hdfs namenode -bootstrapStandby

Start the services

Start the NameNode process

cd $HADOOP_HOME/sbin
hadoop-daemon.sh start namenode

Start the DataNode process

cd $HADOOP_HOME/sbin
hadoop-daemon.sh start datanode

Or start everything at once

cd $HADOOP_HOME/sbin
./start-all.sh

Other start/stop commands

# Note: the "hdfs --daemon" form is Hadoop 3.x syntax; on Hadoop 2.x use hadoop-daemon.sh instead

# Initialize the ZKFC znode in ZooKeeper (HA setups only)
hdfs zkfc -formatZK

# Start the newly formatted NameNode
hdfs --daemon start namenode

# Start a JournalNode
hdfs --daemon start journalnode

# Stop the NameNode service
hdfs --daemon stop namenode

# Stop the JournalNode service
hdfs --daemon stop journalnode

# Start HDFS
start-dfs.sh

Check the processes on each node

[root@hadoop1 ~]# jps

7616 NodeManager
7074 DataNode
7331 ResourceManager
13703 Jps
6939 NameNode
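
Beyond jps, HDFS itself can confirm that all DataNodes registered with the NameNode:

# Lists live/dead DataNodes and per-node capacity
hdfs dfsadmin -report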

Stop the services

$HADOOP_HOME/sbin contains the corresponding stop scripts.

# Stop all services
cd $HADOOP_HOME/sbin
./stop-all.sh

4. Web UIs

Once each service has started successfully, it exposes a corresponding web UI.

# YARN ResourceManager (cluster and application management)
http://192.168.11.81:8088

# HDFS NameNode (file system overview)
http://192.168.11.81:50070
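
If the pages cannot be opened in a browser, a quick reachability check from the command line (adjust the host and ports to your own deployment):

# Both requests should return HTTP 200
curl -s -o /dev/null -w "%{http_code}\n" http://192.168.11.81:8088
curl -s -o /dev/null -w "%{http_code}\n" http://192.168.11.81:50070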

5. Configuration File Examples

The following is a Hadoop 3 configuration (for reference only; on lower-spec machines, scale the resource parameters down accordingly).

hadoop-env.sh

export JAVA_HOME=/java/jdk1.8.0_191

export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root

export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Xmx1024m"
export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS -Xmx4096m"
export HADOOP_PID_DIR=/opt/hadoop-3.1.3/pid

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<!-- Group the NameNode addresses into a single nameservice, mycluster -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>

<!-- Hadoop data storage directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop-3.1.3/data</value>
</property>

<!-- Static user for logging in to the HDFS web UI -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>

<!-- Proxy-user settings for Hive integration -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>

<!-- Enable the trash; deleted files are kept for 60 minutes -->
<property>
<name>fs.trash.interval</name>
<value>60</value>
</property>

<!-- ZooKeeper quorum used by ZKFC -->
<property>
<name>ha.zookeeper.quorum</name>
<value>192.168.11.201:2181,192.168.11.202:2181,192.168.11.203:2181</value>
</property>

</configuration>
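
After distributing this file, the effective values can be read back with hdfs getconf, which is a convenient way to catch typos (a minimal check against the keys defined above):

hdfs getconf -confKey fs.defaultFS        # expect hdfs://mycluster
hdfs getconf -confKey hadoop.tmp.dir      # expect /opt/hadoop-3.1.3/data
hdfs getconf -confKey fs.trash.interval   # expect 60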

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<!-- NameNode data directories -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file://${hadoop.tmp.dir}/name1,file://${hadoop.tmp.dir}/name2</value>
</property>

<!-- DataNode data directories -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///data1,file:///data2</value>
</property>

<!-- JournalNode edits directory -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>${hadoop.tmp.dir}/jn</value>
</property>

<!-- Nameservice (logical cluster) name -->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>

<!-- NameNodes in the nameservice -->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>

<!-- NameNode RPC addresses -->
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop2:8020</value>
</property>

<!-- NameNode HTTP addresses -->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>hadoop1:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>hadoop2:9870</value>
</property>

<!-- Location of NameNode edit logs on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop1:8485;hadoop2:8485/mycluster</value>
</property>

<!-- Failover proxy provider: lets clients determine which NameNode is active -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>

<!-- Fencing method: ensures only one NameNode serves requests at a time -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>

<!-- SSH private key used by the sshfence mechanism -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>

<!-- Enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>

</configuration>
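
With automatic failover enabled, the HA state of each NameNode can be verified once the cluster is running (using the nn1/nn2 ids defined above):

# One NameNode should report "active" and the other "standby"
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2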

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

<!-- Run MapReduce jobs on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>

<!-- JobHistory server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop1:10020</value>
</property>

<!-- JobHistory server web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop1:19888</value>
</property>

<!-- Enable uber mode (off by default) -->
<property>
<name>mapreduce.job.ubertask.enable</name>
<value>true</value>
</property>

<!-- Maximum number of map tasks for an uber job (may be lowered) -->
<property>
<name>mapreduce.job.ubertask.maxmaps</name>
<value>9</value>
</property>

<!-- Maximum number of reduce tasks for an uber job (may be lowered) -->
<property>
<name>mapreduce.job.ubertask.maxreduces</name>
<value>1</value>
</property>

<!-- Maximum input size for an uber job; defaults to dfs.blocksize (may be lowered) -->
<property>
<name>mapreduce.job.ubertask.maxbytes</name>
<value></value>
</property>
</configuration>
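
A small example job is an easy way to confirm that uber mode kicks in for tiny workloads; an uberized job is marked as such in the job output and in the JobHistory web UI. A sketch using the examples jar shipped with Hadoop (adjust the version in the path to your install):

hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi 2 10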

yarn-site.xml

<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->
<!-- Enable the MapReduce shuffle service -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>

<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>

<!-- ResourceManager cluster id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster-yarn</value>
</property>

<!-- Failover proxy provider used by YARN clients -->
<property>
<name>yarn.client.failover-proxy-provider</name>
<value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
</property>

<!-- Base sleep time (ms) between client failover retries -->
<property>
<name>yarn.client.failover-sleep-base-ms</name>
<value>1000</value>
</property>

<!-- Maximum sleep time (ms) between client failover retries -->
<property>
<name>yarn.client.failover-sleep-max-ms</name>
<value>3000</value>
</property>


<!-- Logical ids of the ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>

<!-- =============== rm1 configuration ================= -->
<!-- rm1 hostname -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop1</value>
</property>
<!-- rm1 web UI address -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop1:8088</value>
</property>
<!-- rm1 internal (client RPC) address -->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>hadoop1:8032</value>
</property>
<!-- Scheduler address ApplicationMasters use to request resources from rm1 -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>hadoop1:8030</value>
</property>
<!-- Resource-tracker address NodeManagers use to connect to rm1 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>hadoop1:8031</value>
</property>

<!-- =============== rm2 configuration ================= -->
<!-- rm2 hostname -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop2</value>
</property>
<!-- rm2 web UI address -->
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop2:8088</value>
</property>
<!-- rm2 internal (client RPC) address -->
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>hadoop2:8032</value>
</property>
<!-- Scheduler address ApplicationMasters use to request resources from rm2 -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>hadoop2:8030</value>
</property>
<!-- Resource-tracker address NodeManagers use to connect to rm2 -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>hadoop2:8031</value>
</property>


<!-- ZooKeeper quorum address -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>192.168.11.81:2181,192.168.11.82:2181,192.168.11.83:2181</value>
</property>

<!-- Enable automatic recovery of ResourceManager state -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>

<!-- Store ResourceManager state in the ZooKeeper cluster -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>

<!-- Environment variables inherited by containers -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>

<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>

<!-- Log aggregation server (JobHistory) URL -->
<property>
<name>yarn.log.server.url</name>
<value>http://hadoop1:19888/jobhistory/logs</value>
</property>

<!-- Retain aggregated logs for 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>

<!-- =============== Resource scheduling =============== -->

<!-- Whether to run a thread that checks each task's virtual memory usage and kills tasks that exceed their allocation; defaults to true -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>

<!-- Maximum number of ApplicationMaster restart attempts -->
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>4</value>
<description>The maximum number of application master execution attempts. </description>
</property>

<!-- CPU vcores managed by each NodeManager -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>40</value>
</property>

<!-- Memory (MB) managed by each NodeManager -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>131072</value>
</property>

<!-- Minimum vcores per requested container -->
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<!-- Maximum vcores per requested container -->
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>4</value>
</property>

<!-- Minimum memory (MB) per requested container -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<!-- Maximum memory (MB) per requested container -->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>16384</value>
</property>

</configuration>
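
The scheduler values above constrain each other: with yarn.nodemanager.resource.memory-mb = 131072 and yarn.scheduler.minimum-allocation-mb = 1024, one NodeManager can host at most 131072 / 1024 = 128 minimum-size containers, while yarn.scheduler.maximum-allocation-mb = 16384 caps any single container at 16 GB (and 4 vcores). Once the cluster is up, the capacity actually registered by each NodeManager can be read back:

# Lists registered NodeManagers; detailed per-node memory/vcore capacity
# is shown by "yarn node -status <node-id>" or in the ResourceManager web UI
yarn node -list -all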

6. Cluster Tuning

The End
