<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Do not modify this file directly. Instead, copy entries that you -->
<!-- wish to modify from this file into hdfs-site.xml and change them -->
<!-- there. If hdfs-site.xml does not already exist, create it. -->
<configuration>
<property>
<name>hadoop.hdfs.configuration.version</name>
<value>1</value>
<description>version of this configuration file</description>
</property>
<property>
<name>dfs.namenode.rpc-address</name>
<value></value>
<description>
RPC address that handles all client requests. In the case of HA/Federation where multiple namenodes exist,
the name service id is added to the name, e.g. dfs.namenode.rpc-address.ns1 or
dfs.namenode.rpc-address.EXAMPLENAMESERVICE.
The value of this property will take the form of nn-host1:rpc-port. The NameNode's default RPC port is 8020.
</description>
</property>
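<!--
Illustrative only, not part of the defaults: in an HA deployment with a
hypothetical nameservice "mycluster" and NameNode IDs "nn1" and "nn2",
hdfs-site.xml would typically carry per-NameNode entries such as:

<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>nn1.example.com:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>nn2.example.com:8020</value>
</property>
-->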
<property>
<name>dfs.namenode.rpc-bind-host</name>
<value></value>
<description>
The actual address the RPC server will bind to. If this optional address is
set, it overrides only the hostname portion of dfs.namenode.rpc-address.
It can also be specified per name node or name service for HA/Federation.
This is useful for making the name node listen on all interfaces by
setting it to 0.0.0.0.
</description>
</property>
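<!--
Illustrative only: as described above, binding the RPC server to all
interfaces while clients keep connecting to the advertised hostname can be
done in hdfs-site.xml with:

<property>
<name>dfs.namenode.rpc-bind-host</name>
<value>0.0.0.0</value>
</property>
-->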
<property>
<name>dfs.namenode.servicerpc-address</name>
<value></value>
<description>
RPC address for HDFS Services communication. BackupNode, Datanodes and all other services should
connect to this address if it is configured. In the case of HA/Federation where multiple namenodes exist,
the name service id is added to the name, e.g. dfs.namenode.servicerpc-address.ns1 or
dfs.namenode.servicerpc-address.EXAMPLENAMESERVICE.
The value of this property will take the form of nn-host1:rpc-port.
If the value of this property is unset the value of dfs.namenode.rpc-address will be used as the default.
</description>
</property>
<property>
<name>dfs.namenode.servicerpc-bind-host</name>
<value></value>
<description>
The actual address the service RPC server will bind to. If this optional address is
set, it overrides only the hostname portion of dfs.namenode.servicerpc-address.
It can also be specified per name node or name service for HA/Federation.
This is useful for making the name node listen on all interfaces by
setting it to 0.0.0.0.
</description>
</property>
<property>
<name>dfs.namenode.lifeline.rpc-address</name>
<value></value>
<description>
NameNode RPC lifeline address. This is an optional separate RPC address
that can be used to isolate health checks and liveness to protect against
resource exhaustion in the main RPC handler pool. In the case of
HA/Federation where multiple NameNodes exist, the name service ID is added
to the name e.g. dfs.namenode.lifeline.rpc-address.ns1. The value of this
property will take the form of nn-host1:rpc-port. If this property is not
defined, then the NameNode will not start a lifeline RPC server. By
default, the property is not defined.
</description>
</property>
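<!--
Illustrative only: enabling the lifeline RPC server on a hypothetical,
otherwise unused port (8050 here is an arbitrary choice) via hdfs-site.xml:

<property>
<name>dfs.namenode.lifeline.rpc-address</name>
<value>nn1.example.com:8050</value>
</property>
-->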
<property>
<name>dfs.namenode.lifeline.rpc-bind-host</name>
<value></value>
<description>
The actual address the lifeline RPC server will bind to. If this optional
address is set, it overrides only the hostname portion of
dfs.namenode.lifeline.rpc-address. It can also be specified per name node
or name service for HA/Federation. This is useful for making the name node
listen on all interfaces by setting it to 0.0.0.0.
</description>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>0.0.0.0:9868</value>
<description>
The secondary namenode http server address and port.
</description>
</property>
<property>
<name>dfs.namenode.secondary.https-address</name>
<value>0.0.0.0:9869</value>
<description>
The secondary namenode HTTPS server address and port.
</description>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:9866</value>
<description>
The datanode server address and port for data transfer.
</description>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:9864</value>
<description>
The datanode http server address and port.
</description>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:9867</value>
<description>
The datanode ipc server address and port.
</description>
</property>
<property>
<name>dfs.datanode.http.internal-proxy.port</name>
<value>0</value>
<description>
The datanode's internal web proxy port.
By default it selects a random port that is available at runtime.
</description>
</property>
<property>
<name>dfs.datanode.handler.count</name>
<value>10</value>
<description>The number of server threads for the datanode.</description>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>0.0.0.0:9870</value>
<description>
The address and the base port on which the dfs namenode web ui will listen.
</description>
</property>
<property>
<name>dfs.namenode.http-bind-host</name>
<value></value>
<description>
The actual address the HTTP server will bind to. If this optional address
is set, it overrides only the hostname portion of dfs.namenode.http-address.
It can also be specified per name node or name service for HA/Federation.
This is useful for making the name node HTTP server listen on all
interfaces by setting it to 0.0.0.0.
</description>
</property>
<property>
<name>dfs.namenode.heartbeat.recheck-interval</name>
<value>300000</value>
<description>
This time decides the interval to check for expired datanodes.
Together with dfs.heartbeat.interval, it also determines how long the
namenode waits before declaring a datanode dead.
The unit of this configuration is milliseconds.
</description>
</property>
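<!--
Worked example, assuming the usual NameNode formula for declaring a
datanode dead:
  dead-node timeout = 2 * dfs.namenode.heartbeat.recheck-interval
                      + 10 * dfs.heartbeat.interval
With the default recheck interval of 300000 ms and a dfs.heartbeat.interval
of 3 seconds (its usual default), this gives
  2 * 300000 ms + 10 * 3000 ms = 630000 ms = 10.5 minutes
before a silent datanode is marked dead.
-->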
<property>
<name>dfs.http.policy</name>
<value>HTTP_ONLY</value>
<description>Decide if HTTPS(SSL) is supported on HDFS.
This configures the HTTP endpoint for HDFS daemons.
The following values are supported:
- HTTP_ONLY : Service is provided only on http
- HTTPS_ONLY : Service is provided only on https
- HTTP_AND_HTTPS : Service is provided both on http and https
</description>
</property>
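<!--
Illustrative only: to serve the HDFS web endpoints over TLS exclusively,
hdfs-site.xml could override the policy (the server keystore details are
then read from the resource named by dfs.https.server.keystore.resource
below):

<property>
<name>dfs.http.policy</name>
<value>HTTPS_ONLY</value>
</property>
-->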
<property>
<name>dfs.client.https.need-auth</name>
<value>false</value>
<description>Whether SSL client certificate authentication is required
</description>
</property>
<property>
<name>dfs.client.cached.conn.retry</name>
<value>3</value>
<description>The number of times the HDFS client will pull a socket from the
cache. Once this number is exceeded, the client will try to create a new
socket.
</description>
</property>
<property>
<name>dfs.https.server.keystore.resource</name>
<value>ssl-server.xml</value>
<description>Resource file from which ssl server keystore
information will be extracted
</description>
</property>
<property>
<name>dfs.client.https.keystore.resource</name>
<value>ssl-client.xml</value>
<description>Resource file from which ssl client keystore
information will be extracted
</description>
</property>
</configuration>

Hadoop 3.1.4 default configuration parameters

In the Hadoop ecosystem, configuration parameters are key to managing and optimizing cluster performance. Hadoop 3.1.4 ships default configuration files for its four main components: core-default.xml, hdfs-default.xml, mapred-default.xml and yarn-default.xml. These files define Hadoop's default runtime behavior, covering storage, computation, networking and resource management. The key parameters in each file are outlined below.

1. **core-default.xml**: the core component, providing Hadoop's basic functionality and settings.
   - **fs.defaultFS**: URI of the default file system, usually pointing at the HDFS NameNode.
   - **io.file.buffer.size**: buffer size used when reading and writing files, 4096 bytes by default.
   - **fs.permissions.umask-mode**: umask applied when creating files or directories, 022 by default, meaning newly created files and directories are readable and executable by all users while only the creator has write permission.

2. **hdfs-default.xml**: configuration for the Hadoop Distributed File System (HDFS).
   - **dfs.replication**: number of replicas per data block, 3 by default, to ensure fault tolerance.
   - **dfs.namenode.name.dir**: directory path where the NameNode stores its metadata.
   - **dfs.datanode.data.dir**: directory paths where DataNodes store data blocks.
   - **dfs.client.read.shortcircuit**: if enabled, clients can read data directly from the DataNode, improving performance.

3. **mapred-default.xml**: configuration for the MapReduce framework.
   - **mapreduce.framework.name**: yarn by default, meaning MapReduce jobs are scheduled by YARN.
   - **mapreduce.map.memory.mb**: memory allocated to each Map task; the default may vary by environment.
   - **mapreduce.reduce.memory.mb**: memory allocated to each Reduce task; the default may also vary by environment.
   - **mapreduce.task.io.sort.mb**: memory used during the sort phase, 100 MB by default.

4. **yarn-default.xml**: configuration for YARN, the distributed application resource manager.
   - **yarn.nodemanager.resource.cpu-vcores**: number of virtual CPU cores per node.
   - **yarn.nodemanager.resource.memory-mb**: total memory available on each node.
   - **yarn.scheduler.minimum-allocation-mb**: minimum memory allocation per container.
   - **yarn.scheduler.maximum-allocation-mb**: maximum memory allocation per container.
   - **yarn.app.mapreduce.am.resource.mb**: memory allocated to the application master (AM).

Understanding these defaults is essential for tuning a Hadoop cluster. For example, adjusting mapreduce.map.memory.mb and mapreduce.reduce.memory.mb to match the actual workload helps avoid out-of-memory errors, and sensible YARN resource-allocation settings improve overall cluster utilization and responsiveness. An example override is sketched below.

The default configuration files in Hadoop 3.1.4 give administrators a baseline, but they usually need to be adjusted and optimized for the specific environment. A solid understanding of what these parameters mean and how they interact is a key step toward a more efficient and stable cluster.
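As an illustration of the override mechanism described above, the fragment below is a minimal, hypothetical hdfs-site.xml (the replication value and the local path are made-up examples) that overrides two of the HDFS defaults; mapred-site.xml and yarn-site.xml override their *-default.xml counterparts in exactly the same way.

<?xml version="1.0"?>
<configuration>
<property>
<name>dfs.replication</name>
<!-- fewer replicas than the default of 3, e.g. for a small test cluster -->
<value>2</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<!-- hypothetical local directory for NameNode metadata -->
<value>/data/hdfs/namenode</value>
</property>
</configuration>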