Yarn 配置多队列容量调度器

首先配置hadoop/etc/capacity-scheduler.xml文件

<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

   <!-- 容量调度器最多可以容纳多少个job-->
  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>10000</value>
    <description>
      Maximum number of applications that can be pending and running.
    </description>
  </property>

  <!-- 当前队列中启动的MRAppMaster进程,所占用的资源可以达到队列总资源的多少
        通过这个参数可以限制队列中提交的Job数量
  -->
  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.1</value>
    <description>
      Maximum percent of resources in the cluster which can be used to run 
      application masters i.e. controls number of concurrent running
      applications.
    </description>
  </property>

  <!-- 为Job分配资源时,使用什么策略进行计算
  -->
  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
    <description>
      The ResourceCalculator implementation to be used to compare 
      Resources in the scheduler.
      The default i.e. DefaultResourceCalculator only uses Memory while
      DominantResourceCalculator uses dominant-resource to compare 
      multi-dimensional resources such as Memory, CPU etc.
    </description>
  </property>

   <!-- root队列中有哪些子队列,新增了a,b队列---->
  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default,a,b</value>
    <description>
      The queues at the this level (root is the root queue).
    </description>
  </property>

  <!-- root队列中default队列占用的容量百分比
        所有子队列的容量相加必须等于100
  -->
  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>40</value>
    <description>Default queue target capacity.</description>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.a.capacity</name>
    <value>30</value>
    <description>Default queue target capacity.</description>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.b.capacity</name>
    <value>30</value>
    <description>Default queue target capacity.</description>
  </property>

    <!-- 队列中用户能使用此队列资源的极限百分比
  -->
  <property>
    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
    <value>1</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
    </description>
  </property>
  
   <property>
    <name>yarn.scheduler.capacity.root.a.user-limit-factor</name>
    <value>1</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
    </description>
  </property>
  
   <property>
    <name>yarn.scheduler.capacity.root.b.user-limit-factor</name>
    <value>1</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
    </description>
  </property>

  <!-- root队列中default队列占用的容量百分比的最大值
  -->
  <property>
    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
    <value>100</value>
    <description>
      The maximum capacity of the default queue. 
    </description>
  </property>
  
   <property>
    <name>yarn.scheduler.capacity.root.a.maximum-capacity</name>
    <value>100</value>
    <description>
      The maximum capacity of the default queue. 
    </description>
  </property>
  
   <property>
    <name>yarn.scheduler.capacity.root.b.maximum-capacity</name>
    <value>100</value>
    <description>
      The maximum capacity of the default queue. 
    </description>
  </property>

    <!-- root队列中default队列的状态
  -->
  <property>
    <name>yarn.scheduler.capacity.root.default.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>
  
    <property>
    <name>yarn.scheduler.capacity.root.a.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>

  
    <property>
    <name>yarn.scheduler.capacity.root.b.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>

  <!-- 限制向default队列提交的用户,即访问权限-->
  <property>
    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.a.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.b.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>
<!-- 设置管理员-->
  <property>
    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.a.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>
  
  <property>
    <name>yarn.scheduler.capacity.root.b.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.node-locality-delay</name>
    <value>40</value>
    <description>
      Number of missed scheduling opportunities after which the CapacityScheduler 
      attempts to schedule rack-local containers. 
      Typically this should be set to number of nodes in the cluster, By default is setting 
      approximately number of nodes in one rack which is 40.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings</name>
    <value></value>
    <description>
      A list of mappings that will be used to assign jobs to queues
      The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
      Typically this list will be used to map users to queues,
      for example, u:%user:%user maps all users to queues with the same name
      as the user.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
    <value>false</value>
    <description>
      If a queue mapping is present, will it override the value specified
      by the user? This can be used by administrators to place jobs in queues
      that are different than the one specified by the user.
      The default is false.
    </description>
  </property>

</configuration>

配置完之后使用刷新命令

yarn rmadmin -refreshQueues

然后进入集群的yarn界面就可以看到队列变成了三个

 那么接下来就是怎么设置job在其他队列运行

要知道,决定job在哪个队列运行是由mapred -default.xml 文件中来决定的

所以需要更改这个配置:

1.如果用idea,可以在用

conf.set("mapred.job.queue.name", "a");

这样就指定了在a队列运行job

2.如果在Linux上运行jar包,则可以用

hadoop jar hadoop-mapreduce-examples-2.7.2.jar  wordcount -D mapreduce.job.queuename=a /mapjoin /output3

 如图,job切换到了a队列

原文地址:https://www.cnblogs.com/yangxusun9/p/12404136.html