001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.yarn.client.api; 020 021 import java.io.IOException; 022 import java.util.Collection; 023 import java.util.List; 024 025 import org.apache.hadoop.classification.InterfaceAudience; 026 import org.apache.hadoop.classification.InterfaceAudience.Private; 027 import org.apache.hadoop.classification.InterfaceAudience.Public; 028 import org.apache.hadoop.classification.InterfaceStability; 029 import org.apache.hadoop.service.AbstractService; 030 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; 031 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; 032 import org.apache.hadoop.yarn.api.records.ContainerId; 033 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; 034 import org.apache.hadoop.yarn.api.records.Priority; 035 import org.apache.hadoop.yarn.api.records.Resource; 036 import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl; 037 import org.apache.hadoop.yarn.exceptions.YarnException; 038 039 import com.google.common.base.Preconditions; 040 import com.google.common.collect.ImmutableList; 041 042 @InterfaceAudience.Public 043 @InterfaceStability.Stable 044 public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends 045 AbstractService { 046 047 /** 048 * Create a new instance of AMRMClient. 049 * For usage: 050 * <pre> 051 * {@code 052 * AMRMClient.<T>createAMRMClientContainerRequest() 053 * }</pre> 054 * @return the newly create AMRMClient instance. 055 */ 056 @Public 057 public static <T extends ContainerRequest> AMRMClient<T> createAMRMClient() { 058 AMRMClient<T> client = new AMRMClientImpl<T>(); 059 return client; 060 } 061 062 private NMTokenCache nmTokenCache; 063 064 @Private 065 protected AMRMClient(String name) { 066 super(name); 067 nmTokenCache = NMTokenCache.getSingleton(); 068 } 069 070 /** 071 * Object to represent a single container request for resources. Scheduler 072 * documentation should be consulted for the specifics of how the parameters 073 * are honored. 074 * 075 * By default, YARN schedulers try to allocate containers at the requested 076 * locations but they may relax the constraints in order to expedite meeting 077 * allocations limits. They first relax the constraint to the same rack as the 078 * requested node and then to anywhere in the cluster. The relaxLocality flag 079 * may be used to disable locality relaxation and request containers at only 080 * specific locations. The following conditions apply. 081 * <ul> 082 * <li>Within a priority, all container requests must have the same value for 083 * locality relaxation. Either enabled or disabled.</li> 084 * <li>If locality relaxation is disabled, then across requests, locations at 085 * different network levels may not be specified. E.g. its invalid to make a 086 * request for a specific node and another request for a specific rack.</li> 087 * <li>If locality relaxation is disabled, then only within the same request, 088 * a node and its rack may be specified together. This allows for a specific 089 * rack with a preference for a specific node within that rack.</li> 090 * <li></li> 091 * </ul> 092 * To re-enable locality relaxation at a given priority, all pending requests 093 * with locality relaxation disabled must be first removed. Then they can be 094 * added back with locality relaxation enabled. 095 * 096 * All getters return immutable values. 097 */ 098 public static class ContainerRequest { 099 final Resource capability; 100 final List<String> nodes; 101 final List<String> racks; 102 final Priority priority; 103 final boolean relaxLocality; 104 105 /** 106 * Instantiates a {@link ContainerRequest} with the given constraints and 107 * locality relaxation enabled. 108 * 109 * @param capability 110 * The {@link Resource} to be requested for each container. 111 * @param nodes 112 * Any hosts to request that the containers are placed on. 113 * @param racks 114 * Any racks to request that the containers are placed on. The 115 * racks corresponding to any hosts requested will be automatically 116 * added to this list. 117 * @param priority 118 * The priority at which to request the containers. Higher 119 * priorities have lower numerical values. 120 */ 121 public ContainerRequest(Resource capability, String[] nodes, 122 String[] racks, Priority priority) { 123 this(capability, nodes, racks, priority, true); 124 } 125 126 /** 127 * Instantiates a {@link ContainerRequest} with the given constraints. 128 * 129 * @param capability 130 * The {@link Resource} to be requested for each container. 131 * @param nodes 132 * Any hosts to request that the containers are placed on. 133 * @param racks 134 * Any racks to request that the containers are placed on. The 135 * racks corresponding to any hosts requested will be automatically 136 * added to this list. 137 * @param priority 138 * The priority at which to request the containers. Higher 139 * priorities have lower numerical values. 140 * @param relaxLocality 141 * If true, containers for this request may be assigned on hosts 142 * and racks other than the ones explicitly requested. 143 */ 144 public ContainerRequest(Resource capability, String[] nodes, 145 String[] racks, Priority priority, boolean relaxLocality) { 146 // Validate request 147 Preconditions.checkArgument(capability != null, 148 "The Resource to be requested for each container " + 149 "should not be null "); 150 Preconditions.checkArgument(priority != null, 151 "The priority at which to request containers should not be null "); 152 Preconditions.checkArgument( 153 !(!relaxLocality && (racks == null || racks.length == 0) 154 && (nodes == null || nodes.length == 0)), 155 "Can't turn off locality relaxation on a " + 156 "request with no location constraints"); 157 this.capability = capability; 158 this.nodes = (nodes != null ? ImmutableList.copyOf(nodes) : null); 159 this.racks = (racks != null ? ImmutableList.copyOf(racks) : null); 160 this.priority = priority; 161 this.relaxLocality = relaxLocality; 162 } 163 164 public Resource getCapability() { 165 return capability; 166 } 167 168 public List<String> getNodes() { 169 return nodes; 170 } 171 172 public List<String> getRacks() { 173 return racks; 174 } 175 176 public Priority getPriority() { 177 return priority; 178 } 179 180 public boolean getRelaxLocality() { 181 return relaxLocality; 182 } 183 184 public String toString() { 185 StringBuilder sb = new StringBuilder(); 186 sb.append("Capability[").append(capability).append("]"); 187 sb.append("Priority[").append(priority).append("]"); 188 return sb.toString(); 189 } 190 } 191 192 /** 193 * Register the application master. This must be called before any 194 * other interaction 195 * @param appHostName Name of the host on which master is running 196 * @param appHostPort Port master is listening on 197 * @param appTrackingUrl URL at which the master info can be seen 198 * @return <code>RegisterApplicationMasterResponse</code> 199 * @throws YarnException 200 * @throws IOException 201 */ 202 public abstract RegisterApplicationMasterResponse 203 registerApplicationMaster(String appHostName, 204 int appHostPort, 205 String appTrackingUrl) 206 throws YarnException, IOException; 207 208 /** 209 * Request additional containers and receive new container allocations. 210 * Requests made via <code>addContainerRequest</code> are sent to the 211 * <code>ResourceManager</code>. New containers assigned to the master are 212 * retrieved. Status of completed containers and node health updates are 213 * also retrieved. 214 * This also doubles up as a heartbeat to the ResourceManager and must be 215 * made periodically. 216 * The call may not always return any new allocations of containers. 217 * App should not make concurrent allocate requests. May cause request loss. 218 * @param progressIndicator Indicates progress made by the master 219 * @return the response of the allocate request 220 * @throws YarnException 221 * @throws IOException 222 */ 223 public abstract AllocateResponse allocate(float progressIndicator) 224 throws YarnException, IOException; 225 226 /** 227 * Unregister the application master. This must be called in the end. 228 * @param appStatus Success/Failure status of the master 229 * @param appMessage Diagnostics message on failure 230 * @param appTrackingUrl New URL to get master info 231 * @throws YarnException 232 * @throws IOException 233 */ 234 public abstract void unregisterApplicationMaster(FinalApplicationStatus appStatus, 235 String appMessage, 236 String appTrackingUrl) 237 throws YarnException, IOException; 238 239 /** 240 * Request containers for resources before calling <code>allocate</code> 241 * @param req Resource request 242 */ 243 public abstract void addContainerRequest(T req); 244 245 /** 246 * Remove previous container request. The previous container request may have 247 * already been sent to the ResourceManager. So even after the remove request 248 * the app must be prepared to receive an allocation for the previous request 249 * even after the remove request 250 * @param req Resource request 251 */ 252 public abstract void removeContainerRequest(T req); 253 254 /** 255 * Release containers assigned by the Resource Manager. If the app cannot use 256 * the container or wants to give up the container then it can release them. 257 * The app needs to make new requests for the released resource capability if 258 * it still needs it. eg. it released non-local resources 259 * @param containerId 260 */ 261 public abstract void releaseAssignedContainer(ContainerId containerId); 262 263 /** 264 * Get the currently available resources in the cluster. 265 * A valid value is available after a call to allocate has been made 266 * @return Currently available resources 267 */ 268 public abstract Resource getAvailableResources(); 269 270 /** 271 * Get the current number of nodes in the cluster. 272 * A valid values is available after a call to allocate has been made 273 * @return Current number of nodes in the cluster 274 */ 275 public abstract int getClusterNodeCount(); 276 277 /** 278 * Get outstanding <code>ContainerRequest</code>s matching the given 279 * parameters. These ContainerRequests should have been added via 280 * <code>addContainerRequest</code> earlier in the lifecycle. For performance, 281 * the AMRMClient may return its internal collection directly without creating 282 * a copy. Users should not perform mutable operations on the return value. 283 * Each collection in the list contains requests with identical 284 * <code>Resource</code> size that fit in the given capability. In a 285 * collection, requests will be returned in the same order as they were added. 286 * @return Collection of request matching the parameters 287 */ 288 public abstract List<? extends Collection<T>> getMatchingRequests( 289 Priority priority, 290 String resourceName, 291 Resource capability); 292 293 /** 294 * Update application's blacklist with addition or removal resources. 295 * 296 * @param blacklistAdditions list of resources which should be added to the 297 * application blacklist 298 * @param blacklistRemovals list of resources which should be removed from the 299 * application blacklist 300 */ 301 public abstract void updateBlacklist(List<String> blacklistAdditions, 302 List<String> blacklistRemovals); 303 304 /** 305 * Set the NM token cache for the <code>AMRMClient</code>. This cache must 306 * be shared with the {@link NMClient} used to manage containers for the 307 * <code>AMRMClient</code> 308 * <p/> 309 * If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} 310 * singleton instance will be used. 311 * 312 * @param nmTokenCache the NM token cache to use. 313 */ 314 public void setNMTokenCache(NMTokenCache nmTokenCache) { 315 this.nmTokenCache = nmTokenCache; 316 } 317 318 /** 319 * Get the NM token cache of the <code>AMRMClient</code>. This cache must be 320 * shared with the {@link NMClient} used to manage containers for the 321 * <code>AMRMClient</code>. 322 * <p/> 323 * If a NM token cache is not set, the {@link NMTokenCache#getSingleton()} 324 * singleton instance will be used. 325 * 326 * @return the NM token cache. 327 */ 328 public NMTokenCache getNMTokenCache() { 329 return nmTokenCache; 330 } 331 332 }