001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.yarn.client.api;
020
021 import java.io.IOException;
022 import java.util.Collection;
023 import java.util.List;
024
025 import org.apache.hadoop.classification.InterfaceAudience;
026 import org.apache.hadoop.classification.InterfaceAudience.Private;
027 import org.apache.hadoop.classification.InterfaceAudience.Public;
028 import org.apache.hadoop.classification.InterfaceStability;
029 import org.apache.hadoop.service.AbstractService;
030 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
031 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
032 import org.apache.hadoop.yarn.api.records.ContainerId;
033 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
034 import org.apache.hadoop.yarn.api.records.Priority;
035 import org.apache.hadoop.yarn.api.records.Resource;
036 import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl;
037 import org.apache.hadoop.yarn.exceptions.YarnException;
038
039 import com.google.common.base.Preconditions;
040 import com.google.common.collect.ImmutableList;
041
042 @InterfaceAudience.Public
043 @InterfaceStability.Stable
044 public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
045 AbstractService {
046
047 /**
048 * Create a new instance of AMRMClient.
049 * For usage:
050 * <pre>
051 * {@code
052 * AMRMClient.<T>createAMRMClientContainerRequest()
053 * }</pre>
054 * @return the newly create AMRMClient instance.
055 */
056 @Public
057 public static <T extends ContainerRequest> AMRMClient<T> createAMRMClient() {
058 AMRMClient<T> client = new AMRMClientImpl<T>();
059 return client;
060 }
061
062 private NMTokenCache nmTokenCache;
063
064 @Private
065 protected AMRMClient(String name) {
066 super(name);
067 nmTokenCache = NMTokenCache.getSingleton();
068 }
069
070 /**
071 * Object to represent a single container request for resources. Scheduler
072 * documentation should be consulted for the specifics of how the parameters
073 * are honored.
074 *
075 * By default, YARN schedulers try to allocate containers at the requested
076 * locations but they may relax the constraints in order to expedite meeting
077 * allocations limits. They first relax the constraint to the same rack as the
078 * requested node and then to anywhere in the cluster. The relaxLocality flag
079 * may be used to disable locality relaxation and request containers at only
080 * specific locations. The following conditions apply.
081 * <ul>
082 * <li>Within a priority, all container requests must have the same value for
083 * locality relaxation. Either enabled or disabled.</li>
084 * <li>If locality relaxation is disabled, then across requests, locations at
085 * different network levels may not be specified. E.g. its invalid to make a
086 * request for a specific node and another request for a specific rack.</li>
087 * <li>If locality relaxation is disabled, then only within the same request,
088 * a node and its rack may be specified together. This allows for a specific
089 * rack with a preference for a specific node within that rack.</li>
090 * <li></li>
091 * </ul>
092 * To re-enable locality relaxation at a given priority, all pending requests
093 * with locality relaxation disabled must be first removed. Then they can be
094 * added back with locality relaxation enabled.
095 *
096 * All getters return immutable values.
097 */
098 public static class ContainerRequest {
099 final Resource capability;
100 final List<String> nodes;
101 final List<String> racks;
102 final Priority priority;
103 final boolean relaxLocality;
104
105 /**
106 * Instantiates a {@link ContainerRequest} with the given constraints and
107 * locality relaxation enabled.
108 *
109 * @param capability
110 * The {@link Resource} to be requested for each container.
111 * @param nodes
112 * Any hosts to request that the containers are placed on.
113 * @param racks
114 * Any racks to request that the containers are placed on. The
115 * racks corresponding to any hosts requested will be automatically
116 * added to this list.
117 * @param priority
118 * The priority at which to request the containers. Higher
119 * priorities have lower numerical values.
120 */
121 public ContainerRequest(Resource capability, String[] nodes,
122 String[] racks, Priority priority) {
123 this(capability, nodes, racks, priority, true);
124 }
125
126 /**
127 * Instantiates a {@link ContainerRequest} with the given constraints.
128 *
129 * @param capability
130 * The {@link Resource} to be requested for each container.
131 * @param nodes
132 * Any hosts to request that the containers are placed on.
133 * @param racks
134 * Any racks to request that the containers are placed on. The
135 * racks corresponding to any hosts requested will be automatically
136 * added to this list.
137 * @param priority
138 * The priority at which to request the containers. Higher
139 * priorities have lower numerical values.
140 * @param relaxLocality
141 * If true, containers for this request may be assigned on hosts
142 * and racks other than the ones explicitly requested.
143 */
144 public ContainerRequest(Resource capability, String[] nodes,
145 String[] racks, Priority priority, boolean relaxLocality) {
146 // Validate request
147 Preconditions.checkArgument(capability != null,
148 "The Resource to be requested for each container " +
149 "should not be null ");
150 Preconditions.checkArgument(priority != null,
151 "The priority at which to request containers should not be null ");
152 Preconditions.checkArgument(
153 !(!relaxLocality && (racks == null || racks.length == 0)
154 && (nodes == null || nodes.length == 0)),
155 "Can't turn off locality relaxation on a " +
156 "request with no location constraints");
157 this.capability = capability;
158 this.nodes = (nodes != null ? ImmutableList.copyOf(nodes) : null);
159 this.racks = (racks != null ? ImmutableList.copyOf(racks) : null);
160 this.priority = priority;
161 this.relaxLocality = relaxLocality;
162 }
163
164 public Resource getCapability() {
165 return capability;
166 }
167
168 public List<String> getNodes() {
169 return nodes;
170 }
171
172 public List<String> getRacks() {
173 return racks;
174 }
175
176 public Priority getPriority() {
177 return priority;
178 }
179
180 public boolean getRelaxLocality() {
181 return relaxLocality;
182 }
183
184 public String toString() {
185 StringBuilder sb = new StringBuilder();
186 sb.append("Capability[").append(capability).append("]");
187 sb.append("Priority[").append(priority).append("]");
188 return sb.toString();
189 }
190 }
191
192 /**
193 * Register the application master. This must be called before any
194 * other interaction
195 * @param appHostName Name of the host on which master is running
196 * @param appHostPort Port master is listening on
197 * @param appTrackingUrl URL at which the master info can be seen
198 * @return <code>RegisterApplicationMasterResponse</code>
199 * @throws YarnException
200 * @throws IOException
201 */
202 public abstract RegisterApplicationMasterResponse
203 registerApplicationMaster(String appHostName,
204 int appHostPort,
205 String appTrackingUrl)
206 throws YarnException, IOException;
207
208 /**
209 * Request additional containers and receive new container allocations.
210 * Requests made via <code>addContainerRequest</code> are sent to the
211 * <code>ResourceManager</code>. New containers assigned to the master are
212 * retrieved. Status of completed containers and node health updates are
213 * also retrieved.
214 * This also doubles up as a heartbeat to the ResourceManager and must be
215 * made periodically.
216 * The call may not always return any new allocations of containers.
217 * App should not make concurrent allocate requests. May cause request loss.
218 * @param progressIndicator Indicates progress made by the master
219 * @return the response of the allocate request
220 * @throws YarnException
221 * @throws IOException
222 */
223 public abstract AllocateResponse allocate(float progressIndicator)
224 throws YarnException, IOException;
225
226 /**
227 * Unregister the application master. This must be called in the end.
228 * @param appStatus Success/Failure status of the master
229 * @param appMessage Diagnostics message on failure
230 * @param appTrackingUrl New URL to get master info
231 * @throws YarnException
232 * @throws IOException
233 */
234 public abstract void unregisterApplicationMaster(FinalApplicationStatus appStatus,
235 String appMessage,
236 String appTrackingUrl)
237 throws YarnException, IOException;
238
239 /**
240 * Request containers for resources before calling <code>allocate</code>
241 * @param req Resource request
242 */
243 public abstract void addContainerRequest(T req);
244
245 /**
246 * Remove previous container request. The previous container request may have
247 * already been sent to the ResourceManager. So even after the remove request
248 * the app must be prepared to receive an allocation for the previous request
249 * even after the remove request
250 * @param req Resource request
251 */
252 public abstract void removeContainerRequest(T req);
253
254 /**
255 * Release containers assigned by the Resource Manager. If the app cannot use
256 * the container or wants to give up the container then it can release them.
257 * The app needs to make new requests for the released resource capability if
258 * it still needs it. eg. it released non-local resources
259 * @param containerId
260 */
261 public abstract void releaseAssignedContainer(ContainerId containerId);
262
263 /**
264 * Get the currently available resources in the cluster.
265 * A valid value is available after a call to allocate has been made
266 * @return Currently available resources
267 */
268 public abstract Resource getAvailableResources();
269
270 /**
271 * Get the current number of nodes in the cluster.
272 * A valid values is available after a call to allocate has been made
273 * @return Current number of nodes in the cluster
274 */
275 public abstract int getClusterNodeCount();
276
277 /**
278 * Get outstanding <code>ContainerRequest</code>s matching the given
279 * parameters. These ContainerRequests should have been added via
280 * <code>addContainerRequest</code> earlier in the lifecycle. For performance,
281 * the AMRMClient may return its internal collection directly without creating
282 * a copy. Users should not perform mutable operations on the return value.
283 * Each collection in the list contains requests with identical
284 * <code>Resource</code> size that fit in the given capability. In a
285 * collection, requests will be returned in the same order as they were added.
286 * @return Collection of request matching the parameters
287 */
288 public abstract List<? extends Collection<T>> getMatchingRequests(
289 Priority priority,
290 String resourceName,
291 Resource capability);
292
293 /**
294 * Update application's blacklist with addition or removal resources.
295 *
296 * @param blacklistAdditions list of resources which should be added to the
297 * application blacklist
298 * @param blacklistRemovals list of resources which should be removed from the
299 * application blacklist
300 */
301 public abstract void updateBlacklist(List<String> blacklistAdditions,
302 List<String> blacklistRemovals);
303
304 /**
305 * Set the NM token cache for the <code>AMRMClient</code>. This cache must
306 * be shared with the {@link NMClient} used to manage containers for the
307 * <code>AMRMClient</code>
308 * <p/>
309 * If a NM token cache is not set, the {@link NMTokenCache#getSingleton()}
310 * singleton instance will be used.
311 *
312 * @param nmTokenCache the NM token cache to use.
313 */
314 public void setNMTokenCache(NMTokenCache nmTokenCache) {
315 this.nmTokenCache = nmTokenCache;
316 }
317
318 /**
319 * Get the NM token cache of the <code>AMRMClient</code>. This cache must be
320 * shared with the {@link NMClient} used to manage containers for the
321 * <code>AMRMClient</code>.
322 * <p/>
323 * If a NM token cache is not set, the {@link NMTokenCache#getSingleton()}
324 * singleton instance will be used.
325 *
326 * @return the NM token cache.
327 */
328 public NMTokenCache getNMTokenCache() {
329 return nmTokenCache;
330 }
331
332 }