org.apache.hadoop.mapred
Class JobInProgress

java.lang.Object
  extended by org.apache.hadoop.mapred.JobInProgress

public class JobInProgress
extends Object

JobInProgress maintains all the info for keeping a Job on the straight and narrow. It keeps its JobProfile and its latest JobStatus, plus a set of tables for doing bookkeeping of its Tasks.


Nested Class Summary
static class JobInProgress.Counter
           
 
Field Summary
protected  Credentials tokenStorage
           
 
Constructor Summary
protected JobInProgress(JobID jobid, JobConf conf, JobTracker tracker)
          Create an almost empty JobInProgress, which can be used only for tests
 
Method Summary
 void cleanUpMetrics()
          Called when the job is complete
 boolean completedTask(org.apache.hadoop.mapred.TaskInProgress tip, TaskStatus status)
          A taskid assigned to this JobInProgress has reported in successfully.
 int desiredMaps()
           
 int desiredReduces()
           
 int desiredTasks()
          Return total number of map and reduce tasks desired by the job.
 void failedTask(org.apache.hadoop.mapred.TaskInProgress tip, TaskAttemptID taskid, String reason, TaskStatus.Phase phase, TaskStatus.State state, String trackerName)
          Fail a task with a given reason, but without a status object.
 TaskStatus findFinishedMap(int mapId)
          Find the details of someplace where a map has finished
protected  org.apache.hadoop.mapred.TaskInProgress findSpeculativeTask(Collection<org.apache.hadoop.mapred.TaskInProgress> list, TaskTrackerStatus ttStatus, double avgProgress, long currentTime, boolean shouldRemove)
          Find a speculative task
 int finishedMaps()
           
 int finishedReduces()
           
 boolean getCounters(Counters result)
          Returns the total job counters, by adding together the job, the map and the reduce counters.
 long getFinishTime()
           
 Counters getJobCounters()
          Returns the job-level counters.
 JobID getJobID()
           
 String getJobSubmitHostAddress()
           
 String getJobSubmitHostName()
           
 long getLaunchTime()
           
 boolean getMapCounters(Counters counters)
          Returns map phase counters by summing over all map tasks in progress.
 int getNumReservedTaskTrackersForMaps()
           
 int getNumReservedTaskTrackersForReduces()
           
 long getNumSchedulingOpportunities()
           
 int getNumSlotsPerTask(TaskType taskType)
           
 JobPriority getPriority()
           
 JobProfile getProfile()
           
 org.apache.hadoop.mapred.QueueMetrics getQueueMetrics()
          Get the QueueMetrics object associated with this job
 boolean getReduceCounters(Counters counters)
          Returns reduce phase counters by summing over all reduce tasks in progress.
 Object getSchedulingInfo()
           
 long getStartTime()
           
 JobStatus getStatus()
           
 TaskCompletionEvent[] getTaskCompletionEvents(int fromEventId, int maxEvents)
           
 org.apache.hadoop.mapred.TaskInProgress getTaskInProgress(TaskID tipid)
          Return the TaskInProgress that matches the tipid.
 String getUser()
          Get the user for the job
 boolean inited()
          Check if the job has been initialized.
 void initTasks()
          Construct the splits, etc.
 void kill()
          Kill the job and all its component tasks.
 Task obtainJobCleanupTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts, boolean isMapSlot)
          Return a CleanupTask, if appropriate, to run on the given tasktracker
 Task obtainJobSetupTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts, boolean isMapSlot)
          Return a SetupTask, if appropriate, to run on the given tasktracker
 Task obtainNewMapTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts)
          Return a MapTask, if appropriate, to run on the given tasktracker
 Task obtainNewMapTaskCommon(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts, int maxCacheLevel)
          Return a MapTask whose locality level, relative to the given tasktracker, is less than or equal to a given locality level.
 Task obtainNewNodeLocalMapTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts)
           
 Task obtainNewNodeOrRackLocalMapTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts)
           
 Task obtainNewNonLocalMapTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts)
           
 Task obtainNewReduceTask(TaskTrackerStatus tts, int clusterSize, int numUniqueHosts)
          Return a ReduceTask, if appropriate, to run on the given tasktracker.
 Task obtainTaskCleanupTask(TaskTrackerStatus tts, boolean isMapSlot)
           
 void overrideSchedulingOpportunities()
           
 int pendingMaps()
           
 int pendingReduces()
           
 Vector<org.apache.hadoop.mapred.TaskInProgress> reportCleanupTIPs(boolean shouldBeComplete)
          Return a vector of cleanup TaskInProgress objects
 Vector<org.apache.hadoop.mapred.TaskInProgress> reportSetupTIPs(boolean shouldBeComplete)
          Return a vector of setup TaskInProgress objects
 Vector<org.apache.hadoop.mapred.TaskInProgress> reportTasksInProgress(boolean shouldBeMap, boolean shouldBeComplete)
          Return a vector of TaskInProgress objects, selected by task type (shouldBeMap) and completion state (shouldBeComplete)
 void reserveTaskTracker(TaskTracker taskTracker, TaskType type, int numSlots)
           
 void resetSchedulingOpportunities()
           
 int runningMaps()
           
 int runningReduces()
           
protected  void scheduleMap(org.apache.hadoop.mapred.TaskInProgress tip)
          Adds a map tip to the list of running maps.
 boolean scheduleOffSwitch(int numTaskTrackers)
          Check if we can schedule an off-switch task for this job.
protected  void scheduleReduce(org.apache.hadoop.mapred.TaskInProgress tip)
          Adds a reduce tip to the list of running reduces
 boolean scheduleReduces()
           
 void schedulingOpportunity()
           
 void setPriority(JobPriority priority)
           
 void setSchedulingInfo(Object schedulingInfo)
           
 void unreserveTaskTracker(TaskTracker taskTracker, TaskType type)
           
 void updateTaskStatus(org.apache.hadoop.mapred.TaskInProgress tip, TaskStatus status)
          Assuming JobTracker is locked on entry.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

tokenStorage

protected Credentials tokenStorage
Constructor Detail

JobInProgress

protected JobInProgress(JobID jobid,
                        JobConf conf,
                        JobTracker tracker)
                 throws IOException
Create an almost empty JobInProgress, which can be used only for tests

Throws:
IOException
Method Detail

getQueueMetrics

public org.apache.hadoop.mapred.QueueMetrics getQueueMetrics()
Get the QueueMetrics object associated with this job

Returns:
QueueMetrics

cleanUpMetrics

public void cleanUpMetrics()
Called when the job is complete


inited

public boolean inited()
Check if the job has been initialized.

Returns:
true if the job has been initialized, false otherwise

getUser

public String getUser()
Get the user for the job


initTasks

public void initTasks()
               throws IOException,
                      org.apache.hadoop.mapred.JobInProgress.KillInterruptedException,
                      UnknownHostException
Construct the splits, etc. This is invoked from an async thread so that split-computation doesn't block anyone.

Throws:
IOException
org.apache.hadoop.mapred.JobInProgress.KillInterruptedException
UnknownHostException

getProfile

public JobProfile getProfile()

getStatus

public JobStatus getStatus()

getLaunchTime

public long getLaunchTime()

getStartTime

public long getStartTime()

getFinishTime

public long getFinishTime()

desiredMaps

public int desiredMaps()

finishedMaps

public int finishedMaps()

desiredReduces

public int desiredReduces()

runningMaps

public int runningMaps()

runningReduces

public int runningReduces()

finishedReduces

public int finishedReduces()

pendingMaps

public int pendingMaps()

pendingReduces

public int pendingReduces()

desiredTasks

public int desiredTasks()
Return total number of map and reduce tasks desired by the job.

Returns:
total number of map and reduce tasks desired by the job

getNumSlotsPerTask

public int getNumSlotsPerTask(TaskType taskType)

getPriority

public JobPriority getPriority()

setPriority

public void setPriority(JobPriority priority)

reportTasksInProgress

public Vector<org.apache.hadoop.mapred.TaskInProgress> reportTasksInProgress(boolean shouldBeMap,
                                                                             boolean shouldBeComplete)
Return a vector of TaskInProgress objects, selected by task type (shouldBeMap) and completion state (shouldBeComplete)


reportCleanupTIPs

public Vector<org.apache.hadoop.mapred.TaskInProgress> reportCleanupTIPs(boolean shouldBeComplete)
Return a vector of cleanup TaskInProgress objects


reportSetupTIPs

public Vector<org.apache.hadoop.mapred.TaskInProgress> reportSetupTIPs(boolean shouldBeComplete)
Return a vector of setup TaskInProgress objects


updateTaskStatus

public void updateTaskStatus(org.apache.hadoop.mapred.TaskInProgress tip,
                             TaskStatus status)
Assuming JobTracker is locked on entry.


getJobCounters

public Counters getJobCounters()
Returns the job-level counters.

Returns:
the job-level counters.

getMapCounters

public boolean getMapCounters(Counters counters)
Returns map phase counters by summing over all map tasks in progress. This method returns true if counters are within limits and false otherwise.


getReduceCounters

public boolean getReduceCounters(Counters counters)
Returns reduce phase counters by summing over all reduce tasks in progress. This method returns true if counters are within limits and false otherwise.


getCounters

public boolean getCounters(Counters result)
Returns the total job counters, by adding together the job, the map and the reduce counters. This method returns true if counters are within limits and false otherwise.


obtainNewMapTask

public Task obtainNewMapTask(TaskTrackerStatus tts,
                             int clusterSize,
                             int numUniqueHosts)
                      throws IOException
Return a MapTask, if appropriate, to run on the given tasktracker

Throws:
IOException

obtainNewMapTaskCommon

public Task obtainNewMapTaskCommon(TaskTrackerStatus tts,
                                   int clusterSize,
                                   int numUniqueHosts,
                                   int maxCacheLevel)
                            throws IOException
Return a MapTask whose locality level, relative to the given tasktracker, is less than or equal to a given locality level.

Parameters:
tts - The task tracker that is asking for a task
clusterSize - The number of task trackers in the cluster
numUniqueHosts - The number of hosts that run task trackers
avgProgress - The average progress of this kind of task in this job
maxCacheLevel - The maximum topology level until which to schedule maps.
Returns:
the selected map Task, or null if no suitable task is available
Throws:
IOException

obtainTaskCleanupTask

public Task obtainTaskCleanupTask(TaskTrackerStatus tts,
                                  boolean isMapSlot)
                           throws IOException
Throws:
IOException

obtainNewNodeLocalMapTask

public Task obtainNewNodeLocalMapTask(TaskTrackerStatus tts,
                                      int clusterSize,
                                      int numUniqueHosts)
                               throws IOException
Throws:
IOException

obtainNewNodeOrRackLocalMapTask

public Task obtainNewNodeOrRackLocalMapTask(TaskTrackerStatus tts,
                                            int clusterSize,
                                            int numUniqueHosts)
                                     throws IOException
Throws:
IOException

obtainNewNonLocalMapTask

public Task obtainNewNonLocalMapTask(TaskTrackerStatus tts,
                                     int clusterSize,
                                     int numUniqueHosts)
                              throws IOException
Throws:
IOException

schedulingOpportunity

public void schedulingOpportunity()

resetSchedulingOpportunities

public void resetSchedulingOpportunities()

getNumSchedulingOpportunities

public long getNumSchedulingOpportunities()

overrideSchedulingOpportunities

public void overrideSchedulingOpportunities()

scheduleOffSwitch

public boolean scheduleOffSwitch(int numTaskTrackers)
Check if we can schedule an off-switch task for this job.

Parameters:
numTaskTrackers - number of tasktrackers
Returns:
true if we can schedule off-switch, false otherwise. We check the number of missed opportunities for the job; if it has 'waited' long enough, we go ahead and schedule.

obtainJobCleanupTask

public Task obtainJobCleanupTask(TaskTrackerStatus tts,
                                 int clusterSize,
                                 int numUniqueHosts,
                                 boolean isMapSlot)
                          throws IOException
Return a CleanupTask, if appropriate, to run on the given tasktracker

Throws:
IOException

obtainJobSetupTask

public Task obtainJobSetupTask(TaskTrackerStatus tts,
                               int clusterSize,
                               int numUniqueHosts,
                               boolean isMapSlot)
                        throws IOException
Return a SetupTask, if appropriate, to run on the given tasktracker

Throws:
IOException

scheduleReduces

public boolean scheduleReduces()

obtainNewReduceTask

public Task obtainNewReduceTask(TaskTrackerStatus tts,
                                int clusterSize,
                                int numUniqueHosts)
                         throws IOException
Return a ReduceTask, if appropriate, to run on the given tasktracker. We don't have cache-sensitivity for reduce tasks, as they work on temporary MapRed files.

Throws:
IOException

reserveTaskTracker

public void reserveTaskTracker(TaskTracker taskTracker,
                               TaskType type,
                               int numSlots)

unreserveTaskTracker

public void unreserveTaskTracker(TaskTracker taskTracker,
                                 TaskType type)

getNumReservedTaskTrackersForMaps

public int getNumReservedTaskTrackersForMaps()

getNumReservedTaskTrackersForReduces

public int getNumReservedTaskTrackersForReduces()

scheduleMap

protected void scheduleMap(org.apache.hadoop.mapred.TaskInProgress tip)
Adds a map tip to the list of running maps.

Parameters:
tip - the tip that needs to be scheduled as running

scheduleReduce

protected void scheduleReduce(org.apache.hadoop.mapred.TaskInProgress tip)
Adds a reduce tip to the list of running reduces

Parameters:
tip - the tip that needs to be scheduled as running

findSpeculativeTask

protected org.apache.hadoop.mapred.TaskInProgress findSpeculativeTask(Collection<org.apache.hadoop.mapred.TaskInProgress> list,
                                                                      TaskTrackerStatus ttStatus,
                                                                      double avgProgress,
                                                                      long currentTime,
                                                                      boolean shouldRemove)
Find a speculative task

Parameters:
list - a list of tips
ttStatus - status of the tracker that has requested a tip
avgProgress - the average progress for speculation
currentTime - current time in milliseconds
shouldRemove - whether to remove the tips
Returns:
a tip that can be speculated on the tracker

completedTask

public boolean completedTask(org.apache.hadoop.mapred.TaskInProgress tip,
                             TaskStatus status)
A taskid assigned to this JobInProgress has reported in successfully.


kill

public void kill()
Kill the job and all its component tasks. This method should be called from jobtracker and should return fast as it locks the jobtracker.


failedTask

public void failedTask(org.apache.hadoop.mapred.TaskInProgress tip,
                       TaskAttemptID taskid,
                       String reason,
                       TaskStatus.Phase phase,
                       TaskStatus.State state,
                       String trackerName)
Fail a task with a given reason, but without a status object. Assuming JobTracker is locked on entry.

Parameters:
tip - The task's tip
taskid - The task id
reason - The reason that the task failed
trackerName - The task tracker the task failed on

getTaskInProgress

public org.apache.hadoop.mapred.TaskInProgress getTaskInProgress(TaskID tipid)
Return the TaskInProgress that matches the tipid.


findFinishedMap

public TaskStatus findFinishedMap(int mapId)
Find the details of someplace where a map has finished

Parameters:
mapId - the id of the map
Returns:
the task status of the completed task

getTaskCompletionEvents

public TaskCompletionEvent[] getTaskCompletionEvents(int fromEventId,
                                                     int maxEvents)

getJobID

public JobID getJobID()
Returns:
The JobID of this JobInProgress.

getJobSubmitHostName

public String getJobSubmitHostName()
Returns:
submitHostName of this JobInProgress.

getJobSubmitHostAddress

public String getJobSubmitHostAddress()
Returns:
submitHostAddress of this JobInProgress.

getSchedulingInfo

public Object getSchedulingInfo()

setSchedulingInfo

public void setSchedulingInfo(Object schedulingInfo)


Copyright © 2009 The Apache Software Foundation