org.apache.hadoop.streaming
Class StreamJob

java.lang.Object
  extended by org.apache.hadoop.streaming.StreamJob
All Implemented Interfaces:
Configurable, Tool

public class StreamJob
extends Object
implements Tool

All the client-side work happens here. (Jar packaging, MapRed job submission and monitoring)


Field Summary
protected  String additionalConfSpec_
           
protected  String addTaskEnvironment_
           
protected  URI[] archiveURIs
           
protected  String[] argv_
           
protected  String cacheArchives
           
protected  String cacheFiles
           
protected  String comCmd_
           
protected  Configuration config_
           
protected  int debug_
           
protected  boolean detailedUsage_
           
protected  Environment env_
           
protected  URI[] fileURIs
           
protected  boolean hasSimpleInputSpecs_
           
protected  String inputFormatSpec_
           
protected  ArrayList inputSpecs_
           
protected  String inReaderSpec_
           
protected  String ioSpec_
           
protected  String jar_
           
protected  JobClient jc_
           
protected  JobConf jobConf_
           
protected  JobID jobId_
           
protected static String LINK_URI
           
protected  boolean localHadoop_
           
protected static org.apache.commons.logging.Log LOG
           
protected  String mapCmd_
           
protected  String mapDebugSpec_
           
protected  long minRecWrittenToEnableSkip_
           
protected  String numReduceTasksSpec_
           
protected  String output_
           
protected  String outputFormatSpec_
           
protected  boolean outputSingleNode_
           
protected  ArrayList packageFiles_
           
protected  String partitionerSpec_
           
protected  String redCmd_
           
protected  String reduceDebugSpec_
           
protected  RunningJob running_
           
protected  TreeSet seenPrimary_
           
protected  ArrayList shippedCanonFiles_
           
protected  boolean verbose_
           
 
Constructor Summary
StreamJob()
           
StreamJob(String[] argv, boolean mayExit)
          Deprecated. Use StreamJob() with ToolRunner or set the Configuration using setConf(Configuration) and run with run(String[]).
 
Method Summary
static JobConf createJob(String[] argv)
          This method creates a streaming job from the given argument list.
 void exitUsage(boolean detailed)
           
 void fail(String message)
           
protected  String getClusterNick()
          Deprecated. 
 Configuration getConf()
          Return the configuration used by this object.
protected  String getHadoopClientHome()
           
protected  String getJobTrackerHostPort()
           
protected  void getURIs(String lcacheArchives, String lcacheFiles)
          Get the URIs of all the files/caches.
 int go()
          Deprecated. Use run(String[]) instead.
protected  void init()
           
protected  boolean isLocalHadoop()
           
protected  void jobInfo()
           
protected  void listJobConfProperties()
          Prints out the jobconf properties on stdout when verbose is specified.
protected  void msg(String msg)
           
protected  String packageJobJar()
           
 int run(String[] args)
          Execute the command with the given arguments.
 void setConf(Configuration conf)
          Set the configuration to be used by this object.
protected  void setJobConf()
           
 int submitAndMonitorJob()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

protected static final org.apache.commons.logging.Log LOG

argv_

protected String[] argv_

verbose_

protected boolean verbose_

detailedUsage_

protected boolean detailedUsage_

debug_

protected int debug_

env_

protected Environment env_

jar_

protected String jar_

localHadoop_

protected boolean localHadoop_

config_

protected Configuration config_

jobConf_

protected JobConf jobConf_

jc_

protected JobClient jc_

inputSpecs_

protected ArrayList inputSpecs_

seenPrimary_

protected TreeSet seenPrimary_

hasSimpleInputSpecs_

protected boolean hasSimpleInputSpecs_

packageFiles_

protected ArrayList packageFiles_

shippedCanonFiles_

protected ArrayList shippedCanonFiles_

output_

protected String output_

mapCmd_

protected String mapCmd_

comCmd_

protected String comCmd_

redCmd_

protected String redCmd_

cacheFiles

protected String cacheFiles

cacheArchives

protected String cacheArchives

fileURIs

protected URI[] fileURIs

archiveURIs

protected URI[] archiveURIs

inReaderSpec_

protected String inReaderSpec_

inputFormatSpec_

protected String inputFormatSpec_

outputFormatSpec_

protected String outputFormatSpec_

partitionerSpec_

protected String partitionerSpec_

numReduceTasksSpec_

protected String numReduceTasksSpec_

additionalConfSpec_

protected String additionalConfSpec_

mapDebugSpec_

protected String mapDebugSpec_

reduceDebugSpec_

protected String reduceDebugSpec_

ioSpec_

protected String ioSpec_

addTaskEnvironment_

protected String addTaskEnvironment_

outputSingleNode_

protected boolean outputSingleNode_

minRecWrittenToEnableSkip_

protected long minRecWrittenToEnableSkip_

running_

protected RunningJob running_

jobId_

protected JobID jobId_

LINK_URI

protected static final String LINK_URI
See Also:
Constant Field Values
Constructor Detail

StreamJob

@Deprecated
public StreamJob(String[] argv,
                            boolean mayExit)
Deprecated. Use StreamJob() with ToolRunner or set the Configuration using setConf(Configuration) and run with run(String[]).


StreamJob

public StreamJob()
Method Detail

getConf

public Configuration getConf()
Description copied from interface: Configurable
Return the configuration used by this object.

Specified by:
getConf in interface Configurable

setConf

public void setConf(Configuration conf)
Description copied from interface: Configurable
Set the configuration to be used by this object.

Specified by:
setConf in interface Configurable

run

public int run(String[] args)
        throws Exception
Description copied from interface: Tool
Execute the command with the given arguments.

Specified by:
run in interface Tool
Parameters:
args - command specific arguments.
Returns:
exit code.
Throws:
Exception

createJob

public static JobConf createJob(String[] argv)
                         throws IOException
This method creates a streaming job from the given argument list. The created object can be used and/or submitted to a jobtracker for execution by a job agent such as JobControl.

Parameters:
argv - the list of args for creating a streaming job
Returns:
the created JobConf object
Throws:
IOException

go

@Deprecated
public int go()
       throws IOException
Deprecated. Use run(String[]) instead.

This is the method that actually initializes the job conf and submits the job to the jobtracker.

Throws:
IOException

init

protected void init()

msg

protected void msg(String msg)

exitUsage

public void exitUsage(boolean detailed)

fail

public void fail(String message)

getHadoopClientHome

protected String getHadoopClientHome()

isLocalHadoop

protected boolean isLocalHadoop()

getClusterNick

@Deprecated
protected String getClusterNick()
Deprecated. 


packageJobJar

protected String packageJobJar()
                        throws IOException
Returns:
path to the created Jar file or null if no files are necessary.
Throws:
IOException

getURIs

protected void getURIs(String lcacheArchives,
                       String lcacheFiles)
Get the URIs of all the files/caches.


setJobConf

protected void setJobConf()
                   throws IOException
Throws:
IOException

listJobConfProperties

protected void listJobConfProperties()
Prints out the jobconf properties on stdout when verbose is specified.


getJobTrackerHostPort

protected String getJobTrackerHostPort()

jobInfo

protected void jobInfo()

submitAndMonitorJob

public int submitAndMonitorJob()
                        throws IOException
Throws:
IOException


Copyright © 2009 The Apache Software Foundation