/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.mapred.SplitLocationInfo;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;

/**
 * An <code>InputSplit</code> describes the portion of the input that a
 * single {@link Mapper} is to process.
 *
 * <p>A split usually exposes its data as a stream of bytes; turning those
 * bytes into records is the job of the {@link RecordReader} configured for
 * the job, which presents the record-oriented view.
 *
 * @see InputFormat
 * @see RecordReader
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class InputSplit {

  /**
   * Reports how large this split is. The value exists so that input splits
   * can be sorted by size.
   *
   * @return the number of bytes in the split
   * @throws IOException may be thrown by implementations
   * @throws InterruptedException may be thrown by implementations
   */
  public abstract long getLength()
      throws IOException, InterruptedException;

  /**
   * Lists, by name, the nodes on which the data of this split would be
   * local. The locations do not need to be serialized.
   *
   * @return a new array holding the names of the nodes
   * @throws IOException may be thrown by implementations
   * @throws InterruptedException may be thrown by implementations
   */
  public abstract String[] getLocations()
      throws IOException, InterruptedException;

  /**
   * Describes which nodes hold the data of this split and how the data is
   * stored at each of those locations.
   *
   * <p>This base implementation always returns <code>null</code>.
   *
   * @return the <code>SplitLocationInfo</code>s describing how the split
   *         data is stored at each location; <code>null</code> means that
   *         every location has the data stored on disk
   * @throws IOException declared for overriding implementations; the base
   *         implementation never throws it
   */
  @Evolving
  public SplitLocationInfo[] getLocationInfo() throws IOException {
    return null;
  }
}