001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.mapreduce.lib.input;
020
021import java.io.IOException;
022import java.io.DataInput;
023import java.io.DataOutput;
024
025import org.apache.hadoop.mapred.SplitLocationInfo;
026import org.apache.hadoop.mapreduce.InputFormat;
027import org.apache.hadoop.mapreduce.InputSplit;
028import org.apache.hadoop.mapreduce.TaskAttemptContext;
029import org.apache.hadoop.classification.InterfaceAudience;
030import org.apache.hadoop.classification.InterfaceStability;
031import org.apache.hadoop.classification.InterfaceStability.Evolving;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.io.Text;
034import org.apache.hadoop.io.Writable;
035
036/** A section of an input file.  Returned by {@link
037 * InputFormat#getSplits(JobContext)} and passed to
038 * {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}. */
039@InterfaceAudience.Public
040@InterfaceStability.Stable
041public class FileSplit extends InputSplit implements Writable {
042  private Path file;
043  private long start;
044  private long length;
045  private String[] hosts;
046  private SplitLocationInfo[] hostInfos;
047
048  public FileSplit() {}
049
050  /** Constructs a split with host information
051   *
052   * @param file the file name
053   * @param start the position of the first byte in the file to process
054   * @param length the number of bytes in the file to process
055   * @param hosts the list of hosts containing the block, possibly null
056   */
057  public FileSplit(Path file, long start, long length, String[] hosts) {
058    this.file = file;
059    this.start = start;
060    this.length = length;
061    this.hosts = hosts;
062  }
063  
064  /** Constructs a split with host and cached-blocks information
065  *
066  * @param file the file name
067  * @param start the position of the first byte in the file to process
068  * @param length the number of bytes in the file to process
069  * @param hosts the list of hosts containing the block
070  * @param inMemoryHosts the list of hosts containing the block in memory
071  */
072 public FileSplit(Path file, long start, long length, String[] hosts,
073     String[] inMemoryHosts) {
074   this(file, start, length, hosts);
075   hostInfos = new SplitLocationInfo[hosts.length];
076   for (int i = 0; i < hosts.length; i++) {
077     // because N will be tiny, scanning is probably faster than a HashSet
078     boolean inMemory = false;
079     for (String inMemoryHost : inMemoryHosts) {
080       if (inMemoryHost.equals(hosts[i])) {
081         inMemory = true;
082         break;
083       }
084     }
085     hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory);
086   }
087 }
088 
089  /** The file containing this split's data. */
090  public Path getPath() { return file; }
091  
092  /** The position of the first byte in the file to process. */
093  public long getStart() { return start; }
094  
095  /** The number of bytes in the file to process. */
096  @Override
097  public long getLength() { return length; }
098
099  @Override
100  public String toString() { return file + ":" + start + "+" + length; }
101
102  ////////////////////////////////////////////
103  // Writable methods
104  ////////////////////////////////////////////
105
106  @Override
107  public void write(DataOutput out) throws IOException {
108    Text.writeString(out, file.toString());
109    out.writeLong(start);
110    out.writeLong(length);
111  }
112
113  @Override
114  public void readFields(DataInput in) throws IOException {
115    file = new Path(Text.readString(in));
116    start = in.readLong();
117    length = in.readLong();
118    hosts = null;
119  }
120
121  @Override
122  public String[] getLocations() throws IOException {
123    if (this.hosts == null) {
124      return new String[]{};
125    } else {
126      return this.hosts;
127    }
128  }
129  
130  @Override
131  @Evolving
132  public SplitLocationInfo[] getLocationInfo() throws IOException {
133    return hostInfos;
134  }
135}