001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.mapreduce.lib.input;
020    
021    import java.io.IOException;
022    import java.io.DataInput;
023    import java.io.DataOutput;
024    
025    import org.apache.hadoop.mapred.SplitLocationInfo;
026    import org.apache.hadoop.mapreduce.InputFormat;
027    import org.apache.hadoop.mapreduce.InputSplit;
028    import org.apache.hadoop.mapreduce.TaskAttemptContext;
029    import org.apache.hadoop.classification.InterfaceAudience;
030    import org.apache.hadoop.classification.InterfaceStability;
031    import org.apache.hadoop.classification.InterfaceStability.Evolving;
032    import org.apache.hadoop.fs.Path;
033    import org.apache.hadoop.io.Text;
034    import org.apache.hadoop.io.Writable;
035    
036    /** A section of an input file.  Returned by {@link
037     * InputFormat#getSplits(JobContext)} and passed to
038     * {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}. */
039    @InterfaceAudience.Public
040    @InterfaceStability.Stable
041    public class FileSplit extends InputSplit implements Writable {
042      private Path file;
043      private long start;
044      private long length;
045      private String[] hosts;
046      private SplitLocationInfo[] hostInfos;
047    
048      public FileSplit() {}
049    
050      /** Constructs a split with host information
051       *
052       * @param file the file name
053       * @param start the position of the first byte in the file to process
054       * @param length the number of bytes in the file to process
055       * @param hosts the list of hosts containing the block, possibly null
056       */
057      public FileSplit(Path file, long start, long length, String[] hosts) {
058        this.file = file;
059        this.start = start;
060        this.length = length;
061        this.hosts = hosts;
062      }
063      
064      /** Constructs a split with host and cached-blocks information
065      *
066      * @param file the file name
067      * @param start the position of the first byte in the file to process
068      * @param length the number of bytes in the file to process
069      * @param hosts the list of hosts containing the block
070      * @param inMemoryHosts the list of hosts containing the block in memory
071      */
072     public FileSplit(Path file, long start, long length, String[] hosts,
073         String[] inMemoryHosts) {
074       this(file, start, length, hosts);
075       hostInfos = new SplitLocationInfo[hosts.length];
076       for (int i = 0; i < hosts.length; i++) {
077         // because N will be tiny, scanning is probably faster than a HashSet
078         boolean inMemory = false;
079         for (String inMemoryHost : inMemoryHosts) {
080           if (inMemoryHost.equals(hosts[i])) {
081             inMemory = true;
082             break;
083           }
084         }
085         hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory);
086       }
087     }
088     
089      /** The file containing this split's data. */
090      public Path getPath() { return file; }
091      
092      /** The position of the first byte in the file to process. */
093      public long getStart() { return start; }
094      
095      /** The number of bytes in the file to process. */
096      @Override
097      public long getLength() { return length; }
098    
099      @Override
100      public String toString() { return file + ":" + start + "+" + length; }
101    
102      ////////////////////////////////////////////
103      // Writable methods
104      ////////////////////////////////////////////
105    
106      @Override
107      public void write(DataOutput out) throws IOException {
108        Text.writeString(out, file.toString());
109        out.writeLong(start);
110        out.writeLong(length);
111      }
112    
113      @Override
114      public void readFields(DataInput in) throws IOException {
115        file = new Path(Text.readString(in));
116        start = in.readLong();
117        length = in.readLong();
118        hosts = null;
119      }
120    
121      @Override
122      public String[] getLocations() throws IOException {
123        if (this.hosts == null) {
124          return new String[]{};
125        } else {
126          return this.hosts;
127        }
128      }
129      
130      @Override
131      @Evolving
132      public SplitLocationInfo[] getLocationInfo() throws IOException {
133        return hostInfos;
134      }
135    }