001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.mapreduce.lib.input;
020
021 import java.io.IOException;
022 import java.io.DataInput;
023 import java.io.DataOutput;
024
025 import org.apache.hadoop.mapred.SplitLocationInfo;
026 import org.apache.hadoop.mapreduce.InputFormat;
027 import org.apache.hadoop.mapreduce.InputSplit;
028 import org.apache.hadoop.mapreduce.TaskAttemptContext;
029 import org.apache.hadoop.classification.InterfaceAudience;
030 import org.apache.hadoop.classification.InterfaceStability;
031 import org.apache.hadoop.classification.InterfaceStability.Evolving;
032 import org.apache.hadoop.fs.Path;
033 import org.apache.hadoop.io.Text;
034 import org.apache.hadoop.io.Writable;
035
036 /** A section of an input file. Returned by {@link
037 * InputFormat#getSplits(JobContext)} and passed to
038 * {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}. */
039 @InterfaceAudience.Public
040 @InterfaceStability.Stable
041 public class FileSplit extends InputSplit implements Writable {
042 private Path file;
043 private long start;
044 private long length;
045 private String[] hosts;
046 private SplitLocationInfo[] hostInfos;
047
048 public FileSplit() {}
049
050 /** Constructs a split with host information
051 *
052 * @param file the file name
053 * @param start the position of the first byte in the file to process
054 * @param length the number of bytes in the file to process
055 * @param hosts the list of hosts containing the block, possibly null
056 */
057 public FileSplit(Path file, long start, long length, String[] hosts) {
058 this.file = file;
059 this.start = start;
060 this.length = length;
061 this.hosts = hosts;
062 }
063
064 /** Constructs a split with host and cached-blocks information
065 *
066 * @param file the file name
067 * @param start the position of the first byte in the file to process
068 * @param length the number of bytes in the file to process
069 * @param hosts the list of hosts containing the block
070 * @param inMemoryHosts the list of hosts containing the block in memory
071 */
072 public FileSplit(Path file, long start, long length, String[] hosts,
073 String[] inMemoryHosts) {
074 this(file, start, length, hosts);
075 hostInfos = new SplitLocationInfo[hosts.length];
076 for (int i = 0; i < hosts.length; i++) {
077 // because N will be tiny, scanning is probably faster than a HashSet
078 boolean inMemory = false;
079 for (String inMemoryHost : inMemoryHosts) {
080 if (inMemoryHost.equals(hosts[i])) {
081 inMemory = true;
082 break;
083 }
084 }
085 hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory);
086 }
087 }
088
089 /** The file containing this split's data. */
090 public Path getPath() { return file; }
091
092 /** The position of the first byte in the file to process. */
093 public long getStart() { return start; }
094
095 /** The number of bytes in the file to process. */
096 @Override
097 public long getLength() { return length; }
098
099 @Override
100 public String toString() { return file + ":" + start + "+" + length; }
101
102 ////////////////////////////////////////////
103 // Writable methods
104 ////////////////////////////////////////////
105
106 @Override
107 public void write(DataOutput out) throws IOException {
108 Text.writeString(out, file.toString());
109 out.writeLong(start);
110 out.writeLong(length);
111 }
112
113 @Override
114 public void readFields(DataInput in) throws IOException {
115 file = new Path(Text.readString(in));
116 start = in.readLong();
117 length = in.readLong();
118 hosts = null;
119 }
120
121 @Override
122 public String[] getLocations() throws IOException {
123 if (this.hosts == null) {
124 return new String[]{};
125 } else {
126 return this.hosts;
127 }
128 }
129
130 @Override
131 @Evolving
132 public SplitLocationInfo[] getLocationInfo() throws IOException {
133 return hostInfos;
134 }
135 }