/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce.lib.input;

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;

import org.apache.hadoop.mapred.SplitLocationInfo;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

/** A section of an input file.  Returned by {@link
 * InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)} and passed to
 * {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.
 *
 * <p>Only the file path, start offset and length are written by
 * {@link #write(DataOutput)}; the host list and the per-host location info
 * are not serialized and are cleared by {@link #readFields(DataInput)}.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class FileSplit extends InputSplit implements Writable {
  /** The file containing this split's data. */
  private Path file;
  /** Position of the first byte in the file to process. */
  private long start;
  /** Number of bytes in the file to process. */
  private long length;
  /** Hosts containing the block; may be null. */
  private String[] hosts;
  /**
   * Per-host location details; null unless built by the constructor that
   * takes in-memory hosts.
   */
  private SplitLocationInfo[] hostInfos;

  /** Constructs an empty split; fields are populated by {@link #readFields}. */
  public FileSplit() {}

  /** Constructs a split with host information
   *
   * @param file the file name
   * @param start the position of the first byte in the file to process
   * @param length the number of bytes in the file to process
   * @param hosts the list of hosts containing the block, possibly null
   */
  public FileSplit(Path file, long start, long length, String[] hosts) {
    this.file = file;
    this.start = start;
    this.length = length;
    this.hosts = hosts;
  }

  /** Constructs a split with host and cached-blocks information
   *
   * @param file the file name
   * @param start the position of the first byte in the file to process
   * @param length the number of bytes in the file to process
   * @param hosts the list of hosts containing the block, possibly null; when
   *        null no location info is built and {@link #getLocationInfo()}
   *        returns null
   * @param inMemoryHosts the list of hosts containing the block in memory,
   *        possibly null; null is treated as "no host has the block in memory"
   */
  public FileSplit(Path file, long start, long length, String[] hosts,
      String[] inMemoryHosts) {
    this(file, start, length, hosts);
    if (hosts == null) {
      // Nothing to annotate: the delegated constructor accepts a null host
      // list, so mirror that here instead of failing on hosts.length.
      return;
    }
    hostInfos = new SplitLocationInfo[hosts.length];
    for (int i = 0; i < hosts.length; i++) {
      // because N will be tiny, scanning is probably faster than a HashSet
      boolean inMemory = false;
      if (inMemoryHosts != null) {
        for (String inMemoryHost : inMemoryHosts) {
          if (inMemoryHost.equals(hosts[i])) {
            inMemory = true;
            break;
          }
        }
      }
      hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory);
    }
  }

  /** The file containing this split's data. */
  public Path getPath() { return file; }

  /** The position of the first byte in the file to process. */
  public long getStart() { return start; }

  /** The number of bytes in the file to process. */
  @Override
  public long getLength() { return length; }

  /** Formats the split as {@code <file>:<start>+<length>}. */
  @Override
  public String toString() { return file + ":" + start + "+" + length; }

  ////////////////////////////////////////////
  // Writable methods
  ////////////////////////////////////////////

  /**
   * Writes the file path (as a string), the start offset and the length.
   * Host information is not written.
   *
   * @param out the output to serialize this split to
   * @throws IOException if writing to {@code out} fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    Text.writeString(out, file.toString());
    out.writeLong(start);
    out.writeLong(length);
  }

  /**
   * Reads the file path, start offset and length, in the order written by
   * {@link #write(DataOutput)}, and clears all host information.
   *
   * @param in the input to deserialize this split from
   * @throws IOException if reading from {@code in} fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    file = new Path(Text.readString(in));
    start = in.readLong();
    length = in.readLong();
    hosts = null;
    // Location info is not serialized either; drop it so a reused instance
    // does not keep reporting the hosts of the split it previously held.
    hostInfos = null;
  }

  /**
   * Returns the hosts containing the block.
   *
   * @return the host array given at construction, or an empty array when
   *         no hosts are set
   */
  @Override
  public String[] getLocations() throws IOException {
    if (this.hosts == null) {
      return new String[]{};
    } else {
      return this.hosts;
    }
  }

  /**
   * Returns the per-host location details.
   *
   * @return the location info built by the constructor that takes in-memory
   *         hosts, or null when none was built or after
   *         {@link #readFields(DataInput)}
   */
  @Override
  @Evolving
  public SplitLocationInfo[] getLocationInfo() throws IOException {
    return hostInfos;
  }
}