001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.mapreduce.lib.join;
020
021 import java.io.DataInput;
022 import java.io.DataInputStream;
023 import java.io.DataOutput;
024 import java.io.DataOutputStream;
025 import java.io.IOException;
026 import java.util.HashSet;
027
028 import org.apache.hadoop.classification.InterfaceAudience;
029 import org.apache.hadoop.classification.InterfaceStability;
030 import org.apache.hadoop.conf.Configuration;
031 import org.apache.hadoop.io.Text;
032 import org.apache.hadoop.io.Writable;
033 import org.apache.hadoop.io.WritableUtils;
034 import org.apache.hadoop.io.serializer.*;
035 import org.apache.hadoop.mapreduce.InputSplit;
036 import org.apache.hadoop.util.ReflectionUtils;
037
038 /**
039 * This InputSplit contains a set of child InputSplits. Any InputSplit inserted
040 * into this collection must have a public default constructor.
041 */
042 @InterfaceAudience.Public
043 @InterfaceStability.Stable
044 public class CompositeInputSplit extends InputSplit implements Writable {
045
046 private int fill = 0;
047 private long totsize = 0L;
048 private InputSplit[] splits;
049 private Configuration conf = new Configuration();
050
051 public CompositeInputSplit() { }
052
053 public CompositeInputSplit(int capacity) {
054 splits = new InputSplit[capacity];
055 }
056
057 /**
058 * Add an InputSplit to this collection.
059 * @throws IOException If capacity was not specified during construction
060 * or if capacity has been reached.
061 */
062 public void add(InputSplit s) throws IOException, InterruptedException {
063 if (null == splits) {
064 throw new IOException("Uninitialized InputSplit");
065 }
066 if (fill == splits.length) {
067 throw new IOException("Too many splits");
068 }
069 splits[fill++] = s;
070 totsize += s.getLength();
071 }
072
073 /**
074 * Get ith child InputSplit.
075 */
076 public InputSplit get(int i) {
077 return splits[i];
078 }
079
080 /**
081 * Return the aggregate length of all child InputSplits currently added.
082 */
083 public long getLength() throws IOException {
084 return totsize;
085 }
086
087 /**
088 * Get the length of ith child InputSplit.
089 */
090 public long getLength(int i) throws IOException, InterruptedException {
091 return splits[i].getLength();
092 }
093
094 /**
095 * Collect a set of hosts from all child InputSplits.
096 */
097 public String[] getLocations() throws IOException, InterruptedException {
098 HashSet<String> hosts = new HashSet<String>();
099 for (InputSplit s : splits) {
100 String[] hints = s.getLocations();
101 if (hints != null && hints.length > 0) {
102 for (String host : hints) {
103 hosts.add(host);
104 }
105 }
106 }
107 return hosts.toArray(new String[hosts.size()]);
108 }
109
110 /**
111 * getLocations from ith InputSplit.
112 */
113 public String[] getLocation(int i) throws IOException, InterruptedException {
114 return splits[i].getLocations();
115 }
116
117 /**
118 * Write splits in the following format.
119 * {@code
120 * <count><class1><class2>...<classn><split1><split2>...<splitn>
121 * }
122 */
123 @SuppressWarnings("unchecked")
124 public void write(DataOutput out) throws IOException {
125 WritableUtils.writeVInt(out, splits.length);
126 for (InputSplit s : splits) {
127 Text.writeString(out, s.getClass().getName());
128 }
129 for (InputSplit s : splits) {
130 SerializationFactory factory = new SerializationFactory(conf);
131 Serializer serializer =
132 factory.getSerializer(s.getClass());
133 serializer.open((DataOutputStream)out);
134 serializer.serialize(s);
135 }
136 }
137
138 /**
139 * {@inheritDoc}
140 * @throws IOException If the child InputSplit cannot be read, typically
141 * for failing access checks.
142 */
143 @SuppressWarnings("unchecked") // Generic array assignment
144 public void readFields(DataInput in) throws IOException {
145 int card = WritableUtils.readVInt(in);
146 if (splits == null || splits.length != card) {
147 splits = new InputSplit[card];
148 }
149 Class<? extends InputSplit>[] cls = new Class[card];
150 try {
151 for (int i = 0; i < card; ++i) {
152 cls[i] =
153 Class.forName(Text.readString(in)).asSubclass(InputSplit.class);
154 }
155 for (int i = 0; i < card; ++i) {
156 splits[i] = ReflectionUtils.newInstance(cls[i], null);
157 SerializationFactory factory = new SerializationFactory(conf);
158 Deserializer deserializer = factory.getDeserializer(cls[i]);
159 deserializer.open((DataInputStream)in);
160 splits[i] = (InputSplit)deserializer.deserialize(splits[i]);
161 }
162 } catch (ClassNotFoundException e) {
163 throw new IOException("Failed split init", e);
164 }
165 }
166 }