001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.io;
020
021 import java.io.IOException;
022 import java.io.DataInput;
023 import java.io.DataOutput;
024 import java.io.DataOutputStream;
025 import java.io.DataInputStream;
026 import java.io.ByteArrayOutputStream;
027 import java.io.ByteArrayInputStream;
028 import java.util.zip.Deflater;
029 import java.util.zip.DeflaterOutputStream;
030 import java.util.zip.InflaterInputStream;
031
032 import org.apache.hadoop.classification.InterfaceAudience;
033 import org.apache.hadoop.classification.InterfaceStability;
034
035 /** A base-class for Writables which store themselves compressed and lazily
036 * inflate on field access. This is useful for large objects whose fields are
037 * not be altered during a map or reduce operation: leaving the field data
038 * compressed makes copying the instance from one file to another much
039 * faster. */
040 @InterfaceAudience.Public
041 @InterfaceStability.Stable
042 public abstract class CompressedWritable implements Writable {
043 // if non-null, the compressed field data of this instance.
044 private byte[] compressed;
045
046 public CompressedWritable() {}
047
048 @Override
049 public final void readFields(DataInput in) throws IOException {
050 compressed = new byte[in.readInt()];
051 in.readFully(compressed, 0, compressed.length);
052 }
053
054 /** Must be called by all methods which access fields to ensure that the data
055 * has been uncompressed. */
056 protected void ensureInflated() {
057 if (compressed != null) {
058 try {
059 ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
060 DataInput inflater =
061 new DataInputStream(new InflaterInputStream(deflated));
062 readFieldsCompressed(inflater);
063 compressed = null;
064 } catch (IOException e) {
065 throw new RuntimeException(e);
066 }
067 }
068 }
069
070 /** Subclasses implement this instead of {@link #readFields(DataInput)}. */
071 protected abstract void readFieldsCompressed(DataInput in)
072 throws IOException;
073
074 @Override
075 public final void write(DataOutput out) throws IOException {
076 if (compressed == null) {
077 ByteArrayOutputStream deflated = new ByteArrayOutputStream();
078 Deflater deflater = new Deflater(Deflater.BEST_SPEED);
079 DataOutputStream dout =
080 new DataOutputStream(new DeflaterOutputStream(deflated, deflater));
081 writeCompressed(dout);
082 dout.close();
083 deflater.end();
084 compressed = deflated.toByteArray();
085 }
086 out.writeInt(compressed.length);
087 out.write(compressed);
088 }
089
090 /** Subclasses implement this instead of {@link #write(DataOutput)}. */
091 protected abstract void writeCompressed(DataOutput out) throws IOException;
092
093 }