001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.io;
020    
021    import java.io.IOException;
022    import java.io.DataInput;
023    import java.io.DataOutput;
024    import java.io.DataOutputStream;
025    import java.io.DataInputStream;
026    import java.io.ByteArrayOutputStream;
027    import java.io.ByteArrayInputStream;
028    import java.util.zip.Deflater;
029    import java.util.zip.DeflaterOutputStream;
030    import java.util.zip.InflaterInputStream;
031    
032    import org.apache.hadoop.classification.InterfaceAudience;
033    import org.apache.hadoop.classification.InterfaceStability;
034    
035    /** A base-class for Writables which store themselves compressed and lazily
036     * inflate on field access.  This is useful for large objects whose fields are
037     * not be altered during a map or reduce operation: leaving the field data
038     * compressed makes copying the instance from one file to another much
039     * faster. */
040    @InterfaceAudience.Public
041    @InterfaceStability.Stable
042    public abstract class CompressedWritable implements Writable {
043      // if non-null, the compressed field data of this instance.
044      private byte[] compressed;
045    
046      public CompressedWritable() {}
047    
048      @Override
049      public final void readFields(DataInput in) throws IOException {
050        compressed = new byte[in.readInt()];
051        in.readFully(compressed, 0, compressed.length);
052      }
053    
054      /** Must be called by all methods which access fields to ensure that the data
055       * has been uncompressed. */
056      protected void ensureInflated() {
057        if (compressed != null) {
058          try {
059            ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
060            DataInput inflater =
061              new DataInputStream(new InflaterInputStream(deflated));
062            readFieldsCompressed(inflater);
063            compressed = null;
064          } catch (IOException e) {
065            throw new RuntimeException(e);
066          }
067        }
068      }
069    
070      /** Subclasses implement this instead of {@link #readFields(DataInput)}. */
071      protected abstract void readFieldsCompressed(DataInput in)
072        throws IOException;
073    
074      @Override
075      public final void write(DataOutput out) throws IOException {
076        if (compressed == null) {
077          ByteArrayOutputStream deflated = new ByteArrayOutputStream();
078          Deflater deflater = new Deflater(Deflater.BEST_SPEED);
079          DataOutputStream dout =
080            new DataOutputStream(new DeflaterOutputStream(deflated, deflater));
081          writeCompressed(dout);
082          dout.close();
083          deflater.end();
084          compressed = deflated.toByteArray();
085        }
086        out.writeInt(compressed.length);
087        out.write(compressed);
088      }
089    
090      /** Subclasses implement this instead of {@link #write(DataOutput)}. */
091      protected abstract void writeCompressed(DataOutput out) throws IOException;
092    
093    }