001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.io; 020 021import java.io.IOException; 022import java.io.DataInput; 023import java.io.DataOutput; 024import java.io.DataOutputStream; 025import java.io.DataInputStream; 026import java.io.ByteArrayOutputStream; 027import java.io.ByteArrayInputStream; 028import java.util.zip.Deflater; 029import java.util.zip.DeflaterOutputStream; 030import java.util.zip.InflaterInputStream; 031 032import org.apache.hadoop.classification.InterfaceAudience; 033import org.apache.hadoop.classification.InterfaceStability; 034 035/** A base-class for Writables which store themselves compressed and lazily 036 * inflate on field access. This is useful for large objects whose fields are 037 * not be altered during a map or reduce operation: leaving the field data 038 * compressed makes copying the instance from one file to another much 039 * faster. */ 040@InterfaceAudience.Public 041@InterfaceStability.Stable 042public abstract class CompressedWritable implements Writable { 043 // if non-null, the compressed field data of this instance. 044 private byte[] compressed; 045 046 public CompressedWritable() {} 047 048 public final void readFields(DataInput in) throws IOException { 049 compressed = new byte[in.readInt()]; 050 in.readFully(compressed, 0, compressed.length); 051 } 052 053 /** Must be called by all methods which access fields to ensure that the data 054 * has been uncompressed. */ 055 protected void ensureInflated() { 056 if (compressed != null) { 057 try { 058 ByteArrayInputStream deflated = new ByteArrayInputStream(compressed); 059 DataInput inflater = 060 new DataInputStream(new InflaterInputStream(deflated)); 061 readFieldsCompressed(inflater); 062 compressed = null; 063 } catch (IOException e) { 064 throw new RuntimeException(e); 065 } 066 } 067 } 068 069 /** Subclasses implement this instead of {@link #readFields(DataInput)}. */ 070 protected abstract void readFieldsCompressed(DataInput in) 071 throws IOException; 072 073 public final void write(DataOutput out) throws IOException { 074 if (compressed == null) { 075 ByteArrayOutputStream deflated = new ByteArrayOutputStream(); 076 Deflater deflater = new Deflater(Deflater.BEST_SPEED); 077 DataOutputStream dout = 078 new DataOutputStream(new DeflaterOutputStream(deflated, deflater)); 079 writeCompressed(dout); 080 dout.close(); 081 deflater.end(); 082 compressed = deflated.toByteArray(); 083 } 084 out.writeInt(compressed.length); 085 out.write(compressed); 086 } 087 088 /** Subclasses implement this instead of {@link #write(DataOutput)}. */ 089 protected abstract void writeCompressed(DataOutput out) throws IOException; 090 091}