001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.io; 020 021import java.io.IOException; 022import java.io.DataInput; 023import java.io.DataOutput; 024import java.io.DataOutputStream; 025import java.io.DataInputStream; 026import java.io.ByteArrayOutputStream; 027import java.io.ByteArrayInputStream; 028import java.util.zip.Deflater; 029import java.util.zip.DeflaterOutputStream; 030import java.util.zip.InflaterInputStream; 031 032import org.apache.hadoop.classification.InterfaceAudience; 033import org.apache.hadoop.classification.InterfaceStability; 034 035/** A base-class for Writables which store themselves compressed and lazily 036 * inflate on field access. This is useful for large objects whose fields are 037 * not be altered during a map or reduce operation: leaving the field data 038 * compressed makes copying the instance from one file to another much 039 * faster. */ 040@InterfaceAudience.Public 041@InterfaceStability.Stable 042public abstract class CompressedWritable implements Writable { 043 // if non-null, the compressed field data of this instance. 044 private byte[] compressed; 045 046 public CompressedWritable() {} 047 048 @Override 049 public final void readFields(DataInput in) throws IOException { 050 compressed = new byte[in.readInt()]; 051 in.readFully(compressed, 0, compressed.length); 052 } 053 054 /** Must be called by all methods which access fields to ensure that the data 055 * has been uncompressed. */ 056 protected void ensureInflated() { 057 if (compressed != null) { 058 try { 059 ByteArrayInputStream deflated = new ByteArrayInputStream(compressed); 060 DataInput inflater = 061 new DataInputStream(new InflaterInputStream(deflated)); 062 readFieldsCompressed(inflater); 063 compressed = null; 064 } catch (IOException e) { 065 throw new RuntimeException(e); 066 } 067 } 068 } 069 070 /** Subclasses implement this instead of {@link #readFields(DataInput)}. */ 071 protected abstract void readFieldsCompressed(DataInput in) 072 throws IOException; 073 074 @Override 075 public final void write(DataOutput out) throws IOException { 076 if (compressed == null) { 077 ByteArrayOutputStream deflated = new ByteArrayOutputStream(); 078 Deflater deflater = new Deflater(Deflater.BEST_SPEED); 079 DataOutputStream dout = 080 new DataOutputStream(new DeflaterOutputStream(deflated, deflater)); 081 writeCompressed(dout); 082 dout.close(); 083 deflater.end(); 084 compressed = deflated.toByteArray(); 085 } 086 out.writeInt(compressed.length); 087 out.write(compressed); 088 } 089 090 /** Subclasses implement this instead of {@link #write(DataOutput)}. */ 091 protected abstract void writeCompressed(DataOutput out) throws IOException; 092 093}