001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.io.compress; 020 021 import java.io.IOException; 022 023 import org.apache.hadoop.classification.InterfaceAudience; 024 import org.apache.hadoop.classification.InterfaceStability; 025 import org.apache.hadoop.conf.Configuration; 026 027 /** 028 * Specification of a stream-based 'compressor' which can be 029 * plugged into a {@link CompressionOutputStream} to compress data. 030 * This is modelled after {@link java.util.zip.Deflater} 031 * 032 */ 033 @InterfaceAudience.Public 034 @InterfaceStability.Evolving 035 public interface Compressor { 036 /** 037 * Sets input data for compression. 038 * This should be called whenever #needsInput() returns 039 * <code>true</code> indicating that more input data is required. 040 * 041 * @param b Input data 042 * @param off Start offset 043 * @param len Length 044 */ 045 public void setInput(byte[] b, int off, int len); 046 047 /** 048 * Returns true if the input data buffer is empty and 049 * #setInput() should be called to provide more input. 050 * 051 * @return <code>true</code> if the input data buffer is empty and 052 * #setInput() should be called in order to provide more input. 053 */ 054 public boolean needsInput(); 055 056 /** 057 * Sets preset dictionary for compression. A preset dictionary 058 * is used when the history buffer can be predetermined. 059 * 060 * @param b Dictionary data bytes 061 * @param off Start offset 062 * @param len Length 063 */ 064 public void setDictionary(byte[] b, int off, int len); 065 066 /** 067 * Return number of uncompressed bytes input so far. 068 */ 069 public long getBytesRead(); 070 071 /** 072 * Return number of compressed bytes output so far. 073 */ 074 public long getBytesWritten(); 075 076 /** 077 * When called, indicates that compression should end 078 * with the current contents of the input buffer. 079 */ 080 public void finish(); 081 082 /** 083 * Returns true if the end of the compressed 084 * data output stream has been reached. 085 * @return <code>true</code> if the end of the compressed 086 * data output stream has been reached. 087 */ 088 public boolean finished(); 089 090 /** 091 * Fills specified buffer with compressed data. Returns actual number 092 * of bytes of compressed data. A return value of 0 indicates that 093 * needsInput() should be called in order to determine if more input 094 * data is required. 095 * 096 * @param b Buffer for the compressed data 097 * @param off Start offset of the data 098 * @param len Size of the buffer 099 * @return The actual number of bytes of compressed data. 100 */ 101 public int compress(byte[] b, int off, int len) throws IOException; 102 103 /** 104 * Resets compressor so that a new set of input data can be processed. 105 */ 106 public void reset(); 107 108 /** 109 * Closes the compressor and discards any unprocessed input. 110 */ 111 public void end(); 112 113 /** 114 * Prepare the compressor to be used in a new stream with settings defined in 115 * the given Configuration 116 * 117 * @param conf Configuration from which new setting are fetched 118 */ 119 public void reinit(Configuration conf); 120 }