001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.io.compress; 020 021import java.io.IOException; 022 023import org.apache.hadoop.classification.InterfaceAudience; 024import org.apache.hadoop.classification.InterfaceStability; 025 026/** 027 * Specification of a stream-based 'de-compressor' which can be 028 * plugged into a {@link CompressionInputStream} to compress data. 029 * This is modelled after {@link java.util.zip.Inflater} 030 * 031 */ 032@InterfaceAudience.Public 033@InterfaceStability.Evolving 034public interface Decompressor { 035 /** 036 * Sets input data for decompression. 037 * This should be called if and only if {@link #needsInput()} returns 038 * <code>true</code> indicating that more input data is required. 039 * (Both native and non-native versions of various Decompressors require 040 * that the data passed in via <code>b[]</code> remain unmodified until 041 * the caller is explicitly notified--via {@link #needsInput()}--that the 042 * buffer may be safely modified. With this requirement, an extra 043 * buffer-copy can be avoided.) 044 * 045 * @param b Input data 046 * @param off Start offset 047 * @param len Length 048 */ 049 public void setInput(byte[] b, int off, int len); 050 051 /** 052 * Returns <code>true</code> if the input data buffer is empty and 053 * {@link #setInput(byte[], int, int)} should be called to 054 * provide more input. 055 * 056 * @return <code>true</code> if the input data buffer is empty and 057 * {@link #setInput(byte[], int, int)} should be called in 058 * order to provide more input. 059 */ 060 public boolean needsInput(); 061 062 /** 063 * Sets preset dictionary for compression. A preset dictionary 064 * is used when the history buffer can be predetermined. 065 * 066 * @param b Dictionary data bytes 067 * @param off Start offset 068 * @param len Length 069 */ 070 public void setDictionary(byte[] b, int off, int len); 071 072 /** 073 * Returns <code>true</code> if a preset dictionary is needed for decompression. 074 * @return <code>true</code> if a preset dictionary is needed for decompression 075 */ 076 public boolean needsDictionary(); 077 078 /** 079 * Returns <code>true</code> if the end of the decompressed 080 * data output stream has been reached. Indicates a concatenated data stream 081 * when finished() returns <code>true</code> and {@link #getRemaining()} 082 * returns a positive value. finished() will be reset with the 083 * {@link #reset()} method. 084 * @return <code>true</code> if the end of the decompressed 085 * data output stream has been reached. 086 */ 087 public boolean finished(); 088 089 /** 090 * Fills specified buffer with uncompressed data. Returns actual number 091 * of bytes of uncompressed data. A return value of 0 indicates that 092 * {@link #needsInput()} should be called in order to determine if more 093 * input data is required. 094 * 095 * @param b Buffer for the compressed data 096 * @param off Start offset of the data 097 * @param len Size of the buffer 098 * @return The actual number of bytes of compressed data. 099 * @throws IOException 100 */ 101 public int decompress(byte[] b, int off, int len) throws IOException; 102 103 /** 104 * Returns the number of bytes remaining in the compressed data buffer. 105 * Indicates a concatenated data stream if {@link #finished()} returns 106 * <code>true</code> and getRemaining() returns a positive value. If 107 * {@link #finished()} returns <code>true</code> and getRemaining() returns 108 * a zero value, indicates that the end of data stream has been reached and 109 * is not a concatenated data stream. 110 * @return The number of bytes remaining in the compressed data buffer. 111 */ 112 public int getRemaining(); 113 114 /** 115 * Resets decompressor and input and output buffers so that a new set of 116 * input data can be processed. If {@link #finished()}} returns 117 * <code>true</code> and {@link #getRemaining()} returns a positive value, 118 * reset() is called before processing of the next data stream in the 119 * concatenated data stream. {@link #finished()} will be reset and will 120 * return <code>false</code> when reset() is called. 121 */ 122 public void reset(); 123 124 /** 125 * Closes the decompressor and discards any unprocessed input. 126 */ 127 public void end(); 128}