001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.io.compress;
020    
021    import java.io.IOException;
022    
023    import org.apache.hadoop.classification.InterfaceAudience;
024    import org.apache.hadoop.classification.InterfaceStability;
025    
026    /**
027     * Specification of a stream-based 'de-compressor' which can be
028     * plugged into a {@link CompressionInputStream} to decompress data.
029     * This is modelled after {@link java.util.zip.Inflater}
030     *
031     */
032    @InterfaceAudience.Public
033    @InterfaceStability.Evolving
034    public interface Decompressor {
035      /**
036       * Sets input data for decompression.
037       * This should be called if and only if {@link #needsInput()} returns
038       * <code>true</code> indicating that more input data is required.
039       * (Both native and non-native versions of various Decompressors require
040       * that the data passed in via <code>b[]</code> remain unmodified until
041       * the caller is explicitly notified--via {@link #needsInput()}--that the
042       * buffer may be safely modified.  With this requirement, an extra
043       * buffer-copy can be avoided.)
044       *
045       * @param b Input data
046       * @param off Start offset
047       * @param len Length
048       */
049      public void setInput(byte[] b, int off, int len);
050      
051      /**
052       * Returns <code>true</code> if the input data buffer is empty and
053       * {@link #setInput(byte[], int, int)} should be called to
054       * provide more input.
055       *
056       * @return <code>true</code> if the input data buffer is empty and
057       * {@link #setInput(byte[], int, int)} should be called in
058       * order to provide more input.
059       */
060      public boolean needsInput();
061      
062      /**
063       * Sets preset dictionary for decompression. A preset dictionary
064       * is used when the history buffer can be predetermined.
065       *
066       * @param b Dictionary data bytes
067       * @param off Start offset
068       * @param len Length
069       */
070      public void setDictionary(byte[] b, int off, int len);
071      
072      /**
073       * Returns <code>true</code> if a preset dictionary is needed for decompression.
074       * @return <code>true</code> if a preset dictionary is needed for decompression
075       */
076      public boolean needsDictionary();
077    
078      /**
079       * Returns <code>true</code> if the end of the decompressed
080       * data output stream has been reached. Indicates a concatenated data stream
081       * when {@code finished()} returns <code>true</code> and {@link #getRemaining()}
082       * returns a positive value. {@code finished()} will be reset with the
083       * {@link #reset()} method.
084       * @return <code>true</code> if the end of the decompressed
085       * data output stream has been reached.
086       */
087      public boolean finished();
088      
089      /**
090       * Fills specified buffer with uncompressed data. Returns actual number
091       * of bytes of uncompressed data. A return value of 0 indicates that
092       * {@link #needsInput()} should be called in order to determine if more
093       * input data is required.
094       *
095       * @param b Buffer for the uncompressed data
096       * @param off Start offset of the data
097       * @param len Size of the buffer
098       * @return The actual number of bytes of uncompressed data.
099       * @throws IOException if the implementation reports an error
100       */
101      public int decompress(byte[] b, int off, int len) throws IOException;
102    
103      /**
104       * Returns the number of bytes remaining in the compressed data buffer.
105       * Indicates a concatenated data stream if {@link #finished()} returns
106       * <code>true</code> and {@code getRemaining()} returns a positive value. If
107       * {@link #finished()} returns <code>true</code> and {@code getRemaining()} returns
108       * a zero value, indicates that the end of data stream has been reached and
109       * is not a concatenated data stream.
110       * @return The number of bytes remaining in the compressed data buffer.
111       */
112      public int getRemaining();
113    
114      /**
115       * Resets decompressor and input and output buffers so that a new set of
116       * input data can be processed. If {@link #finished()} returns
117       * <code>true</code> and {@link #getRemaining()} returns a positive value,
118       * {@code reset()} is called before processing of the next data stream in the
119       * concatenated data stream. {@link #finished()} will be reset and will
120       * return <code>false</code> when {@code reset()} is called.
121       */
122      public void reset();
123    
124      /**
125       * Closes the decompressor and discards any unprocessed input.
126       */
127      public void end(); 
128    }