/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018
019package org.apache.hadoop.io.compress;
020
021import java.io.IOException;
022
023import org.apache.hadoop.classification.InterfaceAudience;
024import org.apache.hadoop.classification.InterfaceStability;
025
026/**
027 * Specification of a stream-based 'de-compressor' which can be  
028 * plugged into a {@link CompressionInputStream} to compress data.
029 * This is modelled after {@link java.util.zip.Inflater}
030 * 
031 */
032@InterfaceAudience.Public
033@InterfaceStability.Evolving
034public interface Decompressor {
035  /**
036   * Sets input data for decompression. 
037   * This should be called if and only if {@link #needsInput()} returns 
038   * <code>true</code> indicating that more input data is required.
039   * (Both native and non-native versions of various Decompressors require
040   * that the data passed in via <code>b[]</code> remain unmodified until
041   * the caller is explicitly notified--via {@link #needsInput()}--that the
042   * buffer may be safely modified.  With this requirement, an extra
043   * buffer-copy can be avoided.)
044   * 
045   * @param b Input data
046   * @param off Start offset
047   * @param len Length
048   */
049  public void setInput(byte[] b, int off, int len);
050  
051  /**
052   * Returns <code>true</code> if the input data buffer is empty and 
053   * {@link #setInput(byte[], int, int)} should be called to
054   * provide more input. 
055   * 
056   * @return <code>true</code> if the input data buffer is empty and 
057   * {@link #setInput(byte[], int, int)} should be called in
058   * order to provide more input.
059   */
060  public boolean needsInput();
061  
062  /**
063   * Sets preset dictionary for compression. A preset dictionary
064   * is used when the history buffer can be predetermined. 
065   *
066   * @param b Dictionary data bytes
067   * @param off Start offset
068   * @param len Length
069   */
070  public void setDictionary(byte[] b, int off, int len);
071  
072  /**
073   * Returns <code>true</code> if a preset dictionary is needed for decompression.
074   * @return <code>true</code> if a preset dictionary is needed for decompression
075   */
076  public boolean needsDictionary();
077
078  /**
079   * Returns <code>true</code> if the end of the decompressed 
080   * data output stream has been reached. Indicates a concatenated data stream
081   * when finished() returns <code>true</code> and {@link #getRemaining()}
082   * returns a positive value. finished() will be reset with the
083   * {@link #reset()} method.
084   * @return <code>true</code> if the end of the decompressed
085   * data output stream has been reached.
086   */
087  public boolean finished();
088  
089  /**
090   * Fills specified buffer with uncompressed data. Returns actual number
091   * of bytes of uncompressed data. A return value of 0 indicates that
092   * {@link #needsInput()} should be called in order to determine if more
093   * input data is required.
094   * 
095   * @param b Buffer for the compressed data
096   * @param off Start offset of the data
097   * @param len Size of the buffer
098   * @return The actual number of bytes of compressed data.
099   * @throws IOException
100   */
101  public int decompress(byte[] b, int off, int len) throws IOException;
102
103  /**
104   * Returns the number of bytes remaining in the compressed data buffer.
105   * Indicates a concatenated data stream if {@link #finished()} returns
106   * <code>true</code> and getRemaining() returns a positive value. If
107   * {@link #finished()} returns <code>true</code> and getRemaining() returns
108   * a zero value, indicates that the end of data stream has been reached and
109   * is not a concatenated data stream. 
110   * @return The number of bytes remaining in the compressed data buffer.
111   */
112  public int getRemaining();
113
114  /**
115   * Resets decompressor and input and output buffers so that a new set of
116   * input data can be processed. If {@link #finished()}} returns
117   * <code>true</code> and {@link #getRemaining()} returns a positive value,
118   * reset() is called before processing of the next data stream in the
119   * concatenated data stream. {@link #finished()} will be reset and will
120   * return <code>false</code> when reset() is called.
121   */
122  public void reset();
123
124  /**
125   * Closes the decompressor and discards any unprocessed input.
126   */
127  public void end(); 
128}