001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.io.compress;
020    
021    import java.io.IOException;
022    
023    import org.apache.hadoop.classification.InterfaceAudience;
024    import org.apache.hadoop.classification.InterfaceStability;
025    import org.apache.hadoop.conf.Configuration;
026    
027    /**
028     * Specification of a stream-based 'compressor' which can be  
029     * plugged into a {@link CompressionOutputStream} to compress data.
030     * This is modelled after {@link java.util.zip.Deflater}
031     * 
032     */
033    @InterfaceAudience.Public
034    @InterfaceStability.Evolving
035    public interface Compressor {
036      /**
037       * Sets input data for compression. 
038       * This should be called whenever #needsInput() returns 
039       * <code>true</code> indicating that more input data is required.
040       * 
041       * @param b Input data
042       * @param off Start offset
043       * @param len Length
044       */
045      public void setInput(byte[] b, int off, int len);
046      
047      /**
048       * Returns true if the input data buffer is empty and 
049       * #setInput() should be called to provide more input. 
050       * 
051       * @return <code>true</code> if the input data buffer is empty and 
052       * #setInput() should be called in order to provide more input.
053       */
054      public boolean needsInput();
055      
056      /**
057       * Sets preset dictionary for compression. A preset dictionary 
058       * is used when the history buffer can be predetermined. 
059       *
060       * @param b Dictionary data bytes
061       * @param off Start offset
062       * @param len Length
063       */
064      public void setDictionary(byte[] b, int off, int len);
065    
066      /**
067       * Return number of uncompressed bytes input so far.
068       */
069      public long getBytesRead();
070    
071      /**
072       * Return number of compressed bytes output so far.
073       */
074      public long getBytesWritten();
075    
076      /**
077       * When called, indicates that compression should end
078       * with the current contents of the input buffer.
079       */
080      public void finish();
081      
082      /**
083       * Returns true if the end of the compressed 
084       * data output stream has been reached.
085       * @return <code>true</code> if the end of the compressed
086       * data output stream has been reached.
087       */
088      public boolean finished();
089      
090      /**
091       * Fills specified buffer with compressed data. Returns actual number
092       * of bytes of compressed data. A return value of 0 indicates that
093       * needsInput() should be called in order to determine if more input
094       * data is required.
095       * 
096       * @param b Buffer for the compressed data
097       * @param off Start offset of the data
098       * @param len Size of the buffer
099       * @return The actual number of bytes of compressed data.
100       */
101      public int compress(byte[] b, int off, int len) throws IOException;
102      
103      /**
104       * Resets compressor so that a new set of input data can be processed.
105       */
106      public void reset();
107      
108      /**
109       * Closes the compressor and discards any unprocessed input.
110       */
111      public void end();
112    
113      /**
114       * Prepare the compressor to be used in a new stream with settings defined in
115       * the given Configuration
116       * 
117       * @param conf Configuration from which new setting are fetched
118       */
119      public void reinit(Configuration conf);
120    }