001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.io.compress;
020
021import java.io.IOException;
022
023import org.apache.hadoop.classification.InterfaceAudience;
024import org.apache.hadoop.classification.InterfaceStability;
025import org.apache.hadoop.conf.Configuration;
026
027/**
028 * Specification of a stream-based 'compressor' which can be  
029 * plugged into a {@link CompressionOutputStream} to compress data.
030 * This is modelled after {@link java.util.zip.Deflater}
031 * 
032 */
033@InterfaceAudience.Public
034@InterfaceStability.Evolving
035public interface Compressor {
036  /**
037   * Sets input data for compression. 
038   * This should be called whenever #needsInput() returns 
039   * <code>true</code> indicating that more input data is required.
040   * 
041   * @param b Input data
042   * @param off Start offset
043   * @param len Length
044   */
045  public void setInput(byte[] b, int off, int len);
046  
047  /**
048   * Returns true if the input data buffer is empty and 
049   * #setInput() should be called to provide more input. 
050   * 
051   * @return <code>true</code> if the input data buffer is empty and 
052   * #setInput() should be called in order to provide more input.
053   */
054  public boolean needsInput();
055  
056  /**
057   * Sets preset dictionary for compression. A preset dictionary 
058   * is used when the history buffer can be predetermined. 
059   *
060   * @param b Dictionary data bytes
061   * @param off Start offset
062   * @param len Length
063   */
064  public void setDictionary(byte[] b, int off, int len);
065
066  /**
067   * Return number of uncompressed bytes input so far.
068   */
069  public long getBytesRead();
070
071  /**
072   * Return number of compressed bytes output so far.
073   */
074  public long getBytesWritten();
075
076  /**
077   * When called, indicates that compression should end
078   * with the current contents of the input buffer.
079   */
080  public void finish();
081  
082  /**
083   * Returns true if the end of the compressed 
084   * data output stream has been reached.
085   * @return <code>true</code> if the end of the compressed
086   * data output stream has been reached.
087   */
088  public boolean finished();
089  
090  /**
091   * Fills specified buffer with compressed data. Returns actual number
092   * of bytes of compressed data. A return value of 0 indicates that
093   * needsInput() should be called in order to determine if more input
094   * data is required.
095   * 
096   * @param b Buffer for the compressed data
097   * @param off Start offset of the data
098   * @param len Size of the buffer
099   * @return The actual number of bytes of compressed data.
100   */
101  public int compress(byte[] b, int off, int len) throws IOException;
102  
103  /**
104   * Resets compressor so that a new set of input data can be processed.
105   */
106  public void reset();
107  
108  /**
109   * Closes the compressor and discards any unprocessed input.
110   */
111  public void end();
112
113  /**
114   * Prepare the compressor to be used in a new stream with settings defined in
115   * the given Configuration
116   * 
117   * @param conf Configuration from which new setting are fetched
118   */
119  public void reinit(Configuration conf);
120}