001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.io.compress;
020
021 import java.io.IOException;
022
023 import org.apache.hadoop.classification.InterfaceAudience;
024 import org.apache.hadoop.classification.InterfaceStability;
025 import org.apache.hadoop.conf.Configuration;
026
027 /**
028 * Specification of a stream-based 'compressor' which can be
029 * plugged into a {@link CompressionOutputStream} to compress data.
030 * This is modelled after {@link java.util.zip.Deflater}
031 *
032 */
033 @InterfaceAudience.Public
034 @InterfaceStability.Evolving
035 public interface Compressor {
036 /**
037 * Sets input data for compression.
038 * This should be called whenever #needsInput() returns
039 * <code>true</code> indicating that more input data is required.
040 *
041 * @param b Input data
042 * @param off Start offset
043 * @param len Length
044 */
045 public void setInput(byte[] b, int off, int len);
046
047 /**
048 * Returns true if the input data buffer is empty and
049 * #setInput() should be called to provide more input.
050 *
051 * @return <code>true</code> if the input data buffer is empty and
052 * #setInput() should be called in order to provide more input.
053 */
054 public boolean needsInput();
055
056 /**
057 * Sets preset dictionary for compression. A preset dictionary
058 * is used when the history buffer can be predetermined.
059 *
060 * @param b Dictionary data bytes
061 * @param off Start offset
062 * @param len Length
063 */
064 public void setDictionary(byte[] b, int off, int len);
065
066 /**
067 * Return number of uncompressed bytes input so far.
068 */
069 public long getBytesRead();
070
071 /**
072 * Return number of compressed bytes output so far.
073 */
074 public long getBytesWritten();
075
076 /**
077 * When called, indicates that compression should end
078 * with the current contents of the input buffer.
079 */
080 public void finish();
081
082 /**
083 * Returns true if the end of the compressed
084 * data output stream has been reached.
085 * @return <code>true</code> if the end of the compressed
086 * data output stream has been reached.
087 */
088 public boolean finished();
089
090 /**
091 * Fills specified buffer with compressed data. Returns actual number
092 * of bytes of compressed data. A return value of 0 indicates that
093 * needsInput() should be called in order to determine if more input
094 * data is required.
095 *
096 * @param b Buffer for the compressed data
097 * @param off Start offset of the data
098 * @param len Size of the buffer
099 * @return The actual number of bytes of compressed data.
100 */
101 public int compress(byte[] b, int off, int len) throws IOException;
102
103 /**
104 * Resets compressor so that a new set of input data can be processed.
105 */
106 public void reset();
107
108 /**
109 * Closes the compressor and discards any unprocessed input.
110 */
111 public void end();
112
113 /**
114 * Prepare the compressor to be used in a new stream with settings defined in
115 * the given Configuration
116 *
117 * @param conf Configuration from which new setting are fetched
118 */
119 public void reinit(Configuration conf);
120 }