001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.io.compress;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.io.InputStream;
024
025import org.apache.hadoop.classification.InterfaceAudience;
026import org.apache.hadoop.classification.InterfaceStability;
027import org.apache.hadoop.io.compress.Decompressor;
028
029@InterfaceAudience.Public
030@InterfaceStability.Evolving
031public class DecompressorStream extends CompressionInputStream {
032  protected Decompressor decompressor = null;
033  protected byte[] buffer;
034  protected boolean eof = false;
035  protected boolean closed = false;
036  private int lastBytesSent = 0;
037
038  public DecompressorStream(InputStream in, Decompressor decompressor,
039                            int bufferSize)
040  throws IOException {
041    super(in);
042
043    if (decompressor == null) {
044      throw new NullPointerException();
045    } else if (bufferSize <= 0) {
046      throw new IllegalArgumentException("Illegal bufferSize");
047    }
048
049    this.decompressor = decompressor;
050    buffer = new byte[bufferSize];
051  }
052
053  public DecompressorStream(InputStream in, Decompressor decompressor)
054  throws IOException {
055    this(in, decompressor, 512);
056  }
057
058  /**
059   * Allow derived classes to directly set the underlying stream.
060   * 
061   * @param in Underlying input stream.
062   * @throws IOException
063   */
064  protected DecompressorStream(InputStream in) throws IOException {
065    super(in);
066  }
067  
068  private byte[] oneByte = new byte[1];
069  @Override
070  public int read() throws IOException {
071    checkStream();
072    return (read(oneByte, 0, oneByte.length) == -1) ? -1 : (oneByte[0] & 0xff);
073  }
074
075  @Override
076  public int read(byte[] b, int off, int len) throws IOException {
077    checkStream();
078    
079    if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
080      throw new IndexOutOfBoundsException();
081    } else if (len == 0) {
082      return 0;
083    }
084
085    return decompress(b, off, len);
086  }
087
088  protected int decompress(byte[] b, int off, int len) throws IOException {
089    int n = 0;
090
091    while ((n = decompressor.decompress(b, off, len)) == 0) {
092      if (decompressor.needsDictionary()) {
093        eof = true;
094        return -1;
095      }
096
097      if (decompressor.finished()) {
098        // First see if there was any leftover buffered input from previous
099        // stream; if not, attempt to refill buffer.  If refill -> EOF, we're
100        // all done; else reset, fix up input buffer, and get ready for next
101        // concatenated substream/"member".
102        int nRemaining = decompressor.getRemaining();
103        if (nRemaining == 0) {
104          int m = getCompressedData();
105          if (m == -1) {
106            // apparently the previous end-of-stream was also end-of-file:
107            // return success, as if we had never called getCompressedData()
108            eof = true;
109            return -1;
110          }
111          decompressor.reset();
112          decompressor.setInput(buffer, 0, m);
113          lastBytesSent = m;
114        } else {
115          // looks like it's a concatenated stream:  reset low-level zlib (or
116          // other engine) and buffers, then "resend" remaining input data
117          decompressor.reset();
118          int leftoverOffset = lastBytesSent - nRemaining;
119          assert (leftoverOffset >= 0);
120          // this recopies userBuf -> direct buffer if using native libraries:
121          decompressor.setInput(buffer, leftoverOffset, nRemaining);
122          // NOTE:  this is the one place we do NOT want to save the number
123          // of bytes sent (nRemaining here) into lastBytesSent:  since we
124          // are resending what we've already sent before, offset is nonzero
125          // in general (only way it could be zero is if it already equals
126          // nRemaining), which would then screw up the offset calculation
127          // _next_ time around.  IOW, getRemaining() is in terms of the
128          // original, zero-offset bufferload, so lastBytesSent must be as
129          // well.  Cheesy ASCII art:
130          //
131          //          <------------ m, lastBytesSent ----------->
132          //          +===============================================+
133          // buffer:  |1111111111|22222222222222222|333333333333|     |
134          //          +===============================================+
135          //     #1:  <-- off -->|<-------- nRemaining --------->
136          //     #2:  <----------- off ----------->|<-- nRem. -->
137          //     #3:  (final substream:  nRemaining == 0; eof = true)
138          //
139          // If lastBytesSent is anything other than m, as shown, then "off"
140          // will be calculated incorrectly.
141        }
142      } else if (decompressor.needsInput()) {
143        int m = getCompressedData();
144        if (m == -1) {
145          throw new EOFException("Unexpected end of input stream");
146        }
147        decompressor.setInput(buffer, 0, m);
148        lastBytesSent = m;
149      }
150    }
151
152    return n;
153  }
154
155  protected int getCompressedData() throws IOException {
156    checkStream();
157  
158    // note that the _caller_ is now required to call setInput() or throw
159    return in.read(buffer, 0, buffer.length);
160  }
161
162  protected void checkStream() throws IOException {
163    if (closed) {
164      throw new IOException("Stream closed");
165    }
166  }
167  
168  @Override
169  public void resetState() throws IOException {
170    decompressor.reset();
171  }
172
173  private byte[] skipBytes = new byte[512];
174  @Override
175  public long skip(long n) throws IOException {
176    // Sanity checks
177    if (n < 0) {
178      throw new IllegalArgumentException("negative skip length");
179    }
180    checkStream();
181    
182    // Read 'n' bytes
183    int skipped = 0;
184    while (skipped < n) {
185      int len = Math.min(((int)n - skipped), skipBytes.length);
186      len = read(skipBytes, 0, len);
187      if (len == -1) {
188        eof = true;
189        break;
190      }
191      skipped += len;
192    }
193    return skipped;
194  }
195
196  @Override
197  public int available() throws IOException {
198    checkStream();
199    return (eof) ? 0 : 1;
200  }
201
202  @Override
203  public void close() throws IOException {
204    if (!closed) {
205      in.close();
206      closed = true;
207    }
208  }
209
210  @Override
211  public boolean markSupported() {
212    return false;
213  }
214
215  @Override
216  public synchronized void mark(int readlimit) {
217  }
218
219  @Override
220  public synchronized void reset() throws IOException {
221    throw new IOException("mark/reset not supported");
222  }
223
224}