001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements. See the NOTICE file distributed with this
004     * work for additional information regarding copyright ownership. The ASF
005     * licenses this file to you under the Apache License, Version 2.0 (the
006     * "License"); you may not use this file except in compliance with the License.
007     * You may obtain a copy of the License at
008     * 
009     * http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013     * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014     * License for the specific language governing permissions and limitations under
015     * the License.
016     */
017    
018    package org.apache.hadoop.io.file.tfile;
019    
020    import java.io.DataInput;
021    import java.io.DataOutput;
022    import java.io.IOException;
023    import java.util.Comparator;
024    import java.util.List;
025    
026    import org.apache.hadoop.classification.InterfaceAudience;
027    import org.apache.hadoop.classification.InterfaceStability;
028    import org.apache.hadoop.io.Text;
029    
030    /**
031     * Supporting Utility classes used by TFile, and shared by users of TFile.
032     */
033    @InterfaceAudience.Public
034    @InterfaceStability.Evolving
035    public final class Utils {
036    
037      /**
038       * Prevent the instantiation of Utils.
039       */
040      private Utils() {
041        // nothing
042      }
043    
044      /**
045       * Encoding an integer into a variable-length encoding format. Synonymous to
046       * <code>Utils#writeVLong(out, n)</code>.
047       * 
048       * @param out
049       *          output stream
050       * @param n
051       *          The integer to be encoded
052       * @throws IOException
053       * @see Utils#writeVLong(DataOutput, long)
054       */
055      public static void writeVInt(DataOutput out, int n) throws IOException {
056        writeVLong(out, n);
057      }
058    
059      /**
060       * Encoding a Long integer into a variable-length encoding format.
061       * <ul>
062       * <li>if n in [-32, 127): encode in one byte with the actual value.
063       * Otherwise,
064       * <li>if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
065       * byte[1]=n&0xff. Otherwise,
066       * <li>if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
067       * 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
068       * <li>if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
069       * byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
070       * <li>if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
071       * (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
072       * <li>if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
073       * (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff;
074       * byte[4]=(n>>8)&0xff; byte[5]=n&0xff
075       * <li>if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
076       * (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff;
077       * byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
078       * <li>if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
079       * (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff;
080       * byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff;
081       * byte[7]=n&0xff;
082       * <li>if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
083       * (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
084       * byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff;
085       * byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
086       * </ul>
087       * 
088       * @param out
089       *          output stream
090       * @param n
091       *          the integer number
092       * @throws IOException
093       */
094      @SuppressWarnings("fallthrough")
095      public static void writeVLong(DataOutput out, long n) throws IOException {
096        if ((n < 128) && (n >= -32)) {
097          out.writeByte((int) n);
098          return;
099        }
100    
101        long un = (n < 0) ? ~n : n;
102        // how many bytes do we need to represent the number with sign bit?
103        int len = (Long.SIZE - Long.numberOfLeadingZeros(un)) / 8 + 1;
104        int firstByte = (int) (n >> ((len - 1) * 8));
105        switch (len) {
106          case 1:
107            // fall it through to firstByte==-1, len=2.
108            firstByte >>= 8;
109          case 2:
110            if ((firstByte < 20) && (firstByte >= -20)) {
111              out.writeByte(firstByte - 52);
112              out.writeByte((int) n);
113              return;
114            }
115            // fall it through to firstByte==0/-1, len=3.
116            firstByte >>= 8;
117          case 3:
118            if ((firstByte < 16) && (firstByte >= -16)) {
119              out.writeByte(firstByte - 88);
120              out.writeShort((int) n);
121              return;
122            }
123            // fall it through to firstByte==0/-1, len=4.
124            firstByte >>= 8;
125          case 4:
126            if ((firstByte < 8) && (firstByte >= -8)) {
127              out.writeByte(firstByte - 112);
128              out.writeShort(((int) n) >>> 8);
129              out.writeByte((int) n);
130              return;
131            }
132            out.writeByte(len - 129);
133            out.writeInt((int) n);
134            return;
135          case 5:
136            out.writeByte(len - 129);
137            out.writeInt((int) (n >>> 8));
138            out.writeByte((int) n);
139            return;
140          case 6:
141            out.writeByte(len - 129);
142            out.writeInt((int) (n >>> 16));
143            out.writeShort((int) n);
144            return;
145          case 7:
146            out.writeByte(len - 129);
147            out.writeInt((int) (n >>> 24));
148            out.writeShort((int) (n >>> 8));
149            out.writeByte((int) n);
150            return;
151          case 8:
152            out.writeByte(len - 129);
153            out.writeLong(n);
154            return;
155          default:
156            throw new RuntimeException("Internel error");
157        }
158      }
159    
160      /**
161       * Decoding the variable-length integer. Synonymous to
162       * <code>(int)Utils#readVLong(in)</code>.
163       * 
164       * @param in
165       *          input stream
166       * @return the decoded integer
167       * @throws IOException
168       * 
169       * @see Utils#readVLong(DataInput)
170       */
171      public static int readVInt(DataInput in) throws IOException {
172        long ret = readVLong(in);
173        if ((ret > Integer.MAX_VALUE) || (ret < Integer.MIN_VALUE)) {
174          throw new RuntimeException(
175              "Number too large to be represented as Integer");
176        }
177        return (int) ret;
178      }
179    
180      /**
181       * Decoding the variable-length integer. Suppose the value of the first byte
182       * is FB, and the following bytes are NB[*].
183       * <ul>
184       * <li>if (FB >= -32), return (long)FB;
185       * <li>if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
186       * <li>if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 +
187       * NB[1]&0xff;
188       * <li>if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 +
189       * (NB[1]&0xff)<<8 + NB[2]&0xff;
190       * <li>if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
191       * big-endian integer.
192       * 
193       * @param in
194       *          input stream
195       * @return the decoded long integer.
196       * @throws IOException
197       */
198    
199      public static long readVLong(DataInput in) throws IOException {
200        int firstByte = in.readByte();
201        if (firstByte >= -32) {
202          return firstByte;
203        }
204    
205        switch ((firstByte + 128) / 8) {
206          case 11:
207          case 10:
208          case 9:
209          case 8:
210          case 7:
211            return ((firstByte + 52) << 8) | in.readUnsignedByte();
212          case 6:
213          case 5:
214          case 4:
215          case 3:
216            return ((firstByte + 88) << 16) | in.readUnsignedShort();
217          case 2:
218          case 1:
219            return ((firstByte + 112) << 24) | (in.readUnsignedShort() << 8)
220                | in.readUnsignedByte();
221          case 0:
222            int len = firstByte + 129;
223            switch (len) {
224              case 4:
225                return in.readInt();
226              case 5:
227                return ((long) in.readInt()) << 8 | in.readUnsignedByte();
228              case 6:
229                return ((long) in.readInt()) << 16 | in.readUnsignedShort();
230              case 7:
231                return ((long) in.readInt()) << 24 | (in.readUnsignedShort() << 8)
232                    | in.readUnsignedByte();
233              case 8:
234                return in.readLong();
235              default:
236                throw new IOException("Corrupted VLong encoding");
237            }
238          default:
239            throw new RuntimeException("Internal error");
240        }
241      }
242    
243      /**
244       * Write a String as a VInt n, followed by n Bytes as in Text format.
245       * 
246       * @param out
247       * @param s
248       * @throws IOException
249       */
250      public static void writeString(DataOutput out, String s) throws IOException {
251        if (s != null) {
252          Text text = new Text(s);
253          byte[] buffer = text.getBytes();
254          int len = text.getLength();
255          writeVInt(out, len);
256          out.write(buffer, 0, len);
257        } else {
258          writeVInt(out, -1);
259        }
260      }
261    
262      /**
263       * Read a String as a VInt n, followed by n Bytes in Text format.
264       * 
265       * @param in
266       *          The input stream.
267       * @return The string
268       * @throws IOException
269       */
270      public static String readString(DataInput in) throws IOException {
271        int length = readVInt(in);
272        if (length == -1) return null;
273        byte[] buffer = new byte[length];
274        in.readFully(buffer);
275        return Text.decode(buffer);
276      }
277    
278      /**
279       * A generic Version class. We suggest applications built on top of TFile use
280       * this class to maintain version information in their meta blocks.
281       * 
282       * A version number consists of a major version and a minor version. The
283       * suggested usage of major and minor version number is to increment major
284       * version number when the new storage format is not backward compatible, and
285       * increment the minor version otherwise.
286       */
287      public static final class Version implements Comparable<Version> {
288        private final short major;
289        private final short minor;
290    
291        /**
292         * Construct the Version object by reading from the input stream.
293         * 
294         * @param in
295         *          input stream
296         * @throws IOException
297         */
298        public Version(DataInput in) throws IOException {
299          major = in.readShort();
300          minor = in.readShort();
301        }
302    
303        /**
304         * Constructor.
305         * 
306         * @param major
307         *          major version.
308         * @param minor
309         *          minor version.
310         */
311        public Version(short major, short minor) {
312          this.major = major;
313          this.minor = minor;
314        }
315    
316        /**
317         * Write the objec to a DataOutput. The serialized format of the Version is
318         * major version followed by minor version, both as big-endian short
319         * integers.
320         * 
321         * @param out
322         *          The DataOutput object.
323         * @throws IOException
324         */
325        public void write(DataOutput out) throws IOException {
326          out.writeShort(major);
327          out.writeShort(minor);
328        }
329    
330        /**
331         * Get the major version.
332         * 
333         * @return Major version.
334         */
335        public int getMajor() {
336          return major;
337        }
338    
339        /**
340         * Get the minor version.
341         * 
342         * @return The minor version.
343         */
344        public int getMinor() {
345          return minor;
346        }
347    
348        /**
349         * Get the size of the serialized Version object.
350         * 
351         * @return serialized size of the version object.
352         */
353        public static int size() {
354          return (Short.SIZE + Short.SIZE) / Byte.SIZE;
355        }
356    
357        /**
358         * Return a string representation of the version.
359         */
360        @Override
361        public String toString() {
362          return new StringBuilder("v").append(major).append(".").append(minor)
363              .toString();
364        }
365    
366        /**
367         * Test compatibility.
368         * 
369         * @param other
370         *          The Version object to test compatibility with.
371         * @return true if both versions have the same major version number; false
372         *         otherwise.
373         */
374        public boolean compatibleWith(Version other) {
375          return major == other.major;
376        }
377    
378        /**
379         * Compare this version with another version.
380         */
381        @Override
382        public int compareTo(Version that) {
383          if (major != that.major) {
384            return major - that.major;
385          }
386          return minor - that.minor;
387        }
388    
389        @Override
390        public boolean equals(Object other) {
391          if (this == other) return true;
392          if (!(other instanceof Version)) return false;
393          return compareTo((Version) other) == 0;
394        }
395    
396        @Override
397        public int hashCode() {
398          return (major << 16 + minor);
399        }
400      }
401    
402      /**
403       * Lower bound binary search. Find the index to the first element in the list
404       * that compares greater than or equal to key.
405       * 
406       * @param <T>
407       *          Type of the input key.
408       * @param list
409       *          The list
410       * @param key
411       *          The input key.
412       * @param cmp
413       *          Comparator for the key.
414       * @return The index to the desired element if it exists; or list.size()
415       *         otherwise.
416       */
417      public static <T> int lowerBound(List<? extends T> list, T key,
418          Comparator<? super T> cmp) {
419        int low = 0;
420        int high = list.size();
421    
422        while (low < high) {
423          int mid = (low + high) >>> 1;
424          T midVal = list.get(mid);
425          int ret = cmp.compare(midVal, key);
426          if (ret < 0)
427            low = mid + 1;
428          else high = mid;
429        }
430        return low;
431      }
432    
433      /**
434       * Upper bound binary search. Find the index to the first element in the list
435       * that compares greater than the input key.
436       * 
437       * @param <T>
438       *          Type of the input key.
439       * @param list
440       *          The list
441       * @param key
442       *          The input key.
443       * @param cmp
444       *          Comparator for the key.
445       * @return The index to the desired element if it exists; or list.size()
446       *         otherwise.
447       */
448      public static <T> int upperBound(List<? extends T> list, T key,
449          Comparator<? super T> cmp) {
450        int low = 0;
451        int high = list.size();
452    
453        while (low < high) {
454          int mid = (low + high) >>> 1;
455          T midVal = list.get(mid);
456          int ret = cmp.compare(midVal, key);
457          if (ret <= 0)
458            low = mid + 1;
459          else high = mid;
460        }
461        return low;
462      }
463    
464      /**
465       * Lower bound binary search. Find the index to the first element in the list
466       * that compares greater than or equal to key.
467       * 
468       * @param <T>
469       *          Type of the input key.
470       * @param list
471       *          The list
472       * @param key
473       *          The input key.
474       * @return The index to the desired element if it exists; or list.size()
475       *         otherwise.
476       */
477      public static <T> int lowerBound(List<? extends Comparable<? super T>> list,
478          T key) {
479        int low = 0;
480        int high = list.size();
481    
482        while (low < high) {
483          int mid = (low + high) >>> 1;
484          Comparable<? super T> midVal = list.get(mid);
485          int ret = midVal.compareTo(key);
486          if (ret < 0)
487            low = mid + 1;
488          else high = mid;
489        }
490        return low;
491      }
492    
493      /**
494       * Upper bound binary search. Find the index to the first element in the list
495       * that compares greater than the input key.
496       * 
497       * @param <T>
498       *          Type of the input key.
499       * @param list
500       *          The list
501       * @param key
502       *          The input key.
503       * @return The index to the desired element if it exists; or list.size()
504       *         otherwise.
505       */
506      public static <T> int upperBound(List<? extends Comparable<? super T>> list,
507          T key) {
508        int low = 0;
509        int high = list.size();
510    
511        while (low < high) {
512          int mid = (low + high) >>> 1;
513          Comparable<? super T> midVal = list.get(mid);
514          int ret = midVal.compareTo(key);
515          if (ret <= 0)
516            low = mid + 1;
517          else high = mid;
518        }
519        return low;
520      }
521    }