001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with this
004 * work for additional information regarding copyright ownership. The ASF
005 * licenses this file to you under the Apache License, Version 2.0 (the
006 * "License"); you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations under
015 * the License.
016 */
017
018 package org.apache.hadoop.io.file.tfile;
019
020 import java.io.DataInput;
021 import java.io.DataOutput;
022 import java.io.IOException;
023 import java.util.Comparator;
024 import java.util.List;
025
026 import org.apache.hadoop.classification.InterfaceAudience;
027 import org.apache.hadoop.classification.InterfaceStability;
028 import org.apache.hadoop.io.Text;
029
030 /**
031 * Supporting Utility classes used by TFile, and shared by users of TFile.
032 */
033 @InterfaceAudience.Public
034 @InterfaceStability.Evolving
035 public final class Utils {
036
037 /**
038 * Prevent the instantiation of Utils.
039 */
040 private Utils() {
041 // nothing
042 }
043
044 /**
045 * Encoding an integer into a variable-length encoding format. Synonymous to
046 * <code>Utils#writeVLong(out, n)</code>.
047 *
048 * @param out
049 * output stream
050 * @param n
051 * The integer to be encoded
052 * @throws IOException
053 * @see Utils#writeVLong(DataOutput, long)
054 */
055 public static void writeVInt(DataOutput out, int n) throws IOException {
056 writeVLong(out, n);
057 }
058
059 /**
060 * Encoding a Long integer into a variable-length encoding format.
061 * <ul>
062 * <li>if n in [-32, 127): encode in one byte with the actual value.
063 * Otherwise,
064 * <li>if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
065 * byte[1]=n&0xff. Otherwise,
066 * <li>if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
067 * 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
068 * <li>if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
069 * byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
070 * <li>if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
071 * (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
072 * <li>if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
073 * (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff;
074 * byte[4]=(n>>8)&0xff; byte[5]=n&0xff
075 * <li>if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
076 * (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff;
077 * byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
078 * <li>if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
079 * (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff;
080 * byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff;
081 * byte[7]=n&0xff;
082 * <li>if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
083 * (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
084 * byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff;
085 * byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
086 * </ul>
087 *
088 * @param out
089 * output stream
090 * @param n
091 * the integer number
092 * @throws IOException
093 */
094 @SuppressWarnings("fallthrough")
095 public static void writeVLong(DataOutput out, long n) throws IOException {
096 if ((n < 128) && (n >= -32)) {
097 out.writeByte((int) n);
098 return;
099 }
100
101 long un = (n < 0) ? ~n : n;
102 // how many bytes do we need to represent the number with sign bit?
103 int len = (Long.SIZE - Long.numberOfLeadingZeros(un)) / 8 + 1;
104 int firstByte = (int) (n >> ((len - 1) * 8));
105 switch (len) {
106 case 1:
107 // fall it through to firstByte==-1, len=2.
108 firstByte >>= 8;
109 case 2:
110 if ((firstByte < 20) && (firstByte >= -20)) {
111 out.writeByte(firstByte - 52);
112 out.writeByte((int) n);
113 return;
114 }
115 // fall it through to firstByte==0/-1, len=3.
116 firstByte >>= 8;
117 case 3:
118 if ((firstByte < 16) && (firstByte >= -16)) {
119 out.writeByte(firstByte - 88);
120 out.writeShort((int) n);
121 return;
122 }
123 // fall it through to firstByte==0/-1, len=4.
124 firstByte >>= 8;
125 case 4:
126 if ((firstByte < 8) && (firstByte >= -8)) {
127 out.writeByte(firstByte - 112);
128 out.writeShort(((int) n) >>> 8);
129 out.writeByte((int) n);
130 return;
131 }
132 out.writeByte(len - 129);
133 out.writeInt((int) n);
134 return;
135 case 5:
136 out.writeByte(len - 129);
137 out.writeInt((int) (n >>> 8));
138 out.writeByte((int) n);
139 return;
140 case 6:
141 out.writeByte(len - 129);
142 out.writeInt((int) (n >>> 16));
143 out.writeShort((int) n);
144 return;
145 case 7:
146 out.writeByte(len - 129);
147 out.writeInt((int) (n >>> 24));
148 out.writeShort((int) (n >>> 8));
149 out.writeByte((int) n);
150 return;
151 case 8:
152 out.writeByte(len - 129);
153 out.writeLong(n);
154 return;
155 default:
156 throw new RuntimeException("Internel error");
157 }
158 }
159
160 /**
161 * Decoding the variable-length integer. Synonymous to
162 * <code>(int)Utils#readVLong(in)</code>.
163 *
164 * @param in
165 * input stream
166 * @return the decoded integer
167 * @throws IOException
168 *
169 * @see Utils#readVLong(DataInput)
170 */
171 public static int readVInt(DataInput in) throws IOException {
172 long ret = readVLong(in);
173 if ((ret > Integer.MAX_VALUE) || (ret < Integer.MIN_VALUE)) {
174 throw new RuntimeException(
175 "Number too large to be represented as Integer");
176 }
177 return (int) ret;
178 }
179
180 /**
181 * Decoding the variable-length integer. Suppose the value of the first byte
182 * is FB, and the following bytes are NB[*].
183 * <ul>
184 * <li>if (FB >= -32), return (long)FB;
185 * <li>if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
186 * <li>if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 +
187 * NB[1]&0xff;
188 * <li>if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 +
189 * (NB[1]&0xff)<<8 + NB[2]&0xff;
190 * <li>if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
191 * big-endian integer.
192 *
193 * @param in
194 * input stream
195 * @return the decoded long integer.
196 * @throws IOException
197 */
198
199 public static long readVLong(DataInput in) throws IOException {
200 int firstByte = in.readByte();
201 if (firstByte >= -32) {
202 return firstByte;
203 }
204
205 switch ((firstByte + 128) / 8) {
206 case 11:
207 case 10:
208 case 9:
209 case 8:
210 case 7:
211 return ((firstByte + 52) << 8) | in.readUnsignedByte();
212 case 6:
213 case 5:
214 case 4:
215 case 3:
216 return ((firstByte + 88) << 16) | in.readUnsignedShort();
217 case 2:
218 case 1:
219 return ((firstByte + 112) << 24) | (in.readUnsignedShort() << 8)
220 | in.readUnsignedByte();
221 case 0:
222 int len = firstByte + 129;
223 switch (len) {
224 case 4:
225 return in.readInt();
226 case 5:
227 return ((long) in.readInt()) << 8 | in.readUnsignedByte();
228 case 6:
229 return ((long) in.readInt()) << 16 | in.readUnsignedShort();
230 case 7:
231 return ((long) in.readInt()) << 24 | (in.readUnsignedShort() << 8)
232 | in.readUnsignedByte();
233 case 8:
234 return in.readLong();
235 default:
236 throw new IOException("Corrupted VLong encoding");
237 }
238 default:
239 throw new RuntimeException("Internal error");
240 }
241 }
242
243 /**
244 * Write a String as a VInt n, followed by n Bytes as in Text format.
245 *
246 * @param out
247 * @param s
248 * @throws IOException
249 */
250 public static void writeString(DataOutput out, String s) throws IOException {
251 if (s != null) {
252 Text text = new Text(s);
253 byte[] buffer = text.getBytes();
254 int len = text.getLength();
255 writeVInt(out, len);
256 out.write(buffer, 0, len);
257 } else {
258 writeVInt(out, -1);
259 }
260 }
261
262 /**
263 * Read a String as a VInt n, followed by n Bytes in Text format.
264 *
265 * @param in
266 * The input stream.
267 * @return The string
268 * @throws IOException
269 */
270 public static String readString(DataInput in) throws IOException {
271 int length = readVInt(in);
272 if (length == -1) return null;
273 byte[] buffer = new byte[length];
274 in.readFully(buffer);
275 return Text.decode(buffer);
276 }
277
278 /**
279 * A generic Version class. We suggest applications built on top of TFile use
280 * this class to maintain version information in their meta blocks.
281 *
282 * A version number consists of a major version and a minor version. The
283 * suggested usage of major and minor version number is to increment major
284 * version number when the new storage format is not backward compatible, and
285 * increment the minor version otherwise.
286 */
287 public static final class Version implements Comparable<Version> {
288 private final short major;
289 private final short minor;
290
291 /**
292 * Construct the Version object by reading from the input stream.
293 *
294 * @param in
295 * input stream
296 * @throws IOException
297 */
298 public Version(DataInput in) throws IOException {
299 major = in.readShort();
300 minor = in.readShort();
301 }
302
303 /**
304 * Constructor.
305 *
306 * @param major
307 * major version.
308 * @param minor
309 * minor version.
310 */
311 public Version(short major, short minor) {
312 this.major = major;
313 this.minor = minor;
314 }
315
316 /**
317 * Write the objec to a DataOutput. The serialized format of the Version is
318 * major version followed by minor version, both as big-endian short
319 * integers.
320 *
321 * @param out
322 * The DataOutput object.
323 * @throws IOException
324 */
325 public void write(DataOutput out) throws IOException {
326 out.writeShort(major);
327 out.writeShort(minor);
328 }
329
330 /**
331 * Get the major version.
332 *
333 * @return Major version.
334 */
335 public int getMajor() {
336 return major;
337 }
338
339 /**
340 * Get the minor version.
341 *
342 * @return The minor version.
343 */
344 public int getMinor() {
345 return minor;
346 }
347
348 /**
349 * Get the size of the serialized Version object.
350 *
351 * @return serialized size of the version object.
352 */
353 public static int size() {
354 return (Short.SIZE + Short.SIZE) / Byte.SIZE;
355 }
356
357 /**
358 * Return a string representation of the version.
359 */
360 @Override
361 public String toString() {
362 return new StringBuilder("v").append(major).append(".").append(minor)
363 .toString();
364 }
365
366 /**
367 * Test compatibility.
368 *
369 * @param other
370 * The Version object to test compatibility with.
371 * @return true if both versions have the same major version number; false
372 * otherwise.
373 */
374 public boolean compatibleWith(Version other) {
375 return major == other.major;
376 }
377
378 /**
379 * Compare this version with another version.
380 */
381 @Override
382 public int compareTo(Version that) {
383 if (major != that.major) {
384 return major - that.major;
385 }
386 return minor - that.minor;
387 }
388
389 @Override
390 public boolean equals(Object other) {
391 if (this == other) return true;
392 if (!(other instanceof Version)) return false;
393 return compareTo((Version) other) == 0;
394 }
395
396 @Override
397 public int hashCode() {
398 return (major << 16 + minor);
399 }
400 }
401
402 /**
403 * Lower bound binary search. Find the index to the first element in the list
404 * that compares greater than or equal to key.
405 *
406 * @param <T>
407 * Type of the input key.
408 * @param list
409 * The list
410 * @param key
411 * The input key.
412 * @param cmp
413 * Comparator for the key.
414 * @return The index to the desired element if it exists; or list.size()
415 * otherwise.
416 */
417 public static <T> int lowerBound(List<? extends T> list, T key,
418 Comparator<? super T> cmp) {
419 int low = 0;
420 int high = list.size();
421
422 while (low < high) {
423 int mid = (low + high) >>> 1;
424 T midVal = list.get(mid);
425 int ret = cmp.compare(midVal, key);
426 if (ret < 0)
427 low = mid + 1;
428 else high = mid;
429 }
430 return low;
431 }
432
433 /**
434 * Upper bound binary search. Find the index to the first element in the list
435 * that compares greater than the input key.
436 *
437 * @param <T>
438 * Type of the input key.
439 * @param list
440 * The list
441 * @param key
442 * The input key.
443 * @param cmp
444 * Comparator for the key.
445 * @return The index to the desired element if it exists; or list.size()
446 * otherwise.
447 */
448 public static <T> int upperBound(List<? extends T> list, T key,
449 Comparator<? super T> cmp) {
450 int low = 0;
451 int high = list.size();
452
453 while (low < high) {
454 int mid = (low + high) >>> 1;
455 T midVal = list.get(mid);
456 int ret = cmp.compare(midVal, key);
457 if (ret <= 0)
458 low = mid + 1;
459 else high = mid;
460 }
461 return low;
462 }
463
464 /**
465 * Lower bound binary search. Find the index to the first element in the list
466 * that compares greater than or equal to key.
467 *
468 * @param <T>
469 * Type of the input key.
470 * @param list
471 * The list
472 * @param key
473 * The input key.
474 * @return The index to the desired element if it exists; or list.size()
475 * otherwise.
476 */
477 public static <T> int lowerBound(List<? extends Comparable<? super T>> list,
478 T key) {
479 int low = 0;
480 int high = list.size();
481
482 while (low < high) {
483 int mid = (low + high) >>> 1;
484 Comparable<? super T> midVal = list.get(mid);
485 int ret = midVal.compareTo(key);
486 if (ret < 0)
487 low = mid + 1;
488 else high = mid;
489 }
490 return low;
491 }
492
493 /**
494 * Upper bound binary search. Find the index to the first element in the list
495 * that compares greater than the input key.
496 *
497 * @param <T>
498 * Type of the input key.
499 * @param list
500 * The list
501 * @param key
502 * The input key.
503 * @return The index to the desired element if it exists; or list.size()
504 * otherwise.
505 */
506 public static <T> int upperBound(List<? extends Comparable<? super T>> list,
507 T key) {
508 int low = 0;
509 int high = list.size();
510
511 while (low < high) {
512 int mid = (low + high) >>> 1;
513 Comparable<? super T> midVal = list.get(mid);
514 int ret = midVal.compareTo(key);
515 if (ret <= 0)
516 low = mid + 1;
517 else high = mid;
518 }
519 return low;
520 }
521 }