001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.mapreduce.lib.db; 020 021 import java.math.BigDecimal; 022 import java.sql.ResultSet; 023 import java.sql.SQLException; 024 import java.util.ArrayList; 025 import java.util.List; 026 027 import org.apache.commons.logging.Log; 028 import org.apache.commons.logging.LogFactory; 029 030 import org.apache.hadoop.classification.InterfaceAudience; 031 import org.apache.hadoop.classification.InterfaceStability; 032 import org.apache.hadoop.conf.Configuration; 033 import org.apache.hadoop.mapreduce.InputSplit; 034 import org.apache.hadoop.mapreduce.MRJobConfig; 035 036 /** 037 * Implement DBSplitter over BigDecimal values. 038 */ 039 @InterfaceAudience.Public 040 @InterfaceStability.Evolving 041 public class BigDecimalSplitter implements DBSplitter { 042 private static final Log LOG = LogFactory.getLog(BigDecimalSplitter.class); 043 044 public List<InputSplit> split(Configuration conf, ResultSet results, String colName) 045 throws SQLException { 046 047 BigDecimal minVal = results.getBigDecimal(1); 048 BigDecimal maxVal = results.getBigDecimal(2); 049 050 String lowClausePrefix = colName + " >= "; 051 String highClausePrefix = colName + " < "; 052 053 BigDecimal numSplits = new BigDecimal(conf.getInt(MRJobConfig.NUM_MAPS, 1)); 054 055 if (minVal == null && maxVal == null) { 056 // Range is null to null. Return a null split accordingly. 057 List<InputSplit> splits = new ArrayList<InputSplit>(); 058 splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( 059 colName + " IS NULL", colName + " IS NULL")); 060 return splits; 061 } 062 063 if (minVal == null || maxVal == null) { 064 // Don't know what is a reasonable min/max value for interpolation. Fail. 065 LOG.error("Cannot find a range for NUMERIC or DECIMAL fields with one end NULL."); 066 return null; 067 } 068 069 // Get all the split points together. 070 List<BigDecimal> splitPoints = split(numSplits, minVal, maxVal); 071 List<InputSplit> splits = new ArrayList<InputSplit>(); 072 073 // Turn the split points into a set of intervals. 074 BigDecimal start = splitPoints.get(0); 075 for (int i = 1; i < splitPoints.size(); i++) { 076 BigDecimal end = splitPoints.get(i); 077 078 if (i == splitPoints.size() - 1) { 079 // This is the last one; use a closed interval. 080 splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( 081 lowClausePrefix + start.toString(), 082 colName + " <= " + end.toString())); 083 } else { 084 // Normal open-interval case. 085 splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( 086 lowClausePrefix + start.toString(), 087 highClausePrefix + end.toString())); 088 } 089 090 start = end; 091 } 092 093 return splits; 094 } 095 096 private static final BigDecimal MIN_INCREMENT = new BigDecimal(10000 * Double.MIN_VALUE); 097 098 /** 099 * Divide numerator by denominator. If impossible in exact mode, use rounding. 100 */ 101 protected BigDecimal tryDivide(BigDecimal numerator, BigDecimal denominator) { 102 try { 103 return numerator.divide(denominator); 104 } catch (ArithmeticException ae) { 105 return numerator.divide(denominator, BigDecimal.ROUND_HALF_UP); 106 } 107 } 108 109 /** 110 * Returns a list of BigDecimals one element longer than the list of input splits. 111 * This represents the boundaries between input splits. 112 * All splits are open on the top end, except the last one. 113 * 114 * So the list [0, 5, 8, 12, 18] would represent splits capturing the intervals: 115 * 116 * [0, 5) 117 * [5, 8) 118 * [8, 12) 119 * [12, 18] note the closed interval for the last split. 120 */ 121 List<BigDecimal> split(BigDecimal numSplits, BigDecimal minVal, BigDecimal maxVal) 122 throws SQLException { 123 124 List<BigDecimal> splits = new ArrayList<BigDecimal>(); 125 126 // Use numSplits as a hint. May need an extra task if the size doesn't 127 // divide cleanly. 128 129 BigDecimal splitSize = tryDivide(maxVal.subtract(minVal), (numSplits)); 130 if (splitSize.compareTo(MIN_INCREMENT) < 0) { 131 splitSize = MIN_INCREMENT; 132 LOG.warn("Set BigDecimal splitSize to MIN_INCREMENT"); 133 } 134 135 BigDecimal curVal = minVal; 136 137 while (curVal.compareTo(maxVal) <= 0) { 138 splits.add(curVal); 139 curVal = curVal.add(splitSize); 140 } 141 142 if (splits.get(splits.size() - 1).compareTo(maxVal) != 0 || splits.size() == 1) { 143 // We didn't end on the maxVal. Add that to the end of the list. 144 splits.add(maxVal); 145 } 146 147 return splits; 148 } 149 }