/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce.lib.output;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.task.annotation.Checkpointable;

import com.google.common.annotations.VisibleForTesting;

/**
 * An {@link OutputCommitter} that commits files specified
 * in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.
 *
 * <p>Committed task output is stored in a directory named after the full
 * task attempt id (see {@link #getCommittedTaskPath(int, TaskAttemptContext)}),
 * so every attempt of a task gets its own committed directory.
 * {@link #cleanUpPartialOutputForTask(TaskAttemptContext)} removes the
 * committed directories left behind by earlier attempts of the same task.
 **/
@Checkpointable
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class PartialFileOutputCommitter
    extends FileOutputCommitter implements PartialOutputCommitter {

  private static final Log LOG =
    LogFactory.getLog(PartialFileOutputCommitter.class);

  /**
   * Creates a committer for a task attempt; all work is delegated to the
   * superclass constructor.
   *
   * @param outputPath the output path handed to the superclass
   * @param context the context of the task attempt
   * @throws IOException if the superclass constructor fails
   */
  public PartialFileOutputCommitter(Path outputPath,
                                    TaskAttemptContext context) throws IOException {
    super(outputPath, context);
  }

  /**
   * Creates a committer for a job; all work is delegated to the
   * superclass constructor.
   *
   * @param outputPath the output path handed to the superclass
   * @param context the context of the job
   * @throws IOException if the superclass constructor fails
   */
  public PartialFileOutputCommitter(Path outputPath,
                                    JobContext context) throws IOException {
    super(outputPath, context);
  }

  /**
   * Computes the directory that holds the committed output of a task attempt.
   * The directory lives directly under the job attempt path and is named
   * after the string form of the task attempt id.
   *
   * @param appAttemptId the application attempt id selecting the job attempt
   *                     path
   * @param context the task attempt whose id names the directory
   * @return the committed output path for the given task attempt
   */
  @Override
  public Path getCommittedTaskPath(int appAttemptId, TaskAttemptContext context) {
    return new Path(getJobAttemptPath(appAttemptId),
        String.valueOf(context.getTaskAttemptID()));
  }

  /**
   * Resolves the {@link FileSystem} that owns the given path. Kept as a
   * separate package-private method so tests can override it.
   *
   * @param p the path whose file system is wanted
   * @param conf the configuration used to resolve the file system
   * @return the file system for {@code p}
   * @throws IOException if the file system cannot be obtained
   */
  @VisibleForTesting
  FileSystem fsFor(Path p, Configuration conf) throws IOException {
    return p.getFileSystem(conf);
  }

  /**
   * Deletes the committed output directories of every earlier attempt of the
   * task that {@code context} belongs to, i.e. attempts numbered
   * {@code 0} up to (but not including) the current attempt's number. The
   * current attempt's own directory is left untouched.
   *
   * @param context the currently running task attempt
   * @throws IOException if a directory exists but could not be deleted, or
   *                     if the file system reports an error
   * @throws IllegalStateException if the runtime class of this committer is
   *                               not annotated with {@link Checkpointable}
   */
  @Override
  public void cleanUpPartialOutputForTask(TaskAttemptContext context)
      throws IOException {

    // Double check that this is never invoked from a non-checkpointable
    // subclass. This should never happen, since the invoking code checks it
    // too, but it is safer to double check: errors in handling this would
    // produce inconsistent output.

    if (!this.getClass().isAnnotationPresent(Checkpointable.class)) {
      throw new IllegalStateException("Invoking cleanUpPartialOutputForTask() " +
          "from non @Checkpointable class");
    }
    FileSystem fs =
      fsFor(getTaskAttemptPath(context), context.getConfiguration());

    final TaskAttemptID taid = context.getTaskAttemptID();
    final TaskID tid = taid.getTaskID();
    // Parent of this attempt's committed path: the directory under which
    // the committed directories of all attempts are looked up.
    final Path pCommit = getCommittedTaskPath(context).getParent();

    LOG.info("cleanUpPartialOutputForTask: removing everything belonging to " +
        tid + " in: " + pCommit);

    // remove any committed output of previous attempts of this task
    for (int i = 0; i < taid.getId(); ++i) {
      TaskAttemptID oldId = new TaskAttemptID(tid, i);
      Path pTask = new Path(pCommit, oldId.toString());
      // A missing directory is fine; a present one that cannot be deleted
      // is an error.
      if (fs.exists(pTask) && !fs.delete(pTask, true)) {
        throw new IOException("Failed to delete " + pTask);
      }
    }
  }

}