/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * An {@link OutputFormat} that writes plain text files.
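 * <p>
 * A minimal driver-side usage sketch; the driver class, key/value types, and
 * output path below are placeholders, assuming a standard old-API job setup:
 * <pre>
 *   JobConf conf = new JobConf(MyDriver.class);   // MyDriver is hypothetical
 *   conf.setOutputFormat(TextOutputFormat.class);
 *   conf.setOutputKeyClass(Text.class);
 *   conf.setOutputValueClass(IntWritable.class);
 *   // optional: override the default tab key/value separator
 *   conf.set("mapreduce.output.textoutputformat.separator", ",");
 *   FileOutputFormat.setOutputPath(conf, new Path("/tmp/wordcount-out"));
 * </pre>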
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TextOutputFormat<K, V> extends FileOutputFormat<K, V> {

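  /**
   * A {@link RecordWriter} that writes each key/value pair as a single line
   * of UTF-8 text: the key, a configurable separator (a tab by default), the
   * value, and a trailing newline.
   */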
  protected static class LineRecordWriter<K, V>
    implements RecordWriter<K, V> {
    private static final String utf8 = "UTF-8";
    private static final byte[] newline;
    static {
      try {
        newline = "\n".getBytes(utf8);
      } catch (UnsupportedEncodingException uee) {
        throw new IllegalArgumentException(
            "can't find " + utf8 + " encoding", uee);
      }
    }

    protected DataOutputStream out;
    private final byte[] keyValueSeparator;

    public LineRecordWriter(DataOutputStream out, String keyValueSeparator) {
      this.out = out;
      try {
        this.keyValueSeparator = keyValueSeparator.getBytes(utf8);
      } catch (UnsupportedEncodingException uee) {
        throw new IllegalArgumentException(
            "can't find " + utf8 + " encoding", uee);
      }
    }

    public LineRecordWriter(DataOutputStream out) {
      this(out, "\t");
    }

    /**
     * Write the object to the byte stream, handling Text as a special
     * case.
     * @param o the object to write
     * @throws IOException if writing to the underlying stream fails
     */
    private void writeObject(Object o) throws IOException {
      if (o instanceof Text) {
        Text to = (Text) o;
        out.write(to.getBytes(), 0, to.getLength());
      } else {
        out.write(o.toString().getBytes(utf8));
      }
    }

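    /**
     * Write a key/value pair as one line. A key or value that is null or a
     * {@link NullWritable} is skipped, along with its separator; if both are
     * skipped, no output is produced for the record at all.
     * @throws IOException if writing to the underlying stream fails
     */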
    public synchronized void write(K key, V value)
      throws IOException {

      boolean nullKey = key == null || key instanceof NullWritable;
      boolean nullValue = value == null || value instanceof NullWritable;
      // nothing to write at all, not even a newline
      if (nullKey && nullValue) {
        return;
      }
      if (!nullKey) {
        writeObject(key);
      }
      // the separator is only needed when both a key and a value are present
      if (!(nullKey || nullValue)) {
        out.write(keyValueSeparator);
      }
      if (!nullValue) {
        writeObject(value);
      }
      out.write(newline);
    }

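    /**
     * Close the underlying output stream; the reporter is not used.
     */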
    public synchronized void close(Reporter reporter) throws IOException {
      out.close();
    }
  }

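  /**
   * Get a {@link LineRecordWriter} for the given task. If output compression
   * is enabled, the configured {@link CompressionCodec} (gzip by default)
   * wraps the stream and its default extension is appended to the file name.
   */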
  public RecordWriter<K, V> getRecordWriter(FileSystem ignored,
                                            JobConf job,
                                            String name,
                                            Progressable progress)
    throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator =
      job.get("mapreduce.output.textoutputformat.separator", "\t");
    if (!isCompressed) {
      // write the output as plain, uncompressed text
      Path file = FileOutputFormat.getTaskOutputPath(job, name);
      FileSystem fs = file.getFileSystem(job);
      FSDataOutputStream fileOut = fs.create(file, progress);
      return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
      Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, GzipCodec.class);
      // create the named codec
      CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
      // build the filename including the extension
      Path file =
        FileOutputFormat.getTaskOutputPath(job,
                                           name + codec.getDefaultExtension());
      FileSystem fs = file.getFileSystem(job);
      FSDataOutputStream fileOut = fs.create(file, progress);
      // wrap the raw stream with the compression codec before writing lines
      return new LineRecordWriter<K, V>(
          new DataOutputStream(codec.createOutputStream(fileOut)),
          keyValueSeparator);
    }
  }
}