001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs.azure.metrics;
020
021import java.util.UUID;
022import java.util.concurrent.atomic.AtomicLong;
023
024import org.apache.hadoop.classification.InterfaceAudience;
025import org.apache.hadoop.classification.InterfaceStability;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.metrics2.MetricsCollector;
028import org.apache.hadoop.metrics2.MetricsInfo;
029import org.apache.hadoop.metrics2.MetricsSource;
030import org.apache.hadoop.metrics2.annotation.Metrics;
031import org.apache.hadoop.metrics2.lib.MetricsRegistry;
032import org.apache.hadoop.metrics2.lib.MutableCounterLong;
033import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
034
035/**
036 * A metrics source for the WASB file system to track all the metrics we care
037 * about for getting a clear picture of the performance/reliability/interaction
038 * of the Hadoop cluster with Azure Storage.
039 */
040@Metrics(about="Metrics for WASB", context="azureFileSystem")
041@InterfaceAudience.Public
042@InterfaceStability.Evolving
043public final class AzureFileSystemInstrumentation implements MetricsSource {
044
045  public static final String METRIC_TAG_FILESYSTEM_ID = "wasbFileSystemId";
046  public static final String METRIC_TAG_ACCOUNT_NAME = "accountName";
047  public static final String METRIC_TAG_CONTAINTER_NAME = "containerName";
048
049  public static final String WASB_WEB_RESPONSES = "wasb_web_responses";
050  public static final String WASB_BYTES_WRITTEN =
051      "wasb_bytes_written_last_second";
052  public static final String WASB_BYTES_READ =
053      "wasb_bytes_read_last_second";
054  public static final String WASB_RAW_BYTES_UPLOADED =
055      "wasb_raw_bytes_uploaded";
056  public static final String WASB_RAW_BYTES_DOWNLOADED =
057      "wasb_raw_bytes_downloaded";
058  public static final String WASB_FILES_CREATED = "wasb_files_created";
059  public static final String WASB_FILES_DELETED = "wasb_files_deleted";
060  public static final String WASB_DIRECTORIES_CREATED = "wasb_directories_created";
061  public static final String WASB_DIRECTORIES_DELETED = "wasb_directories_deleted";
062  public static final String WASB_UPLOAD_RATE =
063      "wasb_maximum_upload_bytes_per_second";
064  public static final String WASB_DOWNLOAD_RATE =
065      "wasb_maximum_download_bytes_per_second";
066  public static final String WASB_UPLOAD_LATENCY =
067      "wasb_average_block_upload_latency_ms";
068  public static final String WASB_DOWNLOAD_LATENCY =
069      "wasb_average_block_download_latency_ms";
070  public static final String WASB_CLIENT_ERRORS = "wasb_client_errors";
071  public static final String WASB_SERVER_ERRORS = "wasb_server_errors";
072
073  /**
074   * Config key for how big the rolling window size for latency metrics should
075   * be (in seconds).
076   */
077  private static final String KEY_ROLLING_WINDOW_SIZE = "fs.azure.metrics.rolling.window.size";
078
079  private final MetricsRegistry registry =
080      new MetricsRegistry("azureFileSystem")
081      .setContext("azureFileSystem");
082  private final MutableCounterLong numberOfWebResponses =
083      registry.newCounter(
084          WASB_WEB_RESPONSES,
085          "Total number of web responses obtained from Azure Storage",
086          0L);
087  private AtomicLong inMemoryNumberOfWebResponses = new AtomicLong(0);
088  private final MutableCounterLong numberOfFilesCreated =
089      registry.newCounter(
090          WASB_FILES_CREATED,
091          "Total number of files created through the WASB file system.",
092          0L);
093  private final MutableCounterLong numberOfFilesDeleted =
094      registry.newCounter(
095          WASB_FILES_DELETED,
096          "Total number of files deleted through the WASB file system.",
097          0L);
098  private final MutableCounterLong numberOfDirectoriesCreated =
099      registry.newCounter(
100          WASB_DIRECTORIES_CREATED,
101          "Total number of directories created through the WASB file system.",
102          0L);
103  private final MutableCounterLong numberOfDirectoriesDeleted =
104      registry.newCounter(
105          WASB_DIRECTORIES_DELETED,
106          "Total number of directories deleted through the WASB file system.",
107          0L);
108  private final MutableGaugeLong bytesWrittenInLastSecond =
109      registry.newGauge(
110          WASB_BYTES_WRITTEN,
111          "Total number of bytes written to Azure Storage during the last second.",
112          0L);
113  private final MutableGaugeLong bytesReadInLastSecond =
114      registry.newGauge(
115          WASB_BYTES_READ,
116          "Total number of bytes read from Azure Storage during the last second.",
117          0L);
118  private final MutableGaugeLong maximumUploadBytesPerSecond =
119      registry.newGauge(
120          WASB_UPLOAD_RATE,
121          "The maximum upload rate encountered to Azure Storage in bytes/second.",
122          0L);
123  private final MutableGaugeLong maximumDownloadBytesPerSecond =
124      registry.newGauge(
125          WASB_DOWNLOAD_RATE,
126          "The maximum download rate encountered to Azure Storage in bytes/second.",
127          0L);
128  private final MutableCounterLong rawBytesUploaded =
129      registry.newCounter(
130          WASB_RAW_BYTES_UPLOADED,
131          "Total number of raw bytes (including overhead) uploaded to Azure" 
132          + " Storage.",
133          0L);
134  private final MutableCounterLong rawBytesDownloaded =
135      registry.newCounter(
136          WASB_RAW_BYTES_DOWNLOADED,
137          "Total number of raw bytes (including overhead) downloaded from Azure" 
138          + " Storage.",
139          0L);
140  private final MutableCounterLong clientErrors =
141      registry.newCounter(
142          WASB_CLIENT_ERRORS,
143          "Total number of client-side errors by WASB (excluding 404).",
144          0L);
145  private final MutableCounterLong serverErrors =
146      registry.newCounter(
147          WASB_SERVER_ERRORS,
148          "Total number of server-caused errors by WASB.",
149          0L);
150  private final MutableGaugeLong averageBlockUploadLatencyMs;
151  private final MutableGaugeLong averageBlockDownloadLatencyMs;
152  private long currentMaximumUploadBytesPerSecond;
153  private long currentMaximumDownloadBytesPerSecond;
154  private static final int DEFAULT_LATENCY_ROLLING_AVERAGE_WINDOW =
155      5; // seconds
156  private final RollingWindowAverage currentBlockUploadLatency;
157  private final RollingWindowAverage currentBlockDownloadLatency;
158  private UUID fileSystemInstanceId;
159
160  public AzureFileSystemInstrumentation(Configuration conf) {
161    fileSystemInstanceId = UUID.randomUUID();
162    registry.tag("wasbFileSystemId",
163        "A unique identifier for the file ",
164        fileSystemInstanceId.toString());
165    final int rollingWindowSizeInSeconds =
166        conf.getInt(KEY_ROLLING_WINDOW_SIZE,
167            DEFAULT_LATENCY_ROLLING_AVERAGE_WINDOW);
168    averageBlockUploadLatencyMs =
169        registry.newGauge(
170            WASB_UPLOAD_LATENCY,
171            String.format("The average latency in milliseconds of uploading a single block" 
172            + ". The average latency is calculated over a %d-second rolling" 
173            + " window.", rollingWindowSizeInSeconds),
174            0L);
175    averageBlockDownloadLatencyMs =
176        registry.newGauge(
177            WASB_DOWNLOAD_LATENCY,
178            String.format("The average latency in milliseconds of downloading a single block" 
179            + ". The average latency is calculated over a %d-second rolling" 
180            + " window.", rollingWindowSizeInSeconds),
181            0L);
182    currentBlockUploadLatency =
183        new RollingWindowAverage(rollingWindowSizeInSeconds * 1000);
184    currentBlockDownloadLatency =
185        new RollingWindowAverage(rollingWindowSizeInSeconds * 1000);
186  }
187
188  /**
189   * The unique identifier for this file system in the metrics.
190   */
191  public UUID getFileSystemInstanceId() {
192    return fileSystemInstanceId;
193  }
194  
195  /**
196   * Get the metrics registry information.
197   */
198  public MetricsInfo getMetricsRegistryInfo() {
199    return registry.info();
200  }
201
202  /**
203   * Sets the account name to tag all the metrics with.
204   * @param accountName The account name.
205   */
206  public void setAccountName(String accountName) {
207    registry.tag("accountName",
208        "Name of the Azure Storage account that these metrics are going against",
209        accountName);
210  }
211
212  /**
213   * Sets the container name to tag all the metrics with.
214   * @param containerName The container name.
215   */
216  public void setContainerName(String containerName) {
217    registry.tag("containerName",
218        "Name of the Azure Storage container that these metrics are going against",
219        containerName);
220  }
221
222  /**
223   * Indicate that we just got a web response from Azure Storage. This should
224   * be called for every web request/response we do (to get accurate metrics
225   * of how we're hitting the storage service).
226   */
227  public void webResponse() {
228    numberOfWebResponses.incr();
229    inMemoryNumberOfWebResponses.incrementAndGet();
230  }
231
232  /**
233   * Gets the current number of web responses obtained from Azure Storage.
234   * @return The number of web responses.
235   */
236  public long getCurrentWebResponses() {
237    return inMemoryNumberOfWebResponses.get();
238  }
239
240  /**
241   * Indicate that we just created a file through WASB.
242   */
243  public void fileCreated() {
244    numberOfFilesCreated.incr();
245  }
246
247  /**
248   * Indicate that we just deleted a file through WASB.
249   */
250  public void fileDeleted() {
251    numberOfFilesDeleted.incr();
252  }
253
254  /**
255   * Indicate that we just created a directory through WASB.
256   */
257  public void directoryCreated() {
258    numberOfDirectoriesCreated.incr();
259  }
260
261  /**
262   * Indicate that we just deleted a directory through WASB.
263   */
264  public void directoryDeleted() {
265    numberOfDirectoriesDeleted.incr();
266  }
267
268  /**
269   * Sets the current gauge value for how many bytes were written in the last
270   *  second.
271   * @param currentBytesWritten The number of bytes.
272   */
273  public void updateBytesWrittenInLastSecond(long currentBytesWritten) {
274    bytesWrittenInLastSecond.set(currentBytesWritten);
275  }
276
277  /**
278   * Sets the current gauge value for how many bytes were read in the last
279   *  second.
280   * @param currentBytesRead The number of bytes.
281   */
282  public void updateBytesReadInLastSecond(long currentBytesRead) {
283    bytesReadInLastSecond.set(currentBytesRead);
284  }
285
286  /**
287   * Record the current bytes-per-second upload rate seen.
288   * @param bytesPerSecond The bytes per second.
289   */
290  public synchronized void currentUploadBytesPerSecond(long bytesPerSecond) {
291    if (bytesPerSecond > currentMaximumUploadBytesPerSecond) {
292      currentMaximumUploadBytesPerSecond = bytesPerSecond;
293      maximumUploadBytesPerSecond.set(bytesPerSecond);
294    }
295  }
296
297  /**
298   * Record the current bytes-per-second download rate seen.
299   * @param bytesPerSecond The bytes per second.
300   */
301  public synchronized void currentDownloadBytesPerSecond(long bytesPerSecond) {
302    if (bytesPerSecond > currentMaximumDownloadBytesPerSecond) {
303      currentMaximumDownloadBytesPerSecond = bytesPerSecond;
304      maximumDownloadBytesPerSecond.set(bytesPerSecond);
305    }
306  }
307
308  /**
309   * Indicate that we just uploaded some data to Azure storage.
310   * @param numberOfBytes The raw number of bytes uploaded (including overhead).
311   */
312  public void rawBytesUploaded(long numberOfBytes) {
313    rawBytesUploaded.incr(numberOfBytes);
314  }
315
316  /**
317   * Indicate that we just downloaded some data to Azure storage.
318   * @param numberOfBytes The raw number of bytes downloaded (including overhead).
319   */
320  public void rawBytesDownloaded(long numberOfBytes) {
321    rawBytesDownloaded.incr(numberOfBytes);
322  }
323
324  /**
325   * Indicate that we just uploaded a block and record its latency.
326   * @param latency The latency in milliseconds.
327   */
328  public void blockUploaded(long latency) {
329    currentBlockUploadLatency.addPoint(latency);
330  }
331
332  /**
333   * Indicate that we just downloaded a block and record its latency.
334   * @param latency The latency in milliseconds.
335   */
336  public void blockDownloaded(long latency) {
337    currentBlockDownloadLatency.addPoint(latency);
338  }
339
340  /**
341   * Indicate that we just encountered a client-side error.
342   */
343  public void clientErrorEncountered() {
344    clientErrors.incr();
345  }
346
347  /**
348   * Indicate that we just encountered a server-caused error.
349   */
350  public void serverErrorEncountered() {
351    serverErrors.incr();
352  }
353
354  /**
355   * Get the current rolling average of the upload latency.
356   * @return rolling average of upload latency in milliseconds.
357   */
358  public long getBlockUploadLatency() {
359    return currentBlockUploadLatency.getCurrentAverage();
360  }
361
362  /**
363   * Get the current rolling average of the download latency.
364   * @return rolling average of download latency in milliseconds.
365   */
366  public long getBlockDownloadLatency() {
367    return currentBlockDownloadLatency.getCurrentAverage();
368  }
369
370  /**
371   * Get the current maximum upload bandwidth.
372   * @return maximum upload bandwidth in bytes per second.
373   */
374  public long getCurrentMaximumUploadBandwidth() {
375    return currentMaximumUploadBytesPerSecond;
376  }
377
378  /**
379   * Get the current maximum download bandwidth.
380   * @return maximum download bandwidth in bytes per second.
381   */
382  public long getCurrentMaximumDownloadBandwidth() {
383    return currentMaximumDownloadBytesPerSecond;
384  }
385
386  @Override
387  public void getMetrics(MetricsCollector builder, boolean all) {
388    averageBlockDownloadLatencyMs.set(
389        currentBlockDownloadLatency.getCurrentAverage());
390    averageBlockUploadLatencyMs.set(
391        currentBlockUploadLatency.getCurrentAverage());
392    registry.snapshot(builder.addRecord(registry.info().name()), true);
393  }
394}