001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.fs.azure.metrics; 020 021import java.util.UUID; 022import java.util.concurrent.atomic.AtomicLong; 023 024import org.apache.hadoop.classification.InterfaceAudience; 025import org.apache.hadoop.classification.InterfaceStability; 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.metrics2.MetricsCollector; 028import org.apache.hadoop.metrics2.MetricsInfo; 029import org.apache.hadoop.metrics2.MetricsSource; 030import org.apache.hadoop.metrics2.annotation.Metrics; 031import org.apache.hadoop.metrics2.lib.MetricsRegistry; 032import org.apache.hadoop.metrics2.lib.MutableCounterLong; 033import org.apache.hadoop.metrics2.lib.MutableGaugeLong; 034 035/** 036 * A metrics source for the WASB file system to track all the metrics we care 037 * about for getting a clear picture of the performance/reliability/interaction 038 * of the Hadoop cluster with Azure Storage. 039 */ 040@Metrics(about="Metrics for WASB", context="azureFileSystem") 041@InterfaceAudience.Public 042@InterfaceStability.Evolving 043public final class AzureFileSystemInstrumentation implements MetricsSource { 044 045 public static final String METRIC_TAG_FILESYSTEM_ID = "wasbFileSystemId"; 046 public static final String METRIC_TAG_ACCOUNT_NAME = "accountName"; 047 public static final String METRIC_TAG_CONTAINTER_NAME = "containerName"; 048 049 public static final String WASB_WEB_RESPONSES = "wasb_web_responses"; 050 public static final String WASB_BYTES_WRITTEN = 051 "wasb_bytes_written_last_second"; 052 public static final String WASB_BYTES_READ = 053 "wasb_bytes_read_last_second"; 054 public static final String WASB_RAW_BYTES_UPLOADED = 055 "wasb_raw_bytes_uploaded"; 056 public static final String WASB_RAW_BYTES_DOWNLOADED = 057 "wasb_raw_bytes_downloaded"; 058 public static final String WASB_FILES_CREATED = "wasb_files_created"; 059 public static final String WASB_FILES_DELETED = "wasb_files_deleted"; 060 public static final String WASB_DIRECTORIES_CREATED = "wasb_directories_created"; 061 public static final String WASB_DIRECTORIES_DELETED = "wasb_directories_deleted"; 062 public static final String WASB_UPLOAD_RATE = 063 "wasb_maximum_upload_bytes_per_second"; 064 public static final String WASB_DOWNLOAD_RATE = 065 "wasb_maximum_download_bytes_per_second"; 066 public static final String WASB_UPLOAD_LATENCY = 067 "wasb_average_block_upload_latency_ms"; 068 public static final String WASB_DOWNLOAD_LATENCY = 069 "wasb_average_block_download_latency_ms"; 070 public static final String WASB_CLIENT_ERRORS = "wasb_client_errors"; 071 public static final String WASB_SERVER_ERRORS = "wasb_server_errors"; 072 073 /** 074 * Config key for how big the rolling window size for latency metrics should 075 * be (in seconds). 076 */ 077 private static final String KEY_ROLLING_WINDOW_SIZE = "fs.azure.metrics.rolling.window.size"; 078 079 private final MetricsRegistry registry = 080 new MetricsRegistry("azureFileSystem") 081 .setContext("azureFileSystem"); 082 private final MutableCounterLong numberOfWebResponses = 083 registry.newCounter( 084 WASB_WEB_RESPONSES, 085 "Total number of web responses obtained from Azure Storage", 086 0L); 087 private AtomicLong inMemoryNumberOfWebResponses = new AtomicLong(0); 088 private final MutableCounterLong numberOfFilesCreated = 089 registry.newCounter( 090 WASB_FILES_CREATED, 091 "Total number of files created through the WASB file system.", 092 0L); 093 private final MutableCounterLong numberOfFilesDeleted = 094 registry.newCounter( 095 WASB_FILES_DELETED, 096 "Total number of files deleted through the WASB file system.", 097 0L); 098 private final MutableCounterLong numberOfDirectoriesCreated = 099 registry.newCounter( 100 WASB_DIRECTORIES_CREATED, 101 "Total number of directories created through the WASB file system.", 102 0L); 103 private final MutableCounterLong numberOfDirectoriesDeleted = 104 registry.newCounter( 105 WASB_DIRECTORIES_DELETED, 106 "Total number of directories deleted through the WASB file system.", 107 0L); 108 private final MutableGaugeLong bytesWrittenInLastSecond = 109 registry.newGauge( 110 WASB_BYTES_WRITTEN, 111 "Total number of bytes written to Azure Storage during the last second.", 112 0L); 113 private final MutableGaugeLong bytesReadInLastSecond = 114 registry.newGauge( 115 WASB_BYTES_READ, 116 "Total number of bytes read from Azure Storage during the last second.", 117 0L); 118 private final MutableGaugeLong maximumUploadBytesPerSecond = 119 registry.newGauge( 120 WASB_UPLOAD_RATE, 121 "The maximum upload rate encountered to Azure Storage in bytes/second.", 122 0L); 123 private final MutableGaugeLong maximumDownloadBytesPerSecond = 124 registry.newGauge( 125 WASB_DOWNLOAD_RATE, 126 "The maximum download rate encountered to Azure Storage in bytes/second.", 127 0L); 128 private final MutableCounterLong rawBytesUploaded = 129 registry.newCounter( 130 WASB_RAW_BYTES_UPLOADED, 131 "Total number of raw bytes (including overhead) uploaded to Azure" 132 + " Storage.", 133 0L); 134 private final MutableCounterLong rawBytesDownloaded = 135 registry.newCounter( 136 WASB_RAW_BYTES_DOWNLOADED, 137 "Total number of raw bytes (including overhead) downloaded from Azure" 138 + " Storage.", 139 0L); 140 private final MutableCounterLong clientErrors = 141 registry.newCounter( 142 WASB_CLIENT_ERRORS, 143 "Total number of client-side errors by WASB (excluding 404).", 144 0L); 145 private final MutableCounterLong serverErrors = 146 registry.newCounter( 147 WASB_SERVER_ERRORS, 148 "Total number of server-caused errors by WASB.", 149 0L); 150 private final MutableGaugeLong averageBlockUploadLatencyMs; 151 private final MutableGaugeLong averageBlockDownloadLatencyMs; 152 private long currentMaximumUploadBytesPerSecond; 153 private long currentMaximumDownloadBytesPerSecond; 154 private static final int DEFAULT_LATENCY_ROLLING_AVERAGE_WINDOW = 155 5; // seconds 156 private final RollingWindowAverage currentBlockUploadLatency; 157 private final RollingWindowAverage currentBlockDownloadLatency; 158 private UUID fileSystemInstanceId; 159 160 public AzureFileSystemInstrumentation(Configuration conf) { 161 fileSystemInstanceId = UUID.randomUUID(); 162 registry.tag("wasbFileSystemId", 163 "A unique identifier for the file ", 164 fileSystemInstanceId.toString()); 165 final int rollingWindowSizeInSeconds = 166 conf.getInt(KEY_ROLLING_WINDOW_SIZE, 167 DEFAULT_LATENCY_ROLLING_AVERAGE_WINDOW); 168 averageBlockUploadLatencyMs = 169 registry.newGauge( 170 WASB_UPLOAD_LATENCY, 171 String.format("The average latency in milliseconds of uploading a single block" 172 + ". The average latency is calculated over a %d-second rolling" 173 + " window.", rollingWindowSizeInSeconds), 174 0L); 175 averageBlockDownloadLatencyMs = 176 registry.newGauge( 177 WASB_DOWNLOAD_LATENCY, 178 String.format("The average latency in milliseconds of downloading a single block" 179 + ". The average latency is calculated over a %d-second rolling" 180 + " window.", rollingWindowSizeInSeconds), 181 0L); 182 currentBlockUploadLatency = 183 new RollingWindowAverage(rollingWindowSizeInSeconds * 1000); 184 currentBlockDownloadLatency = 185 new RollingWindowAverage(rollingWindowSizeInSeconds * 1000); 186 } 187 188 /** 189 * The unique identifier for this file system in the metrics. 190 */ 191 public UUID getFileSystemInstanceId() { 192 return fileSystemInstanceId; 193 } 194 195 /** 196 * Get the metrics registry information. 197 */ 198 public MetricsInfo getMetricsRegistryInfo() { 199 return registry.info(); 200 } 201 202 /** 203 * Sets the account name to tag all the metrics with. 204 * @param accountName The account name. 205 */ 206 public void setAccountName(String accountName) { 207 registry.tag("accountName", 208 "Name of the Azure Storage account that these metrics are going against", 209 accountName); 210 } 211 212 /** 213 * Sets the container name to tag all the metrics with. 214 * @param containerName The container name. 215 */ 216 public void setContainerName(String containerName) { 217 registry.tag("containerName", 218 "Name of the Azure Storage container that these metrics are going against", 219 containerName); 220 } 221 222 /** 223 * Indicate that we just got a web response from Azure Storage. This should 224 * be called for every web request/response we do (to get accurate metrics 225 * of how we're hitting the storage service). 226 */ 227 public void webResponse() { 228 numberOfWebResponses.incr(); 229 inMemoryNumberOfWebResponses.incrementAndGet(); 230 } 231 232 /** 233 * Gets the current number of web responses obtained from Azure Storage. 234 * @return The number of web responses. 235 */ 236 public long getCurrentWebResponses() { 237 return inMemoryNumberOfWebResponses.get(); 238 } 239 240 /** 241 * Indicate that we just created a file through WASB. 242 */ 243 public void fileCreated() { 244 numberOfFilesCreated.incr(); 245 } 246 247 /** 248 * Indicate that we just deleted a file through WASB. 249 */ 250 public void fileDeleted() { 251 numberOfFilesDeleted.incr(); 252 } 253 254 /** 255 * Indicate that we just created a directory through WASB. 256 */ 257 public void directoryCreated() { 258 numberOfDirectoriesCreated.incr(); 259 } 260 261 /** 262 * Indicate that we just deleted a directory through WASB. 263 */ 264 public void directoryDeleted() { 265 numberOfDirectoriesDeleted.incr(); 266 } 267 268 /** 269 * Sets the current gauge value for how many bytes were written in the last 270 * second. 271 * @param currentBytesWritten The number of bytes. 272 */ 273 public void updateBytesWrittenInLastSecond(long currentBytesWritten) { 274 bytesWrittenInLastSecond.set(currentBytesWritten); 275 } 276 277 /** 278 * Sets the current gauge value for how many bytes were read in the last 279 * second. 280 * @param currentBytesRead The number of bytes. 281 */ 282 public void updateBytesReadInLastSecond(long currentBytesRead) { 283 bytesReadInLastSecond.set(currentBytesRead); 284 } 285 286 /** 287 * Record the current bytes-per-second upload rate seen. 288 * @param bytesPerSecond The bytes per second. 289 */ 290 public synchronized void currentUploadBytesPerSecond(long bytesPerSecond) { 291 if (bytesPerSecond > currentMaximumUploadBytesPerSecond) { 292 currentMaximumUploadBytesPerSecond = bytesPerSecond; 293 maximumUploadBytesPerSecond.set(bytesPerSecond); 294 } 295 } 296 297 /** 298 * Record the current bytes-per-second download rate seen. 299 * @param bytesPerSecond The bytes per second. 300 */ 301 public synchronized void currentDownloadBytesPerSecond(long bytesPerSecond) { 302 if (bytesPerSecond > currentMaximumDownloadBytesPerSecond) { 303 currentMaximumDownloadBytesPerSecond = bytesPerSecond; 304 maximumDownloadBytesPerSecond.set(bytesPerSecond); 305 } 306 } 307 308 /** 309 * Indicate that we just uploaded some data to Azure storage. 310 * @param numberOfBytes The raw number of bytes uploaded (including overhead). 311 */ 312 public void rawBytesUploaded(long numberOfBytes) { 313 rawBytesUploaded.incr(numberOfBytes); 314 } 315 316 /** 317 * Indicate that we just downloaded some data to Azure storage. 318 * @param numberOfBytes The raw number of bytes downloaded (including overhead). 319 */ 320 public void rawBytesDownloaded(long numberOfBytes) { 321 rawBytesDownloaded.incr(numberOfBytes); 322 } 323 324 /** 325 * Indicate that we just uploaded a block and record its latency. 326 * @param latency The latency in milliseconds. 327 */ 328 public void blockUploaded(long latency) { 329 currentBlockUploadLatency.addPoint(latency); 330 } 331 332 /** 333 * Indicate that we just downloaded a block and record its latency. 334 * @param latency The latency in milliseconds. 335 */ 336 public void blockDownloaded(long latency) { 337 currentBlockDownloadLatency.addPoint(latency); 338 } 339 340 /** 341 * Indicate that we just encountered a client-side error. 342 */ 343 public void clientErrorEncountered() { 344 clientErrors.incr(); 345 } 346 347 /** 348 * Indicate that we just encountered a server-caused error. 349 */ 350 public void serverErrorEncountered() { 351 serverErrors.incr(); 352 } 353 354 /** 355 * Get the current rolling average of the upload latency. 356 * @return rolling average of upload latency in milliseconds. 357 */ 358 public long getBlockUploadLatency() { 359 return currentBlockUploadLatency.getCurrentAverage(); 360 } 361 362 /** 363 * Get the current rolling average of the download latency. 364 * @return rolling average of download latency in milliseconds. 365 */ 366 public long getBlockDownloadLatency() { 367 return currentBlockDownloadLatency.getCurrentAverage(); 368 } 369 370 /** 371 * Get the current maximum upload bandwidth. 372 * @return maximum upload bandwidth in bytes per second. 373 */ 374 public long getCurrentMaximumUploadBandwidth() { 375 return currentMaximumUploadBytesPerSecond; 376 } 377 378 /** 379 * Get the current maximum download bandwidth. 380 * @return maximum download bandwidth in bytes per second. 381 */ 382 public long getCurrentMaximumDownloadBandwidth() { 383 return currentMaximumDownloadBytesPerSecond; 384 } 385 386 @Override 387 public void getMetrics(MetricsCollector builder, boolean all) { 388 averageBlockDownloadLatencyMs.set( 389 currentBlockDownloadLatency.getCurrentAverage()); 390 averageBlockUploadLatencyMs.set( 391 currentBlockUploadLatency.getCurrentAverage()); 392 registry.snapshot(builder.addRecord(registry.info().name()), true); 393 } 394}