001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.util; 020 021import java.io.DataInputStream; 022import java.io.DataOutputStream; 023import java.io.IOException; 024import java.nio.ByteBuffer; 025import java.util.zip.CRC32; 026import java.util.zip.Checksum; 027 028import org.apache.hadoop.classification.InterfaceAudience; 029import org.apache.hadoop.classification.InterfaceStability; 030import org.apache.hadoop.fs.ChecksumException; 031 032/** 033 * This class provides inteface and utilities for processing checksums for 034 * DFS data transfers. 035 */ 036@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 037@InterfaceStability.Evolving 038public class DataChecksum implements Checksum { 039 040 // Misc constants 041 public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len 042 043 // checksum types 044 public static final int CHECKSUM_NULL = 0; 045 public static final int CHECKSUM_CRC32 = 1; 046 public static final int CHECKSUM_CRC32C = 2; 047 public static final int CHECKSUM_DEFAULT = 3; 048 public static final int CHECKSUM_MIXED = 4; 049 050 /** The checksum types */ 051 public static enum Type { 052 NULL (CHECKSUM_NULL, 0), 053 CRC32 (CHECKSUM_CRC32, 4), 054 CRC32C(CHECKSUM_CRC32C, 4), 055 DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum 056 MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum 057 058 public final int id; 059 public final int size; 060 061 private Type(int id, int size) { 062 this.id = id; 063 this.size = size; 064 } 065 066 /** @return the type corresponding to the id. */ 067 public static Type valueOf(int id) { 068 if (id < 0 || id >= values().length) { 069 throw new IllegalArgumentException("id=" + id 070 + " out of range [0, " + values().length + ")"); 071 } 072 return values()[id]; 073 } 074 } 075 076 /** 077 * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm 078 * is chosen depending on the platform. 079 */ 080 public static Checksum newCrc32() { 081 return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32(); 082 } 083 084 public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) { 085 if ( bytesPerChecksum <= 0 ) { 086 return null; 087 } 088 089 switch ( type ) { 090 case NULL : 091 return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum ); 092 case CRC32 : 093 return new DataChecksum(type, newCrc32(), bytesPerChecksum ); 094 case CRC32C: 095 return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum); 096 default: 097 return null; 098 } 099 } 100 101 /** 102 * Creates a DataChecksum from HEADER_LEN bytes from arr[offset]. 103 * @return DataChecksum of the type in the array or null in case of an error. 104 */ 105 public static DataChecksum newDataChecksum( byte bytes[], int offset ) { 106 if ( offset < 0 || bytes.length < offset + HEADER_LEN ) { 107 return null; 108 } 109 110 // like readInt(): 111 int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 112 ( (bytes[offset+2] & 0xff) << 16 ) | 113 ( (bytes[offset+3] & 0xff) << 8 ) | 114 ( (bytes[offset+4] & 0xff) ); 115 return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum ); 116 } 117 118 /** 119 * This constructucts a DataChecksum by reading HEADER_LEN bytes from 120 * input stream <i>in</i> 121 */ 122 public static DataChecksum newDataChecksum( DataInputStream in ) 123 throws IOException { 124 int type = in.readByte(); 125 int bpc = in.readInt(); 126 DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc ); 127 if ( summer == null ) { 128 throw new IOException( "Could not create DataChecksum of type " + 129 type + " with bytesPerChecksum " + bpc ); 130 } 131 return summer; 132 } 133 134 /** 135 * Writes the checksum header to the output stream <i>out</i>. 136 */ 137 public void writeHeader( DataOutputStream out ) 138 throws IOException { 139 out.writeByte( type.id ); 140 out.writeInt( bytesPerChecksum ); 141 } 142 143 public byte[] getHeader() { 144 byte[] header = new byte[DataChecksum.HEADER_LEN]; 145 header[0] = (byte) (type.id & 0xff); 146 // Writing in buffer just like DataOutput.WriteInt() 147 header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff); 148 header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff); 149 header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff); 150 header[1+3] = (byte) (bytesPerChecksum & 0xff); 151 return header; 152 } 153 154 /** 155 * Writes the current checksum to the stream. 156 * If <i>reset</i> is true, then resets the checksum. 157 * @return number of bytes written. Will be equal to getChecksumSize(); 158 */ 159 public int writeValue( DataOutputStream out, boolean reset ) 160 throws IOException { 161 if ( type.size <= 0 ) { 162 return 0; 163 } 164 165 if ( type.size == 4 ) { 166 out.writeInt( (int) summer.getValue() ); 167 } else { 168 throw new IOException( "Unknown Checksum " + type ); 169 } 170 171 if ( reset ) { 172 reset(); 173 } 174 175 return type.size; 176 } 177 178 /** 179 * Writes the current checksum to a buffer. 180 * If <i>reset</i> is true, then resets the checksum. 181 * @return number of bytes written. Will be equal to getChecksumSize(); 182 */ 183 public int writeValue( byte[] buf, int offset, boolean reset ) 184 throws IOException { 185 if ( type.size <= 0 ) { 186 return 0; 187 } 188 189 if ( type.size == 4 ) { 190 int checksum = (int) summer.getValue(); 191 buf[offset+0] = (byte) ((checksum >>> 24) & 0xff); 192 buf[offset+1] = (byte) ((checksum >>> 16) & 0xff); 193 buf[offset+2] = (byte) ((checksum >>> 8) & 0xff); 194 buf[offset+3] = (byte) (checksum & 0xff); 195 } else { 196 throw new IOException( "Unknown Checksum " + type ); 197 } 198 199 if ( reset ) { 200 reset(); 201 } 202 203 return type.size; 204 } 205 206 /** 207 * Compares the checksum located at buf[offset] with the current checksum. 208 * @return true if the checksum matches and false otherwise. 209 */ 210 public boolean compare( byte buf[], int offset ) { 211 if ( type.size == 4 ) { 212 int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 213 ( (buf[offset+1] & 0xff) << 16 ) | 214 ( (buf[offset+2] & 0xff) << 8 ) | 215 ( (buf[offset+3] & 0xff) ); 216 return checksum == (int) summer.getValue(); 217 } 218 return type.size == 0; 219 } 220 221 private final Type type; 222 private final Checksum summer; 223 private final int bytesPerChecksum; 224 private int inSum = 0; 225 226 private DataChecksum( Type type, Checksum checksum, int chunkSize ) { 227 this.type = type; 228 summer = checksum; 229 bytesPerChecksum = chunkSize; 230 } 231 232 // Accessors 233 public Type getChecksumType() { 234 return type; 235 } 236 public int getChecksumSize() { 237 return type.size; 238 } 239 public int getBytesPerChecksum() { 240 return bytesPerChecksum; 241 } 242 public int getNumBytesInSum() { 243 return inSum; 244 } 245 246 public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE; 247 static public int getChecksumHeaderSize() { 248 return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int 249 } 250 //Checksum Interface. Just a wrapper around member summer. 251 @Override 252 public long getValue() { 253 return summer.getValue(); 254 } 255 @Override 256 public void reset() { 257 summer.reset(); 258 inSum = 0; 259 } 260 @Override 261 public void update( byte[] b, int off, int len ) { 262 if ( len > 0 ) { 263 summer.update( b, off, len ); 264 inSum += len; 265 } 266 } 267 @Override 268 public void update( int b ) { 269 summer.update( b ); 270 inSum += 1; 271 } 272 273 /** 274 * Verify that the given checksums match the given data. 275 * 276 * The 'mark' of the ByteBuffer parameters may be modified by this function,. 277 * but the position is maintained. 278 * 279 * @param data the DirectByteBuffer pointing to the data to verify. 280 * @param checksums the DirectByteBuffer pointing to a series of stored 281 * checksums 282 * @param fileName the name of the file being read, for error-reporting 283 * @param basePos the file position to which the start of 'data' corresponds 284 * @throws ChecksumException if the checksums do not match 285 */ 286 public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums, 287 String fileName, long basePos) 288 throws ChecksumException { 289 if (type.size == 0) return; 290 291 if (data.hasArray() && checksums.hasArray()) { 292 verifyChunkedSums( 293 data.array(), data.arrayOffset() + data.position(), data.remaining(), 294 checksums.array(), checksums.arrayOffset() + checksums.position(), 295 fileName, basePos); 296 return; 297 } 298 if (NativeCrc32.isAvailable()) { 299 NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data, 300 fileName, basePos); 301 return; 302 } 303 304 int startDataPos = data.position(); 305 data.mark(); 306 checksums.mark(); 307 try { 308 byte[] buf = new byte[bytesPerChecksum]; 309 byte[] sum = new byte[type.size]; 310 while (data.remaining() > 0) { 311 int n = Math.min(data.remaining(), bytesPerChecksum); 312 checksums.get(sum); 313 data.get(buf, 0, n); 314 summer.reset(); 315 summer.update(buf, 0, n); 316 int calculated = (int)summer.getValue(); 317 int stored = (sum[0] << 24 & 0xff000000) | 318 (sum[1] << 16 & 0xff0000) | 319 (sum[2] << 8 & 0xff00) | 320 sum[3] & 0xff; 321 if (calculated != stored) { 322 long errPos = basePos + data.position() - startDataPos - n; 323 throw new ChecksumException( 324 "Checksum error: "+ fileName + " at "+ errPos + 325 " exp: " + stored + " got: " + calculated, errPos); 326 } 327 } 328 } finally { 329 data.reset(); 330 checksums.reset(); 331 } 332 } 333 334 /** 335 * Implementation of chunked verification specifically on byte arrays. This 336 * is to avoid the copy when dealing with ByteBuffers that have array backing. 337 */ 338 private void verifyChunkedSums( 339 byte[] data, int dataOff, int dataLen, 340 byte[] checksums, int checksumsOff, String fileName, 341 long basePos) throws ChecksumException { 342 343 int remaining = dataLen; 344 int dataPos = 0; 345 while (remaining > 0) { 346 int n = Math.min(remaining, bytesPerChecksum); 347 348 summer.reset(); 349 summer.update(data, dataOff + dataPos, n); 350 dataPos += n; 351 remaining -= n; 352 353 int calculated = (int)summer.getValue(); 354 int stored = (checksums[checksumsOff] << 24 & 0xff000000) | 355 (checksums[checksumsOff + 1] << 16 & 0xff0000) | 356 (checksums[checksumsOff + 2] << 8 & 0xff00) | 357 checksums[checksumsOff + 3] & 0xff; 358 checksumsOff += 4; 359 if (calculated != stored) { 360 long errPos = basePos + dataPos - n; 361 throw new ChecksumException( 362 "Checksum error: "+ fileName + " at "+ errPos + 363 " exp: " + stored + " got: " + calculated, errPos); 364 } 365 } 366 } 367 368 /** 369 * Calculate checksums for the given data. 370 * 371 * The 'mark' of the ByteBuffer parameters may be modified by this function, 372 * but the position is maintained. 373 * 374 * @param data the DirectByteBuffer pointing to the data to checksum. 375 * @param checksums the DirectByteBuffer into which checksums will be 376 * stored. Enough space must be available in this 377 * buffer to put the checksums. 378 */ 379 public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) { 380 if (type.size == 0) return; 381 382 if (data.hasArray() && checksums.hasArray()) { 383 calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(), 384 checksums.array(), checksums.arrayOffset() + checksums.position()); 385 return; 386 } 387 388 data.mark(); 389 checksums.mark(); 390 try { 391 byte[] buf = new byte[bytesPerChecksum]; 392 while (data.remaining() > 0) { 393 int n = Math.min(data.remaining(), bytesPerChecksum); 394 data.get(buf, 0, n); 395 summer.reset(); 396 summer.update(buf, 0, n); 397 checksums.putInt((int)summer.getValue()); 398 } 399 } finally { 400 data.reset(); 401 checksums.reset(); 402 } 403 } 404 405 /** 406 * Implementation of chunked calculation specifically on byte arrays. This 407 * is to avoid the copy when dealing with ByteBuffers that have array backing. 408 */ 409 private void calculateChunkedSums( 410 byte[] data, int dataOffset, int dataLength, 411 byte[] sums, int sumsOffset) { 412 413 int remaining = dataLength; 414 while (remaining > 0) { 415 int n = Math.min(remaining, bytesPerChecksum); 416 summer.reset(); 417 summer.update(data, dataOffset, n); 418 dataOffset += n; 419 remaining -= n; 420 long calculated = summer.getValue(); 421 sums[sumsOffset++] = (byte) (calculated >> 24); 422 sums[sumsOffset++] = (byte) (calculated >> 16); 423 sums[sumsOffset++] = (byte) (calculated >> 8); 424 sums[sumsOffset++] = (byte) (calculated); 425 } 426 } 427 428 @Override 429 public boolean equals(Object other) { 430 if (!(other instanceof DataChecksum)) { 431 return false; 432 } 433 DataChecksum o = (DataChecksum)other; 434 return o.bytesPerChecksum == this.bytesPerChecksum && 435 o.type == this.type; 436 } 437 438 @Override 439 public int hashCode() { 440 return (this.type.id + 31) * this.bytesPerChecksum; 441 } 442 443 @Override 444 public String toString() { 445 return "DataChecksum(type=" + type + 446 ", chunkSize=" + bytesPerChecksum + ")"; 447 } 448 449 /** 450 * This just provides a dummy implimentation for Checksum class 451 * This is used when there is no checksum available or required for 452 * data 453 */ 454 static class ChecksumNull implements Checksum { 455 456 public ChecksumNull() {} 457 458 //Dummy interface 459 @Override 460 public long getValue() { return 0; } 461 @Override 462 public void reset() {} 463 @Override 464 public void update(byte[] b, int off, int len) {} 465 @Override 466 public void update(int b) {} 467 }; 468}