/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.tar;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_NUMBYTES_LEN;
import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_OFFSET_LEN;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.compress.utils.IOUtils;

/**
 * This class provides static utility methods to work with byte streams.
 *
 * @Immutable
 */
// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
public class TarUtils {

    private static final int BYTE_MASK = 255;

    static final ZipEncoding DEFAULT_ENCODING =
        ZipEncodingHelper.getZipEncoding(null);

    /**
     * Encapsulates the algorithms used up to Commons Compress 1.3 as
     * ZipEncoding.
     */
    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
            @Override
            public boolean canEncode(final String name) { return true; }

            @Override
            public ByteBuffer encode(final String name) {
                final int length = name.length();
                final byte[] buf = new byte[length];

                // copy until end of input or output is reached.
                for (int i = 0; i < length; ++i) {
                    buf[i] = (byte) name.charAt(i);
                }
                return ByteBuffer.wrap(buf);
            }

            @Override
            public String decode(final byte[] buffer) {
                final int length = buffer.length;
                final StringBuilder result = new StringBuilder(length);

                for (final byte b : buffer) {
                    if (b == 0) { // Trailing null
                        break;
                    }
                    result.append((char) (b & 0xFF)); // Allow for sign-extension
                }

                return result.toString();
            }
        };

    /** Private constructor to prevent instantiation of this utility class. */
    private TarUtils(){
    }

    /**
     * Parse an octal string from a buffer.
     *
     * <p>Leading spaces are ignored.
     * The buffer must contain a trailing space or NUL,
     * and may contain an additional trailing space or NUL.</p>
     *
     * <p>The input buffer is allowed to contain all NULs,
     * in which case the method returns 0L
     * (this allows for missing fields).</p>
     *
     * <p>To work-around some tar implementations that insert a
     * leading NUL this method returns 0 if it detects a leading NUL
     * since Commons Compress 1.4.</p>
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
     * @return The long value of the octal string.
     * @throws IllegalArgumentException if the length is less than 2 or if an invalid byte is detected.
     */
    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
        long    result = 0;
        int     end = offset + length;
        int     start = offset;

        if (length < 2) {
            throw new IllegalArgumentException("Length " + length + " must be at least 2");
        }

        if (buffer[start] == 0) {
            return 0L;
        }

        // Skip leading spaces
        while (start < end) {
            if (buffer[start] != ' ') {
                break;
            }
            start++;
        }

        // Trim all trailing NULs and spaces.
        // The ustar and POSIX tar specs require a trailing NUL or
        // space but some implementations use the extra digit for big
        // sizes/uids/gids ...
        byte trailer = buffer[end - 1];
        while (start < end && (trailer == 0 || trailer == ' ')) {
            end--;
            trailer = buffer[end - 1];
        }

        for (; start < end; start++) {
            final byte currentByte = buffer[start];
            // CheckStyle:MagicNumber OFF
            if (currentByte < '0' || currentByte > '7') {
                throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
            }
            result = (result << 3) + (currentByte - '0'); // convert from ASCII
            // CheckStyle:MagicNumber ON
        }

        return result;
    }

    /**
     * Compute the value contained in a byte buffer.  If the most
     * significant bit of the first byte in the buffer is set, this
     * bit is ignored and the rest of the buffer is interpreted as a
     * binary number.  Otherwise, the buffer is interpreted as an
     * octal number as per the parseOctal function above.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse.
     * @return The long value of the octal or binary string.
     * @throws IllegalArgumentException if the trailing space/NUL is
     * missing or an invalid byte is detected in an octal number, or
     * if a binary number would exceed the size of a signed long
     * 64-bit integer.
     * @since 1.4
     */
    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
                                          final int length) {

        if ((buffer[offset] & 0x80) == 0) {
            return parseOctal(buffer, offset, length);
        }
        // A first byte of 0xff marks a negative number, 0x80 a positive one.
        final boolean negative = buffer[offset] == (byte) 0xff;
        if (length < 9) {
            return parseBinaryLong(buffer, offset, length, negative);
        }
        return parseBinaryBigInteger(buffer, offset, length, negative);
    }

    /**
     * Parses a binary (base-256) number of fewer than 9 bytes; the first
     * byte only carries the marker and is skipped.
     */
    private static long parseBinaryLong(final byte[] buffer, final int offset,
                                        final int length,
                                        final boolean negative) {
        if (length >= 9) {
            throw new IllegalArgumentException("At offset " + offset + ", "
                                               + length + " byte binary number"
                                               + " exceeds maximum signed long"
                                               + " value");
        }
        long val = 0;
        for (int i = 1; i < length; i++) {
            val = (val << 8) + (buffer[offset + i] & 0xff);
        }
        if (negative) {
            // 2's complement
            val--;
            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
        }
        return negative ? -val : val;
    }

    /**
     * Parses a binary (base-256) number of 9 or more bytes via BigInteger and
     * rejects values that do not fit into a signed long.
     */
    private static long parseBinaryBigInteger(final byte[] buffer,
                                              final int offset,
                                              final int length,
                                              final boolean negative) {
        final byte[] remainder = new byte[length - 1];
        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
        BigInteger val = new BigInteger(remainder);
        if (negative) {
            // 2's complement
            val = val.add(BigInteger.valueOf(-1)).not();
        }
        if (val.bitLength() > 63) {
            throw new IllegalArgumentException("At offset " + offset + ", "
                                               + length + " byte binary number"
                                               + " exceeds maximum signed long"
                                               + " value");
        }
        return negative ? -val.longValue() : val.longValue();
    }

    /**
     * Parse a boolean byte from a buffer.
     * Only the single byte at {@code offset} is inspected; the result is
     * {@code true} if and only if that byte has the value 1.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @return The boolean value of the byte.
     */
    public static boolean parseBoolean(final byte[] buffer, final int offset) {
        return buffer[offset] == 1;
    }

    // Helper method to generate the exception message
    private static String exceptionMessage(final byte[] buffer, final int offset,
            final int length, final int current, final byte currentByte) {
        // default charset is good enough for an exception message,
        //
        // the alternative was to modify parseOctal and
        // parseOctalOrBinary to receive the ZipEncoding of the
        // archive (deprecating the existing public methods, of
        // course) and dealing with the fact that ZipEncoding#decode
        // can throw an IOException which parseOctal* doesn't declare
        String string = new String(buffer, offset, length, Charset.defaultCharset());

        string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
        return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
    }

    /**
     * Parse an entry name from a buffer.
     * Parsing stops when a NUL is found
     * or the buffer length is reached.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse.
     * @return The entry name.
     */
    public static String parseName(final byte[] buffer, final int offset, final int length) {
        try {
            return parseName(buffer, offset, length, DEFAULT_ENCODING);
        } catch (final IOException ex) { // NOSONAR
            try {
                return parseName(buffer, offset, length, FALLBACK_ENCODING);
            } catch (final IOException ex2) {
                // impossible
                throw new UncheckedIOException(ex2); //NOSONAR
            }
        }
    }

    /**
     * Parse an entry name from a buffer.
     * Parsing stops when a NUL is found
     * or the buffer length is reached.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse.
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     * @return The entry name.
     * @throws IOException on error
     */
    public static String parseName(final byte[] buffer, final int offset,
                                   final int length,
                                   final ZipEncoding encoding)
        throws IOException {

        // Count the bytes up to the first NUL, at most length of them.
        int len = 0;
        for (int i = offset; len < length && buffer[i] != 0; i++) {
            len++;
        }
        if (len > 0) {
            final byte[] b = new byte[len];
            System.arraycopy(buffer, offset, b, 0, len);
            return encoding.decode(b);
        }
        return "";
    }

    /**
     * Parses the content of a PAX 1.0 sparse block.
     * @since 1.20
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @return a parsed sparse struct
     */
    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
        final long sparseOffset = parseOctalOrBinary(buffer, offset, SPARSE_OFFSET_LEN);
        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + SPARSE_OFFSET_LEN, SPARSE_NUMBYTES_LEN);

        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
    }

    /**
     * Reads {@code entries} consecutive sparse structs from the buffer,
     * rejecting negative offsets or sizes.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset of the first sparse struct.
     * @param entries The number of sparse structs to read.
     * @return unmodifiable list of the parsed sparse structs
     * @throws IOException if a struct is invalid or contains a negative value
     * @since 1.21
     */
    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries)
        throws IOException {
        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        for (int i = 0; i < entries; i++) {
            try {
                final TarArchiveStructSparse sparseHeader =
                    parseSparse(buffer, offset + i * (SPARSE_OFFSET_LEN + SPARSE_NUMBYTES_LEN));

                if (sparseHeader.getOffset() < 0) {
                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
                }
                if (sparseHeader.getNumbytes() < 0) {
                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
                }
                sparseHeaders.add(sparseHeader);
            } catch (final IllegalArgumentException ex) {
                // thrown internally by parseOctalOrBinary
                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
            }
        }
        return Collections.unmodifiableList(sparseHeaders);
    }

    /**
     * Copy a name into a buffer.
     * Copies characters from the name into the buffer
     * starting at the specified offset.
     * If the buffer is longer than the name, the buffer
     * is filled with trailing NULs.
     * If the name is longer than the buffer,
     * the output is truncated.
     *
     * @param name The header name from which to copy the characters.
     * @param buf The buffer where the name is to be stored.
     * @param offset The starting offset into the buffer
     * @param length The maximum number of header bytes to copy.
     * @return The updated offset, i.e. offset + length
     */
    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
        try {
            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
        } catch (final IOException ex) { // NOSONAR
            try {
                return formatNameBytes(name, buf, offset, length,
                                       FALLBACK_ENCODING);
            } catch (final IOException ex2) {
                // impossible
                throw new UncheckedIOException(ex2); //NOSONAR
            }
        }
    }

    /**
     * Copy a name into a buffer.
     * Copies characters from the name into the buffer
     * starting at the specified offset.
     * If the buffer is longer than the name, the buffer
     * is filled with trailing NULs.
     * If the name is longer than the buffer,
     * the output is truncated.
     *
     * @param name The header name from which to copy the characters.
     * @param buf The buffer where the name is to be stored.
     * @param offset The starting offset into the buffer
     * @param length The maximum number of header bytes to copy.
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     * @return The updated offset, i.e. offset + length
     * @throws IOException on error
     */
    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
                                      final int length,
                                      final ZipEncoding encoding)
        throws IOException {
        int len = name.length();
        ByteBuffer b = encoding.encode(name);
        // Drop trailing characters until the encoded form fits into the field.
        while (b.limit() > length && len > 0) {
            b = encoding.encode(name.substring(0, --len));
        }
        final int limit = b.limit() - b.position();
        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);

        // Pad any remaining output bytes with NUL
        for (int i = limit; i < length; ++i) {
            buf[offset + i] = 0;
        }

        return offset + length;
    }

    /**
     * Fill buffer with unsigned octal number, padded with leading zeroes.
     *
     * @param value number to convert to octal - treated as unsigned
     * @param buffer destination buffer
     * @param offset starting offset in buffer
     * @param length length of buffer to fill
     * @throws IllegalArgumentException if the value will not fit in the buffer
     */
    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
            final int offset, final int length) {
        int remaining = length;
        remaining--;
        if (value == 0) {
            buffer[offset + remaining--] = (byte) '0';
        } else {
            long val = value;
            // Write digits from the least significant end of the field backwards.
            for (; remaining >= 0 && val != 0; --remaining) {
                // CheckStyle:MagicNumber OFF
                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
                val = val >>> 3;
                // CheckStyle:MagicNumber ON
            }
            if (val != 0){
                throw new IllegalArgumentException
                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
            }
        }

        for (; remaining >= 0; --remaining) { // leading zeros
            buffer[offset + remaining] = (byte) '0';
        }
    }

    /**
     * Write an octal integer into a buffer.
     *
     * Uses {@link #formatUnsignedOctalString} to format
     * the value as an octal string with leading zeros.
     * The converted number is followed by space and NUL
     *
     * @param value The value to write
     * @param buf The buffer to receive the output
     * @param offset The starting offset into the buffer
     * @param length The size of the output buffer
     * @return The updated offset, i.e offset+length
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {

        int idx=length-2; // For space and trailing null
        formatUnsignedOctalString(value, buf, offset, idx);

        buf[offset + idx++] = (byte) ' '; // Trailing space
        buf[offset + idx]   = 0; // Trailing null

        return offset + length;
    }

    /**
     * Write an octal long integer into a buffer.
     *
     * Uses {@link #formatUnsignedOctalString} to format
     * the value as an octal string with leading zeros.
     * The converted number is followed by a space.
     *
     * @param value The value to write as octal
     * @param buf The destination buffer.
     * @param offset The starting offset into the buffer.
     * @param length The length of the buffer
     * @return The updated offset
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {

        final int idx=length-1; // For space

        formatUnsignedOctalString(value, buf, offset, idx);
        buf[offset + idx] = (byte) ' '; // Trailing space

        return offset + length;
    }

    /**
     * Write a long integer into a buffer as an octal string if this
     * will fit, or as a binary number otherwise.
     *
     * Uses {@link #formatUnsignedOctalString} to format
     * the value as an octal string with leading zeros.
     * The converted number is followed by a space.
     *
     * @param value The value to write into the buffer.
     * @param buf The destination buffer.
     * @param offset The starting offset into the buffer.
     * @param length The length of the buffer.
     * @return The updated offset.
     * @throws IllegalArgumentException if the value (and trailer)
     * will not fit in the buffer.
     * @since 1.4
     */
    public static int formatLongOctalOrBinaryBytes(
        final long value, final byte[] buf, final int offset, final int length) {

        // Check whether we are dealing with UID/GID or SIZE field
        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;

        final boolean negative = value < 0;
        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
            return formatLongOctalBytes(value, buf, offset, length);
        }

        if (length < 9) {
            formatLongBinary(value, buf, offset, length, negative);
        } else {
            formatBigIntegerBinary(value, buf, offset, length, negative);
        }

        // The first byte is overwritten with the binary marker: 0xff negative, 0x80 positive.
        buf[offset] = (byte) (negative ? 0xff : 0x80);
        return offset + length;
    }

    /**
     * Writes the value as a binary number into a field of fewer than 9 bytes.
     */
    private static void formatLongBinary(final long value, final byte[] buf,
                                         final int offset, final int length,
                                         final boolean negative) {
        final int bits = (length - 1) * 8;
        final long max = 1L << bits;
        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
        if (val < 0 || val >= max) {
            throw new IllegalArgumentException("Value " + value +
                " is too large for " + length + " byte field.");
        }
        if (negative) {
            val ^= max - 1;
            val++;
            val |= 0xffL << bits;
        }
        for (int i = offset + length - 1; i >= offset; i--) {
            buf[i] = (byte) val;
            val >>= 8;
        }
    }

    /**
     * Writes the value as a binary number into a field of 9 or more bytes,
     * right-aligned and padded with 0xff (negative) or 0 (positive).
     */
    private static void formatBigIntegerBinary(final long value, final byte[] buf,
                                               final int offset,
                                               final int length,
                                               final boolean negative) {
        final BigInteger val = BigInteger.valueOf(value);
        final byte[] b = val.toByteArray();
        final int len = b.length;
        if (len > length - 1) {
            throw new IllegalArgumentException("Value " + value +
                " is too large for " + length + " byte field.");
        }
        final int off = offset + length - len;
        System.arraycopy(b, 0, buf, off, len);
        final byte fill = (byte) (negative ? 0xff : 0);
        for (int i = offset + 1; i < off; i++) {
            buf[i] = fill;
        }
    }

    /**
     * Writes an octal value into a buffer.
     *
     * Uses {@link #formatUnsignedOctalString} to format
     * the value as an octal string with leading zeros.
     * The converted number is followed by NUL and then space.
     *
     * @param value The value to convert
     * @param buf The destination buffer
     * @param offset The starting offset into the buffer.
     * @param length The size of the buffer.
     * @return The updated value of offset, i.e. offset+length
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {

        int idx=length-2; // for NUL and space
        formatUnsignedOctalString(value, buf, offset, idx);

        buf[offset + idx++]   = 0; // Trailing null
        buf[offset + idx]     = (byte) ' '; // Trailing space

        return offset + length;
    }

    /**
     * Compute the checksum of a tar entry header.
     *
     * @param buf The tar entry's header buffer.
     * @return The computed checksum.
     */
    public static long computeCheckSum(final byte[] buf) {
        long sum = 0;

        for (final byte element : buf) {
            sum += BYTE_MASK & element;
        }

        return sum;
    }

    /**
     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>:
     * <blockquote>
     * The checksum is calculated by taking the sum of the unsigned byte values
     * of the header block with the eight checksum bytes taken to be ascii
     * spaces (decimal value 32). It is stored as a six digit octal number with
     * leading zeroes followed by a NUL and then a space. Various
     * implementations do not adhere to this format. For better compatibility,
     * ignore leading and trailing whitespace, and get the first six digits. In
     * addition, some historic tar implementations treated bytes as signed.
     * Implementations typically calculate the checksum both ways, and treat it
     * as good if either the signed or unsigned sum matches the included
     * checksum.
     * </blockquote>
     * <p>
     * The return value of this method should be treated as a best-effort
     * heuristic rather than an absolute and final truth. The checksum
     * verification logic may well evolve over time as more special cases
     * are encountered.
     *
     * @param header tar header
     * @return whether the checksum is reasonably good
     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
     * @since 1.5
     */
    public static boolean verifyCheckSum(final byte[] header) {
        final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
        long unsignedSum = 0;
        long signedSum = 0;

        for (int i = 0; i < header.length; i++) {
            byte b = header[i];
            // The checksum field itself is counted as if it held spaces.
            if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
                b = ' ';
            }
            unsignedSum += 0xff & b;
            signedSum += b;
        }
        return storedSum == unsignedSum || storedSum == signedSum;
    }

    /**
     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
     *
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     *
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     *
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     *
     * @param inputStream input stream to read keys and values
     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
     *                      the sparse headers need to be stored in an array, not a map
     * @param globalPaxHeaders global PAX headers of the tar archive
     * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry.
     * @throws IOException if an I/O error occurs.
     * @deprecated use the four-arg version instead
     */
    @Deprecated
    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders)
            throws IOException {
        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
    }

    /**
     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
     *
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     *
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     *
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     *
     * @param inputStream input stream to read keys and values
     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
     *                      the sparse headers need to be stored in an array, not a map
     * @param globalPaxHeaders global PAX headers of the tar archive
     * @param headerSize total size of the PAX header, will be ignored if negative
     * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry.
     * @throws IOException if an I/O error occurs.
     * @since 1.21
     */
    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream,
            final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders,
            final long headerSize) throws IOException {
        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
        // Pending GNU.sparse.offset value that has not yet been paired with a numbytes value.
        Long offset = null;
        // Format is "length keyword=value\n";
        int totalRead = 0;
        while(true) { // get length
            int ch;
            int len = 0;
            int read = 0;
            while((ch = inputStream.read()) != -1) {
                read++;
                totalRead++;
                if (ch == '\n') { // blank line in header
                    break;
                }
                if (ch == ' '){ // End of length string
                    // Get keyword
                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
                    while((ch = inputStream.read()) != -1) {
                        read++;
                        totalRead++;
                        // Stop on counter overflow or once the declared header size is exhausted.
                        if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) {
                            break;
                        }
                        if (ch == '='){ // end of keyword
                            final String keyword = coll.toString(CharsetNames.UTF_8);
                            // Get rest of entry
                            final int restLen = len - read;
                            if (restLen <= 1) { // only NL
                                headers.remove(keyword);
                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
                                throw new IOException("Paxheader value size " + restLen
                                    + " exceeds size of header record");
                            } else {
                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
                                final int got = rest.length;
                                if (got != restLen) {
                                    throw new IOException("Failed to read "
                                                          + "Paxheader. Expected "
                                                          + restLen
                                                          + " bytes, read "
                                                          + got);
                                }
                                totalRead += restLen;
                                // Drop trailing NL
                                if (rest[restLen - 1] != '\n') {
                                    throw new IOException("Failed to read Paxheader."
                                       + "Value should end with a newline");
                                }
                                final String value = new String(rest, 0, restLen - 1, UTF_8);
                                headers.put(keyword, value);

                                // for 0.0 PAX Headers
                                if (keyword.equals("GNU.sparse.offset")) {
                                    if (offset != null) {
                                        // previous GNU.sparse.offset header but no numBytes
                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
                                    }
                                    try {
                                        offset = Long.valueOf(value);
                                    } catch (final NumberFormatException ex) {
                                        // keep the parse failure as the cause
                                        throw new IOException("Failed to read Paxheader."
                                            + "GNU.sparse.offset contains a non-numeric value", ex);
                                    }
                                    if (offset < 0) {
                                        throw new IOException("Failed to read Paxheader."
                                            + "GNU.sparse.offset contains negative value");
                                    }
                                }

                                // for 0.0 PAX Headers
                                if (keyword.equals("GNU.sparse.numbytes")) {
                                    if (offset == null) {
                                        throw new IOException("Failed to read Paxheader." +
                                                "GNU.sparse.offset is expected before GNU.sparse.numbytes shows up.");
                                    }
                                    final long numbytes;
                                    try {
                                        numbytes = Long.parseLong(value);
                                    } catch (final NumberFormatException ex) {
                                        // keep the parse failure as the cause
                                        throw new IOException("Failed to read Paxheader."
                                            + "GNU.sparse.numbytes contains a non-numeric value.", ex);
                                    }
                                    if (numbytes < 0) {
                                        throw new IOException("Failed to read Paxheader."
                                            + "GNU.sparse.numbytes contains negative value");
                                    }
                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
                                    offset = null;
                                }
                            }
                            break;
                        }
                        coll.write((byte) ch);
                    }
                    break; // Processed single header
                }

                // COMPRESS-530 : throw if we encounter a non-number while reading length
                if (ch < '0' || ch > '9') {
                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
                }

                len *= 10;
                len += ch - '0';
            }
            if (ch == -1){ // EOF
                break;
            }
        }
        if (offset != null) {
            // offset but no numBytes
            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
        }
        return headers;
    }

    /**
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     *
     * <p>Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RuntimeException. You
     * should use {@link #parseFromPAX01SparseHeaders} directly instead.
     *
     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * @return sparse headers parsed from sparse map
     * @deprecated use #parseFromPAX01SparseHeaders instead
     */
    @Deprecated
    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(String sparseMap) {
        try {
            return parseFromPAX01SparseHeaders(sparseMap);
        } catch (IOException ex) {
            throw new UncheckedIOException(ex.getMessage(), ex);
        }
    }

    /**
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     *
     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * @return unmodifiable list of sparse headers parsed from sparse map
     * @throws IOException Corrupted TAR archive.
     * @since 1.21
     */
    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(String sparseMap)
        throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        String[] sparseHeaderStrings = sparseMap.split(",");
        // Values come in (offset, numbytes) pairs, so an odd count is malformed.
        if (sparseHeaderStrings.length % 2 == 1) {
            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
        }

        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
            long sparseOffset;
            try {
                sparseOffset = Long.parseLong(sparseHeaderStrings[i]);
            } catch (final NumberFormatException ex) {
                // keep the parse failure as the cause
                throw new IOException("Corrupted TAR archive."
                    + " Sparse struct offset contains a non-numeric value", ex);
            }
            if (sparseOffset < 0) {
                throw new IOException("Corrupted TAR archive."
                    + " Sparse struct offset contains negative value");
            }
            long sparseNumbytes;
            try {
                sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]);
            } catch (final NumberFormatException ex) {
                // keep the parse failure as the cause
                throw new IOException("Corrupted TAR archive."
                    + " Sparse struct numbytes contains a non-numeric value", ex);
            }
            if (sparseNumbytes < 0) {
                throw new IOException("Corrupted TAR archive."
                    + " Sparse struct numbytes contains negative value");
            }
            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
        }

        return Collections.unmodifiableList(sparseHeaders);
    }

    /**
     * For PAX Format 1.X:
     * The sparse map itself is stored in the file data block, preceding the actual file data.
     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
     * giving the offset and size of the data block it describes.
     * @param inputStream parsing source.
     * @param recordSize The size of the TAR header
     * @return sparse headers
     * @throws IOException if an I/O error occurs.
     */
    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
        // for 1.X PAX Headers
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        long bytesRead = 0;

        long[] readResult = readLineOfNumberForPax1X(inputStream);
        long sparseHeadersCount = readResult[0];
        if (sparseHeadersCount < 0) {
            // overflow while reading number?
            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
        }
        bytesRead += readResult[1];
        while (sparseHeadersCount-- > 0) {
            readResult = readLineOfNumberForPax1X(inputStream);
            final long sparseOffset = readResult[0];
            if (sparseOffset < 0) {
                throw new IOException("Corrupted TAR archive."
                    + " Sparse header block offset contains negative value");
            }
            bytesRead += readResult[1];

            readResult = readLineOfNumberForPax1X(inputStream);
            final long sparseNumbytes = readResult[0];
            if (sparseNumbytes < 0) {
                throw new IOException("Corrupted TAR archive."
                    + " Sparse header block numbytes contains negative value");
            }
            bytesRead += readResult[1];
            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
        }

        // skip the rest of this record data
        // NOTE(review): when bytesRead is an exact multiple of recordSize this skips one
        // full extra record instead of nothing - confirm that this is intended.
        long bytesToSkip = recordSize - bytesRead % recordSize;
        IOUtils.skip(inputStream, bytesToSkip);
        return sparseHeaders;
    }

    /**
     * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data.
     * It consists of a series of decimal numbers delimited by newlines.
     *
     * @param inputStream the input stream of the tar file
     * @return the decimal number delimited by '\n', and the bytes read from input stream
     * @throws IOException if the stream ends before a newline is found or a non-digit is encountered
     */
    private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
        int number;
        long result = 0;
        long bytesRead = 0;

        while ((number = inputStream.read()) != '\n') {
            bytesRead += 1;
            if (number == -1) {
                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
            }
            if (number < '0' || number > '9') {
                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
            }
            result = result * 10 + (number - '0');
        }
        // account for the terminating newline
        bytesRead += 1;

        return new long[]{result, bytesRead};
    }

}