001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.sevenz; 019 020import static java.nio.charset.StandardCharsets.UTF_16LE; 021 022import java.io.BufferedInputStream; 023import java.io.ByteArrayInputStream; 024import java.io.Closeable; 025import java.io.DataInputStream; 026import java.io.EOFException; 027import java.io.File; 028import java.io.FilterInputStream; 029import java.io.IOException; 030import java.io.InputStream; 031import java.nio.ByteBuffer; 032import java.nio.ByteOrder; 033import java.nio.CharBuffer; 034import java.nio.channels.Channels; 035import java.nio.channels.SeekableByteChannel; 036import java.nio.file.Files; 037import java.nio.file.StandardOpenOption; 038import java.util.ArrayList; 039import java.util.Arrays; 040import java.util.BitSet; 041import java.util.EnumSet; 042import java.util.LinkedHashMap; 043import java.util.LinkedList; 044import java.util.List; 045import java.util.Map; 046import java.util.Objects; 047import java.util.zip.CRC32; 048import java.util.zip.CheckedInputStream; 049 050import org.apache.commons.compress.MemoryLimitException; 051import org.apache.commons.compress.utils.BoundedInputStream; 
052import org.apache.commons.compress.utils.ByteUtils; 053import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 054import org.apache.commons.compress.utils.IOUtils; 055import org.apache.commons.compress.utils.InputStreamStatistics; 056 057/** 058 * Reads a 7z file, using SeekableByteChannel under 059 * the covers. 060 * <p> 061 * The 7z file format is a flexible container 062 * that can contain many compression and 063 * encryption types, but at the moment only 064 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 065 * are supported. 066 * <p> 067 * The format is very Windows/Intel specific, 068 * so it uses little-endian byte order, 069 * doesn't store user/group or permission bits, 070 * and represents times using NTFS timestamps 071 * (100 nanosecond units since 1 January 1601). 072 * Hence the official tools recommend against 073 * using it for backup purposes on *nix, and 074 * recommend .tar.7z or .tar.lzma or .tar.xz 075 * instead. 076 * <p> 077 * Both the header and file contents may be 078 * compressed and/or encrypted. With both 079 * encrypted, neither file names nor file 080 * contents can be read, but the use of 081 * encryption isn't plausibly deniable. 
*
 * <p>Multi volume archives can be read by concatenating the parts in
 * correct order - either manually or by using {@link
 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel}
 * for example.</p>
 *
 * @NotThreadSafe
 * @since 1.6
 */
public class SevenZFile implements Closeable {
    // Base offset added to header/pack positions when seeking in the channel:
    // readHeaders consumes 12 bytes (signature, version, start header CRC) and
    // the start header itself is read as a further 20 bytes.
    static final int SIGNATURE_HEADER_SIZE = 32;

    // Name used by the channel based constructors that are not given a file name.
    private static final String DEFAULT_FILE_NAME = "unknown archive";

    // Per the constructor documentation the name is only used for error reporting.
    private final String fileName;
    // Set to null by close().
    private SeekableByteChannel channel;
    // Parsed archive meta-data, produced by readHeaders in the private constructor.
    private final Archive archive;
    // Index into archive.files of the entry most recently returned by getNextEntry,
    // -1 before the first call.
    private int currentEntryIndex = -1;
    private int currentFolderIndex = -1;
    private InputStream currentFolderInputStream;
    // Private copy of the password bytes (UTF-16LE encoded according to the
    // constructor documentation); wiped and set to null by close().
    private byte[] password;
    private final SevenZFileOptions options;

    // Both counters are reset to zero by getNextEntry.
    private long compressedBytesReadFromCurrentEntry;
    private long uncompressedBytesReadFromCurrentEntry;

    private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>();

    // shared with SevenZOutputFile and tests, neither mutates it
    static final byte[] sevenZSignature = { //NOSONAR
        (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
    };

    /**
     * Reads a file as 7z archive
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final File fileName, final char[] password) throws IOException {
        this(fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as 7z archive with additional options.
     *
     * <p>The channel opened for the file is closed again if reading
     * the archive headers fails.</p>
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final char[] password, final SevenZFileOptions options) throws IOException {
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), // NOSONAR
                fileName.getAbsolutePath(), utf16Decode(password), true, options);
    }

    /**
     * Reads a file as 7z archive
     *
     * <p>The channel opened for the file is closed again if reading
     * the archive headers fails.</p>
     *
     * @param fileName the file to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final File fileName, final byte[] password) throws IOException {
        this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)),
                fileName.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @throws IOException if reading the archive fails
     * @since 1.13
     */
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        this(channel, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException {
        this(channel, DEFAULT_FILE_NAME, null, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel,
                      final char[] password) throws IOException {
        this(channel, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options)
            throws IOException {
        this(channel, DEFAULT_FILE_NAME, password, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName,
                      final char[] password) throws IOException {
        this(channel, fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The channel is left open if reading the archive headers fails.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password,
            final SevenZFileOptions options) throws IOException {
        this(channel, fileName, utf16Decode(password), false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @throws IOException if reading the archive fails
     * @since 1.17
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName)
        throws IOException {
        this(channel, fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The channel is left open if reading the archive headers fails.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options)
            throws IOException {
        this(channel, fileName, null, false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel,
                      final byte[] password) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * @param channel the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted -
     * the byte array is supposed to be the UTF16-LE encoded
     * representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated use the char[]-arg version for the password instead
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName,
                      final byte[] password) throws IOException {
        this(channel, fileName, password, false, SevenZFileOptions.DEFAULT);
    }

    /**
     * Common constructor all public constructors delegate to: stores the
     * collaborators, parses the archive headers and keeps a private copy of
     * the password.
     *
     * @param channel the channel to read
     * @param filename name of the archive
     * @param password password bytes or {@code null}; the array is copied, the
     * caller's array is not retained
     * @param closeOnError whether the channel is closed when reading the
     * headers fails - the File based constructors pass {@code true} as they
     * opened the channel themselves, the channel based ones pass {@code false}
     * @param options the options to apply
     * @throws IOException if reading the archive fails
     */
    private SevenZFile(final SeekableByteChannel channel, final String filename,
                       final byte[] password, final boolean closeOnError, final SevenZFileOptions options) throws IOException {
        boolean succeeded = false;
        this.channel = channel;
        this.fileName = filename;
        this.options = options;
        try {
            archive = readHeaders(password);
            if (password != null) {
                // defensive copy, wiped again in close()
                this.password = Arrays.copyOf(password, password.length);
            } else {
                this.password = null;
            }
            succeeded = true;
        } finally {
            // only reached with succeeded == false when an exception is propagating
            if (!succeeded && closeOnError) {
                this.channel.close();
            }
        }
    }

    /**
     * Reads a file as unencrypted 7z archive
     *
     * @param fileName the file to read
     * @throws IOException if reading the archive fails
     */
    public SevenZFile(final File fileName) throws IOException {
        this(fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as unencrypted 7z archive
     *
     * @param fileName the file to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     */
    public SevenZFile(final File fileName, final SevenZFileOptions options) throws IOException {
        this(fileName, null, options);
    }

    /**
     * Closes the archive.
383 * @throws IOException if closing the file fails 384 */ 385 @Override 386 public void close() throws IOException { 387 if (channel != null) { 388 try { 389 channel.close(); 390 } finally { 391 channel = null; 392 if (password != null) { 393 Arrays.fill(password, (byte) 0); 394 } 395 password = null; 396 } 397 } 398 } 399 400 /** 401 * Returns the next Archive Entry in this archive. 402 * 403 * @return the next entry, 404 * or {@code null} if there are no more entries 405 * @throws IOException if the next entry could not be read 406 */ 407 public SevenZArchiveEntry getNextEntry() throws IOException { 408 if (currentEntryIndex >= archive.files.length - 1) { 409 return null; 410 } 411 ++currentEntryIndex; 412 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 413 if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) { 414 entry.setName(getDefaultName()); 415 } 416 buildDecodingStream(currentEntryIndex, false); 417 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 418 return entry; 419 } 420 421 /** 422 * Returns a copy of meta-data of all archive entries. 423 * 424 * <p>This method only provides meta-data, the entries can not be 425 * used to read the contents, you still need to process all 426 * entries in order using {@link #getNextEntry} for that.</p> 427 * 428 * <p>The content methods are only available for entries that have 429 * already been reached via {@link #getNextEntry}.</p> 430 * 431 * @return a copy of meta-data of all archive entries. 
* @since 1.11
     */
    public Iterable<SevenZArchiveEntry> getEntries() {
        return new ArrayList<>(Arrays.asList(archive.files));
    }

    /**
     * Reads the signature header and, based on it, the archive's meta-data.
     *
     * <p>Verifies the six byte signature and the major version, then either
     * follows the start header to the real header or - if the start header
     * looks wiped and recovery has been enabled via the options - searches
     * for the header at the end of the channel.</p>
     *
     * @param password password bytes or {@code null}
     * @return the parsed archive meta-data
     * @throws IOException if the signature or version is wrong, the archive
     * looks invalid and recovery is disabled, or reading fails
     */
    private Archive readHeaders(final byte[] password) throws IOException {
        final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */)
            .order(ByteOrder.LITTLE_ENDIAN);
        readFully(buf);
        final byte[] signature = new byte[6];
        buf.get(signature);
        if (!Arrays.equals(signature, sevenZSignature)) {
            throw new IOException("Bad 7z signature");
        }
        // 7zFormat.txt has it wrong - it's first major then minor
        final byte archiveVersionMajor = buf.get();
        final byte archiveVersionMinor = buf.get();
        if (archiveVersionMajor != 0) {
            throw new IOException(String.format("Unsupported 7z version (%d,%d)",
                archiveVersionMajor, archiveVersionMinor));
        }

        boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive"
        final long startHeaderCrc = 0xffffFFFFL & buf.getInt();
        if (startHeaderCrc == 0) {
            // This is an indication of a corrupt header - peek the next 20 bytes
            final long currentPosition = channel.position();
            final ByteBuffer peekBuf = ByteBuffer.allocate(20);
            readFully(peekBuf);
            // rewind so that readStartHeader sees the same 20 bytes again
            channel.position(currentPosition);
            // Header invalid if all data is 0
            while (peekBuf.hasRemaining()) {
                if (peekBuf.get() != 0) {
                    headerLooksValid = true;
                    break;
                }
            }
        } else {
            headerLooksValid = true;
        }

        if (headerLooksValid) {
            return initializeArchive(readStartHeader(startHeaderCrc), password, true);
        }
        // No valid header found - probably first file of multipart archive was removed too early. Scan for end header.
        if (options.getTryToRecoverBrokenArchives()) {
            return tryToLocateEndHeader(password);
        }
        throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the"
            + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed"
            + " prematurely.");
    }

    /**
     * Tries to find the header without the help of the start header.
     *
     * <p>Walks backwards through (at most) the last MiB of the channel and,
     * at every byte that equals {@code NID.kEncodedHeader} or
     * {@code NID.kHeader}, tries to parse a header spanning from that byte
     * to the end of the channel. The first guess that yields an archive with
     * at least one packed stream and one file wins. No CRC is verified as
     * none is known.</p>
     *
     * @param password password bytes or {@code null}
     * @return the archive meta-data of the first plausible guess
     * @throws IOException if no guess produced a plausible archive
     */
    private Archive tryToLocateEndHeader(final byte[] password) throws IOException {
        final ByteBuffer nidBuf = ByteBuffer.allocate(1);
        final long searchLimit = 1024L * 1024 * 1;
        // Main header, plus bytes that readStartHeader would read
        final long previousDataSize = channel.position() + 20;
        final long minPos;
        // Determine minimal position - can't start before current position
        if (channel.position() + searchLimit > channel.size()) {
            minPos = channel.position();
        } else {
            minPos = channel.size() - searchLimit;
        }
        long pos = channel.size() - 1;
        // Loop: Try from end of archive
        while (pos > minPos) {
            pos--;
            channel.position(pos);
            nidBuf.rewind();
            if (channel.read(nidBuf) < 1) {
                throw new EOFException();
            }
            final int nid = nidBuf.array()[0];
            // First indicator: Byte equals one of these header identifiers
            if (nid == NID.kEncodedHeader || nid == NID.kHeader) {
                try {
                    // Try to initialize Archive structure from here
                    final StartHeader startHeader = new StartHeader();
                    // initializeArchive adds SIGNATURE_HEADER_SIZE to the offset when seeking
                    startHeader.nextHeaderOffset = pos - previousDataSize;
                    startHeader.nextHeaderSize = channel.size() - pos;
                    final Archive result = initializeArchive(startHeader, password, false);
                    // Sanity check: There must be some data...
                    if (result.packSizes.length > 0 && result.files.length > 0) {
                        return result;
                    }
                } catch (final Exception ignore) {
                    // Wrong guess... deliberately swallowed, the scan moves on to the next candidate
                }
            }
        }
        throw new IOException("Start header corrupt and unable to guess end header");
    }

    /**
     * Reads and parses the header the given start header points to.
     *
     * <p>The header is read from {@code SIGNATURE_HEADER_SIZE +
     * nextHeaderOffset}; if it turns out to be an encoded header it is
     * decoded first and the result is parsed instead.</p>
     *
     * @param startHeader location, size and CRC of the header
     * @param password password bytes or {@code null}
     * @param verifyCrc whether the header bytes are checked against
     * {@code startHeader.nextHeaderCrc} before they are parsed
     * @return the parsed archive meta-data, with {@code subStreamsInfo} reset to {@code null}
     * @throws IOException if the size doesn't fit into an int, the CRC
     * doesn't match, no header is found or reading fails
     */
    private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException {
        assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize);
        final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize;
        channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
        if (verifyCrc) {
            final long position = channel.position();
            // Not closed on purpose: per the Channels.newInputStream contract closing the
            // stream would close the channel as well.
            final CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32());
            if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) {
                throw new IOException("Problem computing NextHeader CRC-32");
            }
            if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) {
                throw new IOException("NextHeader CRC-32 mismatch");
            }
            // go back to the start of the header for the actual read
            channel.position(position);
        }
        Archive archive = new Archive();
        ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN);
        readFully(buf);
        int nid = getUnsignedByte(buf);
        if (nid == NID.kEncodedHeader) {
            buf = readEncodedHeader(buf, archive, password);
            // Archive gets rebuilt with the new header
            archive = new Archive();
            nid = getUnsignedByte(buf);
        }
        if (nid != NID.kHeader) {
            throw new IOException("Broken or unsupported archive: no Header");
        }
        readHeader(buf, archive);
        archive.subStreamsInfo = null;
        return archive;
    }

    /**
     * Reads the 20 byte start header (offset, size and CRC of the next
     * header) from the current channel position and validates it.
     *
     * @param startHeaderCrc the CRC the 20 bytes are verified against
     * @return the start header
     * @throws IOException if offset or size point outside of the channel or reading fails
     */
    private StartHeader readStartHeader(final long startHeaderCrc) throws IOException {
        final StartHeader startHeader = new StartHeader();
        // using Stream rather than ByteBuffer for the benefit of the
        // built-in CRC check
        try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream(
                new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) {
            // DataInputStream is big-endian, the archive is little-endian
            startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong());
            if (startHeader.nextHeaderOffset < 0
                || startHeader.nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) {
                throw new IOException("nextHeaderOffset is out of bounds");
            }

            startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong());
            final long nextHeaderEnd = startHeader.nextHeaderOffset + startHeader.nextHeaderSize;
            // the first comparison catches negative sizes and long overflow
            if (nextHeaderEnd < startHeader.nextHeaderOffset
                || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) {
                throw new IOException("nextHeaderSize is out of bounds");
            }

            startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt());

            return startHeader;
        }
    }

    /**
     * Parses the header into the given archive.
     *
     * <p>The buffer is sanity checked first - including the memory limit
     * configured via the options - and rewound before the data is actually
     * read.</p>
     *
     * @param header buffer positioned right behind the {@code NID.kHeader} byte
     * @param archive receives the parsed data
     * @throws IOException if the header is broken, unsupported or would exceed the memory limit
     */
    private void readHeader(final ByteBuffer header, final Archive archive) throws IOException {
        final int pos = header.position();
        final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header);
        stats.assertValidity(options.getMaxMemoryLimitInKb());
        header.position(pos);

        int nid = getUnsignedByte(header);

        if (nid == NID.kArchiveProperties) {
            readArchiveProperties(header);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kAdditionalStreamsInfo) {
            throw new IOException("Additional streams unsupported");
        }

        if (nid == NID.kMainStreamsInfo) {
            readStreamsInfo(header, archive);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kFilesInfo) {
            readFilesInfo(header, archive);
            // consumes the byte following the files info; the sanity check above
            // has already required the header to terminate with NID.kEnd
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Walks the header without building any data structures, verifying its
     * structure and collecting the numbers needed to judge memory usage.
     *
     * @param header buffer positioned right behind the {@code NID.kHeader} byte
     * @return the statistics collected
     * @throws IOException if the header is malformed or uses unsupported features
     */
    private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header)
        throws IOException {
        final ArchiveStatistics stats = new ArchiveStatistics();

        int nid = getUnsignedByte(header);

        if (nid == NID.kArchiveProperties) {
            sanityCheckArchiveProperties(header);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kAdditionalStreamsInfo) {
            throw new IOException("Additional streams unsupported");
        }

        if (nid == NID.kMainStreamsInfo) {
            sanityCheckStreamsInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kFilesInfo) {
            sanityCheckFilesInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated header, found " + nid);
        }

        return stats;
    }

    /**
     * Reads the archive properties and discards them.
     *
     * @param input buffer positioned at the first property
     * @throws IOException if reading fails
     */
    private void readArchiveProperties(final ByteBuffer input) throws IOException {
        // FIXME: the reference implementation just throws them away?
        int nid = getUnsignedByte(input);
        while (nid != NID.kEnd) {
            final long propertySize = readUint64(input);
            // the cast relies on sanityCheckArchiveProperties having vetted the size,
            // readHeader runs the sanity check before calling this method
            final byte[] property = new byte[(int)propertySize];
            get(input, property);
            nid = getUnsignedByte(input);
        }
    }

    /**
     * Verifies that each archive property has a size that fits into an int
     * and is fully contained in the buffer.
     *
     * @param header buffer positioned at the first property
     * @throws IOException if a property size is invalid
     */
    private void sanityCheckArchiveProperties(final ByteBuffer header)
        throws IOException {
        int nid = getUnsignedByte(header);
        while (nid != NID.kEnd) {
            final int propertySize =
                assertFitsIntoNonNegativeInt("propertySize", readUint64(header));
            if (skipBytesFully(header, propertySize) < propertySize) {
                throw new IOException("invalid property size");
            }
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Decodes an encoded (compressed and/or encrypted) header.
     *
     * <p>The streams info found in {@code header} describes where the packed
     * header lives and how it has been coded; the first folder's coders are
     * stacked on top of the first packed stream and the result is read into
     * memory. The decoder stack is closed in all cases, including failures.</p>
     *
     * @param header buffer positioned right behind the {@code NID.kEncodedHeader} byte
     * @param archive scratch archive that receives the streams info
     * @param password password bytes or {@code null}
     * @return a little-endian buffer holding the decoded header
     * @throws IOException if the streams info is invalid, a coder is
     * unsupported, the data is truncated or reading fails
     */
    private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive,
                                         final byte[] password) throws IOException {
        final int pos = header.position();
        final ArchiveStatistics stats = new ArchiveStatistics();
        sanityCheckStreamsInfo(header, stats);
        stats.assertValidity(options.getMaxMemoryLimitInKb());
        header.position(pos);

        readStreamsInfo(header, archive);

        if (archive.folders == null || archive.folders.length == 0) {
            throw new IOException("no folders, can't read encoded header");
        }
        if (archive.packSizes == null || archive.packSizes.length == 0) {
            throw new IOException("no packed streams, can't read encoded header");
        }

        // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage?
        final Folder folder = archive.folders[0];
        final int firstPackStreamIndex = 0;
        final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
            0;

        channel.position(folderOffset);
        InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel,
                archive.packSizes[firstPackStreamIndex]);
        try {
            for (final Coder coder : folder.getOrderedCoders()) {
                if (coder.numInStreams != 1 || coder.numOutStreams != 1) {
                    throw new IOException("Multi input/output stream coders are not yet supported");
                }
                inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR
                        folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb());
            }
            if (folder.hasCrc) {
                inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack,
                        folder.getUnpackSize(), folder.crc);
            }
            final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize());
            final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize);
            if (nextHeader.length < unpackSize) {
                throw new IOException("premature end of stream");
            }
            return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN);
        } finally {
            // release the decoders on every path, not just on success
            inputStreamStack.close();
        }
    }

    /**
     * Verifies the structure of a streams info block (pack info, unpack
     * info and sub streams info, each optional) and collects statistics.
     *
     * @param header buffer positioned at the first id of the block
     * @param stats receives the numbers collected
     * @throws IOException if the block is malformed
     */
    private void sanityCheckStreamsInfo(final ByteBuffer header,
        final ArchiveStatistics stats) throws IOException {
        int nid = getUnsignedByte(header);

        if (nid == NID.kPackInfo) {
            sanityCheckPackInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kUnpackInfo) {
            sanityCheckUnpackInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kSubStreamsInfo) {
            sanityCheckSubStreamsInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated StreamsInfo");
        }
    }

742 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 743 int nid = getUnsignedByte(header); 744 745 if (nid == NID.kPackInfo) { 746 readPackInfo(header, archive); 747 nid = getUnsignedByte(header); 748 } 749 750 if (nid == NID.kUnpackInfo) { 751 readUnpackInfo(header, archive); 752 nid = getUnsignedByte(header); 753 } else { 754 // archive without unpack/coders info 755 archive.folders = Folder.EMPTY_FOLDER_ARRAY; 756 } 757 758 if (nid == NID.kSubStreamsInfo) { 759 readSubStreamsInfo(header, archive); 760 nid = getUnsignedByte(header); 761 } 762 } 763 764 private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 765 final long packPos = readUint64(header); 766 if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() 767 || SIGNATURE_HEADER_SIZE + packPos < 0) { 768 throw new IOException("packPos (" + packPos + ") is out of range"); 769 } 770 final long numPackStreams = readUint64(header); 771 stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); 772 int nid = getUnsignedByte(header); 773 if (nid == NID.kSize) { 774 long totalPackSizes = 0; 775 for (int i = 0; i < stats.numberOfPackedStreams; i++) { 776 final long packSize = readUint64(header); 777 totalPackSizes += packSize; 778 final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; 779 if (packSize < 0 780 || endOfPackStreams > channel.size() 781 || endOfPackStreams < packPos) { 782 throw new IOException("packSize (" + packSize + ") is out of range"); 783 } 784 } 785 nid = getUnsignedByte(header); 786 } 787 788 if (nid == NID.kCRC) { 789 final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams) 790 .cardinality(); 791 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 792 throw new IOException("invalid number of CRCs in PackInfo"); 793 } 794 nid = getUnsignedByte(header); 795 } 796 797 if (nid != NID.kEnd) { 
798 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 799 } 800 } 801 802 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 803 archive.packPos = readUint64(header); 804 final int numPackStreamsInt = (int) readUint64(header); 805 int nid = getUnsignedByte(header); 806 if (nid == NID.kSize) { 807 archive.packSizes = new long[numPackStreamsInt]; 808 for (int i = 0; i < archive.packSizes.length; i++) { 809 archive.packSizes[i] = readUint64(header); 810 } 811 nid = getUnsignedByte(header); 812 } 813 814 if (nid == NID.kCRC) { 815 archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); 816 archive.packCrcs = new long[numPackStreamsInt]; 817 for (int i = 0; i < numPackStreamsInt; i++) { 818 if (archive.packCrcsDefined.get(i)) { 819 archive.packCrcs[i] = 0xffffFFFFL & getInt(header); 820 } 821 } 822 823 nid = getUnsignedByte(header); 824 } 825 } 826 827 private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) 828 throws IOException { 829 int nid = getUnsignedByte(header); 830 if (nid != NID.kFolder) { 831 throw new IOException("Expected kFolder, got " + nid); 832 } 833 final long numFolders = readUint64(header); 834 stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); 835 final int external = getUnsignedByte(header); 836 if (external != 0) { 837 throw new IOException("External unsupported"); 838 } 839 840 final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>(); 841 for (int i = 0; i < stats.numberOfFolders; i++) { 842 numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); 843 } 844 845 final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; 846 final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; 847 if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { 848 throw new IOException("archive doesn't contain enough packed 
streams"); 849 } 850 851 nid = getUnsignedByte(header); 852 if (nid != NID.kCodersUnpackSize) { 853 throw new IOException("Expected kCodersUnpackSize, got " + nid); 854 } 855 856 for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { 857 for (int i = 0; i < numberOfOutputStreams; i++) { 858 final long unpackSize = readUint64(header); 859 if (unpackSize < 0) { 860 throw new IllegalArgumentException("negative unpackSize"); 861 } 862 } 863 } 864 865 nid = getUnsignedByte(header); 866 if (nid == NID.kCRC) { 867 stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); 868 final int crcsDefined = stats.folderHasCrc.cardinality(); 869 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 870 throw new IOException("invalid number of CRCs in UnpackInfo"); 871 } 872 nid = getUnsignedByte(header); 873 } 874 875 if (nid != NID.kEnd) { 876 throw new IOException("Badly terminated UnpackInfo"); 877 } 878 } 879 880 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 881 int nid = getUnsignedByte(header); 882 final int numFoldersInt = (int) readUint64(header); 883 final Folder[] folders = new Folder[numFoldersInt]; 884 archive.folders = folders; 885 /* final int external = */ getUnsignedByte(header); 886 for (int i = 0; i < numFoldersInt; i++) { 887 folders[i] = readFolder(header); 888 } 889 890 nid = getUnsignedByte(header); 891 for (final Folder folder : folders) { 892 assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); 893 folder.unpackSizes = new long[(int)folder.totalOutputStreams]; 894 for (int i = 0; i < folder.totalOutputStreams; i++) { 895 folder.unpackSizes[i] = readUint64(header); 896 } 897 } 898 899 nid = getUnsignedByte(header); 900 if (nid == NID.kCRC) { 901 final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); 902 for (int i = 0; i < numFoldersInt; i++) { 903 if (crcsDefined.get(i)) { 904 folders[i].hasCrc = true; 905 folders[i].crc = 0xffffFFFFL 
& getInt(header); 906 } else { 907 folders[i].hasCrc = false; 908 } 909 } 910 911 nid = getUnsignedByte(header); 912 } 913 } 914 915 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 916 917 int nid = getUnsignedByte(header); 918 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 919 if (nid == NID.kNumUnpackStream) { 920 for (int i = 0; i < stats.numberOfFolders; i++) { 921 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 922 } 923 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum(); 924 nid = getUnsignedByte(header); 925 } else { 926 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 927 } 928 929 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 930 931 if (nid == NID.kSize) { 932 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 933 if (numUnpackSubStreams == 0) { 934 continue; 935 } 936 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 937 final long size = readUint64(header); 938 if (size < 0) { 939 throw new IOException("negative unpackSize"); 940 } 941 } 942 } 943 nid = getUnsignedByte(header); 944 } 945 946 int numDigests = 0; 947 if (numUnpackSubStreamsPerFolder.isEmpty()) { 948 numDigests = stats.folderHasCrc == null ? 
stats.numberOfFolders 949 : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 950 } else { 951 int folderIdx = 0; 952 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 953 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null 954 || !stats.folderHasCrc.get(folderIdx++)) { 955 numDigests += numUnpackSubStreams; 956 } 957 } 958 } 959 960 if (nid == NID.kCRC) { 961 assertFitsIntoNonNegativeInt("numDigests", numDigests); 962 final int missingCrcs = readAllOrBits(header, numDigests) 963 .cardinality(); 964 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 965 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 966 } 967 nid = getUnsignedByte(header); 968 } 969 970 if (nid != NID.kEnd) { 971 throw new IOException("Badly terminated SubStreamsInfo"); 972 } 973 } 974 975 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 976 for (final Folder folder : archive.folders) { 977 folder.numUnpackSubStreams = 1; 978 } 979 long unpackStreamsCount = archive.folders.length; 980 981 int nid = getUnsignedByte(header); 982 if (nid == NID.kNumUnpackStream) { 983 unpackStreamsCount = 0; 984 for (final Folder folder : archive.folders) { 985 final long numStreams = readUint64(header); 986 folder.numUnpackSubStreams = (int)numStreams; 987 unpackStreamsCount += numStreams; 988 } 989 nid = getUnsignedByte(header); 990 } 991 992 final int totalUnpackStreams = (int) unpackStreamsCount; 993 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 994 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 995 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 996 subStreamsInfo.crcs = new long[totalUnpackStreams]; 997 998 int nextUnpackStream = 0; 999 for (final Folder folder : archive.folders) { 1000 if (folder.numUnpackSubStreams == 0) { 1001 continue; 1002 } 1003 long sum = 0; 1004 if (nid == NID.kSize) { 1005 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) 
{ 1006 final long size = readUint64(header); 1007 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1008 sum += size; 1009 } 1010 } 1011 if (sum > folder.getUnpackSize()) { 1012 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1013 } 1014 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1015 } 1016 if (nid == NID.kSize) { 1017 nid = getUnsignedByte(header); 1018 } 1019 1020 int numDigests = 0; 1021 for (final Folder folder : archive.folders) { 1022 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1023 numDigests += folder.numUnpackSubStreams; 1024 } 1025 } 1026 1027 if (nid == NID.kCRC) { 1028 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1029 final long[] missingCrcs = new long[numDigests]; 1030 for (int i = 0; i < numDigests; i++) { 1031 if (hasMissingCrc.get(i)) { 1032 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1033 } 1034 } 1035 int nextCrc = 0; 1036 int nextMissingCrc = 0; 1037 for (final Folder folder: archive.folders) { 1038 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1039 subStreamsInfo.hasCrc.set(nextCrc, true); 1040 subStreamsInfo.crcs[nextCrc] = folder.crc; 1041 ++nextCrc; 1042 } else { 1043 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1044 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1045 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1046 ++nextCrc; 1047 ++nextMissingCrc; 1048 } 1049 } 1050 } 1051 1052 nid = getUnsignedByte(header); 1053 } 1054 1055 archive.subStreamsInfo = subStreamsInfo; 1056 } 1057 1058 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) 1059 throws IOException { 1060 1061 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1062 if (numCoders == 0) { 1063 throw new IOException("Folder without coders"); 1064 } 1065 stats.numberOfCoders += numCoders; 1066 1067 long totalOutStreams = 0; 1068 long totalInStreams = 
0; 1069 for (int i = 0; i < numCoders; i++) { 1070 final int bits = getUnsignedByte(header); 1071 final int idSize = bits & 0xf; 1072 get(header, new byte[idSize]); 1073 1074 final boolean isSimple = (bits & 0x10) == 0; 1075 final boolean hasAttributes = (bits & 0x20) != 0; 1076 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1077 if (moreAlternativeMethods) { 1078 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1079 "The reference implementation doesn't support them either."); 1080 } 1081 1082 if (isSimple) { 1083 totalInStreams++; 1084 totalOutStreams++; 1085 } else { 1086 totalInStreams += 1087 assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1088 totalOutStreams += 1089 assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1090 } 1091 1092 if (hasAttributes) { 1093 final int propertiesSize = 1094 assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1095 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1096 throw new IOException("invalid propertiesSize in folder"); 1097 } 1098 } 1099 } 1100 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1101 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1102 stats.numberOfOutStreams += totalOutStreams; 1103 stats.numberOfInStreams += totalInStreams; 1104 1105 if (totalOutStreams == 0) { 1106 throw new IOException("Total output streams can't be 0"); 1107 } 1108 1109 final int numBindPairs = 1110 assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1111 if (totalInStreams < numBindPairs) { 1112 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1113 } 1114 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1115 for (int i = 0; i < numBindPairs; i++) { 1116 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1117 if (totalInStreams <= inIndex) { 1118 throw new IOException("inIndex is bigger 
than number of inStreams"); 1119 } 1120 inStreamsBound.set(inIndex); 1121 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1122 if (totalOutStreams <= outIndex) { 1123 throw new IOException("outIndex is bigger than number of outStreams"); 1124 } 1125 } 1126 1127 final int numPackedStreams = 1128 assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1129 1130 if (numPackedStreams == 1) { 1131 if (inStreamsBound.nextClearBit(0) == -1) { 1132 throw new IOException("Couldn't find stream's bind pair index"); 1133 } 1134 } else { 1135 for (int i = 0; i < numPackedStreams; i++) { 1136 final int packedStreamIndex = 1137 assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1138 if (packedStreamIndex >= totalInStreams) { 1139 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1140 } 1141 } 1142 } 1143 1144 return (int) totalOutStreams; 1145 } 1146 1147 private Folder readFolder(final ByteBuffer header) throws IOException { 1148 final Folder folder = new Folder(); 1149 1150 final long numCoders = readUint64(header); 1151 final Coder[] coders = new Coder[(int)numCoders]; 1152 long totalInStreams = 0; 1153 long totalOutStreams = 0; 1154 for (int i = 0; i < coders.length; i++) { 1155 coders[i] = new Coder(); 1156 final int bits = getUnsignedByte(header); 1157 final int idSize = bits & 0xf; 1158 final boolean isSimple = (bits & 0x10) == 0; 1159 final boolean hasAttributes = (bits & 0x20) != 0; 1160 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1161 1162 coders[i].decompressionMethodId = new byte[idSize]; 1163 get(header, coders[i].decompressionMethodId); 1164 if (isSimple) { 1165 coders[i].numInStreams = 1; 1166 coders[i].numOutStreams = 1; 1167 } else { 1168 coders[i].numInStreams = readUint64(header); 1169 coders[i].numOutStreams = readUint64(header); 1170 } 1171 totalInStreams += coders[i].numInStreams; 1172 totalOutStreams += coders[i].numOutStreams; 
1173 if (hasAttributes) { 1174 final long propertiesSize = readUint64(header); 1175 coders[i].properties = new byte[(int)propertiesSize]; 1176 get(header, coders[i].properties); 1177 } 1178 // would need to keep looping as above: 1179 if (moreAlternativeMethods) { 1180 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1181 "The reference implementation doesn't support them either."); 1182 } 1183 } 1184 folder.coders = coders; 1185 folder.totalInputStreams = totalInStreams; 1186 folder.totalOutputStreams = totalOutStreams; 1187 1188 final long numBindPairs = totalOutStreams - 1; 1189 final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; 1190 for (int i = 0; i < bindPairs.length; i++) { 1191 bindPairs[i] = new BindPair(); 1192 bindPairs[i].inIndex = readUint64(header); 1193 bindPairs[i].outIndex = readUint64(header); 1194 } 1195 folder.bindPairs = bindPairs; 1196 1197 final long numPackedStreams = totalInStreams - numBindPairs; 1198 final long[] packedStreams = new long[(int)numPackedStreams]; 1199 if (numPackedStreams == 1) { 1200 int i; 1201 for (i = 0; i < (int)totalInStreams; i++) { 1202 if (folder.findBindPairForInStream(i) < 0) { 1203 break; 1204 } 1205 } 1206 packedStreams[0] = i; 1207 } else { 1208 for (int i = 0; i < (int)numPackedStreams; i++) { 1209 packedStreams[i] = readUint64(header); 1210 } 1211 } 1212 folder.packedStreams = packedStreams; 1213 1214 return folder; 1215 } 1216 1217 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1218 final int areAllDefined = getUnsignedByte(header); 1219 final BitSet bits; 1220 if (areAllDefined != 0) { 1221 bits = new BitSet(size); 1222 for (int i = 0; i < size; i++) { 1223 bits.set(i, true); 1224 } 1225 } else { 1226 bits = readBits(header, size); 1227 } 1228 return bits; 1229 } 1230 1231 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1232 final BitSet bits = new BitSet(size); 1233 int 
mask = 0; 1234 int cache = 0; 1235 for (int i = 0; i < size; i++) { 1236 if (mask == 0) { 1237 mask = 0x80; 1238 cache = getUnsignedByte(header); 1239 } 1240 bits.set(i, (cache & mask) != 0); 1241 mask >>>= 1; 1242 } 1243 return bits; 1244 } 1245 1246 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1247 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1248 1249 int emptyStreams = -1; 1250 while (true) { 1251 final int propertyType = getUnsignedByte(header); 1252 if (propertyType == 0) { 1253 break; 1254 } 1255 final long size = readUint64(header); 1256 switch (propertyType) { 1257 case NID.kEmptyStream: { 1258 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1259 break; 1260 } 1261 case NID.kEmptyFile: { 1262 if (emptyStreams == -1) { 1263 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1264 } 1265 readBits(header, emptyStreams); 1266 break; 1267 } 1268 case NID.kAnti: { 1269 if (emptyStreams == -1) { 1270 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 1271 } 1272 readBits(header, emptyStreams); 1273 break; 1274 } 1275 case NID.kName: { 1276 final int external = getUnsignedByte(header); 1277 if (external != 0) { 1278 throw new IOException("Not implemented"); 1279 } 1280 final int namesLength = 1281 assertFitsIntoNonNegativeInt("file names length", size - 1); 1282 if ((namesLength & 1) != 0) { 1283 throw new IOException("File names length invalid"); 1284 } 1285 1286 int filesSeen = 0; 1287 for (int i = 0; i < namesLength; i += 2) { 1288 final char c = getChar(header); 1289 if (c == 0) { 1290 filesSeen++; 1291 } 1292 } 1293 if (filesSeen != stats.numberOfEntries) { 1294 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " 1295 + stats.numberOfEntries + ")"); 1296 } 1297 break; 1298 } 1299 case NID.kCTime: { 1300 final int timesDefined = 
readAllOrBits(header, stats.numberOfEntries) 1301 .cardinality(); 1302 final int external = getUnsignedByte(header); 1303 if (external != 0) { 1304 throw new IOException("Not implemented"); 1305 } 1306 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1307 throw new IOException("invalid creation dates size"); 1308 } 1309 break; 1310 } 1311 case NID.kATime: { 1312 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1313 .cardinality(); 1314 final int external = getUnsignedByte(header); 1315 if (external != 0) { 1316 throw new IOException("Not implemented"); 1317 } 1318 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1319 throw new IOException("invalid access dates size"); 1320 } 1321 break; 1322 } 1323 case NID.kMTime: { 1324 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1325 .cardinality(); 1326 final int external = getUnsignedByte(header); 1327 if (external != 0) { 1328 throw new IOException("Not implemented"); 1329 } 1330 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1331 throw new IOException("invalid modification dates size"); 1332 } 1333 break; 1334 } 1335 case NID.kWinAttributes: { 1336 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries) 1337 .cardinality(); 1338 final int external = getUnsignedByte(header); 1339 if (external != 0) { 1340 throw new IOException("Not implemented"); 1341 } 1342 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1343 throw new IOException("invalid windows attributes size"); 1344 } 1345 break; 1346 } 1347 case NID.kStartPos: { 1348 throw new IOException("kStartPos is unsupported, please report"); 1349 } 1350 case NID.kDummy: { 1351 // 7z 9.20 asserts the content is all zeros and ignores the property 1352 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1353 1354 if (skipBytesFully(header, size) < size) { 1355 throw new IOException("Incomplete kDummy 
property"); 1356 } 1357 break; 1358 } 1359 1360 default: { 1361 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1362 if (skipBytesFully(header, size) < size) { 1363 throw new IOException("Incomplete property of type " + propertyType); 1364 } 1365 break; 1366 } 1367 } 1368 } 1369 stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0); 1370 } 1371 1372 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1373 final int numFilesInt = (int) readUint64(header); 1374 final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>(); 1375 BitSet isEmptyStream = null; 1376 BitSet isEmptyFile = null; 1377 BitSet isAnti = null; 1378 while (true) { 1379 final int propertyType = getUnsignedByte(header); 1380 if (propertyType == 0) { 1381 break; 1382 } 1383 final long size = readUint64(header); 1384 switch (propertyType) { 1385 case NID.kEmptyStream: { 1386 isEmptyStream = readBits(header, numFilesInt); 1387 break; 1388 } 1389 case NID.kEmptyFile: { 1390 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 1391 break; 1392 } 1393 case NID.kAnti: { 1394 isAnti = readBits(header, isEmptyStream.cardinality()); 1395 break; 1396 } 1397 case NID.kName: { 1398 /* final int external = */ getUnsignedByte(header); 1399 final byte[] names = new byte[(int) (size - 1)]; 1400 final int namesLength = names.length; 1401 get(header, names); 1402 int nextFile = 0; 1403 int nextName = 0; 1404 for (int i = 0; i < namesLength; i += 2) { 1405 if (names[i] == 0 && names[i + 1] == 0) { 1406 checkEntryIsInitialized(fileMap, nextFile); 1407 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE)); 1408 nextName = i + 2; 1409 nextFile++; 1410 } 1411 } 1412 if (nextName != namesLength || nextFile != numFilesInt) { 1413 throw new IOException("Error parsing file names"); 1414 } 1415 break; 1416 } 1417 case NID.kCTime: { 1418 final BitSet timesDefined = 
readAllOrBits(header, numFilesInt); 1419 /* final int external = */ getUnsignedByte(header); 1420 for (int i = 0; i < numFilesInt; i++) { 1421 checkEntryIsInitialized(fileMap, i); 1422 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1423 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1424 if (entryAtIndex.getHasCreationDate()) { 1425 entryAtIndex.setCreationDate(getLong(header)); 1426 } 1427 } 1428 break; 1429 } 1430 case NID.kATime: { 1431 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1432 /* final int external = */ getUnsignedByte(header); 1433 for (int i = 0; i < numFilesInt; i++) { 1434 checkEntryIsInitialized(fileMap, i); 1435 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1436 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1437 if (entryAtIndex.getHasAccessDate()) { 1438 entryAtIndex.setAccessDate(getLong(header)); 1439 } 1440 } 1441 break; 1442 } 1443 case NID.kMTime: { 1444 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1445 /* final int external = */ getUnsignedByte(header); 1446 for (int i = 0; i < numFilesInt; i++) { 1447 checkEntryIsInitialized(fileMap, i); 1448 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1449 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1450 if (entryAtIndex.getHasLastModifiedDate()) { 1451 entryAtIndex.setLastModifiedDate(getLong(header)); 1452 } 1453 } 1454 break; 1455 } 1456 case NID.kWinAttributes: { 1457 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1458 /* final int external = */ getUnsignedByte(header); 1459 for (int i = 0; i < numFilesInt; i++) { 1460 checkEntryIsInitialized(fileMap, i); 1461 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1462 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1463 if (entryAtIndex.getHasWindowsAttributes()) { 1464 entryAtIndex.setWindowsAttributes(getInt(header)); 1465 } 1466 } 1467 break; 1468 } 1469 case NID.kDummy: { 1470 // 7z 9.20 asserts the content is all 
zeros and ignores the property 1471 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1472 1473 skipBytesFully(header, size); 1474 break; 1475 } 1476 1477 default: { 1478 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1479 skipBytesFully(header, size); 1480 break; 1481 } 1482 } 1483 } 1484 int nonEmptyFileCounter = 0; 1485 int emptyFileCounter = 0; 1486 for (int i = 0; i < numFilesInt; i++) { 1487 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1488 if (entryAtIndex == null) { 1489 continue; 1490 } 1491 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1492 if (entryAtIndex.hasStream()) { 1493 if (archive.subStreamsInfo == null) { 1494 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1495 } 1496 entryAtIndex.setDirectory(false); 1497 entryAtIndex.setAntiItem(false); 1498 entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1499 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1500 entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1501 if (entryAtIndex.getSize() < 0) { 1502 throw new IOException("broken archive, entry with negative size"); 1503 } 1504 ++nonEmptyFileCounter; 1505 } else { 1506 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1507 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1508 entryAtIndex.setHasCrc(false); 1509 entryAtIndex.setSize(0); 1510 ++emptyFileCounter; 1511 } 1512 } 1513 archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new); 1514 calculateStreamMap(archive); 1515 } 1516 1517 private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) { 1518 if (archiveEntries.get(index) == null) { 1519 archiveEntries.put(index, new SevenZArchiveEntry()); 1520 } 1521 } 1522 1523 
private void calculateStreamMap(final Archive archive) throws IOException { 1524 final StreamMap streamMap = new StreamMap(); 1525 1526 int nextFolderPackStreamIndex = 0; 1527 final int numFolders = archive.folders != null ? archive.folders.length : 0; 1528 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 1529 for (int i = 0; i < numFolders; i++) { 1530 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 1531 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 1532 } 1533 1534 long nextPackStreamOffset = 0; 1535 final int numPackSizes = archive.packSizes.length; 1536 streamMap.packStreamOffsets = new long[numPackSizes]; 1537 for (int i = 0; i < numPackSizes; i++) { 1538 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 1539 nextPackStreamOffset += archive.packSizes[i]; 1540 } 1541 1542 streamMap.folderFirstFileIndex = new int[numFolders]; 1543 streamMap.fileFolderIndex = new int[archive.files.length]; 1544 int nextFolderIndex = 0; 1545 int nextFolderUnpackStreamIndex = 0; 1546 for (int i = 0; i < archive.files.length; i++) { 1547 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 1548 streamMap.fileFolderIndex[i] = -1; 1549 continue; 1550 } 1551 if (nextFolderUnpackStreamIndex == 0) { 1552 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 1553 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 1554 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 1555 break; 1556 } 1557 } 1558 if (nextFolderIndex >= archive.folders.length) { 1559 throw new IOException("Too few folders in archive"); 1560 } 1561 } 1562 streamMap.fileFolderIndex[i] = nextFolderIndex; 1563 if (!archive.files[i].hasStream()) { 1564 continue; 1565 } 1566 ++nextFolderUnpackStreamIndex; 1567 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 1568 ++nextFolderIndex; 1569 nextFolderUnpackStreamIndex = 0; 1570 } 1571 } 1572 1573 archive.streamMap = streamMap; 
1574 } 1575 1576 /** 1577 * Build the decoding stream for the entry to be read. 1578 * This method may be called from a random access(getInputStream) or 1579 * sequential access(getNextEntry). 1580 * If this method is called from a random access, some entries may 1581 * need to be skipped(we put them to the deferredBlockStreams and 1582 * skip them when actually needed to improve the performance) 1583 * 1584 * @param entryIndex the index of the entry to be read 1585 * @param isRandomAccess is this called in a random access 1586 * @throws IOException if there are exceptions when reading the file 1587 */ 1588 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 1589 if (archive.streamMap == null) { 1590 throw new IOException("Archive doesn't contain stream information to read entries"); 1591 } 1592 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1593 if (folderIndex < 0) { 1594 deferredBlockStreams.clear(); 1595 // TODO: previously it'd return an empty stream? 1596 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 1597 return; 1598 } 1599 final SevenZArchiveEntry file = archive.files[entryIndex]; 1600 boolean isInSameFolder = false; 1601 if (currentFolderIndex == folderIndex) { 1602 // (COMPRESS-320). 1603 // The current entry is within the same (potentially opened) folder. The 1604 // previous stream has to be fully decoded before we can start reading 1605 // but don't do it eagerly -- if the user skips over the entire folder nothing 1606 // is effectively decompressed. 
1607 if (entryIndex > 0) { 1608 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 1609 } 1610 1611 // if this is called in a random access, then the content methods of previous entry may be null 1612 // the content methods should be set to methods of the first entry as it must not be null, 1613 // and the content methods would only be set if the content methods was not set 1614 if(isRandomAccess && file.getContentMethods() == null) { 1615 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 1616 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 1617 file.setContentMethods(folderFirstFile.getContentMethods()); 1618 } 1619 isInSameFolder = true; 1620 } else { 1621 currentFolderIndex = folderIndex; 1622 // We're opening a new folder. Discard any queued streams/ folder stream. 1623 reopenFolderInputStream(folderIndex, file); 1624 } 1625 1626 boolean haveSkippedEntries = false; 1627 if (isRandomAccess) { 1628 // entries will only need to be skipped if it's a random access 1629 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 1630 } 1631 1632 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 1633 // we don't need to add another entry to the deferredBlockStreams when : 1634 // 1. If this method is called in a random access and the entry index 1635 // to be read equals to the current entry index, the input stream 1636 // has already been put in the deferredBlockStreams 1637 // 2. 
If this entry has not been read(which means no entries are skipped) 1638 return; 1639 } 1640 1641 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 1642 if (file.getHasCrc()) { 1643 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 1644 } 1645 1646 deferredBlockStreams.add(fileStream); 1647 } 1648 1649 /** 1650 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 1651 * 1652 * @param folderIndex the index of the folder to reopen 1653 * @param file the 7z entry to read 1654 * @throws IOException if exceptions occur when reading the 7z file 1655 */ 1656 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1657 deferredBlockStreams.clear(); 1658 if (currentFolderInputStream != null) { 1659 currentFolderInputStream.close(); 1660 currentFolderInputStream = null; 1661 } 1662 final Folder folder = archive.folders[folderIndex]; 1663 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1664 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 1665 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1666 1667 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1668 } 1669 1670 /** 1671 * Skip all the entries if needed. 1672 * Entries need to be skipped when: 1673 * <p> 1674 * 1. it's a random access 1675 * 2. 
one of these 2 condition is meet : 1676 * <p> 1677 * 2.1 currentEntryIndex != entryIndex : this means there are some entries 1678 * to be skipped(currentEntryIndex < entryIndex) or the entry has already 1679 * been read(currentEntryIndex > entryIndex) 1680 * <p> 1681 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: 1682 * if the entry to be read is the current entry, but some data of it has 1683 * been read before, then we need to reopen the stream of the folder and 1684 * skip all the entries before the current entries 1685 * 1686 * @param entryIndex the entry to be read 1687 * @param isInSameFolder are the entry to be read and the current entry in the same folder 1688 * @param folderIndex the index of the folder which contains the entry 1689 * @return true if there are entries actually skipped 1690 * @throws IOException there are exceptions when skipping entries 1691 * @since 1.21 1692 */ 1693 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 1694 final SevenZArchiveEntry file = archive.files[entryIndex]; 1695 // if the entry to be read is the current entry, and the entry has not 1696 // been read yet, then there's nothing we need to do 1697 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 1698 return false; 1699 } 1700 1701 // 1. if currentEntryIndex < entryIndex : 1702 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 1703 // 2. 
if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 1704 // this means the entry has already been read before, and we need to reopen the 1705 // stream of the folder and skip all the entries before the current entries 1706 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 1707 if (isInSameFolder) { 1708 if (currentEntryIndex < entryIndex) { 1709 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 1710 filesToSkipStartIndex = currentEntryIndex + 1; 1711 } else { 1712 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 1713 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 1714 // from the start entry of the folder again 1715 reopenFolderInputStream(folderIndex, file); 1716 } 1717 } 1718 1719 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 1720 final SevenZArchiveEntry fileToSkip = archive.files[i]; 1721 InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); 1722 if (fileToSkip.getHasCrc()) { 1723 fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); 1724 } 1725 deferredBlockStreams.add(fileStreamToSkip); 1726 1727 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 1728 fileToSkip.setContentMethods(file.getContentMethods()); 1729 } 1730 return true; 1731 } 1732 1733 /** 1734 * Find out if any data of current entry has been read or not. 1735 * This is achieved by comparing the bytes remaining to read 1736 * and the size of the file. 
1737 * 1738 * @return true if any data of current entry has been read 1739 * @since 1.21 1740 */ 1741 private boolean hasCurrentEntryBeenRead() { 1742 boolean hasCurrentEntryBeenRead = false; 1743 if (!deferredBlockStreams.isEmpty()) { 1744 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 1745 // get the bytes remaining to read, and compare it with the size of 1746 // the file to figure out if the file has been read 1747 if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { 1748 hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1749 } 1750 1751 if (currentEntryInputStream instanceof BoundedInputStream) { 1752 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1753 } 1754 } 1755 return hasCurrentEntryBeenRead; 1756 } 1757 1758 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 1759 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 1760 channel.position(folderOffset); 1761 InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( 1762 new BoundedSeekableByteChannelInputStream(channel, 1763 archive.packSizes[firstPackStreamIndex]))) { 1764 @Override 1765 public int read() throws IOException { 1766 final int r = in.read(); 1767 if (r >= 0) { 1768 count(1); 1769 } 1770 return r; 1771 } 1772 @Override 1773 public int read(final byte[] b) throws IOException { 1774 return read(b, 0, b.length); 1775 } 1776 @Override 1777 public int read(final byte[] b, final int off, final int len) throws IOException { 1778 if (len == 0) { 1779 return 0; 1780 } 1781 final int r = in.read(b, off, len); 1782 if (r >= 0) { 1783 count(r); 1784 } 1785 return r; 1786 } 1787 private void count(final int c) { 1788 compressedBytesReadFromCurrentEntry += c; 1789 } 
1790 }; 1791 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 1792 for (final Coder coder : folder.getOrderedCoders()) { 1793 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1794 throw new IOException("Multi input/output stream coders are not yet supported"); 1795 } 1796 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 1797 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 1798 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 1799 methods.addFirst(new SevenZMethodConfiguration(method, 1800 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 1801 } 1802 entry.setContentMethods(methods); 1803 if (folder.hasCrc) { 1804 return new CRC32VerifyingInputStream(inputStreamStack, 1805 folder.getUnpackSize(), folder.crc); 1806 } 1807 return inputStreamStack; 1808 } 1809 1810 /** 1811 * Reads a byte of data. 1812 * 1813 * @return the byte read, or -1 if end of input is reached 1814 * @throws IOException 1815 * if an I/O error has occurred 1816 */ 1817 public int read() throws IOException { 1818 final int b = getCurrentStream().read(); 1819 if (b >= 0) { 1820 uncompressedBytesReadFromCurrentEntry++; 1821 } 1822 return b; 1823 } 1824 1825 private InputStream getCurrentStream() throws IOException { 1826 if (archive.files[currentEntryIndex].getSize() == 0) { 1827 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 1828 } 1829 if (deferredBlockStreams.isEmpty()) { 1830 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 1831 } 1832 1833 while (deferredBlockStreams.size() > 1) { 1834 // In solid compression mode we need to decompress all leading folder' 1835 // streams to get access to an entry. We defer this until really needed 1836 // so that entire blocks can be skipped without wasting time for decompression. 
1837 try (final InputStream stream = deferredBlockStreams.remove(0)) { 1838 IOUtils.skip(stream, Long.MAX_VALUE); 1839 } 1840 compressedBytesReadFromCurrentEntry = 0; 1841 } 1842 1843 return deferredBlockStreams.get(0); 1844 } 1845 1846 /** 1847 * Returns an InputStream for reading the contents of the given entry. 1848 * 1849 * <p>For archives using solid compression randomly accessing 1850 * entries will be significantly slower than reading the archive 1851 * sequentially.</p> 1852 * 1853 * @param entry the entry to get the stream for. 1854 * @return a stream to read the entry from. 1855 * @throws IOException if unable to create an input stream from the zipentry 1856 * @since 1.20 1857 */ 1858 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 1859 int entryIndex = -1; 1860 for (int i = 0; i < this.archive.files.length;i++) { 1861 if (entry == this.archive.files[i]) { 1862 entryIndex = i; 1863 break; 1864 } 1865 } 1866 1867 if (entryIndex < 0) { 1868 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); 1869 } 1870 1871 buildDecodingStream(entryIndex, true); 1872 currentEntryIndex = entryIndex; 1873 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1874 return getCurrentStream(); 1875 } 1876 1877 /** 1878 * Reads data into an array of bytes. 1879 * 1880 * @param b the array to write data to 1881 * @return the number of bytes read, or -1 if end of input is reached 1882 * @throws IOException 1883 * if an I/O error has occurred 1884 */ 1885 public int read(final byte[] b) throws IOException { 1886 return read(b, 0, b.length); 1887 } 1888 1889 /** 1890 * Reads data into an array of bytes. 
1891 * 1892 * @param b the array to write data to 1893 * @param off offset into the buffer to start filling at 1894 * @param len of bytes to read 1895 * @return the number of bytes read, or -1 if end of input is reached 1896 * @throws IOException 1897 * if an I/O error has occurred 1898 */ 1899 public int read(final byte[] b, final int off, final int len) throws IOException { 1900 if (len == 0) { 1901 return 0; 1902 } 1903 final int cnt = getCurrentStream().read(b, off, len); 1904 if (cnt > 0) { 1905 uncompressedBytesReadFromCurrentEntry += cnt; 1906 } 1907 return cnt; 1908 } 1909 1910 /** 1911 * Provides statistics for bytes read from the current entry. 1912 * 1913 * @return statistics for bytes read from the current entry 1914 * @since 1.17 1915 */ 1916 public InputStreamStatistics getStatisticsForCurrentEntry() { 1917 return new InputStreamStatistics() { 1918 @Override 1919 public long getCompressedCount() { 1920 return compressedBytesReadFromCurrentEntry; 1921 } 1922 @Override 1923 public long getUncompressedCount() { 1924 return uncompressedBytesReadFromCurrentEntry; 1925 } 1926 }; 1927 } 1928 1929 private static long readUint64(final ByteBuffer in) throws IOException { 1930 // long rather than int as it might get shifted beyond the range of an int 1931 final long firstByte = getUnsignedByte(in); 1932 int mask = 0x80; 1933 long value = 0; 1934 for (int i = 0; i < 8; i++) { 1935 if ((firstByte & mask) == 0) { 1936 return value | (firstByte & mask - 1) << 8 * i; 1937 } 1938 final long nextByte = getUnsignedByte(in); 1939 value |= nextByte << 8 * i; 1940 mask >>>= 1; 1941 } 1942 return value; 1943 } 1944 1945 private static char getChar(final ByteBuffer buf) throws IOException { 1946 if (buf.remaining() < 2) { 1947 throw new EOFException(); 1948 } 1949 return buf.getChar(); 1950 } 1951 1952 private static int getInt(final ByteBuffer buf) throws IOException { 1953 if (buf.remaining() < 4) { 1954 throw new EOFException(); 1955 } 1956 return buf.getInt(); 1957 } 
1958 1959 private static long getLong(final ByteBuffer buf) throws IOException { 1960 if (buf.remaining() < 8) { 1961 throw new EOFException(); 1962 } 1963 return buf.getLong(); 1964 } 1965 1966 private static void get(final ByteBuffer buf, final byte[] to) throws IOException { 1967 if (buf.remaining() < to.length) { 1968 throw new EOFException(); 1969 } 1970 buf.get(to); 1971 } 1972 1973 private static int getUnsignedByte(final ByteBuffer buf) throws IOException { 1974 if (!buf.hasRemaining()) { 1975 throw new EOFException(); 1976 } 1977 return buf.get() & 0xff; 1978 } 1979 1980 /** 1981 * Checks if the signature matches what is expected for a 7z file. 1982 * 1983 * @param signature 1984 * the bytes to check 1985 * @param length 1986 * the number of bytes to check 1987 * @return true, if this is the signature of a 7z archive. 1988 * @since 1.8 1989 */ 1990 public static boolean matches(final byte[] signature, final int length) { 1991 if (length < sevenZSignature.length) { 1992 return false; 1993 } 1994 1995 for (int i = 0; i < sevenZSignature.length; i++) { 1996 if (signature[i] != sevenZSignature[i]) { 1997 return false; 1998 } 1999 } 2000 return true; 2001 } 2002 2003 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) { 2004 if (bytesToSkip < 1) { 2005 return 0; 2006 } 2007 final int current = input.position(); 2008 final int maxSkip = input.remaining(); 2009 if (maxSkip < bytesToSkip) { 2010 bytesToSkip = maxSkip; 2011 } 2012 input.position(current + (int) bytesToSkip); 2013 return bytesToSkip; 2014 } 2015 2016 private void readFully(final ByteBuffer buf) throws IOException { 2017 buf.rewind(); 2018 IOUtils.readFully(channel, buf); 2019 buf.flip(); 2020 } 2021 2022 @Override 2023 public String toString() { 2024 return archive.toString(); 2025 } 2026 2027 /** 2028 * Derives a default file name from the archive name - if known. 2029 * 2030 * <p>This implements the same heuristics the 7z tools use. 
In 2031 * 7z's case if an archive contains entries without a name - 2032 * i.e. {@link SevenZArchiveEntry#getName} returns {@code null} - 2033 * then its command line and GUI tools will use this default name 2034 * when extracting the entries.</p> 2035 * 2036 * @return null if the name of the archive is unknown. Otherwise 2037 * if the name of the archive has got any extension, it is 2038 * stripped and the remainder returned. Finally if the name of the 2039 * archive hasn't got any extension then a {@code ~} character is 2040 * appended to the archive name. 2041 * 2042 * @since 1.19 2043 */ 2044 public String getDefaultName() { 2045 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 2046 return null; 2047 } 2048 2049 final String lastSegment = new File(fileName).getName(); 2050 final int dotPos = lastSegment.lastIndexOf("."); 2051 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 2052 return lastSegment.substring(0, dotPos); 2053 } 2054 return lastSegment + "~"; 2055 } 2056 2057 private static byte[] utf16Decode(final char[] chars) { 2058 if (chars == null) { 2059 return null; 2060 } 2061 final ByteBuffer encoded = UTF_16LE.encode(CharBuffer.wrap(chars)); 2062 if (encoded.hasArray()) { 2063 return encoded.array(); 2064 } 2065 final byte[] e = new byte[encoded.remaining()]; 2066 encoded.get(e); 2067 return e; 2068 } 2069 2070 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 2071 if (value > Integer.MAX_VALUE || value < 0) { 2072 throw new IOException("Cannot handle " + what + " " + value); 2073 } 2074 return (int) value; 2075 } 2076 2077 private static class ArchiveStatistics { 2078 private int numberOfPackedStreams; 2079 private long numberOfCoders; 2080 private long numberOfOutStreams; 2081 private long numberOfInStreams; 2082 private long numberOfUnpackSubStreams; 2083 private int numberOfFolders; 2084 private BitSet folderHasCrc; 2085 private int numberOfEntries; 
2086 private int numberOfEntriesWithStream; 2087 2088 @Override 2089 public String toString() { 2090 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders 2091 + " folders. Estimated size " + estimateSize()/ 1024L + " kB."; 2092 } 2093 2094 long estimateSize() { 2095 final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 2096 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 2097 + numberOfFolders * folderSize() /* folders in Archive */ 2098 + numberOfCoders * coderSize() /* coders in Folder */ 2099 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 2100 + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 2101 + 8L * numberOfOutStreams /* unpackSizes in Folder */ 2102 + numberOfEntries * entrySize() /* files in Archive */ 2103 + streamMapSize() 2104 ; 2105 return 2 * lowerBound /* conservative guess */; 2106 } 2107 2108 void assertValidity(final int maxMemoryLimitInKb) throws IOException { 2109 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 2110 throw new IOException("archive with entries but no folders"); 2111 } 2112 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 2113 throw new IOException("archive doesn't contain enough substreams for entries"); 2114 } 2115 2116 final long memoryNeededInKb = estimateSize() / 1024; 2117 if (maxMemoryLimitInKb < memoryNeededInKb) { 2118 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 2119 } 2120 } 2121 2122 private long folderSize() { 2123 return 30; /* nested arrays are accounted for separately */ 2124 } 2125 2126 private long coderSize() { 2127 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 2128 + 16 2129 + 4 /* properties, guess */ 2130 ; 2131 } 2132 2133 private long bindPairSize() { 2134 return 16; 2135 } 2136 2137 private long entrySize() { 2138 return 100; /* real size depends 
on name length, everything without name is about 70 bytes */ 2139 } 2140 2141 private long streamMapSize() { 2142 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 2143 + 8 * numberOfPackedStreams /* packStreamOffsets */ 2144 + 4 * numberOfEntries /* fileFolderIndex */ 2145 ; 2146 } 2147 } 2148}