001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 */
018 package org.apache.commons.compress.archivers.zip;
019
020 import java.io.EOFException;
021 import java.io.File;
022 import java.io.IOException;
023 import java.io.InputStream;
024 import java.io.RandomAccessFile;
025 import java.util.Arrays;
026 import java.util.Collections;
027 import java.util.Comparator;
028 import java.util.Enumeration;
029 import java.util.HashMap;
030 import java.util.LinkedHashMap;
031 import java.util.Map;
032 import java.util.zip.Inflater;
033 import java.util.zip.InflaterInputStream;
034 import java.util.zip.ZipEntry;
035 import java.util.zip.ZipException;
036
037 import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
038 import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
039 import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
040 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
041 import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
042
043 /**
044 * Replacement for <code>java.util.ZipFile</code>.
045 *
046 * <p>This class adds support for file name encodings other than UTF-8
047 * (which is required to work on ZIP files created by native zip tools
048 * and is able to skip a preamble like the one found in self
049 * extracting archives. Furthermore it returns instances of
050 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
051 * instead of <code>java.util.zip.ZipEntry</code>.</p>
052 *
053 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
054 * have to reimplement all methods anyway. Like
055 * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the
056 * covers and supports compressed and uncompressed entries. As of
057 * Apache Commons Compress it also transparently supports Zip64
058 * extensions and thus individual entries and archives larger than 4
059 * GB or with more than 65536 entries.</p>
060 *
061 * <p>The method signatures mimic the ones of
062 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
063 *
064 * <ul>
065 * <li>There is no getName method.</li>
066 * <li>entries has been renamed to getEntries.</li>
067 * <li>getEntries and getEntry return
068 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
069 * instances.</li>
070 * <li>close is allowed to throw IOException.</li>
071 * </ul>
072 *
073 */
074 public class ZipFile {
075 private static final int HASH_SIZE = 509;
076 static final int NIBLET_MASK = 0x0f;
077 static final int BYTE_SHIFT = 8;
078 private static final int POS_0 = 0;
079 private static final int POS_1 = 1;
080 private static final int POS_2 = 2;
081 private static final int POS_3 = 3;
082
083 /**
084 * Maps ZipArchiveEntrys to two longs, recording the offsets of
085 * the local file headers and the start of entry data.
086 */
087 private final Map<ZipArchiveEntry, OffsetEntry> entries =
088 new LinkedHashMap<ZipArchiveEntry, OffsetEntry>(HASH_SIZE);
089
090 /**
091 * Maps String to ZipArchiveEntrys, name -> actual entry.
092 */
093 private final Map<String, ZipArchiveEntry> nameMap =
094 new HashMap<String, ZipArchiveEntry>(HASH_SIZE);
095
096 private static final class OffsetEntry {
097 private long headerOffset = -1;
098 private long dataOffset = -1;
099 }
100
101 /**
102 * The encoding to use for filenames and the file comment.
103 *
104 * <p>For a list of possible values see <a
105 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
106 * Defaults to UTF-8.</p>
107 */
108 private final String encoding;
109
110 /**
111 * The zip encoding to use for filenames and the file comment.
112 */
113 private final ZipEncoding zipEncoding;
114
115 /**
116 * File name of actual source.
117 */
118 private final String archiveName;
119
120 /**
121 * The actual data source.
122 */
123 private final RandomAccessFile archive;
124
125 /**
126 * Whether to look for and use Unicode extra fields.
127 */
128 private final boolean useUnicodeExtraFields;
129
130 /**
131 * Whether the file is closed.
132 */
133 private boolean closed;
134
135 /**
136 * Opens the given file for reading, assuming "UTF8" for file names.
137 *
138 * @param f the archive.
139 *
140 * @throws IOException if an error occurs while reading the file.
141 */
142 public ZipFile(File f) throws IOException {
143 this(f, ZipEncodingHelper.UTF8);
144 }
145
146 /**
147 * Opens the given file for reading, assuming "UTF8".
148 *
149 * @param name name of the archive.
150 *
151 * @throws IOException if an error occurs while reading the file.
152 */
153 public ZipFile(String name) throws IOException {
154 this(new File(name), ZipEncodingHelper.UTF8);
155 }
156
157 /**
158 * Opens the given file for reading, assuming the specified
159 * encoding for file names, scanning unicode extra fields.
160 *
161 * @param name name of the archive.
162 * @param encoding the encoding to use for file names, use null
163 * for the platform's default encoding
164 *
165 * @throws IOException if an error occurs while reading the file.
166 */
167 public ZipFile(String name, String encoding) throws IOException {
168 this(new File(name), encoding, true);
169 }
170
171 /**
172 * Opens the given file for reading, assuming the specified
173 * encoding for file names and scanning for unicode extra fields.
174 *
175 * @param f the archive.
176 * @param encoding the encoding to use for file names, use null
177 * for the platform's default encoding
178 *
179 * @throws IOException if an error occurs while reading the file.
180 */
181 public ZipFile(File f, String encoding) throws IOException {
182 this(f, encoding, true);
183 }
184
185 /**
186 * Opens the given file for reading, assuming the specified
187 * encoding for file names.
188 *
189 * @param f the archive.
190 * @param encoding the encoding to use for file names, use null
191 * for the platform's default encoding
192 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
193 * Extra Fields (if present) to set the file names.
194 *
195 * @throws IOException if an error occurs while reading the file.
196 */
197 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
198 throws IOException {
199 this.archiveName = f.getAbsolutePath();
200 this.encoding = encoding;
201 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
202 this.useUnicodeExtraFields = useUnicodeExtraFields;
203 archive = new RandomAccessFile(f, "r");
204 boolean success = false;
205 try {
206 Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
207 populateFromCentralDirectory();
208 resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
209 success = true;
210 } finally {
211 if (!success) {
212 try {
213 closed = true;
214 archive.close();
215 } catch (IOException e2) { // NOPMD
216 // swallow, throw the original exception instead
217 }
218 }
219 }
220 }
221
222 /**
223 * The encoding to use for filenames and the file comment.
224 *
225 * @return null if using the platform's default character encoding.
226 */
227 public String getEncoding() {
228 return encoding;
229 }
230
231 /**
232 * Closes the archive.
233 * @throws IOException if an error occurs closing the archive.
234 */
235 public void close() throws IOException {
236 // this flag is only written here and read in finalize() which
237 // can never be run in parallel.
238 // no synchronization needed.
239 closed = true;
240
241 archive.close();
242 }
243
244 /**
245 * close a zipfile quietly; throw no io fault, do nothing
246 * on a null parameter
247 * @param zipfile file to close, can be null
248 */
249 public static void closeQuietly(ZipFile zipfile) {
250 if (zipfile != null) {
251 try {
252 zipfile.close();
253 } catch (IOException e) { // NOPMD
254 //ignore, that's why the method is called "quietly"
255 }
256 }
257 }
258
259 /**
260 * Returns all entries.
261 *
262 * <p>Entries will be returned in the same order they appear
263 * within the archive's central directory.</p>
264 *
265 * @return all entries as {@link ZipArchiveEntry} instances
266 */
267 public Enumeration<ZipArchiveEntry> getEntries() {
268 return Collections.enumeration(entries.keySet());
269 }
270
271 /**
272 * Returns all entries in physical order.
273 *
274 * <p>Entries will be returned in the same order their contents
275 * appear within the archive.</p>
276 *
277 * @return all entries as {@link ZipArchiveEntry} instances
278 *
279 * @since Commons Compress 1.1
280 */
281 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
282 ZipArchiveEntry[] allEntries =
283 entries.keySet().toArray(new ZipArchiveEntry[0]);
284 Arrays.sort(allEntries, OFFSET_COMPARATOR);
285 return Collections.enumeration(Arrays.asList(allEntries));
286 }
287
288 /**
289 * Returns a named entry - or {@code null} if no entry by
290 * that name exists.
291 * @param name name of the entry.
292 * @return the ZipArchiveEntry corresponding to the given name - or
293 * {@code null} if not present.
294 */
295 public ZipArchiveEntry getEntry(String name) {
296 return nameMap.get(name);
297 }
298
299 /**
300 * Whether this class is able to read the given entry.
301 *
302 * <p>May return false if it is set up to use encryption or a
303 * compression method that hasn't been implemented yet.</p>
304 * @since 1.1
305 */
306 public boolean canReadEntryData(ZipArchiveEntry ze) {
307 return ZipUtil.canHandleEntryData(ze);
308 }
309
310 /**
311 * Returns an InputStream for reading the contents of the given entry.
312 *
313 * @param ze the entry to get the stream for.
314 * @return a stream to read the entry from.
315 * @throws IOException if unable to create an input stream from the zipenty
316 * @throws ZipException if the zipentry uses an unsupported feature
317 */
318 public InputStream getInputStream(ZipArchiveEntry ze)
319 throws IOException, ZipException {
320 OffsetEntry offsetEntry = entries.get(ze);
321 if (offsetEntry == null) {
322 return null;
323 }
324 ZipUtil.checkRequestedFeatures(ze);
325 long start = offsetEntry.dataOffset;
326 BoundedInputStream bis =
327 new BoundedInputStream(start, ze.getCompressedSize());
328 switch (ze.getMethod()) {
329 case ZipEntry.STORED:
330 return bis;
331 case ZipEntry.DEFLATED:
332 bis.addDummy();
333 final Inflater inflater = new Inflater(true);
334 return new InflaterInputStream(bis, inflater) {
335 @Override
336 public void close() throws IOException {
337 super.close();
338 inflater.end();
339 }
340 };
341 default:
342 throw new ZipException("Found unsupported compression method "
343 + ze.getMethod());
344 }
345 }
346
347 /**
348 * Ensures that the close method of this zipfile is called when
349 * there are no more references to it.
350 * @see #close()
351 */
352 @Override
353 protected void finalize() throws Throwable {
354 try {
355 if (!closed) {
356 System.err.println("Cleaning up unclosed ZipFile for archive "
357 + archiveName);
358 close();
359 }
360 } finally {
361 super.finalize();
362 }
363 }
364
365 /**
366 * Length of a "central directory" entry structure without file
367 * name, extra fields or comment.
368 */
369 private static final int CFH_LEN =
370 /* version made by */ SHORT
371 /* version needed to extract */ + SHORT
372 /* general purpose bit flag */ + SHORT
373 /* compression method */ + SHORT
374 /* last mod file time */ + SHORT
375 /* last mod file date */ + SHORT
376 /* crc-32 */ + WORD
377 /* compressed size */ + WORD
378 /* uncompressed size */ + WORD
379 /* filename length */ + SHORT
380 /* extra field length */ + SHORT
381 /* file comment length */ + SHORT
382 /* disk number start */ + SHORT
383 /* internal file attributes */ + SHORT
384 /* external file attributes */ + WORD
385 /* relative offset of local header */ + WORD;
386
387 private static final long CFH_SIG =
388 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
389
390 /**
391 * Reads the central directory of the given archive and populates
392 * the internal tables with ZipArchiveEntry instances.
393 *
394 * <p>The ZipArchiveEntrys will know all data that can be obtained from
395 * the central directory alone, but not the data that requires the
396 * local file header or additional data to be read.</p>
397 *
398 * @return a map of zipentries that didn't have the language
399 * encoding flag set when read.
400 */
401 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
402 throws IOException {
403 HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
404 new HashMap<ZipArchiveEntry, NameAndComment>();
405
406 positionAtCentralDirectory();
407
408 byte[] signatureBytes = new byte[WORD];
409 archive.readFully(signatureBytes);
410 long sig = ZipLong.getValue(signatureBytes);
411
412 if (sig != CFH_SIG && startsWithLocalFileHeader()) {
413 throw new IOException("central directory is empty, can't expand"
414 + " corrupt archive.");
415 }
416
417 while (sig == CFH_SIG) {
418 readCentralDirectoryEntry(noUTF8Flag);
419 archive.readFully(signatureBytes);
420 sig = ZipLong.getValue(signatureBytes);
421 }
422 return noUTF8Flag;
423 }
424
425 /**
426 * Reads an individual entry of the central directory, creats an
427 * ZipArchiveEntry from it and adds it to the global maps.
428 *
429 * @param noUTF8Flag map used to collect entries that don't have
430 * their UTF-8 flag set and whose name will be set by data read
431 * from the local file header later. The current entry may be
432 * added to this map.
433 */
434 private void
435 readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
436 throws IOException {
437 byte[] cfh = new byte[CFH_LEN];
438
439 archive.readFully(cfh);
440 int off = 0;
441 ZipArchiveEntry ze = new ZipArchiveEntry();
442
443 int versionMadeBy = ZipShort.getValue(cfh, off);
444 off += SHORT;
445 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
446
447 off += SHORT; // skip version info
448
449 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfh, off);
450 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
451 final ZipEncoding entryEncoding =
452 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
453 ze.setGeneralPurposeBit(gpFlag);
454
455 off += SHORT;
456
457 ze.setMethod(ZipShort.getValue(cfh, off));
458 off += SHORT;
459
460 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off));
461 ze.setTime(time);
462 off += WORD;
463
464 ze.setCrc(ZipLong.getValue(cfh, off));
465 off += WORD;
466
467 ze.setCompressedSize(ZipLong.getValue(cfh, off));
468 off += WORD;
469
470 ze.setSize(ZipLong.getValue(cfh, off));
471 off += WORD;
472
473 int fileNameLen = ZipShort.getValue(cfh, off);
474 off += SHORT;
475
476 int extraLen = ZipShort.getValue(cfh, off);
477 off += SHORT;
478
479 int commentLen = ZipShort.getValue(cfh, off);
480 off += SHORT;
481
482 int diskStart = ZipShort.getValue(cfh, off);
483 off += SHORT;
484
485 ze.setInternalAttributes(ZipShort.getValue(cfh, off));
486 off += SHORT;
487
488 ze.setExternalAttributes(ZipLong.getValue(cfh, off));
489 off += WORD;
490
491 byte[] fileName = new byte[fileNameLen];
492 archive.readFully(fileName);
493 ze.setName(entryEncoding.decode(fileName), fileName);
494
495 // LFH offset,
496 OffsetEntry offset = new OffsetEntry();
497 offset.headerOffset = ZipLong.getValue(cfh, off);
498 // data offset will be filled later
499 entries.put(ze, offset);
500
501 nameMap.put(ze.getName(), ze);
502
503 byte[] cdExtraData = new byte[extraLen];
504 archive.readFully(cdExtraData);
505 ze.setCentralDirectoryExtra(cdExtraData);
506
507 setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);
508
509 byte[] comment = new byte[commentLen];
510 archive.readFully(comment);
511 ze.setComment(entryEncoding.decode(comment));
512
513 if (!hasUTF8Flag && useUnicodeExtraFields) {
514 noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
515 }
516 }
517
518 /**
519 * If the entry holds a Zip64 extended information extra field,
520 * read sizes from there if the entry's sizes are set to
521 * 0xFFFFFFFFF, do the same for the offset of the local file
522 * header.
523 *
524 * <p>Ensures the Zip64 extra either knows both compressed and
525 * uncompressed size or neither of both as the internal logic in
526 * ExtraFieldUtils forces the field to create local header data
527 * even if they are never used - and here a field with only one
528 * size would be invalid.</p>
529 */
530 private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze,
531 OffsetEntry offset,
532 int diskStart)
533 throws IOException {
534 Zip64ExtendedInformationExtraField z64 =
535 (Zip64ExtendedInformationExtraField)
536 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
537 if (z64 != null) {
538 boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
539 boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
540 boolean hasRelativeHeaderOffset =
541 offset.headerOffset == ZIP64_MAGIC;
542 z64.reparseCentralDirectoryData(hasUncompressedSize,
543 hasCompressedSize,
544 hasRelativeHeaderOffset,
545 diskStart == ZIP64_MAGIC_SHORT);
546
547 if (hasUncompressedSize) {
548 ze.setSize(z64.getSize().getLongValue());
549 } else if (hasCompressedSize) {
550 z64.setSize(new ZipEightByteInteger(ze.getSize()));
551 }
552
553 if (hasCompressedSize) {
554 ze.setCompressedSize(z64.getCompressedSize().getLongValue());
555 } else if (hasUncompressedSize) {
556 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
557 }
558
559 if (hasRelativeHeaderOffset) {
560 offset.headerOffset =
561 z64.getRelativeHeaderOffset().getLongValue();
562 }
563 }
564 }
565
566 /**
567 * Length of the "End of central directory record" - which is
568 * supposed to be the last structure of the archive - without file
569 * comment.
570 */
571 private static final int MIN_EOCD_SIZE =
572 /* end of central dir signature */ WORD
573 /* number of this disk */ + SHORT
574 /* number of the disk with the */
575 /* start of the central directory */ + SHORT
576 /* total number of entries in */
577 /* the central dir on this disk */ + SHORT
578 /* total number of entries in */
579 /* the central dir */ + SHORT
580 /* size of the central directory */ + WORD
581 /* offset of start of central */
582 /* directory with respect to */
583 /* the starting disk number */ + WORD
584 /* zipfile comment length */ + SHORT;
585
586 /**
587 * Maximum length of the "End of central directory record" with a
588 * file comment.
589 */
590 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
591 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
592
593 /**
594 * Offset of the field that holds the location of the first
595 * central directory entry inside the "End of central directory
596 * record" relative to the start of the "End of central directory
597 * record".
598 */
599 private static final int CFD_LOCATOR_OFFSET =
600 /* end of central dir signature */ WORD
601 /* number of this disk */ + SHORT
602 /* number of the disk with the */
603 /* start of the central directory */ + SHORT
604 /* total number of entries in */
605 /* the central dir on this disk */ + SHORT
606 /* total number of entries in */
607 /* the central dir */ + SHORT
608 /* size of the central directory */ + WORD;
609
610 /**
611 * Length of the "Zip64 end of central directory locator" - which
612 * should be right in front of the "end of central directory
613 * record" if one is present at all.
614 */
615 private static final int ZIP64_EOCDL_LENGTH =
616 /* zip64 end of central dir locator sig */ WORD
617 /* number of the disk with the start */
618 /* start of the zip64 end of */
619 /* central directory */ + WORD
620 /* relative offset of the zip64 */
621 /* end of central directory record */ + DWORD
622 /* total number of disks */ + WORD;
623
624 /**
625 * Offset of the field that holds the location of the "Zip64 end
626 * of central directory record" inside the "Zip64 end of central
627 * directory locator" relative to the start of the "Zip64 end of
628 * central directory locator".
629 */
630 private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
631 /* zip64 end of central dir locator sig */ WORD
632 /* number of the disk with the start */
633 /* start of the zip64 end of */
634 /* central directory */ + WORD;
635
636 /**
637 * Offset of the field that holds the location of the first
638 * central directory entry inside the "Zip64 end of central
639 * directory record" relative to the start of the "Zip64 end of
640 * central directory record".
641 */
642 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
643 /* zip64 end of central dir */
644 /* signature */ WORD
645 /* size of zip64 end of central */
646 /* directory record */ + DWORD
647 /* version made by */ + SHORT
648 /* version needed to extract */ + SHORT
649 /* number of this disk */ + WORD
650 /* number of the disk with the */
651 /* start of the central directory */ + WORD
652 /* total number of entries in the */
653 /* central directory on this disk */ + DWORD
654 /* total number of entries in the */
655 /* central directory */ + DWORD
656 /* size of the central directory */ + DWORD;
657
658 /**
659 * Searches for either the "Zip64 end of central directory
660 * locator" or the "End of central dir record", parses
661 * it and positions the stream at the first central directory
662 * record.
663 */
664 private void positionAtCentralDirectory()
665 throws IOException {
666 boolean found = tryToLocateSignature(MIN_EOCD_SIZE + ZIP64_EOCDL_LENGTH,
667 MAX_EOCD_SIZE + ZIP64_EOCDL_LENGTH,
668 ZipArchiveOutputStream
669 .ZIP64_EOCD_LOC_SIG);
670 if (!found) {
671 // not a ZIP64 archive
672 positionAtCentralDirectory32();
673 } else {
674 positionAtCentralDirectory64();
675 }
676 }
677
678 /**
679 * Parses the "Zip64 end of central directory locator",
680 * finds the "Zip64 end of central directory record" using the
681 * parsed information, parses that and positions the stream at the
682 * first central directory record.
683 */
684 private void positionAtCentralDirectory64()
685 throws IOException {
686 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET);
687 byte[] zip64EocdOffset = new byte[DWORD];
688 archive.readFully(zip64EocdOffset);
689 archive.seek(ZipEightByteInteger.getLongValue(zip64EocdOffset));
690 byte[] sig = new byte[WORD];
691 archive.readFully(sig);
692 if (sig[POS_0] != ZipArchiveOutputStream.ZIP64_EOCD_SIG[POS_0]
693 || sig[POS_1] != ZipArchiveOutputStream.ZIP64_EOCD_SIG[POS_1]
694 || sig[POS_2] != ZipArchiveOutputStream.ZIP64_EOCD_SIG[POS_2]
695 || sig[POS_3] != ZipArchiveOutputStream.ZIP64_EOCD_SIG[POS_3]
696 ) {
697 throw new ZipException("archive's ZIP64 end of central "
698 + "directory locator is corrupt.");
699 }
700 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
701 - WORD /* signature has already been read */);
702 byte[] cfdOffset = new byte[DWORD];
703 archive.readFully(cfdOffset);
704 archive.seek(ZipEightByteInteger.getLongValue(cfdOffset));
705 }
706
707 /**
708 * Searches for the "End of central dir record", parses
709 * it and positions the stream at the first central directory
710 * record.
711 */
712 private void positionAtCentralDirectory32()
713 throws IOException {
714 boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
715 ZipArchiveOutputStream.EOCD_SIG);
716 if (!found) {
717 throw new ZipException("archive is not a ZIP archive");
718 }
719 skipBytes(CFD_LOCATOR_OFFSET);
720 byte[] cfdOffset = new byte[WORD];
721 archive.readFully(cfdOffset);
722 archive.seek(ZipLong.getValue(cfdOffset));
723 }
724
725 /**
726 * Searches the archive backwards from minDistance to maxDistance
727 * for the given signature, positions the RandomaccessFile right
728 * at the signature if it has been found.
729 */
730 private boolean tryToLocateSignature(long minDistanceFromEnd,
731 long maxDistanceFromEnd,
732 byte[] sig) throws IOException {
733 boolean found = false;
734 long off = archive.length() - minDistanceFromEnd;
735 final long stopSearching =
736 Math.max(0L, archive.length() - maxDistanceFromEnd);
737 if (off >= 0) {
738 for (; off >= stopSearching; off--) {
739 archive.seek(off);
740 int curr = archive.read();
741 if (curr == -1) {
742 break;
743 }
744 if (curr == sig[POS_0]) {
745 curr = archive.read();
746 if (curr == sig[POS_1]) {
747 curr = archive.read();
748 if (curr == sig[POS_2]) {
749 curr = archive.read();
750 if (curr == sig[POS_3]) {
751 found = true;
752 break;
753 }
754 }
755 }
756 }
757 }
758 }
759 if (found) {
760 archive.seek(off);
761 }
762 return found;
763 }
764
765 /**
766 * Skips the given number of bytes or throws an EOFException if
767 * skipping failed.
768 */
769 private void skipBytes(final int count) throws IOException {
770 int totalSkipped = 0;
771 while (totalSkipped < count) {
772 int skippedNow = archive.skipBytes(count - totalSkipped);
773 if (skippedNow <= 0) {
774 throw new EOFException();
775 }
776 totalSkipped += skippedNow;
777 }
778 }
779
780 /**
781 * Number of bytes in local file header up to the "length of
782 * filename" entry.
783 */
784 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
785 /* local file header signature */ WORD
786 /* version needed to extract */ + SHORT
787 /* general purpose bit flag */ + SHORT
788 /* compression method */ + SHORT
789 /* last mod file time */ + SHORT
790 /* last mod file date */ + SHORT
791 /* crc-32 */ + WORD
792 /* compressed size */ + WORD
793 /* uncompressed size */ + WORD;
794
795 /**
796 * Walks through all recorded entries and adds the data available
797 * from the local file header.
798 *
799 * <p>Also records the offsets for the data to read from the
800 * entries.</p>
801 */
802 private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment>
803 entriesWithoutUTF8Flag)
804 throws IOException {
805 // changing the name of a ZipArchiveEntry is going to change
806 // the hashcode - see COMPRESS-164
807 // Map needs to be reconstructed in order to keep central
808 // directory order
809 Map<ZipArchiveEntry, OffsetEntry> origMap =
810 new LinkedHashMap<ZipArchiveEntry, OffsetEntry>(entries);
811 entries.clear();
812 for (Map.Entry<ZipArchiveEntry, OffsetEntry> ent : origMap.entrySet()) {
813 ZipArchiveEntry ze = ent.getKey();
814 OffsetEntry offsetEntry = ent.getValue();
815 long offset = offsetEntry.headerOffset;
816 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
817 byte[] b = new byte[SHORT];
818 archive.readFully(b);
819 int fileNameLen = ZipShort.getValue(b);
820 archive.readFully(b);
821 int extraFieldLen = ZipShort.getValue(b);
822 int lenToSkip = fileNameLen;
823 while (lenToSkip > 0) {
824 int skipped = archive.skipBytes(lenToSkip);
825 if (skipped <= 0) {
826 throw new IOException("failed to skip file name in"
827 + " local file header");
828 }
829 lenToSkip -= skipped;
830 }
831 byte[] localExtraData = new byte[extraFieldLen];
832 archive.readFully(localExtraData);
833 ze.setExtra(localExtraData);
834 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
835 + SHORT + SHORT + fileNameLen + extraFieldLen;
836
837 if (entriesWithoutUTF8Flag.containsKey(ze)) {
838 String orig = ze.getName();
839 NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
840 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
841 nc.comment);
842 if (!orig.equals(ze.getName())) {
843 nameMap.remove(orig);
844 nameMap.put(ze.getName(), ze);
845 }
846 }
847 entries.put(ze, offsetEntry);
848 }
849 }
850
851 /**
852 * Checks whether the archive starts with a LFH. If it doesn't,
853 * it may be an empty archive.
854 */
855 private boolean startsWithLocalFileHeader() throws IOException {
856 archive.seek(0);
857 final byte[] start = new byte[WORD];
858 archive.readFully(start);
859 for (int i = 0; i < start.length; i++) {
860 if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) {
861 return false;
862 }
863 }
864 return true;
865 }
866
867 /**
868 * InputStream that delegates requests to the underlying
869 * RandomAccessFile, making sure that only bytes from a certain
870 * range can be read.
871 */
872 private class BoundedInputStream extends InputStream {
873 private long remaining;
874 private long loc;
875 private boolean addDummyByte = false;
876
877 BoundedInputStream(long start, long remaining) {
878 this.remaining = remaining;
879 loc = start;
880 }
881
882 @Override
883 public int read() throws IOException {
884 if (remaining-- <= 0) {
885 if (addDummyByte) {
886 addDummyByte = false;
887 return 0;
888 }
889 return -1;
890 }
891 synchronized (archive) {
892 archive.seek(loc++);
893 return archive.read();
894 }
895 }
896
897 @Override
898 public int read(byte[] b, int off, int len) throws IOException {
899 if (remaining <= 0) {
900 if (addDummyByte) {
901 addDummyByte = false;
902 b[off] = 0;
903 return 1;
904 }
905 return -1;
906 }
907
908 if (len <= 0) {
909 return 0;
910 }
911
912 if (len > remaining) {
913 len = (int) remaining;
914 }
915 int ret = -1;
916 synchronized (archive) {
917 archive.seek(loc);
918 ret = archive.read(b, off, len);
919 }
920 if (ret > 0) {
921 loc += ret;
922 remaining -= ret;
923 }
924 return ret;
925 }
926
927 /**
928 * Inflater needs an extra dummy byte for nowrap - see
929 * Inflater's javadocs.
930 */
931 void addDummy() {
932 addDummyByte = true;
933 }
934 }
935
936 private static final class NameAndComment {
937 private final byte[] name;
938 private final byte[] comment;
939 private NameAndComment(byte[] name, byte[] comment) {
940 this.name = name;
941 this.comment = comment;
942 }
943 }
944
945 /**
946 * Compares two ZipArchiveEntries based on their offset within the archive.
947 *
948 * <p>Won't return any meaningful results if one of the entries
949 * isn't part of the archive at all.</p>
950 *
951 * @since Commons Compress 1.1
952 */
953 private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR =
954 new Comparator<ZipArchiveEntry>() {
955 public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) {
956 if (e1 == e2) {
957 return 0;
958 }
959
960 OffsetEntry off1 = entries.get(e1);
961 OffsetEntry off2 = entries.get(e2);
962 if (off1 == null) {
963 return 1;
964 }
965 if (off2 == null) {
966 return -1;
967 }
968 long val = (off1.headerOffset - off2.headerOffset);
969 return val == 0 ? 0 : val < 0 ? -1 : +1;
970 }
971 };
972 }