001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 */
018 package org.apache.commons.compress.archivers.zip;
019
020 import java.io.File;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.RandomAccessFile;
024 import java.util.Collections;
025 import java.util.Enumeration;
026 import java.util.HashMap;
027 import java.util.Map;
028 import java.util.zip.Inflater;
029 import java.util.zip.InflaterInputStream;
030 import java.util.zip.ZipException;
031
032 /**
033 * Replacement for <code>java.util.ZipFile</code>.
034 *
035 * <p>This class adds support for file name encodings other than UTF-8
036 * (which is required to work on ZIP files created by native zip tools
037 * and is able to skip a preamble like the one found in self
038 * extracting archives. Furthermore it returns instances of
039 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
040 * instead of <code>java.util.zip.ZipEntry</code>.</p>
041 *
042 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
043 * have to reimplement all methods anyway. Like
044 * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the
045 * covers and supports compressed and uncompressed entries.</p>
046 *
047 * <p>The method signatures mimic the ones of
048 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
049 *
050 * <ul>
051 * <li>There is no getName method.</li>
052 * <li>entries has been renamed to getEntries.</li>
053 * <li>getEntries and getEntry return
054 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
055 * instances.</li>
056 * <li>close is allowed to throw IOException.</li>
057 * </ul>
058 *
059 */
060 public class ZipFile {
061 private static final int HASH_SIZE = 509;
062 private static final int SHORT = 2;
063 private static final int WORD = 4;
064 static final int NIBLET_MASK = 0x0f;
065 static final int BYTE_SHIFT = 8;
066 private static final int POS_0 = 0;
067 private static final int POS_1 = 1;
068 private static final int POS_2 = 2;
069 private static final int POS_3 = 3;
070
071 /**
072 * Maps ZipArchiveEntrys to Longs, recording the offsets of the local
073 * file headers.
074 */
075 private final Map entries = new HashMap(HASH_SIZE);
076
077 /**
078 * Maps String to ZipArchiveEntrys, name -> actual entry.
079 */
080 private final Map nameMap = new HashMap(HASH_SIZE);
081
082 private static final class OffsetEntry {
083 private long headerOffset = -1;
084 private long dataOffset = -1;
085 }
086
087 /**
088 * The encoding to use for filenames and the file comment.
089 *
090 * <p>For a list of possible values see <a
091 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
092 * Defaults to UTF-8.</p>
093 */
094 private final String encoding;
095
096 /**
097 * The zip encoding to use for filenames and the file comment.
098 */
099 private final ZipEncoding zipEncoding;
100
101 /**
102 * The actual data source.
103 */
104 private final RandomAccessFile archive;
105
106 /**
107 * Whether to look for and use Unicode extra fields.
108 */
109 private final boolean useUnicodeExtraFields;
110
111 /**
112 * Opens the given file for reading, assuming "UTF8" for file names.
113 *
114 * @param f the archive.
115 *
116 * @throws IOException if an error occurs while reading the file.
117 */
118 public ZipFile(File f) throws IOException {
119 this(f, ZipEncodingHelper.UTF8);
120 }
121
122 /**
123 * Opens the given file for reading, assuming "UTF8".
124 *
125 * @param name name of the archive.
126 *
127 * @throws IOException if an error occurs while reading the file.
128 */
129 public ZipFile(String name) throws IOException {
130 this(new File(name), ZipEncodingHelper.UTF8);
131 }
132
133 /**
134 * Opens the given file for reading, assuming the specified
135 * encoding for file names, scanning unicode extra fields.
136 *
137 * @param name name of the archive.
138 * @param encoding the encoding to use for file names, use null
139 * for the platform's default encoding
140 *
141 * @throws IOException if an error occurs while reading the file.
142 */
143 public ZipFile(String name, String encoding) throws IOException {
144 this(new File(name), encoding, true);
145 }
146
147 /**
148 * Opens the given file for reading, assuming the specified
149 * encoding for file names and scanning for unicode extra fields.
150 *
151 * @param f the archive.
152 * @param encoding the encoding to use for file names, use null
153 * for the platform's default encoding
154 *
155 * @throws IOException if an error occurs while reading the file.
156 */
157 public ZipFile(File f, String encoding) throws IOException {
158 this(f, encoding, true);
159 }
160
161 /**
162 * Opens the given file for reading, assuming the specified
163 * encoding for file names.
164 *
165 * @param f the archive.
166 * @param encoding the encoding to use for file names, use null
167 * for the platform's default encoding
168 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
169 * Extra Fields (if present) to set the file names.
170 *
171 * @throws IOException if an error occurs while reading the file.
172 */
173 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
174 throws IOException {
175 this.encoding = encoding;
176 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
177 this.useUnicodeExtraFields = useUnicodeExtraFields;
178 archive = new RandomAccessFile(f, "r");
179 boolean success = false;
180 try {
181 Map entriesWithoutEFS = populateFromCentralDirectory();
182 resolveLocalFileHeaderData(entriesWithoutEFS);
183 success = true;
184 } finally {
185 if (!success) {
186 try {
187 archive.close();
188 } catch (IOException e2) {
189 // swallow, throw the original exception instead
190 }
191 }
192 }
193 }
194
195 /**
196 * The encoding to use for filenames and the file comment.
197 *
198 * @return null if using the platform's default character encoding.
199 */
200 public String getEncoding() {
201 return encoding;
202 }
203
204 /**
205 * Closes the archive.
206 * @throws IOException if an error occurs closing the archive.
207 */
208 public void close() throws IOException {
209 archive.close();
210 }
211
212 /**
213 * close a zipfile quietly; throw no io fault, do nothing
214 * on a null parameter
215 * @param zipfile file to close, can be null
216 */
217 public static void closeQuietly(ZipFile zipfile) {
218 if (zipfile != null) {
219 try {
220 zipfile.close();
221 } catch (IOException e) {
222 //ignore
223 }
224 }
225 }
226
227 /**
228 * Returns all entries.
229 * @return all entries as {@link ZipArchiveEntry} instances
230 */
231 public Enumeration getEntries() {
232 return Collections.enumeration(entries.keySet());
233 }
234
235 /**
236 * Returns a named entry - or <code>null</code> if no entry by
237 * that name exists.
238 * @param name name of the entry.
239 * @return the ZipArchiveEntry corresponding to the given name - or
240 * <code>null</code> if not present.
241 */
242 public ZipArchiveEntry getEntry(String name) {
243 return (ZipArchiveEntry) nameMap.get(name);
244 }
245
246 /**
247 * Returns an InputStream for reading the contents of the given entry.
248 * @param ze the entry to get the stream for.
249 * @return a stream to read the entry from.
250 * @throws IOException if unable to create an input stream from the zipenty
251 * @throws ZipException if the zipentry has an unsupported
252 * compression method
253 */
254 public InputStream getInputStream(ZipArchiveEntry ze)
255 throws IOException, ZipException {
256 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
257 if (offsetEntry == null) {
258 return null;
259 }
260 long start = offsetEntry.dataOffset;
261 BoundedInputStream bis =
262 new BoundedInputStream(start, ze.getCompressedSize());
263 switch (ze.getMethod()) {
264 case ZipArchiveEntry.STORED:
265 return bis;
266 case ZipArchiveEntry.DEFLATED:
267 bis.addDummy();
268 return new InflaterInputStream(bis, new Inflater(true));
269 default:
270 throw new ZipException("Found unsupported compression method "
271 + ze.getMethod());
272 }
273 }
274
275 private static final int CFH_LEN =
276 /* version made by */ SHORT
277 /* version needed to extract */ + SHORT
278 /* general purpose bit flag */ + SHORT
279 /* compression method */ + SHORT
280 /* last mod file time */ + SHORT
281 /* last mod file date */ + SHORT
282 /* crc-32 */ + WORD
283 /* compressed size */ + WORD
284 /* uncompressed size */ + WORD
285 /* filename length */ + SHORT
286 /* extra field length */ + SHORT
287 /* file comment length */ + SHORT
288 /* disk number start */ + SHORT
289 /* internal file attributes */ + SHORT
290 /* external file attributes */ + WORD
291 /* relative offset of local header */ + WORD;
292
293 /**
294 * Reads the central directory of the given archive and populates
295 * the internal tables with ZipArchiveEntry instances.
296 *
297 * <p>The ZipArchiveEntrys will know all data that can be obtained from
298 * the central directory alone, but not the data that requires the
299 * local file header or additional data to be read.</p>
300 *
301 * @return a Map<ZipArchiveEntry, NameAndComment>> of
302 * zipentries that didn't have the language encoding flag set when
303 * read.
304 */
305 private Map populateFromCentralDirectory()
306 throws IOException {
307 HashMap noEFS = new HashMap();
308
309 positionAtCentralDirectory();
310
311 byte[] cfh = new byte[CFH_LEN];
312
313 byte[] signatureBytes = new byte[WORD];
314 archive.readFully(signatureBytes);
315 long sig = ZipLong.getValue(signatureBytes);
316 final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
317 if (sig != cfhSig && startsWithLocalFileHeader()) {
318 throw new IOException("central directory is empty, can't expand"
319 + " corrupt archive.");
320 }
321 while (sig == cfhSig) {
322 archive.readFully(cfh);
323 int off = 0;
324 ZipArchiveEntry ze = new ZipArchiveEntry();
325
326 int versionMadeBy = ZipShort.getValue(cfh, off);
327 off += SHORT;
328 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
329
330 off += SHORT; // skip version info
331
332 final int generalPurposeFlag = ZipShort.getValue(cfh, off);
333 final boolean hasEFS =
334 (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
335 final ZipEncoding entryEncoding =
336 hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
337
338 off += SHORT;
339
340 ze.setMethod(ZipShort.getValue(cfh, off));
341 off += SHORT;
342
343 // FIXME this is actually not very cpu cycles friendly as we are converting from
344 // dos to java while the underlying Sun implementation will convert
345 // from java to dos time for internal storage...
346 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off));
347 ze.setTime(time);
348 off += WORD;
349
350 ze.setCrc(ZipLong.getValue(cfh, off));
351 off += WORD;
352
353 ze.setCompressedSize(ZipLong.getValue(cfh, off));
354 off += WORD;
355
356 ze.setSize(ZipLong.getValue(cfh, off));
357 off += WORD;
358
359 int fileNameLen = ZipShort.getValue(cfh, off);
360 off += SHORT;
361
362 int extraLen = ZipShort.getValue(cfh, off);
363 off += SHORT;
364
365 int commentLen = ZipShort.getValue(cfh, off);
366 off += SHORT;
367
368 off += SHORT; // disk number
369
370 ze.setInternalAttributes(ZipShort.getValue(cfh, off));
371 off += SHORT;
372
373 ze.setExternalAttributes(ZipLong.getValue(cfh, off));
374 off += WORD;
375
376 byte[] fileName = new byte[fileNameLen];
377 archive.readFully(fileName);
378 ze.setName(entryEncoding.decode(fileName));
379
380 // LFH offset,
381 OffsetEntry offset = new OffsetEntry();
382 offset.headerOffset = ZipLong.getValue(cfh, off);
383 // data offset will be filled later
384 entries.put(ze, offset);
385
386 nameMap.put(ze.getName(), ze);
387
388 byte[] cdExtraData = new byte[extraLen];
389 archive.readFully(cdExtraData);
390 ze.setCentralDirectoryExtra(cdExtraData);
391
392 byte[] comment = new byte[commentLen];
393 archive.readFully(comment);
394 ze.setComment(entryEncoding.decode(comment));
395
396 archive.readFully(signatureBytes);
397 sig = ZipLong.getValue(signatureBytes);
398
399 if (!hasEFS && useUnicodeExtraFields) {
400 noEFS.put(ze, new NameAndComment(fileName, comment));
401 }
402 }
403 return noEFS;
404 }
405
406 private static final int MIN_EOCD_SIZE =
407 /* end of central dir signature */ WORD
408 /* number of this disk */ + SHORT
409 /* number of the disk with the */
410 /* start of the central directory */ + SHORT
411 /* total number of entries in */
412 /* the central dir on this disk */ + SHORT
413 /* total number of entries in */
414 /* the central dir */ + SHORT
415 /* size of the central directory */ + WORD
416 /* offset of start of central */
417 /* directory with respect to */
418 /* the starting disk number */ + WORD
419 /* zipfile comment length */ + SHORT;
420
421 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
422 /* maximum length of zipfile comment */ + 0xFFFF;
423
424 private static final int CFD_LOCATOR_OFFSET =
425 /* end of central dir signature */ WORD
426 /* number of this disk */ + SHORT
427 /* number of the disk with the */
428 /* start of the central directory */ + SHORT
429 /* total number of entries in */
430 /* the central dir on this disk */ + SHORT
431 /* total number of entries in */
432 /* the central dir */ + SHORT
433 /* size of the central directory */ + WORD;
434
435 /**
436 * Searches for the "End of central dir record", parses
437 * it and positions the stream at the first central directory
438 * record.
439 */
440 private void positionAtCentralDirectory()
441 throws IOException {
442 boolean found = false;
443 long off = archive.length() - MIN_EOCD_SIZE;
444 long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE);
445 if (off >= 0) {
446 archive.seek(off);
447 byte[] sig = ZipArchiveOutputStream.EOCD_SIG;
448 int curr = archive.read();
449 while (off >= stopSearching && curr != -1) {
450 if (curr == sig[POS_0]) {
451 curr = archive.read();
452 if (curr == sig[POS_1]) {
453 curr = archive.read();
454 if (curr == sig[POS_2]) {
455 curr = archive.read();
456 if (curr == sig[POS_3]) {
457 found = true;
458 break;
459 }
460 }
461 }
462 }
463 archive.seek(--off);
464 curr = archive.read();
465 }
466 }
467 if (!found) {
468 throw new ZipException("archive is not a ZIP archive");
469 }
470 archive.seek(off + CFD_LOCATOR_OFFSET);
471 byte[] cfdOffset = new byte[WORD];
472 archive.readFully(cfdOffset);
473 archive.seek(ZipLong.getValue(cfdOffset));
474 }
475
476 /**
477 * Number of bytes in local file header up to the "length of
478 * filename" entry.
479 */
480 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
481 /* local file header signature */ WORD
482 /* version needed to extract */ + SHORT
483 /* general purpose bit flag */ + SHORT
484 /* compression method */ + SHORT
485 /* last mod file time */ + SHORT
486 /* last mod file date */ + SHORT
487 /* crc-32 */ + WORD
488 /* compressed size */ + WORD
489 /* uncompressed size */ + WORD;
490
491 /**
492 * Walks through all recorded entries and adds the data available
493 * from the local file header.
494 *
495 * <p>Also records the offsets for the data to read from the
496 * entries.</p>
497 */
498 private void resolveLocalFileHeaderData(Map entriesWithoutEFS)
499 throws IOException {
500 Enumeration e = getEntries();
501 while (e.hasMoreElements()) {
502 ZipArchiveEntry ze = (ZipArchiveEntry) e.nextElement();
503 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
504 long offset = offsetEntry.headerOffset;
505 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
506 byte[] b = new byte[SHORT];
507 archive.readFully(b);
508 int fileNameLen = ZipShort.getValue(b);
509 archive.readFully(b);
510 int extraFieldLen = ZipShort.getValue(b);
511 int lenToSkip = fileNameLen;
512 while (lenToSkip > 0) {
513 int skipped = archive.skipBytes(lenToSkip);
514 if (skipped <= 0) {
515 throw new RuntimeException("failed to skip file name in"
516 + " local file header");
517 }
518 lenToSkip -= skipped;
519 }
520 byte[] localExtraData = new byte[extraFieldLen];
521 archive.readFully(localExtraData);
522 ze.setExtra(localExtraData);
523 /*dataOffsets.put(ze,
524 new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
525 + SHORT + SHORT + fileNameLen + extraFieldLen));
526 */
527 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
528 + SHORT + SHORT + fileNameLen + extraFieldLen;
529
530 if (entriesWithoutEFS.containsKey(ze)) {
531 String orig = ze.getName();
532 NameAndComment nc = (NameAndComment) entriesWithoutEFS.get(ze);
533 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
534 nc.comment);
535 if (!orig.equals(ze.getName())) {
536 nameMap.remove(orig);
537 nameMap.put(ze.getName(), ze);
538 }
539 }
540 }
541 }
542
543 /**
544 * Checks whether the archive starts with a LFH. If it doesn't,
545 * it may be an empty archive.
546 */
547 private boolean startsWithLocalFileHeader() throws IOException {
548 archive.seek(0);
549 final byte[] start = new byte[WORD];
550 archive.readFully(start);
551 for (int i = 0; i < start.length; i++) {
552 if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) {
553 return false;
554 }
555 }
556 return true;
557 }
558
559 /**
560 * InputStream that delegates requests to the underlying
561 * RandomAccessFile, making sure that only bytes from a certain
562 * range can be read.
563 */
564 private class BoundedInputStream extends InputStream {
565 private long remaining;
566 private long loc;
567 private boolean addDummyByte = false;
568
569 BoundedInputStream(long start, long remaining) {
570 this.remaining = remaining;
571 loc = start;
572 }
573
574 public int read() throws IOException {
575 if (remaining-- <= 0) {
576 if (addDummyByte) {
577 addDummyByte = false;
578 return 0;
579 }
580 return -1;
581 }
582 synchronized (archive) {
583 archive.seek(loc++);
584 return archive.read();
585 }
586 }
587
588 public int read(byte[] b, int off, int len) throws IOException {
589 if (remaining <= 0) {
590 if (addDummyByte) {
591 addDummyByte = false;
592 b[off] = 0;
593 return 1;
594 }
595 return -1;
596 }
597
598 if (len <= 0) {
599 return 0;
600 }
601
602 if (len > remaining) {
603 len = (int) remaining;
604 }
605 int ret = -1;
606 synchronized (archive) {
607 archive.seek(loc);
608 ret = archive.read(b, off, len);
609 }
610 if (ret > 0) {
611 loc += ret;
612 remaining -= ret;
613 }
614 return ret;
615 }
616
617 /**
618 * Inflater needs an extra dummy byte for nowrap - see
619 * Inflater's javadocs.
620 */
621 void addDummy() {
622 addDummyByte = true;
623 }
624 }
625
626 private static final class NameAndComment {
627 private final byte[] name;
628 private final byte[] comment;
629 private NameAndComment(byte[] name, byte[] comment) {
630 this.name = name;
631 this.comment = comment;
632 }
633 }
634 }