001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.commons.compress.archivers.zip;
020
021 import java.io.EOFException;
022 import java.io.IOException;
023 import java.io.InputStream;
024 import java.io.PushbackInputStream;
025 import java.util.zip.CRC32;
026 import java.util.zip.DataFormatException;
027 import java.util.zip.Inflater;
028 import java.util.zip.ZipException;
029
030 import org.apache.commons.compress.archivers.ArchiveEntry;
031 import org.apache.commons.compress.archivers.ArchiveInputStream;
032
033 /**
034 * Implements an input stream that can read Zip archives.
035 * <p>
036 * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
037 * is not available from the header.
038 * <p>
039 * The {@link ZipFile} class is preferred when reading from files.
040 *
041 * @see ZipFile
042 * @NotThreadSafe
043 */
044 public class ZipArchiveInputStream extends ArchiveInputStream {
045
046 private static final int SHORT = 2;
047 private static final int WORD = 4;
048
049 /**
050 * The zip encoding to use for filenames and the file comment.
051 */
052 private final ZipEncoding zipEncoding;
053
054 /**
055 * Whether to look for and use Unicode extra fields.
056 */
057 private final boolean useUnicodeExtraFields;
058
059 private final InputStream in;
060
061 private final Inflater inf = new Inflater(true);
062 private final CRC32 crc = new CRC32();
063
064 private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
065
066 private ZipArchiveEntry current = null;
067 private boolean closed = false;
068 private boolean hitCentralDirectory = false;
069 private int readBytesOfEntry = 0, offsetInBuffer = 0;
070 private int bytesReadFromStream = 0;
071 private int lengthOfLastRead = 0;
072 private boolean hasDataDescriptor = false;
073
074 private static final int LFH_LEN = 30;
075 /*
076 local file header signature 4 bytes (0x04034b50)
077 version needed to extract 2 bytes
078 general purpose bit flag 2 bytes
079 compression method 2 bytes
080 last mod file time 2 bytes
081 last mod file date 2 bytes
082 crc-32 4 bytes
083 compressed size 4 bytes
084 uncompressed size 4 bytes
085 file name length 2 bytes
086 extra field length 2 bytes
087 */
088
089 public ZipArchiveInputStream(InputStream inputStream) {
090 this(inputStream, ZipEncodingHelper.UTF8, true);
091 }
092
093 /**
094 * @param encoding the encoding to use for file names, use null
095 * for the platform's default encoding
096 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
097 * Extra Fields (if present) to set the file names.
098 */
099 public ZipArchiveInputStream(InputStream inputStream,
100 String encoding,
101 boolean useUnicodeExtraFields) {
102 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
103 this.useUnicodeExtraFields = useUnicodeExtraFields;
104 in = new PushbackInputStream(inputStream, buf.length);
105 }
106
107 public ZipArchiveEntry getNextZipEntry() throws IOException {
108 if (closed || hitCentralDirectory) {
109 return null;
110 }
111 if (current != null) {
112 closeEntry();
113 }
114 byte[] lfh = new byte[LFH_LEN];
115 try {
116 readFully(lfh);
117 } catch (EOFException e) {
118 return null;
119 }
120 ZipLong sig = new ZipLong(lfh);
121 if (sig.equals(ZipLong.CFH_SIG)) {
122 hitCentralDirectory = true;
123 return null;
124 }
125 if (!sig.equals(ZipLong.LFH_SIG)) {
126 return null;
127 }
128
129 int off = WORD;
130 current = new ZipArchiveEntry();
131
132 int versionMadeBy = ZipShort.getValue(lfh, off);
133 off += SHORT;
134 current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
135 & ZipFile.NIBLET_MASK);
136
137 final int generalPurposeFlag = ZipShort.getValue(lfh, off);
138 final boolean hasEFS =
139 (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
140 final ZipEncoding entryEncoding =
141 hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
142 hasDataDescriptor = (generalPurposeFlag & 8) != 0;
143
144 off += SHORT;
145
146 current.setMethod(ZipShort.getValue(lfh, off));
147 off += SHORT;
148
149 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
150 current.setTime(time);
151 off += WORD;
152
153 if (!hasDataDescriptor) {
154 current.setCrc(ZipLong.getValue(lfh, off));
155 off += WORD;
156
157 current.setCompressedSize(ZipLong.getValue(lfh, off));
158 off += WORD;
159
160 current.setSize(ZipLong.getValue(lfh, off));
161 off += WORD;
162 } else {
163 off += 3 * WORD;
164 }
165
166 int fileNameLen = ZipShort.getValue(lfh, off);
167
168 off += SHORT;
169
170 int extraLen = ZipShort.getValue(lfh, off);
171 off += SHORT;
172
173 byte[] fileName = new byte[fileNameLen];
174 readFully(fileName);
175 current.setName(entryEncoding.decode(fileName));
176
177 byte[] extraData = new byte[extraLen];
178 readFully(extraData);
179 current.setExtra(extraData);
180
181 if (!hasEFS && useUnicodeExtraFields) {
182 ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
183 }
184 return current;
185 }
186
187 public ArchiveEntry getNextEntry() throws IOException {
188 return getNextZipEntry();
189 }
190
191 public int read(byte[] buffer, int start, int length) throws IOException {
192 if (closed) {
193 throw new IOException("The stream is closed");
194 }
195 if (inf.finished() || current == null) {
196 return -1;
197 }
198
199 // avoid int overflow, check null buffer
200 if (start <= buffer.length && length >= 0 && start >= 0
201 && buffer.length - start >= length) {
202 if (current.getMethod() == ZipArchiveOutputStream.STORED) {
203 int csize = (int) current.getSize();
204 if (readBytesOfEntry >= csize) {
205 return -1;
206 }
207 if (offsetInBuffer >= lengthOfLastRead) {
208 offsetInBuffer = 0;
209 if ((lengthOfLastRead = in.read(buf)) == -1) {
210 return -1;
211 }
212 count(lengthOfLastRead);
213 bytesReadFromStream += lengthOfLastRead;
214 }
215 int toRead = length > lengthOfLastRead
216 ? lengthOfLastRead - offsetInBuffer
217 : length;
218 if ((csize - readBytesOfEntry) < toRead) {
219 toRead = csize - readBytesOfEntry;
220 }
221 System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
222 offsetInBuffer += toRead;
223 readBytesOfEntry += toRead;
224 crc.update(buffer, start, toRead);
225 return toRead;
226 }
227 if (inf.needsInput()) {
228 fill();
229 if (lengthOfLastRead > 0) {
230 bytesReadFromStream += lengthOfLastRead;
231 }
232 }
233 int read = 0;
234 try {
235 read = inf.inflate(buffer, start, length);
236 } catch (DataFormatException e) {
237 throw new ZipException(e.getMessage());
238 }
239 if (read == 0 && inf.finished()) {
240 return -1;
241 }
242 crc.update(buffer, start, read);
243 return read;
244 }
245 throw new ArrayIndexOutOfBoundsException();
246 }
247
248 public void close() throws IOException {
249 if (!closed) {
250 closed = true;
251 in.close();
252 }
253 }
254
255 public long skip(long value) throws IOException {
256 if (value >= 0) {
257 long skipped = 0;
258 byte[] b = new byte[1024];
259 while (skipped != value) {
260 long rem = value - skipped;
261 int x = read(b, 0, (int) (b.length > rem ? rem : b.length));
262 if (x == -1) {
263 return skipped;
264 }
265 skipped += x;
266 }
267 return skipped;
268 }
269 throw new IllegalArgumentException();
270 }
271
272 /*
273 * This test assumes that the zip file does not have any additional leading content,
274 * which is something that is allowed by the specification (e.g. self-extracting zips)
275 */
276 public static boolean matches(byte[] signature, int length) {
277 if (length < ZipArchiveOutputStream.LFH_SIG.length) {
278 return false;
279 }
280
281 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
282 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
283 }
284
285 private static boolean checksig(byte[] signature, byte[] expected){
286 for (int i = 0; i < expected.length; i++) {
287 if (signature[i] != expected[i]) {
288 return false;
289 }
290 }
291 return true;
292 }
293
294 private void closeEntry() throws IOException {
295 if (closed) {
296 throw new IOException("The stream is closed");
297 }
298 if (current == null) {
299 return;
300 }
301 // Ensure all entry bytes are read
302 skip(Long.MAX_VALUE);
303 int inB;
304 if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
305 inB = inf.getTotalIn();
306 } else {
307 inB = readBytesOfEntry;
308 }
309 int diff = 0;
310
311 // Pushback any required bytes
312 if ((diff = bytesReadFromStream - inB) != 0) {
313 ((PushbackInputStream) in).unread(buf,
314 lengthOfLastRead - diff, diff);
315 }
316
317 if (hasDataDescriptor) {
318 readFully(new byte[4 * WORD]);
319 }
320
321 inf.reset();
322 readBytesOfEntry = offsetInBuffer = bytesReadFromStream =
323 lengthOfLastRead = 0;
324 crc.reset();
325 current = null;
326 }
327
328 private void fill() throws IOException {
329 if (closed) {
330 throw new IOException("The stream is closed");
331 }
332 if ((lengthOfLastRead = in.read(buf)) > 0) {
333 inf.setInput(buf, 0, lengthOfLastRead);
334 }
335 }
336
337 private void readFully(byte[] b) throws IOException {
338 int count = 0, x = 0;
339 while (count != b.length) {
340 count += x = in.read(b, count, b.length - count);
341 if (x == -1) {
342 throw new EOFException();
343 }
344 }
345 }
346 }