001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 */
018
019 /*
020 * This package is based on the work done by Timothy Gerard Endres
021 * (time@ice.com) to whom the Ant project is very grateful for his great code.
022 */
023
024 package org.apache.commons.compress.archivers.tar;
025
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
031
/**
 * The TarInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 * @NotThreadSafe
 */
039 public class TarArchiveInputStream extends ArchiveInputStream {
040 private static final int SMALL_BUFFER_SIZE = 256;
041 private static final int BUFFER_SIZE = 8 * 1024;
042
043 private boolean hasHitEOF;
044 private long entrySize;
045 private long entryOffset;
046 private byte[] readBuf;
047 protected final TarBuffer buffer;
048 private TarArchiveEntry currEntry;
049
050 /**
051 * Constructor for TarInputStream.
052 * @param is the input stream to use
053 */
054 public TarArchiveInputStream(InputStream is) {
055 this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
056 }
057
058 /**
059 * Constructor for TarInputStream.
060 * @param is the input stream to use
061 * @param blockSize the block size to use
062 */
063 public TarArchiveInputStream(InputStream is, int blockSize) {
064 this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
065 }
066
067 /**
068 * Constructor for TarInputStream.
069 * @param is the input stream to use
070 * @param blockSize the block size to use
071 * @param recordSize the record size to use
072 */
073 public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
074 this.buffer = new TarBuffer(is, blockSize, recordSize);
075 this.readBuf = null;
076 this.hasHitEOF = false;
077 }
078
079 /**
080 * Closes this stream. Calls the TarBuffer's close() method.
081 * @throws IOException on error
082 */
083 public void close() throws IOException {
084 buffer.close();
085 }
086
087 /**
088 * Get the record size being used by this stream's TarBuffer.
089 *
090 * @return The TarBuffer record size.
091 */
092 public int getRecordSize() {
093 return buffer.getRecordSize();
094 }
095
096 /**
097 * Get the available data that can be read from the current
098 * entry in the archive. This does not indicate how much data
099 * is left in the entire archive, only in the current entry.
100 * This value is determined from the entry's size header field
101 * and the amount of data already read from the current entry.
102 * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
103 * bytes are left in the current entry in the archive.
104 *
105 * @return The number of available bytes for the current entry.
106 * @throws IOException for signature
107 */
108 public int available() throws IOException {
109 if (entrySize - entryOffset > Integer.MAX_VALUE) {
110 return Integer.MAX_VALUE;
111 }
112 return (int) (entrySize - entryOffset);
113 }
114
115 /**
116 * Skip bytes in the input buffer. This skips bytes in the
117 * current entry's data, not the entire archive, and will
118 * stop at the end of the current entry's data if the number
119 * to skip extends beyond that point.
120 *
121 * @param numToSkip The number of bytes to skip.
122 * @return the number actually skipped
123 * @throws IOException on error
124 */
125 public long skip(long numToSkip) throws IOException {
126 // REVIEW
127 // This is horribly inefficient, but it ensures that we
128 // properly skip over bytes via the TarBuffer...
129 //
130 byte[] skipBuf = new byte[BUFFER_SIZE];
131 long skip = numToSkip;
132 while (skip > 0) {
133 int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
134 int numRead = read(skipBuf, 0, realSkip);
135 if (numRead == -1) {
136 break;
137 }
138 skip -= numRead;
139 }
140 return (numToSkip - skip);
141 }
142
143 /**
144 * Since we do not support marking just yet, we do nothing.
145 */
146 public void reset() {
147 }
148
149 /**
150 * Get the next entry in this tar archive. This will skip
151 * over any remaining data in the current entry, if there
152 * is one, and place the input stream at the header of the
153 * next entry, and read the header and instantiate a new
154 * TarEntry from the header bytes and return that entry.
155 * If there are no more entries in the archive, null will
156 * be returned to indicate that the end of the archive has
157 * been reached.
158 *
159 * @return The next TarEntry in the archive, or null.
160 * @throws IOException on error
161 */
162 public TarArchiveEntry getNextTarEntry() throws IOException {
163 if (hasHitEOF) {
164 return null;
165 }
166
167 if (currEntry != null) {
168 long numToSkip = entrySize - entryOffset;
169
170 while (numToSkip > 0) {
171 long skipped = skip(numToSkip);
172 if (skipped <= 0) {
173 throw new RuntimeException("failed to skip current tar entry");
174 }
175 numToSkip -= skipped;
176 }
177
178 readBuf = null;
179 }
180
181 byte[] headerBuf = buffer.readRecord();
182
183 if (headerBuf == null) {
184 hasHitEOF = true;
185 } else if (buffer.isEOFRecord(headerBuf)) {
186 hasHitEOF = true;
187 }
188
189 if (hasHitEOF) {
190 currEntry = null;
191 } else {
192 currEntry = new TarArchiveEntry(headerBuf);
193 entryOffset = 0;
194 entrySize = currEntry.getSize();
195 }
196
197 if (currEntry != null && currEntry.isGNULongNameEntry()) {
198 // read in the name
199 StringBuffer longName = new StringBuffer();
200 byte[] buf = new byte[SMALL_BUFFER_SIZE];
201 int length = 0;
202 while ((length = read(buf)) >= 0) {
203 longName.append(new String(buf, 0, length));
204 }
205 getNextEntry();
206 if (currEntry == null) {
207 // Bugzilla: 40334
208 // Malformed tar file - long entry name not followed by entry
209 return null;
210 }
211 // remove trailing null terminator
212 if (longName.length() > 0
213 && longName.charAt(longName.length() - 1) == 0) {
214 longName.deleteCharAt(longName.length() - 1);
215 }
216 currEntry.setName(longName.toString());
217 }
218
219 return currEntry;
220 }
221
222 public ArchiveEntry getNextEntry() throws IOException {
223 return getNextTarEntry();
224 }
225
226 /**
227 * Reads bytes from the current tar archive entry.
228 *
229 * This method is aware of the boundaries of the current
230 * entry in the archive and will deal with them as if they
231 * were this stream's start and EOF.
232 *
233 * @param buf The buffer into which to place bytes read.
234 * @param offset The offset at which to place bytes read.
235 * @param numToRead The number of bytes to read.
236 * @return The number of bytes read, or -1 at EOF.
237 * @throws IOException on error
238 */
239 public int read(byte[] buf, int offset, int numToRead) throws IOException {
240 int totalRead = 0;
241
242 if (entryOffset >= entrySize) {
243 return -1;
244 }
245
246 if ((numToRead + entryOffset) > entrySize) {
247 numToRead = (int) (entrySize - entryOffset);
248 }
249
250 if (readBuf != null) {
251 int sz = (numToRead > readBuf.length) ? readBuf.length
252 : numToRead;
253
254 System.arraycopy(readBuf, 0, buf, offset, sz);
255
256 if (sz >= readBuf.length) {
257 readBuf = null;
258 } else {
259 int newLen = readBuf.length - sz;
260 byte[] newBuf = new byte[newLen];
261
262 System.arraycopy(readBuf, sz, newBuf, 0, newLen);
263
264 readBuf = newBuf;
265 }
266
267 totalRead += sz;
268 numToRead -= sz;
269 offset += sz;
270 }
271
272 while (numToRead > 0) {
273 byte[] rec = buffer.readRecord();
274
275 if (rec == null) {
276 // Unexpected EOF!
277 throw new IOException("unexpected EOF with " + numToRead
278 + " bytes unread. Occured at byte: " + getCount());
279 }
280 count(rec.length);
281 int sz = numToRead;
282 int recLen = rec.length;
283
284 if (recLen > sz) {
285 System.arraycopy(rec, 0, buf, offset, sz);
286
287 readBuf = new byte[recLen - sz];
288
289 System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
290 } else {
291 sz = recLen;
292
293 System.arraycopy(rec, 0, buf, offset, recLen);
294 }
295
296 totalRead += sz;
297 numToRead -= sz;
298 offset += sz;
299 }
300
301 entryOffset += totalRead;
302
303 return totalRead;
304 }
305
306 protected final TarArchiveEntry getCurrentEntry() {
307 return currEntry;
308 }
309
310 protected final void setCurrentEntry(TarArchiveEntry e) {
311 currEntry = e;
312 }
313
314 protected final boolean isAtEOF() {
315 return hasHitEOF;
316 }
317
318 protected final void setAtEOF(boolean b) {
319 hasHitEOF = b;
320 }
321
322 // ArchiveInputStream
323
324 public static boolean matches(byte[] signature, int length) {
325 if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) {
326 return false;
327 }
328
329 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
330 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
331 &&
332 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
333 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
334 ){
335 return true;
336 }
337 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
338 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
339 &&
340 (
341 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
342 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
343 ||
344 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
345 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
346 )
347 ){
348 return true;
349 }
350 return false;
351 }
352
353 }