001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.commons.compress.archivers.dump;
020
021 import org.apache.commons.compress.archivers.ArchiveException;
022 import org.apache.commons.compress.archivers.ArchiveInputStream;
023
024 import java.io.EOFException;
025 import java.io.IOException;
026 import java.io.InputStream;
027
028 import java.util.Arrays;
029 import java.util.Comparator;
030 import java.util.HashMap;
031 import java.util.Map;
032 import java.util.PriorityQueue;
033 import java.util.Queue;
034 import java.util.Stack;
035
/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * @NotThreadSafe
 */
044 public class DumpArchiveInputStream extends ArchiveInputStream {
045 private DumpArchiveSummary summary;
046 private DumpArchiveEntry active;
047 private boolean isClosed;
048 private boolean hasHitEOF;
049 private long entrySize;
050 private long entryOffset;
051 private int readIdx;
052 private byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
053 private byte[] blockBuffer;
054 private int recordOffset;
055 private long filepos;
056 protected TapeInputStream raw;
057
058 // map of ino -> dirent entry. We can use this to reconstruct full paths.
059 private Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();
060
061 // map of ino -> (directory) entry when we're missing one or more elements in the path.
062 private Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();
063
064 // queue of (directory) entries where we now have the full path.
065 private Queue<DumpArchiveEntry> queue;
066
067 /**
068 * Constructor.
069 *
070 * @param is
071 * @throws ArchiveException
072 */
073 public DumpArchiveInputStream(InputStream is) throws ArchiveException {
074 this.raw = new TapeInputStream(is);
075 this.hasHitEOF = false;
076
077 try {
078 // read header, verify it's a dump archive.
079 byte[] headerBytes = raw.readRecord();
080
081 if (!DumpArchiveUtil.verify(headerBytes)) {
082 throw new UnrecognizedFormatException();
083 }
084
085 // get summary information
086 summary = new DumpArchiveSummary(headerBytes);
087
088 // reset buffer with actual block size.
089 raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());
090
091 // allocate our read buffer.
092 blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];
093
094 // skip past CLRI and BITS segments since we don't handle them yet.
095 readCLRI();
096 readBITS();
097 } catch (IOException ex) {
098 throw new ArchiveException(ex.getMessage(), ex);
099 }
100
101 // put in a dummy record for the root node.
102 Dirent root = new Dirent(2, 2, 4, ".");
103 names.put(Integer.valueOf(2), root);
104
105 // use priority based on queue to ensure parent directories are
106 // released first.
107 queue = new PriorityQueue<DumpArchiveEntry>(10,
108 new Comparator<DumpArchiveEntry>() {
109 public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
110 if ((p.getOriginalName() == null) || (q.getOriginalName() == null)) {
111 return Integer.MAX_VALUE;
112 }
113
114 return p.getOriginalName().compareTo(q.getOriginalName());
115 }
116 });
117 }
118
119 @Deprecated
120 @Override
121 public int getCount() {
122 return (int) getBytesRead();
123 }
124
125 @Override
126 public long getBytesRead() {
127 return raw.getBytesRead();
128 }
129
130 /**
131 * Return the archive summary information.
132 */
133 public DumpArchiveSummary getSummary() {
134 return summary;
135 }
136
137 /**
138 * Read CLRI (deleted inode) segment.
139 */
140 private void readCLRI() throws IOException {
141 byte[] readBuf = raw.readRecord();
142
143 if (!DumpArchiveUtil.verify(readBuf)) {
144 throw new InvalidFormatException();
145 }
146
147 active = DumpArchiveEntry.parse(readBuf);
148
149 if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
150 throw new InvalidFormatException();
151 }
152
153 // we don't do anything with this yet.
154 if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
155 == -1) {
156 throw new EOFException();
157 }
158 readIdx = active.getHeaderCount();
159 }
160
161 /**
162 * Read BITS segment.
163 */
164 private void readBITS() throws IOException {
165 byte[] readBuf = raw.readRecord();
166
167 if (!DumpArchiveUtil.verify(readBuf)) {
168 throw new InvalidFormatException();
169 }
170
171 active = DumpArchiveEntry.parse(readBuf);
172
173 if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
174 throw new InvalidFormatException();
175 }
176
177 // we don't do anything with this yet.
178 if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
179 == -1) {
180 throw new EOFException();
181 }
182 readIdx = active.getHeaderCount();
183 }
184
185 /**
186 * Read the next entry.
187 */
188 public DumpArchiveEntry getNextDumpEntry() throws IOException {
189 return getNextEntry();
190 }
191
192 /**
193 * Read the next entry.
194 */
195 @Override
196 public DumpArchiveEntry getNextEntry() throws IOException {
197 DumpArchiveEntry entry = null;
198 String path = null;
199
200 // is there anything in the queue?
201 if (!queue.isEmpty()) {
202 return queue.remove();
203 }
204
205 while (entry == null) {
206 if (hasHitEOF) {
207 return null;
208 }
209
210 // skip any remaining records in this segment for prior file.
211 // we might still have holes... easiest to do it
212 // block by block. We may want to revisit this if
213 // the unnecessary decompression time adds up.
214 while (readIdx < active.getHeaderCount()) {
215 if (!active.isSparseRecord(readIdx++)
216 && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
217 throw new EOFException();
218 }
219 }
220
221 readIdx = 0;
222 filepos = raw.getBytesRead();
223
224 byte[] headerBytes = raw.readRecord();
225
226 if (!DumpArchiveUtil.verify(headerBytes)) {
227 throw new InvalidFormatException();
228 }
229
230 active = DumpArchiveEntry.parse(headerBytes);
231
232 // skip any remaining segments for prior file.
233 while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
234 if (raw.skip(DumpArchiveConstants.TP_SIZE
235 * (active.getHeaderCount()
236 - active.getHeaderHoles())) == -1) {
237 throw new EOFException();
238 }
239
240 filepos = raw.getBytesRead();
241 headerBytes = raw.readRecord();
242
243 if (!DumpArchiveUtil.verify(headerBytes)) {
244 throw new InvalidFormatException();
245 }
246
247 active = DumpArchiveEntry.parse(headerBytes);
248 }
249
250 // check if this is an end-of-volume marker.
251 if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
252 hasHitEOF = true;
253 isClosed = true;
254 raw.close();
255
256 return null;
257 }
258
259 entry = active;
260
261 if (entry.isDirectory()) {
262 readDirectoryEntry(active);
263
264 // now we create an empty InputStream.
265 entryOffset = 0;
266 entrySize = 0;
267 readIdx = active.getHeaderCount();
268 } else {
269 entryOffset = 0;
270 entrySize = active.getEntrySize();
271 readIdx = 0;
272 }
273
274 recordOffset = readBuf.length;
275
276 path = getPath(entry);
277
278 if (path == null) {
279 entry = null;
280 }
281 }
282
283 entry.setName(path);
284 entry.setSimpleName(names.get(Integer.valueOf(entry.getIno())).getName());
285 entry.setOffset(filepos);
286
287 return entry;
288 }
289
290 /**
291 * Read directory entry.
292 */
293 private void readDirectoryEntry(DumpArchiveEntry entry)
294 throws IOException {
295 long size = entry.getEntrySize();
296 boolean first = true;
297
298 while (first ||
299 (DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType())) {
300 // read the header that we just peeked at.
301 if (!first) {
302 raw.readRecord();
303 }
304
305 if (!names.containsKey(Integer.valueOf(entry.getIno())) &&
306 (DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType())) {
307 pending.put(Integer.valueOf(entry.getIno()), entry);
308 }
309
310 int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();
311
312 if (blockBuffer.length < datalen) {
313 blockBuffer = new byte[datalen];
314 }
315
316 if (raw.read(blockBuffer, 0, datalen) != datalen) {
317 throw new EOFException();
318 }
319
320 int reclen = 0;
321
322 for (int i = 0; (i < (datalen - 8)) && (i < (size - 8));
323 i += reclen) {
324 int ino = DumpArchiveUtil.convert32(blockBuffer, i);
325 reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);
326
327 byte type = blockBuffer[i + 6];
328
329 String name = new String(blockBuffer, i + 8, blockBuffer[i + 7]); // TODO default charset?
330
331 if (".".equals(name) || "..".equals(name)) {
332 // do nothing...
333 continue;
334 }
335
336 Dirent d = new Dirent(ino, entry.getIno(), type, name);
337
338 /*
339 if ((type == 4) && names.containsKey(ino)) {
340 System.out.println("we already have ino: " +
341 names.get(ino));
342 }
343 */
344
345 names.put(Integer.valueOf(ino), d);
346
347 // check whether this allows us to fill anything in the pending list.
348 for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
349 String path = getPath(e.getValue());
350
351 if (path != null) {
352 e.getValue().setName(path);
353 e.getValue()
354 .setSimpleName(names.get(e.getKey()).getName());
355 queue.add(e.getValue());
356 }
357 }
358
359 // remove anything that we found. (We can't do it earlier
360 // because of concurrent modification exceptions.)
361 for (DumpArchiveEntry e : queue) {
362 pending.remove(Integer.valueOf(e.getIno()));
363 }
364 }
365
366 byte[] peekBytes = raw.peek();
367
368 if (!DumpArchiveUtil.verify(peekBytes)) {
369 throw new InvalidFormatException();
370 }
371
372 entry = DumpArchiveEntry.parse(peekBytes);
373 first = false;
374 size -= DumpArchiveConstants.TP_SIZE;
375 }
376 }
377
378 /**
379 * Get full path for specified archive entry, or null if there's a gap.
380 *
381 * @param entry
382 * @return full path for specified archive entry, or null if there's a gap.
383 */
384 private String getPath(DumpArchiveEntry entry) {
385 // build the stack of elements. It's possible that we're
386 // still missing an intermediate value and if so we
387 Stack<String> elements = new Stack<String>();
388 Dirent dirent = null;
389
390 for (int i = entry.getIno();; i = dirent.getParentIno()) {
391 if (!names.containsKey(Integer.valueOf(i))) {
392 elements.clear();
393 break;
394 }
395
396 dirent = names.get(Integer.valueOf(i));
397 elements.push(dirent.getName());
398
399 if (dirent.getIno() == dirent.getParentIno()) {
400 break;
401 }
402 }
403
404 // if an element is missing defer the work and read next entry.
405 if (elements.isEmpty()) {
406 pending.put(Integer.valueOf(entry.getIno()), entry);
407
408 return null;
409 }
410
411 // generate full path from stack of elements.
412 StringBuilder sb = new StringBuilder(elements.pop());
413
414 while (!elements.isEmpty()) {
415 sb.append('/');
416 sb.append(elements.pop());
417 }
418
419 return sb.toString();
420 }
421
422 /**
423 * Reads bytes from the current dump archive entry.
424 *
425 * This method is aware of the boundaries of the current
426 * entry in the archive and will deal with them as if they
427 * were this stream's start and EOF.
428 *
429 * @param buf The buffer into which to place bytes read.
430 * @param off The offset at which to place bytes read.
431 * @param len The number of bytes to read.
432 * @return The number of bytes read, or -1 at EOF.
433 * @throws IOException on error
434 */
435 @Override
436 public int read(byte[] buf, int off, int len) throws IOException {
437 int totalRead = 0;
438
439 if (isClosed || (entryOffset >= entrySize)) {
440 return -1;
441 }
442
443 if ((len + entryOffset) > entrySize) {
444 len = (int) (entrySize - entryOffset);
445 }
446
447 while (len > 0) {
448 int sz = (len > (readBuf.length - recordOffset))
449 ? (readBuf.length - recordOffset) : len;
450
451 // copy any data we have
452 if ((recordOffset + sz) <= readBuf.length) {
453 System.arraycopy(readBuf, recordOffset, buf, off, sz);
454 totalRead += sz;
455 recordOffset += sz;
456 len -= sz;
457 off += sz;
458 }
459
460 // load next block if necessary.
461 if (len > 0) {
462 if (readIdx >= 512) {
463 byte[] headerBytes = raw.readRecord();
464
465 if (!DumpArchiveUtil.verify(headerBytes)) {
466 throw new InvalidFormatException();
467 }
468
469 active = DumpArchiveEntry.parse(headerBytes);
470 readIdx = 0;
471 }
472
473 if (!active.isSparseRecord(readIdx++)) {
474 int r = raw.read(readBuf, 0, readBuf.length);
475 if (r != readBuf.length) {
476 throw new EOFException();
477 }
478 } else {
479 Arrays.fill(readBuf, (byte) 0);
480 }
481
482 recordOffset = 0;
483 }
484 }
485
486 entryOffset += totalRead;
487
488 return totalRead;
489 }
490
491 /**
492 * Closes the stream for this entry.
493 */
494 @Override
495 public void close() throws IOException {
496 if (!isClosed) {
497 isClosed = true;
498 raw.close();
499 }
500 }
501
502 /**
503 * Look at the first few bytes of the file to decide if it's a dump
504 * archive. With 32 bytes we can look at the magic value, with a full
505 * 1k we can verify the checksum.
506 */
507 public static boolean matches(byte[] buffer, int length) {
508 // do we have enough of the header?
509 if (length < 32) {
510 return false;
511 }
512
513 // this is the best test
514 if (length >= DumpArchiveConstants.TP_SIZE) {
515 return DumpArchiveUtil.verify(buffer);
516 }
517
518 // this will work in a pinch.
519 return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
520 24);
521 }
522
523 }