/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * There doesn't seem to exist a hint on the encoding of string values
 * in any piece of documentation. Given the main purpose of dump/restore
 * is backing up a system it seems very likely the format uses the
 * current default encoding of the system.
 *
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private final DumpArchiveSummary summary;

    // header of the segment currently being processed.
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;

    // size of the current entry's data and how much of it has been handed out by read().
    private long entrySize;
    private long entryOffset;

    // index of the next record of the active segment to consume.
    private int readIdx;

    // holds one record of entry data; recordOffset is the read position inside it.
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;

    // raw stream position captured just before the current entry's header was read.
    private long filepos;
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>();

    // queue of (directory) entries where we now have the full path.
    private final Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for file names and labels.
     */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /**
     * Constructor using the platform's default encoding for file
     * names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructor.
     *
     * <p>Reads and verifies the archive header, then skips the CLRI and
     * BITS segments so that the stream is positioned at the first entry.</p>
     *
     * @param is stream to read from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.6
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is, final String encoding)
        throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            final byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (final IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        final Dirent root = new Dirent(2, 2, 4, ".");
        names.put(2, root);

        // use priority based on queue to ensure parent directories are
        // released first. Entries without an original name sort last; the
        // ordering is kept symmetric so the Comparator contract holds.
        queue = new PriorityQueue<>(10,
                (p, q) -> {
                    final String left = p.getOriginalName();
                    final String right = q.getOriginalName();

                    if (left == null) {
                        return right == null ? 0 : 1;
                    }
                    if (right == null) {
                        return -1;
                    }

                    return left.compareTo(right);
                });
    }

    /**
     * Returns the number of bytes read, narrowed to an int.
     *
     * @return the value of {@link #getBytesRead()} cast to int
     * @deprecated prefer {@link #getBytesRead()}, which does not truncate
     * the count to an int
     */
    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    /**
     * Returns the number of bytes read as reported by the underlying
     * tape stream.
     *
     * @return the number of bytes read
     */
    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     * @return the summary
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Reads the next record from the raw stream, verifies it and parses it
     * as a segment header.
     *
     * @return the parsed segment header
     * @throws InvalidFormatException if the record fails verification
     * @throws IOException on read error
     */
    private DumpArchiveEntry readSegmentHeader() throws IOException {
        final byte[] headerBytes = raw.readRecord();

        if (!DumpArchiveUtil.verify(headerBytes)) {
            throw new InvalidFormatException();
        }

        return DumpArchiveEntry.parse(headerBytes);
    }

    /**
     * Skips all records announced by the active segment header and marks
     * the segment as fully consumed.
     *
     * @throws EOFException if the raw stream reports -1 while skipping
     * @throws IOException on read error
     */
    private void skipActiveSegmentData() throws IOException {
        // widen before multiplying so a large header count cannot overflow int.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read CLRI (deleted inode) segment.
     *
     * @throws InvalidFormatException if the next segment is not a valid CLRI segment
     * @throws IOException on read error
     */
    private void readCLRI() throws IOException {
        active = readSegmentHeader();

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        skipActiveSegmentData();
    }

    /**
     * Read BITS segment.
     *
     * @throws InvalidFormatException if the next segment is not a valid BITS segment
     * @throws IOException on read error
     */
    private void readBITS() throws IOException {
        active = readSegmentHeader();

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        skipActiveSegmentData();
    }

    /**
     * Read the next entry.
     * @return the next entry
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    /**
     * Returns the next entry whose full path is known, or null once the
     * end-of-volume marker has been reached.
     *
     * <p>Directory entries that were waiting for a parent and have since
     * been resolved are returned from the queue first. Entries whose path
     * cannot be resolved yet are deferred and the next segment is read.</p>
     *
     * @return the next entry, or null at the end of the archive
     * @throws IOException on error
     */
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();
            active = readSegmentHeader();

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip((long) DumpArchiveConstants.TP_SIZE
                             * (active.getHeaderCount()
                                - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                active = readSegmentHeader();
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            // force read() to load a fresh record before copying any data.
            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                // path is incomplete; getPath() has parked the entry in pending.
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Read directory entry.
     *
     * <p>Consumes the data records of the given directory segment and of
     * any ADDR segments that follow it, registers every directory record
     * found in {@code names}, and moves pending entries whose path has
     * become resolvable to the queue.</p>
     *
     * @param entry header of the directory segment to read
     * @throws InvalidFormatException if a directory record has a zero
     * length or a peeked header fails verification
     * @throws IOException on read error
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
                DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) &&
                DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                // segment is larger than the current buffer: read into a new one.
                blockBuffer = IOUtils.readRange(raw, datalen);
                if (blockBuffer.length != datalen) {
                    throw new EOFException();
                }
            } else if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8;
                    i += reclen) {
                final int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                // a non-positive record length would never advance i and
                // make this loop spin forever on a corrupt archive.
                if (reclen <= 0) {
                    throw new InvalidFormatException();
                }

                final byte type = blockBuffer[i + 6];

                // the name length is a single unsigned byte; mask it so
                // lengths above 127 are not read as negative.
                final int nameLength = blockBuffer[i + 7] & 0xFF;

                final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, nameLength);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                final Dirent d = new Dirent(ino, entry.getIno(), type, name);

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                pending.forEach((k, v) -> {
                    final String path = getPath(v);

                    if (path != null) {
                        v.setName(path);
                        v.setSimpleName(names.get(k).getName());
                        queue.add(v);
                    }
                });

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                queue.forEach(e -> pending.remove(e.getIno()));
            }

            final byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * <p>When the path cannot be resolved the entry is stored in the
     * pending map so it can be completed later.</p>
     *
     * @param entry the entry whose path should be resolved
     * @return full path for specified archive entry, or null if there's a gap.
     */
    private String getPath(final DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value and if so we
        // defer the entry until the missing element shows up.
        final Stack<String> elements = new Stack<>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }

            dirent = names.get(i);
            elements.push(dirent.getName());

            // a dirent that is its own parent terminates the walk.
            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);

            return null;
        }

        // generate full path from stack of elements.
        final StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        // never hand out more than what is left of the current entry.
        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            final int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                // NOTE(review): 512 is presumably the number of records a
                // single segment header can describe - confirm against
                // DumpArchiveConstants / the dump format.
                if (readIdx >= 512) {
                    active = readSegmentHeader();
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    final int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    // sparse records are not stored; present them as zeros.
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes this stream and the underlying tape stream. Calling it more
     * than once has no further effect.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(final byte[] buffer, final int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
            24);
    }

}