001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers; 020 021import java.io.ByteArrayInputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025import java.security.AccessController; 026import java.security.PrivilegedAction; 027import java.util.Collections; 028import java.util.Locale; 029import java.util.ServiceLoader; 030import java.util.Set; 031import java.util.SortedMap; 032import java.util.TreeMap; 033 034import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 035import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 036import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 037import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 038import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 039import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 040import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 041import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 042import org.apache.commons.compress.archivers.sevenz.SevenZFile; 043import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 044import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 045import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 046import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 047import org.apache.commons.compress.utils.IOUtils; 048import org.apache.commons.compress.utils.Sets; 049 050/** 051 * Factory to create Archive[In|Out]putStreams from names or the first bytes of 052 * the InputStream. In order to add other implementations, you should extend 053 * ArchiveStreamFactory and override the appropriate methods (and call their 054 * implementation from super of course). 055 * 056 * Compressing a ZIP-File: 057 * 058 * <pre> 059 * final OutputStream out = Files.newOutputStream(output.toPath()); 060 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 061 * 062 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 063 * IOUtils.copy(Files.newInputStream(file1.toPath()), os); 064 * os.closeArchiveEntry(); 065 * 066 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 067 * IOUtils.copy(Files.newInputStream(file2.toPath()), os); 068 * os.closeArchiveEntry(); 069 * os.close(); 070 * </pre> 071 * 072 * Decompressing a ZIP-File: 073 * 074 * <pre> 075 * final InputStream is = Files.newInputStream(input.toPath()); 076 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 077 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); 078 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName())); 079 * IOUtils.copy(in, out); 080 * out.close(); 081 * in.close(); 082 * </pre> 083 * @Immutable provided that the deprecated method setEntryEncoding is not used. 084 * @ThreadSafe even if the deprecated method setEntryEncoding is used 085 */ 086public class ArchiveStreamFactory implements ArchiveStreamProvider { 087 088 private static final int TAR_HEADER_SIZE = 512; 089 090 private static final int DUMP_SIGNATURE_SIZE = 32; 091 092 private static final int SIGNATURE_SIZE = 12; 093 094 /** 095 * The singleton instance using the platform default encoding. 096 * @since 1.21 097 */ 098 public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory(); 099 100 /** 101 * Constant (value {@value}) used to identify the APK archive format. 102 * <p> 103 * APK file extensions are .apk, .xapk, .apks, .apkm 104 * </p> 105 * 106 * @since 1.22 107 */ 108 public static final String APK = "apk"; 109 110 /** 111 * Constant (value {@value}) used to identify the XAPK archive format. 112 * <p> 113 * APK file extensions are .apk, .xapk, .apks, .apkm 114 * </p> 115 * 116 * @since 1.22 117 */ 118 public static final String XAPK = "xapk"; 119 120 /** 121 * Constant (value {@value}) used to identify the APKS archive format. 122 * <p> 123 * APK file extensions are .apk, .xapk, .apks, .apkm 124 * </p> 125 * 126 * @since 1.22 127 */ 128 public static final String APKS = "apks"; 129 130 /** 131 * Constant (value {@value}) used to identify the APKM archive format. 132 * <p> 133 * APK file extensions are .apk, .xapk, .apks, .apkm 134 * </p> 135 * 136 * @since 1.22 137 */ 138 public static final String APKM = "apkm"; 139 140 /** 141 * Constant (value {@value}) used to identify the AR archive format. 142 * @since 1.1 143 */ 144 public static final String AR = "ar"; 145 146 /** 147 * Constant (value {@value}) used to identify the ARJ archive format. 148 * Not supported as an output stream type. 149 * @since 1.6 150 */ 151 public static final String ARJ = "arj"; 152 153 /** 154 * Constant (value {@value}) used to identify the CPIO archive format. 155 * @since 1.1 156 */ 157 public static final String CPIO = "cpio"; 158 159 /** 160 * Constant (value {@value}) used to identify the Unix DUMP archive format. 161 * Not supported as an output stream type. 162 * @since 1.3 163 */ 164 public static final String DUMP = "dump"; 165 166 /** 167 * Constant (value {@value}) used to identify the JAR archive format. 168 * @since 1.1 169 */ 170 public static final String JAR = "jar"; 171 172 /** 173 * Constant used to identify the TAR archive format. 174 * @since 1.1 175 */ 176 public static final String TAR = "tar"; 177 178 /** 179 * Constant (value {@value}) used to identify the ZIP archive format. 180 * @since 1.1 181 */ 182 public static final String ZIP = "zip"; 183 184 /** 185 * Constant (value {@value}) used to identify the 7z archive format. 186 * @since 1.8 187 */ 188 public static final String SEVEN_Z = "7z"; 189 190 /** 191 * Entry encoding, null for the platform default. 192 */ 193 private final String encoding; 194 195 /** 196 * Entry encoding, null for the default. 197 */ 198 private volatile String entryEncoding; 199 200 private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders; 201 202 private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders; 203 204 static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) { 205 names.forEach(name -> map.put(toKey(name), provider)); 206 } 207 208 private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() { 209 return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader()); 210 } 211 212 private static String toKey(final String name) { 213 return name.toUpperCase(Locale.ROOT); 214 } 215 216 /** 217 * Constructs a new sorted map from input stream provider names to provider 218 * objects. 219 * 220 * <p> 221 * The map returned by this method will have one entry for each provider for 222 * which support is available in the current Java virtual machine. If two or 223 * more supported provider have the same name then the resulting map will 224 * contain just one of them; which one it will contain is not specified. 225 * </p> 226 * 227 * <p> 228 * The invocation of this method, and the subsequent use of the resulting 229 * map, may cause time-consuming disk or network I/O operations to occur. 230 * This method is provided for applications that need to enumerate all of 231 * the available providers, for example to allow user provider selection. 232 * </p> 233 * 234 * <p> 235 * This method may return different results at different times if new 236 * providers are dynamically made available to the current Java virtual 237 * machine. 238 * </p> 239 * 240 * @return An immutable, map from names to provider objects 241 * @since 1.13 242 */ 243 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() { 244 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 245 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 246 putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map); 247 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map)); 248 return map; 249 }); 250 } 251 252 /** 253 * Constructs a new sorted map from output stream provider names to provider 254 * objects. 255 * 256 * <p> 257 * The map returned by this method will have one entry for each provider for 258 * which support is available in the current Java virtual machine. If two or 259 * more supported provider have the same name then the resulting map will 260 * contain just one of them; which one it will contain is not specified. 261 * </p> 262 * 263 * <p> 264 * The invocation of this method, and the subsequent use of the resulting 265 * map, may cause time-consuming disk or network I/O operations to occur. 266 * This method is provided for applications that need to enumerate all of 267 * the available providers, for example to allow user provider selection. 268 * </p> 269 * 270 * <p> 271 * This method may return different results at different times if new 272 * providers are dynamically made available to the current Java virtual 273 * machine. 274 * </p> 275 * 276 * @return An immutable, map from names to provider objects 277 * @since 1.13 278 */ 279 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() { 280 return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> { 281 final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 282 putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map); 283 archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map)); 284 return map; 285 }); 286 } 287 288 /** 289 * Create an instance using the platform default encoding. 290 */ 291 public ArchiveStreamFactory() { 292 this(null); 293 } 294 295 /** 296 * Create an instance using the specified encoding. 297 * 298 * @param encoding the encoding to be used. 299 * 300 * @since 1.10 301 */ 302 public ArchiveStreamFactory(final String encoding) { 303 this.encoding = encoding; 304 // Also set the original field so can continue to use it. 305 this.entryEncoding = encoding; 306 } 307 308 /** 309 * Returns the encoding to use for arj, jar, zip, dump, cpio and tar 310 * files, or null for the archiver default. 311 * 312 * @return entry encoding, or null for the archiver default 313 * @since 1.5 314 */ 315 public String getEntryEncoding() { 316 return entryEncoding; 317 } 318 319 /** 320 * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default. 321 * 322 * @param entryEncoding the entry encoding, null uses the archiver default. 323 * @since 1.5 324 * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding 325 * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} 326 * was used to specify the factory encoding. 327 */ 328 @Deprecated 329 public void setEntryEncoding(final String entryEncoding) { 330 // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway 331 if (encoding != null) { 332 throw new IllegalStateException("Cannot overide encoding set by the constructor"); 333 } 334 this.entryEncoding = entryEncoding; 335 } 336 337 /** 338 * Creates an archive input stream from an archiver name and an input stream. 339 * 340 * @param archiverName the archive name, 341 * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z} 342 * @param in the input stream 343 * @return the archive input stream 344 * @throws ArchiveException if the archiver name is not known 345 * @throws StreamingNotSupportedException if the format cannot be 346 * read from a stream 347 * @throws IllegalArgumentException if the archiver name or stream is null 348 */ 349 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in) throws ArchiveException { 350 return createArchiveInputStream(archiverName, in, entryEncoding); 351 } 352 353 @Override 354 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in, 355 final String actualEncoding) throws ArchiveException { 356 357 if (archiverName == null) { 358 throw new IllegalArgumentException("Archivername must not be null."); 359 } 360 361 if (in == null) { 362 throw new IllegalArgumentException("InputStream must not be null."); 363 } 364 365 if (AR.equalsIgnoreCase(archiverName)) { 366 return new ArArchiveInputStream(in); 367 } 368 if (ARJ.equalsIgnoreCase(archiverName)) { 369 if (actualEncoding != null) { 370 return new ArjArchiveInputStream(in, actualEncoding); 371 } 372 return new ArjArchiveInputStream(in); 373 } 374 if (ZIP.equalsIgnoreCase(archiverName)) { 375 if (actualEncoding != null) { 376 return new ZipArchiveInputStream(in, actualEncoding); 377 } 378 return new ZipArchiveInputStream(in); 379 } 380 if (TAR.equalsIgnoreCase(archiverName)) { 381 if (actualEncoding != null) { 382 return new TarArchiveInputStream(in, actualEncoding); 383 } 384 return new TarArchiveInputStream(in); 385 } 386 if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) { 387 if (actualEncoding != null) { 388 return new JarArchiveInputStream(in, actualEncoding); 389 } 390 return new JarArchiveInputStream(in); 391 } 392 if (CPIO.equalsIgnoreCase(archiverName)) { 393 if (actualEncoding != null) { 394 return new CpioArchiveInputStream(in, actualEncoding); 395 } 396 return new CpioArchiveInputStream(in); 397 } 398 if (DUMP.equalsIgnoreCase(archiverName)) { 399 if (actualEncoding != null) { 400 return new DumpArchiveInputStream(in, actualEncoding); 401 } 402 return new DumpArchiveInputStream(in); 403 } 404 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 405 throw new StreamingNotSupportedException(SEVEN_Z); 406 } 407 408 final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName)); 409 if (archiveStreamProvider != null) { 410 return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding); 411 } 412 413 throw new ArchiveException("Archiver: " + archiverName + " not found."); 414 } 415 416 /** 417 * Creates an archive output stream from an archiver name and an output stream. 418 * 419 * @param archiverName the archive name, 420 * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 421 * @param out the output stream 422 * @return the archive output stream 423 * @throws ArchiveException if the archiver name is not known 424 * @throws StreamingNotSupportedException if the format cannot be 425 * written to a stream 426 * @throws IllegalArgumentException if the archiver name or stream is null 427 */ 428 public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out) 429 throws ArchiveException { 430 return createArchiveOutputStream(archiverName, out, entryEncoding); 431 } 432 433 @Override 434 public ArchiveOutputStream createArchiveOutputStream( 435 final String archiverName, final OutputStream out, final String actualEncoding) 436 throws ArchiveException { 437 if (archiverName == null) { 438 throw new IllegalArgumentException("Archivername must not be null."); 439 } 440 if (out == null) { 441 throw new IllegalArgumentException("OutputStream must not be null."); 442 } 443 444 if (AR.equalsIgnoreCase(archiverName)) { 445 return new ArArchiveOutputStream(out); 446 } 447 if (ZIP.equalsIgnoreCase(archiverName)) { 448 final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 449 if (actualEncoding != null) { 450 zip.setEncoding(actualEncoding); 451 } 452 return zip; 453 } 454 if (TAR.equalsIgnoreCase(archiverName)) { 455 if (actualEncoding != null) { 456 return new TarArchiveOutputStream(out, actualEncoding); 457 } 458 return new TarArchiveOutputStream(out); 459 } 460 if (JAR.equalsIgnoreCase(archiverName)) { 461 if (actualEncoding != null) { 462 return new JarArchiveOutputStream(out, actualEncoding); 463 } 464 return new JarArchiveOutputStream(out); 465 } 466 if (CPIO.equalsIgnoreCase(archiverName)) { 467 if (actualEncoding != null) { 468 return new CpioArchiveOutputStream(out, actualEncoding); 469 } 470 return new CpioArchiveOutputStream(out); 471 } 472 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 473 throw new StreamingNotSupportedException(SEVEN_Z); 474 } 475 476 final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName)); 477 if (archiveStreamProvider != null) { 478 return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding); 479 } 480 481 throw new ArchiveException("Archiver: " + archiverName + " not found."); 482 } 483 484 /** 485 * Create an archive input stream from an input stream, autodetecting 486 * the archive type from the first few bytes of the stream. The InputStream 487 * must support marks, like BufferedInputStream. 488 * 489 * @param in the input stream 490 * @return the archive input stream 491 * @throws ArchiveException if the archiver name is not known 492 * @throws StreamingNotSupportedException if the format cannot be 493 * read from a stream 494 * @throws IllegalArgumentException if the stream is null or does not support mark 495 */ 496 public ArchiveInputStream createArchiveInputStream(final InputStream in) 497 throws ArchiveException { 498 return createArchiveInputStream(detect(in), in); 499 } 500 501 /** 502 * Try to determine the type of Archiver 503 * @param in input stream 504 * @return type of archiver if found 505 * @throws ArchiveException if an archiver cannot be detected in the stream 506 * @since 1.14 507 */ 508 public static String detect(final InputStream in) throws ArchiveException { 509 if (in == null) { 510 throw new IllegalArgumentException("Stream must not be null."); 511 } 512 513 if (!in.markSupported()) { 514 throw new IllegalArgumentException("Mark is not supported."); 515 } 516 517 final byte[] signature = new byte[SIGNATURE_SIZE]; 518 in.mark(signature.length); 519 int signatureLength = -1; 520 try { 521 signatureLength = IOUtils.readFully(in, signature); 522 in.reset(); 523 } catch (final IOException e) { 524 throw new ArchiveException("IOException while reading signature.", e); 525 } 526 527 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 528 return ZIP; 529 } 530 if (JarArchiveInputStream.matches(signature, signatureLength)) { 531 return JAR; 532 } 533 if (ArArchiveInputStream.matches(signature, signatureLength)) { 534 return AR; 535 } 536 if (CpioArchiveInputStream.matches(signature, signatureLength)) { 537 return CPIO; 538 } 539 if (ArjArchiveInputStream.matches(signature, signatureLength)) { 540 return ARJ; 541 } 542 if (SevenZFile.matches(signature, signatureLength)) { 543 return SEVEN_Z; 544 } 545 546 // Dump needs a bigger buffer to check the signature; 547 final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE]; 548 in.mark(dumpsig.length); 549 try { 550 signatureLength = IOUtils.readFully(in, dumpsig); 551 in.reset(); 552 } catch (final IOException e) { 553 throw new ArchiveException("IOException while reading dump signature", e); 554 } 555 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 556 return DUMP; 557 } 558 559 // Tar needs an even bigger buffer to check the signature; read the first block 560 final byte[] tarHeader = new byte[TAR_HEADER_SIZE]; 561 in.mark(tarHeader.length); 562 try { 563 signatureLength = IOUtils.readFully(in, tarHeader); 564 in.reset(); 565 } catch (final IOException e) { 566 throw new ArchiveException("IOException while reading tar signature", e); 567 } 568 if (TarArchiveInputStream.matches(tarHeader, signatureLength)) { 569 return TAR; 570 } 571 572 // COMPRESS-117 - improve auto-recognition 573 if (signatureLength >= TAR_HEADER_SIZE) { 574 TarArchiveInputStream tais = null; 575 try { 576 tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader)); 577 // COMPRESS-191 - verify the header checksum 578 if (tais.getNextTarEntry().isCheckSumOK()) { 579 return TAR; 580 } 581 } catch (final Exception e) { // NOPMD NOSONAR 582 // can generate IllegalArgumentException as well 583 // as IOException 584 // autodetection, simply not a TAR 585 // ignored 586 } finally { 587 IOUtils.closeQuietly(tais); 588 } 589 } 590 throw new ArchiveException("No Archiver found for the stream signature"); 591 } 592 593 public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() { 594 if (archiveInputStreamProviders == null) { 595 archiveInputStreamProviders = Collections 596 .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders()); 597 } 598 return archiveInputStreamProviders; 599 } 600 601 public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() { 602 if (archiveOutputStreamProviders == null) { 603 archiveOutputStreamProviders = Collections 604 .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders()); 605 } 606 return archiveOutputStreamProviders; 607 } 608 609 @Override 610 public Set<String> getInputStreamArchiveNames() { 611 return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z); 612 } 613 614 @Override 615 public Set<String> getOutputStreamArchiveNames() { 616 return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z); 617 } 618 619}