/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package org.apache.commons.compress.archivers.zip;

import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Deque;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.Deflater;

import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier;

/**
 * Builds a zip archive in parallel: every worker thread of the executor compresses into its own
 * thread-local {@link ScatterZipOutputStream}, and {@link #writeTo writeTo} later transfers the
 * results into a single {@link ZipArchiveOutputStream}.
 * <p>
 * Note that until 1.18, this class generally made no guarantees about the order of things written to
 * the output file. Things that needed to come in a specific order (manifests, directories)
 * had to be handled by the client of this class, usually by writing these things to the
 * {@link ZipArchiveOutputStream} <em>before</em> calling {@link #writeTo writeTo} on this class.</p>
 * <p>
 * The client can supply an {@link java.util.concurrent.ExecutorService}, but for reasons of
 * memory model consistency, this will be shut down by this class prior to completion.
 * </p>
 * @since 1.10
 */
public class ParallelScatterZipCreator {
    /** Every scatter stream created by a worker thread; closed at the end of {@link #writeTo}. */
    private final Deque<ScatterZipOutputStream> streams = new ConcurrentLinkedDeque<>();
    private final ExecutorService es;
    private final ScatterGatherBackingStoreSupplier backingStoreSupplier;
    /** One future per submitted callable, kept in submission order. */
    private final Deque<Future<? extends ScatterZipOutputStream>> futures = new ConcurrentLinkedDeque<>();

    /** Wall-clock time (ms) at which this instance was constructed. */
    private final long startedAt = System.currentTimeMillis();
    /** Wall-clock time (ms) recorded by {@link #writeTo} once the executor has been awaited. */
    private long compressionDoneAt;
    /** Wall-clock time (ms) recorded by {@link #writeTo} after the scatter streams were written and closed. */
    private long scatterDoneAt;
    private final int compressionLevel;

    /**
     * Default supplier: each call creates a new temporary file and wraps it in a
     * {@link FileBasedScatterGatherBackingStore}.
     */
    private static class DefaultBackingStoreSupplier implements ScatterGatherBackingStoreSupplier {
        final AtomicInteger storeNum = new AtomicInteger(0);

        @Override
        public ScatterGatherBackingStore get() throws IOException {
            // The running counter becomes part of the temp file suffix.
            final String suffix = "n" + storeNum.incrementAndGet();
            return new FileBasedScatterGatherBackingStore(Files.createTempFile("parallelscatter", suffix));
        }
    }

    /**
     * Creates a scatter stream on top of a backing store obtained from the given supplier, compressing
     * with this instance's compression level.
     *
     * @param scatterGatherBackingStoreSupplier source of the backing store
     * @return a new scatter stream
     * @throws IOException if the backing store cannot be obtained
     */
    private ScatterZipOutputStream createDeferred(final ScatterGatherBackingStoreSupplier scatterGatherBackingStoreSupplier)
            throws IOException {
        final ScatterGatherBackingStore backingStore = scatterGatherBackingStoreSupplier.get();
        // lifecycle of the compressor is bound to the ScatterZipOutputStream returned
        final StreamCompressor compressor = StreamCompressor.create(compressionLevel, backingStore); //NOSONAR
        return new ScatterZipOutputStream(backingStore, compressor);
    }

    /**
     * Per-thread scatter stream. The first access from a thread creates the stream and registers it in
     * {@link #streams}; an {@link IOException} during creation is rethrown unchecked.
     */
    private final ThreadLocal<ScatterZipOutputStream> tlScatterStreams = new ThreadLocal<ScatterZipOutputStream>() {
        @Override
        protected ScatterZipOutputStream initialValue() {
            final ScatterZipOutputStream created;
            try {
                created = createDeferred(backingStoreSupplier);
            } catch (final IOException e) {
                throw new UncheckedIOException(e); //NOSONAR
            }
            streams.add(created);
            return created;
        }
    };

    /**
     * Create a ParallelScatterZipCreator with default threads, which is set to the number of available
     * processors, as defined by {@link java.lang.Runtime#availableProcessors}
     */
    public ParallelScatterZipCreator() {
        this(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
    }

    /**
     * Create a ParallelScatterZipCreator
     *
     * @param executorService The executorService to use for parallel scheduling. For technical reasons,
     *                        this will be shut down by this class.
     */
    public ParallelScatterZipCreator(final ExecutorService executorService) {
        this(executorService, new DefaultBackingStoreSupplier());
    }

    /**
     * Create a ParallelScatterZipCreator
     *
     * @param executorService The executorService to use. For technical reasons, this will be shut down
     *                        by this class.
     * @param backingStoreSupplier The supplier of backing store which shall be used
     */
    public ParallelScatterZipCreator(final ExecutorService executorService,
                                     final ScatterGatherBackingStoreSupplier backingStoreSupplier) {
        this(executorService, backingStoreSupplier, Deflater.DEFAULT_COMPRESSION);
    }

    /**
     * Create a ParallelScatterZipCreator
     *
     * @param executorService The executorService to use. For technical reasons, this will be shut down
     *                        by this class.
     * @param backingStoreSupplier The supplier of backing store which shall be used
     * @param compressionLevel The compression level used in compression, this value should be
     *                         -1(default level) or between 0~9.
     * @throws IllegalArgumentException if the compression level is illegal
     * @since 1.21
     */
    public ParallelScatterZipCreator(final ExecutorService executorService,
                                     final ScatterGatherBackingStoreSupplier backingStoreSupplier,
                                     final int compressionLevel) throws IllegalArgumentException {
        final boolean isDefaultLevel = compressionLevel == Deflater.DEFAULT_COMPRESSION;
        final boolean isExplicitLevel = compressionLevel >= Deflater.NO_COMPRESSION
                && compressionLevel <= Deflater.BEST_COMPRESSION;
        if (!isExplicitLevel && !isDefaultLevel) {
            throw new IllegalArgumentException("Compression level is expected between -1~9");
        }

        this.es = executorService;
        this.backingStoreSupplier = backingStoreSupplier;
        this.compressionLevel = compressionLevel;
    }

    /**
     * Adds an archive entry to this archive.
     * <p>
     * This method is expected to be called from a single client thread
     * </p>
     *
     * @param zipArchiveEntry The entry to add.
     * @param source          The source input stream supplier
     */
    public void addArchiveEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
        final Callable<ScatterZipOutputStream> compressionTask = createCallable(zipArchiveEntry, source);
        submitStreamAwareCallable(compressionTask);
    }

    /**
     * Adds an archive entry to this archive.
     * <p>
     * This method is expected to be called from a single client thread
     * </p>
     *
     * @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
     * @since 1.13
     */
    public void addArchiveEntry(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
        final Callable<ScatterZipOutputStream> compressionTask = createCallable(zipArchiveEntryRequestSupplier);
        submitStreamAwareCallable(compressionTask);
    }

    /**
     * Submit a callable for compression.
     * <p>
     * The callable's own result is discarded; after it has run, the scatter stream of the executing
     * thread is reported as the result of the submitted task.
     * </p>
     *
     * @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
     *
     * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
     */
    public final void submit(final Callable<? extends Object> callable) {
        final Callable<ScatterZipOutputStream> streamAware = () -> {
            callable.call();
            return tlScatterStreams.get();
        };
        submitStreamAwareCallable(streamAware);
    }

    /**
     * Submit a callable for compression.
     *
     * @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
     *
     * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
     * @since 1.19
     */
    public final void submitStreamAwareCallable(final Callable<? extends ScatterZipOutputStream> callable) {
        final Future<? extends ScatterZipOutputStream> pending = es.submit(callable);
        futures.add(pending);
    }

    /**
     * Create a callable that will compress the given archive entry.
     *
     * <p>This method is expected to be called from a single client thread.</p>
     *
     * Consider using {@link #addArchiveEntry addArchiveEntry}, which wraps this method and
     * {@link #submitStreamAwareCallable submitStreamAwareCallable}.
     * The most common use case for using {@link #createCallable createCallable} and
     * {@link #submitStreamAwareCallable submitStreamAwareCallable} from a
     * client is if you want to wrap the callable in something that can be prioritized by the supplied
     * {@link ExecutorService}, for instance to process large or slow files first.
     * Since the creation of the {@link ExecutorService} is handled by the client, all of this is up to the client.
     *
     * @param zipArchiveEntry The entry to add.
     * @param source          The source input stream supplier
     * @return A callable that should subsequently be passed to {@link #submitStreamAwareCallable}, possibly in a
     *         wrapped/adapted form. The callable yields the scatter stream of the thread it ran on, and any
     *         exceptions happening inside the compression will be propagated through the callable.
     * @throws IllegalArgumentException if no compression method has been set on the entry
     */
    public final Callable<ScatterZipOutputStream> createCallable(final ZipArchiveEntry zipArchiveEntry,
                                                                 final InputStreamSupplier source) {
        if (zipArchiveEntry.getMethod() == ZipMethod.UNKNOWN_CODE) {
            throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry);
        }
        // The request is built eagerly on the calling thread; only the compression itself is deferred.
        final ZipArchiveEntryRequest request = createZipArchiveEntryRequest(zipArchiveEntry, source);
        return () -> {
            final ScatterZipOutputStream threadStream = tlScatterStreams.get();
            threadStream.addArchiveEntry(request);
            return threadStream;
        };
    }

    /**
     * Create a callable that will compress archive entry supplied by {@link ZipArchiveEntryRequestSupplier}.
     *
     * <p>This method is expected to be called from a single client thread.</p>
     *
     * The same as {@link #createCallable(ZipArchiveEntry, InputStreamSupplier)}, but the archive entry
     * to be added is supplied by a {@link ZipArchiveEntryRequestSupplier}, which is only queried when the
     * callable runs.
     *
     * @see #createCallable(ZipArchiveEntry, InputStreamSupplier)
     *
     * @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
     * @return A callable that should subsequently be passed to {@link #submitStreamAwareCallable}, possibly in a
     *         wrapped/adapted form. The callable yields the scatter stream of the thread it ran on, and any
     *         exceptions happening inside the compression will be propagated through the callable.
     * @since 1.13
     */
    public final Callable<ScatterZipOutputStream> createCallable(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
        return () -> {
            final ScatterZipOutputStream threadStream = tlScatterStreams.get();
            threadStream.addArchiveEntry(zipArchiveEntryRequestSupplier.get());
            return threadStream;
        };
    }

    /**
     * Write the contents of this creator to the target {@link ZipArchiveOutputStream}.
     * <p>
     * It may be beneficial to write things like directories and manifest files to the targetStream
     * before calling this method.
     * </p>
     *
     * <p>Calling this method will shut down the {@link ExecutorService} used by this class. If any of the {@link
     * Callable}s {@link #submitStreamAwareCallable submit}ted to this instance throws an exception, the archive
     * can not be created properly and this method will throw an exception.</p>
     *
     * @param targetStream The {@link ZipArchiveOutputStream} to receive the contents of the scatter streams
     * @throws IOException          If writing fails
     * @throws InterruptedException If we get interrupted
     * @throws ExecutionException   If something happens in the parallel execution
     */
    public void writeTo(final ZipArchiveOutputStream targetStream)
            throws IOException, InterruptedException, ExecutionException {
        try {
            awaitCompression();

            // It is important that all threads terminate before we go on, ensure happens-before relationship
            compressionDoneAt = System.currentTimeMillis();

            // Walk the futures in submission order and let each task's stream emit its next entry.
            for (final Future<? extends ScatterZipOutputStream> completed : futures) {
                completed.get().zipEntryWriter().writeNextZipEntry(targetStream);
            }

            for (final ScatterZipOutputStream open : streams) {
                open.close();
            }

            scatterDoneAt = System.currentTimeMillis();
        } finally {
            // Best-effort close of every stream, reached on success and on failure alike.
            closeAll();
        }
    }

    /**
     * Waits for every submitted task, then shuts the executor down and waits for its termination.
     * The executor is shut down even if one of the tasks failed.
     *
     * @throws InterruptedException if the waiting thread is interrupted
     * @throws ExecutionException   if one of the submitted tasks threw an exception
     */
    private void awaitCompression() throws InterruptedException, ExecutionException {
        // Make sure we catch any exceptions from parallel phase
        try {
            for (final Future<?> pending : futures) {
                pending.get();
            }
        } finally {
            es.shutdown();
        }

        // 60000 seconds == "infinity" for practical purposes. We really *must* wait for this to complete
        es.awaitTermination(1000 * 60L, TimeUnit.SECONDS);
    }

    /**
     * Returns a message describing the overall statistics of the compression run
     *
     * @return statistics built from the compression time (construction until the executor was awaited)
     *         and the merging time (from that point until the scatter streams were written and closed)
     */
    public ScatterStatistics getStatisticsMessage() {
        final long compressionElapsed = compressionDoneAt - startedAt;
        final long mergingElapsed = scatterDoneAt - compressionDoneAt;
        return new ScatterStatistics(compressionElapsed, mergingElapsed);
    }

    /**
     * Closes every registered scatter stream, ignoring any {@link IOException} so that the
     * remaining streams are still closed.
     */
    private void closeAll() {
        for (final ScatterZipOutputStream open : streams) {
            try {
                open.close();
            } catch (final IOException ignored) { //NOSONAR
                // no way to properly log this
            }
        }
    }
}