/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
018package org.apache.commons.compress.archivers.zip;
019
020import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest;
021
022import java.io.IOException;
023import java.io.UncheckedIOException;
024import java.nio.file.Files;
025import java.nio.file.Path;
026import java.util.Deque;
027import java.util.concurrent.Callable;
028import java.util.concurrent.ConcurrentLinkedDeque;
029import java.util.concurrent.ExecutionException;
030import java.util.concurrent.ExecutorService;
031import java.util.concurrent.Executors;
032import java.util.concurrent.Future;
033import java.util.concurrent.TimeUnit;
034import java.util.concurrent.atomic.AtomicInteger;
035import java.util.zip.Deflater;
036
037import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore;
038import org.apache.commons.compress.parallel.InputStreamSupplier;
039import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
040import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier;
041
042/**
043 * Creates a zip in parallel by using multiple threadlocal {@link ScatterZipOutputStream} instances.
044 * <p>
045 * Note that until 1.18, this class generally made no guarantees about the order of things written to
046 * the output file. Things that needed to come in a specific order (manifests, directories)
047 * had to be handled by the client of this class, usually by writing these things to the
048 * {@link ZipArchiveOutputStream} <em>before</em> calling {@link #writeTo writeTo} on this class.</p>
049 * <p>
050 * The client can supply an {@link java.util.concurrent.ExecutorService}, but for reasons of
051 * memory model consistency, this will be shut down by this class prior to completion.
052 * </p>
053 * @since 1.10
054 */
055public class ParallelScatterZipCreator {
056    private final Deque<ScatterZipOutputStream> streams = new ConcurrentLinkedDeque<>();
057    private final ExecutorService es;
058    private final ScatterGatherBackingStoreSupplier backingStoreSupplier;
059    private final Deque<Future<? extends ScatterZipOutputStream>> futures = new ConcurrentLinkedDeque<>();
060
061    private final long startedAt = System.currentTimeMillis();
062    private long compressionDoneAt;
063    private long scatterDoneAt;
064    private final int compressionLevel;
065
066    private static class DefaultBackingStoreSupplier implements ScatterGatherBackingStoreSupplier {
067        final AtomicInteger storeNum = new AtomicInteger(0);
068
069        @Override
070        public ScatterGatherBackingStore get() throws IOException {
071            final Path tempFile = Files.createTempFile("parallelscatter", "n" + storeNum.incrementAndGet());
072            return new FileBasedScatterGatherBackingStore(tempFile);
073        }
074    }
075
076    private ScatterZipOutputStream createDeferred(final ScatterGatherBackingStoreSupplier scatterGatherBackingStoreSupplier)
077            throws IOException {
078        final ScatterGatherBackingStore bs = scatterGatherBackingStoreSupplier.get();
079        // lifecycle is bound to the ScatterZipOutputStream returned
080        final StreamCompressor sc = StreamCompressor.create(compressionLevel, bs); //NOSONAR
081        return new ScatterZipOutputStream(bs, sc);
082    }
083
084    private final ThreadLocal<ScatterZipOutputStream> tlScatterStreams = new ThreadLocal<ScatterZipOutputStream>() {
085        @Override
086        protected ScatterZipOutputStream initialValue() {
087            try {
088                final ScatterZipOutputStream scatterStream = createDeferred(backingStoreSupplier);
089                streams.add(scatterStream);
090                return scatterStream;
091            } catch (final IOException e) {
092                throw new UncheckedIOException(e); //NOSONAR
093            }
094        }
095    };
096
097    /**
098     * Create a ParallelScatterZipCreator with default threads, which is set to the number of available
099     * processors, as defined by {@link java.lang.Runtime#availableProcessors}
100     */
101    public ParallelScatterZipCreator() {
102        this(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
103    }
104
105    /**
106     * Create a ParallelScatterZipCreator
107     *
108     * @param executorService The executorService to use for parallel scheduling. For technical reasons,
109     *                        this will be shut down by this class.
110     */
111    public ParallelScatterZipCreator(final ExecutorService executorService) {
112        this(executorService, new DefaultBackingStoreSupplier());
113    }
114
115    /**
116     * Create a ParallelScatterZipCreator
117     *
118     * @param executorService The executorService to use. For technical reasons, this will be shut down
119     *                        by this class.
120     * @param backingStoreSupplier The supplier of backing store which shall be used
121     */
122    public ParallelScatterZipCreator(final ExecutorService executorService,
123                                     final ScatterGatherBackingStoreSupplier backingStoreSupplier) {
124        this(executorService, backingStoreSupplier, Deflater.DEFAULT_COMPRESSION);
125    }
126
127    /**
128     * Create a ParallelScatterZipCreator
129     *
130     * @param executorService      The executorService to use. For technical reasons, this will be shut down
131     *                             by this class.
132     * @param backingStoreSupplier The supplier of backing store which shall be used
133     * @param compressionLevel     The compression level used in compression, this value should be
134     *                             -1(default level) or between 0~9.
135     * @throws IllegalArgumentException if the compression level is illegal
136     * @since 1.21
137     */
138    public ParallelScatterZipCreator(final ExecutorService executorService,
139                                     final ScatterGatherBackingStoreSupplier backingStoreSupplier,
140                                     final int compressionLevel) throws IllegalArgumentException {
141        if ((compressionLevel < Deflater.NO_COMPRESSION || compressionLevel > Deflater.BEST_COMPRESSION)
142                && compressionLevel != Deflater.DEFAULT_COMPRESSION) {
143            throw new IllegalArgumentException("Compression level is expected between -1~9");
144        }
145
146        this.backingStoreSupplier = backingStoreSupplier;
147        es = executorService;
148        this.compressionLevel = compressionLevel;
149    }
150
151    /**
152     * Adds an archive entry to this archive.
153     * <p>
154     * This method is expected to be called from a single client thread
155     * </p>
156     *
157     * @param zipArchiveEntry The entry to add.
158     * @param source          The source input stream supplier
159     */
160
161    public void addArchiveEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
162        submitStreamAwareCallable(createCallable(zipArchiveEntry, source));
163    }
164
165    /**
166     * Adds an archive entry to this archive.
167     * <p>
168     * This method is expected to be called from a single client thread
169     * </p>
170     *
171     * @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
172     * @since 1.13
173     */
174    public void addArchiveEntry(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
175        submitStreamAwareCallable(createCallable(zipArchiveEntryRequestSupplier));
176    }
177
178    /**
179     * Submit a callable for compression.
180     *
181     * @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
182     *
183     * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
184     */
185    public final void submit(final Callable<? extends Object> callable) {
186        submitStreamAwareCallable(() -> {
187            callable.call();
188            return tlScatterStreams.get();
189        });
190    }
191
192    /**
193     * Submit a callable for compression.
194     *
195     * @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
196     *
197     * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
198     * @since 1.19
199     */
200    public final void submitStreamAwareCallable(final Callable<? extends ScatterZipOutputStream> callable) {
201        futures.add(es.submit(callable));
202    }
203
204    /**
205     * Create a callable that will compress the given archive entry.
206     *
207     * <p>This method is expected to be called from a single client thread.</p>
208     *
209     * Consider using {@link #addArchiveEntry addArchiveEntry}, which wraps this method and {@link #submitStreamAwareCallable submitStreamAwareCallable}.
210     * The most common use case for using {@link #createCallable createCallable} and {@link #submitStreamAwareCallable submitStreamAwareCallable} from a
211     * client is if you want to wrap the callable in something that can be prioritized by the supplied
212     * {@link ExecutorService}, for instance to process large or slow files first.
213     * Since the creation of the {@link ExecutorService} is handled by the client, all of this is up to the client.
214     *
215     * @param zipArchiveEntry The entry to add.
216     * @param source          The source input stream supplier
217     * @return A callable that should subsequently passed to #submitStreamAwareCallable, possibly in a wrapped/adapted from. The
218     * value of this callable is not used, but any exceptions happening inside the compression
219     * will be propagated through the callable.
220     */
221
222    public final Callable<ScatterZipOutputStream> createCallable(final ZipArchiveEntry zipArchiveEntry,
223        final InputStreamSupplier source) {
224        final int method = zipArchiveEntry.getMethod();
225        if (method == ZipMethod.UNKNOWN_CODE) {
226            throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry);
227        }
228        final ZipArchiveEntryRequest zipArchiveEntryRequest = createZipArchiveEntryRequest(zipArchiveEntry, source);
229        return () -> {
230            final ScatterZipOutputStream scatterStream = tlScatterStreams.get();
231            scatterStream.addArchiveEntry(zipArchiveEntryRequest);
232            return scatterStream;
233        };
234    }
235
236    /**
237     * Create a callable that will compress archive entry supplied by {@link ZipArchiveEntryRequestSupplier}.
238     *
239     * <p>This method is expected to be called from a single client thread.</p>
240     *
241     * The same as {@link #createCallable(ZipArchiveEntry, InputStreamSupplier)}, but the archive entry
242     * to be added is supplied by a {@link ZipArchiveEntryRequestSupplier}.
243     *
244     * @see #createCallable(ZipArchiveEntry, InputStreamSupplier)
245     *
246     * @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
247     * @return A callable that should subsequently passed to #submitStreamAwareCallable, possibly in a wrapped/adapted from. The
248     * value of this callable is not used, but any exceptions happening inside the compression
249     * will be propagated through the callable.
250     * @since 1.13
251     */
252    public final Callable<ScatterZipOutputStream> createCallable(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
253        return () -> {
254            final ScatterZipOutputStream scatterStream = tlScatterStreams.get();
255            scatterStream.addArchiveEntry(zipArchiveEntryRequestSupplier.get());
256            return scatterStream;
257        };
258    }
259
260    /**
261     * Write the contents this to the target {@link ZipArchiveOutputStream}.
262     * <p>
263     * It may be beneficial to write things like directories and manifest files to the targetStream
264     * before calling this method.
265     * </p>
266     *
267     * <p>Calling this method will shut down the {@link ExecutorService} used by this class. If any of the {@link
268     * Callable}s {@link #submitStreamAwareCallable submit}ted to this instance throws an exception, the archive can not be created properly and
269     * this method will throw an exception.</p>
270     *
271     * @param targetStream The {@link ZipArchiveOutputStream} to receive the contents of the scatter streams
272     * @throws IOException          If writing fails
273     * @throws InterruptedException If we get interrupted
274     * @throws ExecutionException   If something happens in the parallel execution
275     */
276    public void writeTo(final ZipArchiveOutputStream targetStream)
277            throws IOException, InterruptedException, ExecutionException {
278
279        try {
280            // Make sure we catch any exceptions from parallel phase
281            try {
282                for (final Future<?> future : futures) {
283                    future.get();
284                }
285            } finally {
286                es.shutdown();
287            }
288
289            es.awaitTermination(1000 * 60L, TimeUnit.SECONDS);  // == Infinity. We really *must* wait for this to complete
290
291            // It is important that all threads terminate before we go on, ensure happens-before relationship
292            compressionDoneAt = System.currentTimeMillis();
293
294            for (final Future<? extends ScatterZipOutputStream> future : futures) {
295                final ScatterZipOutputStream scatterStream = future.get();
296                scatterStream.zipEntryWriter().writeNextZipEntry(targetStream);
297            }
298
299            for (final ScatterZipOutputStream scatterStream : streams) {
300                scatterStream.close();
301            }
302
303            scatterDoneAt = System.currentTimeMillis();
304        } finally {
305            closeAll();
306        }
307    }
308
309    /**
310     * Returns a message describing the overall statistics of the compression run
311     *
312     * @return A string
313     */
314    public ScatterStatistics getStatisticsMessage() {
315        return new ScatterStatistics(compressionDoneAt - startedAt, scatterDoneAt - compressionDoneAt);
316    }
317
318    private void closeAll() {
319        for (final ScatterZipOutputStream scatterStream : streams) {
320            try {
321                scatterStream.close();
322            } catch (final IOException ex) { //NOSONAR
323                // no way to properly log this
324            }
325        }
326    }
327}
328