001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.harmony.pack200;
018
019import java.io.IOException;
020import java.io.OutputStream;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Map;
026import java.util.function.Function;
027import java.util.stream.Collectors;
028import java.util.stream.IntStream;
029
030/**
031 * Abstract superclass for a set of bands
032 */
033public abstract class BandSet {
034
    /** Segment header supplied at construction; band coding specifiers are appended to it while encoding. */
    protected final SegmentHeader segmentHeader;
    /** Packing effort supplied at construction; indexes effortThresholds and limits the codec search. */
    final int effort;

    // Minimum size of band for each effort level where we consider alternative codecs
    // Note: these values have been tuned - please test carefully if changing them
    private static final int[] effortThresholds = {0, 0, 1000, 500, 100, 100, 100, 100, 100, 0};

    // Lazily filled by analyseBand with largest()/smallest() of canonical codecs 1..115 (index 0 stays 0).
    // NOTE(review): nothing in the visible part of this class reads these caches - confirm they are still needed.
    private long[] canonicalLargest;
    private long[] canonicalSmallest;
044
045    /**
046     * Create a new BandSet
047     *
048     * @param effort - the packing effort to be used (must be 1-9)
049     * @param header - the segment header
050     */
051    public BandSet(final int effort, final SegmentHeader header) {
052        this.effort = effort;
053        this.segmentHeader = header;
054    }
055
    /**
     * Write the packed set of bands to the given output stream
     *
     * @param out the stream that receives the packed bands
     * @throws IOException If an I/O error occurs.
     * @throws Pack200Exception If the bands cannot be packed (the exact conditions are defined by the implementing
     *         subclass)
     */
    public abstract void pack(OutputStream out) throws IOException, Pack200Exception;
064
065    /**
066     * Encode a band without considering other Codecs
067     *
068     * @param band - the band
069     * @param codec - the Codec to use
070     * @return the encoded band
071     * @throws Pack200Exception TODO
072     */
073    public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception {
074        return codec.encode(band);
075    }
076
077    /**
078     * Encode a single value with the given Codec
079     *
080     * @param value - the value to encode
081     * @param codec - Codec to use
082     * @return the encoded value
083     * @throws Pack200Exception TODO
084     */
085    public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception {
086        return codec.encode(value);
087    }
088
089    /**
090     * Encode a band of integers. The default codec may be used, but other Codecs are considered if effort is greater
091     * than 1.
092     *
093     * @param name - name of the band (used for debugging)
094     * @param ints - the band
095     * @param defaultCodec - the default Codec
096     * @return the encoded band
097     * @throws Pack200Exception TODO
098     */
099    public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec)
100        throws Pack200Exception {
101        byte[] encodedBand = null;
102        // Useful for debugging
103//        if(ints.length > 0) {
104//            System.out.println("encoding " + name + " " + ints.length);
105//        }
106        if (effort > 1 && (ints.length >= effortThresholds[effort])) {
107            final BandAnalysisResults results = analyseBand(name, ints, defaultCodec);
108            final Codec betterCodec = results.betterCodec;
109            encodedBand = results.encodedBand;
110            if (betterCodec != null) {
111                if (betterCodec instanceof BHSDCodec) {
112                    final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec);
113                    int specifier = specifierBand[0];
114                    if (specifierBand.length > 1) {
115                        for (int i = 1; i < specifierBand.length; i++) {
116                            segmentHeader.appendBandCodingSpecifier(specifierBand[i]);
117                        }
118                    }
119                    if (defaultCodec.isSigned()) {
120                        specifier = -1 - specifier;
121                    } else {
122                        specifier = specifier + defaultCodec.getL();
123                    }
124                    final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
125                    final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
126                    System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
127                    System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
128                    return band;
129                }
130                if (betterCodec instanceof PopulationCodec) {
131                    IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier);
132                    return encodedBand;
133                }
134                if (betterCodec instanceof RunCodec) {
135
136                }
137            }
138        }
139
140        // If we get here then we've decided to use the default codec.
141        if (ints.length > 0) {
142            if (encodedBand == null) {
143                encodedBand = defaultCodec.encode(ints);
144            }
145            final int first = ints[0];
146            if (defaultCodec.getB() != 1) {
147                if (defaultCodec.isSigned() && first >= -256 && first <= -1) {
148                    final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec);
149                    final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
150                    final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
151                    System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
152                    System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
153                    return band;
154                }
155                if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) {
156                    final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL();
157                    final byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
158                    final byte[] band = new byte[specifierEncoded.length + encodedBand.length];
159                    System.arraycopy(specifierEncoded, 0, band, 0, specifierEncoded.length);
160                    System.arraycopy(encodedBand, 0, band, specifierEncoded.length, encodedBand.length);
161                    return band;
162                }
163            }
164            return encodedBand;
165        }
166        return new byte[0];
167    }
168
169    private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec)
170        throws Pack200Exception {
171
172        final BandAnalysisResults results = new BandAnalysisResults();
173
174        if (canonicalLargest == null) {
175            canonicalLargest = new long[116];
176            canonicalSmallest = new long[116];
177            for (int i = 1; i < canonicalLargest.length; i++) {
178                canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest();
179                canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest();
180            }
181        }
182        final BandData bandData = new BandData(band);
183
184        // Check that there is a reasonable saving to be made
185        final byte[] encoded = defaultCodec.encode(band);
186        results.encodedBand = encoded;
187
188        // Note: these values have been tuned - please test carefully if changing them
189        if (encoded.length <= band.length + 23 - 2 * effort) { // TODO: tweak
190            return results;
191        }
192
193        // Check if we can use BYTE1 as that's a 1:1 mapping if we can
194        if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) {
195            results.encodedBand = Codec.BYTE1.encode(band);
196            results.betterCodec = Codec.BYTE1;
197            return results;
198        }
199
200        // Consider a population codec (but can't be nested)
201        if (effort > 3 && !name.equals("POPULATION")) {
202            final int numDistinctValues = bandData.numDistinctValues();
203            final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length;
204
205            // Note: these values have been tuned - please test carefully if changing them
206            if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02
207                || (effort > 6 && distinctValuesAsProportion < 0.04)) { // TODO: tweak
208                encodeWithPopulationCodec(name, band, defaultCodec, bandData, results);
209                if (timeToStop(results)) {
210                    return results;
211                }
212            }
213        }
214
215        final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>();
216
217        // See if the deltas are mainly small increments
218        if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) {
219            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2);
220        }
221
222        if (bandData.wellCorrelated()) { // Try delta encodings
223            if (bandData.mainlyPositiveDeltas()) {
224                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
225                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
226                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
227                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
228                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
229                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
230                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
231                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
232                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
233            } else {
234                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
235                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
236                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
237                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
238                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
239                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
240                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
241            }
242        } else if (bandData.anyNegatives()) {
243            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
244            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
245            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
246            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
247            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
248            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
249            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
250        } else {
251            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
252            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
253            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
254            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
255            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
256            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
257            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
258            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
259            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
260        }
261        if (name.equalsIgnoreCase("cpint")) {
262            System.out.print("");
263        }
264
265        for (BHSDCodec[] family : codecFamiliesToTry) {
266            tryCodecs(name, band, defaultCodec, bandData, results, encoded, family);
267            if (timeToStop(results)) {
268                break;
269            }
270        }
271
272        return results;
273    }
274
275    private boolean timeToStop(final BandAnalysisResults results) {
276        // if tried more than effort number of codecs for this band then return
277        // Note: these values have been tuned - please test carefully if changing them
278        if (effort > 6) {
279            return results.numCodecsTried >= effort * 2;
280        }
281        return results.numCodecsTried >= effort;
282        // May want to also check how much we've saved if performance needs improving, e.g. saved more than effort*2 %
283        // || (float)results.saved/(float)results.encodedBand.length > (float)effort * 2/100;
284    }
285
286    private void tryCodecs(final String name, final int[] band, final BHSDCodec defaultCodec, final BandData bandData,
287        final BandAnalysisResults results, final byte[] encoded, final BHSDCodec[] potentialCodecs)
288        throws Pack200Exception {
289        for (final BHSDCodec potential : potentialCodecs) {
290            if (potential.equals(defaultCodec)) {
291                return; // don't try codecs with greater cardinality in the same 'family' as the default codec as there
292                        // won't be any savings
293            }
294            if (potential.isDelta()) {
295                if (potential.largest() >= bandData.largestDelta && potential.smallest() <= bandData.smallestDelta
296                    && potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) {
297                    // TODO: can represent some negative deltas with overflow
298                    final byte[] encoded2 = potential.encode(band);
299                    results.numCodecsTried++;
300                    final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
301                    final int saved = encoded.length - encoded2.length - specifierEncoded.length;
302                    if (saved > results.saved) {
303                        results.betterCodec = potential;
304                        results.encodedBand = encoded2;
305                        results.saved = saved;
306                    }
307                }
308            } else if (potential.largest() >= bandData.largest && potential.smallest() <= bandData.smallest) {
309                final byte[] encoded2 = potential.encode(band);
310                results.numCodecsTried++;
311                final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
312                final int saved = encoded.length - encoded2.length - specifierEncoded.length;
313                if (saved > results.saved) {
314                    results.betterCodec = potential;
315                    results.encodedBand = encoded2;
316                    results.saved = saved;
317                }
318            }
319            if (timeToStop(results)) {
320                return;
321            }
322        }
323    }
324
325// This could be useful if further enhancements are done but is not currently used
326//
327//    private void encodeWithRunCodec(String name, int[] band, int index,
328//            BHSDCodec defaultCodec, BandData bandData,
329//            BandAnalysisResults results) throws Pack200Exception {
330//        int[] firstBand = new int[index];
331//        int[] secondBand = new int[band.length - index];
332//        System.arraycopy(band, 0, firstBand, 0, index);
333//        System.arraycopy(band, index, secondBand, 0, secondBand.length);
334//        BandAnalysisResults firstResults = analyseBand(name + "A", firstBand, defaultCodec);
335//        BandAnalysisResults secondResults = analyseBand(name + "B", secondBand, defaultCodec);
336//        int specifier = 117;
337//        byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
338//        int totalLength = firstResults.encodedBand.length + secondResults.encodedBand.length + specifierEncoded.length + 4; // TODO actual
339//        if(totalLength < results.encodedBand.length) {
340//            System.out.println("using run codec");
341//            results.saved += results.encodedBand.length - totalLength;
342//            byte[] encodedBand = new byte[specifierEncoded.length + firstResults.encodedBand.length + secondResults.encodedBand.length];
343//            System.arraycopy(specifierEncoded, 0, encodedBand, 0, specifierEncoded.length);
344//            System.arraycopy(firstResults.encodedBand, 0, encodedBand, specifierEncoded.length, firstResults.encodedBand.length);
345//            System.arraycopy(secondResults.encodedBand, 0, encodedBand, specifierEncoded.length + firstResults.encodedBand.length, secondResults.encodedBand.length);
346//            results.encodedBand = encodedBand;
347//            results.betterCodec = new RunCodec(index, firstResults.betterCodec, secondResults.betterCodec);
348//        }
349//    }
350
351    private void encodeWithPopulationCodec(final String name, final int[] band, final BHSDCodec defaultCodec,
352        final BandData bandData, final BandAnalysisResults results) throws Pack200Exception {
353        results.numCodecsTried += 3; // quite a bit more effort to try this codec
354        final Map<Integer, Integer> distinctValues = bandData.distinctValues;
355
356        final List<Integer> favored = new ArrayList<>();
357        distinctValues.forEach((k, v) -> {
358            if (v.intValue() > 2 || distinctValues.size() < 256) { // TODO: tweak
359                favored.add(k);
360            }
361        });
362
363        // Sort the favored list with the most commonly occurring first
364        if (distinctValues.size() > 255) {
365            favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0)));
366        }
367
368        final Map<Integer, Integer> favoredToIndex = new HashMap<>();
369        for (int i = 0; i < favored.size(); i++) {
370            favoredToIndex.put(favored.get(i), Integer.valueOf(i));
371        }
372
373        final IntList unfavoured = new IntList();
374        final int[] tokens = new int[band.length];
375        for (int i = 0; i < band.length; i++) {
376            final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i]));
377            if (favouredIndex == null) {
378                tokens[i] = 0;
379                unfavoured.add(band[i]);
380            } else {
381                tokens[i] = favouredIndex.intValue() + 1;
382            }
383        }
384        favored.add(favored.get(favored.size() - 1)); // repeat last value
385        final int[] favouredBand = integerListToArray(favored);
386        final int[] unfavouredBand = unfavoured.toArray();
387
388        // Analyse the three bands to get the best codec
389        final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec);
390        final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec);
391
392        int tdefL = 0;
393        int l = 0;
394        Codec tokenCodec = null;
395        byte[] tokensEncoded;
396        final int k = favored.size() - 1;
397        if (k < 256) {
398            tdefL = 1;
399            tokensEncoded = Codec.BYTE1.encode(tokens);
400        } else {
401            final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec);
402            tokenCodec = tokenResults.betterCodec;
403            tokensEncoded = tokenResults.encodedBand;
404            if (tokenCodec == null) {
405                tokenCodec = defaultCodec;
406            }
407            l = ((BHSDCodec) tokenCodec).getL();
408            final int h = ((BHSDCodec) tokenCodec).getH();
409            final int s = ((BHSDCodec) tokenCodec).getS();
410            final int b = ((BHSDCodec) tokenCodec).getB();
411            final int d = ((BHSDCodec) tokenCodec).isDelta() ? 1 : 0;
412            if (s == 0 && d == 0) {
413                boolean canUseTDefL = true;
414                if (b > 1) {
415                    final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h);
416                    if (oneLowerB.largest() >= k) {
417                        canUseTDefL = false;
418                    }
419                }
420                if (canUseTDefL) {
421                    switch (l) {
422                    case 4:
423                        tdefL = 1;
424                        break;
425                    case 8:
426                        tdefL = 2;
427                        break;
428                    case 16:
429                        tdefL = 3;
430                        break;
431                    case 32:
432                        tdefL = 4;
433                        break;
434                    case 64:
435                        tdefL = 5;
436                        break;
437                    case 128:
438                        tdefL = 6;
439                        break;
440                    case 192:
441                        tdefL = 7;
442                        break;
443                    case 224:
444                        tdefL = 8;
445                        break;
446                    case 240:
447                        tdefL = 9;
448                        break;
449                    case 248:
450                        tdefL = 10;
451                        break;
452                    case 252:
453                        tdefL = 11;
454                        break;
455                    }
456                }
457            }
458        }
459
460        final byte[] favouredEncoded = favouredResults.encodedBand;
461        final byte[] unfavouredEncoded = unfavouredResults.encodedBand;
462        final Codec favouredCodec = favouredResults.betterCodec;
463        final Codec unfavouredCodec = unfavouredResults.betterCodec;
464
465        int specifier = 141 + (favouredCodec == null ? 1 : 0) + (4 * tdefL) + (unfavouredCodec == null ? 2 : 0);
466        final IntList extraBandMetadata = new IntList(3);
467        if (favouredCodec != null) {
468            IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add);
469        }
470        if (tdefL == 0) {
471            IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add);
472        }
473        if (unfavouredCodec != null) {
474            IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add);
475        }
476        final int[] extraMetadata = extraBandMetadata.toArray();
477        final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata);
478        if (defaultCodec.isSigned()) {
479            specifier = -1 - specifier;
480        } else {
481            specifier = specifier + defaultCodec.getL();
482        }
483        final byte[] firstValueEncoded = defaultCodec.encode(new int[] {specifier});
484        final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length
485            + unfavouredEncoded.length;
486
487        if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) {
488            results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length);
489            final byte[] encodedBand = new byte[totalBandLength];
490            System.arraycopy(firstValueEncoded, 0, encodedBand, 0, firstValueEncoded.length);
491            System.arraycopy(favouredEncoded, 0, encodedBand, firstValueEncoded.length, favouredEncoded.length);
492            System.arraycopy(tokensEncoded, 0, encodedBand, firstValueEncoded.length + favouredEncoded.length,
493                tokensEncoded.length);
494            System.arraycopy(unfavouredEncoded, 0, encodedBand,
495                firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length, unfavouredEncoded.length);
496            results.encodedBand = encodedBand;
497            results.extraMetadata = extraMetadata;
498            if (l != 0) {
499                results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec);
500            } else {
501                results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec);
502            }
503        }
504    }
505
506    /**
507     * Encode a band of longs (values are split into their high and low 32 bits and then encoded as two separate bands
508     *
509     * @param name - name of the band (for debugging purposes)
510     * @param flags - the band
511     * @param loCodec - Codec for the low 32-bits band
512     * @param hiCodec - Codec for the high 32-bits band
513     * @param haveHiFlags - ignores the high band if true as all values would be zero
514     * @return the encoded band
515     * @throws Pack200Exception TODO
516     */
517    protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec,
518        final BHSDCodec hiCodec, final boolean haveHiFlags) throws Pack200Exception {
519        if (!haveHiFlags) {
520            final int[] loBits = new int[flags.length];
521            Arrays.setAll(loBits, i -> (int) flags[i]);
522            return encodeBandInt(name, loBits, loCodec);
523        }
524        final int[] hiBits = new int[flags.length];
525        final int[] loBits = new int[flags.length];
526        for (int i = 0; i < flags.length; i++) {
527            final long l = flags[i];
528            hiBits[i] = (int) (l >> 32);
529            loBits[i] = (int) l;
530        }
531        final byte[] hi = encodeBandInt(name, hiBits, hiCodec);
532        final byte[] lo = encodeBandInt(name, loBits, loCodec);
533        final byte[] total = new byte[hi.length + lo.length];
534        System.arraycopy(hi, 0, total, 0, hi.length);
535        System.arraycopy(lo, 0, total, hi.length + 1, lo.length);
536        return total;
537    }
538
539    /**
540     * Converts a list of Integers to an int[] array.
541     *
542     * @param integerList conversion source.
543     * @return conversion result.
544     */
545    protected int[] integerListToArray(final List<Integer> integerList) {
546        return integerList.stream().mapToInt(Integer::intValue).toArray();
547    }
548
549    /**
550     * Converts a list of Longs to an long[] array.
551     *
552     * @param longList conversion source.
553     * @return conversion result.
554     */
555    protected long[] longListToArray(final List<Long> longList) {
556        return longList.stream().mapToLong(Long::longValue).toArray();
557    }
558
559    /**
560     * Converts a list of ConstantPoolEntrys to an int[] array of their indices
561     *
562     * @param list conversion source.
563     * @return conversion result.
564     */
565    protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) {
566        final int[] array = new int[list.size()];
567        for (int i = 0; i < array.length; i++) {
568            array[i] = list.get(i).getIndex();
569            if (array[i] < 0) {
570                throw new IllegalArgumentException("Index should be > 0");
571            }
572        }
573        return array;
574    }
575
576    /**
577     * Converts a list of ConstantPoolEntrys or nulls to an int[] array of their indices +1 (or 0 for nulls)
578     *
579     * @param list conversion source.
580     * @return conversion result.
581     */
582    protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) {
583        final int[] array = new int[list.size()];
584        for (int j = 0; j < array.length; j++) {
585            final ConstantPoolEntry cpEntry = list.get(j);
586            array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1;
587            if (cpEntry != null && cpEntry.getIndex() < 0) {
588                throw new IllegalArgumentException("Index should be > 0");
589            }
590        }
591        return array;
592    }
593
594    protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec,
595        final BHSDCodec hiCodec, final boolean haveHiFlags) throws Pack200Exception {
596        return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags);
597    }
598
599    /*
600     * Flatten a 2-dimension array into a 1-dimension array
601     */
602    private long[] flatten(final long[][] flags) {
603        int totalSize = 0;
604        for (long[] flag : flags) {
605            totalSize += flag.length;
606        }
607        final long[] flatArray = new long[totalSize];
608        int index = 0;
609        for (long[] flag : flags) {
610            for (long element : flag) {
611                flatArray[index] = element;
612                index++;
613            }
614        }
615        return flatArray;
616    }
617
618    /**
619     * BandData represents information about a band, e.g. largest value etc and is used in the heuristics that calculate
620     * whether an alternative Codec could make the encoded band smaller.
621     */
622    public class BandData {
623
        // The band being analysed (the caller's array is kept, not copied).
        private final int[] band;
        // Minimum and maximum value seen in the band.
        private int smallest = Integer.MAX_VALUE;
        private int largest = Integer.MIN_VALUE;
        // Minimum and maximum over the first value and all differences between adjacent values.
        private int smallestDelta;
        private int largestDelta;

        // Number of adjacent differences that are >= 0.
        private int deltaIsAscending = 0;
        // Number of adjacent differences whose absolute value is below 256.
        private int smallDeltaCount = 0;

        // Mean absolute difference between adjacent values, and mean absolute value.
        private double averageAbsoluteDelta = 0;
        private double averageAbsoluteValue = 0;

        // Value -> number of occurrences; only built when effort > 3 and the band is non-empty, otherwise null.
        private Map<Integer, Integer> distinctValues;
637
638        /**
639         * Create a new instance of BandData. The band is then analysed.
640         *
641         * @param band - the band of integers
642         */
643        public BandData(final int[] band) {
644            this.band = band;
645            final Integer one = Integer.valueOf(1);
646            for (int i = 0; i < band.length; i++) {
647                if (band[i] < smallest) {
648                    smallest = band[i];
649                }
650                if (band[i] > largest) {
651                    largest = band[i];
652                }
653                if (i != 0) {
654                    final int delta = band[i] - band[i - 1];
655                    if (delta < smallestDelta) {
656                        smallestDelta = delta;
657                    }
658                    if (delta > largestDelta) {
659                        largestDelta = delta;
660                    }
661                    if (delta >= 0) {
662                        deltaIsAscending++;
663                    }
664                    averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1);
665                    if (Math.abs(delta) < 256) {
666                        smallDeltaCount++;
667                    }
668                } else {
669                    smallestDelta = band[0];
670                    largestDelta = band[0];
671                }
672                averageAbsoluteValue += (double) Math.abs(band[i]) / (double) band.length;
673                if (effort > 3) { // do calculations needed to consider population codec
674                    if (distinctValues == null) {
675                        distinctValues = new HashMap<>();
676                    }
677                    final Integer value = Integer.valueOf(band[i]);
678                    Integer count = distinctValues.get(value);
679                    if (count == null) {
680                        count = one;
681                    } else {
682                        count = Integer.valueOf(count.intValue() + 1);
683                    }
684                    distinctValues.put(value, count);
685                }
686            }
687        }
688
689        /**
690         * Returns true if the deltas between adjacent band elements are mainly small (heuristic).
691         *
692         * @return true if the deltas between adjacent band elements are mainly small (heuristic).
693         */
694        public boolean mainlySmallDeltas() {
695            // Note: the value below has been tuned - please test carefully if changing it
696            return (float) smallDeltaCount / (float) band.length > 0.7F;
697        }
698
699        /**
700         * Returns true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic).
701         *
702         * @return true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic).
703         */
704        public boolean wellCorrelated() {
705            // Note: the value below has been tuned - please test carefully if changing it
706            return averageAbsoluteDelta * 3.1 < averageAbsoluteValue;
707        }
708
709        /**
710         * Returns true if the band deltas are mainly positive (heuristic).
711         *
712         * @return true if the band deltas are mainly positive (heuristic).
713         */
714        public boolean mainlyPositiveDeltas() {
715            // Note: the value below has been tuned - please test carefully if changing it
716            return (float) deltaIsAscending / (float) band.length > 0.95F;
717        }
718
719        /**
720         * Returns true if any band elements are negative.
721         *
722         * @return true if any band elements are negative.
723         */
724        public boolean anyNegatives() {
725            return smallest < 0;
726        }
727
728        /**
729         * Returns the total number of distinct values found in the band.
730         *
731         * @return the total number of distinct values found in the band.
732         */
733        public int numDistinctValues() {
734            if (distinctValues == null) {
735                return band.length;
736            }
737            return distinctValues.size();
738        }
739
740    }
741
742    /**
743     * Results obtained by trying different Codecs to encode a band
744     */
745    public class BandAnalysisResults {
746
747        // The number of Codecs tried so far
748        private int numCodecsTried = 0;
749
750        // The number of bytes saved by using betterCodec instead of the default codec
751        private int saved = 0;
752
753        // Extra metadata to pass to the segment header (to be appended to the
754        // band_headers band)
755        private int[] extraMetadata;
756
757        // The results of encoding the band with betterCodec
758        private byte[] encodedBand;
759
760        // The best Codec found so far, or should be null if the default is the
761        // best so far
762        private Codec betterCodec;
763
764    }
765
766}