/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.compress.harmony.pack200;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
 * Abstract superclass for a set of bands.
 * <p>
 * Provides the shared band-encoding helpers: encoding with a fixed codec, and (for effort levels above 1) searching
 * for an alternative codec that produces a smaller encoding than the default one.
 * </p>
 */
public abstract class BandSet {

    protected final SegmentHeader segmentHeader;
    final int effort;

    // Minimum size of band for each effort level where we consider alternative codecs
    // Note: these values have been tuned - please test carefully if changing them
    private static final int[] effortThresholds = {0, 0, 1000, 500, 100, 100, 100, 100, 100, 0};

    // NOTE(review): these two caches are filled lazily by analyseBand but are not read anywhere in this file.
    private long[] canonicalLargest;
    private long[] canonicalSmallest;

    /**
     * Create a new BandSet
     *
     * @param effort - the packing effort to be used (must be 1-9)
     * @param header - the segment header
     */
    public BandSet(final int effort, final SegmentHeader header) {
        this.effort = effort;
        this.segmentHeader = header;
    }

    /**
     * Write the packed set of bands to the given output stream
     *
     * @param out the stream the packed bands are written to
     * @throws IOException If an I/O error occurs.
     * @throws Pack200Exception If a band cannot be encoded.
     */
    public abstract void pack(OutputStream out) throws IOException, Pack200Exception;

    /**
     * Encode a band without considering other Codecs
     *
     * @param band - the band
     * @param codec - the Codec to use
     * @return the encoded band
     * @throws Pack200Exception If the codec fails to encode the band.
     */
    public byte[] encodeScalar(final int[] band, final BHSDCodec codec) throws Pack200Exception {
        return codec.encode(band);
    }

    /**
     * Encode a single value with the given Codec
     *
     * @param value - the value to encode
     * @param codec - Codec to use
     * @return the encoded value
     * @throws Pack200Exception If the codec fails to encode the value.
     */
    public byte[] encodeScalar(final int value, final BHSDCodec codec) throws Pack200Exception {
        return codec.encode(value);
    }

    /**
     * Encode a band of integers. The default codec may be used, but other Codecs are considered if effort is greater
     * than 1.
     * <p>
     * When an alternative codec is chosen, the encoded band is prefixed with a specifier value (encoded with the
     * default codec) and any further specifier values are appended to the segment header.
     * </p>
     *
     * @param name - name of the band (used for debugging)
     * @param ints - the band
     * @param defaultCodec - the default Codec
     * @return the encoded band; an empty array if {@code ints} is empty
     * @throws Pack200Exception If a codec fails to encode the band.
     */
    public byte[] encodeBandInt(final String name, final int[] ints, final BHSDCodec defaultCodec)
        throws Pack200Exception {
        byte[] encodedBand = null;
        // Useful for debugging
//        if(ints.length > 0) {
//            System.out.println("encoding " + name + " " + ints.length);
//        }
        if (effort > 1 && ints.length >= effortThresholds[effort]) {
            final BandAnalysisResults results = analyseBand(name, ints, defaultCodec);
            final Codec betterCodec = results.betterCodec;
            encodedBand = results.encodedBand;
            // instanceof is false for null, so a null betterCodec falls through to the default-codec path below.
            if (betterCodec instanceof BHSDCodec) {
                final int[] specifierBand = CodecEncoding.getSpecifier(betterCodec, defaultCodec);
                int specifier = specifierBand[0];
                // Everything after the first specifier value goes into the segment header.
                for (int i = 1; i < specifierBand.length; i++) {
                    segmentHeader.appendBandCodingSpecifier(specifierBand[i]);
                }
                if (defaultCodec.isSigned()) {
                    specifier = -1 - specifier;
                } else {
                    specifier = specifier + defaultCodec.getL();
                }
                return concat(defaultCodec.encode(new int[] {specifier}), encodedBand);
            }
            if (betterCodec instanceof PopulationCodec) {
                // The population-coded band already carries its leading specifier value.
                IntStream.of(results.extraMetadata).forEach(segmentHeader::appendBandCodingSpecifier);
                return encodedBand;
            }
        }

        // If we get here then we've decided to use the default codec.
        if (ints.length == 0) {
            return new byte[0];
        }
        if (encodedBand == null) {
            encodedBand = defaultCodec.encode(ints);
        }
        final int first = ints[0];
        if (defaultCodec.getB() != 1) {
            // The first value falls in the range that is also used for specifier values, so an explicit specifier
            // for the default codec is written in front of the band.
            if (defaultCodec.isSigned() && first >= -256 && first <= -1) {
                final int specifier = -1 - CodecEncoding.getSpecifierForDefaultCodec(defaultCodec);
                return concat(defaultCodec.encode(new int[] {specifier}), encodedBand);
            }
            if (!defaultCodec.isSigned() && first >= defaultCodec.getL() && first <= defaultCodec.getL() + 255) {
                final int specifier = CodecEncoding.getSpecifierForDefaultCodec(defaultCodec) + defaultCodec.getL();
                return concat(defaultCodec.encode(new int[] {specifier}), encodedBand);
            }
        }
        return encodedBand;
    }

    /*
     * Returns a new array holding the bytes of first followed by the bytes of second.
     */
    private static byte[] concat(final byte[] first, final byte[] second) {
        final byte[] joined = new byte[first.length + second.length];
        System.arraycopy(first, 0, joined, 0, first.length);
        System.arraycopy(second, 0, joined, first.length, second.length);
        return joined;
    }

    /*
     * Encodes the band with the default codec and then, if the result looks improvable, tries alternative codecs
     * (BYTE1, a population codec, and families of canonical codecs chosen from the band's statistics). The smallest
     * encoding found and the codec that produced it are returned in the results; betterCodec stays null when the
     * default codec is kept.
     */
    private BandAnalysisResults analyseBand(final String name, final int[] band, final BHSDCodec defaultCodec)
        throws Pack200Exception {

        final BandAnalysisResults results = new BandAnalysisResults();

        if (canonicalLargest == null) {
            canonicalLargest = new long[116];
            canonicalSmallest = new long[116];
            for (int i = 1; i < canonicalLargest.length; i++) {
                canonicalLargest[i] = CodecEncoding.getCanonicalCodec(i).largest();
                canonicalSmallest[i] = CodecEncoding.getCanonicalCodec(i).smallest();
            }
        }
        final BandData bandData = new BandData(band);

        // Check that there is a reasonable saving to be made
        final byte[] encoded = defaultCodec.encode(band);
        results.encodedBand = encoded;

        // Note: these values have been tuned - please test carefully if changing them
        if (encoded.length <= band.length + 23 - 2 * effort) { // TODO: tweak
            return results;
        }

        // Check if we can use BYTE1 as that's a 1:1 mapping if we can
        if (!bandData.anyNegatives() && bandData.largest <= Codec.BYTE1.largest()) {
            results.encodedBand = Codec.BYTE1.encode(band);
            results.betterCodec = Codec.BYTE1;
            return results;
        }

        // Consider a population codec (but can't be nested)
        if (effort > 3 && !name.equals("POPULATION")) {
            final int numDistinctValues = bandData.numDistinctValues();
            final float distinctValuesAsProportion = (float) numDistinctValues / (float) band.length;

            // Note: these values have been tuned - please test carefully if changing them
            if (numDistinctValues < 100 || distinctValuesAsProportion < 0.02
                || (effort > 6 && distinctValuesAsProportion < 0.04)) { // TODO: tweak
                encodeWithPopulationCodec(name, band, defaultCodec, bandData, results);
                if (timeToStop(results)) {
                    return results;
                }
            }
        }

        final List<BHSDCodec[]> codecFamiliesToTry = new ArrayList<>();

        // See if the deltas are mainly small increments
        if (bandData.mainlyPositiveDeltas() && bandData.mainlySmallDeltas()) {
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs2);
        }

        if (bandData.wellCorrelated()) { // Try delta encodings
            if (bandData.mainlyPositiveDeltas()) {
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
            } else {
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
                codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
                codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
            }
        } else if (bandData.anyNegatives()) {
            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs1);
            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaSignedCodecs2);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs1);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs2);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs3);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs4);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaSignedCodecs5);
        } else {
            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs1);
            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs3);
            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs4);
            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs5);
            codecFamiliesToTry.add(CanonicalCodecFamilies.nonDeltaUnsignedCodecs2);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs1);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs3);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs4);
            codecFamiliesToTry.add(CanonicalCodecFamilies.deltaUnsignedCodecs5);
        }

        for (final BHSDCodec[] family : codecFamiliesToTry) {
            tryCodecs(name, band, defaultCodec, bandData, results, encoded, family);
            if (timeToStop(results)) {
                break;
            }
        }

        return results;
    }

    /*
     * Returns true once enough codecs have been tried for the configured effort level.
     */
    private boolean timeToStop(final BandAnalysisResults results) {
        // if tried more than effort number of codecs for this band then return
        // Note: these values have been tuned - please test carefully if changing them
        if (effort > 6) {
            return results.numCodecsTried >= effort * 2;
        }
        return results.numCodecsTried >= effort;
        // May want to also check how much we've saved if performance needs improving, e.g. saved more than effort*2 %
        // || (float)results.saved/(float)results.encodedBand.length > (float)effort * 2/100;
    }

    /*
     * Tries each codec of one family in order, recording in results the one that saves the most bytes compared with
     * the default encoding (after accounting for the size of the specifier needed to announce it).
     */
    private void tryCodecs(final String name, final int[] band, final BHSDCodec defaultCodec, final BandData bandData,
        final BandAnalysisResults results, final byte[] encoded, final BHSDCodec[] potentialCodecs)
        throws Pack200Exception {
        for (final BHSDCodec potential : potentialCodecs) {
            if (potential.equals(defaultCodec)) {
                return; // don't try codecs with greater cardinality in the same 'family' as the default codec as there
                        // won't be any savings
            }
            if (canRepresent(potential, bandData)) {
                final byte[] encoded2 = potential.encode(band);
                results.numCodecsTried++;
                final byte[] specifierEncoded = defaultCodec.encode(CodecEncoding.getSpecifier(potential, null));
                final int saved = encoded.length - encoded2.length - specifierEncoded.length;
                if (saved > results.saved) {
                    results.betterCodec = potential;
                    results.encodedBand = encoded2;
                    results.saved = saved;
                }
            }
            if (timeToStop(results)) {
                return;
            }
        }
    }

    /*
     * Returns true if the codec's range covers every value of the band and, for delta codecs, every delta as well.
     */
    private static boolean canRepresent(final BHSDCodec codec, final BandData bandData) {
        // TODO: can represent some negative deltas with overflow
        if (codec.isDelta()
            && (codec.largest() < bandData.largestDelta || codec.smallest() > bandData.smallestDelta)) {
            return false;
        }
        return codec.largest() >= bandData.largest && codec.smallest() <= bandData.smallest;
    }

// This could be useful if further enhancements are done but is not currently used
//
//  private void encodeWithRunCodec(String name, int[] band, int index,
//          BHSDCodec defaultCodec, BandData bandData,
//          BandAnalysisResults results) throws Pack200Exception {
//      int[] firstBand = new int[index];
//      int[] secondBand = new int[band.length - index];
//      System.arraycopy(band, 0, firstBand, 0, index);
//      System.arraycopy(band, index, secondBand, 0, secondBand.length);
//      BandAnalysisResults firstResults = analyseBand(name + "A", firstBand, defaultCodec);
//      BandAnalysisResults secondResults = analyseBand(name + "B", secondBand, defaultCodec);
//      int specifier = 117;
//      byte[] specifierEncoded = defaultCodec.encode(new int[] {specifier});
//      int totalLength = firstResults.encodedBand.length + secondResults.encodedBand.length + specifierEncoded.length + 4; // TODO actual
//      if(totalLength < results.encodedBand.length) {
//          System.out.println("using run codec");
//          results.saved += results.encodedBand.length - totalLength;
//          byte[] encodedBand = new byte[specifierEncoded.length + firstResults.encodedBand.length + secondResults.encodedBand.length];
//          System.arraycopy(specifierEncoded, 0, encodedBand, 0, specifierEncoded.length);
//          System.arraycopy(firstResults.encodedBand, 0, encodedBand, specifierEncoded.length, firstResults.encodedBand.length);
//          System.arraycopy(secondResults.encodedBand, 0, encodedBand, specifierEncoded.length + firstResults.encodedBand.length, secondResults.encodedBand.length);
//          results.encodedBand = encodedBand;
//          results.betterCodec = new RunCodec(index, firstResults.betterCodec, secondResults.betterCodec);
//      }
//  }

    /*
     * Tries a population coding of the band: the band is split into a band of favoured values, a band of tokens
     * (indices into the favoured values, 0 meaning "not favoured") and a band of the remaining unfavoured values.
     * If the three bands plus the required metadata are smaller than the current best encoding, results is updated.
     */
    private void encodeWithPopulationCodec(final String name, final int[] band, final BHSDCodec defaultCodec,
        final BandData bandData, final BandAnalysisResults results) throws Pack200Exception {
        results.numCodecsTried += 3; // quite a bit more effort to try this codec
        final Map<Integer, Integer> distinctValues = bandData.distinctValues;

        final List<Integer> favored = new ArrayList<>();
        distinctValues.forEach((k, v) -> {
            if (v.intValue() > 2 || distinctValues.size() < 256) { // TODO: tweak
                favored.add(k);
            }
        });

        // Sort the favored list with the most commonly occurring first
        if (distinctValues.size() > 255) {
            favored.sort((arg0, arg1) -> distinctValues.get(arg1).compareTo(distinctValues.get(arg0)));
        }

        final Map<Integer, Integer> favoredToIndex = new HashMap<>();
        for (int i = 0; i < favored.size(); i++) {
            favoredToIndex.put(favored.get(i), Integer.valueOf(i));
        }

        final IntList unfavoured = new IntList();
        final int[] tokens = new int[band.length];
        for (int i = 0; i < band.length; i++) {
            final Integer favouredIndex = favoredToIndex.get(Integer.valueOf(band[i]));
            if (favouredIndex == null) {
                tokens[i] = 0;
                unfavoured.add(band[i]);
            } else {
                tokens[i] = favouredIndex.intValue() + 1;
            }
        }
        favored.add(favored.get(favored.size() - 1)); // repeat last value
        final int[] favouredBand = integerListToArray(favored);
        final int[] unfavouredBand = unfavoured.toArray();

        // Analyse the three bands to get the best codec
        final BandAnalysisResults favouredResults = analyseBand("POPULATION", favouredBand, defaultCodec);
        final BandAnalysisResults unfavouredResults = analyseBand("POPULATION", unfavouredBand, defaultCodec);

        int tdefL = 0;
        int l = 0;
        Codec tokenCodec = null;
        final byte[] tokensEncoded;
        final int k = favored.size() - 1; // number of favoured values, not counting the repeated terminator
        if (k < 256) {
            tdefL = 1;
            tokensEncoded = Codec.BYTE1.encode(tokens);
        } else {
            final BandAnalysisResults tokenResults = analyseBand("POPULATION", tokens, defaultCodec);
            tokenCodec = tokenResults.betterCodec;
            tokensEncoded = tokenResults.encodedBand;
            if (tokenCodec == null) {
                tokenCodec = defaultCodec;
            }
            final BHSDCodec tokenBhsd = (BHSDCodec) tokenCodec;
            l = tokenBhsd.getL();
            final int h = tokenBhsd.getH();
            final int s = tokenBhsd.getS();
            final int b = tokenBhsd.getB();
            final int d = tokenBhsd.isDelta() ? 1 : 0;
            if (s == 0 && d == 0) {
                boolean canUseTDefL = true;
                if (b > 1) {
                    final BHSDCodec oneLowerB = new BHSDCodec(b - 1, h);
                    if (oneLowerB.largest() >= k) {
                        canUseTDefL = false;
                    }
                }
                if (canUseTDefL) {
                    tdefL = tdefLForL(l);
                }
            }
        }

        final byte[] favouredEncoded = favouredResults.encodedBand;
        final byte[] unfavouredEncoded = unfavouredResults.encodedBand;
        final Codec favouredCodec = favouredResults.betterCodec;
        final Codec unfavouredCodec = unfavouredResults.betterCodec;

        int specifier = 141 + (favouredCodec == null ? 1 : 0) + (4 * tdefL) + (unfavouredCodec == null ? 2 : 0);
        final IntList extraBandMetadata = new IntList(3);
        if (favouredCodec != null) {
            IntStream.of(CodecEncoding.getSpecifier(favouredCodec, null)).forEach(extraBandMetadata::add);
        }
        if (tdefL == 0) {
            IntStream.of(CodecEncoding.getSpecifier(tokenCodec, null)).forEach(extraBandMetadata::add);
        }
        if (unfavouredCodec != null) {
            IntStream.of(CodecEncoding.getSpecifier(unfavouredCodec, null)).forEach(extraBandMetadata::add);
        }
        final int[] extraMetadata = extraBandMetadata.toArray();
        final byte[] extraMetadataEncoded = Codec.UNSIGNED5.encode(extraMetadata);
        if (defaultCodec.isSigned()) {
            specifier = -1 - specifier;
        } else {
            specifier = specifier + defaultCodec.getL();
        }
        final byte[] firstValueEncoded = defaultCodec.encode(new int[] {specifier});
        final int totalBandLength = firstValueEncoded.length + favouredEncoded.length + tokensEncoded.length
            + unfavouredEncoded.length;

        if (totalBandLength + extraMetadataEncoded.length < results.encodedBand.length) {
            results.saved += results.encodedBand.length - (totalBandLength + extraMetadataEncoded.length);
            // Layout: specifier, favoured values, tokens, unfavoured values.
            final byte[] encodedBand = new byte[totalBandLength];
            int offset = 0;
            System.arraycopy(firstValueEncoded, 0, encodedBand, offset, firstValueEncoded.length);
            offset += firstValueEncoded.length;
            System.arraycopy(favouredEncoded, 0, encodedBand, offset, favouredEncoded.length);
            offset += favouredEncoded.length;
            System.arraycopy(tokensEncoded, 0, encodedBand, offset, tokensEncoded.length);
            offset += tokensEncoded.length;
            System.arraycopy(unfavouredEncoded, 0, encodedBand, offset, unfavouredEncoded.length);
            results.encodedBand = encodedBand;
            results.extraMetadata = extraMetadata;
            if (l != 0) {
                results.betterCodec = new PopulationCodec(favouredCodec, l, unfavouredCodec);
            } else {
                results.betterCodec = new PopulationCodec(favouredCodec, tokenCodec, unfavouredCodec);
            }
        }
    }

    /*
     * Maps a token codec's L value to the tdefL value used in the population specifier, or 0 if there is no
     * predefined value for that L.
     */
    private static int tdefLForL(final int l) {
        switch (l) {
        case 4:
            return 1;
        case 8:
            return 2;
        case 16:
            return 3;
        case 32:
            return 4;
        case 64:
            return 5;
        case 128:
            return 6;
        case 192:
            return 7;
        case 224:
            return 8;
        case 240:
            return 9;
        case 248:
            return 10;
        case 252:
            return 11;
        default:
            return 0;
        }
    }

    /**
     * Encode a band of longs (values are split into their high and low 32 bits and then encoded as two separate
     * bands, the high band first).
     *
     * @param name - name of the band (for debugging purposes)
     * @param flags - the band
     * @param loCodec - Codec for the low 32-bits band
     * @param hiCodec - Codec for the high 32-bits band
     * @param haveHiFlags - if false, the high 32 bits are ignored and only the low band is encoded
     * @return the encoded band
     * @throws Pack200Exception If a codec fails to encode a band.
     */
    protected byte[] encodeFlags(final String name, final long[] flags, final BHSDCodec loCodec,
        final BHSDCodec hiCodec, final boolean haveHiFlags) throws Pack200Exception {
        if (!haveHiFlags) {
            final int[] loBits = new int[flags.length];
            Arrays.setAll(loBits, i -> (int) flags[i]);
            return encodeBandInt(name, loBits, loCodec);
        }
        final int[] hiBits = new int[flags.length];
        final int[] loBits = new int[flags.length];
        for (int i = 0; i < flags.length; i++) {
            final long l = flags[i];
            hiBits[i] = (int) (l >> 32);
            loBits[i] = (int) l;
        }
        final byte[] hi = encodeBandInt(name, hiBits, hiCodec);
        final byte[] lo = encodeBandInt(name, loBits, loCodec);
        final byte[] total = new byte[hi.length + lo.length];
        System.arraycopy(hi, 0, total, 0, hi.length);
        // The low band starts directly after the high band; an offset of hi.length + 1 would run past the end of
        // total and make arraycopy throw IndexOutOfBoundsException.
        System.arraycopy(lo, 0, total, hi.length, lo.length);
        return total;
    }

    /**
     * Converts a list of Integers to an int[] array.
     *
     * @param integerList conversion source.
     * @return conversion result.
     */
    protected int[] integerListToArray(final List<Integer> integerList) {
        return integerList.stream().mapToInt(Integer::intValue).toArray();
    }

    /**
     * Converts a list of Longs to an long[] array.
     *
     * @param longList conversion source.
     * @return conversion result.
     */
    protected long[] longListToArray(final List<Long> longList) {
        return longList.stream().mapToLong(Long::longValue).toArray();
    }

    /**
     * Converts a list of ConstantPoolEntrys to an int[] array of their indices
     *
     * @param list conversion source.
     * @return conversion result.
     * @throws IllegalArgumentException if any entry has a negative index.
     */
    protected int[] cpEntryListToArray(final List<? extends ConstantPoolEntry> list) {
        final int[] array = new int[list.size()];
        for (int i = 0; i < array.length; i++) {
            array[i] = list.get(i).getIndex();
            if (array[i] < 0) {
                throw new IllegalArgumentException("Index should be >= 0");
            }
        }
        return array;
    }

    /**
     * Converts a list of ConstantPoolEntrys or nulls to an int[] array of their indices +1 (or 0 for nulls)
     *
     * @param list conversion source.
     * @return conversion result.
     * @throws IllegalArgumentException if any non-null entry has a negative index.
     */
    protected int[] cpEntryOrNullListToArray(final List<? extends ConstantPoolEntry> list) {
        final int[] array = new int[list.size()];
        for (int j = 0; j < array.length; j++) {
            final ConstantPoolEntry cpEntry = list.get(j);
            array[j] = cpEntry == null ? 0 : cpEntry.getIndex() + 1;
            if (cpEntry != null && cpEntry.getIndex() < 0) {
                throw new IllegalArgumentException("Index should be >= 0");
            }
        }
        return array;
    }

    /**
     * Encode a two-dimensional band of longs by flattening it (row by row) and encoding the result as for a
     * one-dimensional band of longs.
     *
     * @param name - name of the band (for debugging purposes)
     * @param flags - the band
     * @param loCodec - Codec for the low 32-bits band
     * @param hiCodec - Codec for the high 32-bits band
     * @param haveHiFlags - if false, the high 32 bits are ignored and only the low band is encoded
     * @return the encoded band
     * @throws Pack200Exception If a codec fails to encode a band.
     */
    protected byte[] encodeFlags(final String name, final long[][] flags, final BHSDCodec loCodec,
        final BHSDCodec hiCodec, final boolean haveHiFlags) throws Pack200Exception {
        return encodeFlags(name, flatten(flags), loCodec, hiCodec, haveHiFlags);
    }

    /*
     * Flatten a 2-dimension array into a 1-dimension array
     */
    private long[] flatten(final long[][] flags) {
        int totalSize = 0;
        for (final long[] row : flags) {
            totalSize += row.length;
        }
        final long[] flatArray = new long[totalSize];
        int index = 0;
        for (final long[] row : flags) {
            System.arraycopy(row, 0, flatArray, index, row.length);
            index += row.length;
        }
        return flatArray;
    }

    /**
     * BandData represents information about a band, e.g. largest value etc and is used in the heuristics that calculate
     * whether an alternative Codec could make the encoded band smaller.
     */
    public class BandData {

        private final int[] band;
        private int smallest = Integer.MAX_VALUE;
        private int largest = Integer.MIN_VALUE;
        private int smallestDelta;
        private int largestDelta;

        // Number of deltas that are >= 0
        private int deltaIsAscending = 0;
        // Number of deltas whose absolute value is below 256
        private int smallDeltaCount = 0;

        private double averageAbsoluteDelta = 0;
        private double averageAbsoluteValue = 0;

        // Occurrence count per distinct value; only populated when effort > 3 and the band is non-empty
        private Map<Integer, Integer> distinctValues;

        /**
         * Create a new instance of BandData. The band is then analysed.
         *
         * @param band - the band of integers
         */
        public BandData(final int[] band) {
            this.band = band;
            for (int i = 0; i < band.length; i++) {
                final int current = band[i];
                if (current < smallest) {
                    smallest = current;
                }
                if (current > largest) {
                    largest = current;
                }
                if (i == 0) {
                    // The first element is treated as a delta from zero.
                    smallestDelta = current;
                    largestDelta = current;
                } else {
                    final int delta = current - band[i - 1];
                    if (delta < smallestDelta) {
                        smallestDelta = delta;
                    }
                    if (delta > largestDelta) {
                        largestDelta = delta;
                    }
                    if (delta >= 0) {
                        deltaIsAscending++;
                    }
                    averageAbsoluteDelta += (double) Math.abs(delta) / (double) (band.length - 1);
                    if (Math.abs(delta) < 256) {
                        smallDeltaCount++;
                    }
                }
                averageAbsoluteValue += (double) Math.abs(current) / (double) band.length;
                if (effort > 3) { // do calculations needed to consider population codec
                    if (distinctValues == null) {
                        distinctValues = new HashMap<>();
                    }
                    distinctValues.merge(Integer.valueOf(current), Integer.valueOf(1), Integer::sum);
                }
            }
        }

        /**
         * Returns true if the deltas between adjacent band elements are mainly small (heuristic).
         *
         * @return true if the deltas between adjacent band elements are mainly small (heuristic).
         */
        public boolean mainlySmallDeltas() {
            // Note: the value below has been tuned - please test carefully if changing it
            return (float) smallDeltaCount / (float) band.length > 0.7F;
        }

        /**
         * Returns true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic).
         *
         * @return true if the band is well correlated (i.e. would be suitable for a delta encoding) (heuristic).
         */
        public boolean wellCorrelated() {
            // Note: the value below has been tuned - please test carefully if changing it
            return averageAbsoluteDelta * 3.1 < averageAbsoluteValue;
        }

        /**
         * Returns true if the band deltas are mainly positive (heuristic).
         *
         * @return true if the band deltas are mainly positive (heuristic).
         */
        public boolean mainlyPositiveDeltas() {
            // Note: the value below has been tuned - please test carefully if changing it
            return (float) deltaIsAscending / (float) band.length > 0.95F;
        }

        /**
         * Returns true if any band elements are negative.
         *
         * @return true if any band elements are negative.
         */
        public boolean anyNegatives() {
            return smallest < 0;
        }

        /**
         * Returns the total number of distinct values found in the band. If distinct values were not counted, the
         * length of the band is returned instead.
         *
         * @return the total number of distinct values found in the band.
         */
        public int numDistinctValues() {
            if (distinctValues == null) {
                return band.length;
            }
            return distinctValues.size();
        }

    }

    /**
     * Results obtained by trying different Codecs to encode a band
     */
    public class BandAnalysisResults {

        // The number of Codecs tried so far
        private int numCodecsTried = 0;

        // The number of bytes saved by using betterCodec instead of the default codec
        private int saved = 0;

        // Extra metadata to pass to the segment header (to be appended to the
        // band_headers band)
        private int[] extraMetadata;

        // The results of encoding the band with betterCodec
        private byte[] encodedBand;

        // The best Codec found so far, or should be null if the default is the
        // best so far
        private Codec betterCodec;

    }

}