001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.bzip2; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorOutputStream; 026 027/** 028 * An output stream that compresses into the BZip2 format into another stream. 029 * 030 * <p> 031 * The compression requires large amounts of memory. Thus you should call the 032 * {@link #close() close()} method as soon as possible, to force 033 * {@code BZip2CompressorOutputStream} to release the allocated memory. 034 * </p> 035 * 036 * <p> You can shrink the amount of allocated memory and maybe raise 037 * the compression speed by choosing a lower blocksize, which in turn 038 * may cause a lower compression ratio. You can avoid unnecessary 039 * memory allocation by avoiding using a blocksize which is bigger 040 * than the size of the input. </p> 041 * 042 * <p> You can compute the memory usage for compressing by the 043 * following formula: </p> 044 * 045 * <pre> 046 * <code>400k + (9 * blocksize)</code>. 047 * </pre> 048 * 049 * <p> To get the memory required for decompression by {@link 050 * BZip2CompressorInputStream} use </p> 051 * 052 * <pre> 053 * <code>65k + (5 * blocksize)</code>. 054 * </pre> 055 * 056 * <table style="width:100%" border="1"> 057 * <caption>Memory usage by blocksize</caption> 058 * <tr> 059 * <th colspan="3">Memory usage by blocksize</th> 060 * </tr> 061 * <tr> 062 * <th style="text-align: right">Blocksize</th> <th style="text-align: right">Compression<br> 063 * memory usage</th> <th style="text-align: right">Decompression<br> 064 * memory usage</th> 065 * </tr> 066 * <tr> 067 * <td style="text-align: right">100k</td> 068 * <td style="text-align: right">1300k</td> 069 * <td style="text-align: right">565k</td> 070 * </tr> 071 * <tr> 072 * <td style="text-align: right">200k</td> 073 * <td style="text-align: right">2200k</td> 074 * <td style="text-align: right">1065k</td> 075 * </tr> 076 * <tr> 077 * <td style="text-align: right">300k</td> 078 * <td style="text-align: right">3100k</td> 079 * <td style="text-align: right">1565k</td> 080 * </tr> 081 * <tr> 082 * <td style="text-align: right">400k</td> 083 * <td style="text-align: right">4000k</td> 084 * <td style="text-align: right">2065k</td> 085 * </tr> 086 * <tr> 087 * <td style="text-align: right">500k</td> 088 * <td style="text-align: right">4900k</td> 089 * <td style="text-align: right">2565k</td> 090 * </tr> 091 * <tr> 092 * <td style="text-align: right">600k</td> 093 * <td style="text-align: right">5800k</td> 094 * <td style="text-align: right">3065k</td> 095 * </tr> 096 * <tr> 097 * <td style="text-align: right">700k</td> 098 * <td style="text-align: right">6700k</td> 099 * <td style="text-align: right">3565k</td> 100 * </tr> 101 * <tr> 102 * <td style="text-align: right">800k</td> 103 * <td style="text-align: right">7600k</td> 104 * <td style="text-align: right">4065k</td> 105 * </tr> 106 * <tr> 107 * <td style="text-align: right">900k</td> 108 * <td style="text-align: right">8500k</td> 109 * <td style="text-align: right">4565k</td> 110 * </tr> 111 * </table> 112 * 113 * <p> 114 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the 115 * bzipped input is smaller than one block. 116 * </p> 117 * 118 * <p> 119 * Instances of this class are not threadsafe. 120 * </p> 121 * 122 * <p> 123 * TODO: Update to BZip2 1.0.1 124 * </p> 125 * @NotThreadSafe 126 */ 127public class BZip2CompressorOutputStream extends CompressorOutputStream 128 implements BZip2Constants { 129 130 /** 131 * The minimum supported blocksize {@code == 1}. 132 */ 133 public static final int MIN_BLOCKSIZE = 1; 134 135 /** 136 * The maximum supported blocksize {@code == 9}. 137 */ 138 public static final int MAX_BLOCKSIZE = 9; 139 140 private static final int GREATER_ICOST = 15; 141 private static final int LESSER_ICOST = 0; 142 143 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, 144 final Data dat, final int alphaSize, 145 final int maxLen) { 146 /* 147 * Nodes and heap entries run from 1. Entry 0 for both the heap and 148 * nodes is a sentinel. 149 */ 150 final int[] heap = dat.heap; 151 final int[] weight = dat.weight; 152 final int[] parent = dat.parent; 153 154 for (int i = alphaSize; --i >= 0;) { 155 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 156 } 157 158 for (boolean tooLong = true; tooLong;) { 159 tooLong = false; 160 161 int nNodes = alphaSize; 162 int nHeap = 0; 163 heap[0] = 0; 164 weight[0] = 0; 165 parent[0] = -2; 166 167 for (int i = 1; i <= alphaSize; i++) { 168 parent[i] = -1; 169 nHeap++; 170 heap[nHeap] = i; 171 172 int zz = nHeap; 173 final int tmp = heap[zz]; 174 while (weight[tmp] < weight[heap[zz >> 1]]) { 175 heap[zz] = heap[zz >> 1]; 176 zz >>= 1; 177 } 178 heap[zz] = tmp; 179 } 180 181 while (nHeap > 1) { 182 final int n1 = heap[1]; 183 heap[1] = heap[nHeap]; 184 nHeap--; 185 186 int yy = 0; 187 int zz = 1; 188 int tmp = heap[1]; 189 190 while (true) { 191 yy = zz << 1; 192 193 if (yy > nHeap) { 194 break; 195 } 196 197 if ((yy < nHeap) 198 && (weight[heap[yy + 1]] < weight[heap[yy]])) { 199 yy++; 200 } 201 202 if (weight[tmp] < weight[heap[yy]]) { 203 break; 204 } 205 206 heap[zz] = heap[yy]; 207 zz = yy; 208 } 209 210 heap[zz] = tmp; 211 212 final int n2 = heap[1]; 213 heap[1] = heap[nHeap]; 214 nHeap--; 215 216 yy = 0; 217 zz = 1; 218 tmp = heap[1]; 219 220 while (true) { 221 yy = zz << 1; 222 223 if (yy > nHeap) { 224 break; 225 } 226 227 if ((yy < nHeap) 228 && (weight[heap[yy + 1]] < weight[heap[yy]])) { 229 yy++; 230 } 231 232 if (weight[tmp] < weight[heap[yy]]) { 233 break; 234 } 235 236 heap[zz] = heap[yy]; 237 zz = yy; 238 } 239 240 heap[zz] = tmp; 241 nNodes++; 242 parent[n1] = parent[n2] = nNodes; 243 244 final int weight_n1 = weight[n1]; 245 final int weight_n2 = weight[n2]; 246 weight[nNodes] = ((weight_n1 & 0xffffff00) 247 + (weight_n2 & 0xffffff00)) 248 | (1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff)); 249 250 parent[nNodes] = -1; 251 nHeap++; 252 heap[nHeap] = nNodes; 253 254 tmp = 0; 255 zz = nHeap; 256 tmp = heap[zz]; 257 final int weight_tmp = weight[tmp]; 258 while (weight_tmp < weight[heap[zz >> 1]]) { 259 heap[zz] = heap[zz >> 1]; 260 zz >>= 1; 261 } 262 heap[zz] = tmp; 263 264 } 265 266 for (int i = 1; i <= alphaSize; i++) { 267 int j = 0; 268 int k = i; 269 270 for (int parent_k; (parent_k = parent[k]) >= 0;) { 271 k = parent_k; 272 j++; 273 } 274 275 len[i - 1] = (byte) j; 276 if (j > maxLen) { 277 tooLong = true; 278 } 279 } 280 281 if (tooLong) { 282 for (int i = 1; i < alphaSize; i++) { 283 int j = weight[i] >> 8; 284 j = 1 + (j >> 1); 285 weight[i] = j << 8; 286 } 287 } 288 } 289 } 290 291 /** 292 * Index of the last char in the block, so the block size == last + 1. 293 */ 294 private int last; 295 296 /** 297 * Always: in the range 0 .. 9. The current block size is 100000 * this 298 * number. 299 */ 300 private final int blockSize100k; 301 302 private int bsBuff; 303 private int bsLive; 304 private final CRC crc = new CRC(); 305 306 private int nInUse; 307 308 private int nMTF; 309 310 private int currentChar = -1; 311 private int runLength; 312 313 private int blockCRC; 314 private int combinedCRC; 315 private final int allowableBlockSize; 316 317 /** 318 * All memory intensive stuff. 319 */ 320 private Data data; 321 private BlockSort blockSorter; 322 323 private OutputStream out; 324 private volatile boolean closed; 325 326 /** 327 * Chooses a blocksize based on the given length of the data to compress. 328 * 329 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and 330 * {@link #MAX_BLOCKSIZE} both inclusive. For a negative 331 * {@code inputLength} this method returns {@code MAX_BLOCKSIZE} 332 * always. 333 * 334 * @param inputLength 335 * The length of the data which will be compressed by 336 * {@code BZip2CompressorOutputStream}. 337 */ 338 public static int chooseBlockSize(final long inputLength) { 339 return (inputLength > 0) ? (int) Math 340 .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; 341 } 342 343 /** 344 * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 345 * 346 * @param out 347 * the destination stream. 348 * 349 * @throws IOException 350 * if an I/O error occurs in the specified stream. 351 * @throws NullPointerException 352 * if {@code out == null}. 353 */ 354 public BZip2CompressorOutputStream(final OutputStream out) 355 throws IOException { 356 this(out, MAX_BLOCKSIZE); 357 } 358 359 /** 360 * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. 361 * 362 * @param out 363 * the destination stream. 364 * @param blockSize 365 * the blockSize as 100k units. 366 * 367 * @throws IOException 368 * if an I/O error occurs in the specified stream. 369 * @throws IllegalArgumentException 370 * if {@code (blockSize < 1) || (blockSize > 9)}. 371 * @throws NullPointerException 372 * if {@code out == null}. 373 * 374 * @see #MIN_BLOCKSIZE 375 * @see #MAX_BLOCKSIZE 376 */ 377 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { 378 if (blockSize < 1) { 379 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); 380 } 381 if (blockSize > 9) { 382 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); 383 } 384 385 this.blockSize100k = blockSize; 386 this.out = out; 387 388 /* 20 is just a paranoia constant */ 389 this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20; 390 init(); 391 } 392 393 @Override 394 public void write(final int b) throws IOException { 395 if (closed) { 396 throw new IOException("Closed"); 397 } 398 write0(b); 399 } 400 401 /** 402 * Writes the current byte to the buffer, run-length encoding it 403 * if it has been repeated at least four times (the first step 404 * RLEs sequences of four identical bytes). 405 * 406 * <p>Flushes the current block before writing data if it is 407 * full.</p> 408 * 409 * <p>"write to the buffer" means adding to data.buffer starting 410 * two steps "after" this.last - initially starting at index 1 411 * (not 0) - and updating this.last to point to the last index 412 * written minus 1.</p> 413 */ 414 private void writeRun() throws IOException { 415 final int lastShadow = this.last; 416 417 if (lastShadow < this.allowableBlockSize) { 418 final int currentCharShadow = this.currentChar; 419 final Data dataShadow = this.data; 420 dataShadow.inUse[currentCharShadow] = true; 421 final byte ch = (byte) currentCharShadow; 422 423 int runLengthShadow = this.runLength; 424 this.crc.updateCRC(currentCharShadow, runLengthShadow); 425 426 switch (runLengthShadow) { 427 case 1: 428 dataShadow.block[lastShadow + 2] = ch; 429 this.last = lastShadow + 1; 430 break; 431 432 case 2: 433 dataShadow.block[lastShadow + 2] = ch; 434 dataShadow.block[lastShadow + 3] = ch; 435 this.last = lastShadow + 2; 436 break; 437 438 case 3: { 439 final byte[] block = dataShadow.block; 440 block[lastShadow + 2] = ch; 441 block[lastShadow + 3] = ch; 442 block[lastShadow + 4] = ch; 443 this.last = lastShadow + 3; 444 } 445 break; 446 447 default: { 448 runLengthShadow -= 4; 449 dataShadow.inUse[runLengthShadow] = true; 450 final byte[] block = dataShadow.block; 451 block[lastShadow + 2] = ch; 452 block[lastShadow + 3] = ch; 453 block[lastShadow + 4] = ch; 454 block[lastShadow + 5] = ch; 455 block[lastShadow + 6] = (byte) runLengthShadow; 456 this.last = lastShadow + 5; 457 } 458 break; 459 460 } 461 } else { 462 endBlock(); 463 initBlock(); 464 writeRun(); 465 } 466 } 467 468 /** 469 * Overridden to warn about an unclosed stream. 470 */ 471 @Override 472 protected void finalize() throws Throwable { 473 if (!closed) { 474 System.err.println("Unclosed BZip2CompressorOutputStream detected, will *not* close it"); 475 } 476 super.finalize(); 477 } 478 479 480 public void finish() throws IOException { 481 if (!closed) { 482 closed = true; 483 try { 484 if (this.runLength > 0) { 485 writeRun(); 486 } 487 this.currentChar = -1; 488 endBlock(); 489 endCompression(); 490 } finally { 491 this.out = null; 492 this.blockSorter = null; 493 this.data = null; 494 } 495 } 496 } 497 498 @Override 499 public void close() throws IOException { 500 if (!closed) { 501 try (OutputStream outShadow = this.out) { 502 finish(); 503 } 504 } 505 } 506 507 @Override 508 public void flush() throws IOException { 509 final OutputStream outShadow = this.out; 510 if (outShadow != null) { 511 outShadow.flush(); 512 } 513 } 514 515 /** 516 * Writes magic bytes like BZ on the first position of the stream 517 * and bytes indicating the file-format, which is 518 * huffmanised, followed by a digit indicating blockSize100k. 519 * @throws IOException if the magic bytes could not been written 520 */ 521 private void init() throws IOException { 522 bsPutUByte('B'); 523 bsPutUByte('Z'); 524 525 this.data = new Data(this.blockSize100k); 526 this.blockSorter = new BlockSort(this.data); 527 528 // huffmanised magic bytes 529 bsPutUByte('h'); 530 bsPutUByte('0' + this.blockSize100k); 531 532 this.combinedCRC = 0; 533 initBlock(); 534 } 535 536 private void initBlock() { 537 // blockNo++; 538 this.crc.initializeCRC(); 539 this.last = -1; 540 // ch = 0; 541 542 final boolean[] inUse = this.data.inUse; 543 for (int i = 256; --i >= 0;) { 544 inUse[i] = false; 545 } 546 547 } 548 549 private void endBlock() throws IOException { 550 this.blockCRC = this.crc.getFinalCRC(); 551 this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); 552 this.combinedCRC ^= this.blockCRC; 553 554 // empty block at end of file 555 if (this.last == -1) { 556 return; 557 } 558 559 /* sort the block and establish posn of original string */ 560 blockSort(); 561 562 /* 563 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 564 * :-). A 32 bit value does not really give a strong enough guarantee 565 * that the value will not appear by chance in the compressed 566 * datastream. Worst-case probability of this event, for a 900k block, 567 * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 568 * bits. For a compressed file of size 100Gb -- about 100000 blocks -- 569 * only a 48-bit marker will do. NB: normal compression/ decompression 570 * donot rely on these statistical properties. They are only important 571 * when trying to recover blocks from damaged files. 572 */ 573 bsPutUByte(0x31); 574 bsPutUByte(0x41); 575 bsPutUByte(0x59); 576 bsPutUByte(0x26); 577 bsPutUByte(0x53); 578 bsPutUByte(0x59); 579 580 /* Now the block's CRC, so it is in a known place. */ 581 bsPutInt(this.blockCRC); 582 583 /* Now a single bit indicating no randomisation. */ 584 bsW(1, 0); 585 586 /* Finally, block's contents proper. */ 587 moveToFrontCodeAndSend(); 588 } 589 590 private void endCompression() throws IOException { 591 /* 592 * Now another magic 48-bit number, 0x177245385090, to indicate the end 593 * of the last block. (sqrt(pi), if you want to know. I did want to use 594 * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me 595 * to feel statistically comfortable. Call me paranoid.) 596 */ 597 bsPutUByte(0x17); 598 bsPutUByte(0x72); 599 bsPutUByte(0x45); 600 bsPutUByte(0x38); 601 bsPutUByte(0x50); 602 bsPutUByte(0x90); 603 604 bsPutInt(this.combinedCRC); 605 bsFinishedWithStream(); 606 } 607 608 /** 609 * Returns the blocksize parameter specified at construction time. 610 * @return the blocksize parameter specified at construction time 611 */ 612 public final int getBlockSize() { 613 return this.blockSize100k; 614 } 615 616 @Override 617 public void write(final byte[] buf, int offs, final int len) 618 throws IOException { 619 if (offs < 0) { 620 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 621 } 622 if (len < 0) { 623 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 624 } 625 if (offs + len > buf.length) { 626 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" 627 + len + ") > buf.length(" 628 + buf.length + ")."); 629 } 630 if (closed) { 631 throw new IOException("Stream closed"); 632 } 633 634 for (final int hi = offs + len; offs < hi;) { 635 write0(buf[offs++]); 636 } 637 } 638 639 /** 640 * Keeps track of the last bytes written and implicitly performs 641 * run-length encoding as the first step of the bzip2 algorithm. 642 */ 643 private void write0(int b) throws IOException { 644 if (this.currentChar != -1) { 645 b &= 0xff; 646 if (this.currentChar == b) { 647 if (++this.runLength > 254) { 648 writeRun(); 649 this.currentChar = -1; 650 this.runLength = 0; 651 } 652 // else nothing to do 653 } else { 654 writeRun(); 655 this.runLength = 1; 656 this.currentChar = b; 657 } 658 } else { 659 this.currentChar = b & 0xff; 660 this.runLength++; 661 } 662 } 663 664 private static void hbAssignCodes(final int[] code, final byte[] length, 665 final int minLen, final int maxLen, 666 final int alphaSize) { 667 int vec = 0; 668 for (int n = minLen; n <= maxLen; n++) { 669 for (int i = 0; i < alphaSize; i++) { 670 if ((length[i] & 0xff) == n) { 671 code[i] = vec; 672 vec++; 673 } 674 } 675 vec <<= 1; 676 } 677 } 678 679 private void bsFinishedWithStream() throws IOException { 680 while (this.bsLive > 0) { 681 final int ch = this.bsBuff >> 24; 682 this.out.write(ch); // write 8-bit 683 this.bsBuff <<= 8; 684 this.bsLive -= 8; 685 } 686 } 687 688 private void bsW(final int n, final int v) throws IOException { 689 final OutputStream outShadow = this.out; 690 int bsLiveShadow = this.bsLive; 691 int bsBuffShadow = this.bsBuff; 692 693 while (bsLiveShadow >= 8) { 694 outShadow.write(bsBuffShadow >> 24); // write 8-bit 695 bsBuffShadow <<= 8; 696 bsLiveShadow -= 8; 697 } 698 699 this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); 700 this.bsLive = bsLiveShadow + n; 701 } 702 703 private void bsPutUByte(final int c) throws IOException { 704 bsW(8, c); 705 } 706 707 private void bsPutInt(final int u) throws IOException { 708 bsW(8, (u >> 24) & 0xff); 709 bsW(8, (u >> 16) & 0xff); 710 bsW(8, (u >> 8) & 0xff); 711 bsW(8, u & 0xff); 712 } 713 714 private void sendMTFValues() throws IOException { 715 final byte[][] len = this.data.sendMTFValues_len; 716 final int alphaSize = this.nInUse + 2; 717 718 for (int t = N_GROUPS; --t >= 0;) { 719 final byte[] len_t = len[t]; 720 for (int v = alphaSize; --v >= 0;) { 721 len_t[v] = GREATER_ICOST; 722 } 723 } 724 725 /* Decide how many coding tables to use */ 726 // assert (this.nMTF > 0) : this.nMTF; 727 final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 728 : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; 729 730 /* Generate an initial set of coding tables */ 731 sendMTFValues0(nGroups, alphaSize); 732 733 /* 734 * Iterate up to N_ITERS times to improve the tables. 735 */ 736 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 737 738 /* Compute MTF values for the selectors. */ 739 sendMTFValues2(nGroups, nSelectors); 740 741 /* Assign actual codes for the tables. */ 742 sendMTFValues3(nGroups, alphaSize); 743 744 /* Transmit the mapping table. */ 745 sendMTFValues4(); 746 747 /* Now the selectors. */ 748 sendMTFValues5(nGroups, nSelectors); 749 750 /* Now the coding tables. */ 751 sendMTFValues6(nGroups, alphaSize); 752 753 /* And finally, the block data proper */ 754 sendMTFValues7(); 755 } 756 757 private void sendMTFValues0(final int nGroups, final int alphaSize) { 758 final byte[][] len = this.data.sendMTFValues_len; 759 final int[] mtfFreq = this.data.mtfFreq; 760 761 int remF = this.nMTF; 762 int gs = 0; 763 764 for (int nPart = nGroups; nPart > 0; nPart--) { 765 final int tFreq = remF / nPart; 766 int ge = gs - 1; 767 int aFreq = 0; 768 769 for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { 770 aFreq += mtfFreq[++ge]; 771 } 772 773 if ((ge > gs) && (nPart != nGroups) && (nPart != 1) 774 && (((nGroups - nPart) & 1) != 0)) { 775 aFreq -= mtfFreq[ge--]; 776 } 777 778 final byte[] len_np = len[nPart - 1]; 779 for (int v = alphaSize; --v >= 0;) { 780 if ((v >= gs) && (v <= ge)) { 781 len_np[v] = LESSER_ICOST; 782 } else { 783 len_np[v] = GREATER_ICOST; 784 } 785 } 786 787 gs = ge + 1; 788 remF -= aFreq; 789 } 790 } 791 792 private int sendMTFValues1(final int nGroups, final int alphaSize) { 793 final Data dataShadow = this.data; 794 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 795 final int[] fave = dataShadow.sendMTFValues_fave; 796 final short[] cost = dataShadow.sendMTFValues_cost; 797 final char[] sfmap = dataShadow.sfmap; 798 final byte[] selector = dataShadow.selector; 799 final byte[][] len = dataShadow.sendMTFValues_len; 800 final byte[] len_0 = len[0]; 801 final byte[] len_1 = len[1]; 802 final byte[] len_2 = len[2]; 803 final byte[] len_3 = len[3]; 804 final byte[] len_4 = len[4]; 805 final byte[] len_5 = len[5]; 806 final int nMTFShadow = this.nMTF; 807 808 int nSelectors = 0; 809 810 for (int iter = 0; iter < N_ITERS; iter++) { 811 for (int t = nGroups; --t >= 0;) { 812 fave[t] = 0; 813 final int[] rfreqt = rfreq[t]; 814 for (int i = alphaSize; --i >= 0;) { 815 rfreqt[i] = 0; 816 } 817 } 818 819 nSelectors = 0; 820 821 for (int gs = 0; gs < this.nMTF;) { 822 /* Set group start & end marks. */ 823 824 /* 825 * Calculate the cost of this group as coded by each of the 826 * coding tables. 827 */ 828 829 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 830 831 if (nGroups == N_GROUPS) { 832 // unrolled version of the else-block 833 834 short cost0 = 0; 835 short cost1 = 0; 836 short cost2 = 0; 837 short cost3 = 0; 838 short cost4 = 0; 839 short cost5 = 0; 840 841 for (int i = gs; i <= ge; i++) { 842 final int icv = sfmap[i]; 843 cost0 += len_0[icv] & 0xff; 844 cost1 += len_1[icv] & 0xff; 845 cost2 += len_2[icv] & 0xff; 846 cost3 += len_3[icv] & 0xff; 847 cost4 += len_4[icv] & 0xff; 848 cost5 += len_5[icv] & 0xff; 849 } 850 851 cost[0] = cost0; 852 cost[1] = cost1; 853 cost[2] = cost2; 854 cost[3] = cost3; 855 cost[4] = cost4; 856 cost[5] = cost5; 857 858 } else { 859 for (int t = nGroups; --t >= 0;) { 860 cost[t] = 0; 861 } 862 863 for (int i = gs; i <= ge; i++) { 864 final int icv = sfmap[i]; 865 for (int t = nGroups; --t >= 0;) { 866 cost[t] += len[t][icv] & 0xff; 867 } 868 } 869 } 870 871 /* 872 * Find the coding table which is best for this group, and 873 * record its identity in the selector table. 874 */ 875 int bt = -1; 876 for (int t = nGroups, bc = 999999999; --t >= 0;) { 877 final int cost_t = cost[t]; 878 if (cost_t < bc) { 879 bc = cost_t; 880 bt = t; 881 } 882 } 883 884 fave[bt]++; 885 selector[nSelectors] = (byte) bt; 886 nSelectors++; 887 888 /* 889 * Increment the symbol frequencies for the selected table. 890 */ 891 final int[] rfreq_bt = rfreq[bt]; 892 for (int i = gs; i <= ge; i++) { 893 rfreq_bt[sfmap[i]]++; 894 } 895 896 gs = ge + 1; 897 } 898 899 /* 900 * Recompute the tables based on the accumulated frequencies. 901 */ 902 for (int t = 0; t < nGroups; t++) { 903 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 904 } 905 } 906 907 return nSelectors; 908 } 909 910 private void sendMTFValues2(final int nGroups, final int nSelectors) { 911 // assert (nGroups < 8) : nGroups; 912 913 final Data dataShadow = this.data; 914 final byte[] pos = dataShadow.sendMTFValues2_pos; 915 916 for (int i = nGroups; --i >= 0;) { 917 pos[i] = (byte) i; 918 } 919 920 for (int i = 0; i < nSelectors; i++) { 921 final byte ll_i = dataShadow.selector[i]; 922 byte tmp = pos[0]; 923 int j = 0; 924 925 while (ll_i != tmp) { 926 j++; 927 final byte tmp2 = tmp; 928 tmp = pos[j]; 929 pos[j] = tmp2; 930 } 931 932 pos[0] = tmp; 933 dataShadow.selectorMtf[i] = (byte) j; 934 } 935 } 936 937 private void sendMTFValues3(final int nGroups, final int alphaSize) { 938 final int[][] code = this.data.sendMTFValues_code; 939 final byte[][] len = this.data.sendMTFValues_len; 940 941 for (int t = 0; t < nGroups; t++) { 942 int minLen = 32; 943 int maxLen = 0; 944 final byte[] len_t = len[t]; 945 for (int i = alphaSize; --i >= 0;) { 946 final int l = len_t[i] & 0xff; 947 if (l > maxLen) { 948 maxLen = l; 949 } 950 if (l < minLen) { 951 minLen = l; 952 } 953 } 954 955 // assert (maxLen <= 20) : maxLen; 956 // assert (minLen >= 1) : minLen; 957 958 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 959 } 960 } 961 962 private void sendMTFValues4() throws IOException { 963 final boolean[] inUse = this.data.inUse; 964 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 965 966 for (int i = 16; --i >= 0;) { 967 inUse16[i] = false; 968 final int i16 = i * 16; 969 for (int j = 16; --j >= 0;) { 970 if (inUse[i16 + j]) { 971 inUse16[i] = true; 972 break; 973 } 974 } 975 } 976 977 for (int i = 0; i < 16; i++) { 978 bsW(1, inUse16[i] ? 1 : 0); 979 } 980 981 final OutputStream outShadow = this.out; 982 int bsLiveShadow = this.bsLive; 983 int bsBuffShadow = this.bsBuff; 984 985 for (int i = 0; i < 16; i++) { 986 if (inUse16[i]) { 987 final int i16 = i * 16; 988 for (int j = 0; j < 16; j++) { 989 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 990 while (bsLiveShadow >= 8) { 991 outShadow.write(bsBuffShadow >> 24); // write 8-bit 992 bsBuffShadow <<= 8; 993 bsLiveShadow -= 8; 994 } 995 if (inUse[i16 + j]) { 996 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); 997 } 998 bsLiveShadow++; 999 } 1000 } 1001 } 1002 1003 this.bsBuff = bsBuffShadow; 1004 this.bsLive = bsLiveShadow; 1005 } 1006 1007 private void sendMTFValues5(final int nGroups, final int nSelectors) 1008 throws IOException { 1009 bsW(3, nGroups); 1010 bsW(15, nSelectors); 1011 1012 final OutputStream outShadow = this.out; 1013 final byte[] selectorMtf = this.data.selectorMtf; 1014 1015 int bsLiveShadow = this.bsLive; 1016 int bsBuffShadow = this.bsBuff; 1017 1018 for (int i = 0; i < nSelectors; i++) { 1019 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1020 // inlined: bsW(1, 1); 1021 while (bsLiveShadow >= 8) { 1022 outShadow.write(bsBuffShadow >> 24); 1023 bsBuffShadow <<= 8; 1024 bsLiveShadow -= 8; 1025 } 1026 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); 1027 bsLiveShadow++; 1028 } 1029 1030 // inlined: bsW(1, 0); 1031 while (bsLiveShadow >= 8) { 1032 outShadow.write(bsBuffShadow >> 24); 1033 bsBuffShadow <<= 8; 1034 bsLiveShadow -= 8; 1035 } 1036 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1037 bsLiveShadow++; 1038 } 1039 1040 this.bsBuff = bsBuffShadow; 1041 this.bsLive = bsLiveShadow; 1042 } 1043 1044 private void sendMTFValues6(final int nGroups, final int alphaSize) 1045 throws IOException { 1046 final byte[][] len = this.data.sendMTFValues_len; 1047 final OutputStream outShadow = this.out; 1048 1049 int bsLiveShadow = this.bsLive; 1050 int bsBuffShadow = this.bsBuff; 1051 1052 for (int t = 0; t < nGroups; t++) { 1053 final byte[] len_t = len[t]; 1054 int curr = len_t[0] & 0xff; 1055 1056 // inlined: bsW(5, curr); 1057 while (bsLiveShadow >= 8) { 1058 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1059 bsBuffShadow <<= 8; 1060 bsLiveShadow -= 8; 1061 } 1062 bsBuffShadow |= curr << (32 - bsLiveShadow - 5); 1063 bsLiveShadow += 5; 1064 1065 for (int i = 0; i < alphaSize; i++) { 1066 final int lti = len_t[i] & 0xff; 1067 while (curr < lti) { 1068 // inlined: bsW(2, 2); 1069 while (bsLiveShadow >= 8) { 1070 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1071 bsBuffShadow <<= 8; 1072 bsLiveShadow -= 8; 1073 } 1074 bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); 1075 bsLiveShadow += 2; 1076 1077 curr++; /* 10 */ 1078 } 1079 1080 while (curr > lti) { 1081 // inlined: bsW(2, 3); 1082 while (bsLiveShadow >= 8) { 1083 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1084 bsBuffShadow <<= 8; 1085 bsLiveShadow -= 8; 1086 } 1087 bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); 1088 bsLiveShadow += 2; 1089 1090 curr--; /* 11 */ 1091 } 1092 1093 // inlined: bsW(1, 0); 1094 while (bsLiveShadow >= 8) { 1095 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1096 bsBuffShadow <<= 8; 1097 bsLiveShadow -= 8; 1098 } 1099 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1100 bsLiveShadow++; 1101 } 1102 } 1103 1104 this.bsBuff = bsBuffShadow; 1105 this.bsLive = bsLiveShadow; 1106 } 1107 1108 private void sendMTFValues7() throws IOException { 1109 final Data dataShadow = this.data; 1110 final byte[][] len = dataShadow.sendMTFValues_len; 1111 final int[][] code = dataShadow.sendMTFValues_code; 1112 final OutputStream outShadow = this.out; 1113 final byte[] selector = dataShadow.selector; 1114 final char[] sfmap = dataShadow.sfmap; 1115 final int nMTFShadow = this.nMTF; 1116 1117 int selCtr = 0; 1118 1119 int bsLiveShadow = this.bsLive; 1120 int bsBuffShadow = this.bsBuff; 1121 1122 for (int gs = 0; gs < nMTFShadow;) { 1123 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1124 final int selector_selCtr = selector[selCtr] & 0xff; 1125 final int[] code_selCtr = code[selector_selCtr]; 1126 final byte[] len_selCtr = len[selector_selCtr]; 1127 1128 while (gs <= ge) { 1129 final int sfmap_i = sfmap[gs]; 1130 1131 // 1132 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1133 // code_selCtr[sfmap_i]); 1134 // 1135 while (bsLiveShadow >= 8) { 1136 outShadow.write(bsBuffShadow >> 24); 1137 bsBuffShadow <<= 8; 1138 bsLiveShadow -= 8; 1139 } 1140 final int n = len_selCtr[sfmap_i] & 0xFF; 1141 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); 1142 bsLiveShadow += n; 1143 1144 gs++; 1145 } 1146 1147 gs = ge + 1; 1148 selCtr++; 1149 } 1150 1151 this.bsBuff = bsBuffShadow; 1152 this.bsLive = bsLiveShadow; 1153 } 1154 1155 private void moveToFrontCodeAndSend() throws IOException { 1156 bsW(24, this.data.origPtr); 1157 generateMTFValues(); 1158 sendMTFValues(); 1159 } 1160 1161 private void blockSort() { 1162 blockSorter.blockSort(data, last); 1163 } 1164 1165 /* 1166 * Performs Move-To-Front on the Burrows-Wheeler transformed 1167 * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB 1168 * run-length-encoded form. 1169 * 1170 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 1171 */ 1172 private void generateMTFValues() { 1173 final int lastShadow = this.last; 1174 final Data dataShadow = this.data; 1175 final boolean[] inUse = dataShadow.inUse; 1176 final byte[] block = dataShadow.block; 1177 final int[] fmap = dataShadow.fmap; 1178 final char[] sfmap = dataShadow.sfmap; 1179 final int[] mtfFreq = dataShadow.mtfFreq; 1180 final byte[] unseqToSeq = dataShadow.unseqToSeq; 1181 final byte[] yy = dataShadow.generateMTFValues_yy; 1182 1183 // make maps 1184 int nInUseShadow = 0; 1185 for (int i = 0; i < 256; i++) { 1186 if (inUse[i]) { 1187 unseqToSeq[i] = (byte) nInUseShadow; 1188 nInUseShadow++; 1189 } 1190 } 1191 this.nInUse = nInUseShadow; 1192 1193 final int eob = nInUseShadow + 1; 1194 1195 Arrays.fill(mtfFreq, 0, eob + 1, 0); 1196 1197 for (int i = nInUseShadow; --i >= 0;) { 1198 yy[i] = (byte) i; 1199 } 1200 1201 int wr = 0; 1202 int zPend = 0; 1203 1204 for (int i = 0; i <= lastShadow; i++) { 1205 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 1206 byte tmp = yy[0]; 1207 int j = 0; 1208 1209 while (ll_i != tmp) { 1210 j++; 1211 final byte tmp2 = tmp; 1212 tmp = yy[j]; 1213 yy[j] = tmp2; 1214 } 1215 yy[0] = tmp; 1216 1217 if (j == 0) { 1218 zPend++; 1219 } else { 1220 if (zPend > 0) { 1221 zPend--; 1222 while (true) { 1223 if ((zPend & 1) == 0) { 1224 sfmap[wr] = RUNA; 1225 wr++; 1226 mtfFreq[RUNA]++; 1227 } else { 1228 sfmap[wr] = RUNB; 1229 wr++; 1230 mtfFreq[RUNB]++; 1231 } 1232 1233 if (zPend < 2) { 1234 break; 1235 } 1236 zPend = (zPend - 2) >> 1; 1237 } 1238 zPend = 0; 1239 } 1240 sfmap[wr] = (char) (j + 1); 1241 wr++; 1242 mtfFreq[j + 1]++; 1243 } 1244 } 1245 1246 if (zPend > 0) { 1247 zPend--; 1248 while (true) { 1249 if ((zPend & 1) == 0) { 1250 sfmap[wr] = RUNA; 1251 wr++; 1252 mtfFreq[RUNA]++; 1253 } else { 1254 sfmap[wr] = RUNB; 1255 wr++; 1256 mtfFreq[RUNB]++; 1257 } 1258 1259 if (zPend < 2) { 1260 break; 1261 } 1262 zPend = (zPend - 2) >> 1; 1263 } 1264 } 1265 1266 sfmap[wr] = (char) eob; 1267 mtfFreq[eob]++; 1268 this.nMTF = wr + 1; 1269 } 1270 1271 static final class Data { 1272 1273 // with blockSize 900k 1274 /* maps unsigned byte => "does it occur in block" */ 1275 final boolean[] inUse = new boolean[256]; // 256 byte 1276 final byte[] unseqToSeq = new byte[256]; // 256 byte 1277 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 1278 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 1279 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 1280 1281 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 1282 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 1283 // byte 1284 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 1285 // byte 1286 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 1287 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 1288 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 1289 // byte 1290 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 1291 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 1292 1293 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 1294 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 1295 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 1296 1297 // ------------ 1298 // 333408 byte 1299 1300 /* holds the RLEd block of original data starting at index 1. 1301 * After sorting the last byte added to the buffer is at index 1302 * 0. */ 1303 final byte[] block; // 900021 byte 1304 /* maps index in Burrows-Wheeler transformed block => index of 1305 * byte in original block */ 1306 final int[] fmap; // 3600000 byte 1307 final char[] sfmap; // 3600000 byte 1308 // ------------ 1309 // 8433529 byte 1310 // ============ 1311 1312 /** 1313 * Index of original line in Burrows-Wheeler table. 1314 * 1315 * <p>This is the index in fmap that points to the last byte 1316 * of the original data.</p> 1317 */ 1318 int origPtr; 1319 1320 Data(final int blockSize100k) { 1321 final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; 1322 this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; 1323 this.fmap = new int[n]; 1324 this.sfmap = new char[2 * n]; 1325 } 1326 1327 } 1328 1329}