001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.commons.compress.compressors.bzip2;
020
021 import java.io.IOException;
022 import java.io.OutputStream;
023
024 import org.apache.commons.compress.compressors.CompressorOutputStream;
025
026 /**
027 * An output stream that compresses into the BZip2 format into another stream.
028 *
029 * <p>
030 * The compression requires large amounts of memory. Thus you should call the
031 * {@link #close() close()} method as soon as possible, to force
032 * <tt>BZip2CompressorOutputStream</tt> to release the allocated memory.
033 * </p>
034 *
035 * <p> You can shrink the amount of allocated memory and maybe raise
036 * the compression speed by choosing a lower blocksize, which in turn
037 * may cause a lower compression ratio. You can avoid unnecessary
038 * memory allocation by avoiding using a blocksize which is bigger
039 * than the size of the input. </p>
040 *
041 * <p> You can compute the memory usage for compressing by the
042 * following formula: </p>
043 *
044 * <pre>
045 * <code>400k + (9 * blocksize)</code>.
046 * </pre>
047 *
048 * <p> To get the memory required for decompression by {@link
049 * BZip2CompressorInputStream} use </p>
050 *
051 * <pre>
052 * <code>65k + (5 * blocksize)</code>.
053 * </pre>
054 *
055 * <table width="100%" border="1">
056 * <colgroup> <col width="33%" /> <col width="33%" /> <col width="33%" />
057 * </colgroup>
058 * <tr>
059 * <th colspan="3">Memory usage by blocksize</th>
060 * </tr>
061 * <tr>
062 * <th align="right">Blocksize</th> <th align="right">Compression<br>
063 * memory usage</th> <th align="right">Decompression<br>
064 * memory usage</th>
065 * </tr>
066 * <tr>
067 * <td align="right">100k</td>
068 * <td align="right">1300k</td>
069 * <td align="right">565k</td>
070 * </tr>
071 * <tr>
072 * <td align="right">200k</td>
073 * <td align="right">2200k</td>
074 * <td align="right">1065k</td>
075 * </tr>
076 * <tr>
077 * <td align="right">300k</td>
078 * <td align="right">3100k</td>
079 * <td align="right">1565k</td>
080 * </tr>
081 * <tr>
082 * <td align="right">400k</td>
083 * <td align="right">4000k</td>
084 * <td align="right">2065k</td>
085 * </tr>
086 * <tr>
087 * <td align="right">500k</td>
088 * <td align="right">4900k</td>
089 * <td align="right">2565k</td>
090 * </tr>
091 * <tr>
092 * <td align="right">600k</td>
093 * <td align="right">5800k</td>
094 * <td align="right">3065k</td>
095 * </tr>
096 * <tr>
097 * <td align="right">700k</td>
098 * <td align="right">6700k</td>
099 * <td align="right">3565k</td>
100 * </tr>
101 * <tr>
102 * <td align="right">800k</td>
103 * <td align="right">7600k</td>
104 * <td align="right">4065k</td>
105 * </tr>
106 * <tr>
107 * <td align="right">900k</td>
108 * <td align="right">8500k</td>
109 * <td align="right">4565k</td>
110 * </tr>
111 * </table>
112 *
113 * <p>
114 * For decompression <tt>BZip2CompressorInputStream</tt> allocates less memory if the
115 * bzipped input is smaller than one block.
116 * </p>
117 *
118 * <p>
119 * Instances of this class are not threadsafe.
120 * </p>
121 *
122 * <p>
123 * TODO: Update to BZip2 1.0.1
124 * </p>
125 * @NotThreadSafe
126 */
127 public class BZip2CompressorOutputStream extends CompressorOutputStream
128 implements BZip2Constants {
129
130 /**
131 * The minimum supported blocksize <tt> == 1</tt>.
132 */
133 public static final int MIN_BLOCKSIZE = 1;
134
135 /**
136 * The maximum supported blocksize <tt> == 9</tt>.
137 */
138 public static final int MAX_BLOCKSIZE = 9;
139
140 private static final int GREATER_ICOST = 15;
141 private static final int LESSER_ICOST = 0;
142
143 private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
144 final Data dat, final int alphaSize,
145 final int maxLen) {
146 /*
147 * Nodes and heap entries run from 1. Entry 0 for both the heap and
148 * nodes is a sentinel.
149 */
150 final int[] heap = dat.heap;
151 final int[] weight = dat.weight;
152 final int[] parent = dat.parent;
153
154 for (int i = alphaSize; --i >= 0;) {
155 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
156 }
157
158 for (boolean tooLong = true; tooLong;) {
159 tooLong = false;
160
161 int nNodes = alphaSize;
162 int nHeap = 0;
163 heap[0] = 0;
164 weight[0] = 0;
165 parent[0] = -2;
166
167 for (int i = 1; i <= alphaSize; i++) {
168 parent[i] = -1;
169 nHeap++;
170 heap[nHeap] = i;
171
172 int zz = nHeap;
173 int tmp = heap[zz];
174 while (weight[tmp] < weight[heap[zz >> 1]]) {
175 heap[zz] = heap[zz >> 1];
176 zz >>= 1;
177 }
178 heap[zz] = tmp;
179 }
180
181 while (nHeap > 1) {
182 int n1 = heap[1];
183 heap[1] = heap[nHeap];
184 nHeap--;
185
186 int yy = 0;
187 int zz = 1;
188 int tmp = heap[1];
189
190 while (true) {
191 yy = zz << 1;
192
193 if (yy > nHeap) {
194 break;
195 }
196
197 if ((yy < nHeap)
198 && (weight[heap[yy + 1]] < weight[heap[yy]])) {
199 yy++;
200 }
201
202 if (weight[tmp] < weight[heap[yy]]) {
203 break;
204 }
205
206 heap[zz] = heap[yy];
207 zz = yy;
208 }
209
210 heap[zz] = tmp;
211
212 int n2 = heap[1];
213 heap[1] = heap[nHeap];
214 nHeap--;
215
216 yy = 0;
217 zz = 1;
218 tmp = heap[1];
219
220 while (true) {
221 yy = zz << 1;
222
223 if (yy > nHeap) {
224 break;
225 }
226
227 if ((yy < nHeap)
228 && (weight[heap[yy + 1]] < weight[heap[yy]])) {
229 yy++;
230 }
231
232 if (weight[tmp] < weight[heap[yy]]) {
233 break;
234 }
235
236 heap[zz] = heap[yy];
237 zz = yy;
238 }
239
240 heap[zz] = tmp;
241 nNodes++;
242 parent[n1] = parent[n2] = nNodes;
243
244 final int weight_n1 = weight[n1];
245 final int weight_n2 = weight[n2];
246 weight[nNodes] = ((weight_n1 & 0xffffff00)
247 + (weight_n2 & 0xffffff00))
248 | (1 + (((weight_n1 & 0x000000ff)
249 > (weight_n2 & 0x000000ff))
250 ? (weight_n1 & 0x000000ff)
251 : (weight_n2 & 0x000000ff)));
252
253 parent[nNodes] = -1;
254 nHeap++;
255 heap[nHeap] = nNodes;
256
257 tmp = 0;
258 zz = nHeap;
259 tmp = heap[zz];
260 final int weight_tmp = weight[tmp];
261 while (weight_tmp < weight[heap[zz >> 1]]) {
262 heap[zz] = heap[zz >> 1];
263 zz >>= 1;
264 }
265 heap[zz] = tmp;
266
267 }
268
269 for (int i = 1; i <= alphaSize; i++) {
270 int j = 0;
271 int k = i;
272
273 for (int parent_k; (parent_k = parent[k]) >= 0;) {
274 k = parent_k;
275 j++;
276 }
277
278 len[i - 1] = (byte) j;
279 if (j > maxLen) {
280 tooLong = true;
281 }
282 }
283
284 if (tooLong) {
285 for (int i = 1; i < alphaSize; i++) {
286 int j = weight[i] >> 8;
287 j = 1 + (j >> 1);
288 weight[i] = j << 8;
289 }
290 }
291 }
292 }
293
294 /**
295 * Index of the last char in the block, so the block size == last + 1.
296 */
297 private int last;
298
299 /**
300 * Always: in the range 0 .. 9. The current block size is 100000 * this
301 * number.
302 */
303 private final int blockSize100k;
304
305 private int bsBuff;
306 private int bsLive;
307 private final CRC crc = new CRC();
308
309 private int nInUse;
310
311 private int nMTF;
312
313 private int currentChar = -1;
314 private int runLength = 0;
315
316 private int blockCRC;
317 private int combinedCRC;
318 private final int allowableBlockSize;
319
320 /**
321 * All memory intensive stuff.
322 */
323 private Data data;
324 private BlockSort blockSorter;
325
326 private OutputStream out;
327
328 /**
329 * Chooses a blocksize based on the given length of the data to compress.
330 *
331 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
332 * {@link #MAX_BLOCKSIZE} both inclusive. For a negative
333 * <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt>
334 * always.
335 *
336 * @param inputLength
337 * The length of the data which will be compressed by
338 * <tt>BZip2CompressorOutputStream</tt>.
339 */
340 public static int chooseBlockSize(long inputLength) {
341 return (inputLength > 0) ? (int) Math
342 .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
343 }
344
345 /**
346 * Constructs a new <tt>BZip2CompressorOutputStream</tt> with a blocksize of 900k.
347 *
348 * @param out
349 * the destination stream.
350 *
351 * @throws IOException
352 * if an I/O error occurs in the specified stream.
353 * @throws NullPointerException
354 * if <code>out == null</code>.
355 */
356 public BZip2CompressorOutputStream(final OutputStream out)
357 throws IOException {
358 this(out, MAX_BLOCKSIZE);
359 }
360
361 /**
362 * Constructs a new <tt>BZip2CompressorOutputStream</tt> with specified blocksize.
363 *
364 * @param out
365 * the destination stream.
366 * @param blockSize
367 * the blockSize as 100k units.
368 *
369 * @throws IOException
370 * if an I/O error occurs in the specified stream.
371 * @throws IllegalArgumentException
372 * if <code>(blockSize < 1) || (blockSize > 9)</code>.
373 * @throws NullPointerException
374 * if <code>out == null</code>.
375 *
376 * @see #MIN_BLOCKSIZE
377 * @see #MAX_BLOCKSIZE
378 */
379 public BZip2CompressorOutputStream(final OutputStream out,
380 final int blockSize)
381 throws IOException {
382 super();
383
384 if (blockSize < 1) {
385 throw new IllegalArgumentException("blockSize(" + blockSize
386 + ") < 1");
387 }
388 if (blockSize > 9) {
389 throw new IllegalArgumentException("blockSize(" + blockSize
390 + ") > 9");
391 }
392
393 this.blockSize100k = blockSize;
394 this.out = out;
395
396 /* 20 is just a paranoia constant */
397 this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
398 init();
399 }
400
401 /** {@inheritDoc} */
402 @Override
403 public void write(final int b) throws IOException {
404 if (this.out != null) {
405 write0(b);
406 } else {
407 throw new IOException("closed");
408 }
409 }
410
411 /**
412 * Writes the current byte to the buffer, run-length encoding it
413 * if it has been repeated at least four times (the first step
414 * RLEs sequences of four identical bytes).
415 *
416 * <p>Flushes the current block before writing data if it is
417 * full.</p>
418 *
419 * <p>"write to the buffer" means adding to data.buffer starting
420 * two steps "after" this.last - initially starting at index 1
421 * (not 0) - and updating this.last to point to the last index
422 * written minus 1.</p>
423 */
424 private void writeRun() throws IOException {
425 final int lastShadow = this.last;
426
427 if (lastShadow < this.allowableBlockSize) {
428 final int currentCharShadow = this.currentChar;
429 final Data dataShadow = this.data;
430 dataShadow.inUse[currentCharShadow] = true;
431 final byte ch = (byte) currentCharShadow;
432
433 int runLengthShadow = this.runLength;
434 this.crc.updateCRC(currentCharShadow, runLengthShadow);
435
436 switch (runLengthShadow) {
437 case 1:
438 dataShadow.block[lastShadow + 2] = ch;
439 this.last = lastShadow + 1;
440 break;
441
442 case 2:
443 dataShadow.block[lastShadow + 2] = ch;
444 dataShadow.block[lastShadow + 3] = ch;
445 this.last = lastShadow + 2;
446 break;
447
448 case 3: {
449 final byte[] block = dataShadow.block;
450 block[lastShadow + 2] = ch;
451 block[lastShadow + 3] = ch;
452 block[lastShadow + 4] = ch;
453 this.last = lastShadow + 3;
454 }
455 break;
456
457 default: {
458 runLengthShadow -= 4;
459 dataShadow.inUse[runLengthShadow] = true;
460 final byte[] block = dataShadow.block;
461 block[lastShadow + 2] = ch;
462 block[lastShadow + 3] = ch;
463 block[lastShadow + 4] = ch;
464 block[lastShadow + 5] = ch;
465 block[lastShadow + 6] = (byte) runLengthShadow;
466 this.last = lastShadow + 5;
467 }
468 break;
469
470 }
471 } else {
472 endBlock();
473 initBlock();
474 writeRun();
475 }
476 }
477
478 /**
479 * Overriden to close the stream.
480 */
481 @Override
482 protected void finalize() throws Throwable {
483 finish();
484 super.finalize();
485 }
486
487
488 public void finish() throws IOException {
489 if (out != null) {
490 try {
491 if (this.runLength > 0) {
492 writeRun();
493 }
494 this.currentChar = -1;
495 endBlock();
496 endCompression();
497 } finally {
498 this.out = null;
499 this.data = null;
500 this.blockSorter = null;
501 }
502 }
503 }
504
505 @Override
506 public void close() throws IOException {
507 if (out != null) {
508 OutputStream outShadow = this.out;
509 finish();
510 outShadow.close();
511 }
512 }
513
514 @Override
515 public void flush() throws IOException {
516 OutputStream outShadow = this.out;
517 if (outShadow != null) {
518 outShadow.flush();
519 }
520 }
521
522 /**
523 * Writes magic bytes like BZ on the first position of the stream
524 * and bytes indiciating the file-format, which is
525 * huffmanised, followed by a digit indicating blockSize100k.
526 * @throws IOException if the magic bytes could not been written
527 */
528 private void init() throws IOException {
529 bsPutUByte('B');
530 bsPutUByte('Z');
531
532 this.data = new Data(this.blockSize100k);
533 this.blockSorter = new BlockSort(this.data);
534
535 // huffmanised magic bytes
536 bsPutUByte('h');
537 bsPutUByte('0' + this.blockSize100k);
538
539 this.combinedCRC = 0;
540 initBlock();
541 }
542
543 private void initBlock() {
544 // blockNo++;
545 this.crc.initialiseCRC();
546 this.last = -1;
547 // ch = 0;
548
549 boolean[] inUse = this.data.inUse;
550 for (int i = 256; --i >= 0;) {
551 inUse[i] = false;
552 }
553
554 }
555
556 private void endBlock() throws IOException {
557 this.blockCRC = this.crc.getFinalCRC();
558 this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
559 this.combinedCRC ^= this.blockCRC;
560
561 // empty block at end of file
562 if (this.last == -1) {
563 return;
564 }
565
566 /* sort the block and establish posn of original string */
567 blockSort();
568
569 /*
570 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
571 * :-). A 32 bit value does not really give a strong enough guarantee
572 * that the value will not appear by chance in the compressed
573 * datastream. Worst-case probability of this event, for a 900k block,
574 * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
575 * bits. For a compressed file of size 100Gb -- about 100000 blocks --
576 * only a 48-bit marker will do. NB: normal compression/ decompression
577 * donot rely on these statistical properties. They are only important
578 * when trying to recover blocks from damaged files.
579 */
580 bsPutUByte(0x31);
581 bsPutUByte(0x41);
582 bsPutUByte(0x59);
583 bsPutUByte(0x26);
584 bsPutUByte(0x53);
585 bsPutUByte(0x59);
586
587 /* Now the block's CRC, so it is in a known place. */
588 bsPutInt(this.blockCRC);
589
590 /* Now a single bit indicating no randomisation. */
591 bsW(1, 0);
592
593 /* Finally, block's contents proper. */
594 moveToFrontCodeAndSend();
595 }
596
597 private void endCompression() throws IOException {
598 /*
599 * Now another magic 48-bit number, 0x177245385090, to indicate the end
600 * of the last block. (sqrt(pi), if you want to know. I did want to use
601 * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
602 * to feel statistically comfortable. Call me paranoid.)
603 */
604 bsPutUByte(0x17);
605 bsPutUByte(0x72);
606 bsPutUByte(0x45);
607 bsPutUByte(0x38);
608 bsPutUByte(0x50);
609 bsPutUByte(0x90);
610
611 bsPutInt(this.combinedCRC);
612 bsFinishedWithStream();
613 }
614
615 /**
616 * Returns the blocksize parameter specified at construction time.
617 */
618 public final int getBlockSize() {
619 return this.blockSize100k;
620 }
621
622 @Override
623 public void write(final byte[] buf, int offs, final int len)
624 throws IOException {
625 if (offs < 0) {
626 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
627 }
628 if (len < 0) {
629 throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
630 }
631 if (offs + len > buf.length) {
632 throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
633 + len + ") > buf.length("
634 + buf.length + ").");
635 }
636 if (this.out == null) {
637 throw new IOException("stream closed");
638 }
639
640 for (int hi = offs + len; offs < hi;) {
641 write0(buf[offs++]);
642 }
643 }
644
645 /**
646 * Keeps track of the last bytes written and implicitly performs
647 * run-length encoding as the first step of the bzip2 algorithm.
648 */
649 private void write0(int b) throws IOException {
650 if (this.currentChar != -1) {
651 b &= 0xff;
652 if (this.currentChar == b) {
653 if (++this.runLength > 254) {
654 writeRun();
655 this.currentChar = -1;
656 this.runLength = 0;
657 }
658 // else nothing to do
659 } else {
660 writeRun();
661 this.runLength = 1;
662 this.currentChar = b;
663 }
664 } else {
665 this.currentChar = b & 0xff;
666 this.runLength++;
667 }
668 }
669
670 private static void hbAssignCodes(final int[] code, final byte[] length,
671 final int minLen, final int maxLen,
672 final int alphaSize) {
673 int vec = 0;
674 for (int n = minLen; n <= maxLen; n++) {
675 for (int i = 0; i < alphaSize; i++) {
676 if ((length[i] & 0xff) == n) {
677 code[i] = vec;
678 vec++;
679 }
680 }
681 vec <<= 1;
682 }
683 }
684
685 private void bsFinishedWithStream() throws IOException {
686 while (this.bsLive > 0) {
687 int ch = this.bsBuff >> 24;
688 this.out.write(ch); // write 8-bit
689 this.bsBuff <<= 8;
690 this.bsLive -= 8;
691 }
692 }
693
694 private void bsW(final int n, final int v) throws IOException {
695 final OutputStream outShadow = this.out;
696 int bsLiveShadow = this.bsLive;
697 int bsBuffShadow = this.bsBuff;
698
699 while (bsLiveShadow >= 8) {
700 outShadow.write(bsBuffShadow >> 24); // write 8-bit
701 bsBuffShadow <<= 8;
702 bsLiveShadow -= 8;
703 }
704
705 this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
706 this.bsLive = bsLiveShadow + n;
707 }
708
709 private void bsPutUByte(final int c) throws IOException {
710 bsW(8, c);
711 }
712
713 private void bsPutInt(final int u) throws IOException {
714 bsW(8, (u >> 24) & 0xff);
715 bsW(8, (u >> 16) & 0xff);
716 bsW(8, (u >> 8) & 0xff);
717 bsW(8, u & 0xff);
718 }
719
720 private void sendMTFValues() throws IOException {
721 final byte[][] len = this.data.sendMTFValues_len;
722 final int alphaSize = this.nInUse + 2;
723
724 for (int t = N_GROUPS; --t >= 0;) {
725 byte[] len_t = len[t];
726 for (int v = alphaSize; --v >= 0;) {
727 len_t[v] = GREATER_ICOST;
728 }
729 }
730
731 /* Decide how many coding tables to use */
732 // assert (this.nMTF > 0) : this.nMTF;
733 final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
734 : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
735
736 /* Generate an initial set of coding tables */
737 sendMTFValues0(nGroups, alphaSize);
738
739 /*
740 * Iterate up to N_ITERS times to improve the tables.
741 */
742 final int nSelectors = sendMTFValues1(nGroups, alphaSize);
743
744 /* Compute MTF values for the selectors. */
745 sendMTFValues2(nGroups, nSelectors);
746
747 /* Assign actual codes for the tables. */
748 sendMTFValues3(nGroups, alphaSize);
749
750 /* Transmit the mapping table. */
751 sendMTFValues4();
752
753 /* Now the selectors. */
754 sendMTFValues5(nGroups, nSelectors);
755
756 /* Now the coding tables. */
757 sendMTFValues6(nGroups, alphaSize);
758
759 /* And finally, the block data proper */
760 sendMTFValues7();
761 }
762
763 private void sendMTFValues0(final int nGroups, final int alphaSize) {
764 final byte[][] len = this.data.sendMTFValues_len;
765 final int[] mtfFreq = this.data.mtfFreq;
766
767 int remF = this.nMTF;
768 int gs = 0;
769
770 for (int nPart = nGroups; nPart > 0; nPart--) {
771 final int tFreq = remF / nPart;
772 int ge = gs - 1;
773 int aFreq = 0;
774
775 for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
776 aFreq += mtfFreq[++ge];
777 }
778
779 if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
780 && (((nGroups - nPart) & 1) != 0)) {
781 aFreq -= mtfFreq[ge--];
782 }
783
784 final byte[] len_np = len[nPart - 1];
785 for (int v = alphaSize; --v >= 0;) {
786 if ((v >= gs) && (v <= ge)) {
787 len_np[v] = LESSER_ICOST;
788 } else {
789 len_np[v] = GREATER_ICOST;
790 }
791 }
792
793 gs = ge + 1;
794 remF -= aFreq;
795 }
796 }
797
798 private int sendMTFValues1(final int nGroups, final int alphaSize) {
799 final Data dataShadow = this.data;
800 final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
801 final int[] fave = dataShadow.sendMTFValues_fave;
802 final short[] cost = dataShadow.sendMTFValues_cost;
803 final char[] sfmap = dataShadow.sfmap;
804 final byte[] selector = dataShadow.selector;
805 final byte[][] len = dataShadow.sendMTFValues_len;
806 final byte[] len_0 = len[0];
807 final byte[] len_1 = len[1];
808 final byte[] len_2 = len[2];
809 final byte[] len_3 = len[3];
810 final byte[] len_4 = len[4];
811 final byte[] len_5 = len[5];
812 final int nMTFShadow = this.nMTF;
813
814 int nSelectors = 0;
815
816 for (int iter = 0; iter < N_ITERS; iter++) {
817 for (int t = nGroups; --t >= 0;) {
818 fave[t] = 0;
819 int[] rfreqt = rfreq[t];
820 for (int i = alphaSize; --i >= 0;) {
821 rfreqt[i] = 0;
822 }
823 }
824
825 nSelectors = 0;
826
827 for (int gs = 0; gs < this.nMTF;) {
828 /* Set group start & end marks. */
829
830 /*
831 * Calculate the cost of this group as coded by each of the
832 * coding tables.
833 */
834
835 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
836
837 if (nGroups == N_GROUPS) {
838 // unrolled version of the else-block
839
840 short cost0 = 0;
841 short cost1 = 0;
842 short cost2 = 0;
843 short cost3 = 0;
844 short cost4 = 0;
845 short cost5 = 0;
846
847 for (int i = gs; i <= ge; i++) {
848 final int icv = sfmap[i];
849 cost0 += len_0[icv] & 0xff;
850 cost1 += len_1[icv] & 0xff;
851 cost2 += len_2[icv] & 0xff;
852 cost3 += len_3[icv] & 0xff;
853 cost4 += len_4[icv] & 0xff;
854 cost5 += len_5[icv] & 0xff;
855 }
856
857 cost[0] = cost0;
858 cost[1] = cost1;
859 cost[2] = cost2;
860 cost[3] = cost3;
861 cost[4] = cost4;
862 cost[5] = cost5;
863
864 } else {
865 for (int t = nGroups; --t >= 0;) {
866 cost[t] = 0;
867 }
868
869 for (int i = gs; i <= ge; i++) {
870 final int icv = sfmap[i];
871 for (int t = nGroups; --t >= 0;) {
872 cost[t] += len[t][icv] & 0xff;
873 }
874 }
875 }
876
877 /*
878 * Find the coding table which is best for this group, and
879 * record its identity in the selector table.
880 */
881 int bt = -1;
882 for (int t = nGroups, bc = 999999999; --t >= 0;) {
883 final int cost_t = cost[t];
884 if (cost_t < bc) {
885 bc = cost_t;
886 bt = t;
887 }
888 }
889
890 fave[bt]++;
891 selector[nSelectors] = (byte) bt;
892 nSelectors++;
893
894 /*
895 * Increment the symbol frequencies for the selected table.
896 */
897 final int[] rfreq_bt = rfreq[bt];
898 for (int i = gs; i <= ge; i++) {
899 rfreq_bt[sfmap[i]]++;
900 }
901
902 gs = ge + 1;
903 }
904
905 /*
906 * Recompute the tables based on the accumulated frequencies.
907 */
908 for (int t = 0; t < nGroups; t++) {
909 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
910 }
911 }
912
913 return nSelectors;
914 }
915
916 private void sendMTFValues2(final int nGroups, final int nSelectors) {
917 // assert (nGroups < 8) : nGroups;
918
919 final Data dataShadow = this.data;
920 byte[] pos = dataShadow.sendMTFValues2_pos;
921
922 for (int i = nGroups; --i >= 0;) {
923 pos[i] = (byte) i;
924 }
925
926 for (int i = 0; i < nSelectors; i++) {
927 final byte ll_i = dataShadow.selector[i];
928 byte tmp = pos[0];
929 int j = 0;
930
931 while (ll_i != tmp) {
932 j++;
933 byte tmp2 = tmp;
934 tmp = pos[j];
935 pos[j] = tmp2;
936 }
937
938 pos[0] = tmp;
939 dataShadow.selectorMtf[i] = (byte) j;
940 }
941 }
942
943 private void sendMTFValues3(final int nGroups, final int alphaSize) {
944 int[][] code = this.data.sendMTFValues_code;
945 byte[][] len = this.data.sendMTFValues_len;
946
947 for (int t = 0; t < nGroups; t++) {
948 int minLen = 32;
949 int maxLen = 0;
950 final byte[] len_t = len[t];
951 for (int i = alphaSize; --i >= 0;) {
952 final int l = len_t[i] & 0xff;
953 if (l > maxLen) {
954 maxLen = l;
955 }
956 if (l < minLen) {
957 minLen = l;
958 }
959 }
960
961 // assert (maxLen <= 20) : maxLen;
962 // assert (minLen >= 1) : minLen;
963
964 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
965 }
966 }
967
968 private void sendMTFValues4() throws IOException {
969 final boolean[] inUse = this.data.inUse;
970 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
971
972 for (int i = 16; --i >= 0;) {
973 inUse16[i] = false;
974 final int i16 = i * 16;
975 for (int j = 16; --j >= 0;) {
976 if (inUse[i16 + j]) {
977 inUse16[i] = true;
978 }
979 }
980 }
981
982 for (int i = 0; i < 16; i++) {
983 bsW(1, inUse16[i] ? 1 : 0);
984 }
985
986 final OutputStream outShadow = this.out;
987 int bsLiveShadow = this.bsLive;
988 int bsBuffShadow = this.bsBuff;
989
990 for (int i = 0; i < 16; i++) {
991 if (inUse16[i]) {
992 final int i16 = i * 16;
993 for (int j = 0; j < 16; j++) {
994 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
995 while (bsLiveShadow >= 8) {
996 outShadow.write(bsBuffShadow >> 24); // write 8-bit
997 bsBuffShadow <<= 8;
998 bsLiveShadow -= 8;
999 }
1000 if (inUse[i16 + j]) {
1001 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1002 }
1003 bsLiveShadow++;
1004 }
1005 }
1006 }
1007
1008 this.bsBuff = bsBuffShadow;
1009 this.bsLive = bsLiveShadow;
1010 }
1011
1012 private void sendMTFValues5(final int nGroups, final int nSelectors)
1013 throws IOException {
1014 bsW(3, nGroups);
1015 bsW(15, nSelectors);
1016
1017 final OutputStream outShadow = this.out;
1018 final byte[] selectorMtf = this.data.selectorMtf;
1019
1020 int bsLiveShadow = this.bsLive;
1021 int bsBuffShadow = this.bsBuff;
1022
1023 for (int i = 0; i < nSelectors; i++) {
1024 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1025 // inlined: bsW(1, 1);
1026 while (bsLiveShadow >= 8) {
1027 outShadow.write(bsBuffShadow >> 24);
1028 bsBuffShadow <<= 8;
1029 bsLiveShadow -= 8;
1030 }
1031 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1032 bsLiveShadow++;
1033 }
1034
1035 // inlined: bsW(1, 0);
1036 while (bsLiveShadow >= 8) {
1037 outShadow.write(bsBuffShadow >> 24);
1038 bsBuffShadow <<= 8;
1039 bsLiveShadow -= 8;
1040 }
1041 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1042 bsLiveShadow++;
1043 }
1044
1045 this.bsBuff = bsBuffShadow;
1046 this.bsLive = bsLiveShadow;
1047 }
1048
1049 private void sendMTFValues6(final int nGroups, final int alphaSize)
1050 throws IOException {
1051 final byte[][] len = this.data.sendMTFValues_len;
1052 final OutputStream outShadow = this.out;
1053
1054 int bsLiveShadow = this.bsLive;
1055 int bsBuffShadow = this.bsBuff;
1056
1057 for (int t = 0; t < nGroups; t++) {
1058 byte[] len_t = len[t];
1059 int curr = len_t[0] & 0xff;
1060
1061 // inlined: bsW(5, curr);
1062 while (bsLiveShadow >= 8) {
1063 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1064 bsBuffShadow <<= 8;
1065 bsLiveShadow -= 8;
1066 }
1067 bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1068 bsLiveShadow += 5;
1069
1070 for (int i = 0; i < alphaSize; i++) {
1071 int lti = len_t[i] & 0xff;
1072 while (curr < lti) {
1073 // inlined: bsW(2, 2);
1074 while (bsLiveShadow >= 8) {
1075 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1076 bsBuffShadow <<= 8;
1077 bsLiveShadow -= 8;
1078 }
1079 bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1080 bsLiveShadow += 2;
1081
1082 curr++; /* 10 */
1083 }
1084
1085 while (curr > lti) {
1086 // inlined: bsW(2, 3);
1087 while (bsLiveShadow >= 8) {
1088 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1089 bsBuffShadow <<= 8;
1090 bsLiveShadow -= 8;
1091 }
1092 bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1093 bsLiveShadow += 2;
1094
1095 curr--; /* 11 */
1096 }
1097
1098 // inlined: bsW(1, 0);
1099 while (bsLiveShadow >= 8) {
1100 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1101 bsBuffShadow <<= 8;
1102 bsLiveShadow -= 8;
1103 }
1104 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1105 bsLiveShadow++;
1106 }
1107 }
1108
1109 this.bsBuff = bsBuffShadow;
1110 this.bsLive = bsLiveShadow;
1111 }
1112
1113 private void sendMTFValues7() throws IOException {
1114 final Data dataShadow = this.data;
1115 final byte[][] len = dataShadow.sendMTFValues_len;
1116 final int[][] code = dataShadow.sendMTFValues_code;
1117 final OutputStream outShadow = this.out;
1118 final byte[] selector = dataShadow.selector;
1119 final char[] sfmap = dataShadow.sfmap;
1120 final int nMTFShadow = this.nMTF;
1121
1122 int selCtr = 0;
1123
1124 int bsLiveShadow = this.bsLive;
1125 int bsBuffShadow = this.bsBuff;
1126
1127 for (int gs = 0; gs < nMTFShadow;) {
1128 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1129 final int selector_selCtr = selector[selCtr] & 0xff;
1130 final int[] code_selCtr = code[selector_selCtr];
1131 final byte[] len_selCtr = len[selector_selCtr];
1132
1133 while (gs <= ge) {
1134 final int sfmap_i = sfmap[gs];
1135
1136 //
1137 // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1138 // code_selCtr[sfmap_i]);
1139 //
1140 while (bsLiveShadow >= 8) {
1141 outShadow.write(bsBuffShadow >> 24);
1142 bsBuffShadow <<= 8;
1143 bsLiveShadow -= 8;
1144 }
1145 final int n = len_selCtr[sfmap_i] & 0xFF;
1146 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1147 bsLiveShadow += n;
1148
1149 gs++;
1150 }
1151
1152 gs = ge + 1;
1153 selCtr++;
1154 }
1155
1156 this.bsBuff = bsBuffShadow;
1157 this.bsLive = bsLiveShadow;
1158 }
1159
1160 private void moveToFrontCodeAndSend() throws IOException {
1161 bsW(24, this.data.origPtr);
1162 generateMTFValues();
1163 sendMTFValues();
1164 }
1165
1166 private void blockSort() {
1167 blockSorter.blockSort(data, last);
1168 }
1169
1170 /*
1171 * Performs Move-To-Front on the Burrows-Wheeler transformed
1172 * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1173 * run-length-encoded form.
1174 *
1175 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1176 */
1177 private void generateMTFValues() {
1178 final int lastShadow = this.last;
1179 final Data dataShadow = this.data;
1180 final boolean[] inUse = dataShadow.inUse;
1181 final byte[] block = dataShadow.block;
1182 final int[] fmap = dataShadow.fmap;
1183 final char[] sfmap = dataShadow.sfmap;
1184 final int[] mtfFreq = dataShadow.mtfFreq;
1185 final byte[] unseqToSeq = dataShadow.unseqToSeq;
1186 final byte[] yy = dataShadow.generateMTFValues_yy;
1187
1188 // make maps
1189 int nInUseShadow = 0;
1190 for (int i = 0; i < 256; i++) {
1191 if (inUse[i]) {
1192 unseqToSeq[i] = (byte) nInUseShadow;
1193 nInUseShadow++;
1194 }
1195 }
1196 this.nInUse = nInUseShadow;
1197
1198 final int eob = nInUseShadow + 1;
1199
1200 for (int i = eob; i >= 0; i--) {
1201 mtfFreq[i] = 0;
1202 }
1203
1204 for (int i = nInUseShadow; --i >= 0;) {
1205 yy[i] = (byte) i;
1206 }
1207
1208 int wr = 0;
1209 int zPend = 0;
1210
1211 for (int i = 0; i <= lastShadow; i++) {
1212 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1213 byte tmp = yy[0];
1214 int j = 0;
1215
1216 while (ll_i != tmp) {
1217 j++;
1218 byte tmp2 = tmp;
1219 tmp = yy[j];
1220 yy[j] = tmp2;
1221 }
1222 yy[0] = tmp;
1223
1224 if (j == 0) {
1225 zPend++;
1226 } else {
1227 if (zPend > 0) {
1228 zPend--;
1229 while (true) {
1230 if ((zPend & 1) == 0) {
1231 sfmap[wr] = RUNA;
1232 wr++;
1233 mtfFreq[RUNA]++;
1234 } else {
1235 sfmap[wr] = RUNB;
1236 wr++;
1237 mtfFreq[RUNB]++;
1238 }
1239
1240 if (zPend >= 2) {
1241 zPend = (zPend - 2) >> 1;
1242 } else {
1243 break;
1244 }
1245 }
1246 zPend = 0;
1247 }
1248 sfmap[wr] = (char) (j + 1);
1249 wr++;
1250 mtfFreq[j + 1]++;
1251 }
1252 }
1253
1254 if (zPend > 0) {
1255 zPend--;
1256 while (true) {
1257 if ((zPend & 1) == 0) {
1258 sfmap[wr] = RUNA;
1259 wr++;
1260 mtfFreq[RUNA]++;
1261 } else {
1262 sfmap[wr] = RUNB;
1263 wr++;
1264 mtfFreq[RUNB]++;
1265 }
1266
1267 if (zPend >= 2) {
1268 zPend = (zPend - 2) >> 1;
1269 } else {
1270 break;
1271 }
1272 }
1273 }
1274
1275 sfmap[wr] = (char) eob;
1276 mtfFreq[eob]++;
1277 this.nMTF = wr + 1;
1278 }
1279
1280 static final class Data extends Object {
1281
1282 // with blockSize 900k
1283 /* maps unsigned byte => "does it occur in block" */
1284 final boolean[] inUse = new boolean[256]; // 256 byte
1285 final byte[] unseqToSeq = new byte[256]; // 256 byte
1286 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1287 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1288 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1289
1290 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1291 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1292 // byte
1293 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1294 // byte
1295 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1296 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1297 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1298 // byte
1299 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1300 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1301
1302 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1303 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1304 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1305
1306 // ------------
1307 // 333408 byte
1308
1309 /* holds the RLEd block of original data starting at index 1.
1310 * After sorting the last byte added to the buffer is at index
1311 * 0. */
1312 final byte[] block; // 900021 byte
1313 /* maps index in Burrows-Wheeler transformed block => index of
1314 * byte in original block */
1315 final int[] fmap; // 3600000 byte
1316 final char[] sfmap; // 3600000 byte
1317 // ------------
1318 // 8433529 byte
1319 // ============
1320
1321 /**
1322 * Index of original line in Burrows-Wheeler table.
1323 *
1324 * <p>This is the index in fmap that points to the last byte
1325 * of the original data.</p>
1326 */
1327 int origPtr;
1328
1329 Data(int blockSize100k) {
1330 super();
1331
1332 final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
1333 this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1334 this.fmap = new int[n];
1335 this.sfmap = new char[2 * n];
1336 }
1337
1338 }
1339
1340 }