001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.bzip2;
020
021import java.io.IOException;
022import java.io.OutputStream;
023import java.util.Arrays;
024
025import org.apache.commons.compress.compressors.CompressorOutputStream;
026
027/**
 * An output stream that compresses data into the BZip2 format and writes the result to another stream.
029 *
030 * <p>
031 * The compression requires large amounts of memory. Thus you should call the
032 * {@link #close() close()} method as soon as possible, to force
033 * {@code BZip2CompressorOutputStream} to release the allocated memory.
034 * </p>
035 *
036 * <p> You can shrink the amount of allocated memory and maybe raise
037 * the compression speed by choosing a lower blocksize, which in turn
038 * may cause a lower compression ratio. You can avoid unnecessary
039 * memory allocation by avoiding using a blocksize which is bigger
040 * than the size of the input.  </p>
041 *
042 * <p> You can compute the memory usage for compressing by the
043 * following formula: </p>
044 *
045 * <pre>
046 * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
047 * </pre>
048 *
049 * <p> To get the memory required for decompression by {@link
050 * BZip2CompressorInputStream} use </p>
051 *
052 * <pre>
053 * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
054 * </pre>
055 *
056 * <table style="width:100%" border="1">
057 * <caption>Memory usage by blocksize</caption>
058 * <tr>
059 * <th colspan="3">Memory usage by blocksize</th>
060 * </tr>
061 * <tr>
062 * <th style="text-align: right">Blocksize</th> <th style="text-align: right">Compression<br>
063 * memory usage</th> <th style="text-align: right">Decompression<br>
064 * memory usage</th>
065 * </tr>
066 * <tr>
067 * <td style="text-align: right">100k</td>
068 * <td style="text-align: right">1300k</td>
069 * <td style="text-align: right">565k</td>
070 * </tr>
071 * <tr>
072 * <td style="text-align: right">200k</td>
073 * <td style="text-align: right">2200k</td>
074 * <td style="text-align: right">1065k</td>
075 * </tr>
076 * <tr>
077 * <td style="text-align: right">300k</td>
078 * <td style="text-align: right">3100k</td>
079 * <td style="text-align: right">1565k</td>
080 * </tr>
081 * <tr>
082 * <td style="text-align: right">400k</td>
083 * <td style="text-align: right">4000k</td>
084 * <td style="text-align: right">2065k</td>
085 * </tr>
086 * <tr>
087 * <td style="text-align: right">500k</td>
088 * <td style="text-align: right">4900k</td>
089 * <td style="text-align: right">2565k</td>
090 * </tr>
091 * <tr>
092 * <td style="text-align: right">600k</td>
093 * <td style="text-align: right">5800k</td>
094 * <td style="text-align: right">3065k</td>
095 * </tr>
096 * <tr>
097 * <td style="text-align: right">700k</td>
098 * <td style="text-align: right">6700k</td>
099 * <td style="text-align: right">3565k</td>
100 * </tr>
101 * <tr>
102 * <td style="text-align: right">800k</td>
103 * <td style="text-align: right">7600k</td>
104 * <td style="text-align: right">4065k</td>
105 * </tr>
106 * <tr>
107 * <td style="text-align: right">900k</td>
108 * <td style="text-align: right">8500k</td>
109 * <td style="text-align: right">4565k</td>
110 * </tr>
111 * </table>
112 *
113 * <p>
114 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the
115 * bzipped input is smaller than one block.
116 * </p>
117 *
118 * <p>
119 * Instances of this class are not threadsafe.
120 * </p>
121 *
122 * <p>
123 * TODO: Update to BZip2 1.0.1
124 * </p>
125 * @NotThreadSafe
126 */
127public class BZip2CompressorOutputStream extends CompressorOutputStream
128    implements BZip2Constants {
129
    /**
     * The minimum supported blocksize {@code  == 1}.
     * The two-argument constructor rejects smaller values and documents the
     * unit as 100k.
     */
    public static final int MIN_BLOCKSIZE = 1;

    /**
     * The maximum supported blocksize {@code  == 9}.
     * This is the value used by the one-argument constructor.
     */
    public static final int MAX_BLOCKSIZE = 9;

    // Seed "code lengths" used before any real Huffman table exists:
    // sendMTFValues0 gives symbols outside a table's partition the greater
    // cost and symbols inside it the lesser cost.
    private static final int GREATER_ICOST = 15;
    private static final int LESSER_ICOST = 0;
142
143    private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
144                                          final Data dat, final int alphaSize,
145                                          final int maxLen) {
146        /*
147         * Nodes and heap entries run from 1. Entry 0 for both the heap and
148         * nodes is a sentinel.
149         */
150        final int[] heap = dat.heap;
151        final int[] weight = dat.weight;
152        final int[] parent = dat.parent;
153
154        for (int i = alphaSize; --i >= 0;) {
155            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
156        }
157
158        for (boolean tooLong = true; tooLong;) {
159            tooLong = false;
160
161            int nNodes = alphaSize;
162            int nHeap = 0;
163            heap[0] = 0;
164            weight[0] = 0;
165            parent[0] = -2;
166
167            for (int i = 1; i <= alphaSize; i++) {
168                parent[i] = -1;
169                nHeap++;
170                heap[nHeap] = i;
171
172                int zz = nHeap;
173                final int tmp = heap[zz];
174                while (weight[tmp] < weight[heap[zz >> 1]]) {
175                    heap[zz] = heap[zz >> 1];
176                    zz >>= 1;
177                }
178                heap[zz] = tmp;
179            }
180
181            while (nHeap > 1) {
182                final int n1 = heap[1];
183                heap[1] = heap[nHeap];
184                nHeap--;
185
186                int yy = 0;
187                int zz = 1;
188                int tmp = heap[1];
189
190                while (true) {
191                    yy = zz << 1;
192
193                    if (yy > nHeap) {
194                        break;
195                    }
196
197                    if ((yy < nHeap)
198                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
199                        yy++;
200                    }
201
202                    if (weight[tmp] < weight[heap[yy]]) {
203                        break;
204                    }
205
206                    heap[zz] = heap[yy];
207                    zz = yy;
208                }
209
210                heap[zz] = tmp;
211
212                final int n2 = heap[1];
213                heap[1] = heap[nHeap];
214                nHeap--;
215
216                yy = 0;
217                zz = 1;
218                tmp = heap[1];
219
220                while (true) {
221                    yy = zz << 1;
222
223                    if (yy > nHeap) {
224                        break;
225                    }
226
227                    if ((yy < nHeap)
228                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
229                        yy++;
230                    }
231
232                    if (weight[tmp] < weight[heap[yy]]) {
233                        break;
234                    }
235
236                    heap[zz] = heap[yy];
237                    zz = yy;
238                }
239
240                heap[zz] = tmp;
241                nNodes++;
242                parent[n1] = parent[n2] = nNodes;
243
244                final int weight_n1 = weight[n1];
245                final int weight_n2 = weight[n2];
246                weight[nNodes] = ((weight_n1 & 0xffffff00)
247                                  + (weight_n2 & 0xffffff00))
248                    | (1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff));
249
250                parent[nNodes] = -1;
251                nHeap++;
252                heap[nHeap] = nNodes;
253
254                tmp = 0;
255                zz = nHeap;
256                tmp = heap[zz];
257                final int weight_tmp = weight[tmp];
258                while (weight_tmp < weight[heap[zz >> 1]]) {
259                    heap[zz] = heap[zz >> 1];
260                    zz >>= 1;
261                }
262                heap[zz] = tmp;
263
264            }
265
266            for (int i = 1; i <= alphaSize; i++) {
267                int j = 0;
268                int k = i;
269
270                for (int parent_k; (parent_k = parent[k]) >= 0;) {
271                    k = parent_k;
272                    j++;
273                }
274
275                len[i - 1] = (byte) j;
276                if (j > maxLen) {
277                    tooLong = true;
278                }
279            }
280
281            if (tooLong) {
282                for (int i = 1; i < alphaSize; i++) {
283                    int j = weight[i] >> 8;
284                    j = 1 + (j >> 1);
285                    weight[i] = j << 8;
286                }
287            }
288        }
289    }
290
    /**
     * Index of the last char in the block, so the block size == last + 1.
     * Reset to -1 when a new block is started.
     */
    private int last;

    /**
     * Always: in the range 1 .. 9 (the constructor rejects anything else).
     * The current block size is 100000 * this number.
     */
    private final int blockSize100k;

    /** Bit accumulator; pending output bits sit in the most significant positions. */
    private int bsBuff;
    /** Number of pending bits currently held in {@code bsBuff}. */
    private int bsLive;
    /** CRC of the uncompressed data of the block currently being filled. */
    private final CRC crc = new CRC();

    // NOTE(review): assigned outside the code visible here; presumably the
    // number of distinct byte values used in the current block. The alphabet
    // size is derived from it as nInUse + 2.
    private int nInUse;

    // NOTE(review): assigned outside the code visible here; presumably the
    // number of symbols produced by the move-to-front stage for this block.
    private int nMTF;

    /** Byte value of the run being collected, or -1 if no run is pending. */
    private int currentChar = -1;
    /** Length of the pending run of {@code currentChar}. */
    private int runLength;

    /** CRC of the most recently ended block. */
    private int blockCRC;
    /** CRC over all blocks: rotated left by one bit and XORed with each block CRC. */
    private int combinedCRC;
    /** Once {@code last} reaches this value the current block is ended and a new one started. */
    private final int allowableBlockSize;

    /**
     * All memory intensive stuff. Released (set to null) by finish().
     */
    private Data data;
    private BlockSort blockSorter;

    /** The destination stream; set to null by finish(). */
    private OutputStream out;
    /** Set once finish() has been entered; further writes are rejected. */
    private volatile boolean closed;
325
326    /**
327     * Chooses a blocksize based on the given length of the data to compress.
328     *
329     * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
330     *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
331     *         {@code inputLength} this method returns {@code MAX_BLOCKSIZE}
332     *         always.
333     *
334     * @param inputLength
335     *            The length of the data which will be compressed by
336     *            {@code BZip2CompressorOutputStream}.
337     */
338    public static int chooseBlockSize(final long inputLength) {
339        return (inputLength > 0) ? (int) Math
340            .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
341    }
342
    /**
     * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
     *
     * <p>Equivalent to calling the two-argument constructor with
     * {@link #MAX_BLOCKSIZE}.</p>
     *
     * @param out
     *            the destination stream.
     *
     * @throws IOException
     *             if an I/O error occurs in the specified stream.
     * @throws NullPointerException
     *             if {@code out == null}.
     */
    public BZip2CompressorOutputStream(final OutputStream out)
        throws IOException {
        this(out, MAX_BLOCKSIZE);
    }
358
359    /**
360     * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
361     *
362     * @param out
363     *            the destination stream.
364     * @param blockSize
365     *            the blockSize as 100k units.
366     *
367     * @throws IOException
368     *             if an I/O error occurs in the specified stream.
369     * @throws IllegalArgumentException
370     *             if {@code (blockSize &lt; 1) || (blockSize &gt; 9)}.
371     * @throws NullPointerException
372     *             if {@code out == null}.
373     *
374     * @see #MIN_BLOCKSIZE
375     * @see #MAX_BLOCKSIZE
376     */
377    public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
378        if (blockSize < 1) {
379            throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
380        }
381        if (blockSize > 9) {
382            throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
383        }
384
385        this.blockSize100k = blockSize;
386        this.out = out;
387
388        /* 20 is just a paranoia constant */
389        this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
390        init();
391    }
392
    /**
     * Writes a single byte to the compressed stream.
     *
     * @param b
     *            the byte to write; only its low eight bits are used.
     *
     * @throws IOException
     *             if the stream has already been finished or closed, or if
     *             writing compressed data to the destination fails.
     */
    @Override
    public void write(final int b) throws IOException {
        if (closed) {
            throw new IOException("Closed");
        }
        write0(b);
    }
400
401    /**
402     * Writes the current byte to the buffer, run-length encoding it
403     * if it has been repeated at least four times (the first step
404     * RLEs sequences of four identical bytes).
405     *
406     * <p>Flushes the current block before writing data if it is
407     * full.</p>
408     *
409     * <p>"write to the buffer" means adding to data.buffer starting
410     * two steps "after" this.last - initially starting at index 1
411     * (not 0) - and updating this.last to point to the last index
412     * written minus 1.</p>
413     */
414    private void writeRun() throws IOException {
415        final int lastShadow = this.last;
416
417        if (lastShadow < this.allowableBlockSize) {
418            final int currentCharShadow = this.currentChar;
419            final Data dataShadow = this.data;
420            dataShadow.inUse[currentCharShadow] = true;
421            final byte ch = (byte) currentCharShadow;
422
423            int runLengthShadow = this.runLength;
424            this.crc.updateCRC(currentCharShadow, runLengthShadow);
425
426            switch (runLengthShadow) {
427            case 1:
428                dataShadow.block[lastShadow + 2] = ch;
429                this.last = lastShadow + 1;
430                break;
431
432            case 2:
433                dataShadow.block[lastShadow + 2] = ch;
434                dataShadow.block[lastShadow + 3] = ch;
435                this.last = lastShadow + 2;
436                break;
437
438            case 3: {
439                final byte[] block = dataShadow.block;
440                block[lastShadow + 2] = ch;
441                block[lastShadow + 3] = ch;
442                block[lastShadow + 4] = ch;
443                this.last = lastShadow + 3;
444            }
445                break;
446
447            default: {
448                runLengthShadow -= 4;
449                dataShadow.inUse[runLengthShadow] = true;
450                final byte[] block = dataShadow.block;
451                block[lastShadow + 2] = ch;
452                block[lastShadow + 3] = ch;
453                block[lastShadow + 4] = ch;
454                block[lastShadow + 5] = ch;
455                block[lastShadow + 6] = (byte) runLengthShadow;
456                this.last = lastShadow + 5;
457            }
458                break;
459
460            }
461        } else {
462            endBlock();
463            initBlock();
464            writeRun();
465        }
466    }
467
    /**
     * Overridden to warn about an unclosed stream.
     *
     * <p>If the stream was neither finished nor closed, a message is
     * printed to {@code System.err}. The stream is deliberately not closed
     * here.</p>
     *
     * @throws Throwable
     *             whatever {@code super.finalize()} throws.
     */
    @Override
    protected void finalize() throws Throwable {
        if (!closed) {
            System.err.println("Unclosed BZip2CompressorOutputStream detected, will *not* close it");
        }
        super.finalize();
    }
478
479
480    public void finish() throws IOException {
481        if (!closed) {
482            closed = true;
483            try {
484                if (this.runLength > 0) {
485                    writeRun();
486                }
487                this.currentChar = -1;
488                endBlock();
489                endCompression();
490            } finally {
491                this.out = null;
492                this.blockSorter = null;
493                this.data = null;
494            }
495        }
496    }
497
498    @Override
499    public void close() throws IOException {
500        if (!closed) {
501            try (OutputStream outShadow = this.out) {
502                finish();
503            }
504        }
505    }
506
    /**
     * Flushes the destination stream, if it is still referenced.
     *
     * <p>This only delegates to the destination's {@code flush()}; it does
     * not end the current block. After {@code finish()} the destination is
     * no longer referenced and this method does nothing.</p>
     *
     * @throws IOException
     *             if flushing the destination stream fails.
     */
    @Override
    public void flush() throws IOException {
        final OutputStream outShadow = this.out;
        if (outShadow != null) {
            outShadow.flush();
        }
    }
514
    /**
     * Writes magic bytes like BZ on the first position of the stream
     * and bytes indicating the file-format, which is
     * huffmanised, followed by a digit indicating blockSize100k.
     * Also allocates the working memory and starts the first block.
     *
     * @throws IOException if the magic bytes could not be written
     */
    private void init() throws IOException {
        bsPutUByte('B');
        bsPutUByte('Z');

        // allocate the memory intensive working structures
        this.data = new Data(this.blockSize100k);
        this.blockSorter = new BlockSort(this.data);

        // huffmanised magic bytes
        bsPutUByte('h');
        // the blocksize as a single ASCII digit
        bsPutUByte('0' + this.blockSize100k);

        this.combinedCRC = 0;
        initBlock();
    }
535
536    private void initBlock() {
537        // blockNo++;
538        this.crc.initializeCRC();
539        this.last = -1;
540        // ch = 0;
541
542        final boolean[] inUse = this.data.inUse;
543        for (int i = 256; --i >= 0;) {
544            inUse[i] = false;
545        }
546
547    }
548
549    private void endBlock() throws IOException {
550        this.blockCRC = this.crc.getFinalCRC();
551        this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
552        this.combinedCRC ^= this.blockCRC;
553
554        // empty block at end of file
555        if (this.last == -1) {
556            return;
557        }
558
559        /* sort the block and establish posn of original string */
560        blockSort();
561
562        /*
563         * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
564         * :-). A 32 bit value does not really give a strong enough guarantee
565         * that the value will not appear by chance in the compressed
566         * datastream. Worst-case probability of this event, for a 900k block,
567         * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
568         * bits. For a compressed file of size 100Gb -- about 100000 blocks --
569         * only a 48-bit marker will do. NB: normal compression/ decompression
570         * donot rely on these statistical properties. They are only important
571         * when trying to recover blocks from damaged files.
572         */
573        bsPutUByte(0x31);
574        bsPutUByte(0x41);
575        bsPutUByte(0x59);
576        bsPutUByte(0x26);
577        bsPutUByte(0x53);
578        bsPutUByte(0x59);
579
580        /* Now the block's CRC, so it is in a known place. */
581        bsPutInt(this.blockCRC);
582
583        /* Now a single bit indicating no randomisation. */
584        bsW(1, 0);
585
586        /* Finally, block's contents proper. */
587        moveToFrontCodeAndSend();
588    }
589
590    private void endCompression() throws IOException {
591        /*
592         * Now another magic 48-bit number, 0x177245385090, to indicate the end
593         * of the last block. (sqrt(pi), if you want to know. I did want to use
594         * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
595         * to feel statistically comfortable. Call me paranoid.)
596         */
597        bsPutUByte(0x17);
598        bsPutUByte(0x72);
599        bsPutUByte(0x45);
600        bsPutUByte(0x38);
601        bsPutUByte(0x50);
602        bsPutUByte(0x90);
603
604        bsPutInt(this.combinedCRC);
605        bsFinishedWithStream();
606    }
607
    /**
     * Returns the blocksize parameter specified at construction time.
     *
     * @return the blocksize parameter specified at construction time, a
     *         value between {@link #MIN_BLOCKSIZE} and
     *         {@link #MAX_BLOCKSIZE}
     */
    public final int getBlockSize() {
        return this.blockSize100k;
    }
615
616    @Override
617    public void write(final byte[] buf, int offs, final int len)
618        throws IOException {
619        if (offs < 0) {
620            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
621        }
622        if (len < 0) {
623            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
624        }
625        if (offs + len > buf.length) {
626            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
627                                                + len + ") > buf.length("
628                                                + buf.length + ").");
629        }
630        if (closed) {
631            throw new IOException("Stream closed");
632        }
633
634        for (final int hi = offs + len; offs < hi;) {
635            write0(buf[offs++]);
636        }
637    }
638
639    /**
640     * Keeps track of the last bytes written and implicitly performs
641     * run-length encoding as the first step of the bzip2 algorithm.
642     */
643    private void write0(int b) throws IOException {
644        if (this.currentChar != -1) {
645            b &= 0xff;
646            if (this.currentChar == b) {
647                if (++this.runLength > 254) {
648                    writeRun();
649                    this.currentChar = -1;
650                    this.runLength = 0;
651                }
652                // else nothing to do
653            } else {
654                writeRun();
655                this.runLength = 1;
656                this.currentChar = b;
657            }
658        } else {
659            this.currentChar = b & 0xff;
660            this.runLength++;
661        }
662    }
663
664    private static void hbAssignCodes(final int[] code, final byte[] length,
665                                      final int minLen, final int maxLen,
666                                      final int alphaSize) {
667        int vec = 0;
668        for (int n = minLen; n <= maxLen; n++) {
669            for (int i = 0; i < alphaSize; i++) {
670                if ((length[i] & 0xff) == n) {
671                    code[i] = vec;
672                    vec++;
673                }
674            }
675            vec <<= 1;
676        }
677    }
678
679    private void bsFinishedWithStream() throws IOException {
680        while (this.bsLive > 0) {
681            final int ch = this.bsBuff >> 24;
682            this.out.write(ch); // write 8-bit
683            this.bsBuff <<= 8;
684            this.bsLive -= 8;
685        }
686    }
687
    /**
     * Appends {@code n} bits holding the value {@code v} to the bit buffer.
     *
     * <p>Complete bytes that are already pending are written to the
     * destination stream first, so fewer than eight bits remain pending
     * when the new bits are added directly below them.</p>
     *
     * <p>{@code v} is not masked: the caller has to pass a value that fits
     * into {@code n} bits, otherwise its higher bits would be ORed over
     * bits that are already pending.</p>
     *
     * @param n number of bits to append
     * @param v the value to append
     * @throws IOException if writing to the destination stream fails
     */
    private void bsW(final int n, final int v) throws IOException {
        // work on local copies; the fields are only updated at the very end
        final OutputStream outShadow = this.out;
        int bsLiveShadow = this.bsLive;
        int bsBuffShadow = this.bsBuff;

        // drain whole bytes from the top of the buffer
        while (bsLiveShadow >= 8) {
            outShadow.write(bsBuffShadow >> 24); // write 8-bit
            bsBuffShadow <<= 8;
            bsLiveShadow -= 8;
        }

        // place v immediately below the bits that are still pending
        this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
        this.bsLive = bsLiveShadow + n;
    }
702
    /**
     * Appends one byte (eight bits) to the bit buffer.
     *
     * @param c the byte value; passed to {@code bsW} unmasked, so it must
     *          fit into eight bits
     * @throws IOException if writing to the destination stream fails
     */
    private void bsPutUByte(final int c) throws IOException {
        bsW(8, c);
    }
706
707    private void bsPutInt(final int u) throws IOException {
708        bsW(8, (u >> 24) & 0xff);
709        bsW(8, (u >> 16) & 0xff);
710        bsW(8, (u >> 8) & 0xff);
711        bsW(8, u & 0xff);
712    }
713
714    private void sendMTFValues() throws IOException {
715        final byte[][] len = this.data.sendMTFValues_len;
716        final int alphaSize = this.nInUse + 2;
717
718        for (int t = N_GROUPS; --t >= 0;) {
719            final byte[] len_t = len[t];
720            for (int v = alphaSize; --v >= 0;) {
721                len_t[v] = GREATER_ICOST;
722            }
723        }
724
725        /* Decide how many coding tables to use */
726        // assert (this.nMTF > 0) : this.nMTF;
727        final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
728            : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
729
730        /* Generate an initial set of coding tables */
731        sendMTFValues0(nGroups, alphaSize);
732
733        /*
734         * Iterate up to N_ITERS times to improve the tables.
735         */
736        final int nSelectors = sendMTFValues1(nGroups, alphaSize);
737
738        /* Compute MTF values for the selectors. */
739        sendMTFValues2(nGroups, nSelectors);
740
741        /* Assign actual codes for the tables. */
742        sendMTFValues3(nGroups, alphaSize);
743
744        /* Transmit the mapping table. */
745        sendMTFValues4();
746
747        /* Now the selectors. */
748        sendMTFValues5(nGroups, nSelectors);
749
750        /* Now the coding tables. */
751        sendMTFValues6(nGroups, alphaSize);
752
753        /* And finally, the block data proper */
754        sendMTFValues7();
755    }
756
757    private void sendMTFValues0(final int nGroups, final int alphaSize) {
758        final byte[][] len = this.data.sendMTFValues_len;
759        final int[] mtfFreq = this.data.mtfFreq;
760
761        int remF = this.nMTF;
762        int gs = 0;
763
764        for (int nPart = nGroups; nPart > 0; nPart--) {
765            final int tFreq = remF / nPart;
766            int ge = gs - 1;
767            int aFreq = 0;
768
769            for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
770                aFreq += mtfFreq[++ge];
771            }
772
773            if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
774                && (((nGroups - nPart) & 1) != 0)) {
775                aFreq -= mtfFreq[ge--];
776            }
777
778            final byte[] len_np = len[nPart - 1];
779            for (int v = alphaSize; --v >= 0;) {
780                if ((v >= gs) && (v <= ge)) {
781                    len_np[v] = LESSER_ICOST;
782                } else {
783                    len_np[v] = GREATER_ICOST;
784                }
785            }
786
787            gs = ge + 1;
788            remF -= aFreq;
789        }
790    }
791
    /**
     * Iteratively improves the coding tables.
     *
     * <p>In each of {@code N_ITERS} rounds the MTF symbols are processed in
     * groups of up to {@code G_SIZE} symbols. Every group is assigned to the
     * table that encodes it with the lowest cost, the choice is recorded in
     * the selector array, and the symbol frequencies of the group are added
     * to the chosen table's statistics. At the end of a round the code
     * lengths of all tables are recomputed from these statistics, limited
     * to 20 bits.</p>
     *
     * @param nGroups   number of coding tables in use
     * @param alphaSize number of symbols in the alphabet
     * @return the number of selectors recorded in the last round
     */
    private int sendMTFValues1(final int nGroups, final int alphaSize) {
        final Data dataShadow = this.data;
        final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
        final int[] fave = dataShadow.sendMTFValues_fave;
        final short[] cost = dataShadow.sendMTFValues_cost;
        final char[] sfmap = dataShadow.sfmap;
        final byte[] selector = dataShadow.selector;
        final byte[][] len = dataShadow.sendMTFValues_len;
        final byte[] len_0 = len[0];
        final byte[] len_1 = len[1];
        final byte[] len_2 = len[2];
        final byte[] len_3 = len[3];
        final byte[] len_4 = len[4];
        final byte[] len_5 = len[5];
        final int nMTFShadow = this.nMTF;

        int nSelectors = 0;

        for (int iter = 0; iter < N_ITERS; iter++) {
            // reset the per-table statistics of this round
            for (int t = nGroups; --t >= 0;) {
                fave[t] = 0;
                final int[] rfreqt = rfreq[t];
                for (int i = alphaSize; --i >= 0;) {
                    rfreqt[i] = 0;
                }
            }

            nSelectors = 0;

            for (int gs = 0; gs < this.nMTF;) {
                /* Set group start & end marks. */

                /*
                 * Calculate the cost of this group as coded by each of the
                 * coding tables.
                 */

                // last symbol index of this group, clamped to the last symbol
                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);

                if (nGroups == N_GROUPS) {
                    // unrolled version of the else-block

                    short cost0 = 0;
                    short cost1 = 0;
                    short cost2 = 0;
                    short cost3 = 0;
                    short cost4 = 0;
                    short cost5 = 0;

                    for (int i = gs; i <= ge; i++) {
                        final int icv = sfmap[i];
                        cost0 += len_0[icv] & 0xff;
                        cost1 += len_1[icv] & 0xff;
                        cost2 += len_2[icv] & 0xff;
                        cost3 += len_3[icv] & 0xff;
                        cost4 += len_4[icv] & 0xff;
                        cost5 += len_5[icv] & 0xff;
                    }

                    cost[0] = cost0;
                    cost[1] = cost1;
                    cost[2] = cost2;
                    cost[3] = cost3;
                    cost[4] = cost4;
                    cost[5] = cost5;

                } else {
                    for (int t = nGroups; --t >= 0;) {
                        cost[t] = 0;
                    }

                    for (int i = gs; i <= ge; i++) {
                        final int icv = sfmap[i];
                        for (int t = nGroups; --t >= 0;) {
                            cost[t] += len[t][icv] & 0xff;
                        }
                    }
                }

                /*
                 * Find the coding table which is best for this group, and
                 * record its identity in the selector table.
                 */
                // tables are scanned from the highest index down; a later
                // table only wins with a strictly lower cost
                int bt = -1;
                for (int t = nGroups, bc = 999999999; --t >= 0;) {
                    final int cost_t = cost[t];
                    if (cost_t < bc) {
                        bc = cost_t;
                        bt = t;
                    }
                }

                fave[bt]++;
                selector[nSelectors] = (byte) bt;
                nSelectors++;

                /*
                 * Increment the symbol frequencies for the selected table.
                 */
                final int[] rfreq_bt = rfreq[bt];
                for (int i = gs; i <= ge; i++) {
                    rfreq_bt[sfmap[i]]++;
                }

                gs = ge + 1;
            }

            /*
             * Recompute the tables based on the accumulated frequencies.
             */
            for (int t = 0; t < nGroups; t++) {
                hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
            }
        }

        return nSelectors;
    }
909
910    private void sendMTFValues2(final int nGroups, final int nSelectors) {
911        // assert (nGroups < 8) : nGroups;
912
913        final Data dataShadow = this.data;
914        final byte[] pos = dataShadow.sendMTFValues2_pos;
915
916        for (int i = nGroups; --i >= 0;) {
917            pos[i] = (byte) i;
918        }
919
920        for (int i = 0; i < nSelectors; i++) {
921            final byte ll_i = dataShadow.selector[i];
922            byte tmp = pos[0];
923            int j = 0;
924
925            while (ll_i != tmp) {
926                j++;
927                final byte tmp2 = tmp;
928                tmp = pos[j];
929                pos[j] = tmp2;
930            }
931
932            pos[0] = tmp;
933            dataShadow.selectorMtf[i] = (byte) j;
934        }
935    }
936
937    private void sendMTFValues3(final int nGroups, final int alphaSize) {
938        final int[][] code = this.data.sendMTFValues_code;
939        final byte[][] len = this.data.sendMTFValues_len;
940
941        for (int t = 0; t < nGroups; t++) {
942            int minLen = 32;
943            int maxLen = 0;
944            final byte[] len_t = len[t];
945            for (int i = alphaSize; --i >= 0;) {
946                final int l = len_t[i] & 0xff;
947                if (l > maxLen) {
948                    maxLen = l;
949                }
950                if (l < minLen) {
951                    minLen = l;
952                }
953            }
954
955            // assert (maxLen <= 20) : maxLen;
956            // assert (minLen >= 1) : minLen;
957
958            hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
959        }
960    }
961
962    private void sendMTFValues4() throws IOException {
963        final boolean[] inUse = this.data.inUse;
964        final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
965
966        for (int i = 16; --i >= 0;) {
967            inUse16[i] = false;
968            final int i16 = i * 16;
969            for (int j = 16; --j >= 0;) {
970                if (inUse[i16 + j]) {
971                    inUse16[i] = true;
972                    break;
973                }
974            }
975        }
976
977        for (int i = 0; i < 16; i++) {
978            bsW(1, inUse16[i] ? 1 : 0);
979        }
980
981        final OutputStream outShadow = this.out;
982        int bsLiveShadow = this.bsLive;
983        int bsBuffShadow = this.bsBuff;
984
985        for (int i = 0; i < 16; i++) {
986            if (inUse16[i]) {
987                final int i16 = i * 16;
988                for (int j = 0; j < 16; j++) {
989                    // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
990                    while (bsLiveShadow >= 8) {
991                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
992                        bsBuffShadow <<= 8;
993                        bsLiveShadow -= 8;
994                    }
995                    if (inUse[i16 + j]) {
996                        bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
997                    }
998                    bsLiveShadow++;
999                }
1000            }
1001        }
1002
1003        this.bsBuff = bsBuffShadow;
1004        this.bsLive = bsLiveShadow;
1005    }
1006
1007    private void sendMTFValues5(final int nGroups, final int nSelectors)
1008        throws IOException {
1009        bsW(3, nGroups);
1010        bsW(15, nSelectors);
1011
1012        final OutputStream outShadow = this.out;
1013        final byte[] selectorMtf = this.data.selectorMtf;
1014
1015        int bsLiveShadow = this.bsLive;
1016        int bsBuffShadow = this.bsBuff;
1017
1018        for (int i = 0; i < nSelectors; i++) {
1019            for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1020                // inlined: bsW(1, 1);
1021                while (bsLiveShadow >= 8) {
1022                    outShadow.write(bsBuffShadow >> 24);
1023                    bsBuffShadow <<= 8;
1024                    bsLiveShadow -= 8;
1025                }
1026                bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1027                bsLiveShadow++;
1028            }
1029
1030            // inlined: bsW(1, 0);
1031            while (bsLiveShadow >= 8) {
1032                outShadow.write(bsBuffShadow >> 24);
1033                bsBuffShadow <<= 8;
1034                bsLiveShadow -= 8;
1035            }
1036            // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1037            bsLiveShadow++;
1038        }
1039
1040        this.bsBuff = bsBuffShadow;
1041        this.bsLive = bsLiveShadow;
1042    }
1043
1044    private void sendMTFValues6(final int nGroups, final int alphaSize)
1045        throws IOException {
1046        final byte[][] len = this.data.sendMTFValues_len;
1047        final OutputStream outShadow = this.out;
1048
1049        int bsLiveShadow = this.bsLive;
1050        int bsBuffShadow = this.bsBuff;
1051
1052        for (int t = 0; t < nGroups; t++) {
1053            final byte[] len_t = len[t];
1054            int curr = len_t[0] & 0xff;
1055
1056            // inlined: bsW(5, curr);
1057            while (bsLiveShadow >= 8) {
1058                outShadow.write(bsBuffShadow >> 24); // write 8-bit
1059                bsBuffShadow <<= 8;
1060                bsLiveShadow -= 8;
1061            }
1062            bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1063            bsLiveShadow += 5;
1064
1065            for (int i = 0; i < alphaSize; i++) {
1066                final int lti = len_t[i] & 0xff;
1067                while (curr < lti) {
1068                    // inlined: bsW(2, 2);
1069                    while (bsLiveShadow >= 8) {
1070                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1071                        bsBuffShadow <<= 8;
1072                        bsLiveShadow -= 8;
1073                    }
1074                    bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1075                    bsLiveShadow += 2;
1076
1077                    curr++; /* 10 */
1078                }
1079
1080                while (curr > lti) {
1081                    // inlined: bsW(2, 3);
1082                    while (bsLiveShadow >= 8) {
1083                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
1084                        bsBuffShadow <<= 8;
1085                        bsLiveShadow -= 8;
1086                    }
1087                    bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1088                    bsLiveShadow += 2;
1089
1090                    curr--; /* 11 */
1091                }
1092
1093                // inlined: bsW(1, 0);
1094                while (bsLiveShadow >= 8) {
1095                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
1096                    bsBuffShadow <<= 8;
1097                    bsLiveShadow -= 8;
1098                }
1099                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1100                bsLiveShadow++;
1101            }
1102        }
1103
1104        this.bsBuff = bsBuffShadow;
1105        this.bsLive = bsLiveShadow;
1106    }
1107
1108    private void sendMTFValues7() throws IOException {
1109        final Data dataShadow = this.data;
1110        final byte[][] len = dataShadow.sendMTFValues_len;
1111        final int[][] code = dataShadow.sendMTFValues_code;
1112        final OutputStream outShadow = this.out;
1113        final byte[] selector = dataShadow.selector;
1114        final char[] sfmap = dataShadow.sfmap;
1115        final int nMTFShadow = this.nMTF;
1116
1117        int selCtr = 0;
1118
1119        int bsLiveShadow = this.bsLive;
1120        int bsBuffShadow = this.bsBuff;
1121
1122        for (int gs = 0; gs < nMTFShadow;) {
1123            final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1124            final int selector_selCtr = selector[selCtr] & 0xff;
1125            final int[] code_selCtr = code[selector_selCtr];
1126            final byte[] len_selCtr = len[selector_selCtr];
1127
1128            while (gs <= ge) {
1129                final int sfmap_i = sfmap[gs];
1130
1131                //
1132                // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1133                // code_selCtr[sfmap_i]);
1134                //
1135                while (bsLiveShadow >= 8) {
1136                    outShadow.write(bsBuffShadow >> 24);
1137                    bsBuffShadow <<= 8;
1138                    bsLiveShadow -= 8;
1139                }
1140                final int n = len_selCtr[sfmap_i] & 0xFF;
1141                bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1142                bsLiveShadow += n;
1143
1144                gs++;
1145            }
1146
1147            gs = ge + 1;
1148            selCtr++;
1149        }
1150
1151        this.bsBuff = bsBuffShadow;
1152        this.bsLive = bsLiveShadow;
1153    }
1154
1155    private void moveToFrontCodeAndSend() throws IOException {
1156        bsW(24, this.data.origPtr);
1157        generateMTFValues();
1158        sendMTFValues();
1159    }
1160
1161    private void blockSort() {
1162        blockSorter.blockSort(data, last);
1163    }
1164
1165    /*
1166     * Performs Move-To-Front on the Burrows-Wheeler transformed
1167     * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1168     * run-length-encoded form.
1169     *
1170     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1171     */
1172    private void generateMTFValues() {
1173        final int lastShadow = this.last;
1174        final Data dataShadow = this.data;
1175        final boolean[] inUse = dataShadow.inUse;
1176        final byte[] block = dataShadow.block;
1177        final int[] fmap = dataShadow.fmap;
1178        final char[] sfmap = dataShadow.sfmap;
1179        final int[] mtfFreq = dataShadow.mtfFreq;
1180        final byte[] unseqToSeq = dataShadow.unseqToSeq;
1181        final byte[] yy = dataShadow.generateMTFValues_yy;
1182
1183        // make maps
1184        int nInUseShadow = 0;
1185        for (int i = 0; i < 256; i++) {
1186            if (inUse[i]) {
1187                unseqToSeq[i] = (byte) nInUseShadow;
1188                nInUseShadow++;
1189            }
1190        }
1191        this.nInUse = nInUseShadow;
1192
1193        final int eob = nInUseShadow + 1;
1194
1195        Arrays.fill(mtfFreq, 0, eob + 1, 0);
1196
1197        for (int i = nInUseShadow; --i >= 0;) {
1198            yy[i] = (byte) i;
1199        }
1200        
1201        int wr = 0;
1202        int zPend = 0;
1203
1204        for (int i = 0; i <= lastShadow; i++) {
1205            final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1206            byte tmp = yy[0];
1207            int j = 0;
1208
1209            while (ll_i != tmp) {
1210                j++;
1211                final byte tmp2 = tmp;
1212                tmp = yy[j];
1213                yy[j] = tmp2;
1214            }
1215            yy[0] = tmp;
1216
1217            if (j == 0) {
1218                zPend++;
1219            } else {
1220                if (zPend > 0) {
1221                    zPend--;
1222                    while (true) {
1223                        if ((zPend & 1) == 0) {
1224                            sfmap[wr] = RUNA;
1225                            wr++;
1226                            mtfFreq[RUNA]++;
1227                        } else {
1228                            sfmap[wr] = RUNB;
1229                            wr++;
1230                            mtfFreq[RUNB]++;
1231                        }
1232
1233                        if (zPend < 2) {
1234                            break;
1235                        }
1236                        zPend = (zPend - 2) >> 1;
1237                    }
1238                    zPend = 0;
1239                }
1240                sfmap[wr] = (char) (j + 1);
1241                wr++;
1242                mtfFreq[j + 1]++;
1243            }
1244        }
1245
1246        if (zPend > 0) {
1247            zPend--;
1248            while (true) {
1249                if ((zPend & 1) == 0) {
1250                    sfmap[wr] = RUNA;
1251                    wr++;
1252                    mtfFreq[RUNA]++;
1253                } else {
1254                    sfmap[wr] = RUNB;
1255                    wr++;
1256                    mtfFreq[RUNB]++;
1257                }
1258
1259                if (zPend < 2) {
1260                    break;
1261                }
1262                zPend = (zPend - 2) >> 1;
1263            }
1264        }
1265
1266        sfmap[wr] = (char) eob;
1267        mtfFreq[eob]++;
1268        this.nMTF = wr + 1;
1269    }
1270
1271    static final class Data {
1272
1273        // with blockSize 900k
1274        /* maps unsigned byte => "does it occur in block" */
1275        final boolean[] inUse = new boolean[256]; // 256 byte
1276        final byte[] unseqToSeq = new byte[256]; // 256 byte
1277        final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1278        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1279        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1280
1281        final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1282        final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1283        // byte
1284        final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1285        // byte
1286        final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1287        final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1288        final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1289        // byte
1290        final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1291        final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1292
1293        final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1294        final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1295        final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1296
1297        // ------------
1298        // 333408 byte
1299
1300        /* holds the RLEd block of original data starting at index 1.
1301         * After sorting the last byte added to the buffer is at index
1302         * 0. */
1303        final byte[] block; // 900021 byte
1304        /* maps index in Burrows-Wheeler transformed block => index of
1305         * byte in original block */
1306        final int[] fmap; // 3600000 byte
1307        final char[] sfmap; // 3600000 byte
1308        // ------------
1309        // 8433529 byte
1310        // ============
1311
1312        /**
1313         * Index of original line in Burrows-Wheeler table.
1314         *
1315         * <p>This is the index in fmap that points to the last byte
1316         * of the original data.</p>
1317         */
1318        int origPtr;
1319
1320        Data(final int blockSize100k) {
1321            final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
1322            this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1323            this.fmap = new int[n];
1324            this.sfmap = new char[2 * n];
1325        }
1326
1327    }
1328
1329}