001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020 /*
021 * This package is based on the work done by Keiron Liddle, Aftex Software
022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
023 * great code.
024 */
025 package org.apache.commons.compress.compressors.bzip2;
026
027 import java.io.IOException;
028 import java.io.InputStream;
029
030 import org.apache.commons.compress.compressors.CompressorInputStream;
031
032 /**
033 * An input stream that decompresses from the BZip2 format to be read as any other stream.
034 *
035 * @NotThreadSafe
036 */
037 public class BZip2CompressorInputStream extends CompressorInputStream implements
038 BZip2Constants {
039
040 /**
041 * Index of the last char in the block, so the block size == last + 1.
042 */
043 private int last;
044
045 /**
046 * Index in zptr[] of original string after sorting.
047 */
048 private int origPtr;
049
050 /**
051 * always: in the range 0 .. 9. The current block size is 100000 * this
052 * number.
053 */
054 private int blockSize100k;
055
056 private boolean blockRandomised;
057
058 private int bsBuff;
059 private int bsLive;
060 private final CRC crc = new CRC();
061
062 private int nInUse;
063
064 private InputStream in;
065 private final boolean decompressConcatenated;
066
067 private int currentChar = -1;
068
069 private static final int EOF = 0;
070 private static final int START_BLOCK_STATE = 1;
071 private static final int RAND_PART_A_STATE = 2;
072 private static final int RAND_PART_B_STATE = 3;
073 private static final int RAND_PART_C_STATE = 4;
074 private static final int NO_RAND_PART_A_STATE = 5;
075 private static final int NO_RAND_PART_B_STATE = 6;
076 private static final int NO_RAND_PART_C_STATE = 7;
077
078 private int currentState = START_BLOCK_STATE;
079
080 private int storedBlockCRC, storedCombinedCRC;
081 private int computedBlockCRC, computedCombinedCRC;
082
083 // Variables used by setup* methods exclusively
084
085 private int su_count;
086 private int su_ch2;
087 private int su_chPrev;
088 private int su_i2;
089 private int su_j2;
090 private int su_rNToGo;
091 private int su_rTPos;
092 private int su_tPos;
093 private char su_z;
094
095 /**
096 * All memory intensive stuff. This field is initialized by initBlock().
097 */
098 private BZip2CompressorInputStream.Data data;
099
100 /**
101 * Constructs a new BZip2CompressorInputStream which decompresses bytes
102 * read from the specified stream. This doesn't suppprt decompressing
103 * concatenated .bz2 files.
104 *
105 * @throws IOException
106 * if the stream content is malformed or an I/O error occurs.
107 * @throws NullPointerException
108 * if <tt>in == null</tt>
109 */
110 public BZip2CompressorInputStream(final InputStream in) throws IOException {
111 this(in, false);
112 }
113
114 /**
115 * Constructs a new BZip2CompressorInputStream which decompresses bytes
116 * read from the specified stream.
117 *
118 * @param in the InputStream from which this object should be created
119 * @param decompressConcatenated
120 * if true, decompress until the end of the input;
121 * if false, stop after the first .bz2 stream and
122 * leave the input position to point to the next
123 * byte after the .bz2 stream
124 *
125 * @throws IOException
126 * if the stream content is malformed or an I/O error occurs.
127 * @throws NullPointerException
128 * if <tt>in == null</tt>
129 */
130 public BZip2CompressorInputStream(final InputStream in,
131 final boolean decompressConcatenated)
132 throws IOException {
133 super();
134
135 this.in = in;
136 this.decompressConcatenated = decompressConcatenated;
137
138 init(true);
139 initBlock();
140 setupBlock();
141 }
142
143 /** {@inheritDoc} */
144 @Override
145 public int read() throws IOException {
146 if (this.in != null) {
147 int r = read0();
148 count(r < 0 ? -1 : 1);
149 return r;
150 } else {
151 throw new IOException("stream closed");
152 }
153 }
154
155 /*
156 * (non-Javadoc)
157 *
158 * @see java.io.InputStream#read(byte[], int, int)
159 */
160 @Override
161 public int read(final byte[] dest, final int offs, final int len)
162 throws IOException {
163 if (offs < 0) {
164 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
165 }
166 if (len < 0) {
167 throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
168 }
169 if (offs + len > dest.length) {
170 throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
171 + len + ") > dest.length(" + dest.length + ").");
172 }
173 if (this.in == null) {
174 throw new IOException("stream closed");
175 }
176
177 final int hi = offs + len;
178 int destOffs = offs;
179 for (int b; (destOffs < hi) && ((b = read0()) >= 0);) {
180 dest[destOffs++] = (byte) b;
181 }
182
183 int c = (destOffs == offs) ? -1 : (destOffs - offs);
184 count(c);
185 return c;
186 }
187
188 private void makeMaps() {
189 final boolean[] inUse = this.data.inUse;
190 final byte[] seqToUnseq = this.data.seqToUnseq;
191
192 int nInUseShadow = 0;
193
194 for (int i = 0; i < 256; i++) {
195 if (inUse[i]) {
196 seqToUnseq[nInUseShadow++] = (byte) i;
197 }
198 }
199
200 this.nInUse = nInUseShadow;
201 }
202
203 private int read0() throws IOException {
204 final int retChar = this.currentChar;
205
206 switch (this.currentState) {
207 case EOF:
208 return -1;
209
210 case START_BLOCK_STATE:
211 throw new IllegalStateException();
212
213 case RAND_PART_A_STATE:
214 throw new IllegalStateException();
215
216 case RAND_PART_B_STATE:
217 setupRandPartB();
218 break;
219
220 case RAND_PART_C_STATE:
221 setupRandPartC();
222 break;
223
224 case NO_RAND_PART_A_STATE:
225 throw new IllegalStateException();
226
227 case NO_RAND_PART_B_STATE:
228 setupNoRandPartB();
229 break;
230
231 case NO_RAND_PART_C_STATE:
232 setupNoRandPartC();
233 break;
234
235 default:
236 throw new IllegalStateException();
237 }
238
239 return retChar;
240 }
241
242 private boolean init(boolean isFirstStream) throws IOException {
243 if (null == in) {
244 throw new IOException("No InputStream");
245 }
246
247 int magic0 = this.in.read();
248 int magic1 = this.in.read();
249 int magic2 = this.in.read();
250 if (magic0 == -1 && !isFirstStream) {
251 return false;
252 }
253
254 if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
255 throw new IOException(isFirstStream
256 ? "Stream is not in the BZip2 format"
257 : "Garbage after a valid BZip2 stream");
258 }
259
260 int blockSize = this.in.read();
261 if ((blockSize < '1') || (blockSize > '9')) {
262 throw new IOException("BZip2 block size is invalid");
263 }
264
265 this.blockSize100k = blockSize - '0';
266
267 this.bsLive = 0;
268 this.computedCombinedCRC = 0;
269
270 return true;
271 }
272
273 private void initBlock() throws IOException {
274 char magic0;
275 char magic1;
276 char magic2;
277 char magic3;
278 char magic4;
279 char magic5;
280
281 while (true) {
282 // Get the block magic bytes.
283 magic0 = bsGetUByte();
284 magic1 = bsGetUByte();
285 magic2 = bsGetUByte();
286 magic3 = bsGetUByte();
287 magic4 = bsGetUByte();
288 magic5 = bsGetUByte();
289
290 // If isn't end of stream magic, break out of the loop.
291 if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
292 || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
293 break;
294 }
295
296 // End of stream was reached. Check the combined CRC and
297 // advance to the next .bz2 stream if decoding concatenated
298 // streams.
299 if (complete()) {
300 return;
301 }
302 }
303
304 if (magic0 != 0x31 || // '1'
305 magic1 != 0x41 || // ')'
306 magic2 != 0x59 || // 'Y'
307 magic3 != 0x26 || // '&'
308 magic4 != 0x53 || // 'S'
309 magic5 != 0x59 // 'Y'
310 ) {
311 this.currentState = EOF;
312 throw new IOException("bad block header");
313 } else {
314 this.storedBlockCRC = bsGetInt();
315 this.blockRandomised = bsR(1) == 1;
316
317 /**
318 * Allocate data here instead in constructor, so we do not allocate
319 * it if the input file is empty.
320 */
321 if (this.data == null) {
322 this.data = new Data(this.blockSize100k);
323 }
324
325 // currBlockNo++;
326 getAndMoveToFrontDecode();
327
328 this.crc.initialiseCRC();
329 this.currentState = START_BLOCK_STATE;
330 }
331 }
332
333 private void endBlock() throws IOException {
334 this.computedBlockCRC = this.crc.getFinalCRC();
335
336 // A bad CRC is considered a fatal error.
337 if (this.storedBlockCRC != this.computedBlockCRC) {
338 // make next blocks readable without error
339 // (repair feature, not yet documented, not tested)
340 this.computedCombinedCRC = (this.storedCombinedCRC << 1)
341 | (this.storedCombinedCRC >>> 31);
342 this.computedCombinedCRC ^= this.storedBlockCRC;
343
344 throw new IOException("BZip2 CRC error");
345 }
346
347 this.computedCombinedCRC = (this.computedCombinedCRC << 1)
348 | (this.computedCombinedCRC >>> 31);
349 this.computedCombinedCRC ^= this.computedBlockCRC;
350 }
351
352 private boolean complete() throws IOException {
353 this.storedCombinedCRC = bsGetInt();
354 this.currentState = EOF;
355 this.data = null;
356
357 if (this.storedCombinedCRC != this.computedCombinedCRC) {
358 throw new IOException("BZip2 CRC error");
359 }
360
361 // Look for the next .bz2 stream if decompressing
362 // concatenated files.
363 return !decompressConcatenated || !init(false);
364 }
365
366 @Override
367 public void close() throws IOException {
368 InputStream inShadow = this.in;
369 if (inShadow != null) {
370 try {
371 if (inShadow != System.in) {
372 inShadow.close();
373 }
374 } finally {
375 this.data = null;
376 this.in = null;
377 }
378 }
379 }
380
381 private int bsR(final int n) throws IOException {
382 int bsLiveShadow = this.bsLive;
383 int bsBuffShadow = this.bsBuff;
384
385 if (bsLiveShadow < n) {
386 final InputStream inShadow = this.in;
387 do {
388 int thech = inShadow.read();
389
390 if (thech < 0) {
391 throw new IOException("unexpected end of stream");
392 }
393
394 bsBuffShadow = (bsBuffShadow << 8) | thech;
395 bsLiveShadow += 8;
396 } while (bsLiveShadow < n);
397
398 this.bsBuff = bsBuffShadow;
399 }
400
401 this.bsLive = bsLiveShadow - n;
402 return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1);
403 }
404
405 private boolean bsGetBit() throws IOException {
406 int bsLiveShadow = this.bsLive;
407 int bsBuffShadow = this.bsBuff;
408
409 if (bsLiveShadow < 1) {
410 int thech = this.in.read();
411
412 if (thech < 0) {
413 throw new IOException("unexpected end of stream");
414 }
415
416 bsBuffShadow = (bsBuffShadow << 8) | thech;
417 bsLiveShadow += 8;
418 this.bsBuff = bsBuffShadow;
419 }
420
421 this.bsLive = bsLiveShadow - 1;
422 return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0;
423 }
424
425 private char bsGetUByte() throws IOException {
426 return (char) bsR(8);
427 }
428
429 private int bsGetInt() throws IOException {
430 return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8);
431 }
432
433 /**
434 * Called by createHuffmanDecodingTables() exclusively.
435 */
436 private static void hbCreateDecodeTables(final int[] limit,
437 final int[] base, final int[] perm, final char[] length,
438 final int minLen, final int maxLen, final int alphaSize) {
439 for (int i = minLen, pp = 0; i <= maxLen; i++) {
440 for (int j = 0; j < alphaSize; j++) {
441 if (length[j] == i) {
442 perm[pp++] = j;
443 }
444 }
445 }
446
447 for (int i = MAX_CODE_LEN; --i > 0;) {
448 base[i] = 0;
449 limit[i] = 0;
450 }
451
452 for (int i = 0; i < alphaSize; i++) {
453 base[length[i] + 1]++;
454 }
455
456 for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
457 b += base[i];
458 base[i] = b;
459 }
460
461 for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
462 final int nb = base[i + 1];
463 vec += nb - b;
464 b = nb;
465 limit[i] = vec - 1;
466 vec <<= 1;
467 }
468
469 for (int i = minLen + 1; i <= maxLen; i++) {
470 base[i] = ((limit[i - 1] + 1) << 1) - base[i];
471 }
472 }
473
474 private void recvDecodingTables() throws IOException {
475 final Data dataShadow = this.data;
476 final boolean[] inUse = dataShadow.inUse;
477 final byte[] pos = dataShadow.recvDecodingTables_pos;
478 final byte[] selector = dataShadow.selector;
479 final byte[] selectorMtf = dataShadow.selectorMtf;
480
481 int inUse16 = 0;
482
483 /* Receive the mapping table */
484 for (int i = 0; i < 16; i++) {
485 if (bsGetBit()) {
486 inUse16 |= 1 << i;
487 }
488 }
489
490 for (int i = 256; --i >= 0;) {
491 inUse[i] = false;
492 }
493
494 for (int i = 0; i < 16; i++) {
495 if ((inUse16 & (1 << i)) != 0) {
496 final int i16 = i << 4;
497 for (int j = 0; j < 16; j++) {
498 if (bsGetBit()) {
499 inUse[i16 + j] = true;
500 }
501 }
502 }
503 }
504
505 makeMaps();
506 final int alphaSize = this.nInUse + 2;
507
508 /* Now the selectors */
509 final int nGroups = bsR(3);
510 final int nSelectors = bsR(15);
511
512 for (int i = 0; i < nSelectors; i++) {
513 int j = 0;
514 while (bsGetBit()) {
515 j++;
516 }
517 selectorMtf[i] = (byte) j;
518 }
519
520 /* Undo the MTF values for the selectors. */
521 for (int v = nGroups; --v >= 0;) {
522 pos[v] = (byte) v;
523 }
524
525 for (int i = 0; i < nSelectors; i++) {
526 int v = selectorMtf[i] & 0xff;
527 final byte tmp = pos[v];
528 while (v > 0) {
529 // nearly all times v is zero, 4 in most other cases
530 pos[v] = pos[v - 1];
531 v--;
532 }
533 pos[0] = tmp;
534 selector[i] = tmp;
535 }
536
537 final char[][] len = dataShadow.temp_charArray2d;
538
539 /* Now the coding tables */
540 for (int t = 0; t < nGroups; t++) {
541 int curr = bsR(5);
542 final char[] len_t = len[t];
543 for (int i = 0; i < alphaSize; i++) {
544 while (bsGetBit()) {
545 curr += bsGetBit() ? -1 : 1;
546 }
547 len_t[i] = (char) curr;
548 }
549 }
550
551 // finally create the Huffman tables
552 createHuffmanDecodingTables(alphaSize, nGroups);
553 }
554
555 /**
556 * Called by recvDecodingTables() exclusively.
557 */
558 private void createHuffmanDecodingTables(final int alphaSize,
559 final int nGroups) {
560 final Data dataShadow = this.data;
561 final char[][] len = dataShadow.temp_charArray2d;
562 final int[] minLens = dataShadow.minLens;
563 final int[][] limit = dataShadow.limit;
564 final int[][] base = dataShadow.base;
565 final int[][] perm = dataShadow.perm;
566
567 for (int t = 0; t < nGroups; t++) {
568 int minLen = 32;
569 int maxLen = 0;
570 final char[] len_t = len[t];
571 for (int i = alphaSize; --i >= 0;) {
572 final char lent = len_t[i];
573 if (lent > maxLen) {
574 maxLen = lent;
575 }
576 if (lent < minLen) {
577 minLen = lent;
578 }
579 }
580 hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
581 maxLen, alphaSize);
582 minLens[t] = minLen;
583 }
584 }
585
586 private void getAndMoveToFrontDecode() throws IOException {
587 this.origPtr = bsR(24);
588 recvDecodingTables();
589
590 final InputStream inShadow = this.in;
591 final Data dataShadow = this.data;
592 final byte[] ll8 = dataShadow.ll8;
593 final int[] unzftab = dataShadow.unzftab;
594 final byte[] selector = dataShadow.selector;
595 final byte[] seqToUnseq = dataShadow.seqToUnseq;
596 final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
597 final int[] minLens = dataShadow.minLens;
598 final int[][] limit = dataShadow.limit;
599 final int[][] base = dataShadow.base;
600 final int[][] perm = dataShadow.perm;
601 final int limitLast = this.blockSize100k * 100000;
602
603 /*
604 * Setting up the unzftab entries here is not strictly necessary, but it
605 * does save having to do it later in a separate pass, and so saves a
606 * block's worth of cache misses.
607 */
608 for (int i = 256; --i >= 0;) {
609 yy[i] = (char) i;
610 unzftab[i] = 0;
611 }
612
613 int groupNo = 0;
614 int groupPos = G_SIZE - 1;
615 final int eob = this.nInUse + 1;
616 int nextSym = getAndMoveToFrontDecode0(0);
617 int bsBuffShadow = this.bsBuff;
618 int bsLiveShadow = this.bsLive;
619 int lastShadow = -1;
620 int zt = selector[groupNo] & 0xff;
621 int[] base_zt = base[zt];
622 int[] limit_zt = limit[zt];
623 int[] perm_zt = perm[zt];
624 int minLens_zt = minLens[zt];
625
626 while (nextSym != eob) {
627 if ((nextSym == RUNA) || (nextSym == RUNB)) {
628 int s = -1;
629
630 for (int n = 1; true; n <<= 1) {
631 if (nextSym == RUNA) {
632 s += n;
633 } else if (nextSym == RUNB) {
634 s += n << 1;
635 } else {
636 break;
637 }
638
639 if (groupPos == 0) {
640 groupPos = G_SIZE - 1;
641 zt = selector[++groupNo] & 0xff;
642 base_zt = base[zt];
643 limit_zt = limit[zt];
644 perm_zt = perm[zt];
645 minLens_zt = minLens[zt];
646 } else {
647 groupPos--;
648 }
649
650 int zn = minLens_zt;
651
652 // Inlined:
653 // int zvec = bsR(zn);
654 while (bsLiveShadow < zn) {
655 final int thech = inShadow.read();
656 if (thech >= 0) {
657 bsBuffShadow = (bsBuffShadow << 8) | thech;
658 bsLiveShadow += 8;
659 continue;
660 } else {
661 throw new IOException("unexpected end of stream");
662 }
663 }
664 int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
665 & ((1 << zn) - 1);
666 bsLiveShadow -= zn;
667
668 while (zvec > limit_zt[zn]) {
669 zn++;
670 while (bsLiveShadow < 1) {
671 final int thech = inShadow.read();
672 if (thech >= 0) {
673 bsBuffShadow = (bsBuffShadow << 8) | thech;
674 bsLiveShadow += 8;
675 continue;
676 } else {
677 throw new IOException(
678 "unexpected end of stream");
679 }
680 }
681 bsLiveShadow--;
682 zvec = (zvec << 1)
683 | ((bsBuffShadow >> bsLiveShadow) & 1);
684 }
685 nextSym = perm_zt[zvec - base_zt[zn]];
686 }
687
688 final byte ch = seqToUnseq[yy[0]];
689 unzftab[ch & 0xff] += s + 1;
690
691 while (s-- >= 0) {
692 ll8[++lastShadow] = ch;
693 }
694
695 if (lastShadow >= limitLast) {
696 throw new IOException("block overrun");
697 }
698 } else {
699 if (++lastShadow >= limitLast) {
700 throw new IOException("block overrun");
701 }
702
703 final char tmp = yy[nextSym - 1];
704 unzftab[seqToUnseq[tmp] & 0xff]++;
705 ll8[lastShadow] = seqToUnseq[tmp];
706
707 /*
708 * This loop is hammered during decompression, hence avoid
709 * native method call overhead of System.arraycopy for very
710 * small ranges to copy.
711 */
712 if (nextSym <= 16) {
713 for (int j = nextSym - 1; j > 0;) {
714 yy[j] = yy[--j];
715 }
716 } else {
717 System.arraycopy(yy, 0, yy, 1, nextSym - 1);
718 }
719
720 yy[0] = tmp;
721
722 if (groupPos == 0) {
723 groupPos = G_SIZE - 1;
724 zt = selector[++groupNo] & 0xff;
725 base_zt = base[zt];
726 limit_zt = limit[zt];
727 perm_zt = perm[zt];
728 minLens_zt = minLens[zt];
729 } else {
730 groupPos--;
731 }
732
733 int zn = minLens_zt;
734
735 // Inlined:
736 // int zvec = bsR(zn);
737 while (bsLiveShadow < zn) {
738 final int thech = inShadow.read();
739 if (thech >= 0) {
740 bsBuffShadow = (bsBuffShadow << 8) | thech;
741 bsLiveShadow += 8;
742 continue;
743 } else {
744 throw new IOException("unexpected end of stream");
745 }
746 }
747 int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
748 & ((1 << zn) - 1);
749 bsLiveShadow -= zn;
750
751 while (zvec > limit_zt[zn]) {
752 zn++;
753 while (bsLiveShadow < 1) {
754 final int thech = inShadow.read();
755 if (thech >= 0) {
756 bsBuffShadow = (bsBuffShadow << 8) | thech;
757 bsLiveShadow += 8;
758 continue;
759 } else {
760 throw new IOException("unexpected end of stream");
761 }
762 }
763 bsLiveShadow--;
764 zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
765 }
766 nextSym = perm_zt[zvec - base_zt[zn]];
767 }
768 }
769
770 this.last = lastShadow;
771 this.bsLive = bsLiveShadow;
772 this.bsBuff = bsBuffShadow;
773 }
774
775 private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
776 final InputStream inShadow = this.in;
777 final Data dataShadow = this.data;
778 final int zt = dataShadow.selector[groupNo] & 0xff;
779 final int[] limit_zt = dataShadow.limit[zt];
780 int zn = dataShadow.minLens[zt];
781 int zvec = bsR(zn);
782 int bsLiveShadow = this.bsLive;
783 int bsBuffShadow = this.bsBuff;
784
785 while (zvec > limit_zt[zn]) {
786 zn++;
787 while (bsLiveShadow < 1) {
788 final int thech = inShadow.read();
789
790 if (thech >= 0) {
791 bsBuffShadow = (bsBuffShadow << 8) | thech;
792 bsLiveShadow += 8;
793 continue;
794 } else {
795 throw new IOException("unexpected end of stream");
796 }
797 }
798 bsLiveShadow--;
799 zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
800 }
801
802 this.bsLive = bsLiveShadow;
803 this.bsBuff = bsBuffShadow;
804
805 return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
806 }
807
808 private void setupBlock() throws IOException {
809 if (this.data == null) {
810 return;
811 }
812
813 final int[] cftab = this.data.cftab;
814 final int[] tt = this.data.initTT(this.last + 1);
815 final byte[] ll8 = this.data.ll8;
816 cftab[0] = 0;
817 System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
818
819 for (int i = 1, c = cftab[0]; i <= 256; i++) {
820 c += cftab[i];
821 cftab[i] = c;
822 }
823
824 for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
825 tt[cftab[ll8[i] & 0xff]++] = i;
826 }
827
828 if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
829 throw new IOException("stream corrupted");
830 }
831
832 this.su_tPos = tt[this.origPtr];
833 this.su_count = 0;
834 this.su_i2 = 0;
835 this.su_ch2 = 256; /* not a char and not EOF */
836
837 if (this.blockRandomised) {
838 this.su_rNToGo = 0;
839 this.su_rTPos = 0;
840 setupRandPartA();
841 } else {
842 setupNoRandPartA();
843 }
844 }
845
846 private void setupRandPartA() throws IOException {
847 if (this.su_i2 <= this.last) {
848 this.su_chPrev = this.su_ch2;
849 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
850 this.su_tPos = this.data.tt[this.su_tPos];
851 if (this.su_rNToGo == 0) {
852 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
853 if (++this.su_rTPos == 512) {
854 this.su_rTPos = 0;
855 }
856 } else {
857 this.su_rNToGo--;
858 }
859 this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
860 this.su_i2++;
861 this.currentChar = su_ch2Shadow;
862 this.currentState = RAND_PART_B_STATE;
863 this.crc.updateCRC(su_ch2Shadow);
864 } else {
865 endBlock();
866 initBlock();
867 setupBlock();
868 }
869 }
870
871 private void setupNoRandPartA() throws IOException {
872 if (this.su_i2 <= this.last) {
873 this.su_chPrev = this.su_ch2;
874 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
875 this.su_ch2 = su_ch2Shadow;
876 this.su_tPos = this.data.tt[this.su_tPos];
877 this.su_i2++;
878 this.currentChar = su_ch2Shadow;
879 this.currentState = NO_RAND_PART_B_STATE;
880 this.crc.updateCRC(su_ch2Shadow);
881 } else {
882 this.currentState = NO_RAND_PART_A_STATE;
883 endBlock();
884 initBlock();
885 setupBlock();
886 }
887 }
888
889 private void setupRandPartB() throws IOException {
890 if (this.su_ch2 != this.su_chPrev) {
891 this.currentState = RAND_PART_A_STATE;
892 this.su_count = 1;
893 setupRandPartA();
894 } else if (++this.su_count >= 4) {
895 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
896 this.su_tPos = this.data.tt[this.su_tPos];
897 if (this.su_rNToGo == 0) {
898 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
899 if (++this.su_rTPos == 512) {
900 this.su_rTPos = 0;
901 }
902 } else {
903 this.su_rNToGo--;
904 }
905 this.su_j2 = 0;
906 this.currentState = RAND_PART_C_STATE;
907 if (this.su_rNToGo == 1) {
908 this.su_z ^= 1;
909 }
910 setupRandPartC();
911 } else {
912 this.currentState = RAND_PART_A_STATE;
913 setupRandPartA();
914 }
915 }
916
917 private void setupRandPartC() throws IOException {
918 if (this.su_j2 < this.su_z) {
919 this.currentChar = this.su_ch2;
920 this.crc.updateCRC(this.su_ch2);
921 this.su_j2++;
922 } else {
923 this.currentState = RAND_PART_A_STATE;
924 this.su_i2++;
925 this.su_count = 0;
926 setupRandPartA();
927 }
928 }
929
930 private void setupNoRandPartB() throws IOException {
931 if (this.su_ch2 != this.su_chPrev) {
932 this.su_count = 1;
933 setupNoRandPartA();
934 } else if (++this.su_count >= 4) {
935 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
936 this.su_tPos = this.data.tt[this.su_tPos];
937 this.su_j2 = 0;
938 setupNoRandPartC();
939 } else {
940 setupNoRandPartA();
941 }
942 }
943
944 private void setupNoRandPartC() throws IOException {
945 if (this.su_j2 < this.su_z) {
946 int su_ch2Shadow = this.su_ch2;
947 this.currentChar = su_ch2Shadow;
948 this.crc.updateCRC(su_ch2Shadow);
949 this.su_j2++;
950 this.currentState = NO_RAND_PART_C_STATE;
951 } else {
952 this.su_i2++;
953 this.su_count = 0;
954 setupNoRandPartA();
955 }
956 }
957
958 private static final class Data extends Object {
959
960 // (with blockSize 900k)
961 final boolean[] inUse = new boolean[256]; // 256 byte
962
963 final byte[] seqToUnseq = new byte[256]; // 256 byte
964 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
965 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
966
967 /**
968 * Freq table collected to save a pass over the data during
969 * decompression.
970 */
971 final int[] unzftab = new int[256]; // 1024 byte
972
973 final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
974 final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
975 final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
976 final int[] minLens = new int[N_GROUPS]; // 24 byte
977
978 final int[] cftab = new int[257]; // 1028 byte
979 final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
980 final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
981 // byte
982 final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
983 // ---------------
984 // 60798 byte
985
986 int[] tt; // 3600000 byte
987 byte[] ll8; // 900000 byte
988
989 // ---------------
990 // 4560782 byte
991 // ===============
992
993 Data(int blockSize100k) {
994 super();
995
996 this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
997 }
998
999 /**
1000 * Initializes the {@link #tt} array.
1001 *
1002 * This method is called when the required length of the array is known.
1003 * I don't initialize it at construction time to avoid unneccessary
1004 * memory allocation when compressing small files.
1005 */
1006 int[] initTT(int length) {
1007 int[] ttShadow = this.tt;
1008
1009 // tt.length should always be >= length, but theoretically
1010 // it can happen, if the compressor mixed small and large
1011 // blocks. Normally only the last block will be smaller
1012 // than others.
1013 if ((ttShadow == null) || (ttShadow.length < length)) {
1014 this.tt = ttShadow = new int[length];
1015 }
1016
1017 return ttShadow;
1018 }
1019
1020 }
1021
1022 /**
1023 * Checks if the signature matches what is expected for a bzip2 file.
1024 *
1025 * @param signature
1026 * the bytes to check
1027 * @param length
1028 * the number of bytes to check
1029 * @return true, if this stream is a bzip2 compressed stream, false otherwise
1030 *
1031 * @since 1.1
1032 */
1033 public static boolean matches(byte[] signature, int length) {
1034
1035 if (length < 3) {
1036 return false;
1037 }
1038
1039 if (signature[0] != 'B') {
1040 return false;
1041 }
1042
1043 if (signature[1] != 'Z') {
1044 return false;
1045 }
1046
1047 if (signature[2] != 'h') {
1048 return false;
1049 }
1050
1051 return true;
1052 }
1053 }