001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023
024import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream;
025import org.apache.commons.compress.utils.ByteUtils;
026
027/**
028 * CompressorInputStream for the LZ4 block format.
029 *
030 * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a>
031 * @since 1.14
032 * @NotThreadSafe
033 */
034public class BlockLZ4CompressorInputStream extends AbstractLZ77CompressorInputStream {
035
036    static final int WINDOW_SIZE = 1 << 16;
037    static final int SIZE_BITS = 4;
038    static final int BACK_REFERENCE_SIZE_MASK = (1 << SIZE_BITS) - 1;
039    static final int LITERAL_SIZE_MASK = BACK_REFERENCE_SIZE_MASK << SIZE_BITS;
040
041    /** Back-Reference-size part of the block starting byte. */
042    private int nextBackReferenceSize;
043
044    /** Current state of the stream */
045    private State state = State.NO_BLOCK;
046
047    /**
048     * Creates a new LZ4 input stream.
049     *
050     * @param is
051     *            An InputStream to read compressed data from
052     */
053    public BlockLZ4CompressorInputStream(final InputStream is) {
054        super(is, WINDOW_SIZE);
055    }
056
057    /**
058     * {@inheritDoc}
059     */
060    @Override
061    public int read(final byte[] b, final int off, final int len) throws IOException {
062        if (len == 0) {
063            return 0;
064        }
065        switch (state) {
066        case EOF:
067            return -1;
068        case NO_BLOCK: // NOSONAR - fallthrough intended
069            readSizes();
070            /*FALLTHROUGH*/
071        case IN_LITERAL:
072            final int litLen = readLiteral(b, off, len);
073            if (!hasMoreDataInBlock()) {
074                state = State.LOOKING_FOR_BACK_REFERENCE;
075            }
076            return litLen > 0 ? litLen : read(b, off, len);
077        case LOOKING_FOR_BACK_REFERENCE: // NOSONAR - fallthrough intended
078            if (!initializeBackReference()) {
079                state = State.EOF;
080                return -1;
081            }
082            /*FALLTHROUGH*/
083        case IN_BACK_REFERENCE:
084            final int backReferenceLen = readBackReference(b, off, len);
085            if (!hasMoreDataInBlock()) {
086                state = State.NO_BLOCK;
087            }
088            return backReferenceLen > 0 ? backReferenceLen : read(b, off, len);
089        default:
090            throw new IOException("Unknown stream state " + state);
091        }
092    }
093
094    private void readSizes() throws IOException {
095        final int nextBlock = readOneByte();
096        if (nextBlock == -1) {
097            throw new IOException("Premature end of stream while looking for next block");
098        }
099        nextBackReferenceSize = nextBlock & BACK_REFERENCE_SIZE_MASK;
100        long literalSizePart = (nextBlock & LITERAL_SIZE_MASK) >> SIZE_BITS;
101        if (literalSizePart == BACK_REFERENCE_SIZE_MASK) {
102            literalSizePart += readSizeBytes();
103        }
104        if (literalSizePart < 0) {
105            throw new IOException("Illegal block with a negative literal size found");
106        }
107        startLiteral(literalSizePart);
108        state = State.IN_LITERAL;
109    }
110
111    private long readSizeBytes() throws IOException {
112        long accum = 0;
113        int nextByte;
114        do {
115            nextByte = readOneByte();
116            if (nextByte == -1) {
117                throw new IOException("Premature end of stream while parsing length");
118            }
119            accum += nextByte;
120        } while (nextByte == 255);
121        return accum;
122    }
123
124    /**
125     * @return false if there is no more back-reference - this means this is the
126     * last block of the stream.
127     */
128    private boolean initializeBackReference() throws IOException {
129        int backReferenceOffset = 0;
130        try {
131            backReferenceOffset = (int) ByteUtils.fromLittleEndian(supplier, 2);
132        } catch (final IOException ex) {
133            if (nextBackReferenceSize == 0) { // the last block has no back-reference
134                return false;
135            }
136            throw ex;
137        }
138        long backReferenceSize = nextBackReferenceSize;
139        if (nextBackReferenceSize == BACK_REFERENCE_SIZE_MASK) {
140            backReferenceSize += readSizeBytes();
141        }
142        // minimal match length 4 is encoded as 0
143        if (backReferenceSize < 0) {
144            throw new IOException("Illegal block with a negative match length found");
145        }
146        try {
147            startBackReference(backReferenceOffset, backReferenceSize + 4);
148        } catch (final IllegalArgumentException ex) {
149            throw new IOException("Illegal block with bad offset found", ex);
150        }
151        state = State.IN_BACK_REFERENCE;
152        return true;
153    }
154
155    private enum State {
156        NO_BLOCK, IN_LITERAL, LOOKING_FOR_BACK_REFERENCE, IN_BACK_REFERENCE, EOF
157    }
158}