001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import static java.nio.charset.StandardCharsets.UTF_8;
022import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
023import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
024import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_NUMBYTES_LEN;
025import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_OFFSET_LEN;
026
027import java.io.ByteArrayOutputStream;
028import java.io.IOException;
029import java.io.InputStream;
030import java.io.UncheckedIOException;
031import java.math.BigInteger;
032import java.nio.ByteBuffer;
033import java.nio.charset.Charset;
034import java.util.ArrayList;
035import java.util.Collections;
036import java.util.HashMap;
037import java.util.List;
038import java.util.Map;
039
040import org.apache.commons.compress.archivers.zip.ZipEncoding;
041import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
042import org.apache.commons.compress.utils.CharsetNames;
043import org.apache.commons.compress.utils.IOUtils;
044
045/**
046 * This class provides static utility methods to work with byte streams.
047 *
048 * @Immutable
049 */
050// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
051public class TarUtils {
052
053    private static final int BYTE_MASK = 255;
054
055    static final ZipEncoding DEFAULT_ENCODING =
056        ZipEncodingHelper.getZipEncoding(null);
057
058    /**
059     * Encapsulates the algorithms used up to Commons Compress 1.3 as
060     * ZipEncoding.
061     */
062    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
063            @Override
064            public boolean canEncode(final String name) { return true; }
065
066            @Override
067            public ByteBuffer encode(final String name) {
068                final int length = name.length();
069                final byte[] buf = new byte[length];
070
071                // copy until end of input or output is reached.
072                for (int i = 0; i < length; ++i) {
073                    buf[i] = (byte) name.charAt(i);
074                }
075                return ByteBuffer.wrap(buf);
076            }
077
078            @Override
079            public String decode(final byte[] buffer) {
080                final int length = buffer.length;
081                final StringBuilder result = new StringBuilder(length);
082
083                for (final byte b : buffer) {
084                    if (b == 0) { // Trailing null
085                        break;
086                    }
087                    result.append((char) (b & 0xFF)); // Allow for sign-extension
088                }
089
090                return result.toString();
091            }
092        };
093
094    /** Private constructor to prevent instantiation of this utility class. */
095    private TarUtils(){
096    }
097
098    /**
099     * Parse an octal string from a buffer.
100     *
101     * <p>Leading spaces are ignored.
102     * The buffer must contain a trailing space or NUL,
103     * and may contain an additional trailing space or NUL.</p>
104     *
105     * <p>The input buffer is allowed to contain all NULs,
106     * in which case the method returns 0L
107     * (this allows for missing fields).</p>
108     *
109     * <p>To work-around some tar implementations that insert a
110     * leading NUL this method returns 0 if it detects a leading NUL
111     * since Commons Compress 1.4.</p>
112     *
113     * @param buffer The buffer from which to parse.
114     * @param offset The offset into the buffer from which to parse.
115     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
116     * @return The long value of the octal string.
117     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
118     */
119    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
120        long result = 0;
121        int end = offset + length;
122        int start = offset;
123
124        if (length < 2) {
125            throw new IllegalArgumentException("Length " + length + " must be at least 2");
126        }
127
128        if (buffer[start] == 0) {
129            return 0L;
130        }
131
132        // Skip leading spaces
133        while (start < end) {
134            if (buffer[start] != ' ') {
135                break;
136            }
137            start++;
138        }
139
140        // Trim all trailing NULs and spaces.
141        // The ustar and POSIX tar specs require a trailing NUL or
142        // space but some implementations use the extra digit for big
143        // sizes/uids/gids ...
144        byte trailer = buffer[end - 1];
145        while (start < end && (trailer == 0 || trailer == ' ')) {
146            end--;
147            trailer = buffer[end - 1];
148        }
149
150        for (; start < end; start++) {
151            final byte currentByte = buffer[start];
152            // CheckStyle:MagicNumber OFF
153            if (currentByte < '0' || currentByte > '7') {
154                throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
155            }
156            result = (result << 3) + (currentByte - '0'); // convert from ASCII
157            // CheckStyle:MagicNumber ON
158        }
159
160        return result;
161    }
162
163    /**
164     * Compute the value contained in a byte buffer.  If the most
165     * significant bit of the first byte in the buffer is set, this
166     * bit is ignored and the rest of the buffer is interpreted as a
167     * binary number.  Otherwise, the buffer is interpreted as an
168     * octal number as per the parseOctal function above.
169     *
170     * @param buffer The buffer from which to parse.
171     * @param offset The offset into the buffer from which to parse.
172     * @param length The maximum number of bytes to parse.
173     * @return The long value of the octal or binary string.
174     * @throws IllegalArgumentException if the trailing space/NUL is
175     * missing or an invalid byte is detected in an octal number, or
176     * if a binary number would exceed the size of a signed long
177     * 64-bit integer.
178     * @since 1.4
179     */
180    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
181                                          final int length) {
182
183        if ((buffer[offset] & 0x80) == 0) {
184            return parseOctal(buffer, offset, length);
185        }
186        final boolean negative = buffer[offset] == (byte) 0xff;
187        if (length < 9) {
188            return parseBinaryLong(buffer, offset, length, negative);
189        }
190        return parseBinaryBigInteger(buffer, offset, length, negative);
191    }
192
193    private static long parseBinaryLong(final byte[] buffer, final int offset,
194                                        final int length,
195                                        final boolean negative) {
196        if (length >= 9) {
197            throw new IllegalArgumentException("At offset " + offset + ", "
198                                               + length + " byte binary number"
199                                               + " exceeds maximum signed long"
200                                               + " value");
201        }
202        long val = 0;
203        for (int i = 1; i < length; i++) {
204            val = (val << 8) + (buffer[offset + i] & 0xff);
205        }
206        if (negative) {
207            // 2's complement
208            val--;
209            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
210        }
211        return negative ? -val : val;
212    }
213
214    private static long parseBinaryBigInteger(final byte[] buffer,
215                                              final int offset,
216                                              final int length,
217                                              final boolean negative) {
218        final byte[] remainder = new byte[length - 1];
219        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
220        BigInteger val = new BigInteger(remainder);
221        if (negative) {
222            // 2's complement
223            val = val.add(BigInteger.valueOf(-1)).not();
224        }
225        if (val.bitLength() > 63) {
226            throw new IllegalArgumentException("At offset " + offset + ", "
227                                               + length + " byte binary number"
228                                               + " exceeds maximum signed long"
229                                               + " value");
230        }
231        return negative ? -val.longValue() : val.longValue();
232    }
233
234    /**
235     * Parse a boolean byte from a buffer.
236     * Leading spaces and NUL are ignored.
237     * The buffer may contain trailing spaces or NULs.
238     *
239     * @param buffer The buffer from which to parse.
240     * @param offset The offset into the buffer from which to parse.
241     * @return The boolean value of the bytes.
242     * @throws IllegalArgumentException if an invalid byte is detected.
243     */
244    public static boolean parseBoolean(final byte[] buffer, final int offset) {
245        return buffer[offset] == 1;
246    }
247
248    // Helper method to generate the exception message
249    private static String exceptionMessage(final byte[] buffer, final int offset,
250            final int length, final int current, final byte currentByte) {
251        // default charset is good enough for an exception message,
252        //
253        // the alternative was to modify parseOctal and
254        // parseOctalOrBinary to receive the ZipEncoding of the
255        // archive (deprecating the existing public methods, of
256        // course) and dealing with the fact that ZipEncoding#decode
257        // can throw an IOException which parseOctal* doesn't declare
258        String string = new String(buffer, offset, length, Charset.defaultCharset());
259
260        string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
261        return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
262    }
263
264    /**
265     * Parse an entry name from a buffer.
266     * Parsing stops when a NUL is found
267     * or the buffer length is reached.
268     *
269     * @param buffer The buffer from which to parse.
270     * @param offset The offset into the buffer from which to parse.
271     * @param length The maximum number of bytes to parse.
272     * @return The entry name.
273     */
274    public static String parseName(final byte[] buffer, final int offset, final int length) {
275        try {
276            return parseName(buffer, offset, length, DEFAULT_ENCODING);
277        } catch (final IOException ex) { // NOSONAR
278            try {
279                return parseName(buffer, offset, length, FALLBACK_ENCODING);
280            } catch (final IOException ex2) {
281                // impossible
282                throw new UncheckedIOException(ex2); //NOSONAR
283            }
284        }
285    }
286
287    /**
288     * Parse an entry name from a buffer.
289     * Parsing stops when a NUL is found
290     * or the buffer length is reached.
291     *
292     * @param buffer The buffer from which to parse.
293     * @param offset The offset into the buffer from which to parse.
294     * @param length The maximum number of bytes to parse.
295     * @param encoding name of the encoding to use for file names
296     * @since 1.4
297     * @return The entry name.
298     * @throws IOException on error
299     */
300    public static String parseName(final byte[] buffer, final int offset,
301                                   final int length,
302                                   final ZipEncoding encoding)
303        throws IOException {
304
305        int len = 0;
306        for (int i = offset; len < length && buffer[i] != 0; i++) {
307            len++;
308        }
309        if (len > 0) {
310            final byte[] b = new byte[len];
311            System.arraycopy(buffer, offset, b, 0, len);
312            return encoding.decode(b);
313        }
314        return "";
315    }
316
317    /**
318     * Parses the content of a PAX 1.0 sparse block.
319     * @since 1.20
320     * @param buffer The buffer from which to parse.
321     * @param offset The offset into the buffer from which to parse.
322     * @return a parsed sparse struct
323     */
324    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
325        final long sparseOffset = parseOctalOrBinary(buffer, offset, SPARSE_OFFSET_LEN);
326        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + SPARSE_OFFSET_LEN, SPARSE_NUMBYTES_LEN);
327
328        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
329    }
330
331    /**
332     * @since 1.21
333     */
334    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries)
335        throws IOException {
336        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
337        for (int i = 0; i < entries; i++) {
338            try {
339                final TarArchiveStructSparse sparseHeader =
340                    parseSparse(buffer, offset + i * (SPARSE_OFFSET_LEN + SPARSE_NUMBYTES_LEN));
341
342                if (sparseHeader.getOffset() < 0) {
343                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
344                }
345                if (sparseHeader.getNumbytes() < 0) {
346                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
347                }
348                sparseHeaders.add(sparseHeader);
349            } catch (IllegalArgumentException ex) {
350                // thrown internally by parseOctalOrBinary
351                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
352            }
353        }
354        return Collections.unmodifiableList(sparseHeaders);
355    }
356
357    /**
358     * Copy a name into a buffer.
359     * Copies characters from the name into the buffer
360     * starting at the specified offset.
361     * If the buffer is longer than the name, the buffer
362     * is filled with trailing NULs.
363     * If the name is longer than the buffer,
364     * the output is truncated.
365     *
366     * @param name The header name from which to copy the characters.
367     * @param buf The buffer where the name is to be stored.
368     * @param offset The starting offset into the buffer
369     * @param length The maximum number of header bytes to copy.
370     * @return The updated offset, i.e. offset + length
371     */
372    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
373        try {
374            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
375        } catch (final IOException ex) { // NOSONAR
376            try {
377                return formatNameBytes(name, buf, offset, length,
378                                       FALLBACK_ENCODING);
379            } catch (final IOException ex2) {
380                // impossible
381                throw new UncheckedIOException(ex2); //NOSONAR
382            }
383        }
384    }
385
386    /**
387     * Copy a name into a buffer.
388     * Copies characters from the name into the buffer
389     * starting at the specified offset.
390     * If the buffer is longer than the name, the buffer
391     * is filled with trailing NULs.
392     * If the name is longer than the buffer,
393     * the output is truncated.
394     *
395     * @param name The header name from which to copy the characters.
396     * @param buf The buffer where the name is to be stored.
397     * @param offset The starting offset into the buffer
398     * @param length The maximum number of header bytes to copy.
399     * @param encoding name of the encoding to use for file names
400     * @since 1.4
401     * @return The updated offset, i.e. offset + length
402     * @throws IOException on error
403     */
404    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
405                                      final int length,
406                                      final ZipEncoding encoding)
407        throws IOException {
408        int len = name.length();
409        ByteBuffer b = encoding.encode(name);
410        while (b.limit() > length && len > 0) {
411            b = encoding.encode(name.substring(0, --len));
412        }
413        final int limit = b.limit() - b.position();
414        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
415
416        // Pad any remaining output bytes with NUL
417        for (int i = limit; i < length; ++i) {
418            buf[offset + i] = 0;
419        }
420
421        return offset + length;
422    }
423
424    /**
425     * Fill buffer with unsigned octal number, padded with leading zeroes.
426     *
427     * @param value number to convert to octal - treated as unsigned
428     * @param buffer destination buffer
429     * @param offset starting offset in buffer
430     * @param length length of buffer to fill
431     * @throws IllegalArgumentException if the value will not fit in the buffer
432     */
433    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
434            final int offset, final int length) {
435        int remaining = length;
436        remaining--;
437        if (value == 0) {
438            buffer[offset + remaining--] = (byte) '0';
439        } else {
440            long val = value;
441            for (; remaining >= 0 && val != 0; --remaining) {
442                // CheckStyle:MagicNumber OFF
443                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
444                val = val >>> 3;
445                // CheckStyle:MagicNumber ON
446            }
447            if (val != 0){
448                throw new IllegalArgumentException
449                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
450            }
451        }
452
453        for (; remaining >= 0; --remaining) { // leading zeros
454            buffer[offset + remaining] = (byte) '0';
455        }
456    }
457
458    /**
459     * Write an octal integer into a buffer.
460     *
461     * Uses {@link #formatUnsignedOctalString} to format
462     * the value as an octal string with leading zeros.
463     * The converted number is followed by space and NUL
464     *
465     * @param value The value to write
466     * @param buf The buffer to receive the output
467     * @param offset The starting offset into the buffer
468     * @param length The size of the output buffer
469     * @return The updated offset, i.e offset+length
470     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
471     */
472    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
473
474        int idx=length-2; // For space and trailing null
475        formatUnsignedOctalString(value, buf, offset, idx);
476
477        buf[offset + idx++] = (byte) ' '; // Trailing space
478        buf[offset + idx]   = 0; // Trailing null
479
480        return offset + length;
481    }
482
483    /**
484     * Write an octal long integer into a buffer.
485     *
486     * Uses {@link #formatUnsignedOctalString} to format
487     * the value as an octal string with leading zeros.
488     * The converted number is followed by a space.
489     *
490     * @param value The value to write as octal
491     * @param buf The destinationbuffer.
492     * @param offset The starting offset into the buffer.
493     * @param length The length of the buffer
494     * @return The updated offset
495     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
496     */
497    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
498
499        final int idx=length-1; // For space
500
501        formatUnsignedOctalString(value, buf, offset, idx);
502        buf[offset + idx] = (byte) ' '; // Trailing space
503
504        return offset + length;
505    }
506
507    /**
508     * Write an long integer into a buffer as an octal string if this
509     * will fit, or as a binary number otherwise.
510     *
511     * Uses {@link #formatUnsignedOctalString} to format
512     * the value as an octal string with leading zeros.
513     * The converted number is followed by a space.
514     *
515     * @param value The value to write into the buffer.
516     * @param buf The destination buffer.
517     * @param offset The starting offset into the buffer.
518     * @param length The length of the buffer.
519     * @return The updated offset.
520     * @throws IllegalArgumentException if the value (and trailer)
521     * will not fit in the buffer.
522     * @since 1.4
523     */
524    public static int formatLongOctalOrBinaryBytes(
525        final long value, final byte[] buf, final int offset, final int length) {
526
527        // Check whether we are dealing with UID/GID or SIZE field
528        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
529
530        final boolean negative = value < 0;
531        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
532            return formatLongOctalBytes(value, buf, offset, length);
533        }
534
535        if (length < 9) {
536            formatLongBinary(value, buf, offset, length, negative);
537        } else {
538            formatBigIntegerBinary(value, buf, offset, length, negative);
539        }
540
541        buf[offset] = (byte) (negative ? 0xff : 0x80);
542        return offset + length;
543    }
544
545    private static void formatLongBinary(final long value, final byte[] buf,
546                                         final int offset, final int length,
547                                         final boolean negative) {
548        final int bits = (length - 1) * 8;
549        final long max = 1L << bits;
550        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
551        if (val < 0 || val >= max) {
552            throw new IllegalArgumentException("Value " + value +
553                " is too large for " + length + " byte field.");
554        }
555        if (negative) {
556            val ^= max - 1;
557            val++;
558            val |= 0xffL << bits;
559        }
560        for (int i = offset + length - 1; i >= offset; i--) {
561            buf[i] = (byte) val;
562            val >>= 8;
563        }
564    }
565
566    private static void formatBigIntegerBinary(final long value, final byte[] buf,
567                                               final int offset,
568                                               final int length,
569                                               final boolean negative) {
570        final BigInteger val = BigInteger.valueOf(value);
571        final byte[] b = val.toByteArray();
572        final int len = b.length;
573        if (len > length - 1) {
574            throw new IllegalArgumentException("Value " + value +
575                " is too large for " + length + " byte field.");
576        }
577        final int off = offset + length - len;
578        System.arraycopy(b, 0, buf, off, len);
579        final byte fill = (byte) (negative ? 0xff : 0);
580        for (int i = offset + 1; i < off; i++) {
581            buf[i] = fill;
582        }
583    }
584
585    /**
586     * Writes an octal value into a buffer.
587     *
588     * Uses {@link #formatUnsignedOctalString} to format
589     * the value as an octal string with leading zeros.
590     * The converted number is followed by NUL and then space.
591     *
592     * @param value The value to convert
593     * @param buf The destination buffer
594     * @param offset The starting offset into the buffer.
595     * @param length The size of the buffer.
596     * @return The updated value of offset, i.e. offset+length
597     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
598     */
599    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
600
601        int idx=length-2; // for NUL and space
602        formatUnsignedOctalString(value, buf, offset, idx);
603
604        buf[offset + idx++]   = 0; // Trailing null
605        buf[offset + idx]     = (byte) ' '; // Trailing space
606
607        return offset + length;
608    }
609
610    /**
611     * Compute the checksum of a tar entry header.
612     *
613     * @param buf The tar entry's header buffer.
614     * @return The computed checksum.
615     */
616    public static long computeCheckSum(final byte[] buf) {
617        long sum = 0;
618
619        for (final byte element : buf) {
620            sum += BYTE_MASK & element;
621        }
622
623        return sum;
624    }
625
626    /**
627     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>:
628     * <blockquote>
629     * The checksum is calculated by taking the sum of the unsigned byte values
630     * of the header block with the eight checksum bytes taken to be ascii
631     * spaces (decimal value 32). It is stored as a six digit octal number with
632     * leading zeroes followed by a NUL and then a space. Various
633     * implementations do not adhere to this format. For better compatibility,
634     * ignore leading and trailing whitespace, and get the first six digits. In
635     * addition, some historic tar implementations treated bytes as signed.
636     * Implementations typically calculate the checksum both ways, and treat it
637     * as good if either the signed or unsigned sum matches the included
638     * checksum.
639     * </blockquote>
640     * <p>
641     * The return value of this method should be treated as a best-effort
642     * heuristic rather than an absolute and final truth. The checksum
643     * verification logic may well evolve over time as more special cases
644     * are encountered.
645     *
646     * @param header tar header
647     * @return whether the checksum is reasonably good
648     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
649     * @since 1.5
650     */
651    public static boolean verifyCheckSum(final byte[] header) {
652        final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
653        long unsignedSum = 0;
654        long signedSum = 0;
655
656        for (int i = 0; i < header.length; i++) {
657            byte b = header[i];
658            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
659                b = ' ';
660            }
661            unsignedSum += 0xff & b;
662            signedSum += b;
663        }
664        return storedSum == unsignedSum || storedSum == signedSum;
665    }
666
667    /**
668     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
669     * may appear multi times, and they look like:
670     *
671     * GNU.sparse.size=size
672     * GNU.sparse.numblocks=numblocks
673     * repeat numblocks times
674     *   GNU.sparse.offset=offset
675     *   GNU.sparse.numbytes=numbytes
676     * end repeat
677     *
678     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
679     *
680     * GNU.sparse.map
681     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
682     *
683     * @param inputStream input stream to read keys and values
684     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
685     *                      the sparse headers need to be stored in an array, not a map
686     * @param globalPaxHeaders global PAX headers of the tar archive
687     * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry.
688     * @throws IOException if an I/O error occurs.
689     * @deprecated use the four-arg version instead
690     */
691    @Deprecated
692    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders)
693            throws IOException {
694        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
695    }
696
697    /**
698     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
699     * may appear multi times, and they look like:
700     *
701     * GNU.sparse.size=size
702     * GNU.sparse.numblocks=numblocks
703     * repeat numblocks times
704     *   GNU.sparse.offset=offset
705     *   GNU.sparse.numbytes=numbytes
706     * end repeat
707     *
708     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
709     *
710     * GNU.sparse.map
711     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
712     *
713     * @param inputStream input stream to read keys and values
714     * @param sparseHeaders used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times,
715     *                      the sparse headers need to be stored in an array, not a map
716     * @param globalPaxHeaders global PAX headers of the tar archive
717     * @param headerSize total size of the PAX header, will be ignored if negative
718     * @return map of PAX headers values found inside of the current (local or global) PAX headers tar entry.
719     * @throws IOException if an I/O error occurs.
720     * @since 1.21
721     */
722    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream,
723            final List<TarArchiveStructSparse> sparseHeaders, final Map<String, String> globalPaxHeaders,
724            final long headerSize) throws IOException {
725        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
726        Long offset = null;
727        // Format is "length keyword=value\n";
728        int totalRead = 0;
729        while(true) { // get length
730            int ch;
731            int len = 0;
732            int read = 0;
733            while((ch = inputStream.read()) != -1) {
734                read++;
735                totalRead++;
736                if (ch == '\n') { // blank line in header
737                    break;
738                }
739                if (ch == ' '){ // End of length string
740                    // Get keyword
741                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
742                    while((ch = inputStream.read()) != -1) {
743                        read++;
744                        totalRead++;
745                        if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) {
746                            break;
747                        }
748                        if (ch == '='){ // end of keyword
749                            final String keyword = coll.toString(CharsetNames.UTF_8);
750                            // Get rest of entry
751                            final int restLen = len - read;
752                            if (restLen <= 1) { // only NL
753                                headers.remove(keyword);
754                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
755                                throw new IOException("Paxheader value size " + restLen
756                                    + " exceeds size of header record");
757                            } else {
758                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
759                                final int got = rest.length;
760                                if (got != restLen) {
761                                    throw new IOException("Failed to read "
762                                            + "Paxheader. Expected "
763                                            + restLen
764                                            + " bytes, read "
765                                            + got);
766                                }
767                                totalRead += restLen;
768                                // Drop trailing NL
769                                if (rest[restLen - 1] != '\n') {
770                                    throw new IOException("Failed to read Paxheader."
771                                       + "Value should end with a newline");
772                                }
773                                final String value = new String(rest, 0, restLen - 1, UTF_8);
774                                headers.put(keyword, value);
775
776                                // for 0.0 PAX Headers
777                                if (keyword.equals("GNU.sparse.offset")) {
778                                    if (offset != null) {
779                                        // previous GNU.sparse.offset header but but no numBytes
780                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
781                                    }
782                                    try {
783                                        offset = Long.valueOf(value);
784                                    } catch (NumberFormatException ex) {
785                                        throw new IOException("Failed to read Paxheader."
786                                            + "GNU.sparse.offset contains a non-numeric value");
787                                    }
788                                    if (offset < 0) {
789                                        throw new IOException("Failed to read Paxheader."
790                                            + "GNU.sparse.offset contains negative value");
791                                    }
792                                }
793
794                                // for 0.0 PAX Headers
795                                if (keyword.equals("GNU.sparse.numbytes")) {
796                                    if (offset == null) {
797                                        throw new IOException("Failed to read Paxheader." +
798                                                "GNU.sparse.offset is expected before GNU.sparse.numbytes shows up.");
799                                    }
800                                    long numbytes;
801                                    try {
802                                        numbytes = Long.parseLong(value);
803                                    } catch (NumberFormatException ex) {
804                                        throw new IOException("Failed to read Paxheader."
805                                            + "GNU.sparse.numbytes contains a non-numeric value.");
806                                    }
807                                    if (numbytes < 0) {
808                                        throw new IOException("Failed to read Paxheader."
809                                            + "GNU.sparse.numbytes contains negative value");
810                                    }
811                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
812                                    offset = null;
813                                }
814                            }
815                            break;
816                        }
817                        coll.write((byte) ch);
818                    }
819                    break; // Processed single header
820                }
821
822                // COMPRESS-530 : throw if we encounter a non-number while reading length
823                if (ch < '0' || ch > '9') {
824                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
825                }
826
827                len *= 10;
828                len += ch - '0';
829            }
830            if (ch == -1){ // EOF
831                break;
832            }
833        }
834        if (offset != null) {
835            // offset but no numBytes
836            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
837        }
838        return headers;
839    }
840
841    /**
842     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
843     * GNU.sparse.map
844     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
845     *
846     * <p>Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You
847     * should use {@link #parseFromPAX01SparseHeaders} directly instead.
848     *
849     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
850     * @return sparse headers parsed from sparse map
851     * @deprecated use #parseFromPAX01SparseHeaders instead
852     */
853    @Deprecated
854    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(String sparseMap) {
855        try {
856            return parseFromPAX01SparseHeaders(sparseMap);
857        } catch (IOException ex) {
858            throw new UncheckedIOException(ex.getMessage(), ex);
859        }
860    }
861
862    /**
863     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
864     * GNU.sparse.map
865     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
866     *
867     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
868     * @return unmodifiable list of sparse headers parsed from sparse map
869     * @throws IOException Corrupted TAR archive.
870     * @since 1.21
871     */
872    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(String sparseMap)
873        throws IOException {
874        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
875        String[] sparseHeaderStrings = sparseMap.split(",");
876        if (sparseHeaderStrings.length % 2 == 1) {
877            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
878        }
879
880        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
881            long sparseOffset;
882            try {
883                sparseOffset = Long.parseLong(sparseHeaderStrings[i]);
884            } catch (NumberFormatException ex) {
885                throw new IOException("Corrupted TAR archive."
886                    + " Sparse struct offset contains a non-numeric value");
887            }
888            if (sparseOffset < 0) {
889                throw new IOException("Corrupted TAR archive."
890                    + " Sparse struct offset contains negative value");
891            }
892            long sparseNumbytes;
893            try {
894                sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]);
895            } catch (NumberFormatException ex) {
896                throw new IOException("Corrupted TAR archive."
897                    + " Sparse struct numbytes contains a non-numeric value");
898            }
899            if (sparseNumbytes < 0) {
900                throw new IOException("Corrupted TAR archive."
901                    + " Sparse struct numbytes contains negative value");
902            }
903            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
904        }
905
906        return Collections.unmodifiableList(sparseHeaders);
907    }
908
909    /**
910     * For PAX Format 1.X:
911     * The sparse map itself is stored in the file data block, preceding the actual file data.
912     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
913     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
914     * giving the offset and size of the data block it describes.
915     * @param inputStream parsing source.
916     * @param recordSize The size the TAR header
917     * @return sparse headers
918     * @throws IOException if an I/O error occurs.
919     */
920    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
921        // for 1.X PAX Headers
922        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
923        long bytesRead = 0;
924
925        long[] readResult = readLineOfNumberForPax1X(inputStream);
926        long sparseHeadersCount = readResult[0];
927        if (sparseHeadersCount < 0) {
928            // overflow while reading number?
929            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
930        }
931        bytesRead += readResult[1];
932        while (sparseHeadersCount-- > 0) {
933            readResult = readLineOfNumberForPax1X(inputStream);
934            final long sparseOffset = readResult[0];
935            if (sparseOffset < 0) {
936                throw new IOException("Corrupted TAR archive."
937                    + " Sparse header block offset contains negative value");
938            }
939            bytesRead += readResult[1];
940
941            readResult = readLineOfNumberForPax1X(inputStream);
942            final long sparseNumbytes = readResult[0];
943            if (sparseNumbytes < 0) {
944                throw new IOException("Corrupted TAR archive."
945                    + " Sparse header block numbytes contains negative value");
946            }
947            bytesRead += readResult[1];
948            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
949        }
950
951        // skip the rest of this record data
952        long bytesToSkip = recordSize - bytesRead % recordSize;
953        IOUtils.skip(inputStream, bytesToSkip);
954        return sparseHeaders;
955    }
956
957    /**
958     * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data.
959     * It consists of a series of decimal numbers delimited by newlines.
960     *
961     * @param inputStream the input stream of the tar file
962     * @return the decimal number delimited by '\n', and the bytes read from input stream
963     * @throws IOException
964     */
965    private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
966        int number;
967        long result = 0;
968        long bytesRead = 0;
969
970        while ((number = inputStream.read()) != '\n') {
971            bytesRead += 1;
972            if (number == -1) {
973                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
974            }
975            if (number < '0' || number > '9') {
976                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
977            }
978            result = result * 10 + (number - '0');
979        }
980        bytesRead += 1;
981
982        return new long[]{result, bytesRead};
983    }
984
985}