001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.commons.compress.archivers.tar;
020
021 import java.io.File;
022 import java.io.IOException;
023 import java.io.OutputStream;
024 import java.io.StringWriter;
025 import java.util.HashMap;
026 import java.util.Map;
027 import org.apache.commons.compress.archivers.ArchiveEntry;
028 import org.apache.commons.compress.archivers.ArchiveOutputStream;
029 import org.apache.commons.compress.archivers.zip.ZipEncoding;
030 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
031 import org.apache.commons.compress.utils.CharsetNames;
032 import org.apache.commons.compress.utils.CountingOutputStream;
033
034 /**
035 * The TarOutputStream writes a UNIX tar archive as an OutputStream.
036 * Methods are provided to put entries, and then write their contents
037 * by writing to this stream using write().
038 * @NotThreadSafe
039 */
040 public class TarArchiveOutputStream extends ArchiveOutputStream {
041 /** Fail if a long file name is required in the archive. */
042 public static final int LONGFILE_ERROR = 0;
043
044 /** Long paths will be truncated in the archive. */
045 public static final int LONGFILE_TRUNCATE = 1;
046
047 /** GNU tar extensions are used to store long file names in the archive. */
048 public static final int LONGFILE_GNU = 2;
049
050 /** POSIX/PAX extensions are used to store long file names in the archive. */
051 public static final int LONGFILE_POSIX = 3;
052
053 /** Fail if a big number (e.g. size > 8GiB) is required in the archive. */
054 public static final int BIGNUMBER_ERROR = 0;
055
056 /** star/GNU tar/BSD tar extensions are used to store big number in the archive. */
057 public static final int BIGNUMBER_STAR = 1;
058
059 /** POSIX/PAX extensions are used to store big numbers in the archive. */
060 public static final int BIGNUMBER_POSIX = 2;
061
062 private long currSize;
063 private String currName;
064 private long currBytes;
065 private final byte[] recordBuf;
066 private int assemLen;
067 private final byte[] assemBuf;
068 protected final TarBuffer buffer;
069 private int longFileMode = LONGFILE_ERROR;
070 private int bigNumberMode = BIGNUMBER_ERROR;
071
072 private boolean closed = false;
073
074 /** Indicates if putArchiveEntry has been called without closeArchiveEntry */
075 private boolean haveUnclosedEntry = false;
076
077 /** indicates if this archive is finished */
078 private boolean finished = false;
079
080 private final OutputStream out;
081
082 private final ZipEncoding encoding;
083
084 private boolean addPaxHeadersForNonAsciiNames = false;
085 private static final ZipEncoding ASCII =
086 ZipEncodingHelper.getZipEncoding("ASCII");
087
088 /**
089 * Constructor for TarInputStream.
090 * @param os the output stream to use
091 */
092 public TarArchiveOutputStream(OutputStream os) {
093 this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
094 }
095
096 /**
097 * Constructor for TarInputStream.
098 * @param os the output stream to use
099 * @param encoding name of the encoding to use for file names
100 * @since Commons Compress 1.4
101 */
102 public TarArchiveOutputStream(OutputStream os, String encoding) {
103 this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
104 }
105
106 /**
107 * Constructor for TarInputStream.
108 * @param os the output stream to use
109 * @param blockSize the block size to use
110 */
111 public TarArchiveOutputStream(OutputStream os, int blockSize) {
112 this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE);
113 }
114
115 /**
116 * Constructor for TarInputStream.
117 * @param os the output stream to use
118 * @param blockSize the block size to use
119 * @param encoding name of the encoding to use for file names
120 * @since Commons Compress 1.4
121 */
122 public TarArchiveOutputStream(OutputStream os, int blockSize,
123 String encoding) {
124 this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
125 }
126
127 /**
128 * Constructor for TarInputStream.
129 * @param os the output stream to use
130 * @param blockSize the block size to use
131 * @param recordSize the record size to use
132 */
133 public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) {
134 this(os, blockSize, recordSize, null);
135 }
136
137 /**
138 * Constructor for TarInputStream.
139 * @param os the output stream to use
140 * @param blockSize the block size to use
141 * @param recordSize the record size to use
142 * @param encoding name of the encoding to use for file names
143 * @since Commons Compress 1.4
144 */
145 public TarArchiveOutputStream(OutputStream os, int blockSize,
146 int recordSize, String encoding) {
147 out = new CountingOutputStream(os);
148 this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
149
150 this.buffer = new TarBuffer(out, blockSize, recordSize);
151 this.assemLen = 0;
152 this.assemBuf = new byte[recordSize];
153 this.recordBuf = new byte[recordSize];
154 }
155
156 /**
157 * Set the long file mode.
158 * This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1) or LONGFILE_GNU(2).
159 * This specifies the treatment of long file names (names >= TarConstants.NAMELEN).
160 * Default is LONGFILE_ERROR.
161 * @param longFileMode the mode to use
162 */
163 public void setLongFileMode(int longFileMode) {
164 this.longFileMode = longFileMode;
165 }
166
167 /**
168 * Set the big number mode.
169 * This can be BIGNUMBER_ERROR(0), BIGNUMBER_POSIX(1) or BIGNUMBER_STAR(2).
170 * This specifies the treatment of big files (sizes > TarConstants.MAXSIZE) and other numeric values to big to fit into a traditional tar header.
171 * Default is BIGNUMBER_ERROR.
172 * @param bigNumberMode the mode to use
173 * @since 1.4
174 */
175 public void setBigNumberMode(int bigNumberMode) {
176 this.bigNumberMode = bigNumberMode;
177 }
178
179 /**
180 * Whether to add a PAX extension header for non-ASCII file names.
181 * @since 1.4
182 */
183 public void setAddPaxHeadersForNonAsciiNames(boolean b) {
184 addPaxHeadersForNonAsciiNames = b;
185 }
186
187 @Deprecated
188 @Override
189 public int getCount() {
190 return (int) getBytesWritten();
191 }
192
193 @Override
194 public long getBytesWritten() {
195 return ((CountingOutputStream) out).getBytesWritten();
196 }
197
198 /**
199 * Ends the TAR archive without closing the underlying OutputStream.
200 *
201 * An archive consists of a series of file entries terminated by an
202 * end-of-archive entry, which consists of two 512 blocks of zero bytes.
203 * POSIX.1 requires two EOF records, like some other implementations.
204 *
205 * @throws IOException on error
206 */
207 @Override
208 public void finish() throws IOException {
209 if (finished) {
210 throw new IOException("This archive has already been finished");
211 }
212
213 if(haveUnclosedEntry) {
214 throw new IOException("This archives contains unclosed entries.");
215 }
216 writeEOFRecord();
217 writeEOFRecord();
218 buffer.flushBlock();
219 finished = true;
220 }
221
222 /**
223 * Closes the underlying OutputStream.
224 * @throws IOException on error
225 */
226 @Override
227 public void close() throws IOException {
228 if(!finished) {
229 finish();
230 }
231
232 if (!closed) {
233 buffer.close();
234 out.close();
235 closed = true;
236 }
237 }
238
239 /**
240 * Get the record size being used by this stream's TarBuffer.
241 *
242 * @return The TarBuffer record size.
243 */
244 public int getRecordSize() {
245 return buffer.getRecordSize();
246 }
247
248 /**
249 * Put an entry on the output stream. This writes the entry's
250 * header record and positions the output stream for writing
251 * the contents of the entry. Once this method is called, the
252 * stream is ready for calls to write() to write the entry's
253 * contents. Once the contents are written, closeArchiveEntry()
254 * <B>MUST</B> be called to ensure that all buffered data
255 * is completely written to the output stream.
256 *
257 * @param archiveEntry The TarEntry to be written to the archive.
258 * @throws IOException on error
259 * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry
260 */
261 @Override
262 public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException {
263 if(finished) {
264 throw new IOException("Stream has already been finished");
265 }
266 TarArchiveEntry entry = (TarArchiveEntry) archiveEntry;
267 Map<String, String> paxHeaders = new HashMap<String, String>();
268 final String entryName = entry.getName();
269 final byte[] nameBytes = encoding.encode(entryName).array();
270 boolean paxHeaderContainsPath = false;
271 if (nameBytes.length >= TarConstants.NAMELEN) {
272
273 if (longFileMode == LONGFILE_POSIX) {
274 paxHeaders.put("path", entryName);
275 paxHeaderContainsPath = true;
276 } else if (longFileMode == LONGFILE_GNU) {
277 // create a TarEntry for the LongLink, the contents
278 // of which are the entry's name
279 TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK,
280 TarConstants.LF_GNUTYPE_LONGNAME);
281
282 longLinkEntry.setSize(nameBytes.length + 1); // +1 for NUL
283 putArchiveEntry(longLinkEntry);
284 write(nameBytes);
285 write(0); // NUL terminator
286 closeArchiveEntry();
287 } else if (longFileMode != LONGFILE_TRUNCATE) {
288 throw new RuntimeException("file name '" + entryName
289 + "' is too long ( > "
290 + TarConstants.NAMELEN + " bytes)");
291 }
292 }
293
294 if (bigNumberMode == BIGNUMBER_POSIX) {
295 addPaxHeadersForBigNumbers(paxHeaders, entry);
296 } else if (bigNumberMode != BIGNUMBER_STAR) {
297 failForBigNumbers(entry);
298 }
299
300 if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath
301 && !ASCII.canEncode(entryName)) {
302 paxHeaders.put("path", entryName);
303 }
304
305 if (addPaxHeadersForNonAsciiNames
306 && (entry.isLink() || entry.isSymbolicLink())
307 && !ASCII.canEncode(entry.getLinkName())) {
308 paxHeaders.put("linkpath", entry.getLinkName());
309 }
310
311 if (paxHeaders.size() > 0) {
312 writePaxHeaders(entryName, paxHeaders);
313 }
314
315 entry.writeEntryHeader(recordBuf, encoding,
316 bigNumberMode == BIGNUMBER_STAR);
317 buffer.writeRecord(recordBuf);
318
319 currBytes = 0;
320
321 if (entry.isDirectory()) {
322 currSize = 0;
323 } else {
324 currSize = entry.getSize();
325 }
326 currName = entryName;
327 haveUnclosedEntry = true;
328 }
329
330 /**
331 * Close an entry. This method MUST be called for all file
332 * entries that contain data. The reason is that we must
333 * buffer data written to the stream in order to satisfy
334 * the buffer's record based writes. Thus, there may be
335 * data fragments still being assembled that must be written
336 * to the output stream before this entry is closed and the
337 * next entry written.
338 * @throws IOException on error
339 */
340 @Override
341 public void closeArchiveEntry() throws IOException {
342 if(finished) {
343 throw new IOException("Stream has already been finished");
344 }
345 if (!haveUnclosedEntry){
346 throw new IOException("No current entry to close");
347 }
348 if (assemLen > 0) {
349 for (int i = assemLen; i < assemBuf.length; ++i) {
350 assemBuf[i] = 0;
351 }
352
353 buffer.writeRecord(assemBuf);
354
355 currBytes += assemLen;
356 assemLen = 0;
357 }
358
359 if (currBytes < currSize) {
360 throw new IOException("entry '" + currName + "' closed at '"
361 + currBytes
362 + "' before the '" + currSize
363 + "' bytes specified in the header were written");
364 }
365 haveUnclosedEntry = false;
366 }
367
368 /**
369 * Writes bytes to the current tar archive entry. This method
370 * is aware of the current entry and will throw an exception if
371 * you attempt to write bytes past the length specified for the
372 * current entry. The method is also (painfully) aware of the
373 * record buffering required by TarBuffer, and manages buffers
374 * that are not a multiple of recordsize in length, including
375 * assembling records from small buffers.
376 *
377 * @param wBuf The buffer to write to the archive.
378 * @param wOffset The offset in the buffer from which to get bytes.
379 * @param numToWrite The number of bytes to write.
380 * @throws IOException on error
381 */
382 @Override
383 public void write(byte[] wBuf, int wOffset, int numToWrite) throws IOException {
384 if ((currBytes + numToWrite) > currSize) {
385 throw new IOException("request to write '" + numToWrite
386 + "' bytes exceeds size in header of '"
387 + currSize + "' bytes for entry '"
388 + currName + "'");
389
390 //
391 // We have to deal with assembly!!!
392 // The programmer can be writing little 32 byte chunks for all
393 // we know, and we must assemble complete records for writing.
394 // REVIEW Maybe this should be in TarBuffer? Could that help to
395 // eliminate some of the buffer copying.
396 //
397 }
398
399 if (assemLen > 0) {
400 if ((assemLen + numToWrite) >= recordBuf.length) {
401 int aLen = recordBuf.length - assemLen;
402
403 System.arraycopy(assemBuf, 0, recordBuf, 0,
404 assemLen);
405 System.arraycopy(wBuf, wOffset, recordBuf,
406 assemLen, aLen);
407 buffer.writeRecord(recordBuf);
408
409 currBytes += recordBuf.length;
410 wOffset += aLen;
411 numToWrite -= aLen;
412 assemLen = 0;
413 } else {
414 System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
415 numToWrite);
416
417 wOffset += numToWrite;
418 assemLen += numToWrite;
419 numToWrite = 0;
420 }
421 }
422
423 //
424 // When we get here we have EITHER:
425 // o An empty "assemble" buffer.
426 // o No bytes to write (numToWrite == 0)
427 //
428 while (numToWrite > 0) {
429 if (numToWrite < recordBuf.length) {
430 System.arraycopy(wBuf, wOffset, assemBuf, assemLen,
431 numToWrite);
432
433 assemLen += numToWrite;
434
435 break;
436 }
437
438 buffer.writeRecord(wBuf, wOffset);
439
440 int num = recordBuf.length;
441
442 currBytes += num;
443 numToWrite -= num;
444 wOffset += num;
445 }
446 }
447
448 /**
449 * Writes a PAX extended header with the given map as contents.
450 * @since 1.4
451 */
452 void writePaxHeaders(String entryName,
453 Map<String, String> headers) throws IOException {
454 String name = "./PaxHeaders.X/" + stripTo7Bits(entryName);
455 if (name.length() >= TarConstants.NAMELEN) {
456 name = name.substring(0, TarConstants.NAMELEN - 1);
457 }
458 TarArchiveEntry pex = new TarArchiveEntry(name,
459 TarConstants.LF_PAX_EXTENDED_HEADER_LC);
460
461 StringWriter w = new StringWriter();
462 for (Map.Entry<String, String> h : headers.entrySet()) {
463 String key = h.getKey();
464 String value = h.getValue();
465 int len = key.length() + value.length()
466 + 3 /* blank, equals and newline */
467 + 2 /* guess 9 < actual length < 100 */;
468 String line = len + " " + key + "=" + value + "\n";
469 int actualLength = line.getBytes(CharsetNames.UTF_8).length;
470 while (len != actualLength) {
471 // Adjust for cases where length < 10 or > 100
472 // or where UTF-8 encoding isn't a single octet
473 // per character.
474 // Must be in loop as size may go from 99 to 100 in
475 // first pass so we'd need a second.
476 len = actualLength;
477 line = len + " " + key + "=" + value + "\n";
478 actualLength = line.getBytes(CharsetNames.UTF_8).length;
479 }
480 w.write(line);
481 }
482 byte[] data = w.toString().getBytes(CharsetNames.UTF_8);
483 pex.setSize(data.length);
484 putArchiveEntry(pex);
485 write(data);
486 closeArchiveEntry();
487 }
488
489 private String stripTo7Bits(String name) {
490 final int length = name.length();
491 StringBuffer result = new StringBuffer(length);
492 for (int i = 0; i < length; i++) {
493 char stripped = (char) (name.charAt(i) & 0x7F);
494 if (stripped != 0) { // would be read as Trailing null
495 result.append(stripped);
496 }
497 }
498 return result.toString();
499 }
500
501 /**
502 * Write an EOF (end of archive) record to the tar archive.
503 * An EOF record consists of a record of all zeros.
504 */
505 private void writeEOFRecord() throws IOException {
506 for (int i = 0; i < recordBuf.length; ++i) {
507 recordBuf[i] = 0;
508 }
509
510 buffer.writeRecord(recordBuf);
511 }
512
513 @Override
514 public void flush() throws IOException {
515 out.flush();
516 }
517
518 /** {@inheritDoc} */
519 @Override
520 public ArchiveEntry createArchiveEntry(File inputFile, String entryName)
521 throws IOException {
522 if(finished) {
523 throw new IOException("Stream has already been finished");
524 }
525 return new TarArchiveEntry(inputFile, entryName);
526 }
527
528 private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders,
529 TarArchiveEntry entry) {
530 addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(),
531 TarConstants.MAXSIZE);
532 addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getGroupId(),
533 TarConstants.MAXID);
534 addPaxHeaderForBigNumber(paxHeaders, "mtime",
535 entry.getModTime().getTime() / 1000,
536 TarConstants.MAXSIZE);
537 addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getUserId(),
538 TarConstants.MAXID);
539 // star extensions by J\u00f6rg Schilling
540 addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor",
541 entry.getDevMajor(), TarConstants.MAXID);
542 addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor",
543 entry.getDevMinor(), TarConstants.MAXID);
544 // there is no PAX header for file mode
545 failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
546 }
547
548 private void addPaxHeaderForBigNumber(Map<String, String> paxHeaders,
549 String header, long value,
550 long maxValue) {
551 if (value < 0 || value > maxValue) {
552 paxHeaders.put(header, String.valueOf(value));
553 }
554 }
555
556 private void failForBigNumbers(TarArchiveEntry entry) {
557 failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE);
558 failForBigNumber("group id", entry.getGroupId(), TarConstants.MAXID);
559 failForBigNumber("last modification time",
560 entry.getModTime().getTime() / 1000,
561 TarConstants.MAXSIZE);
562 failForBigNumber("user id", entry.getUserId(), TarConstants.MAXID);
563 failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
564 failForBigNumber("major device number", entry.getDevMajor(),
565 TarConstants.MAXID);
566 failForBigNumber("minor device number", entry.getDevMinor(),
567 TarConstants.MAXID);
568 }
569
570 private void failForBigNumber(String field, long value, long maxValue) {
571 if (value < 0 || value > maxValue) {
572 throw new RuntimeException(field + " '" + value
573 + "' is too big ( > "
574 + maxValue + " )");
575 }
576 }
577 }