001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020 package org.apache.commons.compress.archivers.zip;
021
022 import java.io.IOException;
023 import java.nio.ByteBuffer;
024
/**
 * An interface for encoders that do a pretty encoding of ZIP
 * filenames.
 *
 * <p>There are mostly two implementations: one that uses the java.nio
 * {@link java.nio.charset.Charset Charset} and one that copes with
 * simple 8 bit charsets, because java-1.4 did not support Cp437 in
 * java.nio.</p>
 *
 * <p>The main reason for defining a separate encoding layer comes from
 * the problems with {@link java.lang.String#getBytes(String)
 * String.getBytes}, which encodes unknown characters as ASCII
 * question marks ('?'). A question mark is by definition not allowed
 * in filenames on some operating systems like Windows, which
 * leads to ignored ZIP entries.</p>
 *
 * <p>All implementations should implement this interface in a
 * reentrant way.</p>
 */
public interface ZipEncoding {

    /**
     * Checks whether the given string may be losslessly encoded using
     * this encoding.
     *
     * @param name A filename or ZIP comment.
     * @return Whether the given name may be encoded without any losses.
     */
    boolean canEncode(String name);

    /**
     * Encodes a filename or a comment to a byte array suitable for
     * storing it to a serialized zip entry.
     *
     * <p>Examples for CP 437 (in pseudo-notation, right hand side is
     * C-style notation):</p>
     * <pre>
     *  encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
     *  encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
     * </pre>
     *
     * @param name A filename or ZIP comment.
     * @return A byte buffer with a backing array containing the
     *         encoded name. Unmappable characters or malformed
     *         character sequences are mapped to a sequence of UTF-16
     *         words encoded in the format {@code %Uxxxx}. It is
     *         assumed that the byte buffer is positioned at the
     *         beginning of the encoded result, that the byte buffer
     *         has a backing array, and that the limit of the byte
     *         buffer points to the end of the encoded result.
     * @throws IOException if the implementation fails to encode the
     *         name; the exact conditions are left to the implementation.
     */
    ByteBuffer encode(String name) throws IOException;

    /**
     * Decodes the given byte values into a string using this encoding.
     *
     * @param data The byte values to decode.
     * @return The decoded string.
     * @throws IOException if the implementation fails to decode the
     *         data; the exact conditions are left to the implementation.
     */
    String decode(byte[] data) throws IOException;
}