/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.compress.harmony.unpack200;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TimeZone;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.zip.CRC32;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;

import org.apache.commons.compress.harmony.pack200.Codec;
import org.apache.commons.compress.harmony.pack200.Pack200Exception;
import org.apache.commons.compress.harmony.unpack200.bytecode.Attribute;
import org.apache.commons.compress.harmony.unpack200.bytecode.CPClass;
import org.apache.commons.compress.harmony.unpack200.bytecode.CPField;
import org.apache.commons.compress.harmony.unpack200.bytecode.CPMethod;
import org.apache.commons.compress.harmony.unpack200.bytecode.CPUTF8;
import org.apache.commons.compress.harmony.unpack200.bytecode.ClassConstantPool;
import org.apache.commons.compress.harmony.unpack200.bytecode.ClassFile;
import org.apache.commons.compress.harmony.unpack200.bytecode.ClassFileEntry;
import org.apache.commons.compress.harmony.unpack200.bytecode.InnerClassesAttribute;
import org.apache.commons.compress.harmony.unpack200.bytecode.SourceFileAttribute;

/**
 * A Pack200 archive consists of one or more segments. Each segment is stand-alone, in the sense that every segment has
 * the magic number header; thus, every segment is also a valid archive. However, it is possible to combine
 * (non-GZipped) archives into a single large archive by concatenation alone. Thus all the hard work in unpacking an
 * archive falls to understanding a segment.
 *
 * The first component of a segment is the header; this contains (amongst other things) the expected counts of constant
 * pool entries, which in turn defines how many values need to be read from the stream. Because values are variable
 * width (see {@link Codec}), it is not possible to calculate the start of the next segment, although one of the header
 * values does hint at the size of the segment if non-zero, which can be used for buffering purposes.
 *
 * Note that this does not perform any buffering of the input stream; each value will be read on a byte-by-byte basis.
 * It does not perform GZip decompression automatically; both of these are expected to be done by the caller if the
 * stream has the magic header for GZip streams ({@link GZIPInputStream#GZIP_MAGIC}). In any case, if GZip decompression
 * is being performed the input stream will be buffered at a higher level, and thus this can read on a byte-oriented
 * basis.
 */
public class Segment {

    /** Log level at which verbose messages are emitted. */
    public static final int LOG_LEVEL_VERBOSE = 2;

    /** Log level at which standard messages are emitted. */
    public static final int LOG_LEVEL_STANDARD = 1;

    /** Lowest log level. */
    public static final int LOG_LEVEL_QUIET = 0;

    /** Segment header; created and read in {@link #unpackRead(InputStream)}. */
    private SegmentHeader header;

    private CpBands cpBands;

    private AttrDefinitionBands attrDefinitionBands;

    private IcBands icBands;

    private ClassBands classBands;

    private BcBands bcBands;

    private FileBands fileBands;

    /** Set once {@link #overrideDeflateHint(boolean)} has been called; {@link #deflateHint} then applies to all files. */
    private boolean overrideDeflateHint;

    private boolean deflateHint;

    /** When set, the remainder of the segment is buffered in memory before the bands are read. */
    private boolean doPreRead;

    private int logLevel;

    /** Destination for log messages; {@code null} until {@link #setLogStream(OutputStream)} is called. */
    private PrintWriter logStream;

    /** Serialized class files, indexed by class number (not by file index). */
    private byte[][] classFilesContents;

    /** Per-file deflate decision, indexed by file index. */
    private boolean[] fileDeflate;

    /** Per-file flag telling whether the file is a class, indexed by file index. */
    private boolean[] fileIsClass;

    /** In-memory copy of the segment body; only set when pre-reading is enabled and the archive size is known. */
    private InputStream internalBuffer;

    /**
     * Assembles the {@link ClassFile} for the class with the given number from the unpacked bands.
     *
     * @param classNum index of the class within the class bands
     * @return the class file, with its constant pool resolved
     */
    private ClassFile buildClassFile(final int classNum) {
        final ClassFile classFile = new ClassFile();
        final int[] major = classBands.getClassVersionMajor();
        final int[] minor = classBands.getClassVersionMinor();
        if (major != null) {
            classFile.major = major[classNum];
            classFile.minor = minor[classNum];
        } else {
            classFile.major = header.getDefaultClassMajorVersion();
            classFile.minor = header.getDefaultClassMinorVersion();
        }
        // build constant pool
        final ClassConstantPool cp = classFile.pool;
        final int fullNameIndexInCpClass = classBands.getClassThisInts()[classNum];
        final String fullName = cpBands.getCpClass()[fullNameIndexInCpClass];
        // Start of the simple name: if lastIndexOf == -1 then -1 + 1 = 0, so substring(0) == the whole name
        final int simpleNameStart = fullName.lastIndexOf("/") + 1;

        // Get the source file attribute (the last one wins if several are present)
        final List<Attribute> classAttributes = classBands.getClassAttributes()[classNum];
        SourceFileAttribute sourceFileAttribute = null;
        for (final Attribute classAttribute : classAttributes) {
            if (classAttribute.isSourceFileAttribute()) {
                sourceFileAttribute = (SourceFileAttribute) classAttribute;
            }
        }

        if (sourceFileAttribute == null) {
            // If we don't have a source file attribute yet, we need
            // to infer it from the class.
            final AttributeLayout sourceFileLayout = attrDefinitionBands.getAttributeDefinitionMap()
                .getAttributeLayout(AttributeLayout.ATTRIBUTE_SOURCE_FILE, AttributeLayout.CONTEXT_CLASS);
            if (sourceFileLayout.matches(classBands.getRawClassFlags()[classNum])) {
                // Cut the simple name at the FIRST character <= '$' after the package prefix, so that
                // a nested class such as Outer$Inner$Deep maps to Outer.java rather than Outer$Inner.java.
                int firstDollar = -1;
                for (int index = simpleNameStart; index < fullName.length(); index++) {
                    if (fullName.charAt(index) <= '$') {
                        firstDollar = index;
                        break;
                    }
                }
                final String fileName;
                if (firstDollar > -1) {
                    fileName = fullName.substring(simpleNameStart, firstDollar) + ".java";
                } else {
                    fileName = fullName.substring(simpleNameStart) + ".java";
                }
                sourceFileAttribute = new SourceFileAttribute(cpBands.cpUTF8Value(fileName, false));
                classFile.attributes = new Attribute[] {(Attribute) cp.add(sourceFileAttribute)};
            } else {
                classFile.attributes = new Attribute[] {};
            }
        } else {
            classFile.attributes = new Attribute[] {(Attribute) cp.add(sourceFileAttribute)};
        }

        // If we see any class attributes, add them to the class's attributes
        // that will be written out. Keep SourceFileAttributes out since we just
        // did them above.
        final List<Attribute> classAttributesWithoutSourceFileAttribute = new ArrayList<>(classAttributes.size());
        for (final Attribute attrib : classAttributes) {
            if (!attrib.isSourceFileAttribute()) {
                classAttributesWithoutSourceFileAttribute.add(attrib);
            }
        }
        final Attribute[] originalAttributes = classFile.attributes;
        classFile.attributes = new Attribute[originalAttributes.length
            + classAttributesWithoutSourceFileAttribute.size()];
        System.arraycopy(originalAttributes, 0, classFile.attributes, 0, originalAttributes.length);
        int nextAttributeSlot = originalAttributes.length;
        for (final Attribute attrib : classAttributesWithoutSourceFileAttribute) {
            cp.add(attrib);
            classFile.attributes[nextAttributeSlot++] = attrib;
        }

        // this/superclass
        final ClassFileEntry cfThis = cp.add(cpBands.cpClassValue(fullNameIndexInCpClass));
        final ClassFileEntry cfSuper = cp.add(cpBands.cpClassValue(classBands.getClassSuperInts()[classNum]));
        // add interfaces
        final ClassFileEntry[] cfInterfaces = new ClassFileEntry[classBands.getClassInterfacesInts()[classNum].length];
        for (int i = 0; i < cfInterfaces.length; i++) {
            cfInterfaces[i] = cp.add(cpBands.cpClassValue(classBands.getClassInterfacesInts()[classNum][i]));
        }
        // add fields
        final ClassFileEntry[] cfFields = new ClassFileEntry[classBands.getClassFieldCount()[classNum]];
        // fieldDescr and fieldFlags used to create this
        for (int i = 0; i < cfFields.length; i++) {
            final int descriptorIndex = classBands.getFieldDescrInts()[classNum][i];
            final int nameIndex = cpBands.getCpDescriptorNameInts()[descriptorIndex];
            final int typeIndex = cpBands.getCpDescriptorTypeInts()[descriptorIndex];
            final CPUTF8 name = cpBands.cpUTF8Value(nameIndex);
            final CPUTF8 descriptor = cpBands.cpSignatureValue(typeIndex);
            cfFields[i] = cp.add(new CPField(name, descriptor, classBands.getFieldFlags()[classNum][i],
                classBands.getFieldAttributes()[classNum][i]));
        }
        // add methods
        final ClassFileEntry[] cfMethods = new ClassFileEntry[classBands.getClassMethodCount()[classNum]];
        // methodDescr and methodFlags used to create this
        for (int i = 0; i < cfMethods.length; i++) {
            final int descriptorIndex = classBands.getMethodDescrInts()[classNum][i];
            final int nameIndex = cpBands.getCpDescriptorNameInts()[descriptorIndex];
            final int typeIndex = cpBands.getCpDescriptorTypeInts()[descriptorIndex];
            final CPUTF8 name = cpBands.cpUTF8Value(nameIndex);
            final CPUTF8 descriptor = cpBands.cpSignatureValue(typeIndex);
            cfMethods[i] = cp.add(new CPMethod(name, descriptor, classBands.getMethodFlags()[classNum][i],
                classBands.getMethodAttributes()[classNum][i]));
        }
        cp.addNestedEntries();

        // add inner class attribute (if required)
        boolean addInnerClassesAttr = false;
        final IcTuple[] ic_local = getClassBands().getIcLocal()[classNum];
        final boolean ic_local_sent = ic_local != null;
        final InnerClassesAttribute innerClassesAttribute = new InnerClassesAttribute("InnerClasses");
        final IcTuple[] ic_relevant = getIcBands().getRelevantIcTuples(fullName, cp);
        final List<IcTuple> ic_stored = computeIcStored(ic_local, ic_relevant);
        for (final IcTuple icStored : ic_stored) {
            final int innerClassIndex = icStored.thisClassIndex();
            final int outerClassIndex = icStored.outerClassIndex();
            final int simpleClassNameIndex = icStored.simpleClassNameIndex();

            final String innerClassString = icStored.thisClassString();
            final String outerClassString = icStored.outerClassString();
            final String simpleClassName = icStored.simpleClassName();

            // An index of -1 means "no index available": fall back to the string form.
            final CPClass innerClass = innerClassIndex != -1 ? cpBands.cpClassValue(innerClassIndex)
                : cpBands.cpClassValue(innerClassString);

            CPUTF8 innerName = null;
            if (!icStored.isAnonymous()) {
                innerName = simpleClassNameIndex != -1 ? cpBands.cpUTF8Value(simpleClassNameIndex)
                    : cpBands.cpUTF8Value(simpleClassName);
            }

            CPClass outerClass = null;
            if (icStored.isMember()) {
                outerClass = outerClassIndex != -1 ? cpBands.cpClassValue(outerClassIndex)
                    : cpBands.cpClassValue(outerClassString);
            }
            final int flags = icStored.F;
            innerClassesAttribute.addInnerClassesEntry(innerClass, outerClass, innerName, flags);
            addInnerClassesAttr = true;
        }
        // If ic_local is sent and it's empty, don't add
        // the inner classes attribute.
        if (ic_local_sent && (ic_local.length == 0)) {
            addInnerClassesAttr = false;
        }

        // If ic_local is not sent and ic_relevant is empty,
        // don't add the inner class attribute.
        if (!ic_local_sent && (ic_relevant.length == 0)) {
            addInnerClassesAttr = false;
        }

        if (addInnerClassesAttr) {
            // Need to add the InnerClasses attribute to the
            // existing classFile attributes.
            final Attribute[] originalAttrs = classFile.attributes;
            final Attribute[] newAttrs = new Attribute[originalAttrs.length + 1];
            System.arraycopy(originalAttrs, 0, newAttrs, 0, originalAttrs.length);
            newAttrs[newAttrs.length - 1] = innerClassesAttribute;
            classFile.attributes = newAttrs;
            cp.addWithNestedEntries(innerClassesAttribute);
        }
        // sort CP according to cp_All
        cp.resolve(this);
        // NOTE the indexOf is only valid after the cp.resolve()
        // build up remainder of file
        classFile.accessFlags = (int) classBands.getClassFlags()[classNum];
        classFile.thisClass = cp.indexOf(cfThis);
        classFile.superClass = cp.indexOf(cfSuper);
        // TODO placate format of file for writing purposes
        classFile.interfaces = new int[cfInterfaces.length];
        for (int i = 0; i < cfInterfaces.length; i++) {
            classFile.interfaces[i] = cp.indexOf(cfInterfaces[i]);
        }
        classFile.fields = cfFields;
        classFile.methods = cfMethods;
        return classFile;
    }

    /**
     * Given an ic_local and an ic_relevant, use them to calculate what should be added as ic_stored.
     *
     * @param ic_local IcTuple[] array of local transmitted tuples, may be {@code null}
     * @param ic_relevant IcTuple[] array of relevant tuples
     * @return List of tuples to be stored. If ic_local is null or empty, the values returned may not be correct. The
     *         caller will have to determine if this is the case.
     */
    private List<IcTuple> computeIcStored(final IcTuple[] ic_local, final IcTuple[] ic_relevant) {
        final List<IcTuple> result = new ArrayList<>(ic_relevant.length);
        final List<IcTuple> duplicates = new ArrayList<>(ic_relevant.length);
        final Set<IcTuple> isInResult = new HashSet<>(ic_relevant.length);

        // need to compute:
        // result = ic_local XOR ic_relevant

        // add ic_local
        if (ic_local != null) {
            for (final IcTuple element : ic_local) {
                if (isInResult.add(element)) {
                    result.add(element);
                }
            }
        }

        // add ic_relevant; anything already seen is remembered as a duplicate
        for (final IcTuple element : ic_relevant) {
            if (isInResult.add(element)) {
                result.add(element);
            } else {
                duplicates.add(element);
            }
        }

        // eliminate "duplicates"
        duplicates.forEach(result::remove);

        return result;
    }

    /**
     * This performs reading the data from the stream into non-static instance of Segment. After the completion of this
     * method stream can be freed.
     *
     * @param in the input stream to read from
     * @throws IOException if a problem occurs during reading from the underlying stream
     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
     */
    private void readSegment(final InputStream in) throws IOException, Pack200Exception {
        log(LOG_LEVEL_VERBOSE, "-------");
        cpBands = new CpBands(this);
        cpBands.read(in);
        attrDefinitionBands = new AttrDefinitionBands(this);
        attrDefinitionBands.read(in);
        icBands = new IcBands(this);
        icBands.read(in);
        classBands = new ClassBands(this);
        classBands.read(in);
        bcBands = new BcBands(this);
        bcBands.read(in);
        fileBands = new FileBands(this);
        fileBands.read(in);

        fileBands.processFileBits();
    }

    /**
     * This performs the actual work of parsing against a non-static instance of Segment. This method is intended to run
     * concurrently for multiple segments.
     *
     * After unpacking the header and all bands, it records for every file whether it is a class and whether it should
     * be deflated, and serializes each class file into {@link #classFilesContents}.
     *
     * @throws IOException if a problem occurs during reading from the underlying stream
     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
     */
    private void parseSegment() throws IOException, Pack200Exception {

        header.unpack();
        cpBands.unpack();
        attrDefinitionBands.unpack();
        icBands.unpack();
        classBands.unpack();
        bcBands.unpack();
        fileBands.unpack();

        int classNum = 0;
        final int numberOfFiles = header.getNumberOfFiles();
        final String[] fileName = fileBands.getFileName();
        final int[] fileOptions = fileBands.getFileOptions();
        final SegmentOptions options = header.getOptions();

        classFilesContents = new byte[numberOfFiles][];
        fileDeflate = new boolean[numberOfFiles];
        fileIsClass = new boolean[numberOfFiles];

        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        final DataOutputStream dos = new DataOutputStream(bos);

        for (int i = 0; i < numberOfFiles; i++) {
            String name = fileName[i];

            // A file is a class if option bit 2 is set or if it has no name at all.
            final boolean nameIsEmpty = (name == null) || name.equals("");
            final boolean isClass = (fileOptions[i] & 2) == 2 || nameIsEmpty;
            if (isClass && nameIsEmpty) {
                // Unnamed class files take their name from the class they contain.
                name = cpBands.getCpClass()[classBands.getClassThisInts()[classNum]] + ".class";
                fileName[i] = name;
            }

            if (!overrideDeflateHint) {
                fileDeflate[i] = (fileOptions[i] & 1) == 1 || options.shouldDeflate();
            } else {
                fileDeflate[i] = deflateHint;
            }

            fileIsClass[i] = isClass;

            if (isClass) {
                final ClassFile classFile = buildClassFile(classNum);
                classFile.write(dos);
                dos.flush();

                classFilesContents[classNum] = bos.toByteArray();
                bos.reset();

                classNum++;
            }
        }
    }

    /**
     * Unpacks a packed stream (either .pack or .pack.gz) into a corresponding JarOutputStream.
     *
     * @param in a packed stream.
     * @param out output stream.
     * @throws Pack200Exception if there is a problem unpacking
     * @throws IOException if there is a problem with I/O during unpacking
     */
    public void unpack(final InputStream in, final JarOutputStream out) throws IOException, Pack200Exception {
        unpackRead(in);
        unpackProcess();
        unpackWrite(out);
    }

    /*
     * Package-private accessors for unpacking stages
     */

    /**
     * Reads the segment header and then either buffers the rest of the segment in memory (when pre-reading is enabled
     * and the header reports a non-zero archive size) or reads the bands directly from the stream.
     *
     * @param in the stream to read from; wrapped in a {@link BufferedInputStream} if it does not support mark
     * @throws IOException if a problem occurs during reading from the underlying stream
     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec, or if the size
     *         computed from the header is negative
     */
    void unpackRead(InputStream in) throws IOException, Pack200Exception {
        if (!in.markSupported()) {
            in = new BufferedInputStream(in);
        }

        header = new SegmentHeader(this);
        header.read(in);

        if (doPreRead && header.getArchiveSize() != 0) {
            final int size = (int) header.getArchiveSize() - header.getArchiveSizeOffset();
            if (size < 0) {
                throw new Pack200Exception("Invalid segment size: " + size);
            }
            final byte[] data = new byte[size];
            // A single read() may return fewer bytes than requested, so keep reading until the
            // buffer is full or the stream ends.
            int filled = 0;
            while (filled < size) {
                final int count = in.read(data, filled, size - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
            // Only expose the bytes actually read, so a truncated stream surfaces as end-of-stream
            // while the bands are read instead of being parsed as zero padding.
            internalBuffer = new BufferedInputStream(new ByteArrayInputStream(data, 0, filled));
        } else {
            readSegment(in);
        }
    }

    /**
     * Reads the bands from the pre-read buffer, if one was created by {@link #unpackRead(InputStream)}, and then
     * parses the segment.
     *
     * @throws IOException if a problem occurs during reading from the underlying stream
     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
     */
    void unpackProcess() throws IOException, Pack200Exception {
        if (internalBuffer != null) {
            readSegment(internalBuffer);
        }
        parseSegment();
    }

    /**
     * Writes the parsed segment to the given jar stream and then closes the log stream, if one was set.
     *
     * @param out the JarOutputStream to write data to
     * @throws IOException if an error occurs while writing
     */
    void unpackWrite(final JarOutputStream out) throws IOException {
        writeJar(out);
        if (logStream != null) {
            logStream.close();
        }
    }

    /**
     * Writes the segment to an output stream. The output stream should be pre-buffered for efficiency. Also takes the
     * same input stream for reading, since the file bits may not be loaded and thus just copied from one stream to
     * another. Doesn't close the output stream when finished, in case there are more entries (e.g. further segments) to
     * be written.
     *
     * @param out the JarOutputStream to write data to
     * @throws IOException if an error occurs while reading or writing to the streams
     */
    public void writeJar(final JarOutputStream out) throws IOException {
        final String[] fileName = fileBands.getFileName();
        final int[] fileModtime = fileBands.getFileModtime();
        final long[] fileSize = fileBands.getFileSize();
        final byte[][] fileBits = fileBands.getFileBits();

        // now write the files out
        int classNum = 0;
        final int numberOfFiles = header.getNumberOfFiles();
        final long archiveModtime = header.getArchiveModtime();

        for (int i = 0; i < numberOfFiles; i++) {
            final String name = fileName[i];
            // For Pack200 archives, modtime is in seconds
            // from the epoch. JarEntries need it to be in
            // milliseconds from the epoch.
            // Even though we're adding two longs and multiplying
            // by 1000, we won't overflow because both longs are
            // always under 2^32.
            final long modtime = 1000 * (archiveModtime + fileModtime[i]);
            final boolean deflate = fileDeflate[i];

            final JarEntry entry = new JarEntry(name);
            if (deflate) {
                entry.setMethod(ZipEntry.DEFLATED);
            } else {
                // STORED entries get their size and CRC set before the entry is added
                entry.setMethod(ZipEntry.STORED);
                final CRC32 crc = new CRC32();
                if (fileIsClass[i]) {
                    crc.update(classFilesContents[classNum]);
                    entry.setSize(classFilesContents[classNum].length);
                } else {
                    crc.update(fileBits[i]);
                    entry.setSize(fileSize[i]);
                }
                entry.setCrc(crc.getValue());
            }
            // On Windows at least, need to correct for timezone
            entry.setTime(modtime - TimeZone.getDefault().getRawOffset());
            out.putNextEntry(entry);

            // write to output stream
            if (fileIsClass[i]) {
                entry.setSize(classFilesContents[classNum].length);
                out.write(classFilesContents[classNum]);
                classNum++;
            } else {
                entry.setSize(fileSize[i]);
                out.write(fileBits[i]);
            }
        }
    }

    /**
     * Returns the constant pool held by the constant pool bands.
     *
     * @return the segment constant pool
     */
    public SegmentConstantPool getConstantPool() {
        return cpBands.getConstantPool();
    }

    /**
     * Returns the header of this segment.
     *
     * @return the segment header, or {@code null} if nothing has been read yet
     */
    public SegmentHeader getSegmentHeader() {
        return header;
    }

    /**
     * Enables or disables buffering the whole segment in memory before its bands are read.
     *
     * @param value {@code true} to pre-read the segment
     */
    public void setPreRead(final boolean value) {
        doPreRead = value;
    }

    protected AttrDefinitionBands getAttrDefinitionBands() {
        return attrDefinitionBands;
    }

    protected ClassBands getClassBands() {
        return classBands;
    }

    protected CpBands getCpBands() {
        return cpBands;
    }

    protected IcBands getIcBands() {
        return icBands;
    }

    /**
     * Sets the level up to which messages passed to {@link #log(int, String)} are written.
     *
     * @param logLevel one of {@link #LOG_LEVEL_QUIET}, {@link #LOG_LEVEL_STANDARD} or {@link #LOG_LEVEL_VERBOSE}
     */
    public void setLogLevel(final int logLevel) {
        this.logLevel = logLevel;
    }

    /**
     * Sets the stream that log messages are written to, encoded with the platform default charset.
     *
     * @param logStream the destination for log messages
     */
    public void setLogStream(final OutputStream logStream) {
        this.logStream = new PrintWriter(new OutputStreamWriter(logStream, Charset.defaultCharset()), false);
    }

    /**
     * Writes the message to the log stream if the configured log level is at least {@code logLevel}. Does nothing when
     * no log stream has been set.
     *
     * @param logLevel the level of this message
     * @param message the text to log
     */
    public void log(final int logLevel, final String message) {
        // Guard against a missing stream: the level check alone passes for
        // LOG_LEVEL_QUIET messages on a freshly created Segment.
        if (logStream != null && this.logLevel >= logLevel) {
            logStream.println(message);
        }
    }

    /**
     * Override the archive's deflate hint with the given boolean
     *
     * @param deflateHint - the deflate hint to use
     */
    public void overrideDeflateHint(final boolean deflateHint) {
        this.overrideDeflateHint = true;
        this.deflateHint = deflateHint;
    }

}