1 | /******************************************************************************* |
2 | * Copyright (c) 2008, 2009 IBM Corporation and others. |
3 | * All rights reserved. This program and the accompanying materials |
4 | * are made available under the terms of the Eclipse Public License v1.0 |
5 | * which accompanies this distribution, and is available at |
6 | * http://www.eclipse.org/legal/epl-v10.html |
7 | * |
8 | * Contributors: |
9 | * IBM Corporation - initial API and implementation |
10 | * Remy Chi Jian Suen <remy.suen@gmail.com> - Bug 243347 TarFile should not throw NPE in finalize() |
11 | *******************************************************************************/ |
12 | package org.eclipse.pde.api.tools.internal.util; |
13 | |
14 | import java.io.File; |
15 | import java.io.FileInputStream; |
16 | import java.io.FilterInputStream; |
17 | import java.io.IOException; |
18 | import java.io.InputStream; |
19 | import java.util.Enumeration; |
20 | import java.util.zip.GZIPInputStream; |
21 | /** |
22 | * Reads a .tar or .tar.gz archive file, providing an index enumeration |
23 | * and allows for accessing an InputStream for arbitrary files in the |
24 | * archive. |
25 | */ |
26 | public class TarFile { |
27 | private static class TarInputStream extends FilterInputStream { |
28 | private int nextEntry = 0; |
29 | private int nextEOF = 0; |
30 | private int filepos = 0; |
31 | private int bytesread = 0; |
32 | private TarEntry firstEntry = null; |
33 | private String longLinkName = null; |
34 | |
35 | /** |
36 | * Creates a new tar input stream on the given input stream. |
37 | * |
38 | * @param in input stream |
39 | * @throws TarException |
40 | * @throws IOException |
41 | */ |
42 | public TarInputStream(InputStream in) throws TarException, IOException { |
43 | super(in); |
44 | |
45 | // Read in the first TarEntry to make sure |
46 | // the input is a valid tar file stream. |
47 | firstEntry = getNextEntry(); |
48 | } |
49 | |
50 | /** |
51 | * Create a new tar input stream, skipping ahead to the given entry |
52 | * in the file. |
53 | * |
54 | * @param in input stream |
55 | * @param entry skips to this entry in the file |
56 | * @throws TarException |
57 | * @throws IOException |
58 | */ |
59 | TarInputStream(InputStream in, TarEntry entry) throws TarException, IOException { |
60 | super(in); |
61 | skipToEntry(entry); |
62 | } |
63 | |
64 | /** |
65 | * The checksum of a tar file header is simply the sum of the bytes in |
66 | * the header. |
67 | * |
68 | * @param header |
69 | * @return checksum |
70 | */ |
71 | private long headerChecksum(byte[] header) { |
72 | long sum = 0; |
73 | for(int i = 0; i < 512; i++) { |
74 | sum += header[i] & 0xff; |
75 | } |
76 | return sum; |
77 | } |
78 | |
79 | /** |
80 | * Skips ahead to the position of the given entry in the file. |
81 | * |
82 | * @param entry |
83 | * @returns false if the entry has already been passed |
84 | * @throws TarException |
85 | * @throws IOException |
86 | */ |
87 | boolean skipToEntry(TarEntry entry) throws TarException, IOException { |
88 | int bytestoskip = entry.filepos - bytesread; |
89 | if(bytestoskip < 0) { |
90 | return false; |
91 | } |
92 | while(bytestoskip > 0) { |
93 | long ret = in.skip(bytestoskip); |
94 | if(ret < 0) { |
95 | throw new IOException("early end of stream"); //$NON-NLS-1$ |
96 | } |
97 | bytestoskip -= ret; |
98 | bytesread += ret; |
99 | } |
100 | filepos = entry.filepos; |
101 | nextEntry = 0; |
102 | nextEOF = 0; |
103 | // Read next header to seek to file data. |
104 | getNextEntry(); |
105 | return true; |
106 | } |
107 | |
108 | /** |
109 | * Returns true if the header checksum is correct. |
110 | * |
111 | * @param header |
112 | * @return true if this header has a valid checksum |
113 | */ |
114 | private boolean isValidTarHeader(byte[] header) { |
115 | long fileChecksum, calculatedChecksum; |
116 | int pos, i; |
117 | |
118 | pos = 148; |
119 | StringBuffer checksumString = new StringBuffer(); |
120 | for(i = 0; i < 8; i++) { |
121 | if(header[pos + i] == ' ') { |
122 | continue; |
123 | } |
124 | if(header[pos + i] == 0 || !Character.isDigit((char) header[pos + i])) { |
125 | break; |
126 | } |
127 | checksumString.append((char) header[pos + i]); |
128 | } |
129 | if(checksumString.length() == 0) { |
130 | return false; |
131 | } |
132 | if(checksumString.charAt(0) != '0') { |
133 | checksumString.insert(0, '0'); |
134 | } |
135 | try { |
136 | fileChecksum = Long.decode(checksumString.toString()).longValue(); |
137 | } catch(NumberFormatException exception) { |
138 | //This is not valid if it cannot be parsed |
139 | return false; |
140 | } |
141 | |
142 | // Blank out the checksum. |
143 | for(i = 0; i < 8; i++) { |
144 | header[pos + i] = ' '; |
145 | } |
146 | calculatedChecksum = headerChecksum(header); |
147 | |
148 | return (fileChecksum == calculatedChecksum); |
149 | } |
150 | |
151 | /** |
152 | * Returns the next entry in the tar file. Does not handle |
153 | * GNU @LongLink extensions. |
154 | * |
155 | * @return the next entry in the tar file |
156 | * @throws TarException |
157 | * @throws IOException |
158 | */ |
159 | TarEntry getNextEntryInternal() throws TarException, IOException { |
160 | byte[] header = new byte[512]; |
161 | int pos = 0; |
162 | int i; |
163 | |
164 | if(firstEntry != null) { |
165 | TarEntry entryReturn = firstEntry; |
166 | firstEntry = null; |
167 | return entryReturn; |
168 | } |
169 | |
170 | while(nextEntry > 0) { |
171 | long ret = in.skip(nextEntry); |
172 | if(ret < 0) { |
173 | throw new IOException("early end of stream"); //$NON-NLS-1$ |
174 | } |
175 | nextEntry -= ret; |
176 | bytesread += ret; |
177 | } |
178 | |
179 | int bytestoread = 512; |
180 | while(bytestoread > 0) { |
181 | int ret = super.read(header, 512 - bytestoread, bytestoread); |
182 | if( ret < 0 ) { |
183 | throw new IOException("early end of stream"); //$NON-NLS-1$ |
184 | } |
185 | bytestoread -= ret; |
186 | bytesread += ret; |
187 | } |
188 | |
189 | // If we have a header of all zeros, this marks the end of the file. |
190 | if(headerChecksum(header) == 0) { |
191 | // We are at the end of the file. |
192 | if(filepos > 0) { |
193 | return null; |
194 | } |
195 | |
196 | // Invalid stream. |
197 | throw new TarException("not in tar format"); //$NON-NLS-1$ |
198 | } |
199 | |
200 | // Validate checksum. |
201 | if(!isValidTarHeader(header)) { |
202 | throw new TarException("not in tar format"); //$NON-NLS-1$ |
203 | } |
204 | |
205 | while (pos < 100 && header[pos] != 0) { |
206 | pos++; |
207 | } |
208 | String name = new String(header, 0, pos, "UTF8"); //$NON-NLS-1$ |
209 | // Prepend the prefix here. |
210 | pos = 345; |
211 | if(header[pos] != 0) { |
212 | while (pos < 500 && header[pos] != 0) { |
213 | pos++; |
214 | } |
215 | String prefix = new String(header, 345, pos - 345, "UTF8"); //$NON-NLS-1$ |
216 | name = prefix + "/" + name; //$NON-NLS-1$ |
217 | } |
218 | |
219 | TarEntry entry; |
220 | if(longLinkName != null) { |
221 | entry = new TarEntry(longLinkName, filepos); |
222 | longLinkName = null; |
223 | } else { |
224 | entry = new TarEntry(name, filepos); |
225 | } |
226 | if(header[156] != 0) { |
227 | entry.setFileType(header[156]); |
228 | } |
229 | |
230 | pos = 100; |
231 | StringBuffer mode = new StringBuffer(); |
232 | for(i = 0; i < 8; i++) { |
233 | if(header[pos + i] == 0) { |
234 | break; |
235 | } |
236 | if(header[pos + i] == ' ') { |
237 | continue; |
238 | } |
239 | mode.append((char) header[pos + i]); |
240 | } |
241 | if(mode.length() > 0 && mode.charAt(0) != '0') { |
242 | mode.insert(0, '0'); |
243 | } |
244 | try { |
245 | long fileMode = Long.decode(mode.toString()).longValue(); |
246 | entry.setMode(fileMode); |
247 | } catch(NumberFormatException nfe) { |
248 | throw new TarException("Not a valid tar format", nfe); //$NON-NLS-1$ |
249 | } |
250 | |
251 | pos = 100 + 24; |
252 | StringBuffer size = new StringBuffer(); |
253 | for(i = 0; i < 12; i++) { |
254 | if(header[pos + i] == 0) { |
255 | break; |
256 | } |
257 | if(header[pos + i] == ' ') { |
258 | continue; |
259 | } |
260 | size.append((char) header[pos + i]); |
261 | } |
262 | if(size.charAt(0) != '0') { |
263 | size.insert(0, '0'); |
264 | } |
265 | int fileSize; |
266 | try { |
267 | fileSize = Integer.decode(size.toString()).intValue(); |
268 | } catch(NumberFormatException nfe) { |
269 | throw new TarException("Not a valid tar format", nfe); //$NON-NLS-1$ |
270 | } |
271 | |
272 | entry.setSize(fileSize); |
273 | nextEOF = fileSize; |
274 | if(fileSize % 512 > 0) { |
275 | nextEntry = fileSize + (512 - (fileSize % 512)); |
276 | } else { |
277 | nextEntry = fileSize; |
278 | } |
279 | filepos += (nextEntry + 512); |
280 | return entry; |
281 | } |
282 | |
283 | /** |
284 | * Moves ahead to the next file in the tar archive and returns |
285 | * a TarEntry object describing it. |
286 | * |
287 | * @return the next entry in the tar file |
288 | * @throws TarException |
289 | * @throws IOException |
290 | */ |
291 | public TarEntry getNextEntry() throws TarException, IOException { |
292 | TarEntry entry = getNextEntryInternal(); |
293 | |
294 | if(entry != null && entry.getName().equals("././@LongLink")) { //$NON-NLS-1$ |
295 | // This is a GNU extension for doing long filenames. |
296 | // We get a file called ././@LongLink which just contains |
297 | // the real pathname. |
298 | byte[] longNameData = new byte[(int) entry.getSize()]; |
299 | int bytesread = 0; |
300 | while (bytesread < longNameData.length) { |
301 | int cur = read(longNameData, bytesread, longNameData.length - bytesread); |
302 | if (cur < 0) { |
303 | throw new IOException("early end of stream"); //$NON-NLS-1$ |
304 | } |
305 | bytesread += cur; |
306 | } |
307 | |
308 | int pos = 0; |
309 | while (pos < longNameData.length && longNameData[pos] != 0) { |
310 | pos++; |
311 | } |
312 | longLinkName = new String(longNameData, 0, pos, "UTF8"); //$NON-NLS-1$ |
313 | return getNextEntryInternal(); |
314 | } |
315 | return entry; |
316 | } |
317 | |
318 | /* (non-Javadoc) |
319 | * @see java.io.FilterInputStream#read(byte[], int, int) |
320 | */ |
321 | public int read(byte[] b, int off, int len) throws IOException { |
322 | if(nextEOF == 0) { |
323 | return -1; |
324 | } |
325 | int size = super.read(b, off, (len > nextEOF ? nextEOF : len)); |
326 | nextEntry -= size; |
327 | nextEOF -= size; |
328 | bytesread += size; |
329 | return size; |
330 | } |
331 | |
332 | /* (non-Javadoc) |
333 | * @see java.io.FilterInputStream#read() |
334 | */ |
335 | public int read() throws IOException { |
336 | byte[] data = new byte[1]; |
337 | int size = read(data, 0, 1); |
338 | if (size < 0) { |
339 | return size; |
340 | } |
341 | return data[0]; |
342 | } |
343 | } |
344 | private File file; |
345 | TarInputStream entryEnumerationStream; |
346 | TarEntry curEntry; |
347 | private TarInputStream entryStream; |
348 | |
349 | private InputStream internalEntryStream; |
350 | |
351 | /** |
352 | * Create a new TarFile for the given file. |
353 | * |
354 | * @param file |
355 | * @throws TarException |
356 | * @throws IOException |
357 | */ |
358 | public TarFile(File file) throws TarException, IOException { |
359 | this.file = file; |
360 | |
361 | InputStream in = new FileInputStream(file); |
362 | // First, check if it's a GZIPInputStream. |
363 | try { |
364 | in = new GZIPInputStream(in); |
365 | } catch(IOException e) { |
366 | //If it is not compressed we close |
367 | //the old one and recreate |
368 | in.close(); |
369 | in = new FileInputStream(file); |
370 | } |
371 | try { |
372 | entryEnumerationStream = new TarInputStream(in); |
373 | } catch (TarException ex) { |
374 | in.close(); |
375 | throw ex; |
376 | } |
377 | curEntry = entryEnumerationStream.getNextEntry(); |
378 | } |
379 | |
380 | /** |
381 | * Close the tar file input stream. |
382 | * |
383 | * @throws IOException if the file cannot be successfully closed |
384 | */ |
385 | public void close() throws IOException { |
386 | if (entryEnumerationStream != null) |
387 | entryEnumerationStream.close(); |
388 | if (internalEntryStream != null) |
389 | internalEntryStream.close(); |
390 | } |
391 | |
392 | /** |
393 | * Create a new TarFile for the given path name. |
394 | * |
395 | * @param filename |
396 | * @throws TarException |
397 | * @throws IOException |
398 | */ |
399 | public TarFile(String filename) throws TarException, IOException { |
400 | this(new File(filename)); |
401 | } |
402 | |
403 | /** |
404 | * Returns an enumeration cataloguing the tar archive. |
405 | * |
406 | * @return enumeration of all files in the archive |
407 | */ |
408 | public Enumeration entries() { |
409 | return new Enumeration() { |
410 | public boolean hasMoreElements() { |
411 | return (curEntry != null); |
412 | } |
413 | |
414 | public Object nextElement() { |
415 | TarEntry oldEntry = curEntry; |
416 | try { |
417 | curEntry = entryEnumerationStream.getNextEntry(); |
418 | } catch(TarException e) { |
419 | curEntry = null; |
420 | } catch(IOException e) { |
421 | curEntry = null; |
422 | } |
423 | return oldEntry; |
424 | } |
425 | }; |
426 | } |
427 | |
428 | /** |
429 | * Returns a new InputStream for the given file in the tar archive. |
430 | * |
431 | * @param entry |
432 | * @return an input stream for the given file |
433 | * @throws TarException |
434 | * @throws IOException |
435 | */ |
436 | public InputStream getInputStream(TarEntry entry) throws TarException, IOException { |
437 | if(entryStream == null || !entryStream.skipToEntry(entry)) { |
438 | if (internalEntryStream != null) { |
439 | internalEntryStream.close(); |
440 | } |
441 | internalEntryStream = new FileInputStream(file); |
442 | // First, check if it's a GZIPInputStream. |
443 | try { |
444 | internalEntryStream = new GZIPInputStream(internalEntryStream); |
445 | } catch(IOException e) { |
446 | //If it is not compressed we close |
447 | //the old one and recreate |
448 | internalEntryStream.close(); |
449 | internalEntryStream = new FileInputStream(file); |
450 | } |
451 | entryStream = new TarInputStream(internalEntryStream, entry) { |
452 | public void close() { |
453 | // Ignore close() since we want to reuse the stream. |
454 | } |
455 | }; |
456 | } |
457 | return entryStream; |
458 | } |
459 | |
460 | /** |
461 | * Returns the path name of the file this archive represents. |
462 | * |
463 | * @return path |
464 | */ |
465 | public String getName() { |
466 | return file.getPath(); |
467 | } |
468 | |
469 | /* (non-Javadoc) |
470 | * @see java.util.zip.ZipFile#finalize() |
471 | * |
472 | */ |
473 | protected void finalize() throws Throwable { |
474 | close(); |
475 | } |
476 | } |