001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.fs;
020
021 import java.io.BufferedReader;
022 import java.io.File;
023 import java.io.FileNotFoundException;
024 import java.io.IOException;
025 import java.io.InputStreamReader;
026 import java.util.Arrays;
027
028 import org.apache.hadoop.util.Shell;
029
/**
 * Class for creating hardlinks.
 * Supports Unix/Linux, Windows via winutils, and Mac OS X.
 *
 * The HardLink class was formerly a static inner class of FSUtil,
 * and the methods provided were blatantly non-thread-safe.
 * To enable volume-parallel Update snapshots, we now provide static
 * threadsafe methods that allocate new buffer string arrays
 * upon each call. We also provide an API to hardlink all files in a
 * directory with a single command, which is up to 128 times more
 * efficient - and minimizes the impact of the extra buffer creations.
 */
042 public class HardLink {
043
/** OS-specific builder of shell commands, selected once by the static block below. */
private static HardLinkCommandGetter getHardLinkCommand;

/** Statistics counters owned by this instance (deliberately not static). */
public final LinkStats linkStats;

// Pick the command "getter" during static initialization, so that the static
// methods of this class work without instantiating a HardLink object.
static {
  if (!Shell.WINDOWS) {
    // Unix or Linux
    getHardLinkCommand = new HardLinkCGUnix();
    // The Linux link-count command {"stat","-c%h", null} is the built-in
    // default; other Unix variants need a different template.
    String[] variantTemplate = null;
    if (Shell.MAC || Shell.FREEBSD) {
      variantTemplate = new String[] {"/usr/bin/stat", "-f%l", null};
    } else if (Shell.SOLARIS) {
      variantTemplate = new String[] {"ls", "-l", null};
    }
    if (variantTemplate != null) {
      HardLinkCGUnix.setLinkCountCmdTemplate(variantTemplate);
    }
  } else {
    // Windows
    getHardLinkCommand = new HardLinkCGWin();
  }
}
068
/**
 * Creates a HardLink instance holding a fresh, zeroed set of statistics
 * counters.
 */
public HardLink() {
  this.linkStats = new LinkStats();
}
072
/**
 * This abstract class bridges the OS-dependent implementations of the
 * needed functionality for creating hardlinks and querying link counts.
 * The particular implementation class is chosen during the
 * static initialization phase of the HardLink class.
 * The "getter" methods construct shell commands (as argument arrays)
 * for various purposes; they do not execute anything themselves.
 */
private static abstract class HardLinkCommandGetter {

  /**
   * Get the command needed to hardlink a bunch of files from
   * a single source directory into a target directory. The source directory
   * is not specified here, but the command will be executed using the source
   * directory as the "current working directory" of the shell invocation.
   *
   * @param fileBaseNames - array of path-less file names, relative
   *            to the source directory
   * @param linkDir - target directory where the hardlinks will be put
   * @return - an array of Strings suitable for use as a single shell command
   *            with {@link Runtime#exec(String[])}
   * @throws IOException - if any of the file or path names misbehave
   */
  abstract String[] linkMult(String[] fileBaseNames, File linkDir)
      throws IOException;

  /**
   * Get the command needed to hardlink a single file.
   *
   * @param file - the existing file to link to
   * @param linkName - the hardlink to be created
   * @return - an array of Strings forming a single command
   * @throws IOException - if a path name cannot be resolved
   */
  abstract String[] linkOne(File file, File linkName) throws IOException;

  /**
   * Get the command to query the hardlink count of a file.
   *
   * @param file - the file whose link count is wanted
   * @return - an array of Strings forming a single command
   * @throws IOException - if the path name cannot be resolved
   */
  abstract String[] linkCount(File file) throws IOException;

  /**
   * Calculate the total string length of the shell command
   * resulting from execution of linkMult, plus the length of the
   * source directory name (which will also be provided to the shell).
   *
   * @param fileDir - source directory, parent of fileBaseNames
   * @param fileBaseNames - array of path-less file names, relative
   *            to the source directory
   * @param linkDir - target directory where the hardlinks will be put
   * @return - total data length (must not exceed maxAllowedCmdArgLength)
   * @throws IOException - if a path name cannot be resolved
   */
  abstract int getLinkMultArgLength(
      File fileDir, String[] fileBaseNames, File linkDir)
      throws IOException;

  /**
   * Get the maximum allowed string length of a shell command on this OS,
   * which is just the documented minimum guaranteed supported command
   * length - approximately 32KB for Unix, and 8KB for Windows.
   *
   * @return - the maximum command length, in characters
   */
  abstract int getMaxAllowedCmdArgLength();
}
131
132 /**
133 * Implementation of HardLinkCommandGetter class for Unix
134 */
135 static class HardLinkCGUnix extends HardLinkCommandGetter {
136 private static String[] hardLinkCommand = {"ln", null, null};
137 private static String[] hardLinkMultPrefix = {"ln"};
138 private static String[] hardLinkMultSuffix = {null};
139 private static String[] getLinkCountCommand = {"stat","-c%h", null};
140 //Unix guarantees at least 32K bytes cmd length.
141 //Subtract another 64b to allow for Java 'exec' overhead
142 private static final int maxAllowedCmdArgLength = 32*1024 - 65;
143
144 private static synchronized
145 void setLinkCountCmdTemplate(String[] template) {
146 //May update this for specific unix variants,
147 //after static initialization phase
148 getLinkCountCommand = template;
149 }
150
151 /*
152 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
153 */
154 @Override
155 String[] linkOne(File file, File linkName)
156 throws IOException {
157 String[] buf = new String[hardLinkCommand.length];
158 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
159 //unix wants argument order: "ln <existing> <new>"
160 buf[1] = FileUtil.makeShellPath(file, true);
161 buf[2] = FileUtil.makeShellPath(linkName, true);
162 return buf;
163 }
164
165 /*
166 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
167 */
168 @Override
169 String[] linkMult(String[] fileBaseNames, File linkDir)
170 throws IOException {
171 String[] buf = new String[fileBaseNames.length
172 + hardLinkMultPrefix.length
173 + hardLinkMultSuffix.length];
174 int mark=0;
175 System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
176 hardLinkMultPrefix.length);
177 mark += hardLinkMultPrefix.length;
178 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
179 mark += fileBaseNames.length;
180 buf[mark] = FileUtil.makeShellPath(linkDir, true);
181 return buf;
182 }
183
184 /*
185 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
186 */
187 @Override
188 String[] linkCount(File file)
189 throws IOException {
190 String[] buf = new String[getLinkCountCommand.length];
191 System.arraycopy(getLinkCountCommand, 0, buf, 0,
192 getLinkCountCommand.length);
193 buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
194 return buf;
195 }
196
197 /*
198 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
199 */
200 @Override
201 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
202 throws IOException{
203 int sum = 0;
204 for (String x : fileBaseNames) {
205 // add 1 to account for terminal null or delimiter space
206 sum += 1 + ((x == null) ? 0 : x.length());
207 }
208 sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
209 + FileUtil.makeShellPath(linkDir, true).length();
210 //add the fixed overhead of the hardLinkMult prefix and suffix
211 sum += 3; //length("ln") + 1
212 return sum;
213 }
214
215 /*
216 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
217 */
218 @Override
219 int getMaxAllowedCmdArgLength() {
220 return maxAllowedCmdArgLength;
221 }
222 }
223
224
225 /**
226 * Implementation of HardLinkCommandGetter class for Windows
227 */
228 static class HardLinkCGWin extends HardLinkCommandGetter {
229 //The Windows command getter impl class and its member fields are
230 //package-private ("default") access instead of "private" to assist
231 //unit testing (sort of) on non-Win servers
232
233 static String[] hardLinkCommand = {
234 Shell.WINUTILS,"hardlink","create", null, null};
235 static String[] hardLinkMultPrefix = {
236 "cmd","/q","/c","for", "%f", "in", "("};
237 static String hardLinkMultDir = "\\%f";
238 static String[] hardLinkMultSuffix = {
239 ")", "do", Shell.WINUTILS, "hardlink", "create", null,
240 "%f", "1>NUL"};
241 static String[] getLinkCountCommand = {
242 Shell.WINUTILS, "hardlink",
243 "stat", null};
244 //Windows guarantees only 8K - 1 bytes cmd length.
245 //Subtract another 64b to allow for Java 'exec' overhead
246 static final int maxAllowedCmdArgLength = 8*1024 - 65;
247
248 /*
249 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
250 */
251 @Override
252 String[] linkOne(File file, File linkName)
253 throws IOException {
254 String[] buf = new String[hardLinkCommand.length];
255 System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
256 //windows wants argument order: "create <new> <existing>"
257 buf[4] = file.getCanonicalPath();
258 buf[3] = linkName.getCanonicalPath();
259 return buf;
260 }
261
262 /*
263 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
264 */
265 @Override
266 String[] linkMult(String[] fileBaseNames, File linkDir)
267 throws IOException {
268 String[] buf = new String[fileBaseNames.length
269 + hardLinkMultPrefix.length
270 + hardLinkMultSuffix.length];
271 String td = linkDir.getCanonicalPath() + hardLinkMultDir;
272 int mark=0;
273 System.arraycopy(hardLinkMultPrefix, 0, buf, mark,
274 hardLinkMultPrefix.length);
275 mark += hardLinkMultPrefix.length;
276 System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
277 mark += fileBaseNames.length;
278 System.arraycopy(hardLinkMultSuffix, 0, buf, mark,
279 hardLinkMultSuffix.length);
280 mark += hardLinkMultSuffix.length;
281 buf[mark - 3] = td;
282 return buf;
283 }
284
285 /*
286 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
287 */
288 @Override
289 String[] linkCount(File file)
290 throws IOException {
291 String[] buf = new String[getLinkCountCommand.length];
292 System.arraycopy(getLinkCountCommand, 0, buf, 0,
293 getLinkCountCommand.length);
294 buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
295 return buf;
296 }
297
298 /*
299 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
300 */
301 @Override
302 int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir)
303 throws IOException {
304 int sum = 0;
305 for (String x : fileBaseNames) {
306 // add 1 to account for terminal null or delimiter space
307 sum += 1 + ((x == null) ? 0 : x.length());
308 }
309 sum += 2 + fileDir.getCanonicalPath().length() +
310 linkDir.getCanonicalPath().length();
311 //add the fixed overhead of the hardLinkMult command
312 //(prefix, suffix, and Dir suffix)
313 sum += ("cmd.exe /q /c for %f in ( ) do "
314 + Shell.WINUTILS + " hardlink create \\%f %f 1>NUL ").length();
315 return sum;
316 }
317
318 /*
319 * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
320 */
321 @Override
322 int getMaxAllowedCmdArgLength() {
323 return maxAllowedCmdArgLength;
324 }
325 }
326
327
328 /**
329 * Calculate the nominal length of all contributors to the total
330 * commandstring length, including fixed overhead of the OS-dependent
331 * command. It's protected rather than private, to assist unit testing,
332 * but real clients are not expected to need it -- see the way
333 * createHardLinkMult() uses it internally so the user doesn't need to worry
334 * about it.
335 *
336 * @param fileDir - source directory, parent of fileBaseNames
337 * @param fileBaseNames - array of path-less file names, relative
338 * to the source directory
339 * @param linkDir - target directory where the hardlinks will be put
340 * @return - total data length (must not exceed maxAllowedCmdArgLength)
341 * @throws IOException
342 */
343 protected static int getLinkMultArgLength(
344 File fileDir, String[] fileBaseNames, File linkDir)
345 throws IOException {
346 return getHardLinkCommand.getLinkMultArgLength(fileDir,
347 fileBaseNames, linkDir);
348 }
349
350 /**
351 * Return this private value for use by unit tests.
352 * Shell commands are not allowed to have a total string length
353 * exceeding this size.
354 */
355 protected static int getMaxAllowedCmdArgLength() {
356 return getHardLinkCommand.getMaxAllowedCmdArgLength();
357 }
358
359 /*
360 * ****************************************************
361 * Complexity is above. User-visible functionality is below
362 * ****************************************************
363 */
364
365 /**
366 * Creates a hardlink
367 * @param file - existing source file
368 * @param linkName - desired target link file
369 */
370 public static void createHardLink(File file, File linkName)
371 throws IOException {
372 if (file == null) {
373 throw new IOException(
374 "invalid arguments to createHardLink: source file is null");
375 }
376 if (linkName == null) {
377 throw new IOException(
378 "invalid arguments to createHardLink: link name is null");
379 }
380 // construct and execute shell command
381 String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
382 Process process = Runtime.getRuntime().exec(hardLinkCommand);
383 try {
384 if (process.waitFor() != 0) {
385 String errMsg = new BufferedReader(new InputStreamReader(
386 process.getInputStream())).readLine();
387 if (errMsg == null) errMsg = "";
388 String inpMsg = new BufferedReader(new InputStreamReader(
389 process.getErrorStream())).readLine();
390 if (inpMsg == null) inpMsg = "";
391 throw new IOException(errMsg + inpMsg);
392 }
393 } catch (InterruptedException e) {
394 throw new IOException(e);
395 } finally {
396 process.destroy();
397 }
398 }
399
400 /**
401 * Creates hardlinks from multiple existing files within one parent
402 * directory, into one target directory.
403 * @param parentDir - directory containing source files
404 * @param fileBaseNames - list of path-less file names, as returned by
405 * parentDir.list()
406 * @param linkDir - where the hardlinks should be put. It must already exist.
407 *
408 * If the list of files is too long (overflows maxAllowedCmdArgLength),
409 * we will automatically split it into multiple invocations of the
410 * underlying method.
411 */
412 public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
413 File linkDir) throws IOException {
414 //This is the public method all non-test clients are expected to use.
415 //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
416 createHardLinkMult(parentDir, fileBaseNames, linkDir,
417 getHardLinkCommand.getMaxAllowedCmdArgLength());
418 }
419
420 /*
421 * Implements {@link createHardLinkMult} with added variable "maxLength",
422 * to ease unit testing of the auto-splitting feature for long lists.
423 * Likewise why it returns "callCount", the number of sub-arrays that
424 * the file list had to be split into.
425 * Non-test clients are expected to call the public method instead.
426 */
427 protected static int createHardLinkMult(File parentDir,
428 String[] fileBaseNames, File linkDir, int maxLength)
429 throws IOException {
430 if (parentDir == null) {
431 throw new IOException(
432 "invalid arguments to createHardLinkMult: parent directory is null");
433 }
434 if (linkDir == null) {
435 throw new IOException(
436 "invalid arguments to createHardLinkMult: link directory is null");
437 }
438 if (fileBaseNames == null) {
439 throw new IOException(
440 "invalid arguments to createHardLinkMult: "
441 + "filename list can be empty but not null");
442 }
443 if (fileBaseNames.length == 0) {
444 //the OS cmds can't handle empty list of filenames,
445 //but it's legal, so just return.
446 return 0;
447 }
448 if (!linkDir.exists()) {
449 throw new FileNotFoundException(linkDir + " not found.");
450 }
451
452 //if the list is too long, split into multiple invocations
453 int callCount = 0;
454 if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
455 && fileBaseNames.length > 1) {
456 String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
457 callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
458 String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
459 fileBaseNames.length);
460 callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);
461 return callCount;
462 } else {
463 callCount = 1;
464 }
465
466 // construct and execute shell command
467 String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames,
468 linkDir);
469 Process process = Runtime.getRuntime().exec(hardLinkCommand, null,
470 parentDir);
471 try {
472 if (process.waitFor() != 0) {
473 String errMsg = new BufferedReader(new InputStreamReader(
474 process.getInputStream())).readLine();
475 if (errMsg == null) errMsg = "";
476 String inpMsg = new BufferedReader(new InputStreamReader(
477 process.getErrorStream())).readLine();
478 if (inpMsg == null) inpMsg = "";
479 throw new IOException(errMsg + inpMsg);
480 }
481 } catch (InterruptedException e) {
482 throw new IOException(e);
483 } finally {
484 process.destroy();
485 }
486 return callCount;
487 }
488
489 /**
490 * Retrieves the number of links to the specified file.
491 */
492 public static int getLinkCount(File fileName) throws IOException {
493 if (fileName == null) {
494 throw new IOException(
495 "invalid argument to getLinkCount: file name is null");
496 }
497 if (!fileName.exists()) {
498 throw new FileNotFoundException(fileName + " not found.");
499 }
500
501 // construct and execute shell command
502 String[] cmd = getHardLinkCommand.linkCount(fileName);
503 String inpMsg = null;
504 String errMsg = null;
505 int exitValue = -1;
506 BufferedReader in = null;
507 BufferedReader err = null;
508
509 Process process = Runtime.getRuntime().exec(cmd);
510 try {
511 exitValue = process.waitFor();
512 in = new BufferedReader(new InputStreamReader(
513 process.getInputStream()));
514 inpMsg = in.readLine();
515 err = new BufferedReader(new InputStreamReader(
516 process.getErrorStream()));
517 errMsg = err.readLine();
518 if (inpMsg == null || exitValue != 0) {
519 throw createIOException(fileName, inpMsg, errMsg, exitValue, null);
520 }
521 if (Shell.SOLARIS) {
522 String[] result = inpMsg.split("\\s+");
523 return Integer.parseInt(result[1]);
524 } else {
525 return Integer.parseInt(inpMsg);
526 }
527 } catch (NumberFormatException e) {
528 throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
529 } catch (InterruptedException e) {
530 throw createIOException(fileName, inpMsg, errMsg, exitValue, e);
531 } finally {
532 process.destroy();
533 if (in != null) in.close();
534 if (err != null) err.close();
535 }
536 }
537
538 /* Create an IOException for failing to get link count. */
539 private static IOException createIOException(File f, String message,
540 String error, int exitvalue, Exception cause) {
541
542 final String s = "Failed to get link count on file " + f
543 + ": message=" + message
544 + "; error=" + error
545 + "; exit value=" + exitvalue;
546 return (cause == null) ? new IOException(s) : new IOException(s, cause);
547 }
548
549
550 /**
551 * HardLink statistics counters and methods.
552 * Not multi-thread safe, obviously.
553 * Init is called during HardLink instantiation, above.
554 *
555 * These are intended for use by knowledgeable clients, not internally,
556 * because many of the internal methods are static and can't update these
557 * per-instance counters.
558 */
559 public static class LinkStats {
560 public int countDirs = 0;
561 public int countSingleLinks = 0;
562 public int countMultLinks = 0;
563 public int countFilesMultLinks = 0;
564 public int countEmptyDirs = 0;
565 public int countPhysicalFileCopies = 0;
566
567 public void clear() {
568 countDirs = 0;
569 countSingleLinks = 0;
570 countMultLinks = 0;
571 countFilesMultLinks = 0;
572 countEmptyDirs = 0;
573 countPhysicalFileCopies = 0;
574 }
575
576 public String report() {
577 return "HardLinkStats: " + countDirs + " Directories, including "
578 + countEmptyDirs + " Empty Directories, "
579 + countSingleLinks
580 + " single Link operations, " + countMultLinks
581 + " multi-Link operations, linking " + countFilesMultLinks
582 + " files, total " + (countSingleLinks + countFilesMultLinks)
583 + " linkable files. Also physically copied "
584 + countPhysicalFileCopies + " other files.";
585 }
586 }
587 }
588