/* find -- search for files in a directory hierarchy (fts version)
Copyright (C) 1990-2022 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
/* This file was written by James Youngman, based on oldfind.c.
GNU find was written by Eric Decker ,
with enhancements by David MacKenzie ,
Jay Plett ,
and Tim Wood .
The idea for -print0 and xargs -0 came from
Dan Bernstein .
*/
/* config.h must always be included first. */
#include
/* system headers. */
#include
#include
#include
#include
#include
#include
/* gnulib headers. */
#include "argv-iter.h"
#include "cloexec.h"
#include "closeout.h"
#include "error.h"
#include "fts_.h"
#include "intprops.h"
#include "progname.h"
#include "quotearg.h"
#include "same-inode.h"
#include "save-cwd.h"
#include "xgetcwd.h"
#include "xalloc.h"
/* find headers. */
#include "defs.h"
#include "die.h"
#include "dircallback.h"
#include "fdleak.h"
#include "unused-result.h"
#include "system.h"
#undef STAT_MOUNTPOINTS
/* FTS_TIGHT_CYCLE_CHECK tries to work around Savannah bug #17877
* (but actually using it doesn't fix the bug).
*/
static int ftsoptions = FTS_NOSTAT|FTS_TIGHT_CYCLE_CHECK|FTS_CWDFD|FTS_VERBATIM;
static int prev_depth = INT_MIN; /* fts_level can be < 0 */
static int curr_fd = -1;
static bool find (char *arg) __attribute_warn_unused_result__;
static bool process_all_startpoints (int argc, char *argv[]) __attribute_warn_unused_result__;
static void
left_dir (void)
{
if (ftsoptions & FTS_CWDFD)
{
if (curr_fd >= 0)
{
close (curr_fd);
curr_fd = -1;
}
}
else
{
/* do nothing. */
}
}
/*
* Signal that we are now inside a directory pointed to by dir_fd.
* The caller can't tell if this is the first time this happens, so
* we have to be careful not to call dup() more than once
*/
static void
inside_dir (int dir_fd)
{
if (ftsoptions & FTS_CWDFD)
{
assert (dir_fd == AT_FDCWD || dir_fd >= 0);
state.cwd_dir_fd = dir_fd;
if (curr_fd < 0)
{
if (AT_FDCWD == dir_fd)
{
curr_fd = AT_FDCWD;
}
else if (dir_fd >= 0)
{
curr_fd = dup_cloexec (dir_fd);
}
else
{
/* curr_fd is invalid, but dir_fd is also invalid.
* This should not have happened.
*/
assert (curr_fd >= 0 || dir_fd >= 0);
}
}
}
else
{
/* FTS_CWDFD is not in use. We can always assume that
* AT_FDCWD refers to the directory we are currently searching.
*
* Therefore there is nothing to do.
*/
}
}
#ifdef STAT_MOUNTPOINTS
static void init_mounted_dev_list (void);
#endif
#define HANDLECASE(N) case N: return #N;
static const char *
get_fts_info_name (int info)
{
static char buf[1 + INT_BUFSIZE_BOUND (info) + 1];
switch (info)
{
HANDLECASE(FTS_D);
HANDLECASE(FTS_DC);
HANDLECASE(FTS_DEFAULT);
HANDLECASE(FTS_DNR);
HANDLECASE(FTS_DOT);
HANDLECASE(FTS_DP);
HANDLECASE(FTS_ERR);
HANDLECASE(FTS_F);
HANDLECASE(FTS_INIT);
HANDLECASE(FTS_NS);
HANDLECASE(FTS_NSOK);
HANDLECASE(FTS_SL);
HANDLECASE(FTS_SLNONE);
HANDLECASE(FTS_W);
default:
sprintf (buf, "[%d]", info);
return buf;
}
}
static void
visit (FTS *p, FTSENT *ent, struct stat *pstat)
{
struct predicate *eval_tree;
state.have_stat = (ent->fts_info != FTS_NS) && (ent->fts_info != FTS_NSOK);
state.rel_pathname = ent->fts_accpath;
state.cwd_dir_fd = p->fts_cwd_fd;
/* Apply the predicates to this path. */
eval_tree = get_eval_tree ();
apply_predicate (ent->fts_path, pstat, eval_tree);
/* Deal with any side effects of applying the predicates. */
if (state.stop_at_current_level)
{
fts_set (p, ent, FTS_SKIP);
}
}
static const char*
partial_quotearg_n (int n, char *s, size_t len, enum quoting_style style)
{
if (0 == len)
{
return quotearg_n_style (n, style, "");
}
else
{
char saved;
const char *result;
saved = s[len];
s[len] = 0;
result = quotearg_n_style (n, style, s);
s[len] = saved;
return result;
}
}
/* We've detected a file system loop. This is caused by one of
* two things:
*
* 1. Option -L is in effect and we've hit a symbolic link that
* points to an ancestor. This is harmless. We won't traverse the
* symbolic link.
*
* 2. We have hit a real cycle in the directory hierarchy. In this
* case, we issue a diagnostic message (POSIX requires this) and we
* skip that directory entry.
*/
static void
issue_loop_warning (FTSENT * ent)
{
if (S_ISLNK(ent->fts_statp->st_mode))
{
error (0, 0,
_("Symbolic link %s is part of a loop in the directory hierarchy; we have already visited the directory to which it points."),
safely_quote_err_filename (0, ent->fts_path));
}
else
{
/* We have found an infinite loop. POSIX requires us to
* issue a diagnostic. Usually we won't get to here
* because when the leaf optimisation is on, it will cause
* the subdirectory to be skipped. If /a/b/c/d is a hard
* link to /a/b, then the link count of /a/b/c is 2,
* because the ".." entry of /a/b/c/d points to /a, not
* to /a/b/c.
*/
error (0, 0,
_("File system loop detected; "
"%s is part of the same file system loop as %s."),
safely_quote_err_filename (0, ent->fts_path),
partial_quotearg_n (1,
ent->fts_cycle->fts_path,
ent->fts_cycle->fts_pathlen,
options.err_quoting_style));
}
}
/*
* Return true if NAME corresponds to a file which forms part of a
* symbolic link loop. The command
* rm -f a b; ln -s a b; ln -s b a
* produces such a loop.
*/
static bool
symlink_loop (const char *name)
{
struct stat stbuf;
const int rv = options.xstat (name, &stbuf);
return (0 != rv) && (ELOOP == errno);
}
static void
consider_visiting (FTS *p, FTSENT *ent)
{
struct stat statbuf;
mode_t mode;
int ignore, isdir;
if (options.debug_options & DebugSearch)
fprintf (stderr,
"consider_visiting (early): %s: "
"fts_info=%-6s, fts_level=%2d, prev_depth=%d "
"fts_path=%s, fts_accpath=%s\n",
quotearg_n_style (0, options.err_quoting_style, ent->fts_path),
get_fts_info_name (ent->fts_info),
(int)ent->fts_level, prev_depth,
quotearg_n_style (1, options.err_quoting_style, ent->fts_path),
quotearg_n_style (2, options.err_quoting_style, ent->fts_accpath));
if (ent->fts_info == FTS_DP)
{
left_dir ();
}
else if (ent->fts_level > prev_depth || ent->fts_level==0)
{
left_dir ();
}
inside_dir (p->fts_cwd_fd);
prev_depth = ent->fts_level;
statbuf.st_ino = ent->fts_statp->st_ino;
/* Cope with various error conditions. */
if (ent->fts_info == FTS_ERR)
{
nonfatal_target_file_error (ent->fts_errno, ent->fts_path);
return;
}
if (ent->fts_info == FTS_DNR)
{
nonfatal_target_file_error (ent->fts_errno, ent->fts_path);
if (options.do_dir_first)
{
/* Return for unreadable directories without -depth.
* With -depth, the directory itself has to be processed, yet the
* error message above has to be output.
*/
return;
}
}
else if (ent->fts_info == FTS_DC)
{
issue_loop_warning (ent);
state.exit_status = EXIT_FAILURE;
return;
}
else if (ent->fts_info == FTS_SLNONE)
{
/* fts_read() claims that ent->fts_accpath is a broken symbolic
* link. That would be fine, but if this is part of a symbolic
* link loop, we diagnose the problem and also ensure that the
* eventual return value is nonzero. Note that while the path
* we stat is local (fts_accpath), we print the full path name
* of the file (fts_path) in the error message.
*/
if (symlink_loop (ent->fts_accpath))
{
nonfatal_target_file_error (ELOOP, ent->fts_path);
return;
}
}
else if (ent->fts_info == FTS_NS)
{
if (ent->fts_level == 0)
{
/* e.g., nonexistent starting point */
nonfatal_target_file_error (ent->fts_errno, ent->fts_path);
return;
}
else
{
/* The following if statement fixes Savannah bug #19605
* (failure to diagnose a symbolic link loop)
*/
if (symlink_loop (ent->fts_accpath))
{
nonfatal_target_file_error (ELOOP, ent->fts_path);
return;
}
else
{
nonfatal_target_file_error (ent->fts_errno, ent->fts_path);
/* Continue despite the error, as file name without stat info
* might be better than not even processing the file name. This
* can lead to repeated error messages later on, though, if a
* predicate requires stat information.
*
* Not printing an error message here would be even more wrong,
* though, as this could cause the contents of a directory to be
* silently ignored, as the directory wouldn't be identified as
* such.
*/
}
}
}
/* Cope with the usual cases. */
if (ent->fts_info == FTS_NSOK
|| ent->fts_info == FTS_NS /* e.g. symlink loop */)
{
assert (!state.have_stat);
assert (ent->fts_info == FTS_NSOK || state.type == 0);
mode = state.type;
}
else
{
state.have_stat = true;
state.have_type = true;
statbuf = *(ent->fts_statp);
state.type = mode = statbuf.st_mode;
if (00000 == mode)
{
/* Savannah bug #16378. */
error (0, 0, _("WARNING: file %s appears to have mode 0000"),
quotearg_n_style (0, options.err_quoting_style, ent->fts_path));
}
}
/* update state.curdepth before calling digest_mode(), because digest_mode
* may call following_links().
*/
state.curdepth = ent->fts_level;
if (mode)
{
if (!digest_mode (&mode, ent->fts_path, ent->fts_name, &statbuf, 0))
return;
}
/* examine this item. */
ignore = 0;
isdir = S_ISDIR(mode)
|| (FTS_D == ent->fts_info)
|| (FTS_DP == ent->fts_info)
|| (FTS_DC == ent->fts_info);
if (isdir && (ent->fts_info == FTS_NSOK))
{
/* This is a directory, but fts did not stat it, so
* presumably would not be planning to search its
* children. Force a stat of the file so that the
* children can be checked.
*/
fts_set (p, ent, FTS_AGAIN);
return;
}
if (options.maxdepth >= 0)
{
if (ent->fts_level >= options.maxdepth)
{
fts_set (p, ent, FTS_SKIP); /* descend no further */
if (ent->fts_level > options.maxdepth)
ignore = 1; /* don't even look at this one */
}
}
if ( (ent->fts_info == FTS_D) && !options.do_dir_first )
{
/* this is the preorder visit, but user said -depth */
ignore = 1;
}
else if ( (ent->fts_info == FTS_DP) && options.do_dir_first )
{
/* this is the postorder visit, but user didn't say -depth */
ignore = 1;
}
else if (ent->fts_level < options.mindepth)
{
ignore = 1;
}
if (options.debug_options & DebugSearch)
fprintf (stderr,
"consider_visiting (late): %s: "
"fts_info=%-6s, isdir=%d ignore=%d have_stat=%d have_type=%d \n",
quotearg_n_style (0, options.err_quoting_style, ent->fts_path),
get_fts_info_name (ent->fts_info),
isdir, ignore, state.have_stat, state.have_type);
if (!ignore)
{
visit (p, ent, &statbuf);
}
if (ent->fts_info == FTS_DP)
{
/* we're leaving a directory. */
state.stop_at_current_level = false;
}
}
static bool
find (char *arg)
{
char * arglist[2];
FTS *p;
FTSENT *ent;
state.starting_path_length = strlen (arg);
inside_dir (AT_FDCWD);
arglist[0] = arg;
arglist[1] = NULL;
switch (options.symlink_handling)
{
case SYMLINK_ALWAYS_DEREF:
ftsoptions |= FTS_COMFOLLOW|FTS_LOGICAL;
break;
case SYMLINK_DEREF_ARGSONLY:
ftsoptions |= FTS_COMFOLLOW|FTS_PHYSICAL;
break;
case SYMLINK_NEVER_DEREF:
ftsoptions |= FTS_PHYSICAL;
break;
}
if (options.stay_on_filesystem)
ftsoptions |= FTS_XDEV;
p = fts_open (arglist, ftsoptions, NULL);
if (NULL == p)
{
error (0, errno, _("cannot search %s"),
safely_quote_err_filename (0, arg));
state.exit_status = EXIT_FAILURE;
}
else
{
int level = INT_MIN;
while ( (errno=0, ent=fts_read (p)) != NULL )
{
if (state.execdirs_outstanding && ((int)ent->fts_level != level))
{
/* If we changed level, perform any outstanding
* execdirs. If we see a sequence of directory entries
* like this: fffdfffdfff, we could build a command line
* of 9 files, but this simple-minded implementation
* builds a command line for only 3 files at a time
* (since fts descends into the directories).
*/
complete_pending_execdirs ();
}
level = (int)ent->fts_level;
state.already_issued_stat_error_msg = false;
state.have_stat = false;
state.have_type = !!ent->fts_statp->st_mode;
state.type = state.have_type ? ent->fts_statp->st_mode : 0;
consider_visiting (p, ent);
}
/* fts_read returned NULL; distinguish between "finished" and "error". */
if (errno)
{
error (0, errno,
"failed to read file names from file system at or below %s",
safely_quote_err_filename (0, arg));
state.exit_status = EXIT_FAILURE;
return false;
}
if (0 != fts_close (p))
{
/* Here we break the abstraction of fts_close a bit, because we
* are going to skip the rest of the start points, and return with
* nonzero exit status. Hence we need to issue a diagnostic on
* stderr. */
error (0, errno,
_("failed to restore working directory after searching %s"),
arg);
state.exit_status = EXIT_FAILURE;
return false;
}
p = NULL;
}
return true;
}
static bool
process_all_startpoints (int argc, char *argv[])
{
/* Did the user pass starting points on the command line? */
bool argv_starting_points = 0 < argc && !looks_like_expression (argv[0], true);
FILE *stream = NULL;
char const* files0_filename_quoted = NULL;
struct argv_iterator *ai;
if (options.files0_from)
{
/* Option -files0-from must not be combined with passing starting points
* on the command line. */
if (argv_starting_points)
{
error (0, 0, _("extra operand %s"), safely_quote_err_filename (0, argv[0]));
die (EXIT_FAILURE, 0, "%s",
_("file operands cannot be combined with -files0-from"));
}
if (0 == strcmp (options.files0_from, "-"))
{
/* Option -files0-from with argument "-" (=stdin) must not be combined
* with the -ok, -okdir actions: getting the user confirmation would
* mess with stdin. */
if (options.ok_prompt_stdin)
{
die (EXIT_FAILURE, 0, "%s\n",
_("option -files0-from reading from standard input"
" cannot be combined with -ok, -okdir"));
}
files0_filename_quoted = safely_quote_err_filename (0, _("(standard input)"));
stream = stdin;
}
else
{
files0_filename_quoted = safely_quote_err_filename (0, options.files0_from);
stream = fopen (options.files0_from, "r");
if (stream == NULL)
die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
files0_filename_quoted);
const int fd = fileno (stream);
assert (fd >= 0);
if (options.ok_prompt_stdin)
{
/* Check if the given file is associated to the same stream as
* standard input - which is not allowed with -ok, -okdir. This
* is the case with special device names symlinks for stdin like
* $ find -files0-from /dev/stdin -ok
* or when the given FILE is also associated to stdin:
* $ find -files0-from FILE -ok < FILE
*/
struct stat sb1, sb2;
if (fstat (fd, &sb1) == 0 && fstat (STDIN_FILENO, &sb2) == 0
&& SAME_INODE (sb1, sb2))
{
die (EXIT_FAILURE, 0, "%s: %s\n",
_("option -files0-from: standard input must not refer"
" to the same file when combined with -ok, -okdir"),
files0_filename_quoted);
}
}
set_cloexec_flag (fd, true);
}
ai = argv_iter_init_stream (stream);
}
else
{
if (!argv_starting_points)
{
/* If no starting points are given on the comman line, then
* fall back to processing the current directory, i.e., ".".
* We use a temporary variable here because some actions modify
* the path temporarily. Hence if we use a string constant,
* we get a coredump. The best example of this is if we say
* "find -printf %H" (note, not "find . -printf %H").
*/
char defaultpath[2] = ".";
return find (defaultpath);
}
/* Process the starting point(s) from the command line. */
ai = argv_iter_init_argv (argv);
}
if (!ai)
xalloc_die ();
bool ok = true;
while (true)
{
enum argv_iter_err ai_err;
char *file_name = argv_iter (ai, &ai_err);
if (!file_name)
{
switch (ai_err)
{
case AI_ERR_EOF:
goto argv_iter_done;
case AI_ERR_READ: /* may only happen with -files0-from */
error (0, errno, _("%s: read error"), files0_filename_quoted);
state.exit_status = EXIT_FAILURE;
ok = false;
goto argv_iter_done;
case AI_ERR_MEM:
xalloc_die ();
default:
assert (!"unexpected error code from argv_iter");
}
}
/* Report and skip any empty file names before invoking fts.
This works around a glitch in fts, which fails immediately
(without looking at the other file names) when given an empty
file name. */
if (!file_name[0])
{
/* Diagnose a zero-length file name. When it's one
among many, knowing the record number may help. */
if (options.files0_from == NULL)
error (0, ENOENT, "%s", safely_quote_err_filename (0, file_name));
else
{
/* Using the standard 'filename:line-number:' prefix here is
not totally appropriate, since NUL is the separator, not NL,
but it might be better than nothing. */
unsigned long int file_number = argv_iter_n_args (ai);
error (0, 0, "%s:%lu: %s", files0_filename_quoted, file_number,
_("invalid zero-length file name"));
}
state.exit_status = EXIT_FAILURE;
ok = false;
continue;
}
/* Terminate loop when processing the start points from command line,
and reaching the first expression. */
if (!options.files0_from && looks_like_expression (file_name, true))
break;
state.starting_path_length = strlen (file_name); /* TODO: is this redundant? */
if (!find (file_name))
{
ok = false;
goto argv_iter_done;
}
}
argv_iter_done:
argv_iter_free (ai);
if (ok && options.files0_from && (ferror (stream) || fclose (stream) != 0))
die (EXIT_FAILURE, 0, _("error reading %s"), files0_filename_quoted);
return ok;
}
int
main (int argc, char **argv)
{
int end_of_leading_options = 0; /* First arg after any -H/-L etc. */
struct predicate *eval_tree;
if (argv[0])
set_program_name (argv[0]);
else
set_program_name ("find");
record_initial_cwd ();
state.already_issued_stat_error_msg = false;
state.exit_status = EXIT_SUCCESS;
state.execdirs_outstanding = false;
state.cwd_dir_fd = AT_FDCWD;
if (fd_leak_check_is_enabled ())
{
remember_non_cloexec_fds ();
}
state.shared_files = sharefile_init ("w");
if (NULL == state.shared_files)
{
die (EXIT_FAILURE, errno,
_("Failed to initialize shared-file hash table"));
}
/* Set the option defaults before we do the locale initialisation as
* check_nofollow() needs to be executed in the POSIX locale.
*/
set_option_defaults (&options);
#ifdef HAVE_SETLOCALE
setlocale (LC_ALL, "");
#endif
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
if (atexit (close_stdout))
{
die (EXIT_FAILURE, errno, _("The atexit library function failed"));
}
/* Check for -P, -H or -L options. Also -D and -O, which are
* both GNU extensions.
*/
end_of_leading_options = process_leading_options (argc, argv);
if (options.debug_options & DebugStat)
options.xstat = debug_stat;
if (options.debug_options & DebugTime)
fprintf (stderr, "cur_day_start = %s", ctime (&options.cur_day_start.tv_sec));
/* We are now processing the part of the "find" command line
* after the -H/-L options (if any).
*/
eval_tree = build_expression_tree (argc, argv, end_of_leading_options);
/* safely_chdir() needs to check that it has ended up in the right place.
* To avoid bailing out when something gets automounted, it checks if
* the target directory appears to have had a directory mounted on it as
* we chdir()ed. The problem with this is that in order to notice that
* a file system was mounted, we would need to lstat() all the mount points.
* That strategy loses if our machine is a client of a dead NFS server.
*
* Hence if safely_chdir() and wd_sanity_check() can manage without needing
* to know the mounted device list, we do that.
*/
if (!options.open_nofollow_available)
{
#ifdef STAT_MOUNTPOINTS
init_mounted_dev_list ();
#endif
}
/* process_all_startpoints processes the starting points named on
* the command line. A false return value from it means that we
* failed to restore the original context. That means it would not
* be safe to call cleanup() since we might complete an execdir in
* the wrong directory for example.
*/
if (process_all_startpoints (argc-end_of_leading_options,
argv+end_of_leading_options))
{
/* If "-exec ... {} +" has been used, there may be some
* partially-full command lines which have been built,
* but which are not yet complete. Execute those now.
*/
show_success_rates (eval_tree);
cleanup ();
}
return state.exit_status;
}
bool
is_fts_enabled (int *fts_options)
{
/* this version of find (i.e. this main()) uses fts. */
*fts_options = ftsoptions;
return true;
}