mirror of
git://git.sv.gnu.org/findutils.git
synced 2026-02-04 07:25:57 +01:00
Optimise away calls to stat if all we need is the inode number (bug #24342).
* find/util.c (get_info): call get_statinfo if need_inum and we don't already have the inode number. (apply_predicate): Call get_info if need_inum. * find/defs.h (enum EvaluationCost): Add NeedsInodeNumber. (struct predicate): Add need_inum. * find/tree.c (get_new_pred): Default need_inum to false. (get_new_pred_chk_op): Likewise. * find/ftsfind.c (consider_visiting): Copy the value of st_ino into statbuf.st_ino. * find/parser.c (parse_inum): Set need_inum=true. (parse_samefile): Set need_inum=false, but add a comment indicating that there is an optimisation opportunity. (make_segment): For -printf %i, set NeedsInodeNumber rather than NeedsStat. * find/pred.c (pred_inum): assert that the inode number is known (i.e. non-zero). (print_optlist): Print "[need inum]" when need_inum is true. * find/tree.c (opt_expr): Also reorder expressions if the right hand expression is -inum (that is, consider that test to be no more expensive than -type). (set_new_parent): Default need_inum to false. (costlookup): Set pred_inum=NeedsInodeNumber. (get_pred_cost): need_inum implies a data cost of NeedsInodeNumber. (cost_table): Add NeedsInodeNumber. (print_tree): Handle need_inum. Signed-off-by: James Youngman <jay@gnu.org>
This commit is contained in:
31
ChangeLog
31
ChangeLog
@@ -1,3 +1,34 @@
|
||||
2009-03-08 James Youngman <jay@gnu.org>
|
||||
|
||||
Optimise away calls to stat if all we need is the inode number.
|
||||
This fixes Savannah bug #24342.
|
||||
* find/util.c (get_info): call get_statinfo if need_inum and we
|
||||
don't already have the inode number.
|
||||
(apply_predicate): Call get_info if need_inum.
|
||||
* find/defs.h (enum EvaluationCost): Add NeedsInodeNumber.
|
||||
(struct predicate): Add need_inum.
|
||||
* find/tree.c (get_new_pred): Default need_inum to false.
|
||||
(get_new_pred_chk_op): Likewise.
|
||||
* find/ftsfind.c (consider_visiting): Copy the value of st_ino
|
||||
into statbuf.st_ino.
|
||||
* find/parser.c (parse_inum): Set need_inum=true.
|
||||
(parse_samefile): Set need_inum=false, but add a comment
|
||||
indicating that there is an optimisation opportunity.
|
||||
(make_segment): For -printf %i, set NeedsInodeNumber rather than
|
||||
NeedsStat.
|
||||
* find/pred.c (pred_inum): assert that the inode number is known
|
||||
(i.e. non-zero).
|
||||
(print_optlist): Print "[need inum]" when need_inum is true.
|
||||
* find/tree.c (opt_expr): Also reorder expressions if the right
|
||||
hand expression is -inum (that is, consider that test to be no
|
||||
more expensive than -type).
|
||||
(set_new_parent): Default need_inum to false.
|
||||
(costlookup): Set pred_inum=NeedsInodeNumber.
|
||||
(get_pred_cost): need_inum implies a data cost of
|
||||
NeedsInodeNumber.
|
||||
(cost_table): Add NeedsInodeNumber.
|
||||
(print_tree): Handle need_inum.
|
||||
|
||||
2009-03-06 James Youngman <jay@gnu.org>
|
||||
|
||||
Change suffix for non-release versions from -CVS to -git.
|
||||
|
||||
9
NEWS
9
NEWS
@@ -12,6 +12,12 @@ element of struct dirent, for example reiserfs. Anecdotal evidence
|
||||
suggests this can speed updatedb up from about 30 minutes to 3-4
|
||||
minutes.
|
||||
|
||||
The ftsfind executable also now avoids calling stat() functions to
|
||||
discover the inode number of a file, if we already read this
|
||||
information from the directory. This does provide a speed-up, but
|
||||
only for a restricted set of commands such as "find . -inum 4001".
|
||||
This fix is listed below as bug #24342.
|
||||
|
||||
** Bug Fixes
|
||||
|
||||
#25764: remove duplicate entry for 'proc' in updatedb's $PRUNEFS.
|
||||
@@ -26,6 +32,9 @@ unknown user or is missing.
|
||||
#25154: Allow compilation with C compilers that don't allow
|
||||
declarations to follow statements.
|
||||
|
||||
#24342: -inum predicate shoud use dirent.d_ino instead of stat.st_ino
|
||||
(this is a performance bug).
|
||||
|
||||
** Translations
|
||||
|
||||
Updated translations for Bulgarian, German, Irish, Hungarian,
|
||||
|
||||
@@ -242,6 +242,7 @@ struct predicate_performance_info
|
||||
enum EvaluationCost
|
||||
{
|
||||
NeedsNothing,
|
||||
NeedsInodeNumber,
|
||||
NeedsType,
|
||||
NeedsStatInfo,
|
||||
NeedsLinkName,
|
||||
@@ -285,6 +286,9 @@ struct predicate
|
||||
/* True if this predicate node requires knowledge of the file type. */
|
||||
boolean need_type;
|
||||
|
||||
/* True if this predicate node requires knowledge of the inode number. */
|
||||
boolean need_inum;
|
||||
|
||||
enum EvaluationCost p_cost;
|
||||
|
||||
/* est_success_rate is a number between 0.0 and 1.0 */
|
||||
|
||||
@@ -409,6 +409,7 @@ consider_visiting(FTS *p, FTSENT *ent)
|
||||
inside_dir(p->fts_cwd_fd);
|
||||
prev_depth = ent->fts_level;
|
||||
|
||||
statbuf.st_ino = ent->fts_statp->st_ino;
|
||||
|
||||
/* Cope with various error conditions. */
|
||||
if (ent->fts_info == FTS_ERR
|
||||
|
||||
@@ -1247,6 +1247,9 @@ parse_inum (const struct parser_table* entry, char **argv, int *arg_ptr)
|
||||
* files match
|
||||
*/
|
||||
p->est_success_rate = 1e-6;
|
||||
p->need_inum = true;
|
||||
p->need_stat = false;
|
||||
p->need_type = false;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
@@ -2293,6 +2296,8 @@ parse_samefile (const struct parser_table* entry, char **argv, int *arg_ptr)
|
||||
our_pred->args.samefileid.dev = st.st_dev;
|
||||
our_pred->args.samefileid.fd = fd;
|
||||
our_pred->need_type = false;
|
||||
/* smarter way: compare type and inode number first. */
|
||||
/* TODO: maybe optimise this away by being optimistic */
|
||||
our_pred->need_stat = true;
|
||||
our_pred->est_success_rate = 0.01f;
|
||||
return true;
|
||||
@@ -2895,6 +2900,12 @@ make_segment (struct segment **segment,
|
||||
*fmt++ = 's';
|
||||
break;
|
||||
|
||||
case 'i': /* inode number */
|
||||
pred->need_inum = true;
|
||||
mycost = NeedsInodeNumber;
|
||||
*fmt++ = 's';
|
||||
break;
|
||||
|
||||
case 'a': /* atime in `ctime' format */
|
||||
case 'A': /* atime in user-specified strftime format */
|
||||
case 'B': /* birth time in user-specified strftime format */
|
||||
@@ -2902,7 +2913,6 @@ make_segment (struct segment **segment,
|
||||
case 'C': /* ctime in user-specified strftime format */
|
||||
case 'F': /* file system type */
|
||||
case 'g': /* group name */
|
||||
case 'i': /* inode number */
|
||||
case 'M': /* mode in `ls -l' format (eg., "drwxr-xr-x") */
|
||||
case 's': /* size in bytes */
|
||||
case 't': /* mtime in `ctime' format */
|
||||
|
||||
@@ -1216,6 +1216,8 @@ pred_inum (const char *pathname, struct stat *stat_buf, struct predicate *pred_p
|
||||
{
|
||||
(void) pathname;
|
||||
|
||||
assert (stat_buf->st_ino != 0);
|
||||
|
||||
switch (pred_ptr->args.numinfo.kind)
|
||||
{
|
||||
case COMP_GT:
|
||||
@@ -2434,9 +2436,10 @@ print_optlist (FILE *fp, const struct predicate *p)
|
||||
{
|
||||
print_parenthesised(fp, p->pred_left);
|
||||
fprintf (fp,
|
||||
"%s%s",
|
||||
"%s%s%s",
|
||||
p->need_stat ? "[call stat] " : "",
|
||||
p->need_type ? "[need type] " : "");
|
||||
p->need_type ? "[need type] " : "",
|
||||
p->need_inum ? "[need inum] " : "");
|
||||
print_predicate(fp, p);
|
||||
fprintf(fp, " [%g] ", p->est_success_rate);
|
||||
if (options.debug_options & DebugSuccessRates)
|
||||
|
||||
34
find/tree.c
34
find/tree.c
@@ -748,8 +748,9 @@ opt_expr (struct predicate **eval_treep)
|
||||
}
|
||||
|
||||
reorder = ((options.optimisation_level > 1)
|
||||
&& (NeedsType == curr->pred_right->p_cost)
|
||||
&& !curr->pred_right->need_stat) ||
|
||||
&& (NeedsType == curr->pred_right->p_cost
|
||||
|| NeedsInodeNumber == curr->pred_right->p_cost)
|
||||
&& !curr->pred_right->need_stat) ||
|
||||
(options.optimisation_level > 2);
|
||||
|
||||
if (reorder)
|
||||
@@ -831,6 +832,7 @@ set_new_parent (struct predicate *curr, enum predicate_precedence high_prec, str
|
||||
new_parent->p_prec = high_prec;
|
||||
new_parent->need_stat = false;
|
||||
new_parent->need_type = false;
|
||||
new_parent->need_inum = false;
|
||||
new_parent->p_cost = NeedsNothing;
|
||||
|
||||
switch (high_prec)
|
||||
@@ -919,7 +921,7 @@ static struct pred_cost_lookup costlookup[] =
|
||||
{ pred_group , NeedsStatInfo },
|
||||
{ pred_ilname , NeedsLinkName },
|
||||
{ pred_iname , NeedsNothing },
|
||||
{ pred_inum , NeedsStatInfo },
|
||||
{ pred_inum , NeedsInodeNumber },
|
||||
{ pred_ipath , NeedsNothing },
|
||||
{ pred_links , NeedsStatInfo },
|
||||
{ pred_lname , NeedsLinkName },
|
||||
@@ -1004,6 +1006,10 @@ get_pred_cost(const struct predicate *p)
|
||||
{
|
||||
data_requirement_cost = NeedsStatInfo;
|
||||
}
|
||||
else if (p->need_inum)
|
||||
{
|
||||
data_requirement_cost = NeedsInodeNumber;
|
||||
}
|
||||
else if (p->need_type)
|
||||
{
|
||||
data_requirement_cost = NeedsType;
|
||||
@@ -1433,6 +1439,7 @@ get_new_pred (const struct parser_table *entry)
|
||||
last_pred->no_default_print = false;
|
||||
last_pred->need_stat = true;
|
||||
last_pred->need_type = true;
|
||||
last_pred->need_inum = false;
|
||||
last_pred->args.str = NULL;
|
||||
last_pred->pred_next = NULL;
|
||||
last_pred->pred_left = NULL;
|
||||
@@ -1478,6 +1485,7 @@ get_new_pred_chk_op (const struct parser_table *entry)
|
||||
new_pred->p_prec = AND_PREC;
|
||||
new_pred->need_stat = false;
|
||||
new_pred->need_type = false;
|
||||
new_pred->need_inum = false;
|
||||
new_pred->args.str = NULL;
|
||||
new_pred->side_effects = false;
|
||||
new_pred->no_default_print = false;
|
||||
@@ -1500,15 +1508,16 @@ struct cost_assoc
|
||||
struct cost_assoc cost_table[] =
|
||||
{
|
||||
{ NeedsNothing, "Nothing" },
|
||||
{ NeedsType, "Type" },
|
||||
{ NeedsStatInfo, "StatInfo" },
|
||||
{ NeedsLinkName, "LinkName" },
|
||||
{ NeedsAccessInfo, "AccessInfo" },
|
||||
{ NeedsSyncDiskHit, "SyncDiskHit" },
|
||||
{ NeedsInodeNumber, "InodeNumber" },
|
||||
{ NeedsType, "Type" },
|
||||
{ NeedsStatInfo, "StatInfo" },
|
||||
{ NeedsLinkName, "LinkName" },
|
||||
{ NeedsAccessInfo, "AccessInfo" },
|
||||
{ NeedsSyncDiskHit, "SyncDiskHit" },
|
||||
{ NeedsEventualExec, "EventualExec" },
|
||||
{ NeedsImmediateExec, "ImmediateExec" },
|
||||
{ NeedsUserInteraction, "UserInteraction" },
|
||||
{ NeedsUnknown, "Unknown" }
|
||||
{ NeedsUnknown, "Unknown" }
|
||||
};
|
||||
|
||||
struct prec_assoc
|
||||
@@ -1604,7 +1613,7 @@ print_tree (FILE *fp, struct predicate *node, int indent)
|
||||
node->est_success_rate,
|
||||
(node->side_effects ? "" : "no "));
|
||||
|
||||
if (node->need_stat || node->need_type)
|
||||
if (node->need_stat || node->need_type || node->need_inum)
|
||||
{
|
||||
int comma = 0;
|
||||
|
||||
@@ -1614,6 +1623,11 @@ print_tree (FILE *fp, struct predicate *node, int indent)
|
||||
fprintf (fp, "stat");
|
||||
comma = 1;
|
||||
}
|
||||
if (node->need_inum)
|
||||
{
|
||||
fprintf (fp, "%sinode", comma ? "," : "");
|
||||
comma = 1;
|
||||
}
|
||||
if (node->need_type)
|
||||
{
|
||||
fprintf (fp, "%stype", comma ? "," : "");
|
||||
|
||||
55
find/util.c
55
find/util.c
@@ -220,9 +220,8 @@ get_statinfo (const char *pathname, const char *name, struct stat *p)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Get the stat/type information for a file, if it is
|
||||
* not already known.
|
||||
/* Get the stat/type/inode information for a file, if it is not
|
||||
* already known.
|
||||
*/
|
||||
int
|
||||
get_info (const char *pathname,
|
||||
@@ -235,14 +234,50 @@ get_info (const char *pathname,
|
||||
* already have it, stat the file now.
|
||||
*/
|
||||
if (pred_ptr->need_stat)
|
||||
todo = true;
|
||||
else if ((pred_ptr->need_type && (0 == state.have_type)))
|
||||
todo = true;
|
||||
|
||||
{
|
||||
todo = true; /* need full stat info */
|
||||
}
|
||||
else if (pred_ptr->need_type && !state.have_type)
|
||||
{
|
||||
todo = true; /* need to stat to get the type */
|
||||
}
|
||||
else if (pred_ptr->need_inum)
|
||||
{
|
||||
if (!p->st_ino)
|
||||
{
|
||||
todo = true; /* need to stat to get the inode number */
|
||||
}
|
||||
else if ((!state.have_type) || S_ISDIR(p->st_mode))
|
||||
{
|
||||
/* For now we decide not to trust struct dirent.d_ino for
|
||||
* directory entries that are subdirectories, in case this
|
||||
* subdirectory is a mount point. We also need to call a
|
||||
* stat function if we don't have st_ino (i.e. it is zero).
|
||||
*/
|
||||
todo = true;
|
||||
}
|
||||
}
|
||||
if (todo)
|
||||
return get_statinfo(pathname, state.rel_pathname, p);
|
||||
{
|
||||
int result = get_statinfo(pathname, state.rel_pathname, p);
|
||||
if (result != 0)
|
||||
{
|
||||
/* Verify some postconditions. We can't check st_mode for
|
||||
non-zero-ness because of Savannah bug #16378. */
|
||||
if (pred_ptr->need_type)
|
||||
{
|
||||
assert (state.have_type);
|
||||
}
|
||||
if (pred_ptr->need_inum)
|
||||
{
|
||||
assert (p->st_ino);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Determine if we can use O_NOFOLLOW.
|
||||
@@ -979,7 +1014,7 @@ apply_predicate(const char *pathname, struct stat *stat_buf, struct predicate *p
|
||||
{
|
||||
++p->perf.visits;
|
||||
|
||||
if (p->need_stat || p->need_type)
|
||||
if (p->need_stat || p->need_type || p->need_inum)
|
||||
{
|
||||
/* We may need a stat here. */
|
||||
if (get_info(pathname, stat_buf, p) != 0)
|
||||
|
||||
Reference in New Issue
Block a user