Optimise away calls to stat if all we need is the inode number (bug #24342).

* find/util.c (get_info): call get_statinfo if need_inum and we
don't already have the inode number.
(apply_predicate): Call get_info if need_inum.
* find/defs.h (enum EvaluationCost): Add NeedsInodeNumber.
(struct predicate): Add need_inum.
* find/tree.c (get_new_pred): Default need_inum to false.
(get_new_pred_chk_op): Likewise.
* find/ftsfind.c (consider_visiting): Copy the value of st_ino
into statbuf.st_ino.
* find/parser.c (parse_inum): Set need_inum=true.
(parse_samefile): Set need_inum=false, but add a comment
indicating that there is an optimisation opportunity.
(make_segment): For -printf %i, set NeedsInodeNumber rather than
NeedsStat.
* find/pred.c (pred_inum): assert that the inode number is known
(i.e. non-zero).
(print_optlist): Print "[need inum]" when need_inum is  true.
* find/tree.c (opt_expr): Also reorder expressions if the right
hand expression is -inum (that is, consider that test to be no
more expensive than -type).
(set_new_parent): Default need_inum to false.
(costlookup): Set pred_inum=NeedsInodeNumber.
(get_pred_cost): need_inum implies a data cost of
NeedsInodeNumber.
(cost_table): Add NeedsInodeNumber.
(print_tree): Handle need_inum.

Signed-off-by: James Youngman <jay@gnu.org>
This commit is contained in:
James Youngman
2009-03-07 20:16:49 +00:00
parent 530e4a541d
commit 2bf001636e
8 changed files with 130 additions and 23 deletions

View File

@@ -1,3 +1,34 @@
2009-03-08 James Youngman <jay@gnu.org>
Optimise away calls to stat if all we need is the inode number.
This fixes Savannah bug #24342.
* find/util.c (get_info): call get_statinfo if need_inum and we
don't already have the inode number.
(apply_predicate): Call get_info if need_inum.
* find/defs.h (enum EvaluationCost): Add NeedsInodeNumber.
(struct predicate): Add need_inum.
* find/tree.c (get_new_pred): Default need_inum to false.
(get_new_pred_chk_op): Likewise.
* find/ftsfind.c (consider_visiting): Copy the value of st_ino
into statbuf.st_ino.
* find/parser.c (parse_inum): Set need_inum=true.
(parse_samefile): Set need_inum=false, but add a comment
indicating that there is an optimisation opportunity.
(make_segment): For -printf %i, set NeedsInodeNumber rather than
NeedsStat.
* find/pred.c (pred_inum): assert that the inode number is known
(i.e. non-zero).
(print_optlist): Print "[need inum]" when need_inum is true.
* find/tree.c (opt_expr): Also reorder expressions if the right
hand expression is -inum (that is, consider that test to be no
more expensive than -type).
(set_new_parent): Default need_inum to false.
(costlookup): Set pred_inum=NeedsInodeNumber.
(get_pred_cost): need_inum implies a data cost of
NeedsInodeNumber.
(cost_table): Add NeedsInodeNumber.
(print_tree): Handle need_inum.
2009-03-06 James Youngman <jay@gnu.org>
Change suffix for non-release versions from -CVS to -git.

9
NEWS
View File

@@ -12,6 +12,12 @@ element of struct dirent, for example reiserfs. Anecdotal evidence
suggests this can speed updatedb up from about 30 minutes to 3-4
minutes.
The ftsfind executable also now avoids calling stat() functions to
discover the inode number of a file, if we already read this
information from the directory. This does provide a speed-up, but
only for a restricted set of commands such as "find . -inum 4001".
This fix is listed below as bug #24342.
** Bug Fixes
#25764: remove duplicate entry for 'proc' in updatedb's $PRUNEFS.
@@ -26,6 +32,9 @@ unknown user or is missing.
#25154: Allow compilation with C compilers that don't allow
declarations to follow statements.
#24342: -inum predicate shoud use dirent.d_ino instead of stat.st_ino
(this is a performance bug).
** Translations
Updated translations for Bulgarian, German, Irish, Hungarian,

View File

@@ -242,6 +242,7 @@ struct predicate_performance_info
enum EvaluationCost
{
NeedsNothing,
NeedsInodeNumber,
NeedsType,
NeedsStatInfo,
NeedsLinkName,
@@ -285,6 +286,9 @@ struct predicate
/* True if this predicate node requires knowledge of the file type. */
boolean need_type;
/* True if this predicate node requires knowledge of the inode number. */
boolean need_inum;
enum EvaluationCost p_cost;
/* est_success_rate is a number between 0.0 and 1.0 */

View File

@@ -409,6 +409,7 @@ consider_visiting(FTS *p, FTSENT *ent)
inside_dir(p->fts_cwd_fd);
prev_depth = ent->fts_level;
statbuf.st_ino = ent->fts_statp->st_ino;
/* Cope with various error conditions. */
if (ent->fts_info == FTS_ERR

View File

@@ -1247,6 +1247,9 @@ parse_inum (const struct parser_table* entry, char **argv, int *arg_ptr)
* files match
*/
p->est_success_rate = 1e-6;
p->need_inum = true;
p->need_stat = false;
p->need_type = false;
return true;
}
else
@@ -2293,6 +2296,8 @@ parse_samefile (const struct parser_table* entry, char **argv, int *arg_ptr)
our_pred->args.samefileid.dev = st.st_dev;
our_pred->args.samefileid.fd = fd;
our_pred->need_type = false;
/* smarter way: compare type and inode number first. */
/* TODO: maybe optimise this away by being optimistic */
our_pred->need_stat = true;
our_pred->est_success_rate = 0.01f;
return true;
@@ -2895,6 +2900,12 @@ make_segment (struct segment **segment,
*fmt++ = 's';
break;
case 'i': /* inode number */
pred->need_inum = true;
mycost = NeedsInodeNumber;
*fmt++ = 's';
break;
case 'a': /* atime in `ctime' format */
case 'A': /* atime in user-specified strftime format */
case 'B': /* birth time in user-specified strftime format */
@@ -2902,7 +2913,6 @@ make_segment (struct segment **segment,
case 'C': /* ctime in user-specified strftime format */
case 'F': /* file system type */
case 'g': /* group name */
case 'i': /* inode number */
case 'M': /* mode in `ls -l' format (eg., "drwxr-xr-x") */
case 's': /* size in bytes */
case 't': /* mtime in `ctime' format */

View File

@@ -1216,6 +1216,8 @@ pred_inum (const char *pathname, struct stat *stat_buf, struct predicate *pred_p
{
(void) pathname;
assert (stat_buf->st_ino != 0);
switch (pred_ptr->args.numinfo.kind)
{
case COMP_GT:
@@ -2434,9 +2436,10 @@ print_optlist (FILE *fp, const struct predicate *p)
{
print_parenthesised(fp, p->pred_left);
fprintf (fp,
"%s%s",
"%s%s%s",
p->need_stat ? "[call stat] " : "",
p->need_type ? "[need type] " : "");
p->need_type ? "[need type] " : "",
p->need_inum ? "[need inum] " : "");
print_predicate(fp, p);
fprintf(fp, " [%g] ", p->est_success_rate);
if (options.debug_options & DebugSuccessRates)

View File

@@ -748,8 +748,9 @@ opt_expr (struct predicate **eval_treep)
}
reorder = ((options.optimisation_level > 1)
&& (NeedsType == curr->pred_right->p_cost)
&& !curr->pred_right->need_stat) ||
&& (NeedsType == curr->pred_right->p_cost
|| NeedsInodeNumber == curr->pred_right->p_cost)
&& !curr->pred_right->need_stat) ||
(options.optimisation_level > 2);
if (reorder)
@@ -831,6 +832,7 @@ set_new_parent (struct predicate *curr, enum predicate_precedence high_prec, str
new_parent->p_prec = high_prec;
new_parent->need_stat = false;
new_parent->need_type = false;
new_parent->need_inum = false;
new_parent->p_cost = NeedsNothing;
switch (high_prec)
@@ -919,7 +921,7 @@ static struct pred_cost_lookup costlookup[] =
{ pred_group , NeedsStatInfo },
{ pred_ilname , NeedsLinkName },
{ pred_iname , NeedsNothing },
{ pred_inum , NeedsStatInfo },
{ pred_inum , NeedsInodeNumber },
{ pred_ipath , NeedsNothing },
{ pred_links , NeedsStatInfo },
{ pred_lname , NeedsLinkName },
@@ -1004,6 +1006,10 @@ get_pred_cost(const struct predicate *p)
{
data_requirement_cost = NeedsStatInfo;
}
else if (p->need_inum)
{
data_requirement_cost = NeedsInodeNumber;
}
else if (p->need_type)
{
data_requirement_cost = NeedsType;
@@ -1433,6 +1439,7 @@ get_new_pred (const struct parser_table *entry)
last_pred->no_default_print = false;
last_pred->need_stat = true;
last_pred->need_type = true;
last_pred->need_inum = false;
last_pred->args.str = NULL;
last_pred->pred_next = NULL;
last_pred->pred_left = NULL;
@@ -1478,6 +1485,7 @@ get_new_pred_chk_op (const struct parser_table *entry)
new_pred->p_prec = AND_PREC;
new_pred->need_stat = false;
new_pred->need_type = false;
new_pred->need_inum = false;
new_pred->args.str = NULL;
new_pred->side_effects = false;
new_pred->no_default_print = false;
@@ -1500,15 +1508,16 @@ struct cost_assoc
struct cost_assoc cost_table[] =
{
{ NeedsNothing, "Nothing" },
{ NeedsType, "Type" },
{ NeedsStatInfo, "StatInfo" },
{ NeedsLinkName, "LinkName" },
{ NeedsAccessInfo, "AccessInfo" },
{ NeedsSyncDiskHit, "SyncDiskHit" },
{ NeedsInodeNumber, "InodeNumber" },
{ NeedsType, "Type" },
{ NeedsStatInfo, "StatInfo" },
{ NeedsLinkName, "LinkName" },
{ NeedsAccessInfo, "AccessInfo" },
{ NeedsSyncDiskHit, "SyncDiskHit" },
{ NeedsEventualExec, "EventualExec" },
{ NeedsImmediateExec, "ImmediateExec" },
{ NeedsUserInteraction, "UserInteraction" },
{ NeedsUnknown, "Unknown" }
{ NeedsUnknown, "Unknown" }
};
struct prec_assoc
@@ -1604,7 +1613,7 @@ print_tree (FILE *fp, struct predicate *node, int indent)
node->est_success_rate,
(node->side_effects ? "" : "no "));
if (node->need_stat || node->need_type)
if (node->need_stat || node->need_type || node->need_inum)
{
int comma = 0;
@@ -1614,6 +1623,11 @@ print_tree (FILE *fp, struct predicate *node, int indent)
fprintf (fp, "stat");
comma = 1;
}
if (node->need_inum)
{
fprintf (fp, "%sinode", comma ? "," : "");
comma = 1;
}
if (node->need_type)
{
fprintf (fp, "%stype", comma ? "," : "");

View File

@@ -220,9 +220,8 @@ get_statinfo (const char *pathname, const char *name, struct stat *p)
return 0;
}
/* Get the stat/type information for a file, if it is
* not already known.
/* Get the stat/type/inode information for a file, if it is not
* already known.
*/
int
get_info (const char *pathname,
@@ -235,14 +234,50 @@ get_info (const char *pathname,
* already have it, stat the file now.
*/
if (pred_ptr->need_stat)
todo = true;
else if ((pred_ptr->need_type && (0 == state.have_type)))
todo = true;
{
todo = true; /* need full stat info */
}
else if (pred_ptr->need_type && !state.have_type)
{
todo = true; /* need to stat to get the type */
}
else if (pred_ptr->need_inum)
{
if (!p->st_ino)
{
todo = true; /* need to stat to get the inode number */
}
else if ((!state.have_type) || S_ISDIR(p->st_mode))
{
/* For now we decide not to trust struct dirent.d_ino for
* directory entries that are subdirectories, in case this
* subdirectory is a mount point. We also need to call a
* stat function if we don't have st_ino (i.e. it is zero).
*/
todo = true;
}
}
if (todo)
return get_statinfo(pathname, state.rel_pathname, p);
{
int result = get_statinfo(pathname, state.rel_pathname, p);
if (result != 0)
{
/* Verify some postconditions. We can't check st_mode for
non-zero-ness because of Savannah bug #16378. */
if (pred_ptr->need_type)
{
assert (state.have_type);
}
if (pred_ptr->need_inum)
{
assert (p->st_ino);
}
}
}
else
return 0;
{
return 0;
}
}
/* Determine if we can use O_NOFOLLOW.
@@ -979,7 +1014,7 @@ apply_predicate(const char *pathname, struct stat *stat_buf, struct predicate *p
{
++p->perf.visits;
if (p->need_stat || p->need_type)
if (p->need_stat || p->need_type || p->need_inum)
{
/* We may need a stat here. */
if (get_info(pathname, stat_buf, p) != 0)