From 2575a3b7df86d5ecf4d85c0392f05c4bc03e7b1175a694422b8fb256977b24dc Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 13 Aug 2013 09:15:48 +0000 Subject: [PATCH] Accepting request 186872 from home:Andreas_Schwab:Factory - Update to glibc 2.18 release * No source change - strcoll-overflow.patch: fix buffer overflow in strcoll (CVE-2012-4412, bnc#779320) - readdir_r-overflow.patch: fix readdir_r with long file names (CVE-2013-4237, bnc#834594) OBS-URL: https://build.opensuse.org/request/show/186872 OBS-URL: https://build.opensuse.org/package/show/Base:System/glibc?expand=0&rev=305 --- glibc-2.17.90-85891acadf1b.tar.xz | 3 - glibc-2.18.tar.xz | 3 + glibc-testsuite.changes | 10 + glibc-testsuite.spec | 15 +- glibc-utils.changes | 10 + glibc-utils.spec | 15 +- glibc.changes | 10 + glibc.spec | 15 +- readdir_r-overflow.patch | 297 +++++++++ strcoll-overflow.patch | 1026 +++++++++++++++++++++++++++++ 10 files changed, 1389 insertions(+), 15 deletions(-) delete mode 100644 glibc-2.17.90-85891acadf1b.tar.xz create mode 100644 glibc-2.18.tar.xz create mode 100644 readdir_r-overflow.patch create mode 100644 strcoll-overflow.patch diff --git a/glibc-2.17.90-85891acadf1b.tar.xz b/glibc-2.17.90-85891acadf1b.tar.xz deleted file mode 100644 index 6b7c6a5..0000000 --- a/glibc-2.17.90-85891acadf1b.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a58b32a88c982b0eebdaca9a0b8c14441ebbb15174b10f69a98f82eb849ce73 -size 11780928 diff --git a/glibc-2.18.tar.xz b/glibc-2.18.tar.xz new file mode 100644 index 0000000..38b8bfb --- /dev/null +++ b/glibc-2.18.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb4e1e381928f1e5e55e71ab1ba8e0ea7ede75ff9709770435bfd018ea257a3 +size 11150148 diff --git a/glibc-testsuite.changes b/glibc-testsuite.changes index 21cb864..570e25d 100644 --- a/glibc-testsuite.changes +++ b/glibc-testsuite.changes @@ -1,3 +1,13 @@ 
+------------------------------------------------------------------- +Tue Aug 13 07:24:59 UTC 2013 - schwab@suse.de + +- Update to glibc 2.18 release + * No source change +- strcoll-overflow.patch: fix buffer overflow in strcoll (CVE-2012-4412, + bnc#779320) +- readdir_r-overflow.patch: fix readdir_r with long file names + (CVE-2013-4237, bnc#834594) + ------------------------------------------------------------------- Tue Aug 6 09:20:04 UTC 2013 - schwab@suse.de diff --git a/glibc-testsuite.spec b/glibc-testsuite.spec index 4a90773..a632ae6 100644 --- a/glibc-testsuite.spec +++ b/glibc-testsuite.spec @@ -106,13 +106,14 @@ BuildRequires: gd-devel # 2.6.16 is the SLES 10 kernel, use this as oldest supported kernel %define enablekernel 2.6.16 %endif -Version: 2.17.90 +Version: 2.18 Release: 0 -%define glibc_major_version 2.17.90 -%define git_id 85891acadf1b +%define glibc_major_version 2.18 +%define git_id %{nil} Url: http://www.gnu.org/software/libc/libc.html BuildRoot: %{_tmppath}/%{name}-%{version}-build -Source: glibc-%{version}-%{git_id}.tar.xz +#Source: glibc-%{version}-%{git_id}.tar.xz +Source: http://ftp.gnu.org/pub/gnu/glibc/glibc-%{version}.tar.xz Source3: noversion.tar.bz2 Source4: manpages.tar.bz2 Source5: nsswitch.conf @@ -257,6 +258,10 @@ Patch2018: i686-strcasecmp-fallback.patch Patch2019: pldd-wait-ptrace-stop.patch # PATCH-FIX-UPSTREAM Fix cbrtl for ldbl-96 Patch2020: cbrtl-ldbl-96.patch +# PATCH-FIX-UPSTREAM Fix buffer overflow in strcoll (CVE-2012-4412) +Patch2021: strcoll-overflow.patch +# PATCH-FIX-UPSTREAM Fix readdir_r with long file names (CVE-2013-4237) +Patch2022: readdir_r-overflow.patch # Non-glibc patches # PATCH-FIX-OPENSUSE Remove debianisms from manpages @@ -465,6 +470,8 @@ rm nscd/s-stamp %patch2018 -p1 %patch2019 -p1 %patch2020 -p1 +%patch2021 -p1 +%patch2022 -p1 %patch3000 diff --git a/glibc-utils.changes b/glibc-utils.changes index 21cb864..570e25d 100644 --- a/glibc-utils.changes +++ b/glibc-utils.changes @@ -1,3 +1,13 @@ 
+------------------------------------------------------------------- +Tue Aug 13 07:24:59 UTC 2013 - schwab@suse.de + +- Update to glibc 2.18 release + * No source change +- strcoll-overflow.patch: fix buffer overflow in strcoll (CVE-2012-4412, + bnc#779320) +- readdir_r-overflow.patch: fix readdir_r with long file names + (CVE-2013-4237, bnc#834594) + ------------------------------------------------------------------- Tue Aug 6 09:20:04 UTC 2013 - schwab@suse.de diff --git a/glibc-utils.spec b/glibc-utils.spec index 106fdfd..1f295de 100644 --- a/glibc-utils.spec +++ b/glibc-utils.spec @@ -105,13 +105,14 @@ BuildRequires: gd-devel # 2.6.16 is the SLES 10 kernel, use this as oldest supported kernel %define enablekernel 2.6.16 %endif -Version: 2.17.90 +Version: 2.18 Release: 0 -%define glibc_major_version 2.17.90 -%define git_id 85891acadf1b +%define glibc_major_version 2.18 +%define git_id %{nil} Url: http://www.gnu.org/software/libc/libc.html BuildRoot: %{_tmppath}/%{name}-%{version}-build -Source: glibc-%{version}-%{git_id}.tar.xz +#Source: glibc-%{version}-%{git_id}.tar.xz +Source: http://ftp.gnu.org/pub/gnu/glibc/glibc-%{version}.tar.xz Source3: noversion.tar.bz2 Source4: manpages.tar.bz2 Source5: nsswitch.conf @@ -256,6 +257,10 @@ Patch2018: i686-strcasecmp-fallback.patch Patch2019: pldd-wait-ptrace-stop.patch # PATCH-FIX-UPSTREAM Fix cbrtl for ldbl-96 Patch2020: cbrtl-ldbl-96.patch +# PATCH-FIX-UPSTREAM Fix buffer overflow in strcoll (CVE-2012-4412) +Patch2021: strcoll-overflow.patch +# PATCH-FIX-UPSTREAM Fix readdir_r with long file names (CVE-2013-4237) +Patch2022: readdir_r-overflow.patch # Non-glibc patches # PATCH-FIX-OPENSUSE Remove debianisms from manpages @@ -465,6 +470,8 @@ rm nscd/s-stamp %patch2018 -p1 %patch2019 -p1 %patch2020 -p1 +%patch2021 -p1 +%patch2022 -p1 %patch3000 diff --git a/glibc.changes b/glibc.changes index 21cb864..570e25d 100644 --- a/glibc.changes +++ b/glibc.changes @@ -1,3 +1,13 @@ 
+------------------------------------------------------------------- +Tue Aug 13 07:24:59 UTC 2013 - schwab@suse.de + +- Update to glibc 2.18 release + * No source change +- strcoll-overflow.patch: fix buffer overflow in strcoll (CVE-2012-4412, + bnc#779320) +- readdir_r-overflow.patch: fix readdir_r with long file names + (CVE-2013-4237, bnc#834594) + ------------------------------------------------------------------- Tue Aug 6 09:20:04 UTC 2013 - schwab@suse.de diff --git a/glibc.spec b/glibc.spec index 9fb08fe..515a9b9 100644 --- a/glibc.spec +++ b/glibc.spec @@ -106,13 +106,14 @@ BuildRequires: gd-devel # 2.6.16 is the SLES 10 kernel, use this as oldest supported kernel %define enablekernel 2.6.16 %endif -Version: 2.17.90 +Version: 2.18 Release: 0 -%define glibc_major_version 2.17.90 -%define git_id 85891acadf1b +%define glibc_major_version 2.18 +%define git_id %{nil} Url: http://www.gnu.org/software/libc/libc.html BuildRoot: %{_tmppath}/%{name}-%{version}-build -Source: glibc-%{version}-%{git_id}.tar.xz +#Source: glibc-%{version}-%{git_id}.tar.xz +Source: http://ftp.gnu.org/pub/gnu/glibc/glibc-%{version}.tar.xz Source3: noversion.tar.bz2 Source4: manpages.tar.bz2 Source5: nsswitch.conf @@ -257,6 +258,10 @@ Patch2018: i686-strcasecmp-fallback.patch Patch2019: pldd-wait-ptrace-stop.patch # PATCH-FIX-UPSTREAM Fix cbrtl for ldbl-96 Patch2020: cbrtl-ldbl-96.patch +# PATCH-FIX-UPSTREAM Fix buffer overflow in strcoll (CVE-2012-4412) +Patch2021: strcoll-overflow.patch +# PATCH-FIX-UPSTREAM Fix readdir_r with long file names (CVE-2013-4237) +Patch2022: readdir_r-overflow.patch # Non-glibc patches # PATCH-FIX-OPENSUSE Remove debianisms from manpages @@ -465,6 +470,8 @@ rm nscd/s-stamp %patch2018 -p1 %patch2019 -p1 %patch2020 -p1 +%patch2021 -p1 +%patch2022 -p1 %patch3000 diff --git a/readdir_r-overflow.patch b/readdir_r-overflow.patch new file mode 100644 index 0000000..b2e3182 --- /dev/null +++ b/readdir_r-overflow.patch @@ -0,0 +1,297 @@ +2013-06-12 Florian Weimer + + 
[BZ #14699] + * sysdeps/posix/dirstream.h (struct __dirstream): Add errcode + member. + * sysdeps/posix/opendir.c (__alloc_dir): Initialize errcode + member. + * sysdeps/posix/rewinddir.c (rewinddir): Reset errcode member. + * sysdeps/posix/readdir_r.c (__READDIR_R): Enforce NAME_MAX limit. + Return delayed error code. Remove GETDENTS_64BIT_ALIGNED + conditional. + * sysdeps/unix/sysv/linux/wordsize-64/readdir_r.c: Do not define + GETDENTS_64BIT_ALIGNED. + * sysdeps/unix/sysv/linux/i386/readdir64_r.c: Likewise. + * manual/filesys.texi (Reading/Closing Directory): Document + ENAMETOOLONG return value of readdir_r. Recommend readdir more + strongly. + * manual/conf.texi (Limits for Files): Add portability note to + NAME_MAX, PATH_MAX. + (Pathconf): Add portability note for _PC_NAME_MAX, _PC_PATH_MAX. + +Index: glibc-2.18/manual/conf.texi +=================================================================== +--- glibc-2.18.orig/manual/conf.texi ++++ glibc-2.18/manual/conf.texi +@@ -1149,6 +1149,9 @@ typed ahead as input. @xref{I/O Queues} + @deftypevr Macro int NAME_MAX + The uniform system limit (if any) for the length of a file name component, not + including the terminating null character. ++ ++@strong{Portability Note:} On some systems, @theglibc{} defines ++@code{NAME_MAX}, but does not actually enforce this limit. + @end deftypevr + + @comment limits.h +@@ -1157,6 +1160,9 @@ including the terminating null character + The uniform system limit (if any) for the length of an entire file name (that + is, the argument given to system calls such as @code{open}), including the + terminating null character. ++ ++@strong{Portability Note:} @Theglibc{} does not enforce this limit ++even if @code{PATH_MAX} is defined. + @end deftypevr + + @cindex limits, pipe buffer size +@@ -1476,6 +1482,9 @@ Inquire about the value of @code{POSIX_R + Inquire about the value of @code{POSIX_REC_XFER_ALIGN}. 
+ @end table + ++@strong{Portability Note:} On some systems, @theglibc{} does not ++enforce @code{_PC_NAME_MAX} or @code{_PC_PATH_MAX} limits. ++ + @node Utility Limits + @section Utility Program Capacity Limits + +Index: glibc-2.18/manual/filesys.texi +=================================================================== +--- glibc-2.18.orig/manual/filesys.texi ++++ glibc-2.18/manual/filesys.texi +@@ -444,9 +444,9 @@ symbols are declared in the header file + @comment POSIX.1 + @deftypefun {struct dirent *} readdir (DIR *@var{dirstream}) + This function reads the next entry from the directory. It normally +-returns a pointer to a structure containing information about the file. +-This structure is statically allocated and can be rewritten by a +-subsequent call. ++returns a pointer to a structure containing information about the ++file. This structure is associated with the @var{dirstream} handle ++and can be rewritten by a subsequent call. + + @strong{Portability Note:} On some systems @code{readdir} may not + return entries for @file{.} and @file{..}, even though these are always +@@ -461,19 +461,59 @@ conditions are defined for this function + The @var{dirstream} argument is not valid. + @end table + +-@code{readdir} is not thread safe. Multiple threads using +-@code{readdir} on the same @var{dirstream} may overwrite the return +-value. Use @code{readdir_r} when this is critical. ++To distinguish between an end-of-directory condition or an error, you ++must set @code{errno} to zero before calling @code{readdir}. To avoid ++entering an infinite loop, you should stop reading from the directory ++after the first error. ++ ++In POSIX.1-2008, @code{readdir} is not thread-safe. In @theglibc{} ++implementation, it is safe to call @code{readdir} concurrently on ++different @var{dirstream}s (but multiple threads accessing the same ++@var{dirstream} result in undefined behavior). 
@code{readdir_r} is a ++fully thread-safe alternative, but suffers from poor portability (see ++below). It is recommended that you use @code{readdir}, with external ++locking if multiple threads access the same @var{dirstream}. + @end deftypefun + + @comment dirent.h + @comment GNU + @deftypefun int readdir_r (DIR *@var{dirstream}, struct dirent *@var{entry}, struct dirent **@var{result}) +-This function is the reentrant version of @code{readdir}. Like +-@code{readdir} it returns the next entry from the directory. But to +-prevent conflicts between simultaneously running threads the result is +-not stored in statically allocated memory. Instead the argument +-@var{entry} points to a place to store the result. ++This function is a version of @code{readdir} which performs internal ++locking. Like @code{readdir} it returns the next entry from the ++directory. To prevent conflicts between simultaneously running ++threads the result is stored inside the @var{entry} object. ++ ++@strong{Portability Note:} It is recommended to use @code{readdir} ++instead of @code{readdir_r} for the following reasons: ++ ++@itemize @bullet ++@item ++On systems which do not define @code{NAME_MAX}, it may not be possible ++to use @code{readdir_r} safely because the caller does not specify the ++length of the buffer for the directory entry. ++ ++@item ++On some systems, @code{readdir_r} cannot read directory entries with ++very long names. If such a name is encountered, @theglibc{} ++implementation of @code{readdir_r} returns with an error code of ++@code{ENAMETOOLONG} after the final directory entry has been read. On ++other systems, @code{readdir_r} may return successfully, but the ++@code{d_name} member may not be NUL-terminated or may be truncated. ++ ++@item ++POSIX.1-2008 does not guarantee that @code{readdir} is thread-safe, ++even when access to the same @var{dirstream} is serialized.
But in ++current implementations (including @theglibc{}), it is safe to call ++@code{readdir} concurrently on different @var{dirstream}s, so there is ++no requirement to use @code{readdir_r} even in multi-threaded ++programs. ++ ++@item ++It is expected that future versions of POSIX will obsolete ++@code{readdir_r} and mandate the level of thread safety for ++@code{readdir} which is provided by @theglibc{} and other ++implementations today. ++@end itemize + + Normally @code{readdir_r} returns zero and sets @code{*@var{result}} + to @var{entry}. If there are no more entries in the directory or an +@@ -481,15 +521,6 @@ error is detected, @code{readdir_r} sets + null pointer and returns a nonzero error code, also stored in + @code{errno}, as described for @code{readdir}. + +-@strong{Portability Note:} On some systems @code{readdir_r} may not +-return a NUL terminated string for the file name, even when there is no +-@code{d_reclen} field in @code{struct dirent} and the file +-name is the maximum allowed size. Modern systems all have the +-@code{d_reclen} field, and on old systems multi-threading is not +-critical. In any case there is no such problem with the @code{readdir} +-function, so that even on systems without the @code{d_reclen} member one +-could use multiple threads by using external locking. +- + It is also important to look at the definition of the @code{struct + dirent} type. Simply passing a pointer to an object of this type for + the second parameter of @code{readdir_r} might not be enough. Some +Index: glibc-2.18/sysdeps/posix/dirstream.h +=================================================================== +--- glibc-2.18.orig/sysdeps/posix/dirstream.h ++++ glibc-2.18/sysdeps/posix/dirstream.h +@@ -39,6 +39,8 @@ struct __dirstream + + off_t filepos; /* Position of next entry to read. */ + ++ int errcode; /* Delayed error code. */ ++ + /* Directory block. 
*/ + char data[0] __attribute__ ((aligned (__alignof__ (void*)))); + }; +Index: glibc-2.18/sysdeps/posix/opendir.c +=================================================================== +--- glibc-2.18.orig/sysdeps/posix/opendir.c ++++ glibc-2.18/sysdeps/posix/opendir.c +@@ -231,6 +231,7 @@ __alloc_dir (int fd, bool close_fd, int + dirp->size = 0; + dirp->offset = 0; + dirp->filepos = 0; ++ dirp->errcode = 0; + + return dirp; + } +Index: glibc-2.18/sysdeps/posix/readdir_r.c +=================================================================== +--- glibc-2.18.orig/sysdeps/posix/readdir_r.c ++++ glibc-2.18/sysdeps/posix/readdir_r.c +@@ -40,6 +40,7 @@ __READDIR_R (DIR *dirp, DIRENT_TYPE *ent + DIRENT_TYPE *dp; + size_t reclen; + const int saved_errno = errno; ++ int ret; + + __libc_lock_lock (dirp->lock); + +@@ -70,10 +71,10 @@ __READDIR_R (DIR *dirp, DIRENT_TYPE *ent + bytes = 0; + __set_errno (saved_errno); + } ++ if (bytes < 0) ++ dirp->errcode = errno; + + dp = NULL; +- /* Reclen != 0 signals that an error occurred. */ +- reclen = bytes != 0; + break; + } + dirp->size = (size_t) bytes; +@@ -106,29 +107,46 @@ __READDIR_R (DIR *dirp, DIRENT_TYPE *ent + dirp->filepos += reclen; + #endif + +- /* Skip deleted files. */ ++#ifdef NAME_MAX ++ if (reclen > offsetof (DIRENT_TYPE, d_name) + NAME_MAX + 1) ++ { ++ /* The record is very long. It could still fit into the ++ caller-supplied buffer if we can skip padding at the ++ end. */ ++ size_t namelen = _D_EXACT_NAMLEN (dp); ++ if (namelen <= NAME_MAX) ++ reclen = offsetof (DIRENT_TYPE, d_name) + namelen + 1; ++ else ++ { ++ /* The name is too long. Ignore this file. */ ++ dirp->errcode = ENAMETOOLONG; ++ dp->d_ino = 0; ++ continue; ++ } ++ } ++#endif ++ ++ /* Skip deleted and ignored files. */ + } + while (dp->d_ino == 0); + + if (dp != NULL) + { +-#ifdef GETDENTS_64BIT_ALIGNED +- /* The d_reclen value might include padding which is not part of +- the DIRENT_TYPE data structure. 
*/ +- reclen = MIN (reclen, +- offsetof (DIRENT_TYPE, d_name) + sizeof (dp->d_name)); +-#endif + *result = memcpy (entry, dp, reclen); +-#ifdef GETDENTS_64BIT_ALIGNED ++#ifdef _DIRENT_HAVE_D_RECLEN + entry->d_reclen = reclen; + #endif ++ ret = 0; + } + else +- *result = NULL; ++ { ++ *result = NULL; ++ ret = dirp->errcode; ++ } + + __libc_lock_unlock (dirp->lock); + +- return dp != NULL ? 0 : reclen ? errno : 0; ++ return ret; + } + + #ifdef __READDIR_R_ALIAS +Index: glibc-2.18/sysdeps/posix/rewinddir.c +=================================================================== +--- glibc-2.18.orig/sysdeps/posix/rewinddir.c ++++ glibc-2.18/sysdeps/posix/rewinddir.c +@@ -33,6 +33,7 @@ rewinddir (dirp) + dirp->filepos = 0; + dirp->offset = 0; + dirp->size = 0; ++ dirp->errcode = 0; + #ifndef NOT_IN_libc + __libc_lock_unlock (dirp->lock); + #endif +Index: glibc-2.18/sysdeps/unix/sysv/linux/i386/readdir64_r.c +=================================================================== +--- glibc-2.18.orig/sysdeps/unix/sysv/linux/i386/readdir64_r.c ++++ glibc-2.18/sysdeps/unix/sysv/linux/i386/readdir64_r.c +@@ -18,7 +18,6 @@ + #define __READDIR_R __readdir64_r + #define __GETDENTS __getdents64 + #define DIRENT_TYPE struct dirent64 +-#define GETDENTS_64BIT_ALIGNED 1 + + #include + +Index: glibc-2.18/sysdeps/unix/sysv/linux/wordsize-64/readdir_r.c +=================================================================== +--- glibc-2.18.orig/sysdeps/unix/sysv/linux/wordsize-64/readdir_r.c ++++ glibc-2.18/sysdeps/unix/sysv/linux/wordsize-64/readdir_r.c +@@ -1,5 +1,4 @@ + #define readdir64_r __no_readdir64_r_decl +-#define GETDENTS_64BIT_ALIGNED 1 + #include + #undef readdir64_r + weak_alias (__readdir_r, readdir64_r) diff --git a/strcoll-overflow.patch b/strcoll-overflow.patch new file mode 100644 index 0000000..5fa0fde --- /dev/null +++ b/strcoll-overflow.patch @@ -0,0 +1,1026 @@ +Siddhesh Poyarekar + + [BZ #14547] + * string/strcoll_l.c (STRCOLL): Skip allocating memory for + cache if string 
sizes may cause integer overflow. + + [BZ #14547] + * string/strcoll_l.c (coll_seq): New members rule, idx, + save_idx and back_us. + (get_next_seq_nocache): New function. + (do_compare_nocache): New function. + (STRCOLL): Use get_next_seq_nocache and do_compare_nocache + when malloc fails. + + [BZ #14547] + * string/strcoll_l.c (coll_seq): New structure. + (get_next_seq_cached): New function. + (get_next_seq): New function. + (do_compare): New function. + (STRCOLL): Use GNU style definition. Simplify implementation + by using get_next_seq, get_next_seq_cached and do_compare. + +Index: glibc-2.17.90/string/strcoll_l.c +=================================================================== +--- glibc-2.17.90.orig/string/strcoll_l.c ++++ glibc-2.17.90/string/strcoll_l.c +@@ -41,11 +41,434 @@ + + #include "../locale/localeinfo.h" + ++/* Track status while looking for sequences in a string. */ ++typedef struct ++{ ++ int len; /* Length of the current sequence. */ ++ int val; /* Position of the sequence relative to the ++ previous non-ignored sequence. */ ++ size_t idxnow; /* Current index in sequences. */ ++ size_t idxmax; /* Maximum index in sequences. */ ++ size_t idxcnt; /* Current count of indeces. */ ++ size_t backw; /* Current Backward sequence index. */ ++ size_t backw_stop; /* Index where the backward sequences stop. */ ++ const USTRING_TYPE *us; /* The string. */ ++ int32_t *idxarr; /* Array to cache weight indeces. */ ++ unsigned char *rulearr; /* Array to cache rules. */ ++ unsigned char rule; /* Saved rule for the first sequence. */ ++ int32_t idx; /* Index to weight of the current sequence. */ ++ int32_t save_idx; /* Save looked up index of a forward ++ sequence after the last backward ++ sequence. */ ++ const USTRING_TYPE *back_us; /* Beginning of the backward sequence. */ ++} coll_seq; ++ ++/* Get next sequence. The weight indeces are cached, so we don't need to ++ traverse the string.
*/ ++static void ++get_next_seq_cached (coll_seq *seq, int nrules, int pass, ++ const unsigned char *rulesets, ++ const USTRING_TYPE *weights) ++{ ++ int val = seq->val = 0; ++ int len = seq->len; ++ size_t backw_stop = seq->backw_stop; ++ size_t backw = seq->backw; ++ size_t idxcnt = seq->idxcnt; ++ size_t idxmax = seq->idxmax; ++ size_t idxnow = seq->idxnow; ++ unsigned char *rulearr = seq->rulearr; ++ int32_t *idxarr = seq->idxarr; ++ ++ while (len == 0) ++ { ++ ++val; ++ if (backw_stop != ~0ul) ++ { ++ /* The is something pushed. */ ++ if (backw == backw_stop) ++ { ++ /* The last pushed character was handled. Continue ++ with forward characters. */ ++ if (idxcnt < idxmax) ++ { ++ idxnow = idxcnt; ++ backw_stop = ~0ul; ++ } ++ else ++ { ++ /* Nothing anymore. The backward sequence ++ ended with the last sequence in the string. */ ++ idxnow = ~0ul; ++ break; ++ } ++ } ++ else ++ idxnow = --backw; ++ } ++ else ++ { ++ backw_stop = idxcnt; ++ ++ while (idxcnt < idxmax) ++ { ++ if ((rulesets[rulearr[idxcnt] * nrules + pass] ++ & sort_backward) == 0) ++ /* No more backward characters to push. */ ++ break; ++ ++idxcnt; ++ } ++ ++ if (backw_stop == idxcnt) ++ { ++ /* No sequence at all or just one. */ ++ if (idxcnt == idxmax) ++ /* Note that seq1len is still zero. */ ++ break; ++ ++ backw_stop = ~0ul; ++ idxnow = idxcnt++; ++ } ++ else ++ /* We pushed backward sequences. */ ++ idxnow = backw = idxcnt - 1; ++ } ++ len = weights[idxarr[idxnow]++]; ++ } ++ ++ /* Update the structure. */ ++ seq->val = val; ++ seq->len = len; ++ seq->backw_stop = backw_stop; ++ seq->backw = backw; ++ seq->idxcnt = idxcnt; ++ seq->idxnow = idxnow; ++} ++ ++/* Get next sequence. Traverse the string as required. 
*/ ++static void ++get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets, ++ const USTRING_TYPE *weights, const int32_t *table, ++ const USTRING_TYPE *extra, const int32_t *indirect) ++{ ++#include WEIGHT_H ++ int val = seq->val = 0; ++ int len = seq->len; ++ size_t backw_stop = seq->backw_stop; ++ size_t backw = seq->backw; ++ size_t idxcnt = seq->idxcnt; ++ size_t idxmax = seq->idxmax; ++ size_t idxnow = seq->idxnow; ++ unsigned char *rulearr = seq->rulearr; ++ int32_t *idxarr = seq->idxarr; ++ const USTRING_TYPE *us = seq->us; ++ ++ while (len == 0) ++ { ++ ++val; ++ if (backw_stop != ~0ul) ++ { ++ /* The is something pushed. */ ++ if (backw == backw_stop) ++ { ++ /* The last pushed character was handled. Continue ++ with forward characters. */ ++ if (idxcnt < idxmax) ++ { ++ idxnow = idxcnt; ++ backw_stop = ~0ul; ++ } ++ else ++ /* Nothing anymore. The backward sequence ended with ++ the last sequence in the string. Note that seq2len ++ is still zero. */ ++ break; ++ } ++ else ++ idxnow = --backw; ++ } ++ else ++ { ++ backw_stop = idxmax; ++ ++ while (*us != L('\0')) ++ { ++ int32_t tmp = findidx (&us, -1); ++ rulearr[idxmax] = tmp >> 24; ++ idxarr[idxmax] = tmp & 0xffffff; ++ idxcnt = idxmax++; ++ ++ if ((rulesets[rulearr[idxcnt] * nrules] ++ & sort_backward) == 0) ++ /* No more backward characters to push. */ ++ break; ++ ++idxcnt; ++ } ++ ++ if (backw_stop >= idxcnt) ++ { ++ /* No sequence at all or just one. */ ++ if (idxcnt == idxmax || backw_stop > idxcnt) ++ /* Note that seq1len is still zero. */ ++ break; ++ ++ backw_stop = ~0ul; ++ idxnow = idxcnt; ++ } ++ else ++ /* We pushed backward sequences. */ ++ idxnow = backw = idxcnt - 1; ++ } ++ len = weights[idxarr[idxnow]++]; ++ } ++ ++ /* Update the structure. */ ++ seq->val = val; ++ seq->len = len; ++ seq->backw_stop = backw_stop; ++ seq->backw = backw; ++ seq->idxcnt = idxcnt; ++ seq->idxmax = idxmax; ++ seq->idxnow = idxnow; ++ seq->us = us; ++} ++ ++/* Get next sequence. 
Traverse the string as required. This function does not ++ set or use any index or rule cache. */ ++static void ++get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char *rulesets, ++ const USTRING_TYPE *weights, const int32_t *table, ++ const USTRING_TYPE *extra, const int32_t *indirect, ++ int pass) ++{ ++#include WEIGHT_H ++ int val = seq->val = 0; ++ int len = seq->len; ++ size_t backw_stop = seq->backw_stop; ++ size_t backw = seq->backw; ++ size_t idxcnt = seq->idxcnt; ++ size_t idxmax = seq->idxmax; ++ int32_t idx = seq->idx; ++ const USTRING_TYPE *us = seq->us; ++ ++ while (len == 0) ++ { ++ ++val; ++ if (backw_stop != ~0ul) ++ { ++ /* The is something pushed. */ ++ if (backw == backw_stop) ++ { ++ /* The last pushed character was handled. Continue ++ with forward characters. */ ++ if (idxcnt < idxmax) ++ { ++ idx = seq->save_idx; ++ backw_stop = ~0ul; ++ } ++ else ++ { ++ /* Nothing anymore. The backward sequence ended with ++ the last sequence in the string. Note that len is ++ still zero. */ ++ idx = 0; ++ break; ++ } ++ } ++ else ++ { ++ /* XXX Traverse BACKW sequences from the beginning of ++ BACKW_STOP to get the next sequence. Is ther a quicker way ++ to do this? */ ++ int i = backw_stop; ++ us = seq->back_us; ++ while (i < backw) ++ { ++ int32_t tmp = findidx (&us, -1); ++ idx = tmp & 0xffffff; ++ i++; ++ } ++ --backw; ++ us = seq->us; ++ } ++ } ++ else ++ { ++ backw_stop = idxmax; ++ int32_t prev_idx = idx; ++ ++ while (*us != L('\0')) ++ { ++ int32_t tmp = findidx (&us, -1); ++ unsigned char rule = tmp >> 24; ++ prev_idx = idx; ++ idx = tmp & 0xffffff; ++ idxcnt = idxmax++; ++ ++ /* Save the rule for the first sequence. */ ++ if (__glibc_unlikely (idxcnt == 0)) ++ seq->rule = rule; ++ ++ if ((rulesets[rule * nrules + pass] ++ & sort_backward) == 0) ++ /* No more backward characters to push. */ ++ break; ++ ++idxcnt; ++ } ++ ++ if (backw_stop >= idxcnt) ++ { ++ /* No sequence at all or just one. 
*/ ++ if (idxcnt == idxmax || backw_stop > idxcnt) ++ /* Note that len is still zero. */ ++ break; ++ ++ backw_stop = ~0ul; ++ } ++ else ++ { ++ /* We pushed backward sequences. If the stream ended with the ++ backward sequence, then we process the last sequence we ++ found. Otherwise we process the sequence before the last ++ one since the last one was a forward sequence. */ ++ seq->back_us = seq->us; ++ seq->us = us; ++ backw = idxcnt; ++ if (idxmax > idxcnt) ++ { ++ backw--; ++ seq->save_idx = idx; ++ idx = prev_idx; ++ } ++ if (backw > backw_stop) ++ backw--; ++ } ++ } ++ ++ len = weights[idx++]; ++ /* Skip over indeces of previous levels. */ ++ for (int i = 0; i < pass; i++) ++ { ++ idx += len; ++ len = weights[idx]; ++ idx++; ++ } ++ } ++ ++ /* Update the structure. */ ++ seq->val = val; ++ seq->len = len; ++ seq->backw_stop = backw_stop; ++ seq->backw = backw; ++ seq->idxcnt = idxcnt; ++ seq->idxmax = idxmax; ++ seq->us = us; ++ seq->idx = idx; ++} ++ ++/* Compare two sequences. This version does not use the index and rules ++ cache. */ ++static int ++do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position, ++ const USTRING_TYPE *weights) ++{ ++ int seq1len = seq1->len; ++ int seq2len = seq2->len; ++ int val1 = seq1->val; ++ int val2 = seq2->val; ++ int idx1 = seq1->idx; ++ int idx2 = seq2->idx; ++ int result = 0; ++ ++ /* Test for position if necessary. */ ++ if (position && val1 != val2) ++ { ++ result = val1 - val2; ++ goto out; ++ } ++ ++ /* Compare the two sequences. */ ++ do ++ { ++ if (weights[idx1] != weights[idx2]) ++ { ++ /* The sequences differ. */ ++ result = weights[idx1] - weights[idx2]; ++ goto out; ++ } ++ ++ /* Increment the offsets. 
*/ ++ ++idx1; ++ ++idx2; ++ ++ --seq1len; ++ --seq2len; ++ } ++ while (seq1len > 0 && seq2len > 0); ++ ++ if (position && seq1len != seq2len) ++ result = seq1len - seq2len; ++ ++out: ++ seq1->len = seq1len; ++ seq2->len = seq2len; ++ seq1->idx = idx1; ++ seq2->idx = idx2; ++ return result; ++} ++ ++/* Compare two sequences using the index cache. */ ++static int ++do_compare (coll_seq *seq1, coll_seq *seq2, int position, ++ const USTRING_TYPE *weights) ++{ ++ int seq1len = seq1->len; ++ int seq2len = seq2->len; ++ int val1 = seq1->val; ++ int val2 = seq2->val; ++ int32_t *idx1arr = seq1->idxarr; ++ int32_t *idx2arr = seq2->idxarr; ++ int idx1now = seq1->idxnow; ++ int idx2now = seq2->idxnow; ++ int result = 0; ++ ++ /* Test for position if necessary. */ ++ if (position && val1 != val2) ++ { ++ result = val1 - val2; ++ goto out; ++ } ++ ++ /* Compare the two sequences. */ ++ do ++ { ++ if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) ++ { ++ /* The sequences differ. */ ++ result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]]; ++ goto out; ++ } ++ ++ /* Increment the offsets. 
*/ ++ ++idx1arr[idx1now]; ++ ++idx2arr[idx2now]; ++ ++ --seq1len; ++ --seq2len; ++ } ++ while (seq1len > 0 && seq2len > 0); ++ ++ if (position && seq1len != seq2len) ++ result = seq1len - seq2len; ++ ++out: ++ seq1->len = seq1len; ++ seq2->len = seq2len; ++ return result; ++} ++ + int +-STRCOLL (s1, s2, l) +- const STRING_TYPE *s1; +- const STRING_TYPE *s2; +- __locale_t l; ++STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l) + { + struct __locale_data *current = l->__locales[LC_COLLATE]; + uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; +@@ -56,34 +479,6 @@ STRCOLL (s1, s2, l) + const USTRING_TYPE *weights; + const USTRING_TYPE *extra; + const int32_t *indirect; +- uint_fast32_t pass; +- int result = 0; +- const USTRING_TYPE *us1; +- const USTRING_TYPE *us2; +- size_t s1len; +- size_t s2len; +- int32_t *idx1arr; +- int32_t *idx2arr; +- unsigned char *rule1arr; +- unsigned char *rule2arr; +- size_t idx1max; +- size_t idx2max; +- size_t idx1cnt; +- size_t idx2cnt; +- size_t idx1now; +- size_t idx2now; +- size_t backw1_stop; +- size_t backw2_stop; +- size_t backw1; +- size_t backw2; +- int val1; +- int val2; +- int position; +- int seq1len; +- int seq2len; +- int use_malloc; +- +-#include WEIGHT_H + + if (nrules == 0) + return STRCMP (s1, s2); +@@ -98,7 +493,6 @@ STRCOLL (s1, s2, l) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; + indirect = (const int32_t *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; +- use_malloc = 0; + + assert (((uintptr_t) table) % __alignof__ (table[0]) == 0); + assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0); +@@ -106,18 +500,13 @@ STRCOLL (s1, s2, l) + assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); + + /* We need this a few times. */ +- s1len = STRLEN (s1); +- s2len = STRLEN (s2); ++ size_t s1len = STRLEN (s1); ++ size_t s2len = STRLEN (s2); + + /* Catch empty strings. 
*/ +- if (__builtin_expect (s1len == 0, 0) || __builtin_expect (s2len == 0, 0)) ++ if (__glibc_unlikely (s1len == 0) || __glibc_unlikely (s2len == 0)) + return (s1len != 0) - (s2len != 0); + +- /* We need the elements of the strings as unsigned values since they +- are used as indeces. */ +- us1 = (const USTRING_TYPE *) s1; +- us2 = (const USTRING_TYPE *) s2; +- + /* Perform the first pass over the string and while doing this find + and store the weights for each character. Since we want this to + be as fast as possible we are using `alloca' to store the temporary +@@ -127,411 +516,124 @@ STRCOLL (s1, s2, l) + + Please note that the localedef programs makes sure that `position' + is not used at the first level. */ +- if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1))) +- { +- idx1arr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1)); +- idx2arr = &idx1arr[s1len]; +- rule1arr = (unsigned char *) &idx2arr[s2len]; +- rule2arr = &rule1arr[s1len]; +- +- if (idx1arr == NULL) +- /* No memory. Well, go with the stack then. +- +- XXX Once this implementation is stable we will handle this +- differently. Instead of precomputing the indeces we will +- do this in time. This means, though, that this happens for +- every pass again. */ +- goto try_stack; +- use_malloc = 1; +- } +- else +- { +- try_stack: +- idx1arr = (int32_t *) alloca (s1len * sizeof (int32_t)); +- idx2arr = (int32_t *) alloca (s2len * sizeof (int32_t)); +- rule1arr = (unsigned char *) alloca (s1len); +- rule2arr = (unsigned char *) alloca (s2len); +- } + +- idx1cnt = 0; +- idx2cnt = 0; +- idx1max = 0; +- idx2max = 0; +- idx1now = 0; +- idx2now = 0; +- backw1_stop = ~0ul; +- backw2_stop = ~0ul; +- backw1 = ~0ul; +- backw2 = ~0ul; +- seq1len = 0; +- seq2len = 0; +- position = rulesets[0] & sort_position; +- while (1) +- { +- val1 = 0; +- val2 = 0; ++ coll_seq seq1, seq2; ++ bool use_malloc = false; ++ int result = 0; + +- /* Get the next non-IGNOREd element for string `s1'. 
*/ +- if (seq1len == 0) +- do +- { +- ++val1; +- +- if (backw1_stop != ~0ul) +- { +- /* The is something pushed. */ +- if (backw1 == backw1_stop) +- { +- /* The last pushed character was handled. Continue +- with forward characters. */ +- if (idx1cnt < idx1max) +- { +- idx1now = idx1cnt; +- backw1_stop = ~0ul; +- } +- else +- /* Nothing anymore. The backward sequence ended with +- the last sequence in the string. Note that seq1len +- is still zero. */ +- break; +- } +- else +- idx1now = --backw1; +- } +- else +- { +- backw1_stop = idx1max; +- +- while (*us1 != L('\0')) +- { +- int32_t tmp = findidx (&us1, -1); +- rule1arr[idx1max] = tmp >> 24; +- idx1arr[idx1max] = tmp & 0xffffff; +- idx1cnt = idx1max++; +- +- if ((rulesets[rule1arr[idx1cnt] * nrules] +- & sort_backward) == 0) +- /* No more backward characters to push. */ +- break; +- ++idx1cnt; +- } +- +- if (backw1_stop >= idx1cnt) +- { +- /* No sequence at all or just one. */ +- if (idx1cnt == idx1max || backw1_stop > idx1cnt) +- /* Note that seq1len is still zero. */ +- break; +- +- backw1_stop = ~0ul; +- idx1now = idx1cnt; +- } +- else +- /* We pushed backward sequences. */ +- idx1now = backw1 = idx1cnt - 1; +- } +- } +- while ((seq1len = weights[idx1arr[idx1now]++]) == 0); +- +- /* And the same for string `s2'. */ +- if (seq2len == 0) +- do +- { +- ++val2; +- +- if (backw2_stop != ~0ul) +- { +- /* The is something pushed. */ +- if (backw2 == backw2_stop) +- { +- /* The last pushed character was handled. Continue +- with forward characters. */ +- if (idx2cnt < idx2max) +- { +- idx2now = idx2cnt; +- backw2_stop = ~0ul; +- } +- else +- /* Nothing anymore. The backward sequence ended with +- the last sequence in the string. Note that seq2len +- is still zero. 
*/ +- break; +- } +- else +- idx2now = --backw2; +- } +- else +- { +- backw2_stop = idx2max; +- +- while (*us2 != L('\0')) +- { +- int32_t tmp = findidx (&us2, -1); +- rule2arr[idx2max] = tmp >> 24; +- idx2arr[idx2max] = tmp & 0xffffff; +- idx2cnt = idx2max++; +- +- if ((rulesets[rule2arr[idx2cnt] * nrules] +- & sort_backward) == 0) +- /* No more backward characters to push. */ +- break; +- ++idx2cnt; +- } +- +- if (backw2_stop >= idx2cnt) +- { +- /* No sequence at all or just one. */ +- if (idx2cnt == idx2max || backw2_stop > idx2cnt) +- /* Note that seq1len is still zero. */ +- break; +- +- backw2_stop = ~0ul; +- idx2now = idx2cnt; +- } +- else +- /* We pushed backward sequences. */ +- idx2now = backw2 = idx2cnt - 1; +- } +- } +- while ((seq2len = weights[idx2arr[idx2now]++]) == 0); +- +- /* See whether any or both strings are empty. */ +- if (seq1len == 0 || seq2len == 0) +- { +- if (seq1len == seq2len) +- /* Both ended. So far so good, both strings are equal at the +- first level. */ +- break; +- +- /* This means one string is shorter than the other. Find out +- which one and return an appropriate value. */ +- result = seq1len == 0 ? -1 : 1; +- goto free_and_return; +- } +- +- /* Test for position if necessary. */ +- if (position && val1 != val2) +- { +- result = val1 - val2; +- goto free_and_return; +- } ++ memset (&seq1, 0, sizeof (seq1)); ++ seq2 = seq1; + +- /* Compare the two sequences. */ +- do +- { +- if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) +- { +- /* The sequences differ. */ +- result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]]; +- goto free_and_return; +- } ++ size_t size_max = SIZE_MAX / (sizeof (int32_t) + 1); + +- /* Increment the offsets. */ +- ++idx1arr[idx1now]; +- ++idx2arr[idx2now]; ++ /* If the strings are long enough to cause overflow in the size request, then ++ skip the allocation and proceed with the non-cached routines. 
*/ ++ if (MIN (s1len, s2len) > size_max ++ || MAX (s1len, s2len) > size_max - MIN (s1len, s2len)) ++ goto begin_collate; + +- --seq1len; +- --seq2len; +- } +- while (seq1len > 0 && seq2len > 0); ++ if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1))) ++ { ++ seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1)); + +- if (position && seq1len != seq2len) ++ /* If we failed to allocate memory, we leave everything as NULL so that ++ we use the nocache version of traversal and comparison functions. */ ++ if (seq1.idxarr != NULL) + { +- result = seq1len - seq2len; +- goto free_and_return; ++ seq2.idxarr = &seq1.idxarr[s1len]; ++ seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len]; ++ seq2.rulearr = &seq1.rulearr[s1len]; ++ use_malloc = true; + } + } ++ else ++ { ++ seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t)); ++ seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t)); ++ seq1.rulearr = (unsigned char *) alloca (s1len); ++ seq2.rulearr = (unsigned char *) alloca (s2len); ++ } ++ ++ int rule; + +- /* Now the remaining passes over the weights. We now use the +- indeces we found before. */ +- for (pass = 1; pass < nrules; ++pass) ++ begin_collate: ++ rule = 0; ++ /* Cache values in the first pass and if needed, use them in subsequent ++ passes. */ ++ for (int pass = 0; pass < nrules; ++pass) + { ++ seq1.idxcnt = 0; ++ seq1.idx = 0; ++ seq2.idx = 0; ++ seq1.backw_stop = ~0ul; ++ seq1.backw = ~0ul; ++ seq2.idxcnt = 0; ++ seq2.backw_stop = ~0ul; ++ seq2.backw = ~0ul; ++ ++ /* We need the elements of the strings as unsigned values since they ++ are used as indices. */ ++ seq1.us = (const USTRING_TYPE *) s1; ++ seq2.us = (const USTRING_TYPE *) s2; ++ + /* We assume that if a rule has defined `position' in one section + this is true for all of them.
*/ +- idx1cnt = 0; +- idx2cnt = 0; +- backw1_stop = ~0ul; +- backw2_stop = ~0ul; +- backw1 = ~0ul; +- backw2 = ~0ul; +- position = rulesets[rule1arr[0] * nrules + pass] & sort_position; ++ int position = rulesets[rule * nrules + pass] & sort_position; + + while (1) + { +- val1 = 0; +- val2 = 0; +- +- /* Get the next non-IGNOREd element for string `s1'. */ +- if (seq1len == 0) +- do +- { +- ++val1; +- +- if (backw1_stop != ~0ul) +- { +- /* The is something pushed. */ +- if (backw1 == backw1_stop) +- { +- /* The last pushed character was handled. Continue +- with forward characters. */ +- if (idx1cnt < idx1max) +- { +- idx1now = idx1cnt; +- backw1_stop = ~0ul; +- } +- else +- { +- /* Nothing anymore. The backward sequence +- ended with the last sequence in the string. */ +- idx1now = ~0ul; +- break; +- } +- } +- else +- idx1now = --backw1; +- } +- else +- { +- backw1_stop = idx1cnt; +- +- while (idx1cnt < idx1max) +- { +- if ((rulesets[rule1arr[idx1cnt] * nrules + pass] +- & sort_backward) == 0) +- /* No more backward characters to push. */ +- break; +- ++idx1cnt; +- } +- +- if (backw1_stop == idx1cnt) +- { +- /* No sequence at all or just one. */ +- if (idx1cnt == idx1max) +- /* Note that seq1len is still zero. */ +- break; +- +- backw1_stop = ~0ul; +- idx1now = idx1cnt++; +- } +- else +- /* We pushed backward sequences. */ +- idx1now = backw1 = idx1cnt - 1; +- } +- } +- while ((seq1len = weights[idx1arr[idx1now]++]) == 0); +- +- /* And the same for string `s2'. */ +- if (seq2len == 0) +- do +- { +- ++val2; +- +- if (backw2_stop != ~0ul) +- { +- /* The is something pushed. */ +- if (backw2 == backw2_stop) +- { +- /* The last pushed character was handled. Continue +- with forward characters. */ +- if (idx2cnt < idx2max) +- { +- idx2now = idx2cnt; +- backw2_stop = ~0ul; +- } +- else +- { +- /* Nothing anymore. The backward sequence +- ended with the last sequence in the string. 
*/ +- idx2now = ~0ul; +- break; +- } +- } +- else +- idx2now = --backw2; +- } +- else +- { +- backw2_stop = idx2cnt; +- +- while (idx2cnt < idx2max) +- { +- if ((rulesets[rule2arr[idx2cnt] * nrules + pass] +- & sort_backward) == 0) +- /* No more backward characters to push. */ +- break; +- ++idx2cnt; +- } +- +- if (backw2_stop == idx2cnt) +- { +- /* No sequence at all or just one. */ +- if (idx2cnt == idx2max) +- /* Note that seq2len is still zero. */ +- break; +- +- backw2_stop = ~0ul; +- idx2now = idx2cnt++; +- } +- else +- /* We pushed backward sequences. */ +- idx2now = backw2 = idx2cnt - 1; +- } +- } +- while ((seq2len = weights[idx2arr[idx2now]++]) == 0); ++ if (__glibc_unlikely (seq1.idxarr == NULL)) ++ { ++ get_next_seq_nocache (&seq1, nrules, rulesets, weights, table, ++ extra, indirect, pass); ++ get_next_seq_nocache (&seq2, nrules, rulesets, weights, table, ++ extra, indirect, pass); ++ } ++ else if (pass == 0) ++ { ++ get_next_seq (&seq1, nrules, rulesets, weights, table, extra, ++ indirect); ++ get_next_seq (&seq2, nrules, rulesets, weights, table, extra, ++ indirect); ++ } ++ else ++ { ++ get_next_seq_cached (&seq1, nrules, pass, rulesets, weights); ++ get_next_seq_cached (&seq2, nrules, pass, rulesets, weights); ++ } + + /* See whether any or both strings are empty. */ +- if (seq1len == 0 || seq2len == 0) ++ if (seq1.len == 0 || seq2.len == 0) + { +- if (seq1len == seq2len) ++ if (seq1.len == seq2.len) + /* Both ended. So far so good, both strings are equal + at this level. */ + break; + + /* This means one string is shorter than the other. Find out + which one and return an appropriate value. */ +- result = seq1len == 0 ? -1 : 1; ++ result = seq1.len == 0 ? -1 : 1; + goto free_and_return; + } + +- /* Test for position if necessary. */ +- if (position && val1 != val2) +- { +- result = val1 - val2; +- goto free_and_return; +- } +- +- /* Compare the two sequences. 
*/ +- do +- { +- if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]]) +- { +- /* The sequences differ. */ +- result = (weights[idx1arr[idx1now]] +- - weights[idx2arr[idx2now]]); +- goto free_and_return; +- } +- +- /* Increment the offsets. */ +- ++idx1arr[idx1now]; +- ++idx2arr[idx2now]; +- +- --seq1len; +- --seq2len; +- } +- while (seq1len > 0 && seq2len > 0); +- +- if (position && seq1len != seq2len) +- { +- result = seq1len - seq2len; +- goto free_and_return; +- } ++ if (__glibc_unlikely (seq1.idxarr == NULL)) ++ result = do_compare_nocache (&seq1, &seq2, position, weights); ++ else ++ result = do_compare (&seq1, &seq2, position, weights); ++ if (result != 0) ++ goto free_and_return; + } ++ ++ if (__glibc_likely (seq1.rulearr != NULL)) ++ rule = seq1.rulearr[0]; ++ else ++ rule = seq1.rule; + } + + /* Free the memory if needed. */ + free_and_return: + if (use_malloc) +- free (idx1arr); ++ free (seq1.idxarr); + + return result; + }