2014-07-02 Florian Weimer * manual/locale.texi (Locale Names): New section documenting locale name syntax. Adjust menu and node chaining accordingly. (Choosing Locale): Reference Locale Names, Locale Categories. Mention setting LC_ALL=C. Reflect that name syntax is now documented. (Locale Categories): New section title. Reference Locale Names. LC_ALL is an environment variable, but not a category. (Setting the Locale): Remove "locale -a" invocation and LOCPATH description, now in Locale Name. Reference that section. Locale name syntax is now documented. [BZ #17137] * locale/findlocale.c (name_present, valid_locale_name): New functions. (_nl_find_locale): Use the loc_name variable to store name candidates. Call name_present and valid_locale_name to check and validate locale names. Return an error if the locale is invalid. * localedata/tst-setlocale3.c: New file. * localedata/Makefile (tests): Add tst-setlocale3. (tst-setlocale3-ENV): New variable. * locale/setlocale.c (setlocale): Use strdup for allocating composite name copy. Index: glibc-2.19/locale/findlocale.c =================================================================== --- glibc-2.19.orig/locale/findlocale.c +++ glibc-2.19/locale/findlocale.c @@ -17,6 +17,7 @@ . */ #include +#include #include #include #include @@ -57,6 +58,45 @@ struct loaded_l10nfile *_nl_locale_file_ const char _nl_default_locale_path[] attribute_hidden = LOCALEDIR; +/* Checks if the name is actually present, that is, not NULL and not + empty. */ +static inline int +name_present (const char *name) +{ + return name != NULL && name[0] != '\0'; +} + +/* Checks that the locale name neither extremely long, nor contains a + ".." path component (to prevent directory traversal). */ +static inline int +valid_locale_name (const char *name) +{ + /* Not set. */ + size_t namelen = strlen (name); + /* Name too long. The limit is arbitrary and prevents stack overflow + issues later. */ + if (__glibc_unlikely (namelen > 255)) + return 0; + /* Directory traversal attempt. */ + static const char slashdot[4] = {'/', '.', '.', '/'}; + if (__glibc_unlikely (memmem (name, namelen, + slashdot, sizeof (slashdot)) != NULL)) + return 0; + if (namelen == 2 && __glibc_unlikely (name[0] == '.' && name [1] == '.')) + return 0; + if (namelen >= 3 + && __glibc_unlikely (((name[0] == '.' + && name[1] == '.' + && name[2] == '/') + || (name[namelen - 3] == '/' + && name[namelen - 2] == '.' + && name[namelen - 1] == '.')))) + return 0; + /* If there is a slash in the name, it must start with one. */ + if (__glibc_unlikely (memchr (name, '/', namelen) != NULL) && name[0] != '/') + return 0; + return 1; +} struct __locale_data * internal_function @@ -65,7 +105,7 @@ _nl_find_locale (const char *locale_path { int mask; /* Name of the locale for this category. */ - char *loc_name; + char *loc_name = (char *) *name; const char *language; const char *modifier; const char *territory; @@ -73,31 +113,39 @@ _nl_find_locale (const char *locale_path const char *normalized_codeset; struct loaded_l10nfile *locale_file; - if ((*name)[0] == '\0') + if (loc_name[0] == '\0') { /* The user decides which locale to use by setting environment variables. */ - *name = getenv ("LC_ALL"); - if (*name == NULL || (*name)[0] == '\0') - *name = getenv (_nl_category_names.str + loc_name = getenv ("LC_ALL"); + if (!name_present (loc_name)) + loc_name = getenv (_nl_category_names.str + _nl_category_name_idxs[category]); - if (*name == NULL || (*name)[0] == '\0') - *name = getenv ("LANG"); + if (!name_present (loc_name)) + loc_name = getenv ("LANG"); + if (!name_present (loc_name)) + loc_name = (char *) _nl_C_name; } - if (*name == NULL || (*name)[0] == '\0' - || (__builtin_expect (__libc_enable_secure, 0) - && strchr (*name, '/') != NULL)) - *name = (char *) _nl_C_name; + /* We used to fall back to the C locale if the name contains a slash + character '/', but we now check for directory traversal in + valid_locale_name, so this is no longer necessary. */ - if (__builtin_expect (strcmp (*name, _nl_C_name), 1) == 0 - || __builtin_expect (strcmp (*name, _nl_POSIX_name), 1) == 0) + if (__builtin_expect (strcmp (loc_name, _nl_C_name), 1) == 0 + || __builtin_expect (strcmp (loc_name, _nl_POSIX_name), 1) == 0) { /* We need not load anything. The needed data is contained in the library itself. */ *name = (char *) _nl_C_name; return _nl_C[category]; } + else if (!valid_locale_name (loc_name)) + { + __set_errno (EINVAL); + return NULL; + } + + *name = loc_name; /* We really have to load some data. First we try the archive, but only if there was no LOCPATH environment variable specified. */ Index: glibc-2.19/locale/setlocale.c =================================================================== --- glibc-2.19.orig/locale/setlocale.c +++ glibc-2.19/locale/setlocale.c @@ -272,6 +272,8 @@ setlocale (int category, const char *loc of entries of the form `CATEGORY=VALUE'. */ const char *newnames[__LC_LAST]; struct __locale_data *newdata[__LC_LAST]; + /* Copy of the locale argument, for in-place splitting. */ + char *locale_copy = NULL; /* Set all name pointers to the argument name. */ for (category = 0; category < __LC_LAST; ++category) @@ -281,7 +283,13 @@ setlocale (int category, const char *loc if (__builtin_expect (strchr (locale, ';') != NULL, 0)) { /* This is a composite name. Make a copy and split it up. */ - char *np = strdupa (locale); + locale_copy = strdup (locale); + if (__glibc_unlikely (locale_copy == NULL)) + { + __libc_rwlock_unlock (__libc_setlocale_lock); + return NULL; + } + char *np = locale_copy; char *cp; int cnt; @@ -299,6 +307,7 @@ setlocale (int category, const char *loc { error_return: __libc_rwlock_unlock (__libc_setlocale_lock); + free (locale_copy); /* Bogus category name. */ ERROR_RETURN; @@ -391,8 +400,9 @@ setlocale (int category, const char *loc /* Critical section left. */ __libc_rwlock_unlock (__libc_setlocale_lock); - /* Free the resources (the locale path variable). */ + /* Free the resources. */ free (locale_path); + free (locale_copy); return composite; } Index: glibc-2.19/localedata/Makefile =================================================================== --- glibc-2.19.orig/localedata/Makefile +++ glibc-2.19/localedata/Makefile @@ -77,7 +77,7 @@ locale_test_suite := tst_iswalnum tst_is tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \ tst-leaks tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \ - tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 + tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 tests-static = bug-setlocale1-static tests += $(tests-static) ifeq (yes,$(build-shared)) @@ -292,6 +292,7 @@ bug-setlocale1-ARGS = -- $(host-test-pro bug-setlocale1-static-ENV = $(bug-setlocale1-ENV) bug-setlocale1-static-ARGS = $(bug-setlocale1-ARGS) tst-setlocale2-ENV = LOCPATH=$(common-objpfx)localedata +tst-setlocale3-ENV = LOCPATH=$(common-objpfx)localedata $(objdir)/iconvdata/gconv-modules: $(MAKE) -C ../iconvdata subdir=iconvdata $@ Index: glibc-2.19/localedata/tst-setlocale3.c =================================================================== --- /dev/null +++ glibc-2.19/localedata/tst-setlocale3.c @@ -0,0 +1,203 @@ +/* Regression test for setlocale invalid environment variable handling. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* The result of setlocale may be overwritten by subsequent calls, so + this wrapper makes a copy. */ +static char * +setlocale_copy (int category, const char *locale) +{ + const char *result = setlocale (category, locale); + if (result == NULL) + return NULL; + return strdup (result); +} + +static char *de_locale; + +static void +setlocale_fail (const char *envstring) +{ + setenv ("LC_CTYPE", envstring, 1); + if (setlocale (LC_CTYPE, "") != NULL) + { + printf ("unexpected setlocale success for \"%s\" locale\n", envstring); + exit (1); + } + const char *newloc = setlocale (LC_CTYPE, NULL); + if (strcmp (newloc, de_locale) != 0) + { + printf ("failed setlocale call \"%s\" changed locale to \"%s\"\n", + envstring, newloc); + exit (1); + } +} + +static void +setlocale_success (const char *envstring) +{ + setenv ("LC_CTYPE", envstring, 1); + char *newloc = setlocale_copy (LC_CTYPE, ""); + if (newloc == NULL) + { + printf ("setlocale for \"%s\": %m\n", envstring); + exit (1); + } + if (strcmp (newloc, de_locale) == 0) + { + printf ("setlocale with LC_CTYPE=\"%s\" left locale at \"%s\"\n", + envstring, de_locale); + exit (1); + } + if (setlocale (LC_CTYPE, de_locale) == NULL) + { + printf ("restoring locale \"%s\" with LC_CTYPE=\"%s\": %m\n", + de_locale, envstring); + exit (1); + } + char *newloc2 = setlocale_copy (LC_CTYPE, newloc); + if (newloc2 == NULL) + { + printf ("restoring locale \"%s\" following \"%s\": %m\n", + newloc, envstring); + exit (1); + } + if (strcmp (newloc, newloc2) != 0) + { + printf ("representation of locale \"%s\" changed from \"%s\" to \"%s\"", + envstring, newloc, newloc2); + exit (1); + } + free (newloc); + free (newloc2); + + if (setlocale (LC_CTYPE, de_locale) == NULL) + { + printf ("restoring locale \"%s\" with LC_CTYPE=\"%s\": %m\n", + de_locale, envstring); + exit (1); + } +} + +/* Checks that a known-good locale still works if LC_ALL contains a + value which should be ignored. */ +static void +setlocale_ignore (const char *to_ignore) +{ + const char *fr_locale = "fr_FR.UTF-8"; + setenv ("LC_CTYPE", fr_locale, 1); + char *expected_locale = setlocale_copy (LC_CTYPE, ""); + if (expected_locale == NULL) + { + printf ("setlocale with LC_CTYPE=\"%s\" failed: %m\n", fr_locale); + exit (1); + } + if (setlocale (LC_CTYPE, de_locale) == NULL) + { + printf ("failed to restore locale: %m\n"); + exit (1); + } + unsetenv ("LC_CTYPE"); + + setenv ("LC_ALL", to_ignore, 1); + setenv ("LC_CTYPE", fr_locale, 1); + const char *actual_locale = setlocale (LC_CTYPE, ""); + if (actual_locale == NULL) + { + printf ("setlocale with LC_ALL, LC_CTYPE=\"%s\" failed: %m\n", + fr_locale); + exit (1); + } + if (strcmp (actual_locale, expected_locale) != 0) + { + printf ("setlocale under LC_ALL failed: got \"%s\", expected \"%s\"\n", + actual_locale, expected_locale); + exit (1); + } + unsetenv ("LC_CTYPE"); + setlocale_success (fr_locale); + unsetenv ("LC_ALL"); + free (expected_locale); +} + +static int +do_test (void) +{ + /* The glibc test harness sets this environment variable + uncondionally. */ + unsetenv ("LC_ALL"); + + de_locale = setlocale_copy (LC_CTYPE, "de_DE.UTF-8"); + if (de_locale == NULL) + { + printf ("setlocale (LC_CTYPE, \"de_DE.UTF-8\"): %m\n"); + return 1; + } + setlocale_success ("C"); + setlocale_success ("en_US.UTF-8"); + setlocale_success ("/en_US.UTF-8"); + setlocale_success ("//en_US.UTF-8"); + setlocale_ignore (""); + + setlocale_fail ("does-not-exist"); + setlocale_fail ("/"); + setlocale_fail ("/../localedata/en_US.UTF-8"); + setlocale_fail ("en_US.UTF-8/"); + setlocale_fail ("en_US.UTF-8/.."); + setlocale_fail ("en_US.UTF-8/../en_US.UTF-8"); + setlocale_fail ("../localedata/en_US.UTF-8"); + { + size_t large_length = 1024; + char *large_name = malloc (large_length + 1); + if (large_name == NULL) + { + puts ("malloc failure"); + return 1; + } + memset (large_name, '/', large_length); + const char *suffix = "en_US.UTF-8"; + strcpy (large_name + large_length - strlen (suffix), suffix); + setlocale_fail (large_name); + free (large_name); + } + { + size_t huge_length = 64 * 1024 * 1024; + char *huge_name = malloc (huge_length + 1); + if (huge_name == NULL) + { + puts ("malloc failure"); + return 1; + } + memset (huge_name, 'X', huge_length); + huge_name[huge_length] = '\0'; + /* Construct a composite locale specification. */ + const char *prefix = "LC_CTYPE=de_DE.UTF-8;LC_TIME="; + memcpy (huge_name, prefix, strlen (prefix)); + setlocale_fail (huge_name); + free (huge_name); + } + + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" Index: glibc-2.19/manual/locale.texi =================================================================== --- glibc-2.19.orig/manual/locale.texi +++ glibc-2.19/manual/locale.texi @@ -29,6 +29,7 @@ will follow the conventions preferred by * Setting the Locale:: How a program specifies the locale with library functions. * Standard Locales:: Locale names available on all systems. +* Locale Names:: Format of system-specific locale names. * Locale Information:: How to access the information for the locale. * Formatting Numbers:: A dedicated function to format numbers. * Yes-or-No Questions:: Check a Response against the locale. @@ -99,14 +100,16 @@ locale named @samp{espana-castellano} to most of Spain. The set of locales supported depends on the operating system you are -using, and so do their names. We can't make any promises about what -locales will exist, except for one standard locale called @samp{C} or -@samp{POSIX}. Later we will describe how to construct locales. -@comment (@pxref{Building Locale Files}). +using, and so do their names, except that the standard locale called +@samp{C} or @samp{POSIX} always exist. @xref{Locale Names}. + +In order to force the system to always use the default locale, the +user can set the @code{LC_ALL} environment variable to @samp{C}. @cindex combining locales -A user also has the option of specifying different locales for different -purposes---in effect, choosing a mixture of multiple locales. +A user also has the option of specifying different locales for +different purposes---in effect, choosing a mixture of multiple +locales. @xref{Locale Categories}. For example, the user might specify the locale @samp{espana-castellano} for most purposes, but specify the locale @samp{usa-english} for @@ -120,7 +123,7 @@ which locales apply. However, the user for a particular subset of those purposes. @node Locale Categories, Setting the Locale, Choosing Locale, Locales -@section Categories of Activities that Locales Affect +@section Locale Categories @cindex categories for locales @cindex locale categories @@ -128,7 +131,11 @@ The purposes that locales serve are grou that a user or a program can choose the locale for each category independently. Here is a table of categories; each name is both an environment variable that a user can set, and a macro name that you can -use as an argument to @code{setlocale}. +use as the first argument to @code{setlocale}. + +The contents of the environment variable (or the string in the second +argument to @code{setlocale}) has to be a valid locale name. +@xref{Locale Names}. @vtable @code @comment locale.h @@ -172,7 +179,7 @@ for affirmative and negative responses. @comment locale.h @comment ISO @item LC_ALL -This is not an environment variable; it is only a macro that you can use +This is not a category; it is only a macro that you can use with @code{setlocale} to set a single locale for all purposes. Setting this environment variable overwrites all selections by the other @code{LC_*} variables or @code{LANG}. @@ -355,13 +362,7 @@ The symbols in this section are defined @c strndup @ascuheap @acsmem @c strcasecmp_l ok (C locale) The function @code{setlocale} sets the current locale for category -@var{category} to @var{locale}. A list of all the locales the system -provides can be created by running - -@pindex locale -@smallexample - locale -a -@end smallexample +@var{category} to @var{locale}. If @var{category} is @code{LC_ALL}, this specifies the locale for all purposes. The other possible values of @var{category} specify an @@ -386,10 +387,9 @@ is passed in as @var{locale} parameter. When you read the current locale for category @code{LC_ALL}, the value encodes the entire combination of selected locales for all categories. -In this case, the value is not just a single locale name. In fact, we -don't make any promises about what it looks like. But if you specify -the same ``locale name'' with @code{LC_ALL} in a subsequent call to -@code{setlocale}, it restores the same combination of locale selections. +If you specify the same ``locale name'' with @code{LC_ALL} in a +subsequent call to @code{setlocale}, it restores the same combination +of locale selections. To be sure you can use the returned string encoding the currently selected locale at a later time, you must make a copy of the string. It is not @@ -405,20 +405,15 @@ for @var{category}. If a nonempty string is given for @var{locale}, then the locale of that name is used if possible. +The effective locale name (either the second argument to +@code{setlocale}, or if the argument is an empty string, the name +obtained from the process environment) must be valid locale name. +@xref{Locale Names}. + If you specify an invalid locale name, @code{setlocale} returns a null pointer and leaves the current locale unchanged. @end deftypefun -The path used for finding locale data can be set using the -@code{LOCPATH} environment variable. The default path for finding -locale data is system specific. It is computed from the value given -as the prefix while configuring the C library. This value normally is -@file{/usr} or @file{/}. For the former the complete path is: - -@smallexample -/usr/lib/locale -@end smallexample - Here is an example showing how you might use @code{setlocale} to temporarily switch to a new locale. @@ -458,7 +453,7 @@ locale categories, and future versions o portability, assume that any symbol beginning with @samp{LC_} might be defined in @file{locale.h}. -@node Standard Locales, Locale Information, Setting the Locale, Locales +@node Standard Locales, Locale Names, Setting the Locale, Locales @section Standard Locales The only locale names you can count on finding on all operating systems @@ -492,7 +487,94 @@ with the environment, rather than trying locale explicitly by name. Remember, different machines might have different sets of locales installed. -@node Locale Information, Formatting Numbers, Standard Locales, Locales +@node Locale Names, Locale Information, Standard Locales, Locales +@section Locale Names + +The following command prints a list of locales supported by the +system: + +@pindex locale +@smallexample + locale -a +@end smallexample + +@strong{Portability Note:} With the notable exception of the standard +locale names @samp{C} and @samp{POSIX}, locale names are +system-specific. + +Most locale names follow XPG syntax and consist of up to four parts: + +@smallexample +@var{language}[_@var{territory}[.@var{codeset}]][@@@var{modifier}] +@end smallexample + +Beside the first part, all of them are allowed to be missing. If the +full specified locale is not found, less specific ones are looked for. +The various parts will be stripped off, in the following order: + +@enumerate +@item +codeset +@item +normalized codeset +@item +territory +@item +modifier +@end enumerate + +For example, the locale name @samp{de_AT.iso885915@@euro} denotes a +German-language locale for use in Austria, using the ISO-8859-15 +(Latin-9) character set, and with the Euro as the currency symbol. + +In addition to locale names which follow XPG syntax, systems may +provide aliases such as @samp{german}. Both categories of names must +not contain the slash character @samp{/}. + +If the locale name starts with a slash @samp{/}, it is treated as a +path relative to the configured locale directories; see @code{LOCPATH} +below. The specified path must not contain a component @samp{..}, or +the name is invalid, and @code{setlocale} will fail. + +@strong{Portability Note:} POSIX suggests that if a locale name starts +with a slash @samp{/}, it is resolved as an absolute path. However, +@theglibc{} treats it as a relative path under the directories listed +in @code{LOCPATH} (or the default locale directory if @code{LOCPATH} +is unset). + +Locale names which are longer than an implementation-defined limit are +invalid and cause @code{setlocale} to fail. + +As a special case, locale names used with @code{LC_ALL} can combine +several locales, reflecting different locale settings for different +categories. For example, you might want to use a U.S. locale with ISO +A4 paper format, so you set @code{LANG} to @samp{en_US.UTF-8}, and +@code{LC_PAPER} to @samp{de_DE.UTF-8}. In this case, the +@code{LC_ALL}-style combined locale name is + +@smallexample +LC_CTYPE=en_US.UTF-8;LC_TIME=en_US.UTF-8;LC_PAPER=de_DE.UTF-8;@dots{} +@end smallexample + +followed by other category settings not shown here. + +@vindex LOCPATH +The path used for finding locale data can be set using the +@code{LOCPATH} environment variable. This variable lists the +directories in which to search for locale definitions, separated by a +colon @samp{:}. + +The default path for finding locale data is system specific. A typical +value for the @code{LOCPATH} default is: + +@smallexample +/usr/share/locale +@end smallexample + +The value of @code{LOCPATH} is ignored by privileged programs for +security reasons, and only the default directory is used. + +@node Locale Information, Formatting Numbers, Locale Names, Locales @section Accessing Locale Information There are several ways to access locale information. The simplest