c24db158fe
Fix regression with the C.UTF-8 locales (https://unicode-org.atlassian.net/browse/ICU-20575). OBS-URL: https://build.opensuse.org/package/show/X11:common:Factory/icu?expand=0&rev=106
80 lines
3.2 KiB
Diff
80 lines
3.2 KiB
Diff
From 075cefb2e21f57f4cac1bc2868e93dd1b8c077cc Mon Sep 17 00:00:00 2001
|
|
From: "Steven R. Loomis" <srloomis@us.ibm.com>
|
|
Date: Thu, 25 Apr 2019 10:40:28 -0700
|
|
Subject: [PATCH] ICU-20575 fix broken default locale mapping for C.UTF-8
|
|
|
|
Regression was in 1afef30549d93c17bb966c6803d5d943cf055925
|
|
PR #418 [ICU-20187]
|
|
|
|
- We dropped the mapping from "C" in uloc_canonicalize,
|
|
but then putil did not handle cases where a codepage was
|
|
set (such as C.UTF-8).
|
|
|
|
- Add an additional check in uprv_getDefaultLocaleID() for
|
|
locales that end up as "C" or "POSIX" after removing codepage
|
|
suffix.
|
|
|
|
- Also fix regression where aa@bb would become aa__BB__BB
|
|
(incorrectly doubled __BB)
|
|
---
|
|
icu4c/source/common/putil.cpp | 23 +++++++++++++++++------
|
|
1 file changed, 17 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/source/common/putil.cpp b/source/common/putil.cpp
|
|
index 532a0903cdd..289a8aaa141 100644
|
|
--- a/source/common/putil.cpp
|
|
+++ b/source/common/putil.cpp
|
|
@@ -1560,6 +1560,10 @@ static const char *uprv_getPOSIXIDForCategory(int category)
|
|
{
|
|
/* Nothing worked. Give it a nice POSIX default value. */
|
|
posixID = "en_US_POSIX";
|
|
+ // Note: this test will not catch 'C.UTF-8';
|
|
+ // that will be handled in uprv_getDefaultLocaleID().
|
|
+ // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
|
|
+ // caller which expects to see "en_US_POSIX" in many branches.
|
|
}
|
|
return posixID;
|
|
}
|
|
@@ -1631,8 +1635,8 @@ The leftmost codepage (.xxx) wins.
|
|
}
|
|
|
|
// Copy the ID into owned memory.
|
|
- // Over-allocate in case we replace "@" with "__".
|
|
- char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 1 + 1));
|
|
+ // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
|
|
+ char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
|
|
if (correctedPOSIXLocale == nullptr) {
|
|
return nullptr;
|
|
}
|
|
@@ -1641,11 +1645,18 @@ The leftmost codepage (.xxx) wins.
|
|
char *limit;
|
|
if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
|
|
*limit = 0;
|
|
- if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
|
|
- *limit = 0;
|
|
- }
|
|
+ }
|
|
+ if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
|
|
+ *limit = 0;
|
|
}
|
|
|
|
+ if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
|
|
+ || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
|
|
+ // Raw input was C.* or POSIX.*; give it a nice POSIX default value.
|
|
+ // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
|
|
+ uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
|
|
+ }
|
|
+
|
|
/* Note that we scan the *uncorrected* ID. */
|
|
const char *p;
|
|
if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
|
|
@@ -1668,7 +1679,7 @@ The leftmost codepage (.xxx) wins.
|
|
if ((q = uprv_strchr(p, '.')) != nullptr) {
|
|
/* How big will the resulting string be? */
|
|
int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
|
|
- uprv_strncat(correctedPOSIXLocale, p, q-p);
|
|
+ uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
|
|
correctedPOSIXLocale[len] = 0;
|
|
}
|
|
else {
|