From 5df87e088d368b8147277f02a4c691e6156470b6741bb60c8c1312cc96cd968d Mon Sep 17 00:00:00 2001 From: OBS User unknown Date: Fri, 15 Feb 2008 00:23:22 +0000 Subject: [PATCH] OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/icu?expand=0&rev=4 --- icu.changes | 54 +++++++ icu.spec | 78 +++++++-- icu4c-3_6-docs.tar.bz2 | 3 - icu4c-3_6-src.tar.bz2 | 3 - icu4c-3_8-docs.tar.bz2 | 3 + icu4c-3_8_1-src.tar.bz2 | 3 + libicu-regex.patch | 350 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 475 insertions(+), 19 deletions(-) delete mode 100644 icu4c-3_6-docs.tar.bz2 delete mode 100644 icu4c-3_6-src.tar.bz2 create mode 100644 icu4c-3_8-docs.tar.bz2 create mode 100644 icu4c-3_8_1-src.tar.bz2 create mode 100644 libicu-regex.patch diff --git a/icu.changes b/icu.changes index 10ca939..9210954 100644 --- a/icu.changes +++ b/icu.changes @@ -1,3 +1,57 @@ +------------------------------------------------------------------- +Wed Feb 13 17:42:00 CET 2008 - maw@suse.de + +- Update to version 3.8: + + Locale Data: ICU uses and supports data from Common Locale Data + Repository (CLDR) 1.5.0.1, which includes many improvements in + quality and quantity of data. + + Rule Based Time Zone: This set of classes provides the ability + to read and write time zone data in RFC2445 VTIMEZONE format. + This also provides access Olson timezone transitions. + + Timezone Formatting: This has changed to give more human + readable results. + + Relative Date/Time Formatting: A draft of the relative date/time + format class has been added. This functionality can be accessed + through the DateFormat::createDateInstance or DateFormat.getInstance + factory method. It provides the ability to format localized dates + in terms of "yesterday", "today" and "tomorrow", instead of a + specific only date or time. + + Demonstrations and Tools + * ICU Data Library Customizer: This new online tool provides an + easier way customize ICU's data. 
+ * ICU4J Demonstrations: These will demonstrate some features of + ICU4J, like calendars, transliteration, and several other + features. + + Flexible Date/Time Formatting: A draft of flexible date/time + format generator has been added. This allows multiple date and + time format patterns to be generated that are valid for specific + locales. This functionality can be accessed through the + DateTimePatternGenerator API. + + Time Zones: The default time zone is detected more accurately + on Unix machines. + + Additional Calendars + * @calendar=taiwan: This calendar is a variant of the Gregorian + calendar used in Taiwan. + * @calendar=indian: This is the Indian national calendar. + * @calendar=persian: This is the Persian calendar. It is also + known as the Jalāli Calendar. It is used in several Arabic + countries. + + UnicodeSet + * The Freezable design pattern is now supported, which can + improve performance for the contains() and span() methods + on frozen UnicodeSet objects. + * A span function was added for iterating through strings. + * The containsAll(string) and containsNone(string) now support + set strings, instead of only codepoints. + + Performance + * Charset conversion performance has been enhanced. The + amount of improvement varies depending on the converter + being used and platform being used. + * Rule based transliterator construction performance has been + improved. +- s#%run_ldconfig#/sbin/ldconfig# in libicu's %post and %postun +- add libicu-regex.patch (bnc#354372). + ------------------------------------------------------------------- Thu Feb 7 14:25:21 CET 2008 - pmladek@suse.cz diff --git a/icu.spec b/icu.spec index 9f3a445..1525a47 100644 --- a/icu.spec +++ b/icu.spec @@ -1,5 +1,5 @@ # -# spec file for package icu (Version 3.6) +# spec file for package icu (Version 3.8.1) # # Copyright (c) 2008 SUSE LINUX Products GmbH, Nuernberg, Germany. 
# This file and all modifications and additions to the pristine @@ -10,21 +10,23 @@ # norootforbuild + Name: icu BuildRequires: gcc-c++ License: IBM Public License Group: System/Libraries AutoReqProv: on -Version: 3.6 -Release: 33 +Version: 3.8.1 +Release: 1 Requires: libicu = %{version} Url: http://ibm.com/software/globalization/icu -Source0: icu4c-3_6-src.tar.bz2 -Source1: icu4c-3_6-docs.tar.bz2 +Source0: icu4c-3_8_1-src.tar.bz2 +Source1: icu4c-3_8-docs.tar.bz2 Source3: SuSEconfig.icu # OOo-2.3.1 and above needs to access the setBreakType method, see # http://bugs.icu-project.org/trac/ticket/5498 Patch: icu4c-3_6-src-setBreakType-public.diff +Patch1: libicu-regex.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build Summary: International Components for Unicode @@ -122,6 +124,7 @@ Authors: %prep %setup -q -n icu -a1 %patch +%patch1 -p0 %build cd source @@ -143,7 +146,6 @@ make #make check %install -rm -rf $RPM_BUILD_ROOT find . -name CVS -type d | xargs rm -rf cd source make DESTDIR=$RPM_BUILD_ROOT install @@ -151,8 +153,8 @@ make DESTDIR=$RPM_BUILD_ROOT install chmod a+rx $RPM_BUILD_ROOT%{_libdir}/*.so.* # install uncompiled source data: mkdir -p $RPM_BUILD_ROOT/usr/share/icu/%{version}/unidata -install -m 644 data/unidata/*.txt $RPM_BUILD_ROOT/usr/share/icu/%{version}/unidata -ln -s unidata/UnicodeData.txt $RPM_BUILD_ROOT/usr/share/icu/%{version}/ +install -m 644 data/unidata/*.txt $RPM_BUILD_ROOT/%{_datadir}/icu/%{version}/unidata +ln -s unidata/UnicodeData.txt $RPM_BUILD_ROOT/%{_datadir}/icu/%{version}/ # run test suite: #pushd data #ln -sf build/*.cnv build/*.res build/*.dat build/*.brk . 
@@ -162,16 +164,15 @@ ln -s unidata/UnicodeData.txt $RPM_BUILD_ROOT/usr/share/icu/%{version}/ mkdir -p $RPM_BUILD_ROOT/sbin/conf.d/ sed -e "s/@ICUVERSION@/%{version}/g" < ${RPM_SOURCE_DIR}/SuSEconfig.icu > ./SuSEconfig.icu install -m 755 ./SuSEconfig.icu $RPM_BUILD_ROOT/sbin/conf.d/ -rm $RPM_BUILD_ROOT/usr/share/icu/%{version}/license.html +rm $RPM_BUILD_ROOT/%{_datadir}/icu/%{version}/license.html +rm $RPM_BUILD_ROOT/%{_datadir}/icu/%{version}/install-sh %clean rm -rf $RPM_BUILD_ROOT -%post -n libicu -%run_ldconfig +%post -n libicu -p /sbin/ldconfig -%postun -n libicu -%run_ldconfig +%postun -n libicu -p /sbin/ldconfig %files %defattr(-, root, root) @@ -221,6 +222,57 @@ rm -rf $RPM_BUILD_ROOT /usr/share/icu/%{version}/UnicodeData.txt %changelog +* Wed Feb 13 2008 maw@suse.de +- Update to version 3.8: + + Locale Data: ICU uses and supports data from Common Locale Data + Repository (CLDR) 1.5.0.1, which includes many improvements in + quality and quantity of data. + + Rule Based Time Zone: This set of classes provides the ability + to read and write time zone data in RFC2445 VTIMEZONE format. + This also provides access Olson timezone transitions. + + Timezone Formatting: This has changed to give more human + readable results. + + Relative Date/Time Formatting: A draft of the relative date/time + format class has been added. This functionality can be accessed + through the DateFormat::createDateInstance or DateFormat.getInstance + factory method. It provides the ability to format localized dates + in terms of "yesterday", "today" and "tomorrow", instead of a + specific only date or time. + + Demonstrations and Tools + * ICU Data Library Customizer: This new online tool provides an + easier way customize ICU's data. + * ICU4J Demonstrations: These will demonstrate some features of + ICU4J, like calendars, transliteration, and several other + features. + + Flexible Date/Time Formatting: A draft of flexible date/time + format generator has been added. 
This allows multiple date and + time format patterns to be generated that are valid for specific + locales. This functionality can be accessed through the + DateTimePatternGenerator API. + + Time Zones: The default time zone is detected more accurately + on Unix machines. + + Additional Calendars + * @calendar=taiwan: This calendar is a variant of the Gregorian + calendar used in Taiwan. + * @calendar=indian: This is the Indian national calendar. + * @calendar=persian: This is the Persian calendar. It is also + known as the Jalāli Calendar. It is used in several Arabic + countries. + + UnicodeSet + * The Freezable design pattern is now supported, which can + improve performance for the contains() and span() methods + on frozen UnicodeSet objects. + * A span function was added for iterating through strings. + * The containsAll(string) and containsNone(string) now support + set strings, instead of only codepoints. + + Performance + * Charset conversion performance has been enhanced. The + amount of improvement varies depending on the converter + being used and platform being used. + * Rule based transliterator construction performance has been + improved. +- s#%%run_ldconfig#/sbin/ldconfig# in libicu's %%post and %%postun +- add libicu-regex.patch (bnc#354372). 
* Thu Feb 07 2008 pmladek@suse.cz - made RuleBasedBreakIterator::setBreakType method pubclic; it was requested by OpenOffice_org >= 2.3.1, see http://bugs.icu-project.org/trac/ticket/5498 diff --git a/icu4c-3_6-docs.tar.bz2 b/icu4c-3_6-docs.tar.bz2 deleted file mode 100644 index 3041127..0000000 --- a/icu4c-3_6-docs.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55c51c8e9787957e6fc8165579d50633ba75ff7d96e6faf9e609630c6dbc4ee2 -size 1555704 diff --git a/icu4c-3_6-src.tar.bz2 b/icu4c-3_6-src.tar.bz2 deleted file mode 100644 index 0cfda6e..0000000 --- a/icu4c-3_6-src.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18085bdae14000069f328dcdc19baea44f6e827be9801348b342559cf87a4989 -size 8666211 diff --git a/icu4c-3_8-docs.tar.bz2 b/icu4c-3_8-docs.tar.bz2 new file mode 100644 index 0000000..3822ece --- /dev/null +++ b/icu4c-3_8-docs.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b8d0486af18fd4590329b592f2126c7608a4094339fb80f70ee573d55a0b11 +size 2339992 diff --git a/icu4c-3_8_1-src.tar.bz2 b/icu4c-3_8_1-src.tar.bz2 new file mode 100644 index 0000000..65103c8 --- /dev/null +++ b/icu4c-3_8_1-src.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919f4684e0924d6be941efef16a6c2b2e74e2790c6a2d6a373928000d5b97301 +size 9320021 diff --git a/libicu-regex.patch b/libicu-regex.patch new file mode 100644 index 0000000..72738e4 --- /dev/null +++ b/libicu-regex.patch @@ -0,0 +1,350 @@ +Index: source/i18n/regexcmp.cpp +=================================================================== +--- source/i18n/regexcmp.cpp (revision 23251) ++++ source/i18n/regexcmp.cpp (revision 23418) +@@ -2,7 +2,7 @@ + // + // file: regexcmp.cpp + // +-// Copyright (C) 2002-2007 International Business Machines Corporation and others. ++// Copyright (C) 2002-2008 International Business Machines Corporation and others. + // All Rights Reserved. 
+ // + // This file contains the ICU regular expression compiler, which is responsible +@@ -1186,14 +1186,17 @@ + // Because capture groups can be forward-referenced by back-references, + // we fill the operand with the capture group number. At the end + // of compilation, it will be changed to the variable's location. +- U_ASSERT(groupNum > 0); +- int32_t op; +- if (fModeFlags & UREGEX_CASE_INSENSITIVE) { +- op = URX_BUILD(URX_BACKREF_I, groupNum); ++ if (groupNum < 1) { ++ error(U_REGEX_INVALID_BACK_REF); + } else { +- op = URX_BUILD(URX_BACKREF, groupNum); ++ int32_t op; ++ if (fModeFlags & UREGEX_CASE_INSENSITIVE) { ++ op = URX_BUILD(URX_BACKREF_I, groupNum); ++ } else { ++ op = URX_BUILD(URX_BACKREF, groupNum); ++ } ++ fRXPat->fCompiledPat->addElement(op, *fStatus); + } +- fRXPat->fCompiledPat->addElement(op, *fStatus); + } + break; + +Index: source/i18n/rematch.cpp +=================================================================== +--- source/i18n/rematch.cpp (revision 23251) ++++ source/i18n/rematch.cpp (revision 23418) +@@ -1,6 +1,6 @@ + /* + ************************************************************************** +-* Copyright (C) 2002-2007 International Business Machines Corporation * ++* Copyright (C) 2002-2008 International Business Machines Corporation * + * and others. All rights reserved. * + ************************************************************************** + */ +@@ -30,6 +30,15 @@ + + U_NAMESPACE_BEGIN + ++// Limit the size of the back track stack, to avoid system failures caused ++// by heap exhaustion. Units are in 32 bit words, not bytes. ++// This value puts ICU's limits higher than most other regexp implementations, ++// which use recursion rather than the heap, and take more storage per ++// backtrack point. ++// This constant is _temporary_. Proper API to control the value will added. 
++// ++static const int32_t BACKTRACK_STACK_CAPACITY = 8000000; ++ + //----------------------------------------------------------------------------- + // + // Constructor and Destructor +@@ -53,8 +62,9 @@ + } + if (fStack == NULL || fData == NULL) { + fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; ++ } else { ++ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); + } +- + reset(RegexStaticSets::gStaticSets->fEmptyString); + } + +@@ -78,6 +88,8 @@ + } + if (fStack == NULL || fData == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; ++ } else { ++ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); + } + reset(input); + } +@@ -102,6 +114,8 @@ + } + if (fStack == NULL || fData == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; ++ } else { ++ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); + } + reset(RegexStaticSets::gStaticSets->fEmptyString); + } +@@ -1014,6 +1028,14 @@ + inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, int32_t frameSize, UErrorCode &status) { + // push storage for a new frame. + int32_t *newFP = fStack->reserveBlock(frameSize, status); ++ if (newFP == NULL) { ++ // Heap allocation error on attempted stack expansion. ++ // We need to return a writable stack frame, so just return the ++ // previous frame. The match operation will stop quickly ++ // becuase of the error status, after which the frame will never ++ // be looked at again. ++ return fp; ++ } + fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack. + + // New stack frame = copy of old top frame. +@@ -1029,8 +1051,8 @@ + fp->fPatIdx = savePatIdx; + return (REStackFrame *)newFP; + } +- +- ++ ++ + //-------------------------------------------------------------------------------- + // + // MatchAt This is the actual matching engine. 
+@@ -2261,6 +2283,7 @@ + } + + if (U_FAILURE(status)) { ++ isMatch = FALSE; + break; + } + } +Index: source/test/intltest/regextst.h +=================================================================== +--- source/test/intltest/regextst.h (revision 23251) ++++ source/test/intltest/regextst.h (revision 23418) +@@ -1,6 +1,6 @@ + /******************************************************************** + * COPYRIGHT: +- * Copyright (c) 2002-2007, International Business Machines Corporation and ++ * Copyright (c) 2002-2008, International Business Machines Corporation and + * others. All Rights Reserved. + ********************************************************************/ + +@@ -30,6 +30,7 @@ + virtual void Extended(); + virtual void Errors(); + virtual void PerlTests(); ++ virtual void Bug6149(); + + // The following functions are internal to the regexp tests. + virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line); +Index: source/test/intltest/regextst.cpp +=================================================================== +--- source/test/intltest/regextst.cpp (revision 23251) ++++ source/test/intltest/regextst.cpp (revision 23418) +@@ -1,6 +1,6 @@ + /******************************************************************** + * COPYRIGHT: +- * Copyright (c) 2002-2007, International Business Machines Corporation and ++ * Copyright (c) 2002-2008, International Business Machines Corporation and + * others. All Rights Reserved. + ********************************************************************/ + +@@ -66,6 +66,10 @@ + case 6: name = "PerlTests"; + if (exec) PerlTests(); + break; ++ case 7: name = "Bug 6149"; ++ if (exec) Bug6149(); ++ break; ++ + + + default: name = ""; +@@ -1639,6 +1643,12 @@ + + // Ticket 5389 + REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); ++ ++ // Invalid Back Reference \0 ++ // For ICU 3.8 and earlier ++ // For ICU versions newer than 3.8, \0 introduces an octal escape. 
++ // ++ REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_INVALID_BACK_REF); + + } + +@@ -2122,6 +2132,26 @@ + } + + ++//-------------------------------------------------------------- ++// ++// Bug6149 Verify limits to heap expansion for backtrack stack. ++// Use this pattern, ++// "(a?){1,}" ++// The zero-length match will repeat forever. ++// (That this goes into a loop is another bug) ++// ++//--------------------------------------------------------------- ++void RegexTest::Bug6149() { ++ UnicodeString pattern("(a?){1,}"); ++ UnicodeString s("xyz"); ++ uint32_t flags = 0; ++ UErrorCode status = U_ZERO_ERROR; ++ ++ RegexMatcher matcher(pattern, s, flags, status); ++ UBool result = false; ++ REGEX_ASSERT_FAIL(result=matcher.matches(status), U_BUFFER_OVERFLOW_ERROR); ++ REGEX_ASSERT(result == FALSE); ++ } + + #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ + +Index: source/common/uvectr32.cpp +=================================================================== +--- source/common/uvectr32.cpp (revision 23251) ++++ source/common/uvectr32.cpp (revision 23418) +@@ -1,6 +1,6 @@ + /* + ****************************************************************************** +-* Copyright (C) 1999-2003, International Business Machines Corporation and * ++* Copyright (C) 1999-2008, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ****************************************************************************** + * Date Name Description +@@ -26,6 +26,7 @@ + UVector32::UVector32(UErrorCode &status) : + count(0), + capacity(0), ++ maxCapacity(0), + elements(NULL) + { + _init(DEFUALT_CAPACITY, status); +@@ -34,6 +35,7 @@ + UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) : + count(0), + capacity(0), ++ maxCapacity(0), + elements(0) + { + _init(initialCapacity, status); +@@ -46,6 +48,9 @@ + if (initialCapacity < 1) { + initialCapacity = DEFUALT_CAPACITY; + } ++ if (maxCapacity>0 && maxCapacity= minimumCapacity) { + return TRUE; +- } else { +- int32_t newCap = capacity * 2; +- if (newCap < minimumCapacity) { +- newCap = minimumCapacity; +- } +- int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); +- if (newElems == 0) { +- status = U_MEMORY_ALLOCATION_ERROR; +- return FALSE; +- } +- uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); +- uprv_free(elements); +- elements = newElems; +- capacity = newCap; +- return TRUE; + } ++ if (maxCapacity>0 && minimumCapacity>maxCapacity) { ++ status = U_BUFFER_OVERFLOW_ERROR; ++ return FALSE; ++ } ++ int32_t newCap = capacity * 2; ++ if (newCap < minimumCapacity) { ++ newCap = minimumCapacity; ++ } ++ if (maxCapacity > 0 && newCap > maxCapacity) { ++ newCap = maxCapacity; ++ } ++ int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); ++ if (newElems == 0) { ++ status = U_MEMORY_ALLOCATION_ERROR; ++ return FALSE; ++ } ++ uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); ++ uprv_free(elements); ++ elements = newElems; ++ capacity = newCap; ++ return TRUE; + } + ++void UVector32::setMaxCapacity(int32_t limit) { ++ U_ASSERT(limit >= 0); ++ maxCapacity = limit; ++ if (maxCapacity < 0) { ++ maxCapacity = 0; ++ } ++} ++ + /** + * Change the size of this vector as follows: If newSize is smaller, + * then truncate the array, possibly deleting held elements for i >= +Index: source/common/uvectr32.h 
+=================================================================== +--- source/common/uvectr32.h (revision 23251) ++++ source/common/uvectr32.h (revision 23418) +@@ -1,6 +1,6 @@ + /* + ********************************************************************** +-* Copyright (C) 1999-2006, International Business Machines ++* Copyright (C) 1999-2008, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + */ +@@ -61,6 +61,8 @@ + int32_t count; + + int32_t capacity; ++ ++ int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. + + int32_t* elements; + +@@ -162,6 +164,14 @@ + int32_t *getBuffer() const; + + /** ++ * Set the maximum allowed buffer capacity for this vector/stack. ++ * Default with no limit set is unlimited, go until malloc() fails. ++ * A Limit of zero means unlimited capacity. ++ * Units are vector elements (32 bits each), not bytes. ++ */ ++ void setMaxCapacity(int32_t limit); ++ ++ /** + * ICU "poor man's RTTI", returns a UClassID for this class. + */ + static UClassID U_EXPORT2 getStaticClassID(); +@@ -221,7 +231,9 @@ + } + + inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) { +- ensureCapacity(count+size, status); ++ if (ensureCapacity(count+size, status) == FALSE) { ++ return NULL; ++ } + int32_t *rp = elements+count; + count += size; + return rp;