SHA256
1
0
forked from pool/icu
OBS User unknown 2008-02-15 00:23:22 +00:00 committed by Git OBS Bridge
parent f69e6fb4b3
commit 5df87e088d
7 changed files with 475 additions and 19 deletions

View File

@ -1,3 +1,57 @@
-------------------------------------------------------------------
Wed Feb 13 17:42:00 CET 2008 - maw@suse.de
- Update to version 3.8:
+ Locale Data: ICU uses and supports data from Common Locale Data
Repository (CLDR) 1.5.0.1, which includes many improvements in
quality and quantity of data.
+ Rule Based Time Zone: This set of classes provides the ability
to read and write time zone data in RFC2445 VTIMEZONE format.
This also provides access to Olson timezone transitions.
+ Timezone Formatting: This has changed to give more human
readable results.
+ Relative Date/Time Formatting: A draft of the relative date/time
format class has been added. This functionality can be accessed
through the DateFormat::createDateInstance or DateFormat.getInstance
factory method. It provides the ability to format localized dates
in terms of "yesterday", "today" and "tomorrow", instead of only a
specific date or time.
+ Demonstrations and Tools
* ICU Data Library Customizer: This new online tool provides an
easier way to customize ICU's data.
* ICU4J Demonstrations: These will demonstrate some features of
ICU4J, like calendars, transliteration, and several other
features.
+ Flexible Date/Time Formatting: A draft of flexible date/time
format generator has been added. This allows multiple date and
time format patterns to be generated that are valid for specific
locales. This functionality can be accessed through the
DateTimePatternGenerator API.
+ Time Zones: The default time zone is detected more accurately
on Unix machines.
+ Additional Calendars
* @calendar=taiwan: This calendar is a variant of the Gregorian
calendar used in Taiwan.
* @calendar=indian: This is the Indian national calendar.
* @calendar=persian: This is the Persian calendar. It is also
known as the Jalāli Calendar. It is used in several Arabic
countries.
+ UnicodeSet
* The Freezable design pattern is now supported, which can
improve performance for the contains() and span() methods
on frozen UnicodeSet objects.
* A span function was added for iterating through strings.
* The containsAll(string) and containsNone(string) now support
set strings, instead of only codepoints.
+ Performance
* Charset conversion performance has been enhanced. The
amount of improvement varies depending on the converter
being used and platform being used.
* Rule based transliterator construction performance has been
improved.
- s#%run_ldconfig#/sbin/ldconfig# in libicu's %post and %postun
- add libicu-regex.patch (bnc#354372).
-------------------------------------------------------------------
Thu Feb 7 14:25:21 CET 2008 - pmladek@suse.cz

View File

@ -1,5 +1,5 @@
#
# spec file for package icu (Version 3.6)
# spec file for package icu (Version 3.8.1)
#
# Copyright (c) 2008 SUSE LINUX Products GmbH, Nuernberg, Germany.
# This file and all modifications and additions to the pristine
@ -10,21 +10,23 @@
# norootforbuild
Name: icu
BuildRequires: gcc-c++
License: IBM Public License
Group: System/Libraries
AutoReqProv: on
Version: 3.6
Release: 33
Version: 3.8.1
Release: 1
Requires: libicu = %{version}
Url: http://ibm.com/software/globalization/icu
Source0: icu4c-3_6-src.tar.bz2
Source1: icu4c-3_6-docs.tar.bz2
Source0: icu4c-3_8_1-src.tar.bz2
Source1: icu4c-3_8-docs.tar.bz2
Source3: SuSEconfig.icu
# OOo-2.3.1 and above needs to access the setBreakType method, see
# http://bugs.icu-project.org/trac/ticket/5498
Patch: icu4c-3_6-src-setBreakType-public.diff
Patch1: libicu-regex.patch
BuildRoot: %{_tmppath}/%{name}-%{version}-build
Summary: International Components for Unicode
@ -122,6 +124,7 @@ Authors:
%prep
%setup -q -n icu -a1
%patch
%patch1 -p0
%build
cd source
@ -143,7 +146,6 @@ make
#make check
%install
rm -rf $RPM_BUILD_ROOT
find . -name CVS -type d | xargs rm -rf
cd source
make DESTDIR=$RPM_BUILD_ROOT install
@ -151,8 +153,8 @@ make DESTDIR=$RPM_BUILD_ROOT install
chmod a+rx $RPM_BUILD_ROOT%{_libdir}/*.so.*
# install uncompiled source data:
mkdir -p $RPM_BUILD_ROOT/usr/share/icu/%{version}/unidata
install -m 644 data/unidata/*.txt $RPM_BUILD_ROOT/usr/share/icu/%{version}/unidata
ln -s unidata/UnicodeData.txt $RPM_BUILD_ROOT/usr/share/icu/%{version}/
install -m 644 data/unidata/*.txt $RPM_BUILD_ROOT/%{_datadir}/icu/%{version}/unidata
ln -s unidata/UnicodeData.txt $RPM_BUILD_ROOT/%{_datadir}/icu/%{version}/
# run test suite:
#pushd data
#ln -sf build/*.cnv build/*.res build/*.dat build/*.brk .
@ -162,16 +164,15 @@ ln -s unidata/UnicodeData.txt $RPM_BUILD_ROOT/usr/share/icu/%{version}/
mkdir -p $RPM_BUILD_ROOT/sbin/conf.d/
sed -e "s/@ICUVERSION@/%{version}/g" < ${RPM_SOURCE_DIR}/SuSEconfig.icu > ./SuSEconfig.icu
install -m 755 ./SuSEconfig.icu $RPM_BUILD_ROOT/sbin/conf.d/
rm $RPM_BUILD_ROOT/usr/share/icu/%{version}/license.html
rm $RPM_BUILD_ROOT/%{_datadir}/icu/%{version}/license.html
rm $RPM_BUILD_ROOT/%{_datadir}/icu/%{version}/install-sh
%clean
rm -rf $RPM_BUILD_ROOT
%post -n libicu
%run_ldconfig
%post -n libicu -p /sbin/ldconfig
%postun -n libicu
%run_ldconfig
%postun -n libicu -p /sbin/ldconfig
%files
%defattr(-, root, root)
@ -221,6 +222,57 @@ rm -rf $RPM_BUILD_ROOT
/usr/share/icu/%{version}/UnicodeData.txt
%changelog
* Wed Feb 13 2008 maw@suse.de
- Update to version 3.8:
+ Locale Data: ICU uses and supports data from Common Locale Data
Repository (CLDR) 1.5.0.1, which includes many improvements in
quality and quantity of data.
+ Rule Based Time Zone: This set of classes provides the ability
to read and write time zone data in RFC2445 VTIMEZONE format.
This also provides access to Olson timezone transitions.
+ Timezone Formatting: This has changed to give more human
readable results.
+ Relative Date/Time Formatting: A draft of the relative date/time
format class has been added. This functionality can be accessed
through the DateFormat::createDateInstance or DateFormat.getInstance
factory method. It provides the ability to format localized dates
in terms of "yesterday", "today" and "tomorrow", instead of only a
specific date or time.
+ Demonstrations and Tools
* ICU Data Library Customizer: This new online tool provides an
easier way to customize ICU's data.
* ICU4J Demonstrations: These will demonstrate some features of
ICU4J, like calendars, transliteration, and several other
features.
+ Flexible Date/Time Formatting: A draft of flexible date/time
format generator has been added. This allows multiple date and
time format patterns to be generated that are valid for specific
locales. This functionality can be accessed through the
DateTimePatternGenerator API.
+ Time Zones: The default time zone is detected more accurately
on Unix machines.
+ Additional Calendars
* @calendar=taiwan: This calendar is a variant of the Gregorian
calendar used in Taiwan.
* @calendar=indian: This is the Indian national calendar.
* @calendar=persian: This is the Persian calendar. It is also
known as the Jalāli Calendar. It is used in several Arabic
countries.
+ UnicodeSet
* The Freezable design pattern is now supported, which can
improve performance for the contains() and span() methods
on frozen UnicodeSet objects.
* A span function was added for iterating through strings.
* The containsAll(string) and containsNone(string) now support
set strings, instead of only codepoints.
+ Performance
* Charset conversion performance has been enhanced. The
amount of improvement varies depending on the converter
being used and platform being used.
* Rule based transliterator construction performance has been
improved.
- s#%%run_ldconfig#/sbin/ldconfig# in libicu's %%post and %%postun
- add libicu-regex.patch (bnc#354372).
* Thu Feb 07 2008 pmladek@suse.cz
- made RuleBasedBreakIterator::setBreakType method public; it was requested by
OpenOffice_org >= 2.3.1, see http://bugs.icu-project.org/trac/ticket/5498

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:55c51c8e9787957e6fc8165579d50633ba75ff7d96e6faf9e609630c6dbc4ee2
size 1555704

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:18085bdae14000069f328dcdc19baea44f6e827be9801348b342559cf87a4989
size 8666211

3
icu4c-3_8-docs.tar.bz2 Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:12b8d0486af18fd4590329b592f2126c7608a4094339fb80f70ee573d55a0b11
size 2339992

3
icu4c-3_8_1-src.tar.bz2 Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:919f4684e0924d6be941efef16a6c2b2e74e2790c6a2d6a373928000d5b97301
size 9320021

350
libicu-regex.patch Normal file
View File

@ -0,0 +1,350 @@
Index: source/i18n/regexcmp.cpp
===================================================================
--- source/i18n/regexcmp.cpp (revision 23251)
+++ source/i18n/regexcmp.cpp (revision 23418)
@@ -2,7 +2,7 @@
//
// file: regexcmp.cpp
//
-// Copyright (C) 2002-2007 International Business Machines Corporation and others.
+// Copyright (C) 2002-2008 International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains the ICU regular expression compiler, which is responsible
@@ -1186,14 +1186,17 @@
// Because capture groups can be forward-referenced by back-references,
// we fill the operand with the capture group number. At the end
// of compilation, it will be changed to the variable's location.
- U_ASSERT(groupNum > 0);
- int32_t op;
- if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
- op = URX_BUILD(URX_BACKREF_I, groupNum);
+ if (groupNum < 1) {
+ error(U_REGEX_INVALID_BACK_REF);
} else {
- op = URX_BUILD(URX_BACKREF, groupNum);
+ int32_t op;
+ if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
+ op = URX_BUILD(URX_BACKREF_I, groupNum);
+ } else {
+ op = URX_BUILD(URX_BACKREF, groupNum);
+ }
+ fRXPat->fCompiledPat->addElement(op, *fStatus);
}
- fRXPat->fCompiledPat->addElement(op, *fStatus);
}
break;
Index: source/i18n/rematch.cpp
===================================================================
--- source/i18n/rematch.cpp (revision 23251)
+++ source/i18n/rematch.cpp (revision 23418)
@@ -1,6 +1,6 @@
/*
**************************************************************************
-* Copyright (C) 2002-2007 International Business Machines Corporation *
+* Copyright (C) 2002-2008 International Business Machines Corporation *
* and others. All rights reserved. *
**************************************************************************
*/
@@ -30,6 +30,15 @@
U_NAMESPACE_BEGIN
+// Limit the size of the back track stack, to avoid system failures caused
+// by heap exhaustion. Units are in 32 bit words, not bytes.
+// This value puts ICU's limits higher than most other regexp implementations,
+// which use recursion rather than the heap, and take more storage per
+// backtrack point.
+// This constant is _temporary_. Proper API to control the value will be added.
+//
+static const int32_t BACKTRACK_STACK_CAPACITY = 8000000;
+
//-----------------------------------------------------------------------------
//
// Constructor and Destructor
@@ -53,8 +62,9 @@
}
if (fStack == NULL || fData == NULL) {
fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
}
-
reset(RegexStaticSets::gStaticSets->fEmptyString);
}
@@ -78,6 +88,8 @@
}
if (fStack == NULL || fData == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
}
reset(input);
}
@@ -102,6 +114,8 @@
}
if (fStack == NULL || fData == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
+ } else {
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY);
}
reset(RegexStaticSets::gStaticSets->fEmptyString);
}
@@ -1014,6 +1028,14 @@
inline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int32_t savePatIdx, int32_t frameSize, UErrorCode &status) {
// push storage for a new frame.
int32_t *newFP = fStack->reserveBlock(frameSize, status);
+ if (newFP == NULL) {
+ // Heap allocation error on attempted stack expansion.
+ // We need to return a writable stack frame, so just return the
+ // previous frame. The match operation will stop quickly
+ // because of the error status, after which the frame will never
+ // be looked at again.
+ return fp;
+ }
fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack.
// New stack frame = copy of old top frame.
@@ -1029,8 +1051,8 @@
fp->fPatIdx = savePatIdx;
return (REStackFrame *)newFP;
}
-
-
+
+
//--------------------------------------------------------------------------------
//
// MatchAt This is the actual matching engine.
@@ -2261,6 +2283,7 @@
}
if (U_FAILURE(status)) {
+ isMatch = FALSE;
break;
}
}
Index: source/test/intltest/regextst.h
===================================================================
--- source/test/intltest/regextst.h (revision 23251)
+++ source/test/intltest/regextst.h (revision 23418)
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2002-2007, International Business Machines Corporation and
+ * Copyright (c) 2002-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@@ -30,6 +30,7 @@
virtual void Extended();
virtual void Errors();
virtual void PerlTests();
+ virtual void Bug6149();
// The following functions are internal to the regexp tests.
virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int32_t line);
Index: source/test/intltest/regextst.cpp
===================================================================
--- source/test/intltest/regextst.cpp (revision 23251)
+++ source/test/intltest/regextst.cpp (revision 23418)
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2002-2007, International Business Machines Corporation and
+ * Copyright (c) 2002-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@@ -66,6 +66,10 @@
case 6: name = "PerlTests";
if (exec) PerlTests();
break;
+ case 7: name = "Bug 6149";
+ if (exec) Bug6149();
+ break;
+
default: name = "";
@@ -1639,6 +1643,12 @@
// Ticket 5389
REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX);
+
+ // Invalid Back Reference \0
+ // For ICU 3.8 and earlier
+ // For ICU versions newer than 3.8, \0 introduces an octal escape.
+ //
+ REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_INVALID_BACK_REF);
}
@@ -2122,6 +2132,26 @@
}
+//--------------------------------------------------------------
+//
+// Bug6149 Verify limits to heap expansion for backtrack stack.
+// Use this pattern,
+// "(a?){1,}"
+// The zero-length match will repeat forever.
+// (That this goes into a loop is another bug)
+//
+//---------------------------------------------------------------
+void RegexTest::Bug6149() {
+ UnicodeString pattern("(a?){1,}");
+ UnicodeString s("xyz");
+ uint32_t flags = 0;
+ UErrorCode status = U_ZERO_ERROR;
+
+ RegexMatcher matcher(pattern, s, flags, status);
+ UBool result = false;
+ REGEX_ASSERT_FAIL(result=matcher.matches(status), U_BUFFER_OVERFLOW_ERROR);
+ REGEX_ASSERT(result == FALSE);
+ }
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
Index: source/common/uvectr32.cpp
===================================================================
--- source/common/uvectr32.cpp (revision 23251)
+++ source/common/uvectr32.cpp (revision 23418)
@@ -1,6 +1,6 @@
/*
******************************************************************************
-* Copyright (C) 1999-2003, International Business Machines Corporation and *
+* Copyright (C) 1999-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
* Date Name Description
@@ -26,6 +26,7 @@
UVector32::UVector32(UErrorCode &status) :
count(0),
capacity(0),
+ maxCapacity(0),
elements(NULL)
{
_init(DEFUALT_CAPACITY, status);
@@ -34,6 +35,7 @@
UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) :
count(0),
capacity(0),
+ maxCapacity(0),
elements(0)
{
_init(initialCapacity, status);
@@ -46,6 +48,9 @@
if (initialCapacity < 1) {
initialCapacity = DEFUALT_CAPACITY;
}
+ if (maxCapacity>0 && maxCapacity<initialCapacity) {
+ initialCapacity = maxCapacity;
+ }
elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity);
if (elements == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
@@ -189,24 +194,38 @@
UBool UVector32::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
if (capacity >= minimumCapacity) {
return TRUE;
- } else {
- int32_t newCap = capacity * 2;
- if (newCap < minimumCapacity) {
- newCap = minimumCapacity;
- }
- int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap);
- if (newElems == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return FALSE;
- }
- uprv_memcpy(newElems, elements, sizeof(elements[0]) * count);
- uprv_free(elements);
- elements = newElems;
- capacity = newCap;
- return TRUE;
}
+ if (maxCapacity>0 && minimumCapacity>maxCapacity) {
+ status = U_BUFFER_OVERFLOW_ERROR;
+ return FALSE;
+ }
+ int32_t newCap = capacity * 2;
+ if (newCap < minimumCapacity) {
+ newCap = minimumCapacity;
+ }
+ if (maxCapacity > 0 && newCap > maxCapacity) {
+ newCap = maxCapacity;
+ }
+ int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap);
+ if (newElems == 0) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return FALSE;
+ }
+ uprv_memcpy(newElems, elements, sizeof(elements[0]) * count);
+ uprv_free(elements);
+ elements = newElems;
+ capacity = newCap;
+ return TRUE;
}
+void UVector32::setMaxCapacity(int32_t limit) {
+ U_ASSERT(limit >= 0);
+ maxCapacity = limit;
+ if (maxCapacity < 0) {
+ maxCapacity = 0;
+ }
+}
+
/**
* Change the size of this vector as follows: If newSize is smaller,
* then truncate the array, possibly deleting held elements for i >=
Index: source/common/uvectr32.h
===================================================================
--- source/common/uvectr32.h (revision 23251)
+++ source/common/uvectr32.h (revision 23418)
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 1999-2006, International Business Machines
+* Copyright (C) 1999-2008, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
@@ -61,6 +61,8 @@
int32_t count;
int32_t capacity;
+
+ int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow.
int32_t* elements;
@@ -162,6 +164,14 @@
int32_t *getBuffer() const;
/**
+ * Set the maximum allowed buffer capacity for this vector/stack.
+ * Default with no limit set is unlimited, go until malloc() fails.
+ * A Limit of zero means unlimited capacity.
+ * Units are vector elements (32 bits each), not bytes.
+ */
+ void setMaxCapacity(int32_t limit);
+
+ /**
* ICU "poor man's RTTI", returns a UClassID for this class.
*/
static UClassID U_EXPORT2 getStaticClassID();
@@ -221,7 +231,9 @@
}
inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) {
- ensureCapacity(count+size, status);
+ if (ensureCapacity(count+size, status) == FALSE) {
+ return NULL;
+ }
int32_t *rp = elements+count;
count += size;
return rp;