forked from pool/libxml2
- Avoid quadratic checking of identity-constraints: [bsc#1178823]
* key/unique/keyref schema attributes currently use qudratic loops
to check their various constraints (that keys are unique and that
keyrefs refer to existing keys).
* This fix uses a hash table to avoid the quadratic behaviour.
- Add libxml2-Avoid-quadratic-checking-of-identity-constraints.patch
OBS-URL: https://build.opensuse.org/request/show/850723
OBS-URL: https://build.opensuse.org/package/show/devel:libraries:c_c++/libxml2?expand=0&rev=153
392 lines
12 KiB
Diff
392 lines
12 KiB
Diff
From 5aab6473018269c10bedf70aaa183c55c20b7ec2 Mon Sep 17 00:00:00 2001
|
|
From: Michael Matz <matz@suse.de>
|
|
Date: Sat, 21 Nov 2020 01:21:56 +0100
|
|
Subject: [PATCH] Avoid quadratic checking of identity-constraints
|
|
|
|
key/unique/keyref schema attributes currently use qudratic loops
|
|
to check their various constraints (that keys are unique and that
|
|
keyrefs refer to existing keys). That becomes extremely slow if
|
|
there are many elements with keys. This happens in the wild with
|
|
e.g. the OVAL XML descriptions of security patches. You need the
|
|
openscap schemata, and then an example xml file:
|
|
|
|
% zypper in openscap-utils
|
|
% wget ftp://ftp.suse.com/pub/projects/security/oval/opensuse.leap.15.1.xml
|
|
% time xmllint --schema /usr/share/openscap/schemas/oval/5.5/oval-definitions-schema.xsd opensuse.leap.15.1.xml > /dev/null
|
|
opensuse.leap.15.1.xml validates
|
|
|
|
real 16m59,857s
|
|
user 16m55,787s
|
|
sys 0m1,060s
|
|
|
|
This patch makes libxml use a hash table to avoid the quadratic
|
|
behaviour. The existing hash table only accepts strings as keys, so
|
|
we're mostly reusing the canonical representation of key values to derive
|
|
such strings (with the caveat given in a comment). The alternative
|
|
would be to rework the hash table code to accept either numbers or free
|
|
functions as hash workers, but the code is fast enough as is.
|
|
|
|
With the patch we have this then:
|
|
|
|
% time LD_LIBRARY_PATH=./libxml2/.libs/ ./libxml2/.libs/xmllint --schema /usr/share/openscap/schemas/oval/5.5/oval-definitions-schema.xsd opensuse.leap.15.1.xml > /dev/null
|
|
opensuse.leap.15.1.xml validates
|
|
|
|
real 0m3,531s
|
|
user 0m3,427s
|
|
sys 0m0,103s
|
|
|
|
So, a ~300x speedup. This patch survives 'make check' and 'make tests'.
|
|
---
|
|
xmlschemas.c | 189 +++++++++++++++++++++++++++++++++++++++++++++------
|
|
1 file changed, 167 insertions(+), 22 deletions(-)
|
|
|
|
diff --git a/xmlschemas.c b/xmlschemas.c
|
|
index cc200636..c455b4a3 100644
|
|
--- a/xmlschemas.c
|
|
+++ b/xmlschemas.c
|
|
@@ -860,6 +860,7 @@ struct _xmlSchemaIDCMatcher {
|
|
int sizeKeySeqs;
|
|
xmlSchemaItemListPtr targets; /* list of target-node
|
|
(xmlSchemaPSVIIDCNodePtr) entries */
|
|
+ xmlHashTablePtr htab;
|
|
};
|
|
|
|
/*
|
|
@@ -1055,6 +1056,18 @@ struct _xmlSchemaSubstGroup {
|
|
xmlSchemaItemListPtr members;
|
|
};
|
|
|
|
+/**
|
|
+ * xmlIDCHashEntry:
|
|
+ *
|
|
+ * an entry in hash tables to quickly look up keys/uniques
|
|
+ */
|
|
+typedef struct _xmlIDCHashEntry xmlIDCHashEntry;
|
|
+typedef xmlIDCHashEntry *xmlIDCHashEntryPtr;
|
|
+struct _xmlIDCHashEntry {
|
|
+ xmlIDCHashEntryPtr next; /* next item with same hash */
|
|
+ int index; /* index into associated item list */
|
|
+};
|
|
+
|
|
/************************************************************************
|
|
* *
|
|
* Some predeclarations *
|
|
@@ -1478,6 +1491,7 @@ xmlSchemaWildcardPCToString(int pc)
|
|
* @val: the precomputed value
|
|
* @retValue: the returned value
|
|
* @ws: the whitespace type of the value
|
|
+ * @for_hash: non-zero if this is supposed to generate a string for hashing
|
|
*
|
|
* Get a the canonical representation of the value.
|
|
* The caller has to free the returned retValue.
|
|
@@ -1486,9 +1500,10 @@ xmlSchemaWildcardPCToString(int pc)
|
|
* API errors or if the value type is not supported yet.
|
|
*/
|
|
static int
|
|
-xmlSchemaGetCanonValueWhtspExt(xmlSchemaValPtr val,
|
|
- xmlSchemaWhitespaceValueType ws,
|
|
- xmlChar **retValue)
|
|
+xmlSchemaGetCanonValueWhtspExt_1(xmlSchemaValPtr val,
|
|
+ xmlSchemaWhitespaceValueType ws,
|
|
+ xmlChar **retValue,
|
|
+ int for_hash)
|
|
{
|
|
int list;
|
|
xmlSchemaValType valType;
|
|
@@ -1522,6 +1537,20 @@ xmlSchemaGetCanonValueWhtspExt(xmlSchemaValPtr val,
|
|
xmlFree((xmlChar *) value2);
|
|
goto internal_error;
|
|
}
|
|
+ if (for_hash && valType == XML_SCHEMAS_DECIMAL) {
|
|
+ /* We can mostly use the canonical value for hashing,
|
|
+ except in the case of decimal. There the canonical
|
|
+ representation requires a trailing '.0' even for
|
|
+ non-fractional numbers, but for the derived integer
|
|
+ types it forbids any decimal point. Nevertheless they
|
|
+ compare equal if the value is equal. We need to generate
|
|
+ the same hash value for this to work, and it's easiest
|
|
+ to just cut off the useless '.0' suffix for the
|
|
+ decimal type. */
|
|
+ int len = xmlStrlen(value2);
|
|
+ if (len > 2 && value2[len-1] == '0' && value2[len-2] == '.')
|
|
+ ((xmlChar*)value2)[len-2] = 0;
|
|
+ }
|
|
value = value2;
|
|
}
|
|
if (*retValue == NULL)
|
|
@@ -1548,6 +1577,22 @@ internal_error:
|
|
return (-1);
|
|
}
|
|
|
|
+static int
|
|
+xmlSchemaGetCanonValueWhtspExt(xmlSchemaValPtr val,
|
|
+ xmlSchemaWhitespaceValueType ws,
|
|
+ xmlChar **retValue)
|
|
+{
|
|
+ return xmlSchemaGetCanonValueWhtspExt_1(val, ws, retValue, 0);
|
|
+}
|
|
+
|
|
+static int
|
|
+xmlSchemaGetCanonValueHash(xmlSchemaValPtr val,
|
|
+ xmlChar **retValue)
|
|
+{
|
|
+ return xmlSchemaGetCanonValueWhtspExt_1(val, XML_SCHEMA_WHITESPACE_COLLAPSE,
|
|
+ retValue, 1);
|
|
+}
|
|
+
|
|
/**
|
|
* xmlSchemaFormatItemForReport:
|
|
* @buf: the string buffer
|
|
@@ -22311,6 +22356,17 @@ xmlSchemaIDCFreeIDCTable(xmlSchemaPSVIIDCBindingPtr bind)
|
|
}
|
|
}
|
|
|
|
+static void
|
|
+xmlFreeIDCHashEntry (void *payload, const xmlChar *name ATTRIBUTE_UNUSED)
|
|
+{
|
|
+ xmlIDCHashEntryPtr e = payload, n;
|
|
+ while (e) {
|
|
+ n = e->next;
|
|
+ xmlFree(e);
|
|
+ e = n;
|
|
+ }
|
|
+}
|
|
+
|
|
/**
|
|
* xmlSchemaIDCFreeMatcherList:
|
|
* @matcher: the first IDC matcher in the list
|
|
@@ -22349,6 +22405,8 @@ xmlSchemaIDCFreeMatcherList(xmlSchemaIDCMatcherPtr matcher)
|
|
}
|
|
xmlSchemaItemListFree(matcher->targets);
|
|
}
|
|
+ if (matcher->htab != NULL)
|
|
+ xmlHashFree(matcher->htab, xmlFreeIDCHashEntry);
|
|
xmlFree(matcher);
|
|
matcher = next;
|
|
}
|
|
@@ -22399,6 +22457,10 @@ xmlSchemaIDCReleaseMatcherList(xmlSchemaValidCtxtPtr vctxt,
|
|
xmlSchemaItemListFree(matcher->targets);
|
|
matcher->targets = NULL;
|
|
}
|
|
+ if (matcher->htab != NULL) {
|
|
+ xmlHashFree(matcher->htab, xmlFreeIDCHashEntry);
|
|
+ matcher->htab = NULL;
|
|
+ }
|
|
matcher->next = NULL;
|
|
/*
|
|
* Cache the matcher.
|
|
@@ -22633,10 +22695,10 @@ next_sto:
|
|
}
|
|
|
|
static const xmlChar *
|
|
-xmlSchemaFormatIDCKeySequence(xmlSchemaValidCtxtPtr vctxt,
|
|
- xmlChar **buf,
|
|
- xmlSchemaPSVIIDCKeyPtr *seq,
|
|
- int count)
|
|
+xmlSchemaFormatIDCKeySequence_1(xmlSchemaValidCtxtPtr vctxt,
|
|
+ xmlChar **buf,
|
|
+ xmlSchemaPSVIIDCKeyPtr *seq,
|
|
+ int count, int for_hash)
|
|
{
|
|
int i, res;
|
|
xmlChar *value = NULL;
|
|
@@ -22644,9 +22706,13 @@ xmlSchemaFormatIDCKeySequence(xmlSchemaValidCtxtPtr vctxt,
|
|
*buf = xmlStrdup(BAD_CAST "[");
|
|
for (i = 0; i < count; i++) {
|
|
*buf = xmlStrcat(*buf, BAD_CAST "'");
|
|
- res = xmlSchemaGetCanonValueWhtspExt(seq[i]->val,
|
|
- xmlSchemaGetWhiteSpaceFacetValue(seq[i]->type),
|
|
- &value);
|
|
+ if (!for_hash)
|
|
+ res = xmlSchemaGetCanonValueWhtspExt(seq[i]->val,
|
|
+ xmlSchemaGetWhiteSpaceFacetValue(seq[i]->type),
|
|
+ &value);
|
|
+ else {
|
|
+ res = xmlSchemaGetCanonValueHash(seq[i]->val, &value);
|
|
+ }
|
|
if (res == 0)
|
|
*buf = xmlStrcat(*buf, BAD_CAST value);
|
|
else {
|
|
@@ -22668,6 +22734,24 @@ xmlSchemaFormatIDCKeySequence(xmlSchemaValidCtxtPtr vctxt,
|
|
return (BAD_CAST *buf);
|
|
}
|
|
|
|
+static const xmlChar *
|
|
+xmlSchemaFormatIDCKeySequence(xmlSchemaValidCtxtPtr vctxt,
|
|
+ xmlChar **buf,
|
|
+ xmlSchemaPSVIIDCKeyPtr *seq,
|
|
+ int count)
|
|
+{
|
|
+ return xmlSchemaFormatIDCKeySequence_1(vctxt, buf, seq, count, 0);
|
|
+}
|
|
+
|
|
+static const xmlChar *
|
|
+xmlSchemaHashKeySequence(xmlSchemaValidCtxtPtr vctxt,
|
|
+ xmlChar **buf,
|
|
+ xmlSchemaPSVIIDCKeyPtr *seq,
|
|
+ int count)
|
|
+{
|
|
+ return xmlSchemaFormatIDCKeySequence_1(vctxt, buf, seq, count, 1);
|
|
+}
|
|
+
|
|
/**
|
|
* xmlSchemaXPathPop:
|
|
* @vctxt: the WXS validation context
|
|
@@ -23029,15 +23113,25 @@ create_key:
|
|
if ((idc->type != XML_SCHEMA_TYPE_IDC_KEYREF) &&
|
|
(targets->nbItems != 0)) {
|
|
xmlSchemaPSVIIDCKeyPtr ckey, bkey, *bkeySeq;
|
|
+ xmlIDCHashEntryPtr e;
|
|
|
|
- i = 0;
|
|
res = 0;
|
|
+
|
|
+ if (!matcher->htab)
|
|
+ e = NULL;
|
|
+ else {
|
|
+ xmlChar *value = NULL;
|
|
+ xmlSchemaHashKeySequence(vctxt, &value, *keySeq, nbKeys);
|
|
+ e = xmlHashLookup(matcher->htab, value);
|
|
+ FREE_AND_NULL(value);
|
|
+ }
|
|
+
|
|
/*
|
|
* Compare the key-sequences, key by key.
|
|
*/
|
|
- do {
|
|
+ for (;e; e = e->next) {
|
|
bkeySeq =
|
|
- ((xmlSchemaPSVIIDCNodePtr) targets->items[i])->keys;
|
|
+ ((xmlSchemaPSVIIDCNodePtr) targets->items[e->index])->keys;
|
|
for (j = 0; j < nbKeys; j++) {
|
|
ckey = (*keySeq)[j];
|
|
bkey = bkeySeq[j];
|
|
@@ -23058,9 +23152,8 @@ create_key:
|
|
*/
|
|
break;
|
|
}
|
|
- i++;
|
|
- } while (i < targets->nbItems);
|
|
- if (i != targets->nbItems) {
|
|
+ }
|
|
+ if (e) {
|
|
xmlChar *str = NULL, *strB = NULL;
|
|
/*
|
|
* TODO: Try to report the key-sequence.
|
|
@@ -23138,6 +23231,24 @@ create_key:
|
|
}
|
|
return (-1);
|
|
}
|
|
+ if (idc->type != XML_SCHEMA_TYPE_IDC_KEYREF) {
|
|
+ xmlChar *value = NULL;
|
|
+ xmlIDCHashEntryPtr r, e;
|
|
+ if (!matcher->htab)
|
|
+ matcher->htab = xmlHashCreate(4);
|
|
+ xmlSchemaHashKeySequence(vctxt, &value, ntItem->keys, nbKeys);
|
|
+ e = xmlMalloc(sizeof *e);
|
|
+ e->index = targets->nbItems - 1;
|
|
+ r = xmlHashLookup(matcher->htab, value);
|
|
+ if (r) {
|
|
+ e->next = r->next;
|
|
+ r->next = e;
|
|
+ } else {
|
|
+ e->next = NULL;
|
|
+ xmlHashAddEntry(matcher->htab, value, e);
|
|
+ }
|
|
+ FREE_AND_NULL(value);
|
|
+ }
|
|
|
|
goto selector_leave;
|
|
selector_key_error:
|
|
@@ -23394,6 +23505,10 @@ xmlSchemaIDCFillNodeTables(xmlSchemaValidCtxtPtr vctxt,
|
|
matcher->targets->items = NULL;
|
|
matcher->targets->sizeItems = 0;
|
|
matcher->targets->nbItems = 0;
|
|
+ if (matcher->htab) {
|
|
+ xmlHashFree(matcher->htab, xmlFreeIDCHashEntry);
|
|
+ matcher->htab = NULL;
|
|
+ }
|
|
} else {
|
|
/*
|
|
* Compare the key-sequences and add to the IDC node-table.
|
|
@@ -23841,6 +23956,7 @@ xmlSchemaCheckCVCIDCKeyRef(xmlSchemaValidCtxtPtr vctxt)
|
|
int i, j, k, res, nbFields, hasDupls;
|
|
xmlSchemaPSVIIDCKeyPtr *refKeys, *keys;
|
|
xmlSchemaPSVIIDCNodePtr refNode = NULL;
|
|
+ xmlHashTablePtr table = NULL;
|
|
|
|
nbFields = matcher->aidc->def->nbFields;
|
|
|
|
@@ -23858,26 +23974,52 @@ xmlSchemaCheckCVCIDCKeyRef(xmlSchemaValidCtxtPtr vctxt)
|
|
/*
|
|
* Search for a matching key-sequences.
|
|
*/
|
|
+ if (bind) {
|
|
+ table = xmlHashCreate(bind->nbNodes * 2);
|
|
+ for (j = 0; j < bind->nbNodes; j++) {
|
|
+ xmlChar *value;
|
|
+ xmlIDCHashEntryPtr r, e;
|
|
+ keys = bind->nodeTable[j]->keys;
|
|
+ xmlSchemaHashKeySequence(vctxt, &value, keys, nbFields);
|
|
+ e = xmlMalloc(sizeof *e);
|
|
+ e->index = j;
|
|
+ r = xmlHashLookup(table, value);
|
|
+ if (r) {
|
|
+ e->next = r->next;
|
|
+ r->next = e;
|
|
+ } else {
|
|
+ e->next = NULL;
|
|
+ xmlHashAddEntry(table, value, e);
|
|
+ }
|
|
+ FREE_AND_NULL(value);
|
|
+ }
|
|
+ }
|
|
for (i = 0; i < matcher->targets->nbItems; i++) {
|
|
res = 0;
|
|
refNode = matcher->targets->items[i];
|
|
if (bind != NULL) {
|
|
+ xmlChar *value;
|
|
+ xmlIDCHashEntryPtr e;
|
|
refKeys = refNode->keys;
|
|
- for (j = 0; j < bind->nbNodes; j++) {
|
|
- keys = bind->nodeTable[j]->keys;
|
|
+ xmlSchemaHashKeySequence(vctxt, &value, refKeys, nbFields);
|
|
+ e = xmlHashLookup(table, value);
|
|
+ FREE_AND_NULL(value);
|
|
+ res = 0;
|
|
+ for (;e; e = e->next) {
|
|
+ keys = bind->nodeTable[e->index]->keys;
|
|
for (k = 0; k < nbFields; k++) {
|
|
res = xmlSchemaAreValuesEqual(keys[k]->val,
|
|
- refKeys[k]->val);
|
|
+ refKeys[k]->val);
|
|
if (res == 0)
|
|
- break;
|
|
+ break;
|
|
else if (res == -1) {
|
|
return (-1);
|
|
}
|
|
}
|
|
if (res == 1) {
|
|
/*
|
|
- * Match found.
|
|
- */
|
|
+ * Match found.
|
|
+ */
|
|
break;
|
|
}
|
|
}
|
|
@@ -23932,6 +24074,9 @@ xmlSchemaCheckCVCIDCKeyRef(xmlSchemaValidCtxtPtr vctxt)
|
|
FREE_AND_NULL(strB);
|
|
}
|
|
}
|
|
+ if (table) {
|
|
+ xmlHashFree(table, xmlFreeIDCHashEntry);
|
|
+ }
|
|
}
|
|
matcher = matcher->next;
|
|
}
|
|
--
|
|
GitLab
|
|
|