expat/expat-CVE-2024-28757.patch

320 lines
14 KiB
Diff

From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 4 Mar 2024 23:49:06 +0100
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
isolated external parser
When parsing DTD content with code like ..
XML_Parser parser = XML_ParserCreate(NULL);
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
as indirect input. Now function accountingGetCurrentAmplification cannot calculate
the current amplification ratio as "(direct + indirect) / direct", and it did refuse
to divide by 0 as one would expect, but it returned 1.0 for this case to indicate
no amplification over direct input. As a result, billion laughs attacks from
DTD-only input were not detected with this isolated way of using an external parser.
The new approach is to assume direct input of length not 0 but 22 -- derived from
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
finding 66812.
---
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 4 Mar 2024 23:49:06 +0100
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
isolated external parser
When parsing DTD content with code like ..
XML_Parser parser = XML_ParserCreate(NULL);
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
as indirect input. Now function accountingGetCurrentAmplification cannot calculate
the current amplification ratio as "(direct + indirect) / direct", and it did refuse
to divide by 0 as one would expect, but it returned 1.0 for this case to indicate
no amplification over direct input. As a result, billion laughs attacks from
DTD-only input were not detected with this isolated way of using an external parser.
The new approach is to assume direct input of length not 0 but 22 -- derived from
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
finding 66812.
---
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 4 Mar 2024 23:49:06 +0100
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
isolated external parser
When parsing DTD content with code like ..
XML_Parser parser = XML_ParserCreate(NULL);
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
as indirect input. Now function accountingGetCurrentAmplification cannot calculate
the current amplification ratio as "(direct + indirect) / direct", and it did refuse
to divide by 0 as one would expect, but it returned 1.0 for this case to indicate
no amplification over direct input. As a result, billion laughs attacks from
DTD-only input were not detected with this isolated way of using an external parser.
The new approach is to assume direct input of length not 0 but 22 -- derived from
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
finding 66812.
---
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 4 Mar 2024 23:49:06 +0100
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
isolated external parser
When parsing DTD content with code like ..
XML_Parser parser = XML_ParserCreate(NULL);
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
as indirect input. Now function accountingGetCurrentAmplification cannot calculate
the current amplification ratio as "(direct + indirect) / direct", and it did refuse
to divide by 0 as one would expect, but it returned 1.0 for this case to indicate
no amplification over direct input. As a result, billion laughs attacks from
DTD-only input were not detected with this isolated way of using an external parser.
The new approach is to assume direct input of length not 0 but 22 -- derived from
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
finding 66812.
---
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 4 Mar 2024 23:49:06 +0100
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
isolated external parser
When parsing DTD content with code like ..
XML_Parser parser = XML_ParserCreate(NULL);
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
as indirect input. Now function accountingGetCurrentAmplification cannot calculate
the current amplification ratio as "(direct + indirect) / direct", and it did refuse
to divide by 0 as one would expect, but it returned 1.0 for this case to indicate
no amplification over direct input. As a result, billion laughs attacks from
DTD-only input were not detected with this isolated way of using an external parser.
The new approach is to assume direct input of length not 0 but 22 -- derived from
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
finding 66812.
---
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 4 Mar 2024 23:49:06 +0100
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
isolated external parser
When parsing DTD content with code like ..
XML_Parser parser = XML_ParserCreate(NULL);
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
as indirect input. Now function accountingGetCurrentAmplification cannot calculate
the current amplification ratio as "(direct + indirect) / direct", and it did refuse
to divide by 0 as one would expect, but it returned 1.0 for this case to indicate
no amplification over direct input. As a result, billion laughs attacks from
DTD-only input were not detected with this isolated way of using an external parser.
The new approach is to assume direct input of length not 0 but 22 -- derived from
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
finding 66812.
---
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 4 Mar 2024 23:49:06 +0100
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
isolated external parser
When parsing DTD content with code like ..
XML_Parser parser = XML_ParserCreate(NULL);
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
as indirect input. Now function accountingGetCurrentAmplification cannot calculate
the current amplification ratio as "(direct + indirect) / direct", and it did refuse
to divide by 0 as one would expect, but it returned 1.0 for this case to indicate
no amplification over direct input. As a result, billion laughs attacks from
DTD-only input were not detected with this isolated way of using an external parser.
The new approach is to assume direct input of length not 0 but 22 -- derived from
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
finding 66812.
---
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Mon, 4 Mar 2024 23:49:06 +0100
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
isolated external parser
When parsing DTD content with code like ..
XML_Parser parser = XML_ParserCreate(NULL);
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
as indirect input. Now function accountingGetCurrentAmplification cannot calculate
the current amplification ratio as "(direct + indirect) / direct", and it did refuse
to divide by 0 as one would expect, but it returned 1.0 for this case to indicate
no amplification over direct input. As a result, billion laughs attacks from
DTD-only input were not detected with this isolated way of using an external parser.
The new approach is to assume direct input of length not 0 but 22 -- derived from
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
finding 66812.
---
expat/lib/xmlparse.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
Index: expat-2.5.0/lib/xmlparse.c
===================================================================
--- expat-2.5.0.orig/lib/xmlparse.c
+++ expat-2.5.0/lib/xmlparse.c
@@ -7655,6 +7655,8 @@ copyString(const XML_Char *s, const XML_
static float
accountingGetCurrentAmplification(XML_Parser rootParser) {
+ // 1.........1.........12 => 22
+ const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
const XmlBigCount countBytesOutput
= rootParser->m_accounting.countBytesDirect
+ rootParser->m_accounting.countBytesIndirect;
@@ -7662,7 +7664,9 @@ accountingGetCurrentAmplification(XML_Pa
= rootParser->m_accounting.countBytesDirect
? (countBytesOutput
/ (float)(rootParser->m_accounting.countBytesDirect))
- : 1.0f;
+ : ((lenOfShortestInclude
+ + rootParser->m_accounting.countBytesIndirect)
+ / (float)lenOfShortestInclude);
assert(! rootParser->m_parentParser);
return amplificationFactor;
}
Index: expat-2.5.0/tests/runtests.c
===================================================================
--- expat-2.5.0.orig/tests/runtests.c
+++ expat-2.5.0/tests/runtests.c
@@ -12092,6 +12092,63 @@ START_TEST(test_helper_unsigned_char_to_
fail("unsignedCharToPrintable result mistaken");
}
END_TEST
+
+START_TEST(test_amplification_isolated_external_parser) {
+ // NOTE: Length 44 is precisely twice the length of "<!ENTITY a SYSTEM 'b'>"
+ // (22) that is used in function accountingGetCurrentAmplification in
+ // xmlparse.c.
+ // 1.........1.........1.........1.........1..4 => 44
+ const char doc[] = "<!ENTITY % p1 '123456789_123456789_1234567'>";
+ const int docLen = (int)sizeof(doc) - 1;
+ const float maximumToleratedAmplification = 2.0f;
+
+ struct TestCase {
+ int offsetOfThreshold;
+ enum XML_Status expectedStatus;
+ };
+
+ struct TestCase cases[] = {
+ {-2, XML_STATUS_ERROR}, {-1, XML_STATUS_ERROR}, {0, XML_STATUS_ERROR},
+ {+1, XML_STATUS_OK}, {+2, XML_STATUS_OK},
+ };
+
+ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ const int offsetOfThreshold = cases[i].offsetOfThreshold;
+ const enum XML_Status expectedStatus = cases[i].expectedStatus;
+ const unsigned long long activationThresholdBytes
+ = docLen + offsetOfThreshold;
+
+ // set_subtest("offsetOfThreshold=%d, expectedStatus=%d", offsetOfThreshold,
+ // expectedStatus);
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+
+ assert_true(XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parser, maximumToleratedAmplification)
+ == XML_TRUE);
+ assert_true(XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ parser, activationThresholdBytes)
+ == XML_TRUE);
+
+ XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
+ assert_true(ext_parser != NULL);
+
+ const enum XML_Status actualStatus
+ = _XML_Parse_SINGLE_BYTES(ext_parser, doc, docLen, XML_TRUE);
+
+ assert_true(actualStatus == expectedStatus);
+ if (actualStatus != XML_STATUS_OK) {
+ assert_true(XML_GetErrorCode(ext_parser)
+ == XML_ERROR_AMPLIFICATION_LIMIT_BREACH);
+ }
+
+ XML_ParserFree(ext_parser);
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
+
#endif // defined(XML_DTD)
static Suite *
@@ -12485,6 +12542,8 @@ make_suite(void) {
tcase_add_test(tc_accounting, test_accounting_precision);
tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api);
tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable);
+ tcase_add_test__ifdef_xml_dtd(tc_accounting,
+ test_amplification_isolated_external_parser);
#endif
return s;