320 lines
14 KiB
Diff
320 lines
14 KiB
Diff
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Mon, 4 Mar 2024 23:49:06 +0100
|
|
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
|
|
isolated external parser
|
|
|
|
When parsing DTD content with code like ..
|
|
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
|
|
|
|
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
|
|
as indirect input. In that case, function accountingGetCurrentAmplification cannot calculate
|
|
the current amplification ratio as "(direct + indirect) / direct": it did refuse
|
|
to divide by 0, as one would expect, but it returned 1.0 for this case to indicate
|
|
no amplification over direct input. As a result, billion laughs attacks from
|
|
DTD-only input were not detected with this isolated way of using an external parser.
|
|
|
|
The new approach is to assume direct input of length not 0 but 22 -- derived from
|
|
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
|
|
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
|
|
|
|
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
|
|
finding 66812.
|
|
---
|
|
|
|
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Mon, 4 Mar 2024 23:49:06 +0100
|
|
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
|
|
isolated external parser
|
|
|
|
When parsing DTD content with code like ..
|
|
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
|
|
|
|
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
|
|
as indirect input. In that case, function accountingGetCurrentAmplification cannot calculate
|
|
the current amplification ratio as "(direct + indirect) / direct": it did refuse
|
|
to divide by 0, as one would expect, but it returned 1.0 for this case to indicate
|
|
no amplification over direct input. As a result, billion laughs attacks from
|
|
DTD-only input were not detected with this isolated way of using an external parser.
|
|
|
|
The new approach is to assume direct input of length not 0 but 22 -- derived from
|
|
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
|
|
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
|
|
|
|
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
|
|
finding 66812.
|
|
---
|
|
|
|
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Mon, 4 Mar 2024 23:49:06 +0100
|
|
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
|
|
isolated external parser
|
|
|
|
When parsing DTD content with code like ..
|
|
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
|
|
|
|
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
|
|
as indirect input. In that case, function accountingGetCurrentAmplification cannot calculate
|
|
the current amplification ratio as "(direct + indirect) / direct": it did refuse
|
|
to divide by 0, as one would expect, but it returned 1.0 for this case to indicate
|
|
no amplification over direct input. As a result, billion laughs attacks from
|
|
DTD-only input were not detected with this isolated way of using an external parser.
|
|
|
|
The new approach is to assume direct input of length not 0 but 22 -- derived from
|
|
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
|
|
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
|
|
|
|
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
|
|
finding 66812.
|
|
---
|
|
|
|
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Mon, 4 Mar 2024 23:49:06 +0100
|
|
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
|
|
isolated external parser
|
|
|
|
When parsing DTD content with code like ..
|
|
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
|
|
|
|
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
|
|
as indirect input. In that case, function accountingGetCurrentAmplification cannot calculate
|
|
the current amplification ratio as "(direct + indirect) / direct": it did refuse
|
|
to divide by 0, as one would expect, but it returned 1.0 for this case to indicate
|
|
no amplification over direct input. As a result, billion laughs attacks from
|
|
DTD-only input were not detected with this isolated way of using an external parser.
|
|
|
|
The new approach is to assume direct input of length not 0 but 22 -- derived from
|
|
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
|
|
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
|
|
|
|
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
|
|
finding 66812.
|
|
---
|
|
|
|
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Mon, 4 Mar 2024 23:49:06 +0100
|
|
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
|
|
isolated external parser
|
|
|
|
When parsing DTD content with code like ..
|
|
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
|
|
|
|
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
|
|
as indirect input. In that case, function accountingGetCurrentAmplification cannot calculate
|
|
the current amplification ratio as "(direct + indirect) / direct": it did refuse
|
|
to divide by 0, as one would expect, but it returned 1.0 for this case to indicate
|
|
no amplification over direct input. As a result, billion laughs attacks from
|
|
DTD-only input were not detected with this isolated way of using an external parser.
|
|
|
|
The new approach is to assume direct input of length not 0 but 22 -- derived from
|
|
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
|
|
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
|
|
|
|
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
|
|
finding 66812.
|
|
---
|
|
|
|
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Mon, 4 Mar 2024 23:49:06 +0100
|
|
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
|
|
isolated external parser
|
|
|
|
When parsing DTD content with code like ..
|
|
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
|
|
|
|
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
|
|
as indirect input. In that case, function accountingGetCurrentAmplification cannot calculate
|
|
the current amplification ratio as "(direct + indirect) / direct": it did refuse
|
|
to divide by 0, as one would expect, but it returned 1.0 for this case to indicate
|
|
no amplification over direct input. As a result, billion laughs attacks from
|
|
DTD-only input were not detected with this isolated way of using an external parser.
|
|
|
|
The new approach is to assume direct input of length not 0 but 22 -- derived from
|
|
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
|
|
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
|
|
|
|
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
|
|
finding 66812.
|
|
---
|
|
|
|
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Mon, 4 Mar 2024 23:49:06 +0100
|
|
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
|
|
isolated external parser
|
|
|
|
When parsing DTD content with code like ..
|
|
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
|
|
|
|
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
|
|
as indirect input. In that case, function accountingGetCurrentAmplification cannot calculate
|
|
the current amplification ratio as "(direct + indirect) / direct": it did refuse
|
|
to divide by 0, as one would expect, but it returned 1.0 for this case to indicate
|
|
no amplification over direct input. As a result, billion laughs attacks from
|
|
DTD-only input were not detected with this isolated way of using an external parser.
|
|
|
|
The new approach is to assume direct input of length not 0 but 22 -- derived from
|
|
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
|
|
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
|
|
|
|
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
|
|
finding 66812.
|
|
---
|
|
|
|
From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001
|
|
From: Sebastian Pipping <sebastian@pipping.org>
|
|
Date: Mon, 4 Mar 2024 23:49:06 +0100
|
|
Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with
|
|
isolated external parser
|
|
|
|
When parsing DTD content with code like ..
|
|
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE);
|
|
|
|
.. there are 0 bytes accounted as direct input and all input from `doc` accounted
|
|
as indirect input. In that case, function accountingGetCurrentAmplification cannot calculate
|
|
the current amplification ratio as "(direct + indirect) / direct": it did refuse
|
|
to divide by 0, as one would expect, but it returned 1.0 for this case to indicate
|
|
no amplification over direct input. As a result, billion laughs attacks from
|
|
DTD-only input were not detected with this isolated way of using an external parser.
|
|
|
|
The new approach is to assume direct input of length not 0 but 22 -- derived from
|
|
ghost input "<!ENTITY a SYSTEM 'b'>", the shortest possible way to include an external
|
|
DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22".
|
|
|
|
GitHub issue #839 has more details on this issue and its origin in ClusterFuzz
|
|
finding 66812.
|
|
---
|
|
expat/lib/xmlparse.c | 6 +++++-
|
|
1 file changed, 5 insertions(+), 1 deletion(-)
|
|
|
|
Index: expat-2.5.0/lib/xmlparse.c
|
|
===================================================================
|
|
--- expat-2.5.0.orig/lib/xmlparse.c
|
|
+++ expat-2.5.0/lib/xmlparse.c
|
|
@@ -7655,6 +7655,8 @@ copyString(const XML_Char *s, const XML_
|
|
|
|
static float
|
|
accountingGetCurrentAmplification(XML_Parser rootParser) {
|
|
+ // 1.........1.........12 => 22
|
|
+ const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
|
|
const XmlBigCount countBytesOutput
|
|
= rootParser->m_accounting.countBytesDirect
|
|
+ rootParser->m_accounting.countBytesIndirect;
|
|
@@ -7662,7 +7664,9 @@ accountingGetCurrentAmplification(XML_Pa
|
|
= rootParser->m_accounting.countBytesDirect
|
|
? (countBytesOutput
|
|
/ (float)(rootParser->m_accounting.countBytesDirect))
|
|
- : 1.0f;
|
|
+ : ((lenOfShortestInclude
|
|
+ + rootParser->m_accounting.countBytesIndirect)
|
|
+ / (float)lenOfShortestInclude);
|
|
assert(! rootParser->m_parentParser);
|
|
return amplificationFactor;
|
|
}
|
|
Index: expat-2.5.0/tests/runtests.c
|
|
===================================================================
|
|
--- expat-2.5.0.orig/tests/runtests.c
|
|
+++ expat-2.5.0/tests/runtests.c
|
|
@@ -12092,6 +12092,63 @@ START_TEST(test_helper_unsigned_char_to_
|
|
fail("unsignedCharToPrintable result mistaken");
|
|
}
|
|
END_TEST
|
|
+
|
|
+START_TEST(test_amplification_isolated_external_parser) {
|
|
+ // NOTE: Length 44 is precisely twice the length of "<!ENTITY a SYSTEM 'b'>"
|
|
+ // (22) that is used in function accountingGetCurrentAmplification in
|
|
+ // xmlparse.c.
|
|
+ // 1.........1.........1.........1.........1..4 => 44
|
|
+ const char doc[] = "<!ENTITY % p1 '123456789_123456789_1234567'>";
|
|
+ const int docLen = (int)sizeof(doc) - 1;
|
|
+ const float maximumToleratedAmplification = 2.0f;
|
|
+
|
|
+ struct TestCase {
|
|
+ int offsetOfThreshold;
|
|
+ enum XML_Status expectedStatus;
|
|
+ };
|
|
+
|
|
+ struct TestCase cases[] = {
|
|
+ {-2, XML_STATUS_ERROR}, {-1, XML_STATUS_ERROR}, {0, XML_STATUS_ERROR},
|
|
+ {+1, XML_STATUS_OK}, {+2, XML_STATUS_OK},
|
|
+ };
|
|
+
|
|
+ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
|
+ const int offsetOfThreshold = cases[i].offsetOfThreshold;
|
|
+ const enum XML_Status expectedStatus = cases[i].expectedStatus;
|
|
+ const unsigned long long activationThresholdBytes
|
|
+ = docLen + offsetOfThreshold;
|
|
+
|
|
+ // set_subtest("offsetOfThreshold=%d, expectedStatus=%d", offsetOfThreshold,
|
|
+ // expectedStatus);
|
|
+
|
|
+ XML_Parser parser = XML_ParserCreate(NULL);
|
|
+ assert_true(parser != NULL);
|
|
+
|
|
+ assert_true(XML_SetBillionLaughsAttackProtectionMaximumAmplification(
|
|
+ parser, maximumToleratedAmplification)
|
|
+ == XML_TRUE);
|
|
+ assert_true(XML_SetBillionLaughsAttackProtectionActivationThreshold(
|
|
+ parser, activationThresholdBytes)
|
|
+ == XML_TRUE);
|
|
+
|
|
+ XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
|
|
+ assert_true(ext_parser != NULL);
|
|
+
|
|
+ const enum XML_Status actualStatus
|
|
+ = _XML_Parse_SINGLE_BYTES(ext_parser, doc, docLen, XML_TRUE);
|
|
+
|
|
+ assert_true(actualStatus == expectedStatus);
|
|
+ if (actualStatus != XML_STATUS_OK) {
|
|
+ assert_true(XML_GetErrorCode(ext_parser)
|
|
+ == XML_ERROR_AMPLIFICATION_LIMIT_BREACH);
|
|
+ }
|
|
+
|
|
+ XML_ParserFree(ext_parser);
|
|
+ XML_ParserFree(parser);
|
|
+ }
|
|
+}
|
|
+END_TEST
|
|
+
|
|
#endif // defined(XML_DTD)
|
|
|
|
static Suite *
|
|
@@ -12485,6 +12542,8 @@ make_suite(void) {
|
|
tcase_add_test(tc_accounting, test_accounting_precision);
|
|
tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api);
|
|
tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable);
|
|
+ tcase_add_test__ifdef_xml_dtd(tc_accounting,
|
|
+ test_amplification_isolated_external_parser);
|
|
#endif
|
|
|
|
return s;
|