From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 4 Mar 2024 23:49:06 +0100 Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with isolated external parser When parsing DTD content with code like .. XML_Parser parser = XML_ParserCreate(NULL); XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE); .. there are 0 bytes accounted as direct input and all input from `doc` accounted as indirect input. Now function accountingGetCurrentAmplification cannot calculate the current amplification ratio as "(direct + indirect) / direct", and it did refuse to divide by 0 as one would expect, but it returned 1.0 for this case to indicate no amplification over direct input. As a result, billion laughs attacks from DTD-only input were not detected with this isolated way of using an external parser. The new approach is to assume direct input of length not 0 but 22 -- derived from ghost input "", the shortest possible way to include an external DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22". GitHub issue #839 has more details on this issue and its origin in ClusterFuzz finding 66812. --- From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 4 Mar 2024 23:49:06 +0100 Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with isolated external parser When parsing DTD content with code like .. XML_Parser parser = XML_ParserCreate(NULL); XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE); .. there are 0 bytes accounted as direct input and all input from `doc` accounted as indirect input. Now function accountingGetCurrentAmplification cannot calculate the current amplification ratio as "(direct + indirect) / direct", and it did refuse to divide by 0 as one would expect, but it returned 1.0 for this case to indicate no amplification over direct input. As a result, billion laughs attacks from DTD-only input were not detected with this isolated way of using an external parser. The new approach is to assume direct input of length not 0 but 22 -- derived from ghost input "", the shortest possible way to include an external DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22". GitHub issue #839 has more details on this issue and its origin in ClusterFuzz finding 66812. --- From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 4 Mar 2024 23:49:06 +0100 Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with isolated external parser When parsing DTD content with code like .. XML_Parser parser = XML_ParserCreate(NULL); XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE); .. there are 0 bytes accounted as direct input and all input from `doc` accounted as indirect input. Now function accountingGetCurrentAmplification cannot calculate the current amplification ratio as "(direct + indirect) / direct", and it did refuse to divide by 0 as one would expect, but it returned 1.0 for this case to indicate no amplification over direct input. As a result, billion laughs attacks from DTD-only input were not detected with this isolated way of using an external parser. The new approach is to assume direct input of length not 0 but 22 -- derived from ghost input "", the shortest possible way to include an external DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22". GitHub issue #839 has more details on this issue and its origin in ClusterFuzz finding 66812. --- From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 4 Mar 2024 23:49:06 +0100 Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with isolated external parser When parsing DTD content with code like .. XML_Parser parser = XML_ParserCreate(NULL); XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE); .. there are 0 bytes accounted as direct input and all input from `doc` accounted as indirect input. Now function accountingGetCurrentAmplification cannot calculate the current amplification ratio as "(direct + indirect) / direct", and it did refuse to divide by 0 as one would expect, but it returned 1.0 for this case to indicate no amplification over direct input. As a result, billion laughs attacks from DTD-only input were not detected with this isolated way of using an external parser. The new approach is to assume direct input of length not 0 but 22 -- derived from ghost input "", the shortest possible way to include an external DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22". GitHub issue #839 has more details on this issue and its origin in ClusterFuzz finding 66812. --- From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 4 Mar 2024 23:49:06 +0100 Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with isolated external parser When parsing DTD content with code like .. XML_Parser parser = XML_ParserCreate(NULL); XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE); .. there are 0 bytes accounted as direct input and all input from `doc` accounted as indirect input. Now function accountingGetCurrentAmplification cannot calculate the current amplification ratio as "(direct + indirect) / direct", and it did refuse to divide by 0 as one would expect, but it returned 1.0 for this case to indicate no amplification over direct input. As a result, billion laughs attacks from DTD-only input were not detected with this isolated way of using an external parser. The new approach is to assume direct input of length not 0 but 22 -- derived from ghost input "", the shortest possible way to include an external DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22". GitHub issue #839 has more details on this issue and its origin in ClusterFuzz finding 66812. --- From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 4 Mar 2024 23:49:06 +0100 Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with isolated external parser When parsing DTD content with code like .. XML_Parser parser = XML_ParserCreate(NULL); XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE); .. there are 0 bytes accounted as direct input and all input from `doc` accounted as indirect input. Now function accountingGetCurrentAmplification cannot calculate the current amplification ratio as "(direct + indirect) / direct", and it did refuse to divide by 0 as one would expect, but it returned 1.0 for this case to indicate no amplification over direct input. As a result, billion laughs attacks from DTD-only input were not detected with this isolated way of using an external parser. The new approach is to assume direct input of length not 0 but 22 -- derived from ghost input "", the shortest possible way to include an external DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22". GitHub issue #839 has more details on this issue and its origin in ClusterFuzz finding 66812. --- From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 4 Mar 2024 23:49:06 +0100 Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with isolated external parser When parsing DTD content with code like .. XML_Parser parser = XML_ParserCreate(NULL); XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE); .. there are 0 bytes accounted as direct input and all input from `doc` accounted as indirect input. Now function accountingGetCurrentAmplification cannot calculate the current amplification ratio as "(direct + indirect) / direct", and it did refuse to divide by 0 as one would expect, but it returned 1.0 for this case to indicate no amplification over direct input. As a result, billion laughs attacks from DTD-only input were not detected with this isolated way of using an external parser. The new approach is to assume direct input of length not 0 but 22 -- derived from ghost input "", the shortest possible way to include an external DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22". GitHub issue #839 has more details on this issue and its origin in ClusterFuzz finding 66812. --- From 1d50b80cf31de87750103656f6eb693746854aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 4 Mar 2024 23:49:06 +0100 Subject: [PATCH 1/2] lib/xmlparse.c: Detect billion laughs attack with isolated external parser When parsing DTD content with code like .. XML_Parser parser = XML_ParserCreate(NULL); XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); enum XML_Status status = XML_Parse(ext_parser, doc, (int)strlen(doc), XML_TRUE); .. there are 0 bytes accounted as direct input and all input from `doc` accounted as indirect input. Now function accountingGetCurrentAmplification cannot calculate the current amplification ratio as "(direct + indirect) / direct", and it did refuse to divide by 0 as one would expect, but it returned 1.0 for this case to indicate no amplification over direct input. As a result, billion laughs attacks from DTD-only input were not detected with this isolated way of using an external parser. The new approach is to assume direct input of length not 0 but 22 -- derived from ghost input "", the shortest possible way to include an external DTD --, and do the usual "(direct + indirect) / direct" math with "direct := 22". GitHub issue #839 has more details on this issue and its origin in ClusterFuzz finding 66812. --- expat/lib/xmlparse.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) Index: expat-2.5.0/lib/xmlparse.c =================================================================== --- expat-2.5.0.orig/lib/xmlparse.c +++ expat-2.5.0/lib/xmlparse.c @@ -7655,6 +7655,8 @@ copyString(const XML_Char *s, const XML_ static float accountingGetCurrentAmplification(XML_Parser rootParser) { + // 1.........1.........12 => 22 + const size_t lenOfShortestInclude = sizeof("") - 1; const XmlBigCount countBytesOutput = rootParser->m_accounting.countBytesDirect + rootParser->m_accounting.countBytesIndirect; @@ -7662,7 +7664,9 @@ accountingGetCurrentAmplification(XML_Pa = rootParser->m_accounting.countBytesDirect ? (countBytesOutput / (float)(rootParser->m_accounting.countBytesDirect)) - : 1.0f; + : ((lenOfShortestInclude + + rootParser->m_accounting.countBytesIndirect) + / (float)lenOfShortestInclude); assert(! rootParser->m_parentParser); return amplificationFactor; } Index: expat-2.5.0/tests/runtests.c =================================================================== --- expat-2.5.0.orig/tests/runtests.c +++ expat-2.5.0/tests/runtests.c @@ -12092,6 +12092,63 @@ START_TEST(test_helper_unsigned_char_to_ fail("unsignedCharToPrintable result mistaken"); } END_TEST + +START_TEST(test_amplification_isolated_external_parser) { + // NOTE: Length 44 is precisely twice the length of "" + // (22) that is used in function accountingGetCurrentAmplification in + // xmlparse.c. + // 1.........1.........1.........1.........1..4 => 44 + const char doc[] = ""; + const int docLen = (int)sizeof(doc) - 1; + const float maximumToleratedAmplification = 2.0f; + + struct TestCase { + int offsetOfThreshold; + enum XML_Status expectedStatus; + }; + + struct TestCase cases[] = { + {-2, XML_STATUS_ERROR}, {-1, XML_STATUS_ERROR}, {0, XML_STATUS_ERROR}, + {+1, XML_STATUS_OK}, {+2, XML_STATUS_OK}, + }; + + for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { + const int offsetOfThreshold = cases[i].offsetOfThreshold; + const enum XML_Status expectedStatus = cases[i].expectedStatus; + const unsigned long long activationThresholdBytes + = docLen + offsetOfThreshold; + + // set_subtest("offsetOfThreshold=%d, expectedStatus=%d", offsetOfThreshold, + // expectedStatus); + + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + + assert_true(XML_SetBillionLaughsAttackProtectionMaximumAmplification( + parser, maximumToleratedAmplification) + == XML_TRUE); + assert_true(XML_SetBillionLaughsAttackProtectionActivationThreshold( + parser, activationThresholdBytes) + == XML_TRUE); + + XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); + assert_true(ext_parser != NULL); + + const enum XML_Status actualStatus + = _XML_Parse_SINGLE_BYTES(ext_parser, doc, docLen, XML_TRUE); + + assert_true(actualStatus == expectedStatus); + if (actualStatus != XML_STATUS_OK) { + assert_true(XML_GetErrorCode(ext_parser) + == XML_ERROR_AMPLIFICATION_LIMIT_BREACH); + } + + XML_ParserFree(ext_parser); + XML_ParserFree(parser); + } +} +END_TEST + #endif // defined(XML_DTD) static Suite * @@ -12485,6 +12542,8 @@ make_suite(void) { tcase_add_test(tc_accounting, test_accounting_precision); tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api); tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable); + tcase_add_test__ifdef_xml_dtd(tc_accounting, + test_amplification_isolated_external_parser); #endif return s;