From 4bfab2c821961fb4c5ed8a04e329778c9b09a1df Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Wed, 12 Jul 2023 16:59:07 +0200 Subject: [PATCH] Make the validation of ISO-Schematron files optional in lxml, depending on the availability of the RNG validation file. Some lxml distributions discard the validation schema file due to licensing issues. See https://bugs.launchpad.net/lxml/+bug/2024343 --- CHANGES.txt | 8 ++++++++ doc/validation.txt | 9 +++++++++ src/lxml/isoschematron/__init__.py | 24 +++++++++++++++++++----- 3 files changed, 36 insertions(+), 5 deletions(-) Index: lxml-4.9.3/CHANGES.txt =================================================================== --- lxml-4.9.3.orig/CHANGES.txt +++ lxml-4.9.3/CHANGES.txt @@ -27,6 +27,14 @@ Other changes * Built with Cython 0.29.36 to adapt to changes in Python 3.12. +* LP#2024343: The validation of the schema file itself is now optional in the + ISO-Schematron implementation. This was done because some lxml distributions + discard the RNG validation schema file due to licensing issues. The validation + can now always be disabled with ``Schematron(..., validate_schema=False)``. + It is enabled by default if available and disabled otherwise. The module + constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used + to detect whether schema file validation is available. + 4.9.2 (2022-12-13) ================== Index: lxml-4.9.3/doc/validation.txt =================================================================== --- lxml-4.9.3.orig/doc/validation.txt +++ lxml-4.9.3/doc/validation.txt @@ -615,6 +615,15 @@ The usage of validation phases is a uniq a very powerful tool e.g. for establishing validation stages or to provide different validators for different "validation audiences". +Note: Some lxml distributions exclude the validation schema file due to licensing issues. +Since lxml 5.0, the validation of the user provided schema can be disabled with +``Schematron(..., validate_schema=False)``. +It is enabled by default if available and disabled otherwise. Previous versions of +lxml always had it enabled and failed at import time if the file was not available. +Thus, some distributions chose to remove the entire ISO-Schematron support. +The module constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used +since lxml 5.0 to detect whether schema file validation is available. + (Pre-ISO-Schematron) -------------------- Index: lxml-4.9.3/src/lxml/isoschematron/__init__.py =================================================================== --- lxml-4.9.3.orig/src/lxml/isoschematron/__init__.py +++ lxml-4.9.3/src/lxml/isoschematron/__init__.py @@ -61,10 +61,16 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree. svrl_validation_errors = _etree.XPath( '//svrl:failed-assert', namespaces={'svrl': SVRL_NS}) - # RelaxNG validator for schematron schemas -schematron_schema_valid = _etree.RelaxNG( - file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) +schematron_schema_valid_supported = False +try: + schematron_schema_valid = _etree.RelaxNG( + file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) + schematron_schema_valid_supported = True +except _etree.RelaxNGParseError: + # Some distributions delete the file due to licensing issues. + def schematron_schema_valid(arg): + raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng") def stylesheet_params(**kwargs): @@ -153,6 +159,13 @@ class Schematron(_etree._Validator): report document gets stored and can be accessed as the ``validation_report`` property. + If ``validate_schema`` is set to False, the validation of the schema file + itself is disabled. Validation happens by default after building the full + schema, unless the schema validation file cannot be found at import time, + in which case the validation gets disabled. Some lxml distributions exclude + this file due to licensing issues. ISO-Schematron validation can then still + be used normally, but the schemas themselves cannot be validated. + Here is a usage example:: >>> from lxml import etree @@ -234,7 +247,8 @@ class Schematron(_etree._Validator): def __init__(self, etree=None, file=None, include=True, expand=True, include_params={}, expand_params={}, compile_params={}, store_schematron=False, store_xslt=False, store_report=False, - phase=None, error_finder=ASSERTS_ONLY): + phase=None, error_finder=ASSERTS_ONLY, + validate_schema=schematron_schema_valid_supported): super(Schematron, self).__init__() self._store_report = store_report @@ -273,7 +287,7 @@ class Schematron(_etree._Validator): schematron = self._include(schematron, **include_params) if expand: schematron = self._expand(schematron, **expand_params) - if not schematron_schema_valid(schematron): + if validate_schema and not schematron_schema_valid(schematron): raise _etree.SchematronParseError( "invalid schematron schema: %s" % schematron_schema_valid.error_log)