Sync from SUSE:SLFO:Main jsoup revision 1c663f249efc701bae2a322d5f518ace

2024-05-03 14:06:53 +02:00 · 2024-05-03 14:06:53 +02:00 · 32a0b409fc
commit 32a0b409fc
6 changed files with 496 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,23 @@
+## Default LFS
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.bsp filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gem filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.lz filter=lfs diff=lfs merge=lfs -text
+*.lzma filter=lfs diff=lfs merge=lfs -text
+*.obscpio filter=lfs diff=lfs merge=lfs -text
+*.oxt filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.rpm filter=lfs diff=lfs merge=lfs -text
+*.tbz filter=lfs diff=lfs merge=lfs -text
+*.tbz2 filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.ttf filter=lfs diff=lfs merge=lfs -text
+*.txz filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
--- a/16
+++ b/16
@ -0,0 +1,16 @@
+<services>
+	<service name="tar_scm" mode="disabled">
+		<param name="scm">git</param>
+		<param name="url">https://github.com/jhy/jsoup.git</param>
+		<param name="revision">jsoup-1.15.3</param>
+		<param name="match-tag">jsoup-*</param>
+		<param name="versionformat">@PARENT_TAG@</param>
+		<param name="versionrewrite-pattern">jsoup-(.*)</param>
+		<param name="exclude">src/test/resources</param>
+	</service>
+	<service name="recompress" mode="disabled">
+		<param name="file">*.tar</param>
+		<param name="compression">xz</param>
+	</service>
+	<service name="set_version" mode="disabled"/>
+</services>
--- a/jsoup-1.15.3.tar.xz
+++ b/jsoup-1.15.3.tar.xz
--- a/jsoup-build.xml
+++ b/jsoup-build.xml
@ -0,0 +1,155 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<project name="jsoup" default="package" basedir=".">
+
+  <!-- ====================================================================== -->
+  <!-- Build environment properties                                           -->
+  <!-- ====================================================================== -->
+
+  <property file="build.properties"/>
+  
+  <property name="project.name" value="jsoup Java HTML Parser"/>
+  <property name="project.groupId" value="org.jsoup"/>
+  <property name="project.artifactId" value="jsoup"/>
+  <property name="project.version" value="1.15.3"/>
+  <property name="project.description" value="jsoup is a Java library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods. jsoup implements the WHATWG HTML5 specification, and parses HTML to the same DOM as modern browsers do."/>
+  <property name="project.organization.name" value="Jonathan Hedley"/>
+  <property name="project.build.sourceEncoding" value="UTF-8"/>
+
+  <property name="build.finalName" value="${project.artifactId}-${project.version}"/>
+  <property name="build.dir" value="target"/>
+  <property name="build.outputDir" value="${build.dir}/classes"/>
+  <property name="build.srcDir.0" value="src/main/java"/>
+  <property name="build.javadocDir.0" value="src/main/javadoc"/>
+  <property name="build.resourceDir.0" value="src/main/java"/>
+  <property name="build.resourceDir.1" value="."/>
+
+  <property name="compiler.source" value="1.8"/>
+  <property name="compiler.target" value="${compiler.source}"/>
+
+  <property name="reporting.outputDirectory" value="${build.dir}/site"/>
+
+  <!-- ====================================================================== -->
+  <!-- Defining classpaths                                                    -->
+  <!-- ====================================================================== -->
+
+  <path id="build.classpath">
+    <fileset dir="lib">
+      <include name="**/*.jar"/>
+    </fileset>
+  </path>
+
+  <!-- ====================================================================== -->
+  <!-- Cleaning up target                                                     -->
+  <!-- ====================================================================== -->
+
+  <target name="clean" description="Clean the output directory">
+    <delete dir="${build.dir}"/>
+  </target>
+
+  <!-- ====================================================================== -->
+  <!-- Compilation target                                                     -->
+  <!-- ====================================================================== -->
+
+  <target name="compile" description="Compile the code">
+    <!-- Compiles ${build.srcDir.0} into ${build.outputDir}, then stages the
+         *.properties resources and the top-level LICENSE, README.md and
+         CHANGES files (under META-INF/) next to the classes, so that the
+         "package" target can jar the whole directory. -->
+    <mkdir dir="${build.outputDir}"/>
+    <!-- includeantruntime="false" keeps Ant's own jars off the compile
+         classpath and silences the warning Ant 1.8+ prints when the
+         attribute is missing. The source encoding comes from the same
+         property that the javadoc target uses. -->
+    <javac destdir="${build.outputDir}"
+           encoding="${project.build.sourceEncoding}"
+           includeantruntime="false"
+           nowarn="false"
+           debug="true"
+           optimize="false"
+           deprecation="true"
+           target="${compiler.target}"
+           verbose="false"
+           fork="false"
+           source="${compiler.source}">
+      <src>
+        <pathelement location="${build.srcDir.0}"/>
+      </src>
+      <classpath refid="build.classpath"/>
+    </javac>
+    <!-- Resources that live alongside the Java sources -->
+    <copy todir="${build.outputDir}">
+      <fileset dir="${build.resourceDir.0}">
+        <include name="**/*.properties"/>
+      </fileset>
+    </copy>
+    <!-- Project-level documents shipped inside the jar under META-INF/ -->
+    <mkdir dir="${build.outputDir}/META-INF/"/>
+    <copy todir="${build.outputDir}/META-INF/">
+      <fileset dir="${build.resourceDir.1}">
+        <include name="LICENSE"/>
+        <include name="README.md"/>
+        <include name="CHANGES"/>
+      </fileset>
+    </copy>
+  </target>
+
+  <!-- ====================================================================== -->
+  <!-- Javadoc target                                                         -->
+  <!-- ====================================================================== -->
+
+  <target name="javadoc" description="Generates the Javadoc of the application">
+    <!-- Writes the API documentation for every package under
+         ${build.srcDir.0} to ${reporting.outputDirectory}/apidocs, using the
+         same classpath as the compile step. The former old="false"
+         attribute was dropped: it has had no effect since Ant 1.8.0. -->
+    <javadoc sourcepath="${build.srcDir.0}"
+             packagenames="*"
+             destdir="${reporting.outputDirectory}/apidocs"
+             access="protected"
+             verbose="false"
+             version="true"
+             use="true"
+             author="true"
+             splitindex="false"
+             nodeprecated="false"
+             nodeprecatedlist="false"
+             notree="false"
+             noindex="false"
+             nohelp="false"
+             nonavbar="false"
+             serialwarn="false"
+             source="${compiler.source}"
+             encoding="${project.build.sourceEncoding}"
+             linksource="false"
+             overview="${build.javadocDir.0}/overview.html"
+             doctitle="${project.name} ${project.version} API"
+             windowtitle="${project.name} ${project.version} API"
+             breakiterator="false">
+      <classpath refid="build.classpath"/>
+    </javadoc>
+  </target>
+
+  <!-- ====================================================================== -->
+  <!-- Package target                                                         -->
+  <!-- ====================================================================== -->
+
+  <target name="package" depends="compile" description="Package the application">
+    <!-- Jars everything under ${build.outputDir} (minus package.html files)
+         into ${build.dir}/${build.finalName}.jar and writes the OSGi and
+         Implementation-* manifest headers. Title and vendor are taken from
+         the project.* properties so they stay in step with the Bundle-*
+         headers when a value is overridden in build.properties. -->
+    <jar jarfile="${build.dir}/${build.finalName}.jar"
+         compress="true"
+         index="false"
+         basedir="${build.outputDir}"
+         excludes="**/package.html">
+      <manifest>
+        <attribute name="Automatic-Module-Name" value="${project.groupId}"/>
+        <attribute name="Bundle-Description" value="${project.description}"/>
+        <attribute name="Bundle-DocURL" value="https://jsoup.org/"/>
+        <attribute name="Bundle-License" value="https://jsoup.org/license"/>
+        <attribute name="Bundle-ManifestVersion" value="2"/>
+        <attribute name="Bundle-Name" value="${project.name}"/>
+        <attribute name="Bundle-SymbolicName" value="org.jsoup"/>
+        <attribute name="Bundle-Vendor" value="${project.organization.name}"/>
+        <attribute name="Bundle-Version" value="${project.version}"/>
+        <attribute name="Export-Package" value="org.jsoup.examples;uses:=&quot;org.jsoup.nodes&quot;;version=&quot;${project.version}&quot;,org.jsoup.helper;uses:=&quot;javax.annotation,javax.net.ssl,javax.xml.parsers,org.jsoup,org.jsoup.nodes,org.jsoup.parser,org.jsoup.select,org.w3c.dom&quot;;version=&quot;${project.version}&quot;,org.jsoup.internal;uses:=&quot;javax.annotation,javax.annotation.meta&quot;;version=&quot;${project.version}&quot;,org.jsoup.nodes;uses:=&quot;javax.annotation,org.jsoup,org.jsoup.helper,org.jsoup.parser,org.jsoup.select&quot;;version=&quot;${project.version}&quot;,org.jsoup.parser;uses:=&quot;javax.annotation,org.jsoup.nodes&quot;;version=&quot;${project.version}&quot;,org.jsoup.safety;uses:=&quot;org.jsoup.nodes&quot;;version=&quot;${project.version}&quot;,org.jsoup.select;uses:=&quot;javax.annotation,org.jsoup.nodes&quot;;version=&quot;${project.version}&quot;,org.jsoup;uses:=&quot;javax.annotation,javax.net.ssl,org.jsoup.nodes,org.jsoup.parser,org.jsoup.safety&quot;;version=&quot;${project.version}&quot;"/>
+        <attribute name="Implementation-Title" value="${project.name}"/>
+        <attribute name="Implementation-Vendor" value="${project.organization.name}"/>
+        <attribute name="Implementation-Version" value="${project.version}"/>
+        <attribute name="Import-Package" value="javax.annotation.meta;resolution:=optional,javax.annotation;resolution:=optional,javax.net.ssl,javax.xml.namespace,javax.xml.parsers,javax.xml.transform,javax.xml.transform.dom,javax.xml.transform.stream,javax.xml.xpath,org.jsoup,org.jsoup.helper,org.jsoup.internal,org.jsoup.nodes,org.jsoup.parser,org.jsoup.safety,org.jsoup.select,org.w3c.dom"/>
+        <attribute name="Require-Capability" value="osgi.ee;filter:=&quot;(&amp;(osgi.ee=JavaSE)(version=${compiler.target}))&quot;"/>
+      </manifest>
+    </jar>
+  </target>
+
+  <!-- ====================================================================== -->
+  <!-- A dummy target for the package named after the type it creates         -->
+  <!-- ====================================================================== -->
+
+  <target name="jar" depends="package" description="Builds the jar for the application"/>
+
+</project>
--- a/jsoup.changes
+++ b/jsoup.changes
@ -0,0 +1,206 @@
+-------------------------------------------------------------------
+Thu Oct 20 12:57:16 UTC 2022 - Fridrich Strba <fstrba@suse.com>
+
+- Fix typo in the ant *-build.xml file that caused errors while
+  building eclipse
+
+-------------------------------------------------------------------
+Mon Oct 17 05:42:39 UTC 2022 - Fridrich Strba <fstrba@suse.com>
+
+- Upgrade to upstream version 1.15.3
+- Changes of 1.15.3
+  * Security
+    + Fixed bsc#1203459 (CVE-2022-36033), an issue where the jsoup
+      cleaner may incorrectly sanitize crafted XSS attempts if
+      SafeList.preserveRelativeLinks is enabled. See the security
+      advisory for more details.
+  * Improvements
+    + The Cleaner will preserve the source position of cleaned
+      elements, if source tracking is enabled in the original parse.
+    + The error messages output from Validate are more descriptive.
+      Exceptions are now ValidationExceptions
+      (extending IllegalArgumentException). Stack traces do not
+      include the Validate class, to make it simpler to see where
+      the exception originated. Common validation errors including
+      malformed URLs and empty selector results have more explicit
+      error messages.
+    + Build Improvement: added implementation version and related
+      fields to the jar manifest.
+  * Bug Fixes
+    + The DataUtil would incorrectly read from InputStreams that
+      emitted reads less than the requested size. This led to
+      incorrect results when parsing from chunked server responses,
+      for example.
+- Changes of 1.15.2
+  * Improvements
+    + Added the ability to track the position (line, column, index)
+      in the original input source from where a given node was
+      parsed. Accessible via Node.sourceRange() and
+      Element.endSourceRange().
+    + Added Element.firstElementChild(), Element.lastElementChild(),
+      Node.firstChild(), Node.lastChild(), as convenient accessors
+      to those child nodes and elements.
+    + Added Element.expectFirst(), which is just like
+      Element.selectFirst(), but instead of returning a null if
+      there is no match, will throw an IllegalArgumentException.
+      This is useful if you want to simply abort processing if an
+      expected match is not found, such as in test cases.
+    + When pretty-printing HTML, doctypes are emitted on a newline
+      if there is a preceding comment.
+    + When pretty-printing, trim the leading and trailing spaces of
+      textnodes in block tags when possible, so that they are
+      indented correctly.
+    + In Element.selectXpath(), disable namespace awareness. This
+      makes it possible to always select elements by their simple
+      local name, regardless of whether an xmlns attribute was set.
+  * Bug Fixes
+    + When using the DataUtil.readToByteBuffer() method, such as in
+      Connection.Response.body(), if the document has not already
+      been parsed and must be read fully, and there is any maximum
+      buffer size being applied, only the default internal buffer
+      size was read.
+    + When serializing HTML, newlines in elements descending from a
+      pre tag were incorrectly skipped. That caused what should have
+      been preformatted output to instead be a run of text.
+    + When pretty-print serializing HTML, newlines separating
+      phrasing content (e.g. a <span> tag within a <p> tag) would be
+      incorrectly skipped, instead of normalized to a space.
+      Additionally, improved space normalization between other end
+      of line occurrences, and whitespace handling after a closing
+      </body>.
+- Changes of 1.15.1
+  * Changes
+    + Removed previously deprecated methods and classes (including
+      org.jsoup.safety.Whitelist; use org.jsoup.safety.Safelist
+      instead).
+  * Improvements
+    + When converting jsoup Documents to W3C Documents in W3CDom,
+      preserve HTML valid attribute names if the input document is
+      using the HTML syntax. (Previously, would always coerce using
+      the more restrictive XML syntax.)
+    + Added the :containsWholeText(text) selector, to match against
+      non-normalized Element text. That can be useful when elements
+      can only be distinguished by e.g. specific case, or leading
+      whitespace, etc.
+    + Added Element#wholeOwnText() to retrieve the original
+      (non-normalized) ownText of an Element. Also added the
+      :containsWholeOwnText(text) selector, to match against that.
+      BR elements are now treated as newlines in the wholeText
+      methods.
+    + Added the :matchesWholeText(regex) and
+      :matchesWholeOwnText(regex) selectors, to match against whole
+      (non-normalized, case sensitive) element text and own text,
+      respectively.
+    + When evaluating an XPath query against a context element, the
+      complete document is now visible to the query, vs only the
+      context element's sub-tree. This enables support for queries
+      outside (parent or sibling) the element, e.g.
+      ancestor-or-self::*.
+    + Allow a maxPaddingWidth on the indent level in OutputSettings
+      when pretty printing. This defaults to 30 to limit the indent
+      level for very deeply nested elements, and may be disabled by
+      setting to -1.
+    + When cloning a Node or an Element, the clone gets a cloned
+      OwnerDocument containing only that clone, so as to preserve
+      applicable settings, such as the Pretty Print settings.
+    + Added a convenience method Jsoup.parse(File).
+    + In the NodeTraversor, added default implementations for
+      NodeVisitor.tail() and NodeFilter.tail(), so that code using
+      only head() methods can be written as lambdas.
+    + In NodeTraversor, added support for removing nodes via
+      Node.remove() during NodeVisitor.head().
+    + Added Node.forEachNode(Consumer<Node>) and
+      Element.forEach(Consumer<Element>) methods, to efficiently
+      traverse the DOM with a functional interface.
+  * Bug Fixes
+    + Boolean attribute names should be case-insensitive, but were
+      not when the parser was configured to preserve case.
+    + When reading from SequenceInputStreams across the buffer, the
+      input stream was closed too early, resulting in missed
+      content.
+    + A comment with all dashes (<!----->) should not emit a parse
+      error.
+    + When throwing a SelectorParseException for an invalid
+      selector, don't try to String.format the input, as that could
+      throw an IllegalFormatException.
+    + When serializing HTML with Pretty Print enabled, extraneous
+      whitespace may be added on closing tags, or extra newlines may
+      be added at the end of script blocks.
+    + When copy-creating a Safelist from another, perform a
+      deep-copy of the original's settings, so that changes to the
+      original after creation do not affect the copy.
+    + Speed improvement when parsing constructed HTML containing
+      very deeply incorrectly stacked formatting elements with many
+      attributes.
+    + During parsing, a StackOverflowException was possible given
+      crafted HTML with hundreds of nested table elements followed
+      by invalid formatting elements.
+- Changes of 1.14.3
+  * Improvements
+    + Added native XPath support with Element.selectXpath(String)
+    + Added full support for the <template> tag, up to the HTML5
+      parser spec.
+    + Added support in CharacterReader to track newlines, so that
+      parse errors can be reported more intuitively.
+    + Tracked parse errors now have more details, including the
+      erroneous token, to help clarify the errors.
+    + Speed and memory optimizations for the :has(subquery)
+      selector.
+    + The :contains(text) and :containsOwn(text) selectors are now
+      whitespace normalized, aligning to the document text that they
+      are matching against.
+    + In Element, speed optimized adopting all of an element's child
+      nodes into a currently empty element. Improves the HTML
+      adoption agency algorithm when adopting elements with many
+      children.
+    + Increased the parse speed when in RCData (e.g. <title>) and
+      unescaped <tag> tokens are found, by memoizing the </title>
+      scan and reducing GC.
+    + When parsing custom tags (in HTML or XML), added a flyweight
+      cache on Tag.valueOf(String) to reduce memory overhead when
+      many tags are repeated. Also tuned other areas of the parser
+      when many very deeply stacked custom elements were present.
+  * Bug Fixes
+    + The OSGi bundle meta-data incorrectly set a version on the
+      import of javax.annotation (used as a build-time dependency
+      for nullability assertions).
+    + When tracking errors or checking for validity in the Cleaner,
+      errors were incorrectly raised for missing optional closing
+      tags.
+    + The Attributes.equals() method was sensitive to the order of
+      its contents, but it should not be.
+    + When the HTML parser was configured to preserve case, Element
+      text methods would miss adding whitespace for BR tags.
+    + Attribute names are now normalized & validated correctly for
+      the specific output syntax (HTML or XML). Previously,
+      syntactically invalid attribute names could be output by the
+      html() methods. Such attributes are still available in the
+      DOM, and will be normalized if possible on output.
+    + Fixed an IOOB when an empty select tag was followed by a body
+      tag that needed reparenting.
+  * Build Improvements
+    + Fixed nullability annotations for Node.equals(Object) and
+      other equals methods.
+    + Added JDK 17 to the CI builds.
+
+-------------------------------------------------------------------
+Fri Aug 27 06:57:23 UTC 2021 - Fridrich Strba <fstrba@suse.com>
+
+- Upgrade to upstream version 1.14.2
+  * fixes bsc#1189749, CVE-2021-37714
+- Generate tarball using source service instead of a script
+
+-------------------------------------------------------------------
+Fri Feb 22 22:39:00 UTC 2019 - Fridrich Strba <fstrba@suse.com>
+
+- Remove from the tarball the non-free test data
+
+-------------------------------------------------------------------
+Sat Feb  2 18:52:01 UTC 2019 - Jan Engelhardt <jengelh@inai.de>
+
+- Ensure neutrality of descriptions.
+
+-------------------------------------------------------------------
+Fri Feb  1 08:53:28 UTC 2019 - Fridrich Strba <fstrba@suse.com>
+
+- Initial packaging of jsoup version 1.11.3
+- Added jsoup-build.xml file to build with ant
--- a/jsoup.spec
+++ b/jsoup.spec
@ -0,0 +1,93 @@
+#
+# spec file for package jsoup
+#
+# Copyright (c) 2022 SUSE LLC
+#
+# All modifications and additions to the file contributed by third parties
+# remain the property of their copyright owners, unless otherwise agreed
+# upon. The license for this file, and modifications and additions to the
+# file, is the same license as for the pristine package itself (unless the
+# license for the pristine package is not an Open Source License, in which
+# case the license is the MIT License). An "Open Source License" is a
+# license that conforms to the Open Source Definition (Version 1.9)
+# published by the Open Source Initiative.
+
+# Please submit bugfixes or comments via https://bugs.opensuse.org/
+#
+
+
+Name:           jsoup
+Version:        1.15.3
+Release:        0
+Summary:        Java library for working with HTML
+License:        MIT
+Group:          Development/Libraries/Java
+URL:            https://jsoup.org/
+# Tarball is generated by the OBS source services (tar_scm + recompress)
+Source0:        %{name}-%{version}.tar.xz
+Source1:        %{name}-build.xml
+BuildRequires:  ant
+BuildRequires:  fdupes
+BuildRequires:  java-devel >= 1.8
+BuildRequires:  javapackages-local
+BuildRequires:  jsr-305
+BuildArch:      noarch
+
+%description
+jsoup is a Java library for working with HTML.
+It provides an API for extracting and manipulating data,
+using DOM, CSS, and jquery-like methods.
+
+jsoup implements the WHATWG HTML5 specification.
+
+ - scrapes and parses HTML from a URL, file, or string
+ - finds and extracts data, using DOM traversal or CSS selectors
+ - manipulates the HTML elements, attributes, and text
+ - cleans user-submitted content against a safelist,
+   to prevent XSS attacks
+ - outputs tidied HTML
+
+jsoup can deal with invalid HTML tag soup.
+
+%package javadoc
+Summary:        Javadoc for %{name}
+Group:          Documentation/HTML
+
+%description javadoc
+API documentation for %{name}.
+
+%prep
+%setup -q
+cp %{SOURCE1} .
+
+%pom_remove_plugin :animal-sniffer-maven-plugin
+%pom_remove_plugin :japicmp-maven-plugin
+%pom_remove_plugin :maven-failsafe-plugin
+
+%build
+mkdir -p lib
+build-jar-repository -s lib jsr-305
+%{ant} -f %{name}-build.xml jar javadoc
+
+%install
+# jar: installed unversioned as %{_javadir}/%{name}/%{name}.jar
+install -dm 0755 %{buildroot}%{_javadir}/%{name}
+install -pm 0644 target/%{name}-%{version}.jar %{buildroot}%{_javadir}/%{name}/%{name}.jar
+# pom: installed next to the jar's Maven metadata and registered in the depmap
+install -dm 0755 %{buildroot}%{_mavenpomdir}/%{name}
+install -pm 0644 pom.xml %{buildroot}%{_mavenpomdir}/%{name}/%{name}.pom
+%add_maven_depmap %{name}/%{name}.pom %{name}/%{name}.jar
+# javadoc: copy the generated tree. The previous "install -pdm" call only
+# created directories (with -d every operand is a directory to create), so
+# the javadoc subpackage ended up empty.
+install -dm 0755 %{buildroot}%{_javadocdir}
+cp -pr target/site/apidocs %{buildroot}%{_javadocdir}/%{name}
+%fdupes -s %{buildroot}%{_javadocdir}
+
+%files -f .mfiles
+%doc README.md CHANGES
+%license LICENSE
+
+%files javadoc
+%license LICENSE
+%{_javadocdir}/%{name}
+
+%changelog