Sync from SUSE:SLFO:Main jsoup revision 1c663f249efc701bae2a322d5f518ace

This commit is contained in:
Adrian Schröter 2024-05-03 14:06:53 +02:00
commit 32a0b409fc
6 changed files with 496 additions and 0 deletions

23
.gitattributes vendored Normal file
View File

@ -0,0 +1,23 @@
## Default LFS
# Every pattern below carries the same attribute set: the lfs filter, diff and
# merge drivers are selected and the text attribute is unset (-text).
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text

16
_service Normal file
View File

@ -0,0 +1,16 @@
<services>
  <!-- Step 1: fetch the jsoup-1.15.3 revision from the upstream git
       repository, leaving src/test/resources out of the archive. The version
       is derived from the parent tag matching jsoup-*, rewritten with the
       jsoup-(.*) pattern. -->
  <service name="tar_scm" mode="disabled">
    <param name="scm">git</param>
    <param name="url">https://github.com/jhy/jsoup.git</param>
    <param name="revision">jsoup-1.15.3</param>
    <param name="match-tag">jsoup-*</param>
    <param name="versionformat">@PARENT_TAG@</param>
    <param name="versionrewrite-pattern">jsoup-(.*)</param>
    <param name="exclude">src/test/resources</param>
  </service>

  <!-- Step 2: compress the resulting *.tar with xz. -->
  <service name="recompress" mode="disabled">
    <param name="file">*.tar</param>
    <param name="compression">xz</param>
  </service>

  <!-- Step 3: set_version, which takes no parameters here. -->
  <service name="set_version" mode="disabled"/>
</services>

BIN
jsoup-1.15.3.tar.xz (Stored with Git LFS) Normal file

Binary file not shown.

155
jsoup-build.xml Normal file
View File

@ -0,0 +1,155 @@
<?xml version="1.0" encoding="UTF-8"?>
<project name="jsoup" default="package" basedir=".">
<!-- ====================================================================== -->
<!-- Build environment properties -->
<!-- ====================================================================== -->
<!-- Properties from build.properties are loaded before the defaults below are declared -->
<property file="build.properties"/>
<!-- Project identity: these values feed the jar file name and the manifest written by the package target -->
<property name="project.name" value="jsoup Java HTML Parser"/>
<property name="project.groupId" value="org.jsoup"/>
<property name="project.artifactId" value="jsoup"/>
<property name="project.version" value="1.15.3"/>
<property name="project.description" value="jsoup is a Java library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods. jsoup implements the WHATWG HTML5 specification, and parses HTML to the same DOM as modern browsers do."/>
<property name="project.organization.name" value="Jonathan Hedley"/>
<property name="project.build.sourceEncoding" value="UTF-8"/>
<!-- Build layout: the jar is named artifactId-version and everything is generated below target/ -->
<property name="build.finalName" value="${project.artifactId}-${project.version}"/>
<property name="build.dir" value="target"/>
<property name="build.outputDir" value="${build.dir}/classes"/>
<property name="build.srcDir.0" value="src/main/java"/>
<property name="build.javadocDir.0" value="src/main/javadoc"/>
<!-- Resource roots: .0 is scanned for *.properties files, .1 (the project root) supplies LICENSE, README.md and CHANGES -->
<property name="build.resourceDir.0" value="src/main/java"/>
<property name="build.resourceDir.1" value="."/>
<!-- Java language level; the target level follows the source level -->
<property name="compiler.source" value="1.8"/>
<property name="compiler.target" value="${compiler.source}"/>
<!-- Parent directory of the generated API documentation (apidocs is created below it) -->
<property name="reporting.outputDirectory" value="${build.dir}/site"/>
<!-- ====================================================================== -->
<!-- Defining classpaths -->
<!-- ====================================================================== -->
<path id="build.classpath">
  <!-- Every jar found at any depth below lib/ is put on the classpath -->
  <fileset dir="lib" includes="**/*.jar"/>
</path>
<!-- ====================================================================== -->
<!-- Cleaning up target -->
<!-- ====================================================================== -->
<target name="clean"
        description="Clean the output directory">
  <!-- Removes the whole build directory, i.e. classes, jar and generated docs -->
  <delete dir="${build.dir}"/>
</target>
<!-- ====================================================================== -->
<!-- Compilation target -->
<!-- ====================================================================== -->
<target name="compile" description="Compile the code">
  <mkdir dir="${build.outputDir}"/>

  <!-- Compile the main sources against the jars collected in build.classpath.
       includeantruntime is switched off so that Ant's own libraries are not
       added to the compile classpath (this also silences Ant's warning about
       the attribute being unset). The source encoding is taken from
       project.build.sourceEncoding, the same property the javadoc target
       reads, instead of a second hard-coded literal. -->
  <javac destdir="${build.outputDir}"
         encoding="${project.build.sourceEncoding}"
         includeantruntime="false"
         nowarn="false"
         debug="true"
         optimize="false"
         deprecation="true"
         target="${compiler.target}"
         verbose="false"
         fork="false"
         source="${compiler.source}">
    <src>
      <pathelement location="${build.srcDir.0}"/>
    </src>
    <classpath refid="build.classpath"/>
  </javac>

  <!-- Properties files that live next to the sources are copied into the class output -->
  <copy todir="${build.outputDir}">
    <fileset dir="${build.resourceDir.0}">
      <include name="**/*.properties"/>
    </fileset>
  </copy>

  <!-- License and top-level documentation are placed under META-INF/ of the class output -->
  <mkdir dir="${build.outputDir}/META-INF/"/>
  <copy todir="${build.outputDir}/META-INF/">
    <fileset dir="${build.resourceDir.1}">
      <include name="LICENSE"/>
      <include name="README.md"/>
      <include name="CHANGES"/>
    </fileset>
  </copy>
</target>
<!-- ====================================================================== -->
<!-- Javadoc target -->
<!-- ====================================================================== -->
<target name="javadoc"
        description="Generates the Javadoc of the application">
  <!-- API documentation for all packages below the main source directory,
       written to the apidocs directory under the reporting output directory.
       Attributes are grouped as: input, output, titles, page options. -->
  <javadoc sourcepath="${build.srcDir.0}"
           packagenames="*"
           source="${compiler.source}"
           encoding="${project.build.sourceEncoding}"
           overview="${build.javadocDir.0}/overview.html"
           destdir="${reporting.outputDirectory}/apidocs"
           doctitle="${project.name} ${project.version} API"
           windowtitle="${project.name} ${project.version} API"
           access="protected"
           author="true"
           version="true"
           use="true"
           old="false"
           verbose="false"
           splitindex="false"
           linksource="false"
           breakiterator="false"
           serialwarn="false"
           nodeprecated="false"
           nodeprecatedlist="false"
           notree="false"
           noindex="false"
           nohelp="false"
           nonavbar="false">
    <classpath refid="build.classpath"/>
  </javadoc>
</target>
<!-- ====================================================================== -->
<!-- Package target -->
<!-- ====================================================================== -->
<target name="package" depends="compile" description="Package the application">
  <!-- Build the jar from the compiled classes (package.html files excluded).
       destfile replaces the deprecated jarfile attribute; the archive path is
       unchanged. -->
  <jar destfile="${build.dir}/${build.finalName}.jar"
       compress="true"
       index="false"
       basedir="${build.outputDir}"
       excludes="**/package.html">
    <!-- Manifest with JPMS, OSGi bundle and Implementation-* metadata.
         Implementation-Title and Implementation-Vendor read the same
         properties as Bundle-Name and Bundle-Vendor so the two sets of
         entries cannot drift apart. -->
    <manifest>
      <attribute name="Automatic-Module-Name" value="${project.groupId}"/>
      <attribute name="Bundle-Description" value="${project.description}"/>
      <attribute name="Bundle-DocURL" value="https://jsoup.org/"/>
      <attribute name="Bundle-License" value="https://jsoup.org/license"/>
      <attribute name="Bundle-ManifestVersion" value="2"/>
      <attribute name="Bundle-Name" value="${project.name}"/>
      <attribute name="Bundle-SymbolicName" value="org.jsoup"/>
      <attribute name="Bundle-Vendor" value="${project.organization.name}"/>
      <attribute name="Bundle-Version" value="${project.version}"/>
      <attribute name="Export-Package" value="org.jsoup.examples;uses:=&quot;org.jsoup.nodes&quot;;version=&quot;${project.version}&quot;,org.jsoup.helper;uses:=&quot;javax.annotation,javax.net.ssl,javax.xml.parsers,org.jsoup,org.jsoup.nodes,org.jsoup.parser,org.jsoup.select,org.w3c.dom&quot;;version=&quot;${project.version}&quot;,org.jsoup.internal;uses:=&quot;javax.annotation,javax.annotation.meta&quot;;version=&quot;${project.version}&quot;,org.jsoup.nodes;uses:=&quot;javax.annotation,org.jsoup,org.jsoup.helper,org.jsoup.parser,org.jsoup.select&quot;;version=&quot;${project.version}&quot;,org.jsoup.parser;uses:=&quot;javax.annotation,org.jsoup.nodes&quot;;version=&quot;${project.version}&quot;,org.jsoup.safety;uses:=&quot;org.jsoup.nodes&quot;;version=&quot;${project.version}&quot;,org.jsoup.select;uses:=&quot;javax.annotation,org.jsoup.nodes&quot;;version=&quot;${project.version}&quot;,org.jsoup;uses:=&quot;javax.annotation,javax.net.ssl,org.jsoup.nodes,org.jsoup.parser,org.jsoup.safety&quot;;version=&quot;${project.version}&quot;"/>
      <attribute name="Implementation-Title" value="${project.name}"/>
      <attribute name="Implementation-Vendor" value="${project.organization.name}"/>
      <attribute name="Implementation-Version" value="${project.version}"/>
      <attribute name="Import-Package" value="javax.annotation.meta;resolution:=optional,javax.annotation;resolution:=optional,javax.net.ssl,javax.xml.namespace,javax.xml.parsers,javax.xml.transform,javax.xml.transform.dom,javax.xml.transform.stream,javax.xml.xpath,org.jsoup,org.jsoup.helper,org.jsoup.internal,org.jsoup.nodes,org.jsoup.parser,org.jsoup.safety,org.jsoup.select,org.w3c.dom"/>
      <attribute name="Require-Capability" value="osgi.ee;filter:=&quot;(&amp;(osgi.ee=JavaSE)(version=${compiler.target}))&quot;"/>
    </manifest>
  </jar>
</target>
<!-- ====================================================================== -->
<!-- A dummy target for the package named after the type it creates -->
<!-- ====================================================================== -->
<target name="jar"
        depends="package"
        description="Builds the jar for the application"/>
</project>

206
jsoup.changes Normal file
View File

@ -0,0 +1,206 @@
-------------------------------------------------------------------
Thu Oct 20 12:57:16 UTC 2022 - Fridrich Strba <fstrba@suse.com>
- Fix typo in the ant *-build.xml file that caused errors while
building eclipse
-------------------------------------------------------------------
Mon Oct 17 05:42:39 UTC 2022 - Fridrich Strba <fstrba@suse.com>
- Upgrade to upstream version 1.15.3
- Changes of 1.15.3
* Security
+ Fixed bsc#1203459 (CVE-2022-36033), an issue where the jsoup
cleaner may incorrectly sanitize crafted XSS attempts if
SafeList.preserveRelativeLinks is enabled. See the security
advisory for more details.
* Improvements
+ The Cleaner will preserve the source position of cleaned
elements, if source tracking is enabled in the original parse.
+ The error messages output from Validate are more descriptive.
Exceptions are now ValidationExceptions
(extending IllegalArgumentException). Stack traces do not
include the Validate class, to make it simpler to see where
the exception originated. Common validation errors including
malformed URLs and empty selector results have more explicit
error messages.
+ Build Improvement: added implementation version and related
fields to the jar manifest.
* Bug Fixes
+ The DataUtil would incorrectly read from InputStreams that
emitted reads less than the requested size. This led to
incorrect results when parsing from chunked server responses,
for example.
- Changes of 1.15.2
* Improvements
+ Added the ability to track the position (line, column, index)
in the original input source from where a given node was
parsed. Accessible via Node.sourceRange() and
Element.endSourceRange().
+ Added Element.firstElementChild(), Element.lastElementChild(),
Node.firstChild(), Node.lastChild(), as convenient accessors
to those child nodes and elements.
+ Added Element.expectFirst(), which is just like
Element.selectFirst(), but instead of returning a null if
there is no match, will throw an IllegalArgumentException.
This is useful if you want to simply abort processing if an
expected match is not found, such as in test cases.
+ When pretty-printing HTML, doctypes are emitted on a newline
if there is a preceding comment.
+ When pretty-printing, trim the leading and trailing spaces of
textnodes in block tags when possible, so that they are
indented correctly.
+ In Element.selectXpath(), disable namespace awareness. This
makes it possible to always select elements by their simple
local name, regardless of whether an xmlns attribute was set.
* Bug Fixes
+ When using the DataUtil.readToByteBuffer() method, such as in
Connection.Response.body(), if the document has not already
been parsed and must be read fully, and there is any maximum
buffer size being applied, only the default internal buffer
size was read.
+ When serializing HTML, newlines in elements descending from a
pre tag were incorrectly skipped. That caused what should have
been preformatted output to instead be a run of text.
+ When pretty-print serializing HTML, newlines separating
phrasing content (e.g. a <span> tag within a <p> tag) would be
incorrectly skipped, instead of normalized to a space.
Additionally, improved space normalization between other end
of line occurrences, and whitespace handling after a closing
</body> tag.
- Changes of 1.15.1
* Changes
+ Removed previously deprecated methods and classes (including
org.jsoup.safety.Whitelist; use org.jsoup.safety.Safelist
instead).
* Improvements
+ When converting jsoup Documents to W3C Documents in W3CDom,
preserve HTML valid attribute names if the input document is
using the HTML syntax. (Previously, would always coerce using
the more restrictive XML syntax.)
+ Added the :containsWholeText(text) selector, to match against
non-normalized Element text. That can be useful when elements
can only be distinguished by e.g. specific case, or leading
whitespace, etc.
+ Added Element#wholeOwnText() to retrieve the original
(non-normalized) ownText of an Element. Also added the
:containsWholeOwnText(text) selector, to match against that.
BR elements are now treated as newlines in the wholeText
methods.
+ Added the :matchesWholeText(regex) and
:matchesWholeOwnText(regex) selectors, to match against whole
(non-normalized, case sensitive) element text and own text,
respectively.
+ When evaluating an XPath query against a context element, the
complete document is now visible to the query, vs only the
context element's sub-tree. This enables support for queries
outside (parent or sibling) the element, e.g.
ancestor-or-self::*.
+ Allow a maxPaddingWidth on the indent level in OutputSettings
when pretty printing. This defaults to 30 to limit the indent
level for very deeply nested elements, and may be disabled by
setting to -1.
+ When cloning a Node or an Element, the clone gets a cloned
OwnerDocument containing only that clone, so as to preserve
applicable settings, such as the Pretty Print settings.
+ Added a convenience method Jsoup.parse(File).
+ In the NodeTraversor, added default implementations for
NodeVisitor.tail() and NodeFilter.tail(), so that code using
only head() methods can be written as lambdas.
+ In NodeTraversor, added support for removing nodes via
Node.remove() during NodeVisitor.head().
+ Added Node.forEachNode(Consumer<Node>) and
Element.forEach(Consumer<Element>) methods, to efficiently
traverse the DOM with a functional interface.
* Bug Fixes
+ Boolean attribute names should be case-insensitive, but were
not when the parser was configured to preserve case.
+ When reading from SequenceInputStreams across the buffer, the
input stream was closed too early, resulting in missed
content.
+ A comment with all dashes (<!----->) should not emit a parse
error.
+ When throwing a SelectorParseException for an invalid
selector, don't try to String.format the input, as that could
throw an IllegalFormatException.
+ When serializing HTML with Pretty Print enabled, extraneous
whitespace may be added on closing tags, or extra newlines may
be added at the end of script blocks.
+ When copy-creating a Safelist from another, perform a
deep-copy of the original's settings, so that changes to the
original after creation do not affect the copy.
+ Speed improvement when parsing constructed HTML containing
very deeply incorrectly stacked formatting elements with many
attributes.
+ During parsing, a StackOverflowException was possible given
crafted HTML with hundreds of nested table elements followed
by invalid formatting elements.
- Changes of 1.14.3
* Improvements
+ Added native XPath support with Element.selectXpath(String)
+ Added full support for the <template> tag, up to the HTML5
parser spec.
+ Added support in CharacterReader to track newlines, so that
parse errors can be reported more intuitively.
+ Tracked parse errors now have more details, including the
erroneous token, to help clarify the errors.
+ Speed and memory optimizations for the :has(subquery)
selector.
+ The :contains(text) and :containsOwn(text) selectors are now
whitespace normalized, aligning to the document text that they
are matching against.
+ In Element, speed optimized adopting all of an element's child
nodes into a currently empty element. Improves the HTML
adoption agency algorithm when adopting elements with many
children.
+ Increased the parse speed when in RCData (e.g. <title>) and
unescaped <tag> tokens are found, by memoizing the </title>
scan and reducing GC.
+ When parsing custom tags (in HTML or XML), added a flyweight
cache on Tag.valueOf(String) to reduce memory overhead when
many tags are repeated. Also tuned other areas of the parser
when many very deeply stacked custom elements were present.
* Bug Fixes
+ The OSGi bundle meta-data incorrectly set a version on the
import of javax.annotation (used as a build-time dependency
for nullability assertions).
+ When tracking errors or checking for validity in the Cleaner,
errors were incorrectly raised for missing optional closing tags.
+ The Attributes.equals() method was sensitive to the order of
its contents, but it should not be.
+ When the HTML parser was configured to preserve case, Element
text methods would miss adding whitespace for BR tags.
+ Attribute names are now normalized & validated correctly for
the specific output syntax (HTML or XML). Previously,
syntactically invalid attribute names could be output by the
html() methods. Such attributes are still available in the
DOM, and will be normalized if possible on output.
+ Fixed an IOOB when an empty select tag was followed by a body
tag that needed reparenting.
* Build Improvements
+ Fixed nullability annotations for Node.equals(Object) and
other equals methods.
+ Added JDK 17 to the CI builds.
-------------------------------------------------------------------
Fri Aug 27 06:57:23 UTC 2021 - Fridrich Strba <fstrba@suse.com>
- Upgrade to upstream version 1.14.2
* fixes bsc#1189749, CVE-2021-37714
- Generate tarball using source service instead of a script
-------------------------------------------------------------------
Fri Feb 22 22:39:00 UTC 2019 - Fridrich Strba <fstrba@suse.com>
- Remove from the tarball the non-free test data
-------------------------------------------------------------------
Sat Feb 2 18:52:01 UTC 2019 - Jan Engelhardt <jengelh@inai.de>
- Ensure neutrality of descriptions.
-------------------------------------------------------------------
Fri Feb 1 08:53:28 UTC 2019 - Fridrich Strba <fstrba@suse.com>
- Initial packaging of jsoup version 1.11.3
- Added jsoup-build.xml file to build with ant

93
jsoup.spec Normal file
View File

@ -0,0 +1,93 @@
#
# spec file for package jsoup
#
# Copyright (c) 2022 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
Name: jsoup
Version: 1.15.3
Release: 0
Summary: Java library for working with HTML
License: MIT
Group: Development/Libraries/Java
URL: https://jsoup.org/
# Tarball is produced by the source services declared in _service
# (tar_scm followed by recompress to xz), not by a local script
Source0: %{name}-%{version}.tar.xz
# Ant build file; it is copied into the unpacked sources during prep
Source1: %{name}-build.xml
BuildRequires: ant
BuildRequires: fdupes
BuildRequires: java-devel >= 1.8
BuildRequires: javapackages-local
# Linked into lib/ for the Ant classpath in the build section
BuildRequires: jsr-305
BuildArch: noarch
%description
jsoup is a Java library for working with HTML.
It provides an API for extracting and manipulating data,
using DOM, CSS, and jquery-like methods.
jsoup implements the WHATWG HTML5 specification.
- scrapes and parses HTML from a URL, file, or string
- finds and extracts data, using DOM traversal or CSS selectors
- manipulates the HTML elements, attributes, and text
- cleans user-submitted content against a safelist,
to prevent XSS attacks
- outputs tidied HTML
jsoup can deal with invalid HTML tag soup.
%package javadoc
Summary: Javadoc for %{name}
Group: Documentation/HTML
%description javadoc
API documentation for %{name}.
%prep
%setup -q
# Place the Ant build file (Source1) at the top of the unpacked source tree
cp %{SOURCE1} .
# Strip three Maven plugins from the pom; pom.xml is installed later as the
# package pom. NOTE(review): presumably these plugins are irrelevant to the
# Ant-based build and unavailable in the build root; confirm.
%pom_remove_plugin :animal-sniffer-maven-plugin
%pom_remove_plugin :japicmp-maven-plugin
%pom_remove_plugin :maven-failsafe-plugin
%build
# lib/ is the directory the Ant build file collects its classpath jars from
mkdir -p lib
# Make the jsr-305 jar available in lib/
build-jar-repository -s lib jsr-305
# Run the jar and javadoc targets of the Ant build file copied in during prep
%{ant} -f %{name}-build.xml jar javadoc
%install
# jar: installed without the version in its file name
install -dm 0755 %{buildroot}%{_javadir}/%{name}
install -pm 0644 target/%{name}-%{version}.jar %{buildroot}%{_javadir}/%{name}/%{name}.jar
# pom: the upstream pom.xml, registered together with the jar
install -dm 0755 %{buildroot}%{_mavenpomdir}/%{name}
install -pm 0644 pom.xml %{buildroot}%{_mavenpomdir}/%{name}/%{name}.pom
%add_maven_depmap %{name}/%{name}.pom %{name}/%{name}.jar
# javadoc: create the destination directory, then copy the generated API
# documentation tree into it. "install -d" only creates directories and never
# copies content, so it must not be used for the copy itself; doing so leaves
# the javadoc subpackage with an empty directory.
install -dm 0755 %{buildroot}%{_javadocdir}/%{name}
cp -pr target/site/apidocs/* %{buildroot}%{_javadocdir}/%{name}/
%fdupes -s %{buildroot}%{_javadocdir}
%files -f .mfiles
# Main package: entries listed in .mfiles plus upstream documentation and license
%doc README.md CHANGES
%license LICENSE
%files javadoc
# javadoc subpackage: the API documentation directory and the license
%license LICENSE
%{_javadocdir}/%{name}
%changelog