#
# spec file
#
# Copyright (c) 2023 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.

# Please submit bugfixes or comments via https://bugs.opensuse.org/
#


# Build flavor selection. The flavor macro is filled from the BUILD_FLAVOR
# placeholder (presumably substituted by the build service for multibuild).
# Each test-pyXY flavor appends a name suffix, skips the other two Python
# versions and enables the "test" build condition; the unflavored build keeps
# the empty suffix and leaves "test" disabled.
%define psuffix %{nil}
%global flavor @BUILD_FLAVOR@%{nil}
%if "%{flavor}" == "test-py39"
%define psuffix -test-py39
%define skip_python310 1
%define skip_python311 1
%bcond_without test
%endif
%if "%{flavor}" == "test-py310"
%define psuffix -test-py310
%define skip_python39 1
%define skip_python311 1
%bcond_without test
%endif
%if "%{flavor}" == "test-py311"
%define psuffix -test-py311
%define skip_python39 1
%define skip_python310 1
%bcond_without test
%endif
%if "%{flavor}" == ""
%bcond_with test
%endif

%{?sle15_python_module_pythons}
Name:           python-dask%{psuffix}
# ===> Note: python-dask MUST be updated in sync with python-distributed! <===
Version:        2023.12.0
Release:        0
Summary:        Minimal task scheduling abstraction
License:        BSD-3-Clause
URL:            https://dask.org
# SourceRepository: https://github.com/dask/dask
Source0:        https://files.pythonhosted.org/packages/source/d/dask/dask-%{version}.tar.gz
BuildRequires:  %{python_module base >= 3.9}
BuildRequires:  %{python_module packaging >= 20.0}
BuildRequires:  %{python_module pip}
BuildRequires:  %{python_module setuptools}
BuildRequires:  %{python_module versioneer-toml >= 0.29}
BuildRequires:  %{python_module wheel}
BuildRequires:  fdupes
BuildRequires:  python-rpm-macros
Requires:       python-PyYAML >= 5.3.1
Requires:       python-click >= 8.1
Requires:       python-cloudpickle >= 1.5
Requires:       python-fsspec >= 2021.9
Requires:       python-importlib-metadata >= 4.13.0
Requires:       python-packaging >= 20.0
Requires:       python-partd >= 1.2.0
Requires:       python-toolz >= 0.10.0
Requires(post): update-alternatives
Requires(postun):update-alternatives
Recommends:     %{name}-array = %{version}
Recommends:     %{name}-dataframe = %{version}
Recommends:     %{name}-distributed = %{version}
Suggests:       %{name}-complete = %{version}
Suggests:       %{name}-diagnostics = %{version}
# SECTION https://docs.dask.org/en/stable/install.html#optional-dependencies
Suggests:       python-SQLAlchemy >= 1.4.16
Suggests:       python-cityhash >= 0.2.4
Suggests:       python-fastparquet >= 0.8.2
Suggests:       python-gcsfs >= 2021.9.0
Suggests:       python-crick >= 0.0.3
Suggests:       python-cytoolz >= 0.10.1
Suggests:       python-dask-ml >= 1.4.0
Suggests:       python-fastavro >= 0.22.6
Suggests:       python-graphviz >= 0.8.4
Suggests:       python-h5py >= 2.10.0
Suggests:       python-psutil >= 0.5.7
Suggests:       python-pyarrow >= 14.0.1
Suggests:       python-matplotlib
Suggests:       python-mimesis >= 5.3.0
Suggests:       python-mmh3 >= 2.5.1
Suggests:       python-sparse >= 0.12.0
Suggests:       python-s3fs >= 0.4.0
Suggests:       python-xxhash >= 1.4.1
Suggests:       python-zarr >= 2.12.0
# /SECTION
# The bag, delayed, dot and multiprocessing names are provided and obsoleted
# by this core package.
Provides:       %{name}-bag = %{version}-%{release}
Obsoletes:      %{name}-bag < %{version}-%{release}
Provides:       %{name}-delayed = %{version}-%{release}
Obsoletes:      %{name}-delayed < %{version}-%{release}
Provides:       %{name}-dot = %{version}-%{release}
Obsoletes:      %{name}-dot < %{version}-%{release}
Provides:       %{name}-multiprocessing = %{version}-%{release}
Obsoletes:      %{name}-multiprocessing < %{version}-%{release}
BuildArch:      noarch
%if %{with test}
# test that we specified all requirements correctly in the core
# and subpackages by only requiring dask-test (= [complete] + pytest) and optional extras
BuildRequires:  %{python_module dask-test = %{version}}
# SECTION additional optionally tested (importorskip) packages
BuildRequires:  %{python_module SQLAlchemy >= 1.4.16}
BuildRequires:  %{python_module cachey}
BuildRequires:  %{python_module fastparquet >= 0.8.0}
# optional zarr increases fsspec minimum to 0.8.4 if present
BuildRequires:  %{python_module fsspec >= 0.8.4}
BuildRequires:  %{python_module h5py}
BuildRequires:  %{python_module ipython}
BuildRequires:  %{python_module jsonschema}
BuildRequires:  %{python_module matplotlib}
BuildRequires:  %{python_module mimesis}
BuildRequires:  %{python_module multipledispatch}
BuildRequires:  %{python_module numba}
# https://github.com/dask/partd/issues/66, https://github.com/dask/dask/pull/10176
BuildRequires:  %{python_module partd >= 1.4.0}
# snappy required for using fastparquet
BuildRequires:  %{python_module python-snappy}
BuildRequires:  %{python_module requests}
BuildRequires:  %{python_module scikit-image}
BuildRequires:  %{python_module scipy}
BuildRequires:  %{python_module sparse}
BuildRequires:  %{python_module tables}
BuildRequires:  %{python_module xarray}
BuildRequires:  %{python_module zarr}
# /SECTION
%endif
%python_subpackages

# Description of the core package; the subpackages below repeat this text and
# append their own closing paragraph.
%description
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.

%package complete
# This must have a Requires for dask and all the dask subpackages
# (array, dataframe, diagnostics, distributed), plus lz4 and pyarrow.
Summary:        All dask components
Requires:       %{name} = %{version}
Requires:       %{name}-array = %{version}
Requires:       %{name}-dataframe = %{version}
Requires:       %{name}-diagnostics = %{version}
Requires:       %{name}-distributed = %{version}
Requires:       python-lz4 >= 4.3.2
Requires:       python-pyarrow >= 7
Provides:       %{name}-all = %{version}-%{release}
Obsoletes:      %{name}-all < %{version}-%{release}

%description complete
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.

This package pulls in all the optional dask components.

%package array
# Carries the dask/array/ tree (without its tests); depends on the core
# package, the delayed name it provides, and numpy.
Summary:        Numpy-like array data structure for dask
Requires:       %{name} = %{version}
Requires:       %{name}-delayed = %{version}
Requires:       python-numpy >= 1.21
Recommends:     python-scipy

%description array
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.

This package contains the dask array class.

Dask arrays implement a subset of the NumPy interface on large
arrays using blocked algorithms and task scheduling.

%package dataframe
# Carries the dask/dataframe/ tree (without its tests); depends on the core
# package, the array subpackage, the bag name provided by core, and pandas.
Summary:        Pandas-like DataFrame data structure for dask
Requires:       %{name} = %{version}
Requires:       %{name}-array = %{version}
Requires:       %{name}-bag = %{version}
Requires:       python-pandas >= 1.3

%description dataframe
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.

This package contains the dask DataFrame class.

A Dask DataFrame is a large parallel dataframe composed of many
smaller Pandas dataframes, split along the index. These pandas
dataframes may live on disk for larger-than-memory computing
on a single machine, or on many different machines in a cluster.

%package distributed
# Meta package: its file list holds only the license; the actual code comes
# from python-distributed, pinned to the same version as dask.
Summary:        Interface with the distributed task scheduler in dask
Requires:       %{name} = %{version}
# dask and distributed are always updated together
Requires:       python-distributed = %{version}

%description distributed
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.

This meta package pulls in the distributed module into the dask namespace.

%package diagnostics
# Carries the dask/diagnostics/ tree (without its tests); depends on the core
# package, Jinja2 and bokeh.
Summary:        Diagnostics for dask
Requires:       %{name} = %{version}
Requires:       python-Jinja2 >= 2.10.3
Requires:       python-bokeh >= 3.1

%description diagnostics
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.

This package contains the dask.diagnostics module

%package test
# Collects every tests/ directory of the dask tree and requires the complete
# meta package plus the pytest tooling needed to run them.
Summary:        The test submodules of the python-dask package
Requires:       %{name}-complete = %{version}
Requires:       python-pandas
# SECTION pandas[test]
Requires:       python-hypothesis
Requires:       python-pytest-asyncio
# /SECTION
Requires:       python-pre-commit
Requires:       python-pytest
Requires:       python-pytest-rerunfailures
Requires:       python-pytest-timeout
Requires:       python-pytest-xdist

%description test
Dask is a flexible library for parallel computing in Python.
This subpackage provides the .test submodules in the sitelib required for
unit testing dask.

%prep
# Unpack the dask tarball and apply any patches with -p1.
%autosetup -p1 -n dask-%{version}
# Delete every pyproject.toml line mentioning addopts, so the pytest options
# come only from the command line used in the check section.
sed -i '/addopts/d' pyproject.toml

%build
# Build the wheel(s) through the pyproject macro from python-rpm-macros.
%pyproject_wheel

%install
%if !%{with test}
# Only the unflavored (non-test) build installs anything.
%pyproject_install
%python_clone -a %{buildroot}%{_bindir}/dask
# The expansion below runs two sed edits per Python flavor:
#  1. dask/distributed.py: within the conda/pip installation hint, drop the
#     conda line and turn the pip command into a zypper command that names
#     the flavor-specific python-dask-EXTRA subpackage.
#  2. dask/array, dask/bag and dask/dataframe __init__.py: replace the whole
#     hint with a request to file a bug report.
# The sed scripts are kept exactly as they were; do not re-indent the lines
# inside the quotes.
%{python_expand # give SUSE specific install instructions
sed -E -i '/Please either conda or pip install/,/python -m pip install/ {
s/either conda or pip//;
/conda install/ d;
s/python -m pip install "dask\[(.*)\]".*pip install/zypper in $python-dask-\1/
}' \
  %{buildroot}%{$python_sitelib}/dask/distributed.py
sed -E -i '/Please either conda or pip install/,/python -m pip install/ c \
"Please file a bug report https://bugzilla.opensuse.org and\\n"\
"report the missing requirements."' \
  %{buildroot}%{$python_sitelib}/dask/array/__init__.py \
  %{buildroot}%{$python_sitelib}/dask/bag/__init__.py \
  %{buildroot}%{$python_sitelib}/dask/dataframe/__init__.py
}
%{python_compileall}
%python_expand %fdupes %{buildroot}%{$python_sitelib}
%endif

%if %{with test}
%check
# The test flavors run the suite against the installed dask-test package
# (pytest --pyargs dask), so the unpacked source tree must not shadow it.
# move away from importpath
mv dask dask.moved
# donttest accumulates a pytest -k expression of tests to deselect.
# different seed or mimesis version
donttest="(test_datasets and test_deterministic)"
# upstreams test if their ci is up to date, irrelevant for obs
donttest+=" or test_development_guidelines_matches_ci"
if [[ $(getconf LONG_BIT) -eq 32 ]]; then
  # https://github.com/dask/dask/issues/8620
  donttest+=" or test_query_with_meta"
  donttest+=" or test_repartition_npartitions"
  # further tests skipped on 32-bit only (no upstream reference recorded)
  donttest+=" or test_pandas_multiindex"
  donttest+=" or test_categorize_info"
fi
# (rarely) flaky on obs
donttest+=" or test_local_scheduler"
donttest+=" or (test_threaded and test_interrupt)"
# perhaps? rh#1968947#c4
donttest+=" or test_select_from_select"
# tries to get an IP address
donttest+=" or test_map_partitions_df_input"
# needs s3fs support in arrow
donttest+=" or test_pyarrow_filesystem_option_real_data"
%pytest --pyargs dask -n auto -r fE -m "not network" -k "not ($donttest)" --reruns 3 --reruns-delay 3
%endif

# Scriptlets for the dask executable, which is cloned per Python flavor in
# the install section; presumably these macros register and unregister it
# with update-alternatives (required for post and postun in the preamble).
%post
%python_install_alternative dask

%postun
%python_uninstall_alternative dask

%if !%{with test}
%files %{python_files}
# Core package: everything under dask/ except the array, dataframe and
# diagnostics trees and all tests directories, which go to the subpackages.
%doc README.rst
%license LICENSE.txt
%python_alternative %{_bindir}/dask
%{python_sitelib}/dask/
%{python_sitelib}/dask-%{version}.dist-info
%exclude %{python_sitelib}/dask/array/
%exclude %{python_sitelib}/dask/dataframe/
%exclude %{python_sitelib}/dask/diagnostics
%exclude %{python_sitelib}/dask/tests
%exclude %{python_sitelib}/dask/bag/tests
%exclude %{python_sitelib}/dask/bytes/tests
%exclude %{python_sitelib}/dask/widgets/tests
# NOTE(review): only the bytecode of delayed and dot is excluded here and no
# other file list picks it up, while the core package now provides the
# delayed and dot names itself - confirm these two excludes are still wanted.
%pycache_only %exclude %{python_sitelib}/dask/__pycache__/delayed*.pyc
%pycache_only %exclude %{python_sitelib}/dask/__pycache__/dot.*

%files %{python_files complete}
%license LICENSE.txt

%files %{python_files array}
%license LICENSE.txt
%{python_sitelib}/dask/array/
%exclude %{python_sitelib}/dask/array/tests

%files %{python_files dataframe}
%license LICENSE.txt
%{python_sitelib}/dask/dataframe/
%exclude %{python_sitelib}/dask/dataframe/tests
%exclude %{python_sitelib}/dask/dataframe/io/tests
%exclude %{python_sitelib}/dask/dataframe/tseries/tests

%files %{python_files distributed}
%license LICENSE.txt

%files %{python_files diagnostics}
%license LICENSE.txt
%{python_sitelib}/dask/diagnostics/
%exclude %{python_sitelib}/dask/diagnostics/tests

%files %{python_files test}
# Every tests directory excluded from the other file lists above.
%license LICENSE.txt
%{python_sitelib}/dask/tests
%{python_sitelib}/dask/bytes/tests
%{python_sitelib}/dask/widgets/tests
%{python_sitelib}/dask/array/tests
%{python_sitelib}/dask/bag/tests
%{python_sitelib}/dask/dataframe/tests
%{python_sitelib}/dask/dataframe/io/tests
%{python_sitelib}/dask/dataframe/tseries/tests
%{python_sitelib}/dask/diagnostics/tests
%endif

%changelog