python-dask/python-dask.spec

403 lines
14 KiB
RPMSpec

#
# spec file
#
# Copyright (c) 2023 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
# Multibuild setup: the BUILD_FLAVOR placeholder below is expected to be
# replaced with the flavor name by the build service and to stay empty for
# the default build. psuffix is appended to the package name further down.
%define psuffix %{nil}
%global flavor @BUILD_FLAVOR@%{nil}
# Every test-pyXY flavor sets its own name suffix, skips the two other
# Python versions and turns the test conditional on.
%if "%{flavor}" == "test-py39"
%define psuffix -test-py39
%define skip_python310 1
%define skip_python311 1
%bcond_without test
%endif
%if "%{flavor}" == "test-py310"
%define psuffix -test-py310
%define skip_python39 1
%define skip_python311 1
%bcond_without test
%endif
%if "%{flavor}" == "test-py311"
%define psuffix -test-py311
%define skip_python39 1
%define skip_python310 1
%bcond_without test
%endif
# Default flavor: the test conditional is off, so the install and files
# sections are active and the check section is left out.
%if "%{flavor}" == ""
%bcond_with test
%endif
# Expands to nothing on distributions that do not define this macro
%{?sle15_python_module_pythons}
Name: python-dask%{psuffix}
# ===> Note: python-dask MUST be updated in sync with python-distributed! <===
Version: 2023.12.0
Release: 0
Summary: Minimal task scheduling abstraction
License: BSD-3-Clause
URL: https://dask.org
# SourceRepository: https://github.com/dask/dask
Source0: https://files.pythonhosted.org/packages/source/d/dask/dask-%{version}.tar.gz
# Tooling for the wheel build and install done in the build and install sections
BuildRequires: %{python_module base >= 3.9}
BuildRequires: %{python_module packaging >= 20.0}
BuildRequires: %{python_module pip}
BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module versioneer-toml >= 0.29}
BuildRequires: %{python_module wheel}
BuildRequires: fdupes
BuildRequires: python-rpm-macros
# Runtime requirements of the core package
Requires: python-PyYAML >= 5.3.1
Requires: python-click >= 8.1
Requires: python-cloudpickle >= 1.5
Requires: python-fsspec >= 2021.9
Requires: python-importlib-metadata >= 4.13.0
Requires: python-packaging >= 20.0
Requires: python-partd >= 1.2.0
Requires: python-toolz >= 0.10.0
# The dask executable is registered with update-alternatives in the scriptlets
Requires(post): update-alternatives
Requires(postun):update-alternatives
# Weak dependencies on the subpackages defined in this file
Recommends: %{name}-array = %{version}
Recommends: %{name}-dataframe = %{version}
Recommends: %{name}-distributed = %{version}
Suggests: %{name}-complete = %{version}
Suggests: %{name}-diagnostics = %{version}
# SECTION https://docs.dask.org/en/stable/install.html#optional-dependencies
Suggests: python-SQLAlchemy >= 1.4.16
Suggests: python-cityhash >= 0.2.4
Suggests: python-fastparquet >= 0.8.2
Suggests: python-gcsfs >= 2021.9.0
Suggests: python-crick >= 0.0.3
Suggests: python-cytoolz >= 0.10.1
Suggests: python-dask-ml >= 1.4.0
Suggests: python-fastavro >= 0.22.6
Suggests: python-graphviz >= 0.8.4
Suggests: python-h5py >= 2.10.0
# NOTE(review): 0.5.7 looks like a transposed 5.7.0 - confirm against the upstream table linked above
Suggests: python-psutil >= 0.5.7
Suggests: python-pyarrow >= 14.0.1
Suggests: python-matplotlib
Suggests: python-mimesis >= 5.3.0
Suggests: python-mmh3 >= 2.5.1
Suggests: python-sparse >= 0.12.0
Suggests: python-s3fs >= 0.4.0
Suggests: python-xxhash >= 1.4.1
Suggests: python-zarr >= 2.12.0
# /SECTION
# Satisfy dependencies on, and replace older packages named, -bag, -delayed,
# -dot and -multiprocessing; these modules ship in the core package
Provides: %{name}-bag = %{version}-%{release}
Obsoletes: %{name}-bag < %{version}-%{release}
Provides: %{name}-delayed = %{version}-%{release}
Obsoletes: %{name}-delayed < %{version}-%{release}
Provides: %{name}-dot = %{version}-%{release}
Obsoletes: %{name}-dot < %{version}-%{release}
Provides: %{name}-multiprocessing = %{version}-%{release}
Obsoletes: %{name}-multiprocessing < %{version}-%{release}
BuildArch: noarch
%if %{with test}
# test that we specified all requirements correctly in the core
# and subpackages by only requiring dask-test (= [complete] + pytest) and optional extras
BuildRequires: %{python_module dask-test = %{version}}
# SECTION additional optionally tested (importorskip) packages
BuildRequires: %{python_module SQLAlchemy >= 1.4.16}
BuildRequires: %{python_module cachey}
# NOTE(review): the Suggests in the core package asks for fastparquet >= 0.8.2 - confirm that 0.8.0 is intended here
BuildRequires: %{python_module fastparquet >= 0.8.0}
# optional zarr increases fsspec minimum to 0.8.4 if present
BuildRequires: %{python_module fsspec >= 0.8.4}
BuildRequires: %{python_module h5py}
BuildRequires: %{python_module ipython}
BuildRequires: %{python_module jsonschema}
BuildRequires: %{python_module matplotlib}
BuildRequires: %{python_module mimesis}
BuildRequires: %{python_module multipledispatch}
BuildRequires: %{python_module numba}
# https://github.com/dask/partd/issues/66, https://github.com/dask/dask/pull/10176
BuildRequires: %{python_module partd >= 1.4.0}
# snappy required for using fastparquet
BuildRequires: %{python_module python-snappy}
BuildRequires: %{python_module requests}
BuildRequires: %{python_module scikit-image}
BuildRequires: %{python_module scipy}
BuildRequires: %{python_module sparse}
BuildRequires: %{python_module tables}
BuildRequires: %{python_module xarray}
BuildRequires: %{python_module zarr}
# /SECTION
%endif
# Macro from python-rpm-macros; expected to generate the package set for
# every Python flavor that is not skipped
%python_subpackages
%description
A flexible library for parallel computing in Python.
Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.
%package complete
# This must have a Requires for dask and all the dask subpackages
# Meta package: its files section lists only the license
Summary: All dask components
Requires: %{name} = %{version}
Requires: %{name}-array = %{version}
Requires: %{name}-dataframe = %{version}
Requires: %{name}-diagnostics = %{version}
Requires: %{name}-distributed = %{version}
Requires: python-lz4 >= 4.3.2
# NOTE(review): the core package suggests pyarrow >= 14.0.1 while only >= 7 is required here - confirm the intended minimum
Requires: python-pyarrow >= 7
# Satisfy dependencies on, and replace older packages named, -all
Provides: %{name}-all = %{version}-%{release}
Obsoletes: %{name}-all < %{version}-%{release}
%description complete
A flexible library for parallel computing in Python.
Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.
This package pulls in all the optional dask components.
%package array
# Ships dask/array without its tests; the -delayed name required below is
# provided by the core package
Summary: Numpy-like array data structure for dask
Requires: %{name} = %{version}
Requires: %{name}-delayed = %{version}
Requires: python-numpy >= 1.21
Recommends: python-scipy
%description array
A flexible library for parallel computing in Python.
Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.
This package contains the dask array class.
Dask arrays implement a subset of the NumPy interface on large
arrays using blocked algorithms and task scheduling.
%package dataframe
# Ships dask/dataframe without its tests; needs the array subpackage and the
# -bag name, which the core package provides
Summary: Pandas-like DataFrame data structure for dask
Requires: %{name} = %{version}
Requires: %{name}-array = %{version}
Requires: %{name}-bag = %{version}
Requires: python-pandas >= 1.3
%description dataframe
A flexible library for parallel computing in Python.
Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.
This package contains the dask DataFrame class.
A Dask DataFrame is a large parallel dataframe composed of many
smaller Pandas dataframes, split along the index. These pandas
dataframes may live on disk for larger-than-memory computing
on a single machine, or on many different machines in a cluster.
%package distributed
# Dependency-only package: its files section lists nothing but the license
Summary: Interface with the distributed task scheduler in dask
Requires: %{name} = %{version}
# dask and distributed are always updated together
Requires: python-distributed = %{version}
%description distributed
A flexible library for parallel computing in Python.
Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.
This meta package pulls in the distributed module into the dask namespace.
%package diagnostics
# Ships dask/diagnostics, which the core package excludes from its file list
Summary: Diagnostics for dask
Requires: %{name} = %{version}
Requires: python-Jinja2 >= 2.10.3
Requires: python-bokeh >= 3.1
%description diagnostics
A flexible library for parallel computing in Python.
Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
Airflow, Luigi, Celery, or Make, but optimized for interactive
computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
extend common interfaces like NumPy, Pandas, or Python iterators to
larger-than-memory or distributed environments. These parallel collections
run on top of dynamic task schedulers.
This package contains the dask.diagnostics module.
%package test
# Collects every tests directory that the other packages exclude; requiring
# -complete pulls in all dask components for the test run
Summary: The test submodules of the python-dask package
Requires: %{name}-complete = %{version}
Requires: python-pandas
# SECTION pandas[test]
Requires: python-hypothesis
Requires: python-pytest-asyncio
# /SECTION
# Test runner and the plugins used by the check section (reruns, xdist) plus further test tooling
Requires: python-pre-commit
Requires: python-pytest
Requires: python-pytest-rerunfailures
Requires: python-pytest-timeout
Requires: python-pytest-xdist
%description test
Dask is a flexible library for parallel computing in Python.
This subpackage provides the dask tests submodules in the sitelib, which
are required for unit testing dask.
%prep
# Unpack the sdist and apply any patches at strip level 1
%autosetup -p1 -n dask-%{version}
# Delete every line of pyproject.toml that mentions addopts, presumably so the
# pytest options preset upstream do not interfere with the check section
sed -i '/addopts/d' pyproject.toml
%build
# Build the wheel for every enabled Python flavor
%pyproject_wheel
%install
# Nothing is installed in the test flavors; they only run the check section
%if !%{with test}
%pyproject_install
# presumably prepares the dask executable for update-alternatives - see the scriptlets
%python_clone -a %{buildroot}%{_bindir}/dask
# The expansion below patches the missing-dependency hints in the installed
# sources of every flavor.
# First sed, dask/distributed.py: within the lines from "Please either conda
# or pip install" to "python -m pip install", drop the "either conda or pip"
# wording, delete the conda command and turn the pip command into a zypper
# command naming the matching dask subpackage.
# Second sed, the array, bag and dataframe __init__.py files: replace that
# whole range of lines with a request to file a bug report.
%{python_expand # give SUSE specific install instructions
sed -E -i '/Please either conda or pip install/,/python -m pip install/ {
s/either conda or pip//;
/conda install/ d;
s/python -m pip install "dask\[(.*)\]".*pip install/zypper in $python-dask-\1/
}' \
%{buildroot}%{$python_sitelib}/dask/distributed.py
sed -E -i '/Please either conda or pip install/,/python -m pip install/ c \
"Please file a bug report https://bugzilla.opensuse.org and\\n"\
"report the missing requirements."' \
%{buildroot}%{$python_sitelib}/dask/array/__init__.py \
%{buildroot}%{$python_sitelib}/dask/bag/__init__.py \
%{buildroot}%{$python_sitelib}/dask/dataframe/__init__.py
}
# Runs after the sed edits above, so compilation sees the patched sources
%{python_compileall}
# Deduplicate identical files inside each flavor sitelib
%python_expand %fdupes %{buildroot}%{$python_sitelib}
%endif
%if %{with test}
%check
# keep the unpacked source tree off the import path
mv dask dask.moved
# Assemble the pytest -k expression of tests to leave out, piece by piece.
# outcome depends on the seed or the mimesis version
deselect="(test_datasets and test_deterministic)"
# upstream checks that its own ci is up to date, irrelevant for obs
deselect="$deselect or test_development_guidelines_matches_ci"
if [[ $(getconf LONG_BIT) -eq 32 ]]; then
  # 32-bit only; the first two are https://github.com/dask/dask/issues/8620
  for t in test_query_with_meta test_repartition_npartitions \
           test_pandas_multiindex test_categorize_info; do
    deselect="$deselect or $t"
  done
fi
# (rarely) flaky on obs
deselect="$deselect or test_local_scheduler"
deselect="$deselect or (test_threaded and test_interrupt)"
# perhaps? rh#1968947#c4
deselect="$deselect or test_select_from_select"
# tries to get an IP address
deselect="$deselect or test_map_partitions_df_input"
# needs s3fs support in arrow
deselect="$deselect or test_pyarrow_filesystem_option_real_data"
%pytest --pyargs dask -n auto -r fE -m "not network" -k "not ($deselect)" --reruns 3 --reruns-delay 3
%endif
%post
# register the dask executable of this flavor with update-alternatives
%python_install_alternative dask
%postun
# unregister the dask alternative again
%python_uninstall_alternative dask
%if !%{with test}
%files %{python_files}
# Core package: everything below dask/ except the array, dataframe and
# diagnostics subpackages and all tests directories, which ship separately.
%doc README.rst
%license LICENSE.txt
%python_alternative %{_bindir}/dask
%{python_sitelib}/dask/
%{python_sitelib}/dask-%{version}.dist-info
%exclude %{python_sitelib}/dask/array/
%exclude %{python_sitelib}/dask/dataframe/
%exclude %{python_sitelib}/dask/diagnostics
%exclude %{python_sitelib}/dask/tests
%exclude %{python_sitelib}/dask/bag/tests
%exclude %{python_sitelib}/dask/bytes/tests
%exclude %{python_sitelib}/dask/widgets/tests
# The delayed and dot modules belong to this package (see its Provides), so
# their bytecode caches are packaged here with the sources and not excluded.
%files %{python_files complete}
# meta package, license only
%license LICENSE.txt
%files %{python_files array}
# dask/array without its tests
%license LICENSE.txt
%{python_sitelib}/dask/array/
%exclude %{python_sitelib}/dask/array/tests
%files %{python_files dataframe}
# dask/dataframe without its three tests directories
%license LICENSE.txt
%{python_sitelib}/dask/dataframe/
%exclude %{python_sitelib}/dask/dataframe/tests
%exclude %{python_sitelib}/dask/dataframe/io/tests
%exclude %{python_sitelib}/dask/dataframe/tseries/tests
%files %{python_files distributed}
# dependency-only package, license only
%license LICENSE.txt
%files %{python_files diagnostics}
# dask/diagnostics without its tests
%license LICENSE.txt
%{python_sitelib}/dask/diagnostics/
%exclude %{python_sitelib}/dask/diagnostics/tests
%files %{python_files test}
# every tests directory excluded from the packages above
%license LICENSE.txt
%{python_sitelib}/dask/tests
%{python_sitelib}/dask/bytes/tests
%{python_sitelib}/dask/widgets/tests
%{python_sitelib}/dask/array/tests
%{python_sitelib}/dask/bag/tests
%{python_sitelib}/dask/dataframe/tests
%{python_sitelib}/dask/dataframe/io/tests
%{python_sitelib}/dask/dataframe/tseries/tests
%{python_sitelib}/dask/diagnostics/tests
%endif
%changelog