#
# spec file
#
# Copyright (c) 2023 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.

# Please submit bugfixes or comments via https://bugs.opensuse.org/
#


# Multibuild setup: the flavor placeholder below is expected to be substituted
# by the build service. An empty flavor builds the real packages without
# tests; each test flavor skips the other interpreters, appends a suffix to
# the package name and switches the "test" build condition on.
%define psuffix %{nil}
%global flavor @BUILD_FLAVOR@%{nil}
%if "%{flavor}" == "test-py39"
%define psuffix -test-py39
%define skip_python310 1
%define skip_python311 1
%bcond_without test
%endif
%if "%{flavor}" == "test-py310"
%define psuffix -test-py310
%define skip_python39 1
%define skip_python311 1
%bcond_without test
%endif
%if "%{flavor}" == "test-py311"
%define psuffix -test-py311
%define skip_python39 1
%define skip_python310 1
%bcond_without test
%endif
%if "%{flavor}" == ""
%bcond_with test
%endif

%{?sle15_python_module_pythons}
Name:           python-dask%{psuffix}
# ===> Note: python-dask MUST be updated in sync with python-distributed! <===
Version:        2023.12.0
Release:        0
Summary:        Minimal task scheduling abstraction
License:        BSD-3-Clause
URL:            https://dask.org
# SourceRepository: https://github.com/dask/dask
Source0:        https://files.pythonhosted.org/packages/source/d/dask/dask-%{version}.tar.gz
BuildRequires:  %{python_module base >= 3.9}
BuildRequires:  %{python_module packaging >= 20.0}
BuildRequires:  %{python_module pip}
BuildRequires:  %{python_module setuptools}
BuildRequires:  %{python_module versioneer-toml >= 0.29}
BuildRequires:  %{python_module wheel}
BuildRequires:  fdupes
BuildRequires:  python-rpm-macros
Requires:       python-PyYAML >= 5.3.1
Requires:       python-click >= 8.1
Requires:       python-cloudpickle >= 1.5
Requires:       python-fsspec >= 2021.9
Requires:       python-importlib-metadata >= 4.13.0
Requires:       python-packaging >= 20.0
Requires:       python-partd >= 1.2.0
Requires:       python-toolz >= 0.10.0
Requires(post): update-alternatives
Requires(postun):update-alternatives
Recommends:     %{name}-array = %{version}
Recommends:     %{name}-dataframe = %{version}
Recommends:     %{name}-distributed = %{version}
Suggests:       %{name}-complete = %{version}
Suggests:       %{name}-diagnostics = %{version}
# SECTION https://docs.dask.org/en/stable/install.html#optional-dependencies
Suggests:       python-SQLAlchemy >= 1.4.16
Suggests:       python-cityhash >= 0.2.4
Suggests:       python-fastparquet >= 0.8.2
Suggests:       python-gcsfs >= 2021.9.0
Suggests:       python-crick >= 0.0.3
Suggests:       python-cytoolz >= 0.10.1
Suggests:       python-dask-ml >= 1.4.0
Suggests:       python-fastavro >= 0.22.6
Suggests:       python-graphviz >= 0.8.4
Suggests:       python-h5py >= 2.10.0
Suggests:       python-psutil >= 0.5.7
Suggests:       python-pyarrow >= 14.0.1
Suggests:       python-matplotlib
Suggests:       python-mimesis >= 5.3.0
Suggests:       python-mmh3 >= 2.5.1
Suggests:       python-sparse >= 0.12.0
Suggests:       python-s3fs >= 0.4.0
Suggests:       python-xxhash >= 1.4.1
Suggests:       python-zarr >= 2.12.0
# /SECTION
# The bag, delayed, dot and multiprocessing names are provided and obsoleted
# by the base package itself.
Provides:       %{name}-bag = %{version}-%{release}
Obsoletes:      %{name}-bag < %{version}-%{release}
Provides:       %{name}-delayed = %{version}-%{release}
Obsoletes:      %{name}-delayed < %{version}-%{release}
Provides:       %{name}-dot = %{version}-%{release}
Obsoletes:      %{name}-dot < %{version}-%{release}
Provides:       %{name}-multiprocessing = %{version}-%{release}
Obsoletes:      %{name}-multiprocessing < %{version}-%{release}
BuildArch:      noarch
%if %{with test}
# test that we specified all requirements correctly in the core
# and subpackages by only requiring dask-test (= [complete] + pytest) and optional extras
BuildRequires:  %{python_module dask-test = %{version}}
# SECTION additional optionally tested (importorskip) packages
BuildRequires:  %{python_module SQLAlchemy >= 1.4.16}
BuildRequires:  %{python_module cachey}
BuildRequires:  %{python_module fastparquet >= 0.8.0}
# optional zarr increases fsspec minimum to 0.8.4 if present
BuildRequires:  %{python_module fsspec >= 0.8.4}
BuildRequires:  %{python_module h5py}
BuildRequires:  %{python_module ipython}
BuildRequires:  %{python_module jsonschema}
BuildRequires:  %{python_module matplotlib}
BuildRequires:  %{python_module mimesis}
BuildRequires:  %{python_module multipledispatch}
BuildRequires:  %{python_module numba}
# https://github.com/dask/partd/issues/66, https://github.com/dask/dask/pull/10176
BuildRequires:  %{python_module partd >= 1.4.0}
# snappy required for using fastparquet
BuildRequires:  %{python_module python-snappy}
BuildRequires:  %{python_module requests}
BuildRequires:  %{python_module scikit-image}
BuildRequires:  %{python_module scipy}
BuildRequires:  %{python_module sparse}
BuildRequires:  %{python_module tables}
BuildRequires:  %{python_module xarray}
BuildRequires:  %{python_module zarr}
# /SECTION
%endif
%python_subpackages

%description
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
  Airflow, Luigi, Celery, or Make, but optimized for interactive
  computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
  extend common interfaces like NumPy, Pandas, or Python iterators to
  larger-than-memory or distributed environments. These parallel collections
  run on top of dynamic task schedulers.

%package complete
# This must have a Requires for dask and all the dask subpackages
Summary:        All dask components
Requires:       %{name} = %{version}
Requires:       %{name}-array = %{version}
Requires:       %{name}-dataframe = %{version}
Requires:       %{name}-diagnostics = %{version}
Requires:       %{name}-distributed = %{version}
Requires:       python-lz4 >= 4.3.2
Requires:       python-pyarrow >= 7
Provides:       %{name}-all = %{version}-%{release}
Obsoletes:      %{name}-all < %{version}-%{release}

%description complete
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
  Airflow, Luigi, Celery, or Make, but optimized for interactive
  computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
  extend common interfaces like NumPy, Pandas, or Python iterators to
  larger-than-memory or distributed environments. These parallel collections
  run on top of dynamic task schedulers.

This package pulls in all the optional dask components.

%package array
Summary:        Numpy-like array data structure for dask
Requires:       %{name} = %{version}
Requires:       %{name}-delayed = %{version}
Requires:       python-numpy >= 1.21
Recommends:     python-scipy

%description array
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
  Airflow, Luigi, Celery, or Make, but optimized for interactive
  computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
  extend common interfaces like NumPy, Pandas, or Python iterators to
  larger-than-memory or distributed environments. These parallel collections
  run on top of dynamic task schedulers.

This package contains the dask array class.

Dask arrays implement a subset of the NumPy interface on large
arrays using blocked algorithms and task scheduling.

%package dataframe
Summary:        Pandas-like DataFrame data structure for dask
Requires:       %{name} = %{version}
Requires:       %{name}-array = %{version}
Requires:       %{name}-bag = %{version}
Requires:       python-pandas >= 1.3

%description dataframe
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
  Airflow, Luigi, Celery, or Make, but optimized for interactive
  computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
  extend common interfaces like NumPy, Pandas, or Python iterators to
  larger-than-memory or distributed environments. These parallel collections
  run on top of dynamic task schedulers.

This package contains the dask DataFrame class.

A Dask DataFrame is a large parallel dataframe composed of many
smaller Pandas dataframes, split along the index. These pandas
dataframes may live on disk for larger-than-memory computing on a
single machine, or on many different machines in a cluster.

%package distributed
Summary:        Interface with the distributed task scheduler in dask
Requires:       %{name} = %{version}
# dask and distributed are always updated together
Requires:       python-distributed = %{version}

%description distributed
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
  Airflow, Luigi, Celery, or Make, but optimized for interactive
  computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
  extend common interfaces like NumPy, Pandas, or Python iterators to
  larger-than-memory or distributed environments. These parallel collections
  run on top of dynamic task schedulers.

This meta package pulls in the distributed module into the dask namespace.

%package diagnostics
Summary:        Diagnostics for dask
Requires:       %{name} = %{version}
Requires:       python-Jinja2 >= 2.10.3
Requires:       python-bokeh >= 3.1

%description diagnostics
A flexible library for parallel computing in Python.

Dask is composed of two parts:
- Dynamic task scheduling optimized for computation. This is similar to
  Airflow, Luigi, Celery, or Make, but optimized for interactive
  computational workloads.
- “Big Data” collections like parallel arrays, dataframes, and lists that
  extend common interfaces like NumPy, Pandas, or Python iterators to
  larger-than-memory or distributed environments. These parallel collections
  run on top of dynamic task schedulers.

This package contains the dask.diagnostics module

%package test
Summary:        The test submodules of the python-dask package
Requires:       %{name}-complete = %{version}
Requires:       python-pandas
# SECTION pandas[test]
Requires:       python-hypothesis
Requires:       python-pytest-asyncio
# /SECTION
Requires:       python-pre-commit
Requires:       python-pytest
Requires:       python-pytest-rerunfailures
Requires:       python-pytest-timeout
Requires:       python-pytest-xdist

%description test
Dask is a flexible library for parallel computing in Python.

This subpackage provides the .test submodules in the sitelib
required for unit testing dask.

%prep
%autosetup -p1 -n dask-%{version}
# drop the upstream pytest addopts line; options are passed explicitly in the check section
sed -i '/addopts/d' pyproject.toml

%build
%pyproject_wheel

%install
# Only the non-test flavor installs files; the test flavors build against the
# already packaged dask-test requirement instead.
%if !%{with test}
%pyproject_install
%python_clone -a %{buildroot}%{_bindir}/dask
%{python_expand # give SUSE specific install instructions
sed -E -i '/Please either conda or pip install/,/python -m pip install/ {
    s/either conda or pip//;
    /conda install/ d;
    s/python -m pip install "dask\[(.*)\]".*pip install/zypper in $python-dask-\1/
    }' \
  %{buildroot}%{$python_sitelib}/dask/distributed.py
sed -E -i '/Please either conda or pip install/,/python -m pip install/ c \
            "Please file a bug report https://bugzilla.opensuse.org and\\n"\
            "report the missing requirements."' \
  %{buildroot}%{$python_sitelib}/dask/array/__init__.py \
  %{buildroot}%{$python_sitelib}/dask/bag/__init__.py \
  %{buildroot}%{$python_sitelib}/dask/dataframe/__init__.py
}
%{python_compileall}
%python_expand %fdupes %{buildroot}%{$python_sitelib}
%endif

%if %{with test}
%check
# move away from importpath
mv dask dask.moved
# The -k deselection expression for pytest is accumulated in donttest.
# different seed or mimesis version
donttest="(test_datasets and test_deterministic)"
# upstreams test if their ci is up to date, irrelevant for obs
donttest+=" or test_development_guidelines_matches_ci"
if [[ $(getconf LONG_BIT) -eq 32 ]]; then
  # https://github.com/dask/dask/issues/8620
  donttest+=" or test_query_with_meta"
  donttest+=" or test_repartition_npartitions"
  # donttest+=" or test_pandas_multiindex"
  donttest+=" or test_categorize_info"
fi
# (rarely) flaky on obs
donttest+=" or test_local_scheduler"
donttest+=" or (test_threaded and test_interrupt)"
# perhaps? rh#1968947#c4
donttest+=" or test_select_from_select"
# tries to get an IP address
donttest+=" or test_map_partitions_df_input"
# needs s3fs support in arrow
donttest+=" or test_pyarrow_filesystem_option_real_data"
%pytest --pyargs dask -n auto -r fE -m "not network" -k "not ($donttest)" --reruns 3 --reruns-delay 3
%endif

# Register and unregister the dask command with update-alternatives.
%post
%python_install_alternative dask

%postun
%python_uninstall_alternative dask

%if !%{with test}
# Base package: everything under dask/ except the array, dataframe and
# diagnostics trees and the test directories, which are listed in the
# subpackage file lists below.
%files %{python_files}
%doc README.rst
%license LICENSE.txt
%python_alternative %{_bindir}/dask
%{python_sitelib}/dask/
%{python_sitelib}/dask-%{version}.dist-info
%exclude %{python_sitelib}/dask/array/
%exclude %{python_sitelib}/dask/dataframe/
%exclude %{python_sitelib}/dask/diagnostics
%exclude %{python_sitelib}/dask/tests
%exclude %{python_sitelib}/dask/bag/tests
%exclude %{python_sitelib}/dask/bytes/tests
%exclude %{python_sitelib}/dask/widgets/tests
# NOTE(review): no other file list in this spec picks up these bytecode files,
# and the delayed and dot names are provided by the base package; confirm that
# these two excludes are still intended.
%pycache_only %exclude %{python_sitelib}/dask/__pycache__/delayed*.pyc
%pycache_only %exclude %{python_sitelib}/dask/__pycache__/dot.*

# Meta package: carries only the license file.
%files %{python_files complete}
%license LICENSE.txt

%files %{python_files array}
%license LICENSE.txt
%{python_sitelib}/dask/array/
%exclude %{python_sitelib}/dask/array/tests

%files %{python_files dataframe}
%license LICENSE.txt
%{python_sitelib}/dask/dataframe/
%exclude %{python_sitelib}/dask/dataframe/tests
%exclude %{python_sitelib}/dask/dataframe/io/tests
%exclude %{python_sitelib}/dask/dataframe/tseries/tests

# Meta package: carries only the license file.
%files %{python_files distributed}
%license LICENSE.txt

%files %{python_files diagnostics}
%license LICENSE.txt
%{python_sitelib}/dask/diagnostics/
%exclude %{python_sitelib}/dask/diagnostics/tests

# All test directories excluded from the packages above are collected here.
%files %{python_files test}
%license LICENSE.txt
%{python_sitelib}/dask/tests
%{python_sitelib}/dask/bytes/tests
%{python_sitelib}/dask/widgets/tests
%{python_sitelib}/dask/array/tests
%{python_sitelib}/dask/bag/tests
%{python_sitelib}/dask/dataframe/tests
%{python_sitelib}/dask/dataframe/io/tests
%{python_sitelib}/dask/dataframe/tseries/tests
%{python_sitelib}/dask/diagnostics/tests
%endif

%changelog