commit a6c9d7e8e0b504a65d669970f15b61e8d9b3818d Author: Adrian Schröter Date: Fri Jan 5 09:44:41 2024 +0100 Sync from SUSE:ALP:Source:Standard:1.0 python-dask revision 6a670a18e2c6aa1a8e06637e81c4494f diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..fecc750 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/_multibuild b/_multibuild new file mode 100644 index 0000000..e3b0fc3 --- /dev/null +++ b/_multibuild @@ -0,0 +1,5 @@ + + test-py39 + test-py310 + test-py311 + diff --git a/dask-2023.12.0.tar.gz b/dask-2023.12.0.tar.gz new file mode 100644 index 0000000..51921fc --- /dev/null +++ b/dask-2023.12.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f687e647ced0d3f2cadbef113c730b6555b9499b62e3a220535438841001c91 +size 8567342 diff --git a/python-dask.changes b/python-dask.changes new file mode 100644 index 0000000..71d8f57 --- /dev/null +++ b/python-dask.changes @@ -0,0 +1,5038 @@ 
+------------------------------------------------------------------- +Sat Dec 2 14:09:52 UTC 2023 - Dirk Müller + +- update to 2023.12.0: + * Bokeh 3.3.0 compatibility + * Add ``network`` marker to + ``test_pyarrow_filesystem_option_real_data`` + * Bump GPU CI to CUDA 11.8 (:pr:`10656`) + * Tokenize ``pandas`` offsets deterministically + * Add tokenize ``pd.NA`` functionality + * Update gpuCI ``RAPIDS_VER`` to ``24.02`` (:pr:`10636`) + * Fix precision handling in ``array.linalg.norm`` (:pr:`10556`) + `joanrue`_ + * Add ``axis`` argument to ``DataFrame.clip`` and + ``Series.clip`` (:pr:`10616`) `Richard (Rick) Zamora`_ + * Update changelog entry for in-memory rechunking (:pr:`10630`) + `Florian Jetter`_ + * Fix flaky ``test_resources_reset_after_cancelled_task`` + * Bump GPU CI to CUDA 11.8 + * Bump ``conda-incubator/setup-miniconda`` + * Add debug logs to P2P scheduler plugin + * ``O(1)`` access for ``/info/task/`` endpoint + * Remove stringification from shuffle annotations + * Don't cast ``int`` metrics to ``float`` + * Drop asyncio TCP backend + * Add offload support to ``context_meter.add_callback`` + * Test that ``sync()`` propagates contextvars + * Fix ``test_statistical_profiling_cycle`` + * Replace ``Client.register_plugin``'s ``idempotent`` argument + with ``.idempotent`` attribute on plugins + * Fix test report generation + * Install ``pyarrow-hotfix`` on ``mindeps-pandas`` CI + * Reduce memory usage of scheduler process - optimize + ``scheduler.py::TaskState`` class + * Update cuDF test with explicit ``dtype=object`` + * Fix ``Cluster`` / ``SpecCluster`` calls to async close + methods + +------------------------------------------------------------------- +Thu Nov 16 21:26:58 UTC 2023 - Ondřej Súkup + +- Update to 2023.11.0 + * Zero-copy P2P Array Rechunking + * Deprecating PyArrow <14.0.1 + * Improved PyArrow filesystem for Parquet + * Improve Type Reconciliation in P2P Shuffling + * official support for Python 3.12 + * Reduced memory pressure for multi 
array reductions + * improved P2P shuffling robustness + * Reduced scheduler CPU load for large graphs + +------------------------------------------------------------------- +Sun Sep 10 13:29:26 UTC 2023 - Ben Greiner + +- Update to 2023.9.1 + ## Enhancements + * Stricter data type for dask keys (GH#10485) crusaderky + * Special handling for None in DASK_ environment variables + (GH#10487) crusaderky + ## Bug Fixes +- Release 2023.9.0 + ## Bug Fixes + * Remove support for np.int64 in keys (GH#10483) crusaderky + * Fix _partitions dtype in meta for shuffling (GH#10462) Hendrik + Makait + * Don’t use exception hooks to shorten tracebacks (GH#10456) + crusaderky +- Release 2023.8.1 + ## Enhancements + * Adding support for cgroup v2 to cpu_count (GH#10419) Johan + Olsson + * Support multi-column groupby with sort=True and split_out>1 + (GH#10425) Richard (Rick) Zamora + * Add DataFrame.enforce_runtime_divisions method (GH#10404) + Richard (Rick) Zamora + * Enable file mode="x" with a single_file=True for Dask DataFrame + to_csv (GH#10443) Genevieve Buckley + ## Bug Fixes + * Fix ValueError when running to_csv in append mode with + single_file as True (GH#10441) +- Release 2023.8.0 + ## Enhancements + * Fix for make_timeseries performance regression (GH#10428) Irina + Truong +- Release 2023.7.1 + * This release updates Dask DataFrame to automatically convert + text data using object data types to string[pyarrow] if + pandas>=2 and pyarrow>=12 are installed. This should result in + significantly reduced memory consumption and increased + computation performance in many workflows that deal with text + data. 
You can disable this change by setting the + dataframe.convert-string configuration value to False with + dask.config.set({"dataframe.convert-string": False}) + ## Enhancements + * Convert to pyarrow strings if proper dependencies are installed + (GH#10400) James Bourbeau + * Avoid repartition before shuffle for p2p (GH#10421) Patrick + Hoefler + * API to generate random Dask DataFrames (GH#10392) Irina Truong + * Speed up dask.bag.Bag.random_sample (GH#10356) crusaderky + * Raise helpful ValueError for invalid time units (GH#10408) Nat + Tabris + * Make repartition a no-op when divisions match (divisions + provided as a list) (GH#10395) Nicolas Grandemange + ## Bug Fixes + * Use dataframe.convert-string in read_parquet token (GH#10411) + James Bourbeau + * Category dtype is lost when concatenating MultiIndex (GH#10407) + Irina Truong + * Fix FutureWarning: The provided callable... (GH#10405) Irina + Truong + * Enable non-categorical hive-partition columns in read_parquet + (GH#10353) Richard (Rick) Zamora + * concat ignoring DataFrame without columns (GH#10359) Patrick + Hoefler +- Release 2023.7.0 + ## Enhancements + * Catch exceptions when attempting to load CLI entry points + (GH#10380) Jacob Tomlinson + ## Bug Fixes + * Fix typo in _clean_ipython_traceback (GH#10385) Alexander + Clausen + * Ensure that df is immutable after from_pandas (GH#10383) + Patrick Hoefler + * Warn consistently for inplace in Series.rename (GH#10313) + Patrick Hoefler +- Release 2023.6.1 + ## Enhancements + * Remove no longer supported clip_lower and clip_upper (GH#10371) + Patrick Hoefler + * Support DataFrame.set_index(..., sort=False) (GH#10342) Miles + * Cleanup remote tracebacks (GH#10354) Irina Truong + * Add dispatching mechanisms for pyarrow.Table conversion + (GH#10312) Richard (Rick) Zamora + * Choose P2P even if fusion is enabled (GH#10344) Hendrik Makait + * Validate that rechunking is possible earlier in graph + generation (GH#10336) Hendrik Makait + ## Bug Fixes + * Fix 
issue with header passed to read_csv (GH#10355) GALI PREM + SAGAR + * Respect dropna and observed in GroupBy.var and GroupBy.std + (GH#10350) Patrick Hoefler + * Fix H5FD_lock error when writing to hdf with distributed client + (GH#10309) Irina Truong + * Fix for total_mem_usage of bag.map() (GH#10341) Irina Truong + ## Deprecations + * Deprecate DataFrame.fillna/Series.fillna with method (GH#10349) + Irina Truong + * Deprecate DataFrame.first and Series.first (GH#10352) Irina + Truong +- Release 2023.6.0 + ## Enhancements + * Add missing not in predicate support to read_parquet (GH#10320) + Richard (Rick) Zamora + ## Bug Fixes + * Fix for incorrect value_counts (GH#10323) Irina Truong + * Update empty describe top and freq values (GH#10319) James + Bourbeau + +------------------------------------------------------------------- +Sat Jun 10 12:27:59 UTC 2023 - ecsos + +- Add %{?sle15_python_module_pythons} + +------------------------------------------------------------------- +Mon Jun 5 23:42:44 UTC 2023 - Steve Kowalik + +- Tighten bokeh requirement to match distributed. + +------------------------------------------------------------------- +Fri May 26 19:59:37 UTC 2023 - Ben Greiner + +- Update to 2023.5.1 + * This release drops support for Python 3.8. As of this release + Dask supports Python 3.9, 3.10, and 3.11. 
+ ## Enhancements + * Drop Python 3.8 support (GH#10295) Thomas Grainger + * Change Dask Bag partitioning scheme to improve cluster + saturation (GH#10294) Jacob Tomlinson + * Generalize dd.to_datetime for GPU-backed collections, introduce + get_meta_library utility (GH#9881) Charles Blackmon-Luca + * Add na_action to DataFrame.map (GH#10305) Patrick Hoefler + * Raise TypeError in DataFrame.nsmallest and DataFrame.nlargest + when columns is not given (GH#10301) Patrick Hoefler + * Improve sizeof for pd.MultiIndex (GH#10230) Patrick Hoefler + * Support duplicated columns in a bunch of DataFrame methods + (GH#10261) Patrick Hoefler + * Add numeric_only support to DataFrame.idxmin and + DataFrame.idxmax (GH#10253) Patrick Hoefler + * Implement numeric_only support for DataFrame.quantile + (GH#10259) Patrick Hoefler + * Add support for numeric_only=False in DataFrame.std (GH#10251) + Patrick Hoefler + * Implement numeric_only=False for GroupBy.cumprod and + GroupBy.cumsum (GH#10262) Patrick Hoefler + * Implement numeric_only for skew and kurtosis (GH#10258) Patrick + Hoefler + * mask and where should accept a callable (GH#10289) Irina Truong + * Fix conversion from Categorical to pa.dictionary in + read_parquet (GH#10285) Patrick Hoefler + ## Bug Fixes + * Spurious config on nested annotations (GH#10318) crusaderky + * Fix rechunking behavior for dimensions with known and unknown + chunk sizes (GH#10157) Hendrik Makait + * Enable drop to support mismatched partitions (GH#10300) James + Bourbeau + * Fix divisions construction for to_timestamp (GH#10304) Patrick + Hoefler + * pandas ExtensionDtype raising in Series reduction operations + (GH#10149) Patrick Hoefler + * Fix regression in da.random interface (GH#10247) Eray Aslan + * da.coarsen doesn’t trim an empty chunk in meta (GH#10281) Irina + Truong + * Fix dtype inference for engine="pyarrow" in read_csv (GH#10280) + Patrick Hoefler +- Release 2023.5.0 + ## Enhancements + * Implement numeric_only=False for 
GroupBy.corr and GroupBy.cov + (GH#10264) Patrick Hoefler + * Add support for numeric_only=False in DataFrame.var (GH#10250) + Patrick Hoefler + * Add numeric_only support to DataFrame.mode (GH#10257) Patrick + Hoefler + * Add DataFrame.map to dask.DataFrame API (GH#10246) Patrick + Hoefler + * Adjust for DataFrame.applymap deprecation and all NA concat + behaviour change (GH#10245) Patrick Hoefler + * Enable numeric_only=False for DataFrame.count (GH#10234) + Patrick Hoefler + * Disallow array input in mask/where (GH#10163) Irina Truong + * Support numeric_only=True in GroupBy.corr and GroupBy.cov + (GH#10227) Patrick Hoefler + * Add numeric_only support to GroupBy.median (GH#10236) Patrick + Hoefler + * Support mimesis=9 in dask.datasets (GH#10241) James Bourbeau + * Add numeric_only support to min, max and prod (GH#10219) + Patrick Hoefler + * Add numeric_only=True support for GroupBy.cumsum and + GroupBy.cumprod (GH#10224) Patrick Hoefler + * Add helper to unpack numeric_only keyword (GH#10228) Patrick + Hoefler + ## Bug Fixes + * Fix clone + from_array failure (GH#10211) crusaderky + * Fix dataframe reductions for ea dtypes (GH#10150) Patrick + Hoefler + * Avoid scalar conversion deprecation warning in numpy=1.25 + (GH#10248) James Bourbeau + * Make sure transform output has the same index as input + (GH#10184) Irina Truong + * Fix corr and cov on a single-row partition (GH#9756) Irina + Truong + * Fix test_groupby_numeric_only_supported and + test_groupby_aggregate_categorical_observed upstream errors + (GH#10243) Irina Truong +- Release 2023.4.1 + ## Enhancements + * Implement numeric_only support for DataFrame.sum (GH#10194) + Patrick Hoefler + * Add support for numeric_only=True in GroupBy operations + (GH#10222) Patrick Hoefler + * Avoid deep copy in DataFrame.__setitem__ for pandas 1.4 and up + (GH#10221) Patrick Hoefler + * Avoid calling Series.apply with _meta_nonempty (GH#10212) + Patrick Hoefler + * Unpin sqlalchemy and fix compatibility issues 
(GH#10140) + Patrick Hoefler + ## Bug Fixes + * Partially revert default client discovery (GH#10225) Florian + Jetter + * Support arrow dtypes in Index meta creation (GH#10170) Patrick + Hoefler + * Repartitioning raises with extension dtype when truncating + floats (GH#10169) Patrick Hoefler + * Adjust empty Index from fastparquet to object dtype (GH#10179) + Patrick Hoefler +- Release 2023.4.0 + ## Enhancements + * Override old default values in update_defaults (GH#10159) Gabe + Joseph + * Add a CLI command to list and get a value from dask config + (GH#9936) Irina Truong + * Handle string-based engine argument to read_json (GH#9947) + Richard (Rick) Zamora + * Avoid deprecated GroupBy.dtypes (GH#10111) Irina Truong + ## Bug Fixes + * Revert grouper-related changes (GH#10182) Irina Truong + * GroupBy.cov raising for non-numeric grouping column (GH#10171) + Patrick Hoefler + * Updates for Index supporting numpy numeric dtypes (GH#10154) + Irina Truong + * Preserve dtype for partitioning columns when read with pyarrow + (GH#10115) Patrick Hoefler + * Fix annotations for to_hdf (GH#10123) Hendrik Makait + * Handle None column name when checking if columns are all + numeric (GH#10128) Lawrence Mitchell + * Fix valid_divisions when passed a tuple (GH#10126) Brian + Phillips + * Maintain annotations in DataFrame.categorize (GH#10120) Hendrik + Makait + * Fix handling of missing min/max parquet statistics during + filtering (GH#10042) Richard (Rick) Zamora + ## Deprecations + * Deprecate use_nullable_dtypes= and add dtype_backend= + (GH#10076) Irina Truong + * Deprecate convert_dtype in Series.apply (GH#10133) Irina Truong +- Drop dask-pr10042-parquetstats.patch + +------------------------------------------------------------------- +Tue Apr 4 20:46:26 UTC 2023 - Ben Greiner + +- Drop python38 test flavor + +------------------------------------------------------------------- +Thu Mar 30 21:00:52 UTC 2023 - Ben Greiner + +- Enable pyarrow in the [complete] extra + 
+------------------------------------------------------------------- +Mon Mar 27 16:40:11 UTC 2023 - Ben Greiner + +- Update to 2023.3.2 + ## Enhancements + * Deprecate observed=False for groupby with categoricals + (GH#10095) Irina Truong + * Deprecate axis= for some groupby operations (GH#10094) James + Bourbeau + * The axis keyword in DataFrame.rolling/Series.rolling is + deprecated (GH#10110) Irina Truong + * DataFrame._data deprecation in pandas (GH#10081) Irina Truong + * Use importlib_metadata backport to avoid CLI UserWarning + (GH#10070) Thomas Grainger + * Port option parsing logic from dask.dataframe.read_parquet to + to_parquet (GH#9981) Anton Loukianov + ## Bug Fixes + * Avoid using dd.shuffle in groupby-apply (GH#10043) Richard + (Rick) Zamora + * Enable null hive partitions with pyarrow parquet engine + (GH#10007) Richard (Rick) Zamora + * Support unknown shapes in *_like functions (GH#10064) Doug + Davis + ## Maintenance + * Restore Entrypoints compatibility (GH#10113) Jacob Tomlinson + * Allow pyarrow build to continue on failures (GH#10097) James + Bourbeau + * Fix test_set_index_on_empty with pyarrow strings active + (GH#10054) Irina Truong + * Temporarily skip pyarrow_compat tests with pandas 2.0 + (GH#10063) James Bourbeau + +------------------------------------------------------------------- +Sun Mar 26 17:13:15 UTC 2023 - Ben Greiner + +- Add dask-pr10042-parquetstats.patch gh#dask/dask#10042 +- Enable python311 build: numba is not a strict requirement + +------------------------------------------------------------------- +Sat Mar 11 22:53:32 UTC 2023 - Ben Greiner + +- Update to v2023.3.1 + ## Enhancements + * Support pyarrow strings in MultiIndex (GH#10040) Irina Truong + * Improved support for pyarrow strings (GH#10000) Irina Truong + * Fix flaky RuntimeWarning during array reductions (GH#10030) + James Bourbeau + * Extend complete extras (GH#10023) James Bourbeau + * Raise an error with dataframe.convert_string=True and pandas<2.0 + 
(GH#10033) Irina Truong + * Rename shuffle/rechunk config option/kwarg to method (GH#10013) + James Bourbeau + * Add initial support for converting pandas extension dtypes to + arrays (GH#10018) James Bourbeau + * Remove randomgen support (GH#9987) Eray Aslan + ## Bug Fixes + * Skip rechunk when rechunking to the same chunks with unknown + sizes (GH#10027) Hendrik Makait + * Custom utility to convert parquet filters to pyarrow expression + (GH#9885) Richard (Rick) Zamora + * Consider numpy scalars and 0d arrays as scalars when padding + (GH#9653) Justus Magin + * Fix parquet overwrite behavior after an adaptive read_parquet + operation (GH#10002) Richard (Rick) Zamora + ## Maintenance + * Remove stale hive-partitioning code from pyarrow parquet engine + (GH#10039) Richard (Rick) Zamora + * Increase minimum supported pyarrow to 7.0 (GH#10024) James + Bourbeau + * Revert “Prepare drop packunpack (GH#9994)” (GH#10037) Florian + Jetter + * Have codecov wait for more builds before reporting (GH#10031) + James Bourbeau + * Prepare drop packunpack (GH#9994) Florian Jetter + * Add CI job with pyarrow strings turned on (GH#10017) James + Bourbeau + * Fix test_groupby_dropna_with_agg for pandas 2.0 (GH#10001) Irina + Truong + * Fix test_pickle_roundtrip for pandas 2.0 (GH#10011) James + Bourbeau + +------------------------------------------------------------------- +Wed Mar 8 15:20:50 UTC 2023 - Benjamin Greiner + +- Update dependencies +- Skip one more test failing because of missing pyarrow + +------------------------------------------------------------------- +Wed Mar 8 09:37:10 UTC 2023 - Dirk Müller + +- update to 2023.3.0: + * Bag must not pick p2p as shuffle default (:pr:`10005`) + * Minor follow-up to P2P by default (:pr:`10008`) `James + Bourbeau`_ + * Add minimum version to optional ``jinja2`` dependency + (:pr:`9999`) `Charles Blackmon-Luca`_ + * Enable P2P shuffling by default + * P2P rechunking + * Efficient `dataframe.convert_string` support for + `read_parquet` 
+ * Allow p2p shuffle kwarg for DataFrame merges + * Change ``split_row_groups`` default to "infer" + * Add option for converting string data to use ``pyarrow`` + strings + * Add support for multi-column ``sort_values`` + * ``Generator`` based random-number generation in``dask.array`` + * Support ``numeric_only`` for simple groupby aggregations for + ``pandas`` 2.0 compatibility + * Fix profilers plot not being aligned to context manager enter + time + * Relax dask.dataframe assert_eq type checks + * Restore ``describe`` compatibility for ``pandas`` 2.0 + * Improving deploying Dask docs + * More docs for ``DataFrame.partitions`` + * Update docs with more information on default Delayed + scheduler + * Deployment Considerations documentation + * Temporarily rerun flaky tests + * Update parsing of FULL_RAPIDS_VER/FULL_UCX_PY_VER + * Increase minimum supported versions to ``pandas=1.3`` and + ``numpy=1.21`` + * Fix ``std`` to work with ``numeric_only`` for ``pandas`` 2.0 + * Temporarily ``xfail`` + ``test_roundtrip_partitioned_pyarrow_dataset`` (:pr:`9977`) + * Fix copy on write failure in `test_idxmaxmin` (:pr:`9944`) + * Bump ``pre-commit`` versions (:pr:`9955`) `crusaderky`_ + * Fix ``test_groupby_unaligned_index`` for ``pandas`` 2.0 + * Un-``xfail`` ``test_set_index_overlap_2`` for ``pandas`` 2.0 + * Fix ``test_merge_by_index_patterns`` for ``pandas`` 2.0 + * Bump jacobtomlinson/gha-find-replace from 2 to 3 (:pr:`9953`) + * Fix ``test_rolling_agg_aggregate`` for ``pandas`` 2.0 + compatibility + * Bump ``black`` to ``23.1.0`` + * Run GPU tests on python 3.8 & 3.10 (:pr:`9940`) + * Fix ``test_to_timestamp`` for ``pandas`` 2.0 (:pr:`9932`) + * Fix an error with ``groupby`` ``value_counts`` for ``pandas`` + 2.0 compatibility + * Config converter: replace all dashes with underscores + +------------------------------------------------------------------- +Sun Feb 26 00:08:43 UTC 2023 - Ben Greiner + +- Prepare test multiflavors for python311, but skip python311 + * Numba 
is not ready for python 3.11 yet gh#numba/numba#8304 + +------------------------------------------------------------------- +Fri Feb 17 09:06:25 UTC 2023 - Ben Greiner + +- Update to 2023.2.0 + ## Enhancements + * Update numeric_only default in quantile for pandas 2.0 + (GH#9854) Irina Truong + * Make repartition a no-op when divisions match (GH#9924) James + Bourbeau + * Update datetime_is_numeric behavior in describe for pandas 2.0 + (GH#9868) Irina Truong + * Update value_counts to return correct name in pandas 2.0 + (GH#9919) Irina Truong + * Support new axis=None behavior in pandas 2.0 for certain + reductions (GH#9867) James Bourbeau + * Filter out all-nan RuntimeWarning at the chunk level for nanmin + and nanmax (GH#9916) Julia Signell + * Fix numeric meta_nonempty index creation for pandas 2.0 + (GH#9908) James Bourbeau + * Fix DataFrame.info() tests for pandas 2.0 (GH#9909) James + Bourbeau + ## Bug Fixes + * Fix GroupBy.value_counts handling for multiple groupby columns + (GH#9905) Charles Blackmon-Luca + +------------------------------------------------------------------- +Sun Feb 5 13:29:14 UTC 2023 - Ben Greiner + +- Update to 2023.1.1 + ## Enhancements + * Add to_backend method to Array and _Frame (GH#9758) Richard + (Rick) Zamora + * Small fix for timestamp index divisions in pandas 2.0 (GH#9872) + Irina Truong + * Add numeric_only to DataFrame.cov and DataFrame.corr (GH#9787) + James Bourbeau + * Fixes related to group_keys default change in pandas 2.0 + (GH#9855) Irina Truong + * infer_datetime_format compatibility for pandas 2.0 (GH#9783) + James Bourbeau + ## Bug Fixes + * Fix serialization bug in BroadcastJoinLayer (GH#9871) Richard + (Rick) Zamora + * Satisfy broadcast argument in DataFrame.merge (GH#9852) Richard + (Rick) Zamora + * Fix pyarrow parquet columns statistics computation (GH#9772) + aywandji + ## Documentation + * Fix “duplicate explicit target name” docs warning (GH#9863) + Chiara Marmo + * Fix code formatting issue in “Defining a 
new collection + backend” docs (GH#9864) Chiara Marmo + * Update dashboard documentation for memory plot (GH#9768) Jayesh + Manani + * Add docs section about no-worker tasks (GH#9839) Florian Jetter + ## Maintenance + * Additional updates for detecting a distributed scheduler + (GH#9890) James Bourbeau + * Update gpuCI RAPIDS_VER to 23.04 (GH#9876) + * Reverse precedence between collection and distributed default + (GH#9869) Florian Jetter + * Update xarray-contrib/issue-from-pytest-log to version 1.2.6 + (GH#9865) James Bourbeau + * Don't require dask config shuffle default (GH#9826) Florian + Jetter + * Un-xfail datetime64 Parquet roundtripping tests for new + fastparquet (GH#9811) James Bourbeau + * Add option to manually run upstream CI build (GH#9853) James + Bourbeau + * Use custom timeout in CI builds (GH#9844) James Bourbeau + * Remove kwargs from make_blockwise_graph (GH#9838) Florian + Jetter + * Ignore warnings on persist call in + test_setitem_extended_API_2d_mask (GH#9843) Charles + Blackmon-Luca + * Fix running S3 tests locally (GH#9833) James Bourbeau +- Release 2023.1.0 + ## Enhancements + * Use distributed default clients even if no config is set + (GH#9808) Florian Jetter + * Implement ma.where and ma.nonzero (GH#9760) Erik Holmgren + * Update zarr store creation functions (GH#9790) Ryan Abernathey + * iteritems compatibility for pandas 2.0 (GH#9785) James Bourbeau + * Accurate sizeof for pandas string[python] dtype (GH#9781) + crusaderky + * Deflate sizeof() of duplicate references to pandas object types + (GH#9776) crusaderky + * GroupBy.__getitem__ compatibility for pandas 2.0 (GH#9779) + James Bourbeau + * append compatibility for pandas 2.0 (GH#9750) James Bourbeau + * get_dummies compatibility for pandas 2.0 (GH#9752) James + Bourbeau + * is_monotonic compatibility for pandas 2.0 (GH#9751) James + Bourbeau + * numpy=1.24 compatibility (GH#9777) James Bourbeau + ## Documentation + * Remove duplicated encoding kwarg in docstring for to_json + 
(GH#9796) Sultan Orazbayev + * Mention SubprocessCluster in LocalCluster documentation + (GH#9784) Hendrik Makait + * Move Prometheus docs to dask/distributed (GH#9761) crusaderky + ## Maintenance + * Temporarily ignore RuntimeWarning in + test_setitem_extended_API_2d_mask (GH#9828) James Bourbeau + * Fix flaky test_threaded.py::test_interrupt (GH#9827) Hendrik + Makait + * Update xarray-contrib/issue-from-pytest-log in upstream report + (GH#9822) James Bourbeau + * pip install dask on gpuCI builds (GH#9816) Charles + Blackmon-Luca + * Bump actions/checkout from 3.2.0 to 3.3.0 (GH#9815) + * Resolve sqlalchemy import failures in mindeps testing (GH#9809) + Charles Blackmon-Luca + * Ignore sqlalchemy.exc.RemovedIn20Warning (GH#9801) Thomas + Grainger + * xfail datetime64 Parquet roundtripping tests for pandas 2.0 + (GH#9786) James Bourbeau + * Remove sqlalchemy 1.3 compatibility (GH#9695) McToel + * Reduce size of expected DoK sparse matrix (GH#9775) Elliott + Sales de Andrade + * Remove executable flag from dask/dataframe/io/orc/utils.py + (GH#9774) Elliott Sales de Andrade +- Drop dask-pr9777-np1.24.patch + +------------------------------------------------------------------- +Mon Jan 2 20:44:44 UTC 2023 - Ben Greiner + +- Update to 2022.12.1 + ## Enhancements + * Support dtype_backend="pandas|pyarrow" configuration (GH#9719) + James Bourbeau + * Support cupy.ndarray to cudf.DataFrame dispatching in + dask.dataframe (GH#9579) Richard (Rick) Zamora + * Make filesystem-backend configurable in read_parquet (GH#9699) + Richard (Rick) Zamora + * Serialize all pyarrow extension arrays efficiently (GH#9740) + James Bourbeau + ## Bug Fixes + * Fix bug when repartitioning with tz-aware datetime index + (GH#9741) James Bourbeau + * Partial functions in aggs may have arguments (GH#9724) Irina + Truong + * Add support for simple operation with pyarrow-backed extension + dtypes (GH#9717) James Bourbeau + * Rename columns correctly in case of SeriesGroupby (GH#9716) + Lawrence 
Mitchell + ## Maintenance + * Add zarr to Python 3.11 CI environment (GH#9771) James Bourbeau + * Add support for Python 3.11 (GH#9708) Thomas Grainger + * Bump actions/checkout from 3.1.0 to 3.2.0 (GH#9753) + * Avoid np.bool8 deprecation warning (GH#9737) James Bourbeau + * Make sure dev packages aren’t overwritten in upstream CI build + (GH#9731) James Bourbeau + * Avoid adding data.h5 and mydask.html files during tests + (GH#9726) Thomas Grainger +- Release 2022.12.0 + ## Enhancements + * Remove statistics-based set_index logic from read_parquet + (GH#9661) Richard (Rick) Zamora + * Add support for use_nullable_dtypes to dd.read_parquet + (GH#9617) Ian Rose + * Fix map_overlap in order to accept pandas arguments (GH#9571) + Fabien Aulaire + * Fix pandas 1.5+ FutureWarning in .str.split(..., expand=True) + (GH#9704) Jacob Hayes + * Enable column projection for groupby slicing (GH#9667) Richard + (Rick) Zamora + * Support duplicate column cum-functions (GH#9685) Ben + * Improve error message for failed backend dispatch call + (GH#9677) Richard (Rick) Zamora + ## Bug Fixes + * Revise meta creation in arrow parquet engine (GH#9672) Richard + (Rick) Zamora + * Fix da.fft.fft for array-like inputs (GH#9688) James Bourbeau + * Fix groupby -aggregation when grouping on an index by name + (GH#9646) Richard (Rick) Zamora + ## Maintenance + * Avoid PytestReturnNotNoneWarning in test_inheriting_class + (GH#9707) Thomas Grainger + * Fix flaky test_dataframe_aggregations_multilevel (GH#9701) + Richard (Rick) Zamora + * Bump mypy version (GH#9697) crusaderky + * Disable dashboard in test_map_partitions_df_input (GH#9687) + James Bourbeau + * Use latest xarray-contrib/issue-from-pytest-log in upstream + build (GH#9682) James Bourbeau + * xfail ttest_1samp for upstream scipy (GH#9670) James Bourbeau + * Update gpuCI RAPIDS_VER to 23.02 (GH#9678) +- Add dask-pr9777-np1.24.patch gh#dask/dask#9777 +- Move to PEP517 build + 
+------------------------------------------------------------------- +Mon Nov 21 19:03:11 UTC 2022 - Ben Greiner + +- Go back to bokeh 2.4 + * gh#dask/dask#9659 + * we provide a legacy bokeh2 instead + +------------------------------------------------------------------- +Sun Nov 20 10:01:18 UTC 2022 - Ben Greiner + +- Update to version 2022.11.1 + ## Enhancements + * Restrict bokeh=3 support (GH#9673) Gabe Joseph (ignored in rpm, + fixed by bokeh 3.0.2, see gh#dask/dask#9659) + * Updates for fastparquet evolution (GH#9650) Martin Durant + ## Maintenance + * Revert importlib.metadata workaround (GH#9658) James Bourbeau +- Release 2022.11.0 + ## Enhancements + * Generalize from_dict implementation to allow usage from other + backends (GH#9628) GALI PREM SAGAR + ## Bug Fixes + * Avoid pandas constructors in dask.dataframe.core (GH#9570) + Richard (Rick) Zamora + * Fix sort_values with Timestamp data (GH#9642) James Bourbeau + * Generalize array checking and remove pd.Index call in + _get_partitions (GH#9634) Benjamin Zaitlen + * Fix read_csv behavior for header=0 and names (GH#9614) Richard + (Rick) Zamora + ## Maintenance + * Allow bokeh=3 (GH#9659) James Bourbeau + * Add pre-commit to catch breakpoint() (GH#9638) James Bourbeau + * Bump xarray-contrib/issue-from-pytest-log from 1.1 to 1.2 + (GH#9635) + * Remove blosc references (GH#9625) Naty Clementi + * Harden test_repartition_npartitions (GH#9585) Richard (Rick) + Zamora +- Release 2022.10.2 + * This was a hotfix and has no changes in this repository. The + necessary fix was in dask/distributed, but we decided to bump + this version number for consistency. 
+- Release 2022.10.1 + ## Enhancements + * Enable named aggregation syntax (GH#9563) ChrisJar + * Add extension dtype support to set_index (GH#9566) James + Bourbeau + * Redesigning the array HTML repr for clarity (GH#9519) Shingo + OKAWA + ## Bug Fixes + * Fix merge with empty left DataFrame (GH#9578) Ian Rose + ## Maintenance + * Require Click 7.0+ in Dask (GH#9595) John A Kirkham + * Temporarily restrict bokeh<3 (GH#9607) James Bourbeau + * Resolve importlib-related failures in upstream CI (GH#9604) + Charles Blackmon-Luca + * Remove setuptools host dep, add CLI entrypoint (GH#9600) + Charles Blackmon-Luca + * More Backend dispatch class type annotations (GH#9573) Ian Rose +- Create a -test subpackage in order to avoid rpmlint errors +- Drop extra conftest: included in sdist. + +------------------------------------------------------------------- +Fri Oct 21 13:19:48 UTC 2022 - Ben Greiner + +- Update to version 2022.10.0 + * Backend library dispatching for IO in Dask-Array and + Dask-DataFrame (GH#9475) Richard (Rick) Zamora + * Add new CLI that is extensible (GH#9283) Doug Davis + * Groupby median (GH#9516) Ian Rose + * Fix array copy not being a no-op (GH#9555) David Hoese + * Add support for string timedelta in map_overlap (GH#9559) + Nicolas Grandemange + * Shuffle-based groupby for single functions (GH#9504) Ian Rose + * Make datetime.datetime tokenize idempotently (GH#9532) Martin + Durant + * Support tokenizing datetime.time (GH#9528) Tim Paine + * Avoid race condition in lazy dispatch registration (GH#9545) + James Bourbeau + * Do not allow setitem to np.nan for int dtype (GH#9531) Doug + Davis + * Stable demo column projection (GH#9538) Ian Rose + * Ensure pickle-able binops in delayed (GH#9540) Ian Rose + * Fix project CSV columns when selecting (GH#9534) Martin Durant + * Update Parquet best practice (GH#9537) Matthew Rocklin +- move -all metapackage to -complete, mirroring upstream's + [complete] extra. 
+ +------------------------------------------------------------------- +Fri Sep 30 23:19:11 UTC 2022 - Arun Persaud + +- update to version 2022.9.2: + * Enhancements + + Remove factorization logic from array auto chunking (:pr:`9507`) + `James Bourbeau`_ + * Documentation + + Add docs on running Dask in a standalone Python script + (:pr:`9513`) `James Bourbeau`_ + + Clarify custom-graph multiprocessing example (:pr:`9511`) + `nouman`_ + * Maintenance + + Groupby sort upstream compatibility (:pr:`9486`) `Ian Rose`_ + +------------------------------------------------------------------- +Fri Sep 16 19:54:12 UTC 2022 - Arun Persaud + +- update to version 2022.9.1: + * New Features + + Add "DataFrame" and "Series" "median" methods (:pr:`9483`) + `James Bourbeau`_ + * Enhancements + + Shuffle "groupby" default (:pr:`9453`) `Ian Rose`_ + + Filter by list (:pr:`9419`) `Greg Hayes`_ + + Added "distributed.utils.key_split" functionality to + "dask.utils.key_split" (:pr:`9464`) `Luke Conibear`_ + * Bug Fixes + + Fix overlap so that "set_index" doesn't drop rows (:pr:`9423`) + `Julia Signell`_ + + Fix assigning pandas "Series" to column when "ddf.columns.min()" + raises (:pr:`9485`) `Erik Welch`_ + + Fix metadata comparison "stack_partitions" (:pr:`9481`) `James + Bourbeau`_ + + Provide default for "split_out" (:pr:`9493`) `Lawrence + Mitchell`_ + * Deprecations + + Allow "split_out" to be "None", which then defaults to "1" in + "groupby().aggregate()" (:pr:`9491`) `Ian Rose`_ + * Documentation + + Fixing "enforce_metadata" documentation, not checking for dtypes + (:pr:`9474`) `Nicolas Grandemange`_ + + Fix "it's" --> "its" typo (:pr:`9484`) `Nat Tabris`_ + * Maintenance + + Workaround for parquet writing failure using some datetime + series but not others (:pr:`9500`) `Ian Rose`_ + + Filter out "numeric_only" warnings from "pandas" (:pr:`9496`) + `James Bourbeau`_ + + Avoid "set_index(..., inplace=True)" where not necessary + (:pr:`9472`) `James Bourbeau`_ + + Avoid passing 
groupby key list of length one (:pr:`9495`) `James + Bourbeau`_ + + Update "test_groupby_dropna_cudf" based on "cudf" support for + "group_keys" (:pr:`9482`) `James Bourbeau`_ + + Remove "dd.from_bcolz" (:pr:`9479`) `James Bourbeau`_ + + Added "flake8-bugbear" to "pre-commit" hooks (:pr:`9457`) `Luke + Conibear`_ + + Bind loop variables in function definitions ("B023") + (:pr:`9461`) `Luke Conibear`_ + + Added assert for comparisons ("B015") (:pr:`9459`) `Luke + Conibear`_ + + Set top-level default shell in CI workflows (:pr:`9469`) `James + Bourbeau`_ + + Removed unused loop control variables ("B007") (:pr:`9458`) + `Luke Conibear`_ + + Replaced "getattr" calls for constant attributes ("B009") + (:pr:`9460`) `Luke Conibear`_ + + Pin "libprotobuf" to allow nightly "pyarrow" in the upstream CI + build (:pr:`9465`) `Joris Van den Bossche`_ + + Replaced mutable data structures for default arguments ("B006") + (:pr:`9462`) `Luke Conibear`_ + + Changed "flake8" mirror and updated version (:pr:`9456`) `Luke + Conibear`_ + +------------------------------------------------------------------- +Sat Sep 10 15:15:32 UTC 2022 - Arun Persaud + +- update to version 2022.9.0: + * Enhancements + + Enable automatic column projection for "groupby" aggregations + (:pr:`9442`) `Richard (Rick) Zamora`_ + + Accept superclasses in NEP-13/17 dispatching (:pr:`6710`) `Gabe + Joseph`_ + * Bug Fixes + + Rename "by" columns internally for cumulative operations on the + same "by" columns (:pr:`9430`) `Pavithra Eswaramoorthy`_ + + Fix "get_group" with categoricals (:pr:`9436`) `Pavithra + Eswaramoorthy`_ + + Fix caching-related "MaterializedLayer.cull" performance + regression (:pr:`9413`) `Richard (Rick) Zamora`_ + * Documentation + + Add maintainer documentation page (:pr:`9309`) `James Bourbeau`_ + * Maintenance + + Revert skipped fastparquet test (:pr:`9439`) `Pavithra + Eswaramoorthy`_ + + "tmpfile" does not end files with period on empty extension + (:pr:`9429`) `Hendrik Makait`_ + + Skip 
failing fastparquet test with latest release (:pr:`9432`) + `James Bourbeau`_ + +------------------------------------------------------------------- +Thu Sep 1 06:57:11 UTC 2022 - Steve Kowalik + +- Update to 2022.8.1: + * Implement ma.*_like functions (:pr:`9378`) `Ruth Comer`_ + * Fuse compatible annotations (:pr:`9402`) `Ian Rose`_ + * Shuffle-based groupby aggregation for high-cardinality groups (:pr:`9302`) + `Richard (Rick) Zamora`_ + * Unpack namedtuple (:pr:`9361`) `Hendrik Makait`_ + * Fix SeriesGroupBy cumulative functions with axis=1 (:pr:`9377`) + `Pavithra Eswaramoorthy`_ + * Sparse array reductions (:pr:`9342`) `Ian Rose`_ + * Fix make_meta while using categorical column with index (:pr:`9348`) + `Pavithra Eswaramoorthy`_ + * Don't allow incompatible keywords in DataFrame.dropna (:pr:`9366`) + `Naty Clementi`_ + * Make set_index handle entirely empty dataframes (:pr:`8896`) + `Julia Signell`_ + * Improve dataclass handling in unpack_collections (:pr:`9345`) + `Hendrik Makait`_ + * Fix bag sampling when there are some smaller partitions (:pr:`9349`) + `Ian Rose`_ + * Add support for empty partitions to da.min/da.max functions (:pr:`9268`) + `geraninam`_ + * Use entry_points utility in sizeof (:pr:`9390`) `James Bourbeau`_ + * Add entry_points compatibility utility (:pr:`9388`) `Jacob Tomlinson`_ + * Upload environment file artifact for each CI build (:pr:`9372`) + `James Bourbeau`_ + * Remove werkzeug pin in CI (:pr:`9371`) `James Bourbeau`_ + * Fix type annotations for dd.from_pandas and dd.from_delayed (:pr:`9362`) + `Jordan Yap`_ + * Ensure make_meta doesn't hold ref to data (:pr:`9354`) `Jim Crist-Harif`_ + * Revise divisions logic in from_pandas (:pr:`9221`) `Richard (Rick) Zamora`_ + * Warn if user sets index with existing index (:pr:`9341`) `Julia Signell`_ + * Add keepdims keyword for da.average (:pr:`9332`) `Ruth Comer`_ + * Change repr methods to avoid Layer materialization (:pr:`9289`) + `Richard (Rick) Zamora`_ + * Make sure order kwarg 
will not crash the astype method (:pr:`9317`) + `Genevieve Buckley`_ + * Fix bug for cumsum on cupy chunked dask arrays (:pr:`9320`) + `Genevieve Buckley`_ + * Match input and output structure in _sample_reduce (:pr:`9272`) + `Pavithra Eswaramoorthy`_ + * Include meta in array serialization (:pr:`9240`) `Frédéric BRIOL`_ + * Fix Index.memory_usage (:pr:`9290`) `James Bourbeau`_ + * Fix division calculation in dask.dataframe.io.from_dask_array (:pr:`9282`) + `Jordan Yap`_ + * Switch js-yaml for yaml.js in config converter (:pr:`9306`) + `Jacob Tomlinson`_ + * Update da.linalg.solve for SciPy 1.9.0 compatibility (:pr:`9350`) + `Pavithra Eswaramoorthy`_ + * Update test_getitem_avoids_large_chunks_missing (:pr:`9347`) + `Pavithra Eswaramoorthy`_ + * Import loop_in_thread fixture in tests (:pr:`9337`) `James Bourbeau`_ + * Temporarily xfail test_solve_sym_pos (:pr:`9336`) `Pavithra Eswaramoorthy`_ + * Update gpuCI RAPIDS_VER to 22.10 (:pr:`9314`) + * Return Dask array if all axes are squeezed (:pr:`9250`) + `Pavithra Eswaramoorthy`_ + * Make cycle reported by toposort shorter (:pr:`9068`) `Erik Welch`_ + * Unknown chunk slicing - raise informative error (:pr:`9285`) + `Naty Clementi`_ + * Fix bug in HighLevelGraph.cull (:pr:`9267`) `Richard (Rick) Zamora`_ + * Sort categories (:pr:`9264`) `Pavithra Eswaramoorthy`_ + * Use max (instead of sum) for calculating warnsize (:pr:`9235`) + `Pavithra Eswaramoorthy`_ + * Fix bug when filtering on partitioned column with pyarrow (:pr:`9252`) + `Richard (Rick) Zamora`_ + *Add type annotations to dd.from_pandas and dd.from_delayed (:pr:`9237`) + `Michael Milton`_ + * Update test_plot_multiple for upcoming bokeh release (:pr:`9261`) + `James Bourbeau`_ + * Add typing to common array properties (:pr:`9255`) `Illviljan`_ + +------------------------------------------------------------------- +Mon Jul 11 02:47:49 UTC 2022 - Arun Persaud + +- update to version 2022.7.0: + * Enhancements + + Support "pathlib.PurePath" in "normalize_token" 
(:pr:`9229`) + `Angus Hollands`_ + + Add "AttributeNotImplementedError" for properties so IPython + glob search works (:pr:`9231`) `Erik Welch`_ + + "map_overlap": multiple dataframe handling (:pr:`9145`) `Fabien + Aulaire`_ + + Read entrypoints in "dask.sizeof" (:pr:`7688`) `Angus Hollands`_ + * Bug Fixes + + Fix "TypeError: 'Serialize' object is not subscriptable" when + writing parquet dataset with "Client(processes=False)" + (:pr:`9015`) `Lucas Miguel Ponce`_ + + Correct dtypes when "concat" with an empty dataframe + (:pr:`9193`) `Pavithra Eswaramoorthy`_ + * Documentation + + Highlight note about persist (:pr:`9234`) `Pavithra + Eswaramoorthy`_ + + Update release-procedure to include more detail and helpful + commands (:pr:`9215`) `Julia Signell`_ + + Better SEO for Futures and Dask vs. Spark pages (:pr:`9217`) + `Sarah Charlotte Johnson`_ + * Maintenance + + Use "math.prod" instead of "np.prod" on lists, tuples, and iters + (:pr:`9232`) `crusaderky`_ + + Only import IPython if type checking (:pr:`9230`) `Florian + Jetter`_ + + Tougher mypy checks (:pr:`9206`) `crusaderky`_ + +------------------------------------------------------------------- +Fri Jun 24 20:21:01 UTC 2022 - Ben Greiner + +- Update to to 2022.6.1 + * Enhancements + - Dask in pyodide (GH#9053) Ian Rose + - Create dask.utils.show_versions (GH#9144) Sultan Orazbayev + - Better error message for unsupported numpy operations on + dask.dataframe objects. (GH#9201) Julia Signell + - Add allow_rechunk kwarg to dask.array.overlap function + (GH#7776) Genevieve Buckley + - Add minutes and hours to dask.utils.format_time (GH#9116) + Matthew Rocklin + - More retries when writing parquet to remote filesystem + (GH#9175) Ian Rose + * Bug Fixes + - Timedelta deterministic hashing (GH#9213) Fabien Aulaire + - Enum deterministic hashing (GH#9212) Fabien Aulaire + - shuffle_group(): avoid converting to arrays (GH#9157) Mads R. + B. 
Kristensen + * Deprecations + - Deprecate extra format_time utility (GH#9184) James Bourbeau +- Release 2022.6.0 + * Enhancements + - Add feature to show names of layer dependencies in HLG + JupyterLab repr (GH#9081) Angelos Omirolis + - Add arrow schema extraction dispatch (GH#9169) GALI PREM + SAGAR + - Add sort_results argument to assert_eq (GH#9130) Pavithra + Eswaramoorthy + - Add weeks to parse_timedelta (GH#9168) Matthew Rocklin + - Warn that cloudpickle is not always deterministic (GH#9148) + Pavithra Eswaramoorthy + - Switch parquet default engine (GH#9140) Jim Crist-Harif + - Use deterministic hashing with _iLocIndexer / _LocIndexer + (GH#9108) Fabien Aulaire + - Enfore consistent schema in to_parquet pyarrow (GH#9131) Jim + Crist-Harif + * Bug Fixes + - Fix pyarrow.StringArray pickle (GH#9170) Jim Crist-Harif + - Fix parallel metadata collection in pyarrow engine (GH#9165) + Richard (Rick) Zamora + - Improve pyarrow partitioning logic (GH#9147) James Bourbeau + - pyarrow 8.0 partitioning fix (GH#9143) James Bourbeau +- Release 2022.05.2 + * Enhancements + - Add a dispatch for non-pandas Grouper objects and use it in + GroupBy (GH#9074) brandon-b-miller + - Error if read_parquet & to_parquet files intersect (GH#9124) + Jim Crist-Harif + - Visualize task graphs using ipycytoscape (GH#9091) Ian Rose +- Release 2022.05.1 + * New Features + - Add DataFrame.from_dict classmethod (GH#9017) Matthew Powers + - Add from_map function to Dask DataFrame (GH#8911) Richard + (Rick) Zamora + * Enhancements + - Improve to_parquet error for appended divisions overlap + (GH#9102) Jim Crist-Harif + - Enabled user-defined process-initializer functions (GH#9087) + ParticularMiner + - Mention align_dataframes=False option in map_partitions error + (GH#9075) Gabe Joseph + - Add kwarg enforce_ndim to dask.array.map_blocks() (GH#8865) + ParticularMiner + - Implement Series.GroupBy.fillna / DataFrame.GroupBy.fillna + methods (GH#8869) Pavithra Eswaramoorthy + - Allow fillna with 
Dask DataFrame (GH#8950) Pavithra + Eswaramoorthy + - Update error message for assignment with 1-d dask array + (GH#9036) Pavithra Eswaramoorthy + - Collection Protocol (GH#8674) Doug Davis + - Patch around pandas ArrowStringArray pickling (GH#9024) Jim + Crist-Harif + - Band-aid for compute_as_if_collection (GH#8998) Ian Rose + - Add p2p shuffle option (GH#8836) Matthew Rocklin + * Bug Fixes + - Fixup column projection with no columns (GH#9106) Jim + Crist-Harif + - Blockwise cull NumPy dtype (GH#9100) Ian Rose + - Fix column-projection bug in from_map (GH#9078) Richard + (Rick) Zamora + - Prevent nulls in index for non-numeric dtypes (GH#8963) Jorge + López + - Fix is_monotonic methods for more than 8 partitions (GH#9019) + Julia Signell + - Handle enumerate and generator inputs to from_map (GH#9066) + Richard (Rick) Zamora + - Revert is_dask_collection; back to previous implementation + (GH#9062) Doug Davis + - Fix Blockwise.clone does not handle iterable literal + arguments correctly (GH#8979) JSKenyon + - Array setitem hardmask (GH#9027) David Hassell + - Fix overlapping divisions error on append (GH#8997) Ian Rose + * Deprecations + - Add pre-deprecation warnings for read_parquet kwargs + chunksize and aggregate_files (GH#9052) Richard (Rick) Zamora +- Release 2022.05.0 + * This is a bugfix release with doc changes only +- Release 2022.04.2 + * This release includes several deprecations/breaking API changes + to dask.dataframe.read_parquet and dask.dataframe.to_parquet: + - to_parquet no longer writes _metadata files by default. If + you want to write a _metadata file, you can pass in + write_metadata_file=True. + - read_parquet now defaults to split_row_groups=False, which + results in one Dask dataframe partition per parquet file when + reading in a parquet dataset. If you’re working with large + parquet files you may need to set split_row_groups=True to + reduce your partition size. + - read_parquet no longer calculates divisions by default. 
If + you require read_parquet to return dataframes with known + divisions, please set calculate_divisions=True. + - read_parquet has deprecated the gather_statistics keyword + argument. Please use the calculate_divisions keyword argument + instead. + - read_parquet has deprecated the require_extensions keyword + argument. Please use the parquet_file_extension keyword + argument instead. + * New Features + - Add removeprefix and removesuffix as StringMethods (GH#8912) + Jorge López + * Enhancements + - Call fs.invalidate_cache in to_parquet (GH#8994) Jim + Crist-Harif + - Change to_parquet default to write_metadata_file=None + (GH#8988) Jim Crist-Harif + - Let arg reductions pass keepdims (GH#8926) Julia Signell + - Change split_row_groups default to False in read_parquet + (GH#8981) Richard (Rick) Zamora + - Improve NotImplementedError message for da.reshape (GH#8987) + Jim Crist-Harif + - Simplify to_parquet compute path (GH#8982) Jim Crist-Harif + - Raise an error if you try to use vindex with a Dask object + (GH#8945) Julia Signell + - Avoid pre_buffer=True when a precache method is specified + (GH#8957) Richard (Rick) Zamora + - from_dask_array uses blockwise instead of merging graphs + (GH#8889) Bryan Weber + - Use pre_buffer=True for “pyarrow” Parquet engine (GH#8952) + Richard (Rick) Zamora + * Bug Fixes + - Handle dtype=None correctly in da.full (GH#8954) Tom White + - Fix dask-sql bug caused by blockwise fusion (GH#8989) Richard + (Rick) Zamora + - to_parquet errors for non-string column names (GH#8990) Jim + Crist-Harif + - Make sure da.roll works even if shape is 0 (GH#8925) Julia + Signell + - Fix recursion error issue with set_index (GH#8967) Paul + Hobson + - Stringify BlockwiseDepDict mapping values when + produces_keys=True (GH#8972) Richard (Rick) Zamora + - Use DataFram`eIOLayer in DataFrame.from_delayed (GH#8852) + Richard (Rick) Zamora + - Check that values for the in predicate in read_parquet are + correct (GH#8846) Bryan Weber + - Fix bug for 
reduction of zero dimensional arrays (GH#8930) + Tom White + - Specify dtype when deciding division using np.linspace in + read_sql_query (GH#8940) Cheun Hong + * Deprecations + - Deprecate gather_statistics from read_parquet (GH#8992) + Richard (Rick) Zamora + - Change require_extension to top-level parquet_file_extension + read_parquet kwarg (GH#8935) Richard (Rick) Zamora +- Release 2022.04.1 + * New Features + - Add missing NumPy ufuncs: abs, left_shift, right_shift, + positive. (GH#8920) Tom White + * Enhancements + - Avoid collecting parquet metadata in pyarrow when + write_metadata_file=False (GH#8906) Richard (Rick) Zamora + - Better error for failed wildcard path in dd.read_csv() (fixes + #8878) (GH#8908) Roger Filmyer + - Return da.Array rather than dd.Series for non-ufunc + elementwise functions on dd.Series (GH#8558) Julia Signell + - Let get_dummies use meta computation in map_partitions + (GH#8898) Julia Signell + - Masked scalars input to da.from_array (GH#8895) David Hassell + - Raise ValueError in merge_asof for duplicate kwargs (GH#8861) + Bryan Weber + * Bug Fixes + - Make is_monotonic work when some partitions are empty + (GH#8897) Julia Signell + - Fix custom getter in da.from_array when inline_array=False + (GH#8903) Ian Rose + - Correctly handle dict-specification for rechunk. 
(GH#8859) + Richard + - Fix merge_asof: drop index column if left_on == right_on + (GH#8874) Gil Forsyth + * Deprecations + - Warn users that engine='auto' will change in future (GH#8907) + Jim Crist-Harif + - Remove pyarrow-legacy engine from parquet API (GH#8835) + Richard (Rick) Zamora +- Release 2022.04.0 + * This is the first release with support for Python 3.10 + * New Features + - Add Python 3.10 support (GH#8566) James Bourbeau + * Enhancements + - Add check on dtype.itemsize in order to produce a useful + error (GH#8860) Davide Gavio + - Add mild typing to common utils functions (GH#8848) Matthew + Rocklin + - Add sanity checks to divisions setter (GH#8806) Jim + Crist-Harif + - Use Blockwise and map_partitions for more tasks (GH#8831) + Bryan Weber + * Bug Fixes + - Fix dataframe.merge_asof to preserve right_on column + (GH#8857) Sarah Charlotte Johnson + - Fix “Buffer dtype mismatch” for pandas >= 1.3 on 32bit + (GH#8851) Ben Greiner + - Fix slicing fusion by altering SubgraphCallable getter + (GH#8827) Ian Rose + * Deprecations + - Remove support for PyPy (GH#8863) James Bourbeau + - Drop setuptools at runtime (GH#8855) crusaderky + - Remove dataframe.tseries.resample.getnanos (GH#8834) Sarah + Charlotte Johnson +- Drop dask-fix8169-pandas13.patch and dask-py310-test.patch + +------------------------------------------------------------------- +Sun Mar 27 19:18:19 UTC 2022 - Ben Greiner + +- dask.dataframe requires dask.bag (revealed by swifter test suite) + +------------------------------------------------------------------- +Fri Mar 25 19:02:53 UTC 2022 - Ben Greiner + +- Update to 2022.3.0 + * Bag: add implementation for reservoir sampling + * Add ma.count to Dask array + * Change to_parquet default to compression="snappy" + * Add weights parameter to dask.array.reduction + * Add ddf.compute_current_divisions to get divisions on a sorted + index or column + * Pass __name__ and __doc__ through on DelayedLeaf + * Raise exception for not implemented 
merge how option + * Move Bag.map_partitions to Blockwise + * Improve error messages for malformed config files + * Revise column-projection optimization to capture common + dask-sql patterns + * Useful error for empty divisions + * Scipy 1.8.0 compat: copy private classes into + dask/array/stats.py +- Release 2022.2.1 + * Add aggregate functions first and last to + dask.dataframe.pivot_table + * Add std() support for datetime64 dtype for pandas-like objects + * Add materialized task counts to HighLevelGraph and Layer html + reprs + * Do not allow iterating a DataFrameGroupBy + * Fix missing newline after info() call on empty DataFrame + * Add groupby.compute as a not implemented method + * Improve multi dataframe join performance + * Include bool type for Index + * Allow ArrowDatasetEngine subclass to override pandas->arrow + conversion also for partitioned write + * Increase performance of k-diagonal extraction in da.diag() and + da.diagonal() + * Change linspace creation to match numpy when num equal to 0 + * Tokenize dataclasses + * Update tokenize to treat dict and kwargs differently +- Release 2022.2.0 + * Add region to to_zarr when using existing array + * Add engine_kwargs support to dask.dataframe.to_sql + * Add include_path_column arg to read_json + * Add expand_dims to Dask array + * Add scheduler option to assert_eq utilities + * Fix eye inconsistency with NumPy for dtype=None + * Fix concatenate inconsistency with NumPy for axis=None + * Type annotations, part 1 + * Really allow any iterable to be passed as a meta + * Use map_partitions (Blockwise) in to_parquet +- Update dask-fix8169-pandas13.patch +- Add dask-py310-test.patch -- gh#dask/dask#8566 +- Make the distributed/dask update sync requirement even more + obvious. 
+ +------------------------------------------------------------------- +Sat Jan 29 17:35:38 UTC 2022 - Ben Greiner + +- Update to 2022.1.1 + * Add dask.dataframe.series.view() + * Update tz for fastparquet + pandas 1.4.0 + * Cleaning up misc tests for pandas compat + * Moving to SQLAlchemy >= 1.4 + * Pandas compat: Filter sparse warnings + * Fail if meta is not a pandas object + * Use fsspec.parquet module for better remote-storage + read_parquet performance + * Move DataFrame ACA aggregations to HLG + * Add optional information about originating function call in + DataFrameIOLayer + * Blockwise array creation redux + * Refactor config default search path retrieval + * Add optimize_graph flag to Bag.to_dataframe function + * Make sure that delayed output operations still return lists of + paths + * Pandas compat: Fix to_frame name to not pass None + * Pandas compat: Fix axis=None warning + * Expand Dask YAML config search directories + * Fix groupby.cumsum with series grouped by index + * Fix derived_from for pandas methods + * Enforce boolean ascending for sort_values + * Fix parsing of __setitem__ indices + * Avoid divide by zero in slicing + * Downgrade meta error in + * Pandas compat: Deprecate append when pandas >= 1.4.0 + * Replace outdated columns argument with meta in DataFrame + constructor + * Refactor deploying docs + * Pin coverage in CI + * Move cached_cumsum imports to be from dask.utils + * Update gpuCI RAPIDS_VER to 22.04 + * Update cocstring for from_delayed function + * Handle plot_width / plot_height deprecations + * Remove unnecessary pyyaml importorskip + * Specify scheduler in DataFrame assert_eq + +------------------------------------------------------------------- +Tue Jan 25 22:07:53 UTC 2022 - Ben Greiner + +- Revert python310 enablement -- gh#dask/distributed#5460 + +------------------------------------------------------------------- +Tue Jan 25 09:35:17 UTC 2022 - Dirk Müller + +- reenable python 3.10 build as distributed is also 
reenabled + +------------------------------------------------------------------- +Thu Jan 20 16:23:05 UTC 2022 - Ben Greiner + +- Update to 2022.1.0 + * Add groupby.shift method (GH#8522) kori73 + * Add DataFrame.nunique (GH#8479) Sarah Charlotte Johnson + * Add da.ndim to match np.ndim (GH#8502) Julia Signell + * Replace interpolation with method and method with + internal_method (GH#8525) Julia Signell + * Remove daily stock demo utility (GH#8477) James Bourbeau + * Add Series and Index is_monotonic* methods (GH#8304) Daniel + Mesejo-León + * Deprecate token keyword argument to map_blocks (GH#8464) James + Bourbeau + * Deprecation warning for default value of boundary kwarg in + map_overlap (GH#8397) Genevieve Buckley +- Skip python310: Not supported by distributed yet + -- gh#dask/distributed#5350 + +------------------------------------------------------------------- +Wed Sep 22 12:50:07 UTC 2021 - Ben Greiner + +- Update to 2021.09.1 + * Fix groupby for future pandas + * Remove warning filters in tests that are no longer needed + * Add link to diagnostic visualize function in local diagnostic + docs + * Add datetime_is_numeric to dataframe.describe + * Remove references to pd.Int64Index in anticipation of + deprecation + * Use loc if needed for series __get_item__ + * Specifically ignore warnings on mean for empty slices + * Skip groupby nunique test for pandas >= 1.3.3 + * Implement ascending arg for sort_values + * Replace operator.getitem + * Deprecate zero_broadcast_dimensions and homogeneous_deepmap + * Add error if drop_index is negative + * Allow scheduler to be an Executor + * Handle asarray/asanyarray cases where like is a dask.Array + * Fix index_col duplication if index_col is type str + * Add dtype and order to asarray and asanyarray definitions + * Deprecate dask.dataframe.Series.__contains__ + * Fix edge case with like-arrays in _wrapped_qr + * Deprecate boundary_slice kwarg: kind for pandas compat +- Release 2021.09.0 + * Fewer open files + * Add 
FileNotFound to expected http errors + * Add DataFrame.sort_values to API docs + * Change to dask.order: be more eager at times + * Add pytest color to CI + * FIX: make_people works with processes scheduler + * Adds deep param to Dataframe copy method and restrict it to + False + * Fix typo in configuration docs + * Update formatting in DataFrame.query docstring + * Un-xfail sparse tests for 0.13.0 release + * Add axes property to DataFrame and Series + * Add CuPy support in da.unique (values only) + * Unit tests for sparse.zeros_like (xfailed) + * Add explicit like kwarg support to array creation functions + * Separate Array and DataFrame mindeps builds + * Fork out percentile_dispatch to dask.array + * Ensure filepath exists in to_parquet + * Update scheduler plugin usage in + test_scheduler_highlevel_graph_unpack_import + * Add DataFrame.shuffle to API docs + * Order requirements alphabetically +- Release 2021.08.1 + * Add ignore_metadata_file option to read_parquet + (pyarrow-dataset and fastparquet support only) + * Add reference to pytest-xdist in dev docs + * Include tz in meta from to_datetime + * CI Infra Docs + * Include invalid DataFrame key in assert_eq check + * Use __class__ when creating DataFrames + * Use development version of distributed in gpuCI build + * Ignore whitespace when gufunc signature + * Move pandas import and percentile dispatch refactor + * Add colors to represent high level layer types + * Upstream instance fix + * Add dask.widgets and migrate HTML reprs to jinja2 + * Remove wrap_func_like_safe, not required with + NumPy >= 1.17 + * Fix threaded scheduler memory backpressure regression + * Add percentile dispatch + * Use a publicly documented attribute obj in groupby rather than + private _selected_obj + * Specify module to import rechunk from + * Use dict to store data for {nan,}arg{min,max} in certain cases + * Fix blocksize description formatting in read_pandas + * Fix "point" -> "pointers" typo in docs +- Release 2021.08.0 + * 
Fix to_orc delayed compute behavior + * Don't convert to low-level task graph in + compute_as_if_collection + * Fix multifile read for hdf + * Resolve warning in distributed tests + * Update to_orc collection name + * Resolve skipfooter problem + * Raise NotImplementedError for non-indexable arg passed to + to_datetime + * Ensure we error on warnings from distributed + * Added dict format in to_bag accessories of DataFrame + * Delayed docs indirect dependencies + * Add tooltips to graphviz high-level graphs + * Close 2021 User Survey + * Reorganize CuPy tests into multiple files + * Refactor and Expand Dask-Dataframe ORC API + * Don't enforce columns if enforce=False + * Fix map_overlap trimming behavior when drop_axis is not None + * Mark gpuCI CuPy test as flaky + * Avoid using Delayed in to_csv and to_parquet + * Removed redundant check_dtypes + * Use pytest.warns instead of raises for checking parquet engine + deprecation + * Bump RAPIDS_VER in gpuCI to 21.10 + * Add back pyarrow-legacy test coverage for pyarrow>=5 + * Allow pyarrow>=5 in to_parquet and read_parquet + * Skip CuPy tests requiring NEP-35 when NumPy < 1.20 is available + * Add tail and head to SeriesGroupby + * Update Zoom link for monthly meeting + * Add gpuCI build script + * Deprecate daily_stock utility + * Add distributed.nanny to configuration reference docs + * Require NumPy 1.18+ & Pandas 1.0+ +- Add dask-fix8169-pandas13.patch -- gh#dask/dask#8169 + +------------------------------------------------------------------- +Sun Aug 8 14:42:17 UTC 2021 - Ben Greiner + +- Update to 2021.7.2 + * This is the last release with support for NumPy 1.17 and pandas + 0.25. Beginning with the next release, NumPy 1.18 and pandas + 1.0 will be the minimum supported versions. 
+ * Add dask.array SVG to the HTML Repr + * Avoid use of Delayed in to_parquet + * Temporarily pin pyarrow<5 in CI + * Add deprecation warning for top-level ucx and rmm config values + * Remove skips from doctests (4 of 6) + * Remove skips from doctests (5 of 6) + * Adds missing prepend/append functionality to da.diff + * Change graphviz font family to sans + * Fix read-csv name - when path is different, use different name + for task + * Update configuration reference for ucx and rmm changes + * Add meta support to __setitem__ + * NEP-35 support for slice_with_int_dask_array + * Unpin fastparquet in CI + * Remove skips from doctests (3 of 6) +- Release 2021.7.1 + * Make array assert_eq check dtype + * Remove skips from doctests (6 of 6) + * Remove experimental feature warning from actors docs + * Remove skips from doctests (2 of 6) + * Separate out Array and Bag API + * Implement lazy Array.__iter__ + * Clean up places where we inadvertently iterate over arrays + * Add numeric_only kwarg to DataFrame reductions + * Add pytest marker for GPU tests + * Add support for histogram2d in dask.array + * Remove skips from doctests (1 of 6) + * Add node size scaling to the Graphviz output for the high + level graphs + * Update old Bokeh links + * Temporarily pin fastparquet in CI + * Add dask.array import to progress bar docs + * Use separate files for each DataFrame API function and method + * Fix pyarrow-dataset ordering bug + * Generalize unique aggregate + * Raise NotImplementedError when using pd.Grouper + * Add aggregate_files argument to enable multi-file partitions in + read_parquet + * Un-xfail test_daily_stock + * Update access configuration docs + * Use packaging for version comparisons + * Handle infinite loops in merge_asof + +------------------------------------------------------------------- +Fri Jul 16 09:25:39 UTC 2021 - Ben Greiner + +- Update to 2021.07.0 + * Include fastparquet in upstream CI build + * Blockwise: handle non-string constant dependencies + 
* fastparquet now supports new time types, including ns precision + * Avoid ParquetDataset API when appending in ArrowDatasetEngine + * Add retry logic to test_shuffle_priority + * Use strict channel priority in CI + * Support nested dask.distributed imports + * Should check module name only, not the entire directory + filepath + * Updates due to https://github.com/dask/fastparquet/pull/623 + * da.eye fix for chunks=-1 + * Temporarily xfail test_daily_stock + * Set priority annotations in SimpleShuffleLayer + * Blockwise: stringify constant key inputs + * Allow mixing dask and numpy arrays in @guvectorize + * Don't sample dict result of a shuffle group when calculating + its size + * Fix scipy tests + * Deterministically tokenize datetime.date + * Add sample_rows to read_csv-like + * Fix typo in config.deserialize docstring + * Remove warning filter in test_dataframe_picklable + * Improvements to histogramdd + * Make PY_VERSION private +- Release 2021.06.2 + * layers.py compare parts_out with set(self.parts_out) + * Make check_meta understand pandas dtypes better + * Remove "Educational Resources" doc page + * - Release 2021.06.1 + * Replace funding page with 'Supported By' section on dask.org + * Add initial deprecation utilities + * Enforce dtype conservation in ufuncs that explicitly use dtype= + * Add Coiled to list of paid support organizations + * Small tweaks to the HTML repr for Layer & HighLevelGraph + * Add dark mode support to HLG HTML repr + * Remove compatibility entries for old distributed + * Implementation of HTML repr for HighLevelGraph layers + * Update default blockwise token to avoid DataFrame column name + clash + * Use dispatch concat for merge_asof + * Fix upstream freq tests + * Use more context managers from the standard library + * Simplify skips in parquet tests + * Remove check for outdated bokeh + * More test coverage uploads + * Remove ImportError catching from dask/__init__.py + * Allow DataFrame.join() to take a list of DataFrames to 
merge + with + * Fix maximum recursion depth exception in dask.array.linspace + * Fix docs links + * Initial da.select() implementation and test + * Layers must implement get_output_keys method + * Don't include or expect freq in divisions + * A HighLevelGraph abstract layer for map_overlap + * Always include kwarg name in drop + * Only rechunk for median if needed + * Add add_(prefix|suffix) to DataFrame and Series + * Move read_hdf to Blockwise + * Make Layer.get_output_keys officially an abstract method + * Non-dask-arrays and broadcasting in ravel_multi_index + * Fix for paths ending with "/" in parquet overwrite + * Fixing calling .visualize() with filename=None + * Generate unique names for SubgraphCallable + * Pin fsspec to 2021.5.0 in CI + * Evaluate graph lazily if meta is provided in from_delayed + * Add meta support for DatetimeTZDtype + * Add dispatch label to automatic PR labeler + * Fix HDFS tests +- Release 2021.06.0 + * Remove abstract tokens from graph keys in rewrite_blockwise + * Ensure correct column order in csv project_columns + * Renamed inner loop variables to avoid duplication + * Do not return delayed object from to_zarr + * Array: correct number of outputs in apply_gufunc + * Rewrite da.fromfunction with da.blockwise + * Rename make_meta_util to make_meta + * Repartition before shuffle if the requested partitions are + less than input partitions + * Blockwise: handle constant key inputs + * Added raise to apply_gufunc + * Show failing tests summary in CI + * sizeof sets in Python 3.9 + * Warn if using pandas datetimelike string in + dataframe.__getitem__ + * Highlight the client.dashboard_link + * Easier link for subscribing to the Google calendar + * Automatically show graph visualization in Jupyter notebooks + * Add autofunction for unify_chunks in API docs +- Release 2021.05.1 + * Pandas compatibility + * Fix optimize_dataframe_getitem bug + * Update make_meta import in docs + * Implement da.searchsorted + * Fix format string in error 
message + * Fix read_sql_table returning wrong result for single column + loads + * Add slack join link in support.rst + * Remove unused alphabet variable + * Fix meta creation incase of object + * Add dispatch for union_categoricals + * Consolidate array Dispatch objects + * Move DataFrame dispatch.registers to their own file + * Fix delayed with dataclasses where init=False + * Allow a column to be named divisions + * Stack nd array with unknown chunks + * Promote the 2021 Dask User Survey + * Fix typo in DataFrame.set_index() + * Cleanup array API reference links + * Accept axis tuple for flip to be consistent with NumPy + * Bump pre-commit hook versions + * Cleanup to_zarr docstring + * Fix the docstring of read_orc + * Doc ipyparallel & mpi4py concurrent.futures + * Update tests to support CuPy 9 + * Fix some HighLevelGraph documentation inaccuracies + * Fix spelling in Series getitem error message + +------------------------------------------------------------------- +Tue May 18 10:12:19 UTC 2021 - Ben Greiner + +- update to version 2021.5.0 + * Remove deprecated kind kwarg to comply with pandas 1.3.0 + (GH#7653) Julia Signell + * Fix bug in DataFrame column projection (GH#7645) Richard (Rick) + Zamora + * Merge global annotations when packing (GH#7565) Mads R. B. + Kristensen + * Avoid inplace= in pandas set_categories (GH#7633) James + Bourbeau + * Change the active-fusion default to False for Dask-Dataframe + (GH#7620) Richard (Rick) Zamora + * Array: remove extraneous code from RandomState (GH#7487) Gabe + Joseph + * Implement str.concat when others=None (GH#7623) Daniel + Mesejo-León + * Fix dask.dataframe in sandboxed environments (GH#7601) Noah D. 
+ Brenowitz + * Support for cupyx.scipy.linalg (GH#7563) Benjamin Zaitlen + * Move timeseries and daily-stock to Blockwise (GH#7615) Richard + (Rick) Zamora + * Fix bugs in broadcast join (GH#7617) Richard (Rick) Zamora + * Use Blockwise for DataFrame IO (parquet, csv, and orc) + (GH#7415) Richard (Rick) Zamora + * Adding chunk & type information to Dask HighLevelGraph s + (GH#7309) Genevieve Buckley + * Add pyarrow sphinx intersphinx_mapping (GH#7612) Ray Bell + * Remove skip on test freq (GH#7608) Julia Signell + * Defaults in read_parquet parameters (GH#7567) Ray Bell + * Remove ignore_abc_warning (GH#7606) Julia Signell + * Harden DataFrame merge between column-selection and index + (GH#7575) Richard (Rick) Zamora + * Get rid of ignore_abc decorator (GH#7604) Julia Signell + * Remove kwarg validation for bokeh (GH#7597) Julia Signell + * Add loky example (GH#7590) Naty Clementi + * Delayed: nout when arguments become tasks (GH#7593) Gabe Joseph + * Update distributed version in mindep CI build (GH#7602) James + Bourbeau + * Support all or no overlap between partition columns and real + columns (GH#7541) Richard (Rick) Zamora +- Stress that python-distributed, if used, has to have a matching + version number. Always update at the same time. + +------------------------------------------------------------------- +Mon May 3 01:34:23 UTC 2021 - Arun Persaud + +- update to version 2021.4.1: + * Handle Blockwise HLG pack/unpack for concatenate=True (:pr:`7455`) + Richard (Rick) Zamora + * map_partitions: use tokenized info as name of the SubgraphCallable + (:pr:`7524`) Mads R. B. 
Kristensen + * Using tmp_path and tmpdir to avoid temporary files and directories + hanging in the repo (:pr:`7592`) Naty Clementi + * Contributing to docs (development guide) (:pr:`7591`) Naty + Clementi + * Add more packages to Python 3.9 CI build (:pr:`7588`) James + Bourbeau + * Array: Fix NEP-18 dispatching in finalize (:pr:`7508`) Gabe Joseph + * Misc fixes for numpydoc (:pr:`7569`) Matthias Bussonnier + * Avoid pandas level= keyword deprecation (:pr:`7577`) James + Bourbeau + * Map e.g. .repartition(freq="M") to .repartition(freq="MS") + (:pr:`7504`) Ruben van de Geer + * Remove hash seeding in parallel CI runs (:pr:`7128`) Elliott Sales + de Andrade + * Add defaults in parameters in to_parquet (:pr:`7564`) Ray Bell + * Simplify transpose axes cleanup (:pr:`7561`) Julia Signell + * Make ValueError in len(index_names) > 1 explicit it's using + fastparquet (:pr:`7556`) Ray Bell + * Fix dict-column appending for pyarrow parquet engines (:pr:`7527`) + Richard (Rick) Zamora + * Add a documentation auto label (:pr:`7560`) Doug Davis + * Add dask.delayed.Delayed to docs so it can be referenced by other + sphinx docs (:pr:`7559`) Doug Davis + * Fix upstream idxmaxmin for uneven split_every (:pr:`7538`) Julia + Signell + * Make normalize_token for pandas Series/DataFrame future proof (no + direct block access) (:pr:`7318`) Joris Van den Bossche + * Redesigned __setitem__ implementation (:pr:`7393`) David Hassell + * histogram, histogramdd improvements (docs; return consistencies) + (:pr:`7520`) Doug Davis + * Force nightly pyarrow in the upstream build (:pr:`7530`) Joris Van + den Bossche + * Fix Configuration Reference (:pr:`7533`) Benjamin Zaitlen + * Use .to_parquet on dask.dataframe in doc string (:pr:`7528`) Ray + Bell + * Avoid double msgpack serialization of HLGs (:pr:`7525`) Mads + R. B. Kristensen + * Encourage usage of yaml.safe_load() in configuration doc + (:pr:`7529`) Hristo Georgiev + * Fix reshape bug. Add relevant test. Fixes #7171. 
(:pr:`7523`) + JSKenyon + * Support custom_metadata= argument in to_parquet (:pr:`7359`) + Richard (Rick) Zamora + * Clean some documentation warnings (:pr:`7518`) Daniel Mesejo-León + * Getting rid of more docs warnings (:pr:`7426`) Julia Signell + * Added product (alias of prod) (:pr:`7517`) Freyam Mehta + * Fix upstream __array_ufunc__ tests (:pr:`7494`) Julia Signell + * Escape from map_overlap to map_blocks if depth is zero + (:pr:`7481`) Genevieve Buckley + * Add check_type to array assert_eq (:pr:`7491`) Julia Signell + +------------------------------------------------------------------- +Fri Apr 9 13:47:13 UTC 2021 - Benjamin Greiner + +- Reenable 32bit tests after distributed is not cythonized anymore + gh#dask/dask#7489 + +------------------------------------------------------------------- +Sun Apr 4 16:38:31 UTC 2021 - Arun Persaud + +- update to version 2021.4.0: + * Adding support for multidimensional histograms with + dask.array.histogramdd (:pr:`7387`) Doug Davis + * Update docs on number of threads and workers in default + LocalCluster (:pr:`7497`) cameron16 + * Add labels automatically when certain files are touched in a PR + (:pr:`7506`) Julia Signell + * Extract ignore_order from kwargs (:pr:`7500`) GALI PREM SAGAR + * Only provide installation instructions when distributed is missing + (:pr:`7498`) Matthew Rocklin + * Start adding isort (:pr:`7370`) Julia Signell + * Add ignore_order parameter in dd.concat (:pr:`7473`) Daniel + Mesejo-León + * Use powers-of-two when displaying RAM (:pr:`7484`) Guido Imperiale + * Added License Classifier (:pr:`7485`) Tom Augspurger + * Replace conda with mamba (:pr:`7227`) Guido Imperiale + * Fix typo in array docs (:pr:`7478`) James Lamb + * Use concurrent.futures in local scheduler (:pr:`6322`) John A + Kirkham + +------------------------------------------------------------------- +Tue Mar 30 21:47:53 UTC 2021 - Ben Greiner + +- Update to 2021.3.1 + * Add a dispatch for is_categorical_dtype to handle 
non-pandas + objects (GH#7469) brandon-b-miller + * Use multiprocessing.Pool in test_read_text (GH#7472) John A + Kirkham + * Add missing meta kwarg to gufunc class (GH#7423) Peter Andreas + Entschev + * Example for memory-mapped Dask array (GH#7380) Dieter Weber + * Fix NumPy upstream failures xfail pandas and fastparquet + failures (GH#7441) Julia Signell + * Fix bug in repartition with freq (GH#7357) Ruben van de Geer + * Fix __array_function__ dispatching for tril/triu (GH#7457) + Peter Andreas Entschev + * Use concurrent.futures.Executors in a few tests (GH#7429) John + A Kirkham + * Require NumPy >=1.16 (GH#7383) Guido Imperiale + * Minor sort_values housekeeping (GH#7462) Ryan Williams + * Ensure natural sort order in parquet part paths (GH#7249) Ryan + Williams + * Remove global env mutation upon running test_config.py + (GH#7464) Hristo + * Update NumPy intersphinx URL (GH#7460) Gabe Joseph + * Add rot90 (GH#7440) Trevor Manz + * Update docs for required package for endpoint (GH#7454) Nick + Vazquez + * Master -> main in slice_array docstring (GH#7453) Gabe Joseph + * Expand dask.utils.is_arraylike docstring (GH#7445) Doug Davis + * Simplify BlockwiseIODeps importing (GH#7420) Richard (Rick) + Zamora + * Update layer annotation packing method (GH#7430) James Bourbeau + * Drop duplicate test in test_describe_empty (GH#7431) John A + Kirkham + * Add Series.dot method to dataframe module (GH#7236) Madhu94 + * Added df kurtosis-method and testing (GH#7273) Jan Borchmann + * Avoid quadratic-time performance for HLG culling (GH#7403) + Bruce Merry + * Temporarily skip problematic sparse test (GH#7421) James + Bourbeau + * Update some CI workflow names (GH#7422) James Bourbeau + * Fix HDFS test (GH#7418) Julia Signell + * Make changelog subtitles match the hierarchy (GH#7419) Julia + Signell + * Add support for normalize in value_counts (GH#7342) Julia + Signell + * Avoid unnecessary imports for HLG Layer unpacking and + materialization (GH#7381) Richard (Rick) 
Zamora + * Bincount fix slicing (GH#7391) Genevieve Buckley + * Add sliding_window_view (GH#7234) Deepak Cherian + * Fix typo in docs/source/develop.rst (GH#7414) Hristo + * Switch documentation builds for PRs to readthedocs (GH#7397) + James Bourbeau + * Adds sort_values to dask.DataFrame (GH#7286) gerrymanoim + * Pin sqlalchemy<1.4.0 in CI (GH#7405) James Bourbeau + * Comment fixes (GH#7215) Ryan Williams + * Dead code removal / fixes (GH#7388) Ryan Williams + * Use single thread for pa.Table.from_pandas calls (GH#7347) + Richard (Rick) Zamora + * Replace 'container' with 'image' (GH#7389) James Lamb + * DOC hyperlink repartition (GH#7394) Ray Bell + * Pass delimiter to fsspec in bag.read_text (GH#7349) Martin + Durant + * Update read_hdf default mode to "r" (GH#7039) rs9w33 + * Embed literals in SubgraphCallable when packing Blockwise + (GH#7353) Mads R. B. Kristensen + * Update test_hdf.py to not reuse file handlers (GH#7044) rs9w33 + * Require additional dependencies: cloudpickle, partd, fsspec, + toolz (GH#7345) Julia Signell + * Prepare Blockwise + IO infrastructure (GH#7281) Richard (Rick) + Zamora + * Remove duplicated imports from test_slicing.py (GH#7365) Hristo + * Add test deps for pip development (GH#7360) Julia Signell + * Support int slicing for non-NumPy arrays (GH#7364) Peter + Andreas Entschev + * Automatically cancel previous CI builds (GH#7348) James + Bourbeau + * dask.array.asarray should handle case where xarray class is in + top-level namespace (GH#7335) Tom White + * HighLevelGraph length without materializing layers (GH#7274) + Gabe Joseph + * Drop support for Python 3.6 (GH#7006) James Bourbeau + * Fix fsspec usage in create_metadata_file (GH#7295) Richard + (Rick) Zamora + * Change default branch from master to main (GH#7198) Julia + Signell + * Add Xarray to CI software environment (GH#7338) James Bourbeau + * Update repartition argument name in error text (GH#7336) Eoin + Shanaghy + * Run upstream tests based on commit message 
(GH#7329) James + Bourbeau + * Use pytest.register_assert_rewrite on util modules (GH#7278) + Bruce Merry + * Add example on using specific chunk sizes in from_array() + (GH#7330) James Lamb + * Move NumPy skip into test (GH#7247) Julia Signell +- Update package descriptions +- Add dask-delayed and dask-diagnostics packages +- Drop dask-multiprocessing package merged into main +- Skip python36: upstream dropped support for Python < 3.7 +- Drop dask-pr7247-numpyskip.patch merged upstream +- Test more optional requirements for better compatibility + assurance. + +------------------------------------------------------------------- +Sun Mar 7 16:40:26 UTC 2021 - Ben Greiner + +- Update to 2021.3.0 + * This is the first release with support for Python 3.9 and the + last release with support for Python 3.6 + * Bump minimum version of distributed (GH#7328) James Bourbeau + * Fix percentiles_summary with dask_cudf (GH#7325) Peter Andreas + Entschev + * Temporarily revert recent Array.__setitem__ updates (GH#7326) + James Bourbeau + * Blockwise.clone (GH#7312) Guido Imperiale + * NEP-35 duck array update (GH#7321) James Bourbeau + * Don’t allow setting .name for array (GH#7222) Julia Signell + * Use nearest interpolation for creating percentiles of integer + input (GH#7305) Kyle Barron + * Test exp with CuPy arrays (GH#7322) John A Kirkham + * Check that computed chunks have right size and dtype (GH#7277) + Bruce Merry + * pytest.mark.flaky (GH#7319) Guido Imperiale + * Contributing docs: add note to pull the latest git tags before + pip installing Dask (GH#7308) Genevieve Buckley + * Support for Python 3.9 (GH#7289) Guido Imperiale + * Add broadcast-based merge implementation (GH#7143) Richard + (Rick) Zamora + * Add split_every to graph_manipulation (GH#7282) Guido Imperiale + * Typo in optimize docs (GH#7306) Julius Busecke + * dask.graph_manipulation support for xarray.Dataset (GH#7276) + Guido Imperiale + * Add plot width and height support for Bokeh 2.3.0 (GH#7297) + 
James Bourbeau + * Add NumPy functions tri, triu_indices, triu_indices_from, + tril_indices, tril_indices_from (GH#6997) Illviljan + * Remove “cleanup” task in DataFrame on-disk shuffle (GH#7260) + Sinclair Target + * Use development version of distributed in CI (GH#7279) James + Bourbeau + * Moving high level graph pack/unpack Dask (GH#7179) Mads R. B. + Kristensen + * Improve performance of merge_percentiles (GH#7172) Ashwin + Srinath + * DOC: add dask-sql and fugue (GH#7129) Ray Bell + * Example for working with categoricals and parquet (GH#7085) + McToel + * Adds tree reduction to bincount (GH#7183) Thomas J. Fan + * Improve documentation of name in from_array (GH#7264) Bruce + Merry + * Fix cumsum for empty partitions (GH#7230) Julia Signell + * Add map_blocks example to dask array creation docs (GH#7221) + Julia Signell + * Fix performance issue in dask.graph_manipulation.wait_on() + (GH#7258) Guido Imperiale + * Replace coveralls with codecov.io (GH#7246) Guido Imperiale + * Pin to a particular black rev in pre-commit (GH#7256) Julia + Signell + * Minor typo in documentation: array-chunks.rst (GH#7254) Magnus + Nord + * Fix bugs in Blockwise and ShuffleLayer (GH#7213) Richard + (Rick) Zamora + * Fix parquet filtering bug for "pyarrow-dataset" with + pyarrow-3.0.0 (GH#7200) Richard (Rick) Zamora + * graph_manipulation without NumPy (GH#7243) Guido Imperiale + * Support for NEP-35 (GH#6738) Peter Andreas Entschev + * Avoid running unit tests during doctest CI build (GH#7240) + James Bourbeau + * Run doctests on CI (GH#7238) Julia Signell + * Cleanup code quality on set arithmetics (GH#7196) Guido + Imperiale + * Add dask.array.delete (GH#7125) Julia Signell + * Unpin graphviz now that new conda-forge recipe is built + (GH#7235) Julia Signell + * Don’t use NumPy 1.20 from conda-forge on Mac (GH#7211) Guido + Imperiale + * map_overlap: Don’t rechunk axes without overlap (GH#7233) + Deepak Cherian + * Pin graphviz to avoid issue with latest conda-forge build + 
(GH#7232) Julia Signell + * Use html_css_files in docs for custom CSS (GH#7220) James + Bourbeau + * Graph manipulation: clone, bind, checkpoint, wait_on (GH#7109) + Guido Imperiale + * Fix handling of filter expressions in parquet pyarrow-dataset + engine (GH#7186) Joris Van den Bossche + * Extend __setitem__ to more closely match numpy (GH#7033) David + Hassell + * Clean up Python 2 syntax (GH#7195) Guido Imperiale + * Fix regression in Delayed._length (GH#7194) Guido Imperiale + * __dask_layers__() tests and tweaks (GH#7177) Guido Imperiale + * Properly convert HighLevelGraph in multiprocessing scheduler + (GH#7191) Jim Crist-Harif + * Don’t fail fast in CI (GH#7188) James Bourbeau +- Add dask-pr7247-numpyskip.patch -- gh#dask/dask#7247 + +------------------------------------------------------------------- +Wed Feb 17 21:51:48 UTC 2021 - Ben Greiner + +- Run the full test suite: use rootdir conftest.py + * importable optional dependencies are skipped automatically + * can use network marker to skip network tests +- Don't package and test -dataframe and -array for python36 flavor, + because python36-numpy and depending packages were dropped from + Tumbleweed with version 1.20. 
+- Skip more distributed tests occasionally failing + +------------------------------------------------------------------- +Mon Feb 8 14:24:58 UTC 2021 - Ben Greiner + +- Update to version 2021.2.0 + * Add percentile support for NEP-35 (GH#7162) Peter Andreas + Entschev + * Added support for Float64 in column assignment (GH#7173) Nils + Braun + * Coarsen rechunking error (GH#7127) Davis Bennett + * Fix upstream CI tests (GH#6896) Julia Signell + * Revise HighLevelGraph Mapping API (GH#7160) Guido Imperiale + * Update low-level graph spec to use any hashable for keys + (GH#7163) James Bourbeau + * Generically rebuild a collection with different keys (GH#7142) + Guido Imperiale + * Make easier to link issues in PRs (GH#7130) Ray Bell + * Add dask.array.append (GH#7146) D-Stacks + * Allow dask.array.ravel to accept array_like argument (GH#7138) + D-Stacks + * Fixes link in array design doc (GH#7152) Thomas J. Fan + * Fix example of using blockwise for an outer product (GH#7119) + Bruce Merry + * Deprecate HighlevelGraph.dicts in favor of .layers (GH#7145) + Amit Kumar + * Align FastParquetEngine with pyarrow engines (GH#7091) Richard + (Rick) Zamora + * Merge annotations (GH#7102) Ian Rose + * Simplify contents of parts list in read_parquet (GH#7066) + Richard (Rick) Zamora + * check_meta(): use __class__ when checking DataFrame types + (GH#7099) Mads R. B. 
Kristensen + * Cache several properties (GH#7104) Illviljan + * Fix parquet getitem optimization (GH#7106) Richard (Rick) + Zamora + * Add cytoolz back to CI environment (GH#7103) James Bourbeau + +------------------------------------------------------------------- +Thu Jan 28 12:25:51 UTC 2021 - Ben Greiner + +- Update to version 2021.1.1 + * Partially fix cumprod (GH#7089) Julia Signell + * Test pandas 1.1.x / 1.2.0 releases and pandas nightly + (GH#6996) Joris Van den Bossche + * Use assign to avoid SettingWithCopyWarning (GH#7092) Julia + Signell + * 'mode' argument passed to bokeh.output_file() (GH#7034) + (GH#7075) patquem + * Skip empty partitions when doing groupby.value_counts + (GH#7073) Julia Signell + * Add error messages to assert_eq() (GH#7083) James Lamb + * Make cached properties read-only (GH#7077) Illviljan +- Changelog for 2021.01.0 + * map_partitions with review comments (GH#6776) Kumar Bharath + Prabhu + * Make sure that population is a real list (GH#7027) Julia Signell + * Propagate storage_options in read_csv (GH#7074) Richard (Rick) + Zamora + * Remove all BlockwiseIO code (GH#7067) Richard (Rick) Zamora + * Fix CI (GH#7069) James Bourbeau + * Add option to control rechunking in reshape (GH#6753) Tom + Augspurger + * Fix linalg.lstsq for complex inputs (GH#7056) Johnnie Gray + * Add compression='infer' default to read_csv (GH#6960) Richard + (Rick) Zamora + * Revert parameter changes in svd_compressed #7003 (GH#7004) Eric + Czech + * Skip failing s3 test (GH#7064) Martin Durant + * Revert BlockwiseIO (GH#7048) Richard (Rick) Zamora + * Add some cross-references to DataFrame.to_bag() and Series. 
+ to_bag() (GH#7049) Rob Malouf + * Rewrite matmul as blockwise without contraction/concatenate + (GH#7000) Rafal Wojdyla + * Use functools.cached_property in da.shape (GH#7023) Illviljan + * Use meta value in series non_empty (GH#6976) Julia Signell + * Revert “Temporarily pin sphinx version to 3.3.1 (GH#7002)” + (GH#7014) Rafal Wojdyla + * Revert python-graphviz pinning (GH#7037) Julia Signell + * Accidentally committed print statement (GH#7038) Julia Signell + * Pass dropna and observed in agg (GH#6992) Julia Signell + * Add index to meta after .str.split with expand (GH#7026) Ruben + van de Geer + * CI: test pyarrow 2.0 and nightly (GH#7030) Joris Van den Bossche + * Temporarily pin python-graphviz in CI (GH#7031) James Bourbeau + * Underline section in numpydoc (GH#7013) Matthias Bussonnier + * Keep normal optimizations when adding custom optimizations + (GH#7016) Matthew Rocklin + * Temporarily pin sphinx version to 3.3.1 (GH#7002) Rafal Wojdyla + * DOC: Misc formatting (GH#6998) Matthias Bussonnier + * Add inline_array option to from_array (GH#6773) Tom Augspurger + * Revert “Initial pass at blockwise array creation routines + (GH#6931)” (GH#6995) James Bourbeau + * Set npartitions in set_index (GH#6978) Julia Signell + * Upstream config serialization and inheritance (GH#6987) Jacob + Tomlinson + * Bump the minimum time in test_minimum_time (GH#6988) Martin + Durant + * Fix pandas dtype inference for read_parquet (GH#6985) Richard + (Rick) Zamora + * Avoid data loss in set_index with sorted=True (GH#6980) Richard + (Rick) Zamora + * Bugfix in read_parquet for handling un-named indices with + index=False (GH#6969) Richard (Rick) Zamora + * Use __class__ when comparing meta data (GH#6981) Mads R. B. + Kristensen + * Comparing string versions won’t always work (GH#6979) Rafal + Wojdyla + * Fix GH#6925 (GH#6982) sdementen + * Initial pass at blockwise array creation routines (GH#6931) Ian + Rose + * Simplify has_parallel_type() (GH#6927) Mads R. B. 
Kristensen + * Handle annotation unpacking in BlockwiseIO (GH#6934) Simon + Perkins + * Avoid deprecated yield_fixture in test_sql.py (GH#6968) Richard + (Rick) Zamora + * Remove bad graph logic in BlockwiseIO (GH#6933) Richard (Rick) + Zamora + * Get config item if variable is None (GH#6862) Jacob Tomlinson + * Update from_pandas docstring (GH#6957) Richard (Rick) Zamora + * Prevent fuse_roots from clobbering annotations (GH#6955) Simon + Perkins + +------------------------------------------------------------------- +Wed Jan 13 14:01:09 UTC 2021 - Benjamin Greiner + +- update to version 2020.12.0 + * Switched to CalVer for versioning scheme. + * Introduced new APIs for HighLevelGraph to enable sending + high-level representations of task graphs to the + distributed scheduler. + * Introduced new HighLevelGraph layer objects including + BasicLayer, Blockwise, BlockwiseIO, ShuffleLayer, and + more. + * Added support for applying custom Layer-level annotations + like priority, retries, etc. with the dask.annotations + context manager. + * Updated minimum supported version of pandas to 0.25.0 + and NumPy to 1.15.1. + * Support for the pyarrow.dataset API to read_parquet. + * Several fixes to Dask Array’s SVD. 
+- For a full list of changes see + https://docs.dask.org/en/latest/changelog.html +- Clean requirements +- Fix incorrect usage of python3_only macro +- Test with pytest-xdist in order to avoid hang after test + +------------------------------------------------------------------- +Sat Oct 10 19:03:48 UTC 2020 - Arun Persaud + +- update to version 2.30.0: + * Allow rechunk to evenly split into N chunks (:pr:`6420`) Scott + Sievert + +------------------------------------------------------------------- +Mon Oct 5 20:14:32 UTC 2020 - Arun Persaud + +- update to version 2.29.0: + * Array + + _repr_html_: color sides darker instead of drawing all the lines + (:pr:`6683`) Julia Signell + + Removes warning from nanstd and nanvar (:pr:`6667`) Thomas J Fan + + Get shape of output from original array - map_overlap + (:pr:`6682`) Julia Signell + + Replace np.searchsorted with bisect in indexing (:pr:`6669`) + Joachim B Haga + * Bag + + Make sure subprocesses have a consistent hash for bag groupby + (:pr:`6660`) Itamar Turner-Trauring + * Core + + Revert "Use HighLevelGraph layers everywhere in collections + (:pr:`6510`)" (:pr:`6697`) Tom Augspurger + + Use pandas.testing (:pr:`6687`) John A Kirkham + + Improve 128-bit floating-point skip in tests (:pr:`6676`) + Elliott Sales de Andrade + * DataFrame + + Allow setting dataframe items using a bool dataframe + (:pr:`6608`) Julia Signell + * Documentation + + Fix typo (:pr:`6692`) garanews + + Fix a few typos (:pr:`6678`) Pav A + +- changes from version 2.28.0: + * Array + + Partially reverted changes to Array indexing that produces large + changes. This restores the behavior from Dask 2.25.0 and + earlier, with a warning when large chunks are produced. A + configuration option is provided to avoid creating the large + chunks, see :ref:`array.slicing.efficiency`. 
(:pr:`6665`) Tom + Augspurger + + Add meta to to_dask_array (:pr:`6651`) Kyle Nicholson + + Fix :pr:`6631` and :pr:`6611` (:pr:`6632`) Rafal Wojdyla + + Infer object in array reductions (:pr:`6629`) Daniel Saxton + + Adding v_based flag for svd_flip (:pr:`6658`) Eric Czech + + Fix flakey array mean (:pr:`6656`) Sam Grayson + * Core + + Removed dsk equality check from SubgraphCallable.__eq__ + (:pr:`6666`) Mads R. B. Kristensen + + Use HighLevelGraph layers everywhere in collections (:pr:`6510`) + Mads R. B. Kristensen + + Adds hash dunder method to SubgraphCallable for caching purposes + (:pr:`6424`) Andrew Fulton + + Stop writing commented out config files by default (:pr:`6647`) + Matthew Rocklin + * DataFrame + + Add support for collect list aggregation via agg API + (:pr:`6655`) Madhur Tandon + + Slightly better error message (:pr:`6657`) Julia Signell + +------------------------------------------------------------------- +Sat Sep 19 15:07:55 UTC 2020 - Arun Persaud + +- update to version 2.27.0: + * Array + + Preserve dtype in svd (:pr:`6643`) Eric Czech + * Core + + store(): create a single HLG layer (:pr:`6601`) Mads + R. B. Kristensen + + Add pre-commit CI build (:pr:`6645`) James Bourbeau + + Update .pre-commit-config to latest black. 
(:pr:`6641`) Julia + Signell + + Update super usage to remove Python 2 compatibility (:pr:`6630`) + Poruri Sai Rahul + + Remove u string prefixes (:pr:`6633`) Poruri Sai Rahul + * DataFrame + + Improve error message for to_sql (:pr:`6638`) Julia Signell + + Use empty list as categories (:pr:`6626`) Julia Signell + * Documentation + + Add autofunction to array api docs for more ufuncs (:pr:`6644`) + James Bourbeau + + Add a number of missing ufuncs to dask.array docs (:pr:`6642`) + Ralf Gommers + + Add HelmCluster docs (:pr:`6290`) Jacob Tomlinson + +------------------------------------------------------------------- +Sat Sep 12 19:57:21 UTC 2020 - Arun Persaud + +- specfile: + * added python-mimesis and python-zarr to be able to run more tests + +- update to version 2.26.0: + * Array + + Backend-aware dtype inference for single-chunk svd (:pr:`6623`) + Eric Czech + + Make array.reduction docstring match for dtype (:pr:`6624`) + Martin Durant + + Set lower bound on compression level for svd_compressed using + rows and cols (:pr:`6622`) Eric Czech + + Improve SVD consistency and small array handling (:pr:`6616`) + Eric Czech + + Add svd_flip #6599 (:pr:`6613`) Eric Czech + + Handle sequences containing dask Arrays (:pr:`6595`) Gabe Joseph + + Avoid large chunks from getitem with lists (:pr:`6514`) Tom + Augspurger + + Eagerly slice numpy arrays in from_array (:pr:`6605`) Deepak + Cherian + + Restore ability to pickle dask arrays (:pr:`6594`) Noah D + Brenowitz + + Add SVD support for short-and-fat arrays (:pr:`6591`) Eric Czech + + Add simple chunk type registry and defer as appropriate to + upcast types (:pr:`6393`) Jon Thielen + + Align coarsen chunks by default (:pr:`6580`) Deepak Cherian + + Fixup reshape on unknown dimensions and other testing fixes + (:pr:`6578`) Ryan Williams + * Core + + Add validation and fixes for HighLevelGraph dependencies + (:pr:`6588`) Mads R. B. 
Kristensen + + Fix linting issue (:pr:`6598`) Tom Augspurger + + Skip bokeh version 2.0.0 (:pr:`6572`) John A Kirkham + * DataFrame + + Added bytes/row calculation when using meta (:pr:`6585`) McToel + + Handle min_count in Series.sum / prod (:pr:`6618`) Daniel Saxton + + Update DataFrame.set_index docstring (:pr:`6549`) Timost + + Always compute 0 and 1 quantiles during quantile calculations + (:pr:`6564`) Erik Welch + + Fix wrong path when reading empty csv file (:pr:`6573`) + Abdulelah Bin Mahfoodh + * Documentation + + Doc: Troubleshooting dashboard 404 (:pr:`6215`) Kilian Lieret + + Fixup extraConfig example (:pr:`6625`) Tom Augspurger + + Update supported Python versions (:pr:`6609`) Julia Signell + + Document dask/daskhub helm chart (:pr:`6560`) Tom Augspurger + +------------------------------------------------------------------- +Sat Aug 29 15:51:43 UTC 2020 - Arun Persaud + +- update to version 2.25.0: + * Core + + Compare key hashes in subs() (:pr:`6559`) Mads R. B. Kristensen + + Rerun with latest black release (:pr:`6568`) James Bourbeau + + License update (:pr:`6554`) Tom Augspurger + * DataFrame + + Add gs read_parquet example (:pr:`6548`) Ray Bell + * Documentation + + Remove version from documentation page names (:pr:`6558`) James + Bourbeau + + Update kubernetes-helm.rst (:pr:`6523`) David Sheldon + + Stop 2020 survey (:pr:`6547`) Tom Augspurger + +- changes from version 2.24.0: + * Array + + Fix setting random seed in tests. 
(:pr:`6518`) Elliott Sales de + Andrade + + Support meta in apply gufunc (:pr:`6521`) joshreback + + Replace cupy.sparse with cupyx.scipy.sparse (:pr:`6530`) John A + Kirkham + * DataFrame + + Bump up tolerance for rolling tests (:pr:`6502`) Julia Signell + + Implement DataFrame.__len__ (:pr:`6515`) Tom Augspurger + + Infer arrow schema in to_parquet (for ArrowEngine) (:pr:`6490`) + Richard Zamora + + Fix parquet test when no pyarrow (:pr:`6524`) Martin Durant + + Remove problematic filter arguments in ArrowEngine (:pr:`6527`) + Richard Zamora + + Avoid schema validation by default in ArrowEngine (:pr:`6536`) + Richard Zamora + * Core + + Use unpack_collections in make_blockwise_graph (:pr:`6517`) + Thomas Fan + + Move key_split() from optimization.py to utils.py (:pr:`6529`) + Mads R. B. Kristensen + + Make tests run on moto server (:pr:`6528`) Martin Durant + +------------------------------------------------------------------- +Sat Aug 15 16:59:24 UTC 2020 - Arun Persaud + +- update to version 2.23.0: + * Array + + Reduce np.zeros, ones, and full array size with broadcasting + (:pr:`6491`) Matthias Bussonnier + + Add missing meta= for trim in map_overlap (:pr:`6494`) Peter + Andreas Entschev + * Bag + + Bag repartition partition size (:pr:`6371`) joshreback + * Core + + Scalar.__dask_layers__() to return self._name instead of + self.key (:pr:`6507`) Mads R. B. 
Kristensen + * DataFrame + + Adds items to dataframe (:pr:`6503`) Thomas J Fan + + Include compression in write_table call (:pr:`6499`) Julia + Signell + + Fixed warning in nonempty_series (:pr:`6485`) Tom Augspurger + + Intelligently determine partitions based on type of first arg + (:pr:`6479`) Matthew Rocklin + + Fix pyarrow mkdirs (:pr:`6475`) Julia Signell + + Fix duplicate parquet output in to_parquet (:pr:`6451`) + michaelnarodovitch + * Documentation + + Fix documentation da.histogram (:pr:`6439`) Roberto Panai + + Add agg nunique example (:pr:`6404`) Ray Bell + + Fixed a few typos in the SQL docs (:pr:`6489`) Mike McCarty + + Docs for SQLing (:pr:`6453`) Martin Durant + +------------------------------------------------------------------- +Sat Aug 1 22:09:59 UTC 2020 - Arun Persaud + +- update to version 2.22.0: + * Array + + Compatibility for NumPy dtype deprecation (:pr:`6430`) Tom + Augspurger + * Core + + Implement sizeof for some bytes-like objects (:pr:`6457`) John A + Kirkham + + HTTP error for new fsspec (:pr:`6446`) Martin Durant + + When RecursionError is raised, return uuid from tokenize + function (:pr:`6437`) Julia Signell + + Install deps of upstream-dev packages (:pr:`6431`) Tom + Augspurger + + Use updated link in setup.cfg (:pr:`6426`) Zhengnan + * DataFrame + + Add single quotes around column names if strings (:pr:`6471`) + Gil Forsyth + + Refactor ArrowEngine for better read_parquet performance + (:pr:`6346`) Richard (Rick) Zamora + + Add tolist dispatch (:pr:`6444`) GALI PREM SAGAR + + Compatibility with pandas 1.1.0rc0 (:pr:`6429`) Tom Augspurger + + Multi value pivot table (:pr:`6428`) joshreback + + Duplicate argument definitions in to_csv docstring (:pr:`6411`) + Jun Han (Johnson) Ooi + * Documentation + + Add utility to docs to convert YAML config to env vars and back + (:pr:`6472`) Jacob Tomlinson + + Fix parameter server rendering (:pr:`6466`) Scott Sievert + + Fixes broken links (:pr:`6403`) Jim Circadian + + Complete parameter 
server implementation in docs (:pr:`6449`) + Scott Sievert + + Fix typo (:pr:`6436`) Jack Xiaosong Xu + +------------------------------------------------------------------- +Sat Jul 18 18:12:13 UTC 2020 - Arun Persaud + +- update to version 2.21.0: + * Array + + Correct error message in array.routines.gradient() (:pr:`6417`) + johnomotani + + Fix blockwise concatenate for array with some dimension=1 + (:pr:`6342`) Matthias Bussonnier + * Bag + + Fix bag.take example (:pr:`6418`) Roberto Panai + * Core + + Groups values in optimization pass should only be graph and keys + -- not an optimization + keys (:pr:`6409`) Ben Zaitlen + + Call custom optimizations once, with kwargs provided + (:pr:`6382`) Clark Zinzow + + Include pickle5 for testing on Python 3.7 (:pr:`6379`) John A + Kirkham + * DataFrame + + Correct typo in error message (:pr:`6422`) Tom McTiernan + + Use pytest.warns to check for UserWarning (:pr:`6378`) Richard + (Rick) Zamora + + Parse bytes_per_chunk keyword from string (:pr:`6370`) Matthew + Rocklin + * Documentation + + Numpydoc formatting (:pr:`6421`) Matthias Bussonnier + + Unpin numpydoc following 1.1 release (:pr:`6407`) Gil Forsyth + + Numpydoc formatting (:pr:`6402`) Matthias Bussonnier + + Add instructions for using conda when installing code for + development (:pr:`6399`) Ray Bell + + Update visualize docstrings (:pr:`6383`) Zhengnan + +------------------------------------------------------------------- +Thu Jul 9 08:20:00 UTC 2020 - Marketa Calabkova + +- Update to version 2.20.0 + Array + - Register ``sizeof`` for numpy zero-strided arrays (:pr:`6343`) `Matthias Bussonnier`_ + - Use ``concatenate_lookup`` in ``concatenate`` (:pr:`6339`) `John A Kirkham`_ + - Fix rechunking of arrays with some zero-length dimensions (:pr:`6335`) `Matthias Bussonnier`_ + DataFrame + - Dispatch ``iloc`` calls to ``getitem`` (:pr:`6355`) `Gil Forsyth`_ + - Handle unnamed pandas ``RangeIndex`` in fastparquet engine (:pr:`6350`) `Richard (Rick) Zamora`_ + -
Preserve index when writing partitioned parquet datasets with pyarrow (:pr:`6282`) `Richard (Rick) Zamora`_ + - Use ``ignore_index`` for pandas' ``group_split_dispatch`` (:pr:`6251`) `Richard (Rick) Zamora`_ + Documentation + - Add doc describing argument (:pr:`6318`) `asmith26`_ +- 2.19.0 + Array + - Cast chunk sizes to python int ``dtype`` (:pr:`6326`) `Gil Forsyth`_ + - Add ``shape=None`` to ``*_like()`` array creation functions (:pr:`6064`) `Anderson Banihirwe`_ + Core + - Update expected error msg for protocol difference in fsspec (:pr:`6331`) `Gil Forsyth`_ + - Fix for floats < 1 in ``parse_bytes`` (:pr:`6311`) `Gil Forsyth`_ + - Fix exception causes all over the codebase (:pr:`6308`) `Ram Rachum`_ + - Fix duplicated tests (:pr:`6303`) `James Lamb`_ + - Remove unused testing function (:pr:`6304`) `James Lamb`_ + DataFrame + - Add high-level CSV Subgraph (:pr:`6262`) `Gil Forsyth`_ + - Fix ``ValueError`` when merging an index-only 1-partition dataframe (:pr:`6309`) `Krishan Bhasin`_ + - Make ``index.map`` clear divisions. 
(:pr:`6285`) `Julia Signell`_ + Documentation + - Add link to 2020 survey (:pr:`6328`) `Tom Augspurger`_ + - Update ``bag.rst`` (:pr:`6317`) `Ben Shaver`_ +- 2.18.1 + Array + - Don't try to set name on ``full`` (:pr:`6299`) `Julia Signell`_ + - Histogram: support lazy values for range/bins (another way) (:pr:`6252`) `Gabe Joseph`_ + Core + - Fix exception causes in ``utils.py`` (:pr:`6302`) `Ram Rachum`_ + - Improve performance of ``HighLevelGraph`` construction (:pr:`6293`) `Julia Signell`_ + Documentation + - Now readthedocs builds unreleased features' docstrings (:pr:`6295`) `Antonio Ercole De Luca`_ + - Add ``asyncssh`` intersphinx mappings (:pr:`6298`) `Jacob Tomlinson`_ +- 2.18.0 + Array + - Cast slicing index to dask array if same shape as original (:pr:`6273`) `Julia Signell`_ + - Fix ``stack`` error message (:pr:`6268`) `Stephanie Gott`_ + - ``full`` & ``full_like``: error on non-scalar ``fill_value`` (:pr:`6129`) `Huite`_ + - Support for multiple arrays in ``map_overlap`` (:pr:`6165`) `Eric Czech`_ + - Pad resample divisions so that edges are counted (:pr:`6255`) `Julia Signell`_ + Bag + - Random sampling of k elements from a dask bag #4799 (:pr:`6239`) `Antonio Ercole De Luca`_ + DataFrame + - Add ``dropna``, ``sort``, and ``ascending`` to ``sort_values`` (:pr:`5880`) `Julia Signell`_ + - Generalize ``from_dask_array`` (:pr:`6263`) `GALI PREM SAGAR`_ + - Add derived docstring for ``SeriesGroupby.nunique`` (:pr:`6284`) `Julia Signell`_ + - Remove ``NotImplementedError`` in resample with rule (:pr:`6274`) `Abdulelah Bin Mahfoodh`_ + - Add ``dd.to_sql`` (:pr:`6038`) `Ryan Williams`_ + Documentation + - Update remote data section (:pr:`6258`) `Ray Bell`_ +- 2.17.2 + Core + - Re-add the ``complete`` extra (:pr:`6257`) `Jim Crist-Harif`_ + DataFrame + - Raise error if ``resample`` isn't going to give right answer (:pr:`6244`) `Julia Signell`_ +- 2.17.1 + Array + - Empty array rechunk (:pr:`6233`) `Andrew Fulton`_ + Core + - Make ``pyyaml`` required (:pr:`6250`)
`Jim Crist-Harif`_ + - Fix install commands from ``ImportError`` (:pr:`6238`) `Gaurav Sheni`_ + - Remove issue template (:pr:`6249`) `Jacob Tomlinson`_ + DataFrame + - Pass ``ignore_index`` to ``dd_shuffle`` from ``DataFrame.shuffle`` (:pr:`6247`) `Richard (Rick) Zamora`_ + - Cope with missing HDF keys (:pr:`6204`) `Martin Durant`_ + - Generalize ``describe`` & ``quantile`` apis (:pr:`5137`) `GALI PREM SAGAR`_ +- 2.17.0 + Array + - Small improvements to ``da.pad`` (:pr:`6213`) `Mark Boer`_ + - Return ``tuple`` if multiple outputs in ``dask.array.apply_gufunc``, add test to check for tuple (:pr:`6207`) `Kai Mühlbauer`_ + - Support ``stack`` with unknown chunksizes (:pr:`6195`) `swapna`_ + Bag + - Random Choice on Bags (:pr:`6208`) `Antonio Ercole De Luca`_ + Core + - Raise warning ``delayed.visualise()`` (:pr:`6216`) `Amol Umbarkar`_ + - Ensure other pickle arguments work (:pr:`6229`) `John A Kirkham`_ + - Overhaul ``fuse()`` config (:pr:`6198`) `Guido Imperiale`_ + - Update ``dask.order.order`` to consider "next" nodes using both FIFO and LIFO (:pr:`5872`) `Erik Welch`_ + DataFrame + - Use 0 as ``fill_value`` for more agg methods (:pr:`6245`) `Julia Signell`_ + - Generalize ``rearrange_by_column_tasks`` and add ``DataFrame.shuffle`` (:pr:`6066`) `Richard (Rick) Zamora`_ + - Xfail ``test_rolling_numba_engine`` for newer numba and older pandas (:pr:`6236`) `James Bourbeau`_ + - Generalize ``fix_overlap`` (:pr:`6240`) `GALI PREM SAGAR`_ + - Fix ``DataFrame.shape`` with no columns (:pr:`6237`) `noreentry`_ + - Avoid shuffle when setting a presorted index with overlapping divisions (:pr:`6226`) `Krishan Bhasin`_ + - Adjust the Parquet engine classes to allow more easily subclassing (:pr:`6211`) `Marius van Niekerk`_ + - Fix ``dd.merge_asof`` with ``left_on='col'`` & ``right_index=True`` (:pr:`6192`) `noreentry`_ + - Disable warning for ``concat`` (:pr:`6210`) `Tung Dang`_ + - Move ``AUTO_BLOCKSIZE`` out of ``read_csv`` signature (:pr:`6214`) `Jim Crist-Harif`_ + - 
``.loc`` indexing with callable (:pr:`6185`) `Endre Mark Borza`_ + - Avoid apply in ``_compute_sum_of_squares`` for groupby std agg (:pr:`6186`) `Richard (Rick) Zamora`_ + - Minor correction to ``test_parquet`` (:pr:`6190`) `Brian Larsen`_ + - Adhering to the passed pat for delimiter join and fix error message (:pr:`6194`) `GALI PREM SAGAR`_ + - Skip ``test_to_parquet_with_get`` if no parquet libs available (:pr:`6188`) `Scott Sanderson`_ + Documentation + - Added documentation for ``distributed.Event`` class (:pr:`6231`) `Nils Braun`_ + - Doc write to remote (:pr:`6124`) `Ray Bell`_ +- 2.16.0 + Array + - Fix array general-reduction name (:pr:`6176`) `Nick Evans`_ + - Replace ``dim`` with ``shape`` in ``unravel_index`` (:pr:`6155`) `Julia Signell`_ + - Moment: handle all elements being masked (:pr:`5339`) `Gabe Joseph`_ + Core + - Remove Redundant string concatenations in dask code-base (:pr:`6137`) `GALI PREM SAGAR`_ + - Upstream compat (:pr:`6159`) `Tom Augspurger`_ + - Ensure ``sizeof`` of dict and sequences returns an integer (:pr:`6179`) `James Bourbeau`_ + - Estimate python collection sizes with random sampling (:pr:`6154`) `Florian Jetter`_ + - Update test upstream (:pr:`6146`) `Tom Augspurger`_ + - Skip test for mindeps build (:pr:`6144`) `Tom Augspurger`_ + - Switch default multiprocessing context to "spawn" (:pr:`4003`) `Itamar Turner-Trauring`_ + - Update manifest to include dask-schema (:pr:`6140`) `Ben Zaitlen`_ + DataFrame + - Harden inconsistent-schema handling in pyarrow-based ``read_parquet`` (:pr:`6160`) `Richard (Rick) Zamora`_ + - Add compute ``kwargs`` to methods that write data to disk (:pr:`6056`) `Krishan Bhasin`_ + - Fix issue where ``unique`` returns an index like result from backends (:pr:`6153`) `GALI PREM SAGAR`_ + - Fix internal error in ``map_partitions`` with collections (:pr:`6103`) `Tom Augspurger`_ + Documentation + - Add phase of computation to index TOC (:pr:`6157`) `Ben Zaitlen`_ + - Remove unused imports in scheduling script
(:pr:`6138`) `James Lamb`_ + - Fix indent (:pr:`6147`) `Martin Durant`_ + - Add Tom's log config example (:pr:`6143`) `Martin Durant`_ +- 2.15.0 + Array + - Update ``dask.array.from_array`` to warn when passed a Dask collection (:pr:`6122`) `James Bourbeau`_ + - Un-numpy like behaviour in ``dask.array.pad`` (:pr:`6042`) `Mark Boer`_ + - Add support for ``repeats=0`` in ``da.repeat`` (:pr:`6080`) `James Bourbeau`_ + Core + - Fix yaml layout for schema (:pr:`6132`) `Ben Zaitlen`_ + - Configuration Reference (:pr:`6069`) `Ben Zaitlen`_ + - Add configuration option to turn off task fusion (:pr:`6087`) `Matthew Rocklin`_ + - Skip pyarrow on windows (:pr:`6094`) `Tom Augspurger`_ + - Set limit to maximum length of fused key (:pr:`6057`) `Lucas Rademaker`_ + - Add test against #6062 (:pr:`6072`) `Martin Durant`_ + - Bump checkout action to v2 (:pr:`6065`) `James Bourbeau`_ + DataFrame + - Generalize categorical calls to support cudf ``Categorical`` (:pr:`6113`) `GALI PREM SAGAR`_ + - Avoid reading ``_metadata`` on every worker (:pr:`6017`) `Richard (Rick) Zamora`_ + - Use ``group_split_dispatch`` and ``ignore_index`` in ``apply_concat_apply`` (:pr:`6119`) `Richard (Rick) Zamora`_ + - Handle new (dtype) pandas metadata with pyarrow (:pr:`6090`) `Richard (Rick) Zamora`_ + - Skip ``test_partition_on_cats_pyarrow`` if pyarrow is not installed (:pr:`6112`) `James Bourbeau`_ + - Update DataFrame len to handle columns with the same name (:pr:`6111`) `James Bourbeau`_ + - ``ArrowEngine`` bug fixes and test coverage (:pr:`6047`) `Richard (Rick) Zamora`_ + - Added mode (:pr:`5958`) `Adam Lewis`_ + +------------------------------------------------------------------- +Mon Apr 20 13:01:44 UTC 2020 - Tomáš Chvátal + +- Drop py2 dep from py3 only package + +------------------------------------------------------------------- +Sat Apr 11 21:45:43 UTC 2020 - Arun Persaud + +- update to version 2.14.0: + * Array + + Added np.iscomplexobj implementation (:pr:`6045`) Tom Augspurger + * Core + 
+ Update test_rearrange_disk_cleanup_with_exception to pass + without cloudpickle installed (:pr:`6052`) James Bourbeau + + Fixed flaky test-rearrange (:pr:`5977`) Tom Augspurger + * DataFrame + + Use _meta_nonempty for dtype casting in stack_partitions + (:pr:`6061`) mlondschien + + Fix bugs in _metadata creation and filtering in parquet + ArrowEngine (:pr:`6023`) Richard (Rick) Zamora + * Documentation + + DOC: Add name caveats (:pr:`6040`) Tom Augspurger + +------------------------------------------------------------------- +Sat Mar 28 16:47:35 UTC 2020 - Arun Persaud + +- update to version 2.13.0: + * Array + + Support dtype and other keyword arguments in da.random + (:pr:`6030`) Matthew Rocklin + + Register support for cupy sparse hstack/vstack (:pr:`5735`) + Corey J. Nolet + + Force self.name to str in dask.array (:pr:`6002`) Chuanzhu Xu + * Bag + + Set rename_fused_keys to None by default in bag.optimize + (:pr:`6000`) Lucas Rademaker + * Core + + Copy dict in to_graphviz to prevent overwriting (:pr:`5996`) + JulianWgs + + Stricter pandas xfail (:pr:`6024`) Tom Augspurger + + Fix CI failures (:pr:`6013`) James Bourbeau + + Update toolz to 0.8.2 and use tlz (:pr:`5997`) Ryan Grout + + Move Windows CI builds to GitHub Actions (:pr:`5862`) James + Bourbeau + * DataFrame + + Improve path-related exceptions in read_hdf (:pr:`6032`) psimaj + + Fix dtype handling in dd.concat (:pr:`6006`) mlondschien + + Handle cudf's leftsemi and leftanti joins (:pr:`6025`) Richard J + Zamora + + Remove unused npartitions variable in dd.from_pandas + (:pr:`6019`) Daniel Saxton + + Added shuffle to DataFrame.random_split (:pr:`5980`) petiop + * Documentation + + Fix indentation in scheduler-overview docs (:pr:`6022`) Matthew + Rocklin + + Update task graphs in optimize docs (:pr:`5928`) Julia Signell + + Optionally get rid of intermediary boxes in visualize, and add + more labels (:pr:`5976`) Julia Signell + +------------------------------------------------------------------- +Sun 
Mar 8 19:03:37 UTC 2020 - Arun Persaud + +- update to version 2.12.0: + * Array + + Improve reuse of temporaries with numpy (:pr:`5933`) Bruce Merry + + Make map_blocks with block_info produce a Blockwise (:pr:`5896`) + Bruce Merry + + Optimize make_blockwise_graph (:pr:`5940`) Bruce Merry + + Fix axes ordering in da.tensordot (:pr:`5975`) Gil Forsyth + + Adds empty mode to array.pad (:pr:`5931`) Thomas J Fan + * Core + + Remove toolz.memoize dependency in dask.utils (:pr:`5978`) Ryan + Grout + + Close pool leaking subprocess (:pr:`5979`) Tom Augspurger + + Pin numpydoc to 0.8.0 (fix double autoescape) (:pr:`5961`) Gil + Forsyth + + Register deterministic tokenization for range objects + (:pr:`5947`) James Bourbeau + + Unpin msgpack in CI (:pr:`5930`) James Bourbeau + + Ensure dot results are placed in unique files. (:pr:`5937`) + Elliott Sales de Andrade + + Add remaining optional dependencies to Travis 3.8 CI build + environment (:pr:`5920`) James Bourbeau + * DataFrame + + Skip parquet getitem optimization for some keys (:pr:`5917`) Tom + Augspurger + + Add ignore_index argument to rearrange_by_column code path + (:pr:`5973`) Richard J Zamora + + Add DataFrame and Series memory_usage_per_partition methods + (:pr:`5971`) James Bourbeau + + xfail test_describe when using Pandas 0.24.2 (:pr:`5948`) James + Bourbeau + + Implement dask.dataframe.to_numeric (:pr:`5929`) Julia Signell + + Add new error message content when columns are in a different + order (:pr:`5927`) Julia Signell + + Use shallow copy for assign operations when possible + (:pr:`5740`) Richard J Zamora + * Documentation + + Changed above to below in dask.array.triu docs (:pr:`5984`) + Henrik Andersson + + Array slicing: fix typo in slice_with_int_dask_array error + message (:pr:`5981`) Gabe Joseph + + Grammar and formatting updates to docstrings (:pr:`5963`) James + Lamb + + Update develop doc with conda option (:pr:`5939`) Ray Bell + + Update title of DataFrame extension docs (:pr:`5954`) James +
Bourbeau + + Fixed typos in documentation (:pr:`5962`) James Lamb + + Add original class or module as a kwarg on _bind_* methods + (:pr:`5946`) Julia Signell + + Add collect list example (:pr:`5938`) Ray Bell + + Update optimization doc for python 3 (:pr:`5926`) Julia Signell + +------------------------------------------------------------------- +Sat Feb 22 18:54:54 UTC 2020 - Arun Persaud + +- specfile: + * require pandas >= 0.23 + +- update to version 2.11.0: + * Array + + Cache result of Array.shape (:pr:`5916`) Bruce Merry + + Improve accuracy of estimate_graph_size for rechunk (:pr:`5907`) + Bruce Merry + + Skip rechunk steps that do not alter chunking (:pr:`5909`) Bruce + Merry + + Support dtype and other kwargs in coarsen (:pr:`5903`) Matthew + Rocklin + + Push chunk override from map_blocks into blockwise (:pr:`5895`) + Bruce Merry + + Avoid using rewrite_blockwise for a singleton (:pr:`5890`) Bruce + Merry + + Optimize slices_from_chunks (:pr:`5891`) Bruce Merry + + Avoid unnecessary __getitem__ in block() when chunks have + correct dimensionality (:pr:`5884`) Thomas Robitaille + * Bag + + Add include_path option for dask.bag.read_text (:pr:`5836`) + Yifan Gu + + Fixes ValueError in delayed execution of bagged NumPy array + (:pr:`5828`) Surya Avala + * Core + + CI: Pin msgpack (:pr:`5923`) Tom Augspurger + + Rename test_inner to test_outer (:pr:`5922`) Shiva Raisinghani + + quote should quote dicts too (:pr:`5905`) Bruce Merry + + Register a normalizer for literal (:pr:`5898`) Bruce Merry + + Improve layer name synthesis for non-HLGs (:pr:`5888`) Bruce + Merry + + Replace flake8 pre-commit-hook with upstream (:pr:`5892`) Julia + Signell + + Call pip as a module to avoid warnings (:pr:`5861`) Cyril + Shcherbin + + Close ThreadPool at exit (:pr:`5852`) Tom Augspurger + + Remove dask.dataframe import in tokenization code (:pr:`5855`) + James Bourbeau + * DataFrame + + Require pandas>=0.23 (:pr:`5883`) Tom Augspurger + + Remove lambda from dataframe 
aggregation (:pr:`5901`) Matthew + Rocklin + + Fix exception chaining in dataframe/__init__.py (:pr:`5882`) Ram + Rachum + + Add support for reductions on empty dataframes (:pr:`5804`) + Shiva Raisinghani + + Expose sort= argument for groupby (:pr:`5801`) Richard J Zamora + + Add df.empty property (:pr:`5711`) rockwellw + + Use parquet read speed-ups from + fastparquet.api.paths_to_cats. (:pr:`5821`) Igor Gotlibovych + * Documentation + + Deprecate doc_wraps (:pr:`5912`) Tom Augspurger + + Update array internal design docs for HighLevelGraph era + (:pr:`5889`) Bruce Merry + + Move over dashboard connection docs (:pr:`5877`) Matthew Rocklin + + Move prometheus docs from distributed.dask.org (:pr:`5876`) + Matthew Rocklin + + Removing duplicated DO block at the end (:pr:`5878`) K.-Michael + Aye + + map_blocks see also (:pr:`5874`) Tom Augspurger + + More derived from (:pr:`5871`) Julia Signell + + Fix typo (:pr:`5866`) Yetunde Dada + + Fix typo in cloud.rst (:pr:`5860`) Andrew Thomas + + Add note pointing to code of conduct and diversity statement + (:pr:`5844`) Matthew Rocklin + +------------------------------------------------------------------- +Sat Feb 8 21:45:22 UTC 2020 - Arun Persaud + +- update to version 2.10.1: + * Fix Pandas 1.0 version comparison (:pr:`5851`) Tom Augspurger + * Fix typo in distributed diagnostics documentation (:pr:`5841`) + Gerrit Holl + +- changes from version 2.10.0: + * Support for pandas 1.0's new BooleanDtype and StringDtype + (:pr:`5815`) Tom Augspurger + * Compatibility with pandas 1.0's API breaking changes and + deprecations (:pr:`5792`) Tom Augspurger + * Fixed non-deterministic tokenization of some extension-array + backed pandas objects (:pr:`5813`) Tom Augspurger + * Fixed handling of dataclass class objects in collections + (:pr:`5812`) Matteo De Wint + * Fixed resampling with tz-aware dates when one of the endpoints + fell in a non-existent time (:pr:`5807`) dfonnegra + * Delay initial Zarr dataset creation until the 
computation occurs + (:pr:`5797`) Chris Roat + * Use parquet dataset statistics in more cases with the pyarrow + engine (:pr:`5799`) Richard J Zamora + * Fixed exception in groupby.std() when some of the keys were large + integers (:pr:`5737`) H. Thomson Comer + +------------------------------------------------------------------- +Sat Jan 18 19:16:38 UTC 2020 - Arun Persaud + +- update to version 2.9.2: + * Array + + Unify chunks in broadcast_arrays (:pr:`5765`) Matthew Rocklin + * Core + + xfail CSV encoding tests (:pr:`5791`) Tom Augspurger + + Update order to handle empty dask graph (:pr:`5789`) James + Bourbeau + + Redo dask.order.order (:pr:`5646`) Erik Welch + * DataFrame + + Add transparent compression for on-disk shuffle with partd + (:pr:`5786`) Christian Wesp + + Fix repr for empty dataframes (:pr:`5781`) Shiva Raisinghani + + Pandas 1.0.0RC0 compat (:pr:`5784`) Tom Augspurger + + Remove buggy assertions (:pr:`5783`) Tom Augspurger + + Pandas 1.0 compat (:pr:`5782`) Tom Augspurger + + Fix bug in pyarrow-based read_parquet on partitioned datasets + (:pr:`5777`) Richard J Zamora + + Compat for pandas 1.0 (:pr:`5779`) Tom Augspurger + + Fix groupby/mean error with categorical index (:pr:`5776`) + Richard J Zamora + + Support empty partitions when performing cumulative aggregation + (:pr:`5730`) Matthew Rocklin + + set_index accepts single-item unnested list (:pr:`5760`) Wes + Roach + + Fixed partitioning in set index for ordered Categorical + (:pr:`5715`) Tom Augspurger + * Documentation + + Note additional use case for normalize_token.register + (:pr:`5766`) Thomas A Caswell + + Update bag repartition docstring (:pr:`5772`) Timost + + Small typos (:pr:`5771`) Maarten Breddels + + Fix typo in Task Expectations docs (:pr:`5767`) James Bourbeau + + Add docs section on task expectations to graph page (:pr:`5764`) + Devin Petersohn + +------------------------------------------------------------------- +Mon Jan 6 05:05:16 UTC 2020 - Arun Persaud + +-
specfile: + * update copyright year + +- update to version 2.9.1: + * Array + + Support Array.view with dtype=None (:pr:`5736`) Anderson + Banihirwe + + Add dask.array.nanmedian (:pr:`5684`) Deepak Cherian + * Core + + xfail test_temporary_directory on Python 3.8 (:pr:`5734`) James + Bourbeau + + Add support for Python 3.8 (:pr:`5603`) James Bourbeau + + Use id to dedupe constants in rewrite_blockwise (:pr:`5696`) Jim + Crist + * DataFrame + + Raise error when converting a dask dataframe scalar to a boolean + (:pr:`5743`) James Bourbeau + + Ensure dataframe groupby-variance is greater than zero + (:pr:`5728`) Matthew Rocklin + + Fix DataFrame.__iter__ (:pr:`5719`) Tom Augspurger + + Support Parquet filters in disjunctive normal form, like PyArrow + (:pr:`5656`) Matteo De Wint + + Auto-detect categorical columns in ArrowEngine-based + read_parquet (:pr:`5690`) Richard J Zamora + + Skip parquet getitem optimization tests if no engine found + (:pr:`5697`) James Bourbeau + + Fix independent optimization of parquet-getitem (:pr:`5613`) Tom + Augspurger + * Documentation + + Update helm config doc (:pr:`5750`) Ray Bell + + Link to examples.dask.org in several places (:pr:`5733`) Tom + Augspurger + + Add missing " in performance report example (:pr:`5724`) James + Bourbeau + + Resolve several documentation build warnings (:pr:`5685`) James + Bourbeau + + add info on performance_report (:pr:`5713`) Ben Zaitlen + + Add more docs disclaimers (:pr:`5710`) Julia Signell + + Fix simple typo: wihout -> without (:pr:`5708`) Tim Gates + + Update numpydoc dependency (:pr:`5694`) James Bourbeau + +------------------------------------------------------------------- +Sat Dec 7 19:08:29 UTC 2019 - Arun Persaud + +- update to version 2.9.0: + * Array + + Fix da.std to work with NumPy arrays (:pr:`5681`) James Bourbeau + * Core + + Register sizeof functions for Numba and RMM (:pr:`5668`) John A + Kirkham + + Update meeting time (:pr:`5682`) Tom Augspurger + * DataFrame + + Modify 
dd.DataFrame.drop to use shallow copy (:pr:`5675`) + Richard J Zamora + + Fix bug in _get_md_row_groups (:pr:`5673`) Richard J Zamora + + Close sqlalchemy engine after querying DB (:pr:`5629`) Krishan + Bhasin + + Allow dd.map_partitions to not enforce meta (:pr:`5660`) Matthew + Rocklin + + Generalize concat_unindexed_dataframes to support cudf-backend + (:pr:`5659`) Richard J Zamora + + Add dataframe resample methods (:pr:`5636`) Ben Zaitlen + + Compute length of dataframe as length of first column + (:pr:`5635`) Matthew Rocklin + * Documentation + + Doc fixup (:pr:`5665`) James Bourbeau + + Update doc build instructions (:pr:`5640`) James Bourbeau + + Fix ADL link (:pr:`5639`) Ray Bell + + Add documentation build (:pr:`5617`) James Bourbeau + +------------------------------------------------------------------- +Sun Nov 24 17:35:04 UTC 2019 - Arun Persaud + +- update to version 2.8.1: + * Array + + Use auto rechunking in da.rechunk if no value given (:pr:`5605`) + Matthew Rocklin + * Core + + Add simple action to activate GH actions (:pr:`5619`) James + Bourbeau + * DataFrame + + Fix "file_path_0" bug in aggregate_row_groups (:pr:`5627`) + Richard J Zamora + + Add chunksize argument to read_parquet (:pr:`5607`) Richard J + Zamora + + Change test_repartition_npartitions to support arch64 + architecture (:pr:`5620`) ossdev07 + + Categories lost after groupby + agg (:pr:`5423`) Oliver Hofkens + + Fixed relative path issue with parquet metadata file + (:pr:`5608`) Nuno Gomes Silva + + Enable gpu-backed covariance/correlation in dataframes + (:pr:`5597`) Richard J Zamora + * Documentation + + Fix institutional faq and unknown doc warnings (:pr:`5616`) + James Bourbeau + + Add doc for some utils (:pr:`5609`) Tom Augspurger + + Removes html_extra_path (:pr:`5614`) James Bourbeau + + Fixed See Also reference (:pr:`5612`) Tom Augspurger + +------------------------------------------------------------------- +Sat Nov 16 17:53:12 UTC 2019 - Arun Persaud + +- update to
version 2.8.0: + * Array + + Implement complete dask.array.tile function (:pr:`5574`) Bouwe + Andela + + Add median along an axis with automatic rechunking (:pr:`5575`) + Matthew Rocklin + + Allow da.asarray to chunk inputs (:pr:`5586`) Matthew Rocklin + * Bag + + Use key_split in Bag name (:pr:`5571`) Matthew Rocklin + * Core + + Switch Doctests to Py3.7 (:pr:`5573`) Ryan Nazareth + + Relax get_colors test to adapt to new Bokeh release (:pr:`5576`) + Matthew Rocklin + + Add dask.blockwise.fuse_roots optimization (:pr:`5451`) Matthew + Rocklin + + Add sizeof implementation for small dicts (:pr:`5578`) Matthew + Rocklin + + Update fsspec, gcsfs, s3fs (:pr:`5588`) Tom Augspurger + * DataFrame + + Add dropna argument to groupby (:pr:`5579`) Richard J Zamora + + Revert "Remove import of dask_cudf, which is now a part of cudf + (:pr:`5568`)" (:pr:`5590`) Matthew Rocklin + * Documentation + + Add best practice for dask.compute function (:pr:`5583`) Matthew + Rocklin + + Create FUNDING.yml (:pr:`5587`) Gina Helfrich + + Add screencast for coordination primitives (:pr:`5593`) Matthew + Rocklin + + Move funding to .github repo (:pr:`5589`) Tom Augspurger + + Update calendar link (:pr:`5569`) Tom Augspurger + +------------------------------------------------------------------- +Mon Nov 11 18:24:07 UTC 2019 - Todd R + +- Update to 2.7.0 + + Array + * Reuse code for assert_eq util method + * Update da.array to always return a dask array + * Skip transpose on trivial inputs + * Avoid NumPy scalar string representation in tokenize + * Remove unnecessary tiledb shape constraint + * Removes bytes from sparse array HTML repr + + Core + * Drop Python 3.5 + * Update the use of fixtures in distributed tests + * Changed deprecated bokeh-port to dashboard-address + * Avoid updating with identical dicts in ensure_dict + * Test Upstream + * Accelerate reverse_dict + * Update test_imports.sh + * Support cgroups limits on cpu count in multiprocess and threaded schedulers + * Update minimum 
pyarrow version on CI + * Make cloudpickle optional + + DataFrame + * Add an example of index_col usage + * Explicitly use iloc for row indexing + * Accept dask arrays on columns assignment + * Implement unique and value_counts for SeriesGroupBy + * Add sizeof definition for pyarrow tables and columns + * Enable row-group task partitioning in pyarrow-based read_parquet + * Removes npartitions='auto' from dd.merge docstring + * Apply enforce error message shows non-overlapping columns. + * Optimize meta_nonempty for repetitive dtypes + * Remove import of dask_cudf, which is now a part of cudf + + Documentation + * Make capitalization more consistent in FAQ docs + * Add CONTRIBUTING.md + * Document optional dependencies + * Update helm chart docs to reflect new chart repo + * Add Resampler to API docs + * Fix typo in read_sql_table + * Add adaptive deployments screencast +- Update to 2.6.0 + + Core + * Call ``ensure_dict`` on graphs before entering ``toolz.merge`` + * Consolidating hash dispatch functions + + DataFrame + * Support Python 3.5 in Parquet code + * Avoid identity check in ``warn_dtype_mismatch`` + * Enable unused groupby tests + * Remove old parquet and bcolz dataframe optimizations + * Add getitem optimization for ``read_parquet`` + * Use ``_constructor_sliced`` method to determine Series type + * Fix map(series) for unsorted base series index + * Fix ``KeyError`` with Groupby label + + Documentation + * Use Zoom meeting instead of appear.in + * Added curated list of resources + * Update SSH docs to include ``SSHCluster`` + * Update "Why Dask?"
page + * Fix typos in docstrings +- Update to 2.5.2 + + Array + * Correct chunk size logic for asymmetric overlaps + * Make da.unify_chunks public API + + DataFrame + * Fix dask.dataframe.fillna handling of Scalar object + + Documentation + * Remove boxes in Spark comparison page + * Add latest presentations + * Update cloud documentation +- Update to 2.5.0 + + Core + * Add sentinel no_default to get_dependencies task + * Update fsspec version + * Remove PY2 checks + + DataFrame + * Add option to not check meta in dd.from_delayed + * Fix test_timeseries_nulls_in_schema failures with pyarrow master + * Reduce read_metadata output size in pyarrow/parquet + * Test numeric edge case for repartition with npartitions. + * Unxfail pandas-datareader test + * Add DataFrame.pop implementation + * Enable merge/set_index for cudf-based dataframes with cupy ``values`` + * drop_duplicates support for positional subset parameter + + Documentation + * Add screencasts to array, bag, dataframe, delayed, futures and setup + * Fix delimiter parsing documentation + * Update overview image +- Update to 2.4.0 + + Array + * Adds explicit ``h5py.File`` mode + * Provides method to compute unknown array chunks sizes + * Ignore runtime warning in Array ``compute_meta`` + * Add ``_meta`` to ``Array.__dask_postpersist__`` + * Fixup ``da.asarray`` and ``da.asanyarray`` for datetime64 dtype and xarray objects + * Add shape implementation + * Add chunktype to array text repr + * Array.random.choice: handle array-like non-arrays + + Core + * Remove deprecated code + * Fix ``funcname`` when vectorized func has no ``__name__`` + * Truncate ``funcname`` to avoid long key names + * Add support for ``numpy.vectorize`` in ``funcname`` + * Fixed HDFS upstream test + * Support numbers and None in ``parse_bytes``/``timedelta`` + * Fix tokenizing of subindexes on memmapped numpy arrays + * Upstream fixups + + DataFrame + * Allow pandas to cast type of statistics + * Preserve index dtype after applying
``dd.pivot_table`` + * Implement explode for Series and DataFrame + * ``set_index`` on categorical fails with less categories than partitions + * Support output to a single CSV file + * Add ``groupby().transform()`` + * Adding filter kwarg to pyarrow dataset call + * Implement and check compression defaults for parquet + * Pass sqlalchemy params to delayed objects + * Fixing schema handling in arrow-parquet + * Add support for DF and Series ``groupby().idxmin/max()`` + * Add correlation calculation and add test + + Documentation + * Numpy docstring standard has moved + * Reference correct NumPy array name + * Minor edits to Array chunk documentation + * Add methods to API docs + * Add namespacing to configuration example + * Add get_task_stream and profile to the diagnostics page + * Add best practice to load data with Dask + * Update ``institutional-faq.rst`` + * Add threads and processes note to the best practices + * Update cuDF links + * Fixed small typo with parentheses placement + * Update link in reshape docstring +- Update to 2.3.0 + + Array + * Raise exception when ``from_array`` is given a dask array + * Avoid adjusting gufunc's meta dtype twice + * Add ``meta=`` keyword to map_blocks and add test with sparse + * Add rollaxis and moveaxis + * Always increment old chunk index + * Shuffle dask array + * Fix ordering when indexing a dask array with a bool dask array + + Bag + * Add workaround for memory leaks in bag generators + + Core + * Set strict xfail option + * test-upstream + * Fixed HDFS CI failure + * Error nicely if no file size inferred + * A few changes to ``config.set`` + * Fixup black string normalization + * Pin NumPy in windows tests + * Ensure parquet tests are skipped if fastparquet and pyarrow not installed + * Add fsspec to readthedocs + * Bump NumPy and Pandas to 1.17 and 0.25 in CI test + + DataFrame + * Fix ``DataFrame.query`` docstring (incorrect numexpr API) + * Parquet metadata-handling improvements + * Improve messaging around 
sorted parquet columns for index + * Add ``rearrange_by_divisions`` and ``set_index`` support for cudf + * Fix ``groupby.std()`` with integer column names + * Add ``Series.__iter__`` + * Generalize ``hash_pandas_object`` to work for non-pandas backends + * Add rolling cov + * Add columns argument in drop function + + Documentation + * Update institutional FAQ doc + * Add draft of institutional FAQ + * Make boxes for dask-spark page + * Add motivation for shuffle docs + * Fix links and API entries for best-practices + * Remove "bytes" (internal data ingestion) doc page + * Redirect from our local distributed page to distributed.dask.org + * Cleanup API page + * Remove excess endlines from install docs + * Remove item list in phases of computation doc + * Remove custom graphs from the TOC sidebar + * Remove experimental status of custom collections + * Adds table of contents to Why Dask? + * Moves bag overview to top-level bag page + * Remove use-cases in favor of stories.dask.org + * Removes redundant TOC information in index.rst + * Elevate dashboard in distributed diagnostics documentation + * Updates "add" layer in HLG docs example + * Update GUFunc documentation +- Update to 2.2.0 + + Array + * Use da.from_array(..., asarray=False) if input follows NEP-18 + * Add missing attributes to from_array documentation + * Fix meta computation for some reduction functions + * Raise informative error in to_zarr if unknown chunks + * Remove invalid pad tests + * Ignore NumPy warnings in compute_meta + * Fix kurtosis calc for single dimension input array + * Support Numpy 1.17 in tests + + Bag + * Supply pool to bag test to resolve intermittent failure + + Core + * Base dask on fsspec + * Various upstream compatibility fixes + * Make distributed tests optional again. + * Fix HDFS in dask + * Ignore some more invalid value warnings. 
+ + DataFrame + * Fix pd.MultiIndex size estimate + * Generalizing has_known_categories + * Refactor Parquet engine + * Add divide method to series and dataframe + * fix flaky partd test + * Adjust is_dataframe_like to adjust for value_counts change + * Generalize rolling windows to support non-Pandas dataframes + * Avoid unnecessary aggregation in pivot_table + * Add column names to apply_and_enforce error message + * Add schema keyword argument to to_parquet + * Remove recursion error in accessors + * Allow fastparquet to handle gather_statistics=False for file lists + + Documentation + * Adds NumFOCUS badge to the README + * Update developer docs + * Document DataFrame.set_index computation behavior + * Use pip install . instead of calling setup.py + * Close user survey + * Fix Google Calendar meeting link + * Add docker image customization example + * Update remote-data-services after fsspec + * Fix typo in spark.rst + * Update setup/python docs for async/await API + * Update Local Storage HPC documentation + +------------------------------------------------------------------- +Tue Jul 23 00:23:55 UTC 2019 - Todd R + +- Update to 2.1.0 + + Array + * Add ``recompute=`` keyword to ``svd_compressed`` for lower-memory use + * Change ``__array_function__`` implementation for backwards compatibility + * Added ``dtype`` and ``shape`` kwargs to ``apply_along_axis`` + * Fix reduction with empty tuple axis + * Drop size 0 arrays in ``stack`` + + Core + * Removes index keyword from pandas ``to_parquet`` call + * Fixes upstream dev CI build installation + * Ensure scalar arrays are not rendered to SVG + * Environment creation overhaul + * s3fs, moto compatibility + * pytest 5.0 compat + + DataFrame + * Fix ``compute_meta`` recursion in blockwise + * Remove hard dependency on pandas in ``get_dummies`` + * Check dtypes unchanged when using ``DataFrame.assign`` + * Fix cumulative functions on tables with more than 1 partition + * Handle non-divisible sizes in repartition + 
* Handles timestamp and ``preserve_index`` changes in pyarrow + * Fix undefined ``meta`` for ``str.split(expand=False)`` + * Removed checks used for debugging ``merge_asof`` + * Don't use type when getting accessor in dataframes + * Add ``melt`` as a method of Dask DataFrame + * Adds path-like support to ``to_hdf`` + + Documentation + * Point to latest K8s setup article in JupyterHub docs + * Changes vizualize to visualize + * Fix ``from_sequence`` typo in delayed best practices + * Add user survey link to docs + * Fixes typo in optimization docs + * Update community meeting information +- Update to 2.0.0 + + Array + * Support automatic chunking in da.indices + * Err if there are no arrays to stack + * Asymmetrical Array Overlap + * Dispatch concatenate where possible within dask array + * Fix tokenization of memmapped numpy arrays on different part of same file + * Preserve NumPy condition in da.asarray to preserve output shape + * Expand foo_like_safe usage + * Defer order/casting einsum parameters to NumPy implementation + * Remove numpy warning in moment calculation + * Fix meta_from_array to support Xarray test suite + * Cache chunk boundaries for integer slicing + * Drop size 0 arrays in concatenate + * Raise ValueError if concatenate is given no arrays + * Promote types in `concatenate` using `_meta` + * Add chunk type to html repr in Dask array + * Add Dask Array._meta attribute + > Fix _meta slicing of flexible types + > Minor meta construction cleanup in concatenate + > Further relax Array meta checks for Xarray + > Support meta= keyword in da.from_delayed + > Concatenate meta along axis + > Use meta in stack + > Move blockwise_meta to more general compute_meta function + * Alias .partitions to .blocks attribute of dask arrays + * Drop outdated `numpy_compat` functions + * Allow da.eye to support arbitrary chunking sizes with chunks='auto' + * Fix CI warnings in dask.array tests + * Make map_blocks work with drop_axis + block_info + * Add SVG image and 
table in Array._repr_html_ + * ufunc: avoid __array_wrap__ in favor of __array_function__ + * Ensure trivial padding returns the original array + * Test ``da.block`` with 0-size arrays + + Core + * **Drop Python 2.7** + * Quiet dependency installs in CI + * Raise on warnings in tests + * Add a diagnostics extra to setup.py (includes bokeh) + * Add newline delimiter keyword to OpenFile + * Overload HighLevelGraphs values method + * Add __await__ method to Dask collections + * Also ignore AttributeErrors which may occur if snappy (not python-snappy) is installed + * Canonicalize key names in config.rename + * Bump minimum partd to 0.3.10 + * Catch async def SyntaxError + * catch IOError in ensure_file + * Cleanup CI warnings + * Move distributed's parse and format functions to dask.utils + * Apply black formatting + * Package license file in wheels + + DataFrame + * Add an optional partition_size parameter to repartition + * merge_asof and prefix_reduction + * Allow dataframes to be indexed by dask arrays + * Avoid deprecated message parameter in pytest.raises + * Update test_to_records to test with lengths argument(:pr:`4515`) `asmith26`_ + * Remove pandas pinning in Dataframe accessors + * Fix correlation of series with same names + * Map Dask Series to Dask Series + * Warn in dd.merge on dtype warning + * Add groupby Covariance/Correlation + * keep index name with to_datetime + * Add Parallel variance computation for dataframes + * Add divmod implementation to arrays and dataframes + * Add documentation for dataframe reshape methods + * Avoid use of pandas.compat + * Added accessor registration for Series, DataFrame, and Index + * Add read_function keyword to read_json + * Provide full type name in check_meta + * Correctly estimate bytes per row in read_sql_table + * Adding support of non-numeric data to describe() + * Scalars for extension dtypes. 
+ * Call head before compute in dd.from_delayed + * Add support for rolling operations with larger window that partition size in DataFrames with Time-based index + * Update groupby-apply doc with warning + * Change groupby-ness tests in `_maybe_slice` + * Add master best practices document + * Add document for how Dask works with GPUs + * Add cli API docs + * Ensure concat output has coherent dtypes + * Fixes pandas_datareader dependencies installation + * Accept pathlib.Path as pattern in read_hdf + + Documentation + * Move CLI API docs to relevant pages + * Add to_datetime function to dataframe API docs `Matthew Rocklin`_ + * Add documentation entry for dask.array.ma.average + * Add bag.read_avro to bag API docs + * Fix typo + * Docs: Drop support for Python 2.7 + * Remove requirement to modify changelog + * Add documentation about meta column order + * Add documentation note in DataFrame.shift + * Docs: Fix typo + * Put do/don't into boxes for delayed best practice docs + * Doc fixups + * Add quansight to paid support doc section + * Add document for custom startup + * Allow `utils.derive_from` to accept functions, apply across array + * Add "Avoid Large Partitions" section to best practices + * Update URL for joblib to new website hosting their doc (:pr:`4816`) `Christian Hudon`_ + +------------------------------------------------------------------- +Tue May 21 11:48:23 UTC 2019 - pgajdos@suse.com + +- version update to 1.2.2 + + Array + * Clarify regions kwarg to array.store (:pr:`4759`) `Martin Durant`_ + * Add dtype= parameter to da.random.randint (:pr:`4753`) `Matthew Rocklin`_ + * Use "row major" rather than "C order" in docstring (:pr:`4452`) `@asmith26`_ + * Normalize Xarray datasets to Dask arrays (:pr:`4756`) `Matthew Rocklin`_ + * Remove normed keyword in da.histogram (:pr:`4755`) `Matthew Rocklin`_ + + Bag + * Add key argument to Bag.distinct (:pr:`4423`) `Daniel Severo`_ + + Core + * Add core dask config file (:pr:`4774`) `Matthew Rocklin`_ + * Add 
core dask config file to MANIFEST.in (:pr:`4780`) `James Bourbeau`_ + * Enabling glob with HTTP file-system (:pr:`3926`) `Martin Durant`_ + * HTTPFile.seek with whence=1 (:pr:`4751`) `Martin Durant`_ + * Remove config key normalization (:pr:`4742`) `Jim Crist`_ + + DataFrame + * Remove explicit references to Pandas in dask.dataframe.groupby (:pr:`4778`) `Matthew Rocklin`_ + * Add support for group_keys kwarg in DataFrame.groupby() (:pr:`4771`) `Brian Chu`_ + * Describe doc (:pr:`4762`) `Martin Durant`_ + * Remove explicit pandas check in cumulative aggregations (:pr:`4765`) `Nick Becker`_ + * Added meta for read_json and test (:pr:`4588`) `Abhinav Ralhan`_ + * Add test for dtype casting (:pr:`4760`) `Martin Durant`_ + * Document alignment in map_partitions (:pr:`4757`) `Jim Crist`_ + * Implement Series.str.split(expand=True) (:pr:`4744`) `Matthew Rocklin`_ + + Documentation + * Tweaks to develop.rst from trying to run tests (:pr:`4772`) `Christian Hudon`_ + * Add document describing phases of computation (:pr:`4766`) `Matthew Rocklin`_ + * Point users to Dask-Yarn from spark documentation (:pr:`4770`) `Matthew Rocklin`_ + * Update images in delayed doc to remove labels (:pr:`4768`) `Martin Durant`_ + * Explain intermediate storage for dask arrays (:pr:`4025`) `John A Kirkham`_ + * Specify bash code-block in array best practices (:pr:`4764`) `James Bourbeau`_ + * Add array best practices doc (:pr:`4705`) `Matthew Rocklin`_ + * Update optimization docs now that cull is not automatic (:pr:`4752`) `Matthew Rocklin`_ +- version update to 1.2.1 + + Array + * Fix map_blocks with block_info and broadcasting (:pr:`4737`) `Bruce Merry`_ + * Make 'minlength' keyword argument optional in da.bincount (:pr:`4684`) `Genevieve Buckley`_ + * Add support for map_blocks with no array arguments (:pr:`4713`) `Bruce Merry`_ + * Add dask.array.trace (:pr:`4717`) `Danilo Horta`_ + * Add sizeof support for cupy.ndarray (:pr:`4715`) `Peter Andreas Entschev`_ + * Add name kwarg to from_zarr 
(:pr:`4663`) `Michael Eaton`_ + * Add chunks='auto' to from_array (:pr:`4704`) `Matthew Rocklin`_ + * Raise TypeError if dask array is given as shape for da.ones, zeros, empty or full (:pr:`4707`) `Genevieve Buckley`_ + * Add TileDB backend (:pr:`4679`) `Isaiah Norton`_ + + Core + * Delay long list arguments (:pr:`4735`) `Matthew Rocklin`_ + * Bump to numpy >= 1.13, pandas >= 0.21.0 (:pr:`4720`) `Jim Crist`_ + * Remove file "test" (:pr:`4710`) `James Bourbeau`_ + * Reenable development build, uses upstream libraries (:pr:`4696`) `Peter Andreas Entschev`_ + * Remove assertion in HighLevelGraph constructor (:pr:`4699`) `Matthew Rocklin`_ + + DataFrame + * Change cum-aggregation last-nonnull-value algorithm (:pr:`4736`) `Nick Becker`_ + * Fixup series-groupby-apply (:pr:`4738`) `Jim Crist`_ + * Refactor array.percentile and dataframe.quantile to use t-digest (:pr:`4677`) `Janne Vuorela`_ + * Allow naive concatenation of sorted dataframes (:pr:`4725`) `Matthew Rocklin`_ + * Fix perf issue in dd.Series.isin (:pr:`4727`) `Jim Crist`_ + * Remove hard pandas dependency for melt by using methodcaller (:pr:`4719`) `Nick Becker`_ + * A few dataframe metadata fixes (:pr:`4695`) `Jim Crist`_ + * Add Dataframe.replace (:pr:`4714`) `Matthew Rocklin`_ + * Add 'threshold' parameter to pd.DataFrame.dropna (:pr:`4625`) `Nathan Matare`_ + + Documentation + * Add warning about derived docstrings early in the docstring (:pr:`4716`) `Matthew Rocklin`_ + * Create dataframe best practices doc (:pr:`4703`) `Matthew Rocklin`_ + * Uncomment dask_sphinx_theme (:pr:`4728`) `James Bourbeau`_ + * Fix minor typo fix in a Queue/fire_and_forget example (:pr:`4709`) `Matthew Rocklin`_ + * Update from_pandas docstring to match signature (:pr:`4698`) `James Bourbeau`_ + +------------------------------------------------------------------- +Mon Apr 22 19:32:28 UTC 2019 - Todd R + +- Update to version 1.2.0 + + Array + * Fixed mean() and moment() on sparse arrays + * Add test for NEP-18. 
+ * Allow None to say "no chunking" in normalize_chunks + * Fix limit value in auto_chunks + + Core + * Updated diagnostic bokeh test for compatibility with bokeh>=1.1.0 + * Adjusts codecov's target/threshold, disable patch + * Always start with empty http buffer, not None + + DataFrame + * Propagate index dtype and name when create dask dataframe from array + * Fix ordering of quantiles in describe + * Clean up and document rearrange_column_by_tasks + * Mark some parquet tests xfail + * Fix parquet breakages with arrow 0.13.0 + * Allow sample to be False when reading CSV from a remote URL + * Fix timezone metadata inference on parquet load + * Use is_dataframe/index_like in dd.utils + * Add min_count parameter to groupby sum method + * Correct quantile to handle unsorted quantiles + + Documentation + * Add delayed extra dependencies to install docs +- Update to version 1.1.5 + + Array + * Ensure that we use the dtype keyword in normalize_chunks + + Core + * Use recursive glob in LocalFileSystem + * Avoid YAML deprecation + * Fix CI and add set -e + * Support builtin sequence types in dask.visualize + * unpack/repack orderedDict + * Add da.random.randint to API docs + * Add zarr to CI environment + * Enable codecov + + DataFrame + * Support setting the index + * DataFrame.itertuples accepts index, name kwargs + * Support non-Pandas series in dd.Series.unique + * Replace use of explicit type check with ._is_partition_type predicate + * Remove additional pandas warnings in tests + * Check object for name/dtype attributes rather than type + * Fix comparison against pd.Series + * Fixing warning from setting categorical codes to floats + * Fix renaming on index to_frame method + * Fix divisions when joining two single-partition dataframes + * Warn if partitions overlap in compute_divisions + * Give informative meta= warning + * Add informative error message to Series.__getitem__ + * Add clear exception message when using index or index_col in read_csv + + Documentation 
+ * Add documentation for custom groupby aggregations + * Docs dataframe joins + * Specify fork-based contributions + * correct to_parquet example in docs + * Update and secure several references + +------------------------------------------------------------------- +Tue Apr 9 10:06:13 UTC 2019 - pgajdos@suse.com + +- do not require optional python2-sparse for testing, python-sparse + is going to be python3-only + +------------------------------------------------------------------- +Mon Mar 11 12:30:53 UTC 2019 - Tomáš Chvátal + +- Update to 1.1.4: + * Various bugfixes in 1.1 branch + +------------------------------------------------------------------- +Wed Feb 20 11:19:16 UTC 2019 - Tomáš Chvátal + +- Enable tests and switch to multibuild + +------------------------------------------------------------------- +Sat Feb 2 17:09:28 UTC 2019 - Arun Persaud + +- update to version 1.1.1: + * Array + + Add support for cupy.einsum (:pr:`4402`) Johnnie Gray + + Provide byte size in chunks keyword (:pr:`4434`) Adam Beberg + + Raise more informative error for histogram bins and range + (:pr:`4430`) James Bourbeau + * DataFrame + + Lazily register more cudf functions and move to backends file + (:pr:`4396`) Matthew Rocklin + + Fix ORC tests for pyarrow 0.12.0 (:pr:`4413`) Jim Crist + + rearrange_by_column: ensure that shuffle arg defaults to 'disk' + if it's None in dask.config (:pr:`4414`) George Sakkis + + Implement filters for _read_pyarrow (:pr:`4415`) George Sakkis + + Avoid checking against types in is_dataframe_like (:pr:`4418`) + Matthew Rocklin + + Pass username as 'user' when using pyarrow (:pr:`4438`) Roma + Sokolov + * Delayed + + Fix DelayedAttr return value (:pr:`4440`) Matthew Rocklin + * Documentation + + Use SVG for pipeline graphic (:pr:`4406`) John A Kirkham + + Add doctest-modules to py.test documentation (:pr:`4427`) Daniel + Severo + * Core + + Work around psutil 5.5.0 not allowing pickling Process objects + Dimplexion + 
+------------------------------------------------------------------- +Sun Jan 20 04:50:39 UTC 2019 - Arun Persaud + +- specfile: + * update copyright year + +- update to version 1.1.0: + * Array + + Fix the average function when there is a masked array + (:pr:`4236`) Damien Garaud + + Add allow_unknown_chunksizes to hstack and vstack (:pr:`4287`) + Paul Vecchio + + Fix tensordot for 27+ dimensions (:pr:`4304`) Johnnie Gray + + Fixed block_info with axes. (:pr:`4301`) Tom Augspurger + + Use safe_wraps for matmul (:pr:`4346`) Mark Harfouche + + Use chunks="auto" in array creation routines (:pr:`4354`) + Matthew Rocklin + + Fix np.matmul in dask.array.Array.__array_ufunc__ (:pr:`4363`) + Stephan Hoyer + + COMPAT: Re-enable multifield copy->view change (:pr:`4357`) + Diane Trout + + Calling np.dtype on a delayed object works (:pr:`4387`) Jim + Crist + + Rework normalize_array for numpy data (:pr:`4312`) Marco Neumann + * DataFrame + + Add fill_value support for series comparisons (:pr:`4250`) James + Bourbeau + + Add schema name in read_sql_table for empty tables (:pr:`4268`) + Mina Farid + + Adjust check for bad chunks in map_blocks (:pr:`4308`) Tom + Augspurger + + Add dask.dataframe.read_fwf (:pr:`4316`) @slnguyen + + Use atop fusion in dask dataframe (:pr:`4229`) Matthew Rocklin + + Use parallel_types() in from_pandas (:pr:`4331`) Matthew + Rocklin + + Change DataFrame._repr_data to method (:pr:`4330`) Matthew + Rocklin + + Install pyarrow fastparquet for Appveyor (:pr:`4338`) Gábor + Lipták + + Remove explicit pandas checks and provide cudf lazy registration + (:pr:`4359`) Matthew Rocklin + + Replace isinstance(..., pandas) with is_dataframe_like + (:pr:`4375`) Matthew Rocklin + + ENH: Support 3rd-party ExtensionArrays (:pr:`4379`) Tom + Augspurger + + Pandas 0.24.0 compat (:pr:`4374`) Tom Augspurger + * Documentation + + Fix link to 'map_blocks' function in array api docs (:pr:`4258`) + David Hoese + + Add a paragraph on Dask-Yarn in the cloud docs (:pr:`4260`) 
Jim + Crist + + Copy edit documentation (:pr:`4267`), (:pr:`4263`), (:pr:`4262`), + (:pr:`4277`), (:pr:`4271`), (:pr:`4279`), (:pr:`4265`), + (:pr:`4295`), (:pr:`4293`), (:pr:`4296`), (:pr:`4302`), + (:pr:`4306`), (:pr:`4318`), (:pr:`4314`), (:pr:`4309`), + (:pr:`4317`), (:pr:`4326`), (:pr:`4325`), (:pr:`4322`), + (:pr:`4332`), (:pr:`4333`), Miguel Farrajota + + Fix typo in code example (:pr:`4272`) Daniel Li + + Doc: Update array-api.rst (:pr:`4259`) (:pr:`4282`) Prabakaran + Kumaresshan + + Update hpc doc (:pr:`4266`) Guillaume Eynard-Bontemps + + Doc: Replace from_avro with read_avro in documents (:pr:`4313`) + Prabakaran Kumaresshan + + Remove reference to "get" scheduler functions in docs + (:pr:`4350`) Matthew Rocklin + + Fix typo in docstring (:pr:`4376`) Daniel Saxton + + Added documentation for dask.dataframe.merge (:pr:`4382`) + Jendrik Jördening + * Core + + Avoid recursion in dask.core.get (:pr:`4219`) Matthew Rocklin + + Remove verbose flag from pytest setup.cfg (:pr:`4281`) Matthew + Rocklin + + Support Pytest 4.0 by specifying marks explicitly (:pr:`4280`) + Takahiro Kojima + + Add High Level Graphs (:pr:`4092`) Matthew Rocklin + + Fix SerializableLock locked and acquire methods (:pr:`4294`) + Stephan Hoyer + + Pin boto3 to earlier version in tests to avoid moto conflict + (:pr:`4276`) Martin Durant + + Treat None as missing in config when updating (:pr:`4324`) + Matthew Rocklin + + Update Appveyor to Python 3.6 (:pr:`4337`) Gábor Lipták + + Use parse_bytes more liberally in dask.dataframe/bytes/bag + (:pr:`4339`) Matthew Rocklin + + Add a better error message when cloudpickle is missing + (:pr:`4342`) Mark Harfouche + + Support pool= keyword argument in threaded/multiprocessing get + functions (:pr:`4351`) Matthew Rocklin + + Allow updates from arbitrary Mappings in config.update, not only + dicts. 
(:pr:`4356`) Stuart Berg + + Move dask/array/top.py code to dask/blockwise.py (:pr:`4348`) + Matthew Rocklin + + Add has_parallel_type (:pr:`4395`) Matthew Rocklin + + CI: Update Appveyor (:pr:`4381`) Tom Augspurger + + Ignore non-readable config files (:pr:`4388`) Jim Crist + +------------------------------------------------------------------- +Sat Dec 1 18:36:31 UTC 2018 - Arun Persaud + +- update to version 1.0.0: + * Array + + Add nancumsum/nancumprod unit tests (:pr:`4215`) Guido Imperiale + * DataFrame + + Add index to to_dask_dataframe docstring (:pr:`4232`) James + Bourbeau + + Text and fix when appending categoricals with fastparquet + (:pr:`4245`) Martin Durant + + Don't reread metadata when passing ParquetFile to read_parquet + (:pr:`4247`) Martin Durant + * Documentation + + Copy edit documentation (:pr:`4222`) (:pr:`4224`) (:pr:`4228`) + (:pr:`4231`) (:pr:`4230`) (:pr:`4234`) (:pr:`4235`) (:pr:`4254`) + Miguel Farrajota + + Updated doc for the new scheduler keyword (:pr:`4251`) @milesial + * Core + + Avoid a few warnings (:pr:`4223`) Matthew Rocklin + + Remove dask.store module (:pr:`4221`) Matthew Rocklin + + Remove AUTHORS.md Jim Crist + +------------------------------------------------------------------- +Thu Nov 22 22:46:17 UTC 2018 - Arun Persaud + +- update to version 0.20.2: + * Array + + Avoid fusing dependencies of atop reductions (:pr:`4207`) + Matthew Rocklin + * Dataframe + + Improve memory footprint for dataframe correlation (:pr:`4193`) + Damien Garaud + + Add empty DataFrame check to boundary_slice (:pr:`4212`) James + Bourbeau + * Documentation + + Copy edit documentation (:pr:`4197`) (:pr:`4204`) (:pr:`4198`) + (:pr:`4199`) (:pr:`4200`) (:pr:`4202`) (:pr:`4209`) Miguel + Farrajota + + Add stats module namespace (:pr:`4206`) James Bourbeau + + Fix link in dataframe documentation (:pr:`4208`) James Bourbeau + +------------------------------------------------------------------- +Mon Nov 12 05:54:54 UTC 2018 - Arun Persaud + +- update to 
version 0.20.1: + * Array + + Only allocate the result space in wrapped_pad_func (:pr:`4153`) + John A Kirkham + + Generalize expand_pad_width to expand_pad_value (:pr:`4150`) + John A Kirkham + + Test da.pad with 2D linear_ramp case (:pr:`4162`) John A Kirkham + + Fix import for broadcast_to. (:pr:`4168`) samc0de + + Rewrite Dask Array's pad to add only new chunks (:pr:`4152`) + John A Kirkham + + Validate index inputs to atop (:pr:`4182`) Matthew Rocklin + * Core + + Dask.config set and get normalize underscores and hyphens + (:pr:`4143`) James Bourbeau + + Only subs on core collections, not subclasses (:pr:`4159`) + Matthew Rocklin + + Add block_size=0 option to HTTPFileSystem. (:pr:`4171`) Martin + Durant + + Add traverse support for dataclasses (:pr:`4165`) Armin Berres + + Avoid optimization on sharedicts without dependencies + (:pr:`4181`) Matthew Rocklin + + Update the pytest version for TravisCI (:pr:`4189`) Damien + Garaud + + Use key_split rather than funcname in visualize names + (:pr:`4160`) Matthew Rocklin + * Dataframe + + Add fix for DataFrame.__setitem__ for index (:pr:`4151`) + Anderson Banihirwe + + Fix column choice when passing list of files to fastparquet + (:pr:`4174`) Martin Durant + + Pass engine_kwargs from read_sql_table to sqlalchemy + (:pr:`4187`) Damien Garaud + * Documentation + + Fix documentation in Delayed best practices example that + returned an empty list (:pr:`4147`) Jonathan Fraine + + Copy edit documentation (:pr:`4164`) (:pr:`4175`) (:pr:`4185`) + (:pr:`4192`) (:pr:`4191`) (:pr:`4190`) (:pr:`4180`) Miguel + Farrajota + + Fix typo in docstring (:pr:`4183`) Carlos Valiente + +------------------------------------------------------------------- +Tue Oct 30 03:04:38 UTC 2018 - Arun Persaud + +- update to version 0.20.0: + * Array + + Fuse Atop operations (:pr:`3998`), (:pr:`4081`) Matthew Rocklin + + Support da.asanyarray on dask dataframes (:pr:`4080`) Matthew + Rocklin + + Remove unnecessary endianness check in datetime test + 
(:pr:`4113`) Elliott Sales de Andrade + + Set name=False in array foo_like functions (:pr:`4116`) Matthew + Rocklin + + Remove dask.array.ghost module (:pr:`4121`) Matthew Rocklin + + Fix use of getargspec in dask array (:pr:`4125`) Stephan Hoyer + + Adds dask.array.invert (:pr:`4127`), (:pr:`4131`) Anderson + Banihirwe + + Raise informative error on arg-reduction on unknown chunksize + (:pr:`4128`), (:pr:`4135`) Matthew Rocklin + + Normalize reversed slices in dask array (:pr:`4126`) Matthew + Rocklin + * Bag + + Add bag.to_avro (:pr:`4076`) Martin Durant + * Core + + Pull num_workers from config.get (:pr:`4086`), (:pr:`4093`) + James Bourbeau + + Fix invalid escape sequences with raw strings (:pr:`4112`) + Elliott Sales de Andrade + + Raise an error on the use of the get= keyword and set_options + (:pr:`4077`) Matthew Rocklin + + Add import for Azure DataLake storage, and add docs (:pr:`4132`) + Martin Durant + + Avoid collections.Mapping/Sequence (:pr:`4138`) Matthew Rocklin + * Dataframe + + Include index keyword in to_dask_dataframe (:pr:`4071`) Matthew + Rocklin + + add support for duplicate column names (:pr:`4087`) Jan Koch + + Implement min_count for the DataFrame methods sum and prod + (:pr:`4090`) Bart Broere + + Remove pandas warnings in concat (:pr:`4095`) Matthew Rocklin + + DataFrame.to_csv header option to only output headers in the + first chunk (:pr:`3909`) Rahul Vaidya + + Remove Series.to_parquet (:pr:`4104`) Justin Dennison + + Avoid warnings and deprecated pandas methods (:pr:`4115`) + Matthew Rocklin + + Swap 'old' and 'previous' when reporting append error + (:pr:`4130`) Martin Durant + * Documentation + + Copy edit documentation (:pr:`4073`), (:pr:`4074`), + (:pr:`4094`), (:pr:`4097`), (:pr:`4107`), (:pr:`4124`), + (:pr:`4133`), (:pr:`4139`) Miguel Farrajota + + Fix typo in code example (:pr:`4089`) Antonino Ingargiola + + Add pycon 2018 presentation (:pr:`4102`) Javad + + Quick description for gcsfs (:pr:`4109`) Martin Durant + + Fixed 
typo in docstrings of read_sql_table method (:pr:`4114`) + TakaakiFuruse + + Make target directories in redirects if they don't exist + (:pr:`4136`) Matthew Rocklin + +------------------------------------------------------------------- +Wed Oct 10 01:49:52 UTC 2018 - Arun Persaud + +- update to version 0.19.4: + * Array + + Implement apply_gufunc(..., axes=..., keepdims=...) (:pr:`3985`) + Markus Gonser + * Bag + + Fix typo in datasets.make_people (:pr:`4069`) Matthew Rocklin + * Dataframe + + Added percentiles options for dask.dataframe.describe method + (:pr:`4067`) Zhenqing Li + + Add DataFrame.partitions accessor similar to Array.blocks + (:pr:`4066`) Matthew Rocklin + * Core + + Pass get functions and Clients through scheduler keyword + (:pr:`4062`) Matthew Rocklin + * Documentation + + Fix Typo on hpc example. (missing = in kwarg). (:pr:`4068`) + Matthias Bussonier + + Extensive copy-editing: (:pr:`4065`), (:pr:`4064`), (:pr:`4063`) + Miguel Farrajota + +------------------------------------------------------------------- +Mon Oct 8 15:01:22 UTC 2018 - Arun Persaud + +- update to version 0.19.3: + * Array + + Make da.RandomState extensible to other modules (:pr:`4041`) + Matthew Rocklin + + Support unknown dims in ravel no-op case (:pr:`4055`) Jim Crist + + Add basic infrastructure for cupy (:pr:`4019`) Matthew Rocklin + + Avoid asarray and lock arguments for from_array(getitem) + (:pr:`4044`) Matthew Rocklin + + Move local imports in corrcoef to global imports (:pr:`4030`) + John A Kirkham + + Move local indices import to global import (:pr:`4029`) John A + Kirkham + + Fix-up Dask Array's fromfunction w.r.t. 
dtype and kwargs + (:pr:`4028`) John A Kirkham + + Don't use dummy expansion for trim_internal in overlapped + (:pr:`3964`) Mark Harfouche + + Add unravel_index (:pr:`3958`) John A Kirkham + * Bag + + Sort result in Bag.frequencies (:pr:`4033`) Matthew Rocklin + + Add support for npartitions=1 edge case in groupby (:pr:`4050`) + James Bourbeau + + Add new random dataset for people (:pr:`4018`) Matthew Rocklin + + Improve performance of bag.read_text on small files (:pr:`4013`) + Eric Wolak + + Add bag.read_avro (:pr:`4000`) (:pr:`4007`) Martin Durant + * Dataframe + + Added an index parameter to + :meth:`dask.dataframe.from_dask_array` for creating a dask + DataFrame from a dask Array with a given index. (:pr:`3991`) Tom + Augspurger + + Improve sub-classability of dask dataframe (:pr:`4015`) Matthew + Rocklin + + Fix failing hdfs test [test-hdfs] (:pr:`4046`) Jim Crist + + fuse_subgraphs works without normal fuse (:pr:`4042`) Jim Crist + + Make path for reading many parquet files without prescan + (:pr:`3978`) Martin Durant + + Index in dd.from_dask_array (:pr:`3991`) Tom Augspurger + + Making skiprows accept lists (:pr:`3975`) Julia Signell + + Fail early in fastparquet read for nonexistent column + (:pr:`3989`) Martin Durant + * Core + + Add support for npartitions=1 edge case in groupby (:pr:`4050`) + James Bourbeau + + Automatically wrap large arguments with dask.delayed in + map_blocks/partitions (:pr:`4002`) Matthew Rocklin + + Fuse linear chains of subgraphs (:pr:`3979`) Jim Crist + + Make multiprocessing context configurable (:pr:`3763`) Itamar + Turner-Trauring + * Documentation + + Extensive copy-editing (:pr:`4049`), (:pr:`4034`), (:pr:`4031`), + (:pr:`4020`), (:pr:`4021`), (:pr:`4022`), (:pr:`4023`), + (:pr:`4016`), (:pr:`4017`), (:pr:`4010`), (:pr:`3997`), + (:pr:`3996`), Miguel Farrajota + + Update shuffle method selection docs [skip ci] (:pr:`4048`) + James Bourbeau + + Remove docs/source/examples, point to examples.dask.org + (:pr:`4014`) Matthew 
Rocklin + + Replace readthedocs links with dask.org (:pr:`4008`) Matthew + Rocklin + + Updates DataFrame.to_hdf docstring for returned values [skip ci] + (:pr:`3992`) James Bourbeau + +------------------------------------------------------------------- +Mon Sep 17 14:54:42 UTC 2018 - Arun Persaud + +- update to version 0.19.2: + * Array + + apply_gufunc implements automatic infer of functions output + dtypes (:pr:`3936`) Markus Gonser + + Fix array histogram range error when array has nans (#3980) + James Bourbeau + + Issue 3937 follow up, int type checks. (#3956) Yu Feng + + from_array: add @martindurant's explaining of how hashing is + done for an array. (#3965) Mark Harfouche + + Support gradient with coordinate (#3949) Keisuke Fujii + * Core + + Fix use of has_keyword with partial in Python 2.7 (#3966) Mark + Harfouche + + Set pyarrow as default for HDFS (#3957) Matthew Rocklin + * Documentation + + Use dask_sphinx_theme (#3963) Matthew Rocklin + + Use JupyterLab in Binder links from main page Matthew Rocklin + + DOC: fixed sphinx syntax (#3960) Tom Augspurger + +------------------------------------------------------------------- +Sat Sep 8 04:33:17 UTC 2018 - Arun Persaud + +- update to version 0.19.1: + * Array + + Don't enforce dtype if result has no dtype (:pr:`3928`) Matthew + Rocklin + + Fix NumPy issubtype deprecation warning (:pr:`3939`) Bruce Merry + + Fix arg reduction tokens to be unique with different arguments + (:pr:`3955`) Tobias de Jong + + Coerce numpy integers to ints in slicing code (:pr:`3944`) Yu + Feng + + Linalg.norm ndim along axis partial fix (:pr:`3933`) Tobias de + Jong + * Dataframe + + Deterministic DataFrame.set_index (:pr:`3867`) George Sakkis + + Fix divisions in read_parquet when dealing with filters #3831 + #3930 (:pr:`3923`) (:pr:`3931`) @andrethrill + + Fixing returning type in categorical.as_known (:pr:`3888`) + Sriharsha Hatwar + + Fix DataFrame.assign for callables (:pr:`3919`) Tom Augspurger + + Include partitions with no 
width in repartition (:pr:`3941`) + Matthew Rocklin + + Don't constrict stage/k dtype in dataframe shuffle (:pr:`3942`) + Matthew Rocklin + * Documentation + + DOC: Add hint on how to render task graphs horizontally + (:pr:`3922`) Uwe Korn + + Add try-now button to main landing page (:pr:`3924`) Matthew + Rocklin + +------------------------------------------------------------------- +Sun Sep 2 17:00:59 UTC 2018 - arun@gmx.de + +- specfile: + * remove devel from noarch + +- update to version 0.19.0: + * Array + + Fix argtopk split_every bug (:pr:`3810`) Guido Imperiale + + Ensure result computing dask.array.isnull(`) always gives a + numpy array (:pr:`3825`) Stephan Hoyer + + Support concatenate for scipy.sparse in dask array (:pr:`3836`) + Matthew Rocklin + + Fix argtopk on 32-bit systems. (:pr:`3823`) Elliott Sales de + Andrade + + Normalize keys in rechunk (:pr:`3820`) Matthew Rocklin + + Allow shape of dask.array to be a numpy array (:pr:`3844`) Mark + Harfouche + + Fix numpy deprecation warning on tuple indexing (:pr:`3851`) + Tobias de Jong + + Rename ghost module to overlap (:pr:`3830`) `Robert Sare`_ + + Re-add the ghost import to da __init__ (:pr:`3861`) Jim Crist + + Ensure copy preserves masked arrays (:pr:`3852`) Tobias de Jong + * DataFrame + + Added dtype and sparse keywords to + :func:`dask.dataframe.get_dummies` (:pr:`3792`) Tom Augspurger + + Added :meth:`dask.dataframe.to_dask_array` for converting a Dask + Series or DataFrame to a Dask Array, possibly with known chunk + sizes (:pr:`3884`) Tom Augspurger + + Changed the behavior for :meth:`dask.array.asarray` for dask + dataframe and series inputs. Previously, the series was eagerly + converted to an in-memory NumPy array before creating a dask + array with known chunks sizes. This caused unexpectedly high + memory usage. 
Now, no intermediate NumPy array is created, and a + Dask array with unknown chunk sizes is returned (:pr:`3884`) Tom + Augspurger + + DataFrame.iloc (:pr:`3805`) Tom Augspurger + + When reading multiple paths, expand globs. (:pr:`3828`) Irina + Truong + + Added index column name after resample (:pr:`3833`) Eric + Bonfadini + + Add (lazy) shape property to dataframe and series (:pr:`3212`) + Henrique Ribeiro + + Fix failing hdfs test [test-hdfs] (:pr:`3858`) Jim Crist + + Fixes for pyarrow 0.10.0 release (:pr:`3860`) Jim Crist + + Rename to_csv keys for diagnostics (:pr:`3890`) Matthew Rocklin + + Match pandas warnings for concat sort (:pr:`3897`) Tom + Augspurger + + Include filename in read_csv (:pr:`3908`) Julia Signell + * Core + + Better error message on import when missing common dependencies + (:pr:`3771`) Danilo Horta + + Drop Python 3.4 support (:pr:`3840`) Jim Crist + + Remove expired deprecation warnings (:pr:`3841`) Jim Crist + + Add DASK_ROOT_CONFIG environment variable (:pr:`3849`) `Joe + Hamman`_ + + Don't cull in local scheduler, do cull in delayed (:pr:`3856`) + Jim Crist + + Increase conda download retries (:pr:`3857`) Jim Crist + + Add python_requires and Trove classifiers (:pr:`3855`) @hugovk + + Fix collections.abc deprecation warnings in Python 3.7.0 + (:pr:`3876`) Jan Margeta + + Allow dot jpeg to xfail in visualize tests (:pr:`3896`) Matthew + Rocklin + + Add Python 3.7 to travis.yml (:pr:`3894`) Matthew Rocklin + + Add expand_environment_variables to dask.config (:pr:`3893`) + `Joe Hamman`_ + * Docs + + Fix typo in import statement of diagnostics (:pr:`3826`) John + Mrziglod + + Add link to YARN docs (:pr:`3838`) Jim Crist + + fix of minor typos in landing page index.html (:pr:`3746`) + Christoph Moehl + + Update delayed-custom.rst (:pr:`3850`) Anderson Banihirwe + + DOC: clarify delayed docstring (:pr:`3709`) Scott Sievert + + Add new presentations (:pr:`3880`) @javad94 + + Add dask array normalize_chunks to documentation (:pr:`3878`) + 
Daniel Rothenberg + + Docs: Fix link to snakeviz (:pr:`3900`) Hans Moritz Günther + + Add missing ` to docstring (:pr:`3915`) @rtobar + +- changes from version 0.18.2: + * Array + + Reimplemented argtopk to make it release the GIL (:pr:`3610`) + Guido Imperiale + + Don't overlap on non-overlapped dimensions in map_overlap + (:pr:`3653`) Matthew Rocklin + + Fix linalg.tsqr for dimensions of uncertain length (:pr:`3662`) + Jeremy Chen + + Break apart uneven array-of-int slicing to separate chunks + (:pr:`3648`) Matthew Rocklin + + Align auto chunks to provided chunks, rather than shape + (:pr:`3679`) Matthew Rocklin + + Adds endpoint and retstep support for linspace (:pr:`3675`) + James Bourbeau + + Implement .blocks accessor (:pr:`3689`) Matthew Rocklin + + Add block_info keyword to map_blocks functions (:pr:`3686`) + Matthew Rocklin + + Slice by dask array of ints (:pr:`3407`) Guido Imperiale + + Support dtype in arange (:pr:`3722`) Guido Imperiale + + Fix argtopk with uneven chunks (:pr:`3720`) Guido Imperiale + + Raise error when replace=False in da.choice (:pr:`3765`) James + Bourbeau + + Update chunks in Array.__setitem__ (:pr:`3767`) Itamar + Turner-Trauring + + Add a chunksize convenience property (:pr:`3777`) Jacob + Tomlinson + + Fix and simplify array slicing behavior when step < 0 + (:pr:`3702`) Ziyao Wei + + Ensure to_zarr with return_stored True returns a Dask Array + (:pr:`3786`) John A Kirkham + * Bag + + Add last_endline optional parameter in to_textfiles (:pr:`3745`) + George Sakkis + * Dataframe + + Add aggregate function for rolling objects (:pr:`3772`) Gerome + Pistre + + Properly tokenize cumulative groupby aggregations (:pr:`3799`) + Cloves Almeida + * Delayed + + Add the @ operator to the delayed objects (:pr:`3691`) Mark + Harfouche + + Add delayed best practices to documentation (:pr:`3737`) Matthew + Rocklin + + Fix @delayed decorator for methods and add tests (:pr:`3757`) + Ziyao Wei + * Core + + Fix extra progressbar (:pr:`3669`) Mike 
Neish + + Allow tasks back onto ordering stack if they have one dependency + (:pr:`3652`) Matthew Rocklin + + Prefer end-tasks with low numbers of dependencies when ordering + (:pr:`3588`) Tom Augspurger + + Add assert_eq to top-level modules (:pr:`3726`) Matthew Rocklin + + Test that dask collections can hold scipy.sparse arrays + (:pr:`3738`) Matthew Rocklin + + Fix setup of lz4 decompression functions (:pr:`3782`) Elliott + Sales de Andrade + + Add datasets module (:pr:`3780`) Matthew Rocklin + +------------------------------------------------------------------- +Sun Jun 24 01:07:09 UTC 2018 - arun@gmx.de + +- update to version 0.18.1: + * Array + + from_array now supports scalar types and nested lists/tuples in + input, just like all numpy functions do. It also produces a + simpler graph when the input is a plain ndarray (:pr:`3556`) + Guido Imperiale + + Fix slicing of big arrays due to cumsum dtype bug (:pr:`3620`) + Marco Rossi + + Add Dask Array implementation of pad (:pr:`3578`) John A Kirkham + + Fix array random API examples (:pr:`3625`) James Bourbeau + + Add average function to dask array (:pr:`3640`) James Bourbeau + + Tokenize ghost_internal with axes (:pr:`3643`) Matthew Rocklin + + from_array: special handling for ndarray, list, and scalar types + (:pr:`3568`) Guido Imperiale + + Add outer for Dask Arrays (:pr:`3658`) John A Kirkham + * DataFrame + + Add Index.to_series method (:pr:`3613`) Henrique Ribeiro + + Fix missing partition columns in pyarrow-parquet (:pr:`3636`) + Martin Durant + * Core + + Minor tweaks to CI (:pr:`3629`) Guido Imperiale + + Add back dask.utils.effective_get (:pr:`3642`) Matthew Rocklin + + DASK_CONFIG dictates config write location (:pr:`3621`) Jim + Crist + + Replace 'collections' key in unpack_collections with unique key + (:pr:`3632`) Yu Feng + + Avoid deepcopy in dask.config.set (:pr:`3649`) Matthew Rocklin + +- changes from version 0.18.0: + * Array + + Add to/read_zarr for Zarr-format datasets and arrays + 
(:pr:`3460`) Martin Durant + + Experimental addition of generalized ufunc support, + apply_gufunc, gufunc, and as_gufunc (:pr:`3109`) (:pr:`3526`) + (:pr:`3539`) Markus Gonser + + Avoid unnecessary rechunking tasks (:pr:`3529`) Matthew Rocklin + + Compute dtypes at runtime for fft (:pr:`3511`) Matthew Rocklin + + Generate UUIDs for all da.store operations (:pr:`3540`) Martin + Durant + + Correct internal dimension of Dask's SVD (:pr:`3517`) John A + Kirkham + + BUG: do not raise IndexError for identity slice in array.vindex + (:pr:`3559`) Scott Sievert + + Adds isneginf and isposinf (:pr:`3581`) John A Kirkham + + Drop Dask Array's learn module (:pr:`3580`) John A Kirkham + + added sfqr (short-and-fat) as a counterpart to tsqr… + (:pr:`3575`) Jeremy Chen + + Allow 0-width chunks in dask.array.rechunk (:pr:`3591`) Marc + Pfister + + Document Dask Array's nan_to_num in public API (:pr:`3599`) John + A Kirkham + + Show block example (:pr:`3601`) John A Kirkham + + Replace token= keyword with name= in map_blocks (:pr:`3597`) + Matthew Rocklin + + Disable locking in to_zarr (needed for using to_zarr in a + distributed context) (:pr:`3607`) John A Kirkham + + Support Zarr Arrays in to_zarr/from_zarr (:pr:`3561`) John A + Kirkham + + Added recursion to array/linalg/tsqr to better manage the single + core bottleneck (:pr:`3586`) `Jeremy Chan`_ + * Dataframe + + Add to/read_json (:pr:`3494`) Martin Durant + + Adds index to unsupported arguments for DataFrame.rename method + (:pr:`3522`) James Bourbeau + + Adds support to subset Dask DataFrame columns using + numpy.ndarray, pandas.Series, and pandas.Index objects + (:pr:`3536`) James Bourbeau + + Raise error if meta columns do not match dataframe (:pr:`3485`) + Christopher Ren + + Add index to unsupported argument for DataFrame.rename + (:pr:`3522`) James Bourbeau + + Adds support for subsetting DataFrames with pandas Index/Series + and numpy ndarrays (:pr:`3536`) James Bourbeau + + Dataframe sample method docstring fix 
(:pr:`3566`) James + Bourbeau + + fixes dd.read_json to infer file compression (:pr:`3594`) Matt + Lee + + Adds n to sample method (:pr:`3606`) James Bourbeau + + Add fastparquet ParquetFile object support (:pr:`3573`) + @andrethrill + * Bag + + Rename method= keyword to shuffle= in bag.groupby (:pr:`3470`) + Matthew Rocklin + * Core + + Replace get= keyword with scheduler= keyword (:pr:`3448`) + Matthew Rocklin + + Add centralized dask.config module to handle configuration for + all Dask subprojects (:pr:`3432`) (:pr:`3513`) (:pr:`3520`) + Matthew Rocklin + + Add dask-ssh CLI Options and Description. (:pr:`3476`) @beomi + + Read whole files fix regardless of header for HTTP (:pr:`3496`) + Martin Durant + + Adds synchronous scheduler syntax to debugging docs (:pr:`3509`) + James Bourbeau + + Replace dask.set_options with dask.config.set (:pr:`3502`) + Matthew Rocklin + + Update sphinx readthedocs-theme (:pr:`3516`) Matthew Rocklin + + Introduce "auto" value for normalize_chunks (:pr:`3507`) Matthew + Rocklin + + Fix check in configuration with env=None (:pr:`3562`) Simon + Perkins + + Update sizeof definitions (:pr:`3582`) Matthew Rocklin + + Remove --verbose flag from travis-ci (:pr:`3477`) Matthew + Rocklin + + Remove "da.random" from random array keys (:pr:`3604`) Matthew + Rocklin + +------------------------------------------------------------------- +Mon May 21 03:57:53 UTC 2018 - arun@gmx.de + +- update to version 0.17.5: + * Compatibility with pandas 0.23.0 (:pr:`3499`) Tom Augspurger + +------------------------------------------------------------------- +Sun May 6 05:33:50 UTC 2018 - arun@gmx.de + +- update to version 0.17.4: + * Dataframe + + Add support for indexing Dask DataFrames with string subclasses + (:pr:`3461`) James Bourbeau + + Allow using both sorted_index and chunksize in read_hdf + (:pr:`3463`) Pierre Bartet + + Pass filesystem to arrow piece reader (:pr:`3466`) Martin Durant + + Switches to using dask.compat string_types (#3462) James + 
Bourbeau + +- changes from version 0.17.3: + * Array + + Add einsum for Dask Arrays (:pr:`3412`) Simon Perkins + + Add piecewise for Dask Arrays (:pr:`3350`) John A Kirkham + + Fix handling of nan in broadcast_shapes (:pr:`3356`) John A + Kirkham + + Add isin for dask arrays (:pr:`3363`). Stephan Hoyer + + Overhauled topk for Dask Arrays: faster algorithm, particularly + for large k's; added support for multiple axes, recursive + aggregation, and an option to pick the bottom k elements + instead. (:pr:`3395`) Guido Imperiale + + The topk API has changed from topk(k, array) to the more + conventional topk(array, k). The legacy API still works but is + now deprecated. (:pr:`2965`) Guido Imperiale + + New function argtopk for Dask Arrays (:pr:`3396`) Guido + Imperiale + + Fix handling partial depth and boundary in map_overlap + (:pr:`3445`) John A Kirkham + + Add gradient for Dask Arrays (:pr:`3434`) John A Kirkham + * DataFrame + + Allow t as shorthand for table in to_hdf for pandas + compatibility (:pr:`3330`) Jörg Dietrich + + Added top level isna method for Dask DataFrames (:pr:`3294`) + Christopher Ren + + Fix selection on partition column on read_parquet for + engine="pyarrow" (:pr:`3207`) Uwe Korn + + Added DataFrame.squeeze method (:pr:`3366`) Christopher Ren + + Added infer_divisions option to read_parquet to specify whether + read engines should compute divisions (:pr:`3387`) Jon Mease + + Added support for inferring division for engine="pyarrow" + (:pr:`3387`) Jon Mease + + Provide more informative error message for meta= errors + (:pr:`3343`) Matthew Rocklin + + add orc reader (:pr:`3284`) Martin Durant + + Default compression for parquet now always Snappy, in line with + pandas (:pr:`3373`) Martin Durant + + Fixed bug in Dask DataFrame and Series comparisons with NumPy + scalars (:pr:`3436`) James Bourbeau + + Remove outdated requirement from repartition docstring + (:pr:`3440`) Jörg Dietrich + + Fixed bug in aggregation when only a Series is selected + 
(:pr:`3446`) Jörg Dietrich + + Add default values to make_timeseries (:pr:`3421`) Matthew + Rocklin + * Core + + Support traversing collections in persist, visualize, and + optimize (:pr:`3410`) Jim Crist + + Add schedule= keyword to compute and persist. This replaces + common use of the get= keyword (:pr:`3448`) Matthew Rocklin + +------------------------------------------------------------------- +Sat Mar 24 18:48:24 UTC 2018 - arun@gmx.de + +- update to version 0.17.2: + * Array + + Add broadcast_arrays for Dask Arrays (:pr:`3217`) John A Kirkham + + Add bitwise_* ufuncs (:pr:`3219`) John A Kirkham + + Add optional axis argument to squeeze (:pr:`3261`) John A + Kirkham + + Validate inputs to atop (:pr:`3307`) Matthew Rocklin + + Avoid calls to astype in concatenate if all parts have the same + dtype (:pr:`3301`) `Martin Durant`_ + * DataFrame + + Fixed bug in shuffle due to aggressive truncation (:pr:`3201`) + Matthew Rocklin + + Support specifying categorical columns on read_parquet with + categories=[…] for engine="pyarrow" (:pr:`3177`) Uwe Korn + + Add dd.tseries.Resampler.agg (:pr:`3202`) Richard Postelnik + + Support operations that mix dataframes and arrays (:pr:`3230`) + Matthew Rocklin + + Support extra Scalar and Delayed args in + dd.groupby._Groupby.apply (:pr:`3256`) Gabriele Lanaro + * Bag + + Support joining against single-partitioned bags and delayed + objects (:pr:`3254`) Matthew Rocklin + * Core + + Fixed bug when using unexpected but hashable types for keys + (:pr:`3238`) Daniel Collins + + Fix bug in task ordering so that we break ties consistently with + the key name (:pr:`3271`) Matthew Rocklin + + Avoid sorting tasks in order when the number of tasks is very + large (:pr:`3298`) Matthew Rocklin + +------------------------------------------------------------------- +Fri Mar 2 19:52:06 UTC 2018 - sebix+novell.com@sebix.at + +- correctly package bytecode +- use %license macro + 
+------------------------------------------------------------------- +Fri Feb 23 03:52:52 UTC 2018 - arun@gmx.de + +- update to version 0.17.1: + * Array + + Corrected dimension chunking in indices (:issue:`3166`, + :pr:`3167`) Simon Perkins + + Inline store_chunk calls for store's return_stored option + (:pr:`3153`) John A Kirkham + + Compatibility with struct dtypes for NumPy 1.14.1 release + (:pr:`3187`) Matthew Rocklin + * DataFrame + + Bugfix to allow column assignment of pandas + datetimes(:pr:`3164`) Max Epstein + * Core + + New file-system for HTTP(S), allowing direct loading from + specific URLs (:pr:`3160`) `Martin Durant`_ + + Fix bug when tokenizing partials with no keywords (:pr:`3191`) + Matthew Rocklin + + Use more recent LZ4 API (:pr:`3157`) `Thrasibule`_ + + Introduce output stream parameter for progress bar (:pr:`3185`) + `Dieter Weber`_ + +------------------------------------------------------------------- +Sat Feb 10 17:26:43 UTC 2018 - arun@gmx.de + +- update to version 0.17.0: + * Array + + Added a support object-type arrays for nansum, nanmin, and + nanmax (:issue:`3133`) Keisuke Fujii + + Update error handling when len is called with empty chunks + (:issue:`3058`) Xander Johnson + + Fixes a metadata bug with store's return_stored option + (:pr:`3064`) John A Kirkham + + Fix a bug in optimization.fuse_slice to properly handle when + first input is None (:pr:`3076`) James Bourbeau + + Support arrays with unknown chunk sizes in percentile + (:pr:`3107`) Matthew Rocklin + + Tokenize scipy.sparse arrays and np.matrix (:pr:`3060`) Roman + Yurchak + * DataFrame + + Support month timedeltas in repartition(freq=...) 
(:pr:`3110`) + Matthew Rocklin + + Avoid mutation in dataframe groupby tests (:pr:`3118`) Matthew + Rocklin + + read_csv, read_table, and read_parquet accept iterables of paths + (:pr:`3124`) Jim Crist + + Deprecates the dd.to_delayed function in favor of the existing + method (:pr:`3126`) Jim Crist + + Return dask.arrays from df.map_partitions calls when the UDF + returns a numpy array (:pr:`3147`) Matthew Rocklin + + Change handling of columns and index in dd.read_parquet to be + more consistent, especially in handling of multi-indices + (:pr:`3149`) Jim Crist + + fastparquet append=True allowed to create new dataset + (:pr:`3097`) `Martin Durant`_ + + dtype rationalization for sql queries (:pr:`3100`) `Martin + Durant`_ + * Bag + + Document bag.map_partitions function may receive either a list or + generator. (:pr:`3150`) Nir + * Core + + Change default task ordering to prefer nodes with few dependents + and then many downstream dependencies (:pr:`3056`) Matthew + Rocklin + + Add color= option to visualize to color by task order + (:pr:`3057`) (:pr:`3122`) Matthew Rocklin + + Deprecate dask.bytes.open_text_files (:pr:`3077`) Jim Crist + + Remove short-circuit hdfs reads handling due to maintenance + costs. May be re-added in a more robust manner later + (:pr:`3079`) Jim Crist + + Add dask.base.optimize for optimizing multiple collections + without computing. (:pr:`3071`) Jim Crist + + Rename dask.optimize module to dask.optimization (:pr:`3071`) + Jim Crist + + Change task ordering to do a full traversal (:pr:`3066`) Matthew + Rocklin + + Adds an optimize_graph keyword to all to_delayed methods to + allow controlling whether optimizations occur on + conversion. 
(:pr:`3126`) Jim Crist + + Support using pyarrow for hdfs integration (:pr:`3123`) Jim + Crist + + Move HDFS integration and tests into dask repo (:pr:`3083`) Jim + Crist + + Remove write_bytes (:pr:`3116`) Jim Crist + +------------------------------------------------------------------- +Thu Jan 11 23:56:36 UTC 2018 - arun@gmx.de + +- specfile: + * update copyright year + +- update to version 0.16.1: + * Array + + Fix handling of scalar percentile values in "percentile" + (:pr:`3021`) `James Bourbeau`_ + + Prevent "bool()" coercion from calling compute (:pr:`2958`) + `Albert DeFusco`_ + + Add "matmul" (:pr:`2904`) `John A Kirkham`_ + + Support N-D arrays with "matmul" (:pr:`2909`) `John A Kirkham`_ + + Add "vdot" (:pr:`2910`) `John A Kirkham`_ + + Explicit "chunks" argument for "broadcast_to" (:pr:`2943`) + `Stephan Hoyer`_ + + Add "meshgrid" (:pr:`2938`) `John A Kirkham`_ and (:pr:`3001`) + `Markus Gonser`_ + + Preserve singleton chunks in "fftshift"/"ifftshift" (:pr:`2733`) + `John A Kirkham`_ + + Fix handling of negative indexes in "vindex" and raise errors + for out of bounds indexes (:pr:`2967`) `Stephan Hoyer`_ + + Add "flip", "flipud", "fliplr" (:pr:`2954`) `John A Kirkham`_ + + Add "float_power" ufunc (:pr:`2962`) (:pr:`2969`) `John A + Kirkham`_ + + Compatibility for changes to structured arrays in the upcoming + NumPy 1.14 release (:pr:`2964`) `Tom Augspurger`_ + + Add "block" (:pr:`2650`) `John A Kirkham`_ + + Add "frompyfunc" (:pr:`3030`) `Jim Crist`_ + * DataFrame + + Fixed naming bug in cumulative aggregations (:issue:`3037`) + `Martijn Arts`_ + + Fixed "dd.read_csv" when "names" is given but "header" is not + set to "None" (:issue:`2976`) `Martijn Arts`_ + + Fixed "dd.read_csv" so that passing instances of + "CategoricalDtype" in "dtype" will result in known categoricals + (:pr:`2997`) `Tom Augspurger`_ + + Prevent "bool()" coercion from calling compute (:pr:`2958`) + `Albert DeFusco`_ + + "DataFrame.read_sql()" (:pr:`2928`) to an empty database 
tables + returns an empty dask dataframe `Apostolos Vlachopoulos`_ + + Compatibility for reading Parquet files written by PyArrow 0.8.0 + (:pr:`2973`) `Tom Augspurger`_ + + Correctly handle the column name (`df.columns.name`) when + reading in "dd.read_parquet" (:pr:`2973`) `Tom Augspurger`_ + + Fixed "dd.concat" losing the index dtype when the data contained + a categorical (:issue:`2932`) `Tom Augspurger`_ + + Add "dd.Series.rename" (:pr:`3027`) `Jim Crist`_ + + "DataFrame.merge()" (:pr:`2960`) now supports merging on a + combination of columns and the index `Jon Mease`_ + + Removed the deprecated "dd.rolling*" methods, in preparation for + their removal in the next pandas release (:pr:`2995`) `Tom + Augspurger`_ + + Fix metadata inference bug in which single-partition series were + mistakenly special cased (:pr:`3035`) `Jim Crist`_ + + Add support for "Series.str.cat" (:pr:`3028`) `Jim Crist`_ + * Core + + Improve 32-bit compatibility (:pr:`2937`) `Matthew Rocklin`_ + + Change task prioritization to avoid upwards branching + (:pr:`3017`) `Matthew Rocklin`_ + +------------------------------------------------------------------- +Sun Nov 19 05:11:59 UTC 2017 - arun@gmx.de + +- update to version 0.16.0: + * Fix install of fastparquet on travis (#2897) + * Fix port for bokeh dashboard (#2889) + * fix hdfs3 version + * Modify hdfs import to point to hdfs3 (#2894) + * Explicitly pass in pyarrow filesystem for parquet (#2881) + * COMPAT: Ensure lists for multiple groupby keys (#2892) + * Avoid list index error in repartition_freq (#2873) + * Finish moving `infer_storage_options` (#2886) + * Support arrow in `to_parquet`. Several other parquet + cleanups. (#2868) + * Bugfix: Filesystem object not passed to pyarrow reader (#2527) + * Fix py34 build + * Fixup s3 tests (#2875) + * Close resource profiler process on __exit__ (#2871) + * Add changelog for to_parquet changes. 
[ci skip] + * A few parquet cleanups (#2867) + * Fixed fillna with Series (#2810) + * Error nicely on parse dates failure in read_csv (#2863) + * Fix empty dataframe partitioning for numpy 1.10.4 (#2862) + * Test `unique`'s inverse mapping's shape (#2857) + * Move `thread_state` out of the top namespace (#2858) + * Explain unique's steps (#2856) + * fix and test for issue #2811 (#2818) + * Minor tweaks to `_unique_internal` optional result handling + (#2855) + * Update dask interface during XArray integration (#2847) + * Remove unnecessary map_partitions in aggregate (#2712) + * Simplify `_unique_internal` (#2850) + * Add more tests for read_parquet(engine='pyarrow') (#2822) + * Do not raise exception when calling set_index on empty dataframe + #2819 (#2827) + * Test unique on more data (#2846) + * Do not except on set_index on text column with empty partitions + #2820 (#2831) + * Compat for bokeh 0.12.10 (#2844) + * Support `return_*` arguments with `unique` (#2779) + * Fix installing of pandas dev (#2838) + * Squash a few warnings in dask.array (#2833) + * Array optimizations don't elide some getter calls (#2826) + * test against pandas rc (#2814) + * df.astype(categorical_dtype) -> known categoricals (#2835) + * Fix cloudpickle test (#2836) + * BUG: Quantile with missing data (#2791) + * API: remove dask.async (#2828) + * Adds comma to flake8 section in setup.cfg (#2817) + * Adds asarray and asanyarray to the dask.array public API (#2787) + * flake8 now checks bare excepts (#2816) + * CI: Update for new flake8 / pycodestyle (#2808) + * Fix concat series bug (#2800) + * Typo in the docstring of read_parquet's filters param (#2806) + * Docs update (#2803) + * minor doc changes in bag.core (#2797) + * da.random.choice works with array args (#2781) + * Support broadcasting 0-length dimensions (#2784) + * ResourceProfiler plot works with single point (#2778) + * Implement Dask Array's unique to be lazy (#2775) + * Dask Collection Interface + * Reduce test memory 
usage (#2782) + * Deprecate vnorm (#2773) + * add auto-import of gcsfs (#2776) + * Add allclose (#2771) + * Remove `random.different_seeds` from API docs (#2772) + * Follow-up for atleast_nd (#2765) + * Use get_worker().client.get if available (#2762) + * Link PR for "Allow tuples as sharedict keys" (#2766) + * Allow tuples as sharedict keys (#2763) + * update docs to use flatten vs concat (#2764) + * Add atleast_nd functions (#2760) + * Consolidate changelog for 0.15.4 (#2759) + * Add changelog template for future date (#2758) + +------------------------------------------------------------------- +Mon Oct 30 06:16:22 UTC 2017 - arun@gmx.de + +- update to version 0.15.4: + * Drop s3fs requirement (#2750) + * Support -1 as an alias for dimension size in chunks (#2749) + * Handle zero dimension when rechunking (#2747) + * Pandas 0.21 compatibility (#2737) + * API: Add `.str` accessor for Categorical with object dtype (#2743) + * Fix install failures + * Reduce memory usage + * A few test cleanups + * Fix #2720 (#2729) + * Pass on file_scheme to fastparquet (#2714) + * Support indexing with np.int (#2719) + * Tree reduction support for dask.bag.Bag.foldby (#2710) + * Update link to IPython parallel docs (#2715) + * Call mkdir from correct namespace in array.to_npy_stack. 
(#2709) + * add int96 times to parquet writer (#2711) + +------------------------------------------------------------------- +Sun Sep 24 21:28:49 UTC 2017 - arun@gmx.de + +- update to version 0.15.3: + * add .github/PULL_REQUEST_TEMPLATE.md file + * Make `y` optional in dask.array.learn (#2701) + * Add apply_over_axes (#2702) + * Use apply_along_axis name in Dask (#2704) + * Tweak apply_along_axis's pre-NumPy 1.13.0 error (#2703) + * Add apply_along_axis (#2698) + * Use travis conditional builds (#2697) + * Skip days in daily_stock that have nan values (#2693) + * TST: Have array assert_eq check scalars (#2681) + * Add schema keyword to read_sql (#2582) + * Only install pytest-runner if needed (#2692) + * Remove resize tool from bokeh plots (#2688) + * Add ptp (#2691) + * Catch warning from numpy in subs (#2457) + * Publish Series methods in dataframe api (#2686) + * Fix norm keepdims (#2683) + * Dask array slicing with boolean arrays (#2658) + * repartition works with mixed categoricals (#2676) + * Merge pull request #2667 from martindurant/parquet_file_schema + * Fix for parquet file schemes + * Optional axis argument for cumulative functions (#2664) + * Remove partial_by_order + * Support literals in atop + * [ci skip] Add flake8 note in developer doc page (#2662) + * Add filenames return for ddf.to_csv and bag.to_textfiles as they + both… (#2655) + * CLN: Remove redundant code, fix typos (#2652) + * [docs] company name change from Continuum to Anaconda (#2660) + * Fix what happened when combining partition_on and append in + to_parquet (#2645) + * WIP: Add user defined aggregations (#2344) + * [docs] new cheatsheet (#2649) + * Masked arrays (#2301) + * Indexing with an unsigned integer array (#2647) + * ENH: Allow the groupby by param to handle columns and index levels + (#2636) + * update copyright date (#2642) + * python setup.py test runs py.test (#2641) + * Avoid using operator.itemgetter in dask.dataframe (#2638) + * Add `*_like` array creation functions 
(#2640) + * Consistent slicing names (#2601) + * Replace Continuum Analytics with Anaconda Inc. (#2631) + * Implement Series.str[index] (#2634) + * Support complex data with vnorm (#2621) + +- changes from version 0.15.2: + * BUG: setitem should update divisions (#2622) + * Allow dataframe.loc with numpy array (#2615) + * Add link to Stack Overflow's mcve docpage to support docs (#2612) + * Improve dtype inference and reflection (#2571) + * Add ediff1d (#2609) + * Optimize concatenate on singleton sequences (#2610) + * Add diff (#2607) + * Document norm in Dask Array API (#2605) + * Add norm (#2597) + * Don't check for memory leaks in distributed tests (#2603) + * Include computed collection within sharedict in delayed (#2583) + * Reorg array (#2595) + * Remove `expand` parameter from df.str.split (#2593) + * Normalize `meta` on call to `dd.from_delayed` (#2591) + * Remove bare `except:` blocks and test that none exist. (#2590) + * Adds choose method to dask.array.Array (#2584) + * Generalize vindex in dask.array (#2573) + * Clear `_cached_keys` on name change in dask.array (#2572) + * Don't render None for unknown divisions (#2570) + * Add missing initialization to CacheProfiler (#2550) + * Add argwhere, *nonzero, where (cond) (#2539) + * Fix indices error message (#2565) + * Fix and secure some references (#2563) + * Allows for read_hdf to accept an iterable of files (#2547) + * Allow split on rechunk on first pass (#2560) + * Improvements to dask.array.where (#2549) + * Adds isin method to dask.dataframe.DataFrame (#2558) + * Support dask array conditional in compress (#2555) + * Clarify ResourceProfiler docstring [ci skip] (#2553) + * In compress, use Dask to expand condition array (#2545) + * Support compress with axis as None (#2541) + * df.idxmax/df.idxmin work with empty partitions (#2542) + * FIX typo in accumulate docstring (#2552) + * da.where works with non-bool condition (#2543) + * da.repeat works with negative axis (#2544) + * Check metadata in 
`dd.from_delayed` (#2534) + * TST: clean up test directories in shuffle (#2535) + * Do not attempt to compute divisions on empty dataframe. (#2529) + * Remove deprecated bag behavior (#2525) + * Updates read_hdf docstring (#2518) + * Add dd.to_timedelta (#2523) + * Better error message for read_csv (#2522) + * Remove spurious keys from map_overlap graph (#2520) + * Do not compare x.dim with None in array. (#1847) + * Support concat for categorical MultiIndex (#2514) + * Support for callables in df.assign (#2513) + +------------------------------------------------------------------- +Thu May 4 22:24:37 UTC 2017 - toddrme2178@gmail.com + +- Implement single-spec version +- Update source URL. +- Split classes into own subpackages to lighten base dependencies. +- Update to version 0.15.1 + * Add storage_options to to_textfiles and to_csv (:pr:`2466`) + * Rechunk and simplify rfftfreq (:pr:`2473`), (:pr:`2475`) + * Better support ndarray subclasses (:pr:`2486`) + * Import star in dask.distributed (:pr:`2503`) + * Threadsafe cache handling with tokenization (:pr:`2511`) +- Update to version 0.15.0 + + Array + * Add dask.array.stats submodule (:pr:`2269`) + * Support ``ufunc.outer`` (:pr:`2345`) + * Optimize fancy indexing by reducing graph overhead (:pr:`2333`) (:pr:`2394`) + * Faster array tokenization using alternative hashes (:pr:`2377`) + * Added the matmul ``@`` operator (:pr:`2349`) + * Improved coverage of the ``numpy.fft`` module (:pr:`2320`) (:pr:`2322`) (:pr:`2327`) (:pr:`2323`) + * Support NumPy's ``__array_ufunc__`` protocol (:pr:`2438`) + + Bag + * Fix bug where reductions on bags with no partitions would fail (:pr:`2324`) + * Add broadcasting and variadic ``db.map`` top-level function.
Also remove + auto-expansion of tuples as map arguments (:pr:`2339`) + * Rename ``Bag.concat`` to ``Bag.flatten`` (:pr:`2402`) + + DataFrame + * Parquet improvements (:pr:`2277`) (:pr:`2422`) + + Core + * Move dask.async module to dask.local (:pr:`2318`) + * Support callbacks with nested scheduler calls (:pr:`2397`) + * Support pathlib.Path objects as uris (:pr:`2310`) +- Update to version 0.14.3 + + DataFrame + * Pandas 0.20.0 support +- Update to version 0.14.2 + + Array + * Add da.indices (:pr:`2268`), da.tile (:pr:`2153`), da.roll (:pr:`2135`) + * Simultaneously support drop_axis and new_axis in da.map_blocks (:pr:`2264`) + * Rechunk and concatenate work with unknown chunksizes (:pr:`2235`) and (:pr:`2251`) + * Support non-numpy container arrays, notably sparse arrays (:pr:`2234`) + * Tensordot contracts over multiple axes (:pr:`2186`) + * Allow delayed targets in da.store (:pr:`2181`) + * Support interactions against lists and tuples (:pr:`2148`) + * Constructor plugins for debugging (:pr:`2142`) + * Multi-dimensional FFTs (single chunk) (:pr:`2116`) + + Bag + * to_dataframe enforces consistent types (:pr:`2199`) + + DataFrame + * Set_index always fully sorts the index (:pr:`2290`) + * Support compatibility with pandas 0.20.0 (:pr:`2249`), (:pr:`2248`), and (:pr:`2246`) + * Support Arrow Parquet reader (:pr:`2223`) + * Time-based rolling windows (:pr:`2198`) + * Repartition can now create more partitions, not just less (:pr:`2168`) + + Core + * Always use absolute paths when on POSIX file system (:pr:`2263`) + * Support user provided graph optimizations (:pr:`2219`) + * Refactor path handling (:pr:`2207`) + * Improve fusion performance (:pr:`2129`), (:pr:`2131`), and (:pr:`2112`) +- Update to version 0.14.1 + + Array + * Micro-optimize optimizations (:pr:`2058`) + * Change slicing optimizations to avoid fusing raw numpy arrays (:pr:`2075`) + (:pr:`2080`) + * Dask.array operations now work on numpy arrays (:pr:`2079`) + * Reshape now works in a much broader set 
of cases (:pr:`2089`) + * Support deepcopy python protocol (:pr:`2090`) + * Allow user-provided FFT implementations in ``da.fft`` (:pr:`2093`) + + Bag + + DataFrame + * Fix to_parquet with empty partitions (:pr:`2020`) + * Optional ``npartitions='auto'`` mode in ``set_index`` (:pr:`2025`) + * Optimize shuffle performance (:pr:`2032`) + * Support efficient repartitioning along time windows like + ``repartition(freq='12h')`` (:pr:`2059`) + * Improve speed of categorize (:pr:`2010`) + * Support single-row dataframe arithmetic (:pr:`2085`) + * Automatically avoid shuffle when setting index with a sorted column + (:pr:`2091`) + * Improve handling of integer-na handling in read_csv (:pr:`2098`) + + Delayed + * Repeated attribute access on delayed objects uses the same key (:pr:`2084`) + + Core + * Improve naming of nodes in dot visuals to avoid generic ``apply`` + (:pr:`2070`) + * Ensure that worker processes have different random seeds (:pr:`2094`) +- Update to version 0.14.0 + + Array + * Fix corner cases with zero shape and misaligned values in ``arange`` + * Improve concatenation efficiency (:pr:`1923`) + * Avoid hashing in ``from_array`` if name is provided (:pr:`1972`) + + Bag + * Repartition can now increase number of partitions (:pr:`1934`) + * Fix bugs in some reductions with empty partitions (:pr:`1939`), (:pr:`1950`), + (:pr:`1953`) + + DataFrame + * Support non-uniform categoricals (:pr:`1877`), (:pr:`1930`) + * Groupby cumulative reductions (:pr:`1909`) + * DataFrame.loc indexing now supports lists (:pr:`1913`) + * Improve multi-level groupbys (:pr:`1914`) + * Improved HTML and string repr for DataFrames (:pr:`1637`) + * Parquet append (:pr:`1940`) + * Add ``dd.demo.daily_stock`` function for teaching (:pr:`1992`) + + Delayed + * Add ``traverse=`` keyword to delayed to optionally avoid traversing nested + data structures (:pr:`1899`) + * Support Futures in from_delayed functions (:pr:`1961`) + * Improve serialization of decorated delayed functions 
(:pr:`1969`) + + Core + * Improve windows path parsing in corner cases (:pr:`1910`) + * Rename tasks when fusing (:pr:`1919`) + * Add top level ``persist`` function (:pr:`1927`) + * Propagate ``errors=`` keyword in byte handling (:pr:`1954`) + * Dask.compute traverses Python collections (:pr:`1975`) + * Structural sharing between graphs in dask.array and dask.delayed (:pr:`1985`) +- Update to version 0.13.0 + + Array + * Mandatory dtypes on dask.array. All operations maintain dtype information + and UDF functions like map_blocks now require a dtype= keyword if it can not + be inferred. (:pr:`1755`) + * Support arrays without known shapes, such as arises when slicing arrays with + arrays or converting dataframes to arrays (:pr:`1838`) + * Support mutation by setting one array with another (:pr:`1840`) + * Tree reductions for covariance and correlations. (:pr:`1758`) + * Add SerializableLock for better use with distributed scheduling (:pr:`1766`) + * Improved atop support (:pr:`1800`) + * Rechunk optimization (:pr:`1737`), (:pr:`1827`) + + Bag + * Avoid wrong results when recomputing the same groupby twice (:pr:`1867`) + + DataFrame + * Add ``map_overlap`` for custom rolling operations (:pr:`1769`) + * Add ``shift`` (:pr:`1773`) + * Add Parquet support (:pr:`1782`) (:pr:`1792`) (:pr:`1810`), (:pr:`1843`), + (:pr:`1859`), (:pr:`1863`) + * Add missing methods combine, abs, autocorr, sem, nsmallest, first, last, + prod, (:pr:`1787`) + * Approximate nunique (:pr:`1807`), (:pr:`1824`) + * Reductions with multiple output partitions (for operations like + drop_duplicates) (:pr:`1808`), (:pr:`1823`) (:pr:`1828`) + * Add delitem and copy to DataFrames, increasing mutation support (:pr:`1858`) + + Delayed + * Changed behaviour for ``delayed(nout=0)`` and ``delayed(nout=1)``: + ``delayed(nout=1)`` does not default to ``out=None`` anymore, and + ``delayed(nout=0)`` is also enabled. I.e. functions with return + tuples of length 1 or 0 can be handled correctly. 
This is especially + handy, if functions with a variable amount of outputs are wrapped by + ``delayed``. E.g. a trivial example: + ``delayed(lambda *args: args, nout=len(vals))(*vals)`` + + Core + * Refactor core byte ingest (:pr:`1768`), (:pr:`1774`) + * Improve import time (:pr:`1833`) +- update to version 0.12.0: + * update changelog (#1757) + * Avoids spurious warning message in concatenate (#1752) + * CLN: cleanup dd.multi (#1728) + * ENH: da.ufuncs now supports DataFrame/Series (#1669) + * Faster array slicing (#1731) + * Avoid calling list on partitions (#1747) + * Fix slicing error with None and ints (#1743) + * Add da.repeat (#1702) + * ENH: add dd.DataFrame.resample (#1741) + * Unify column names in dd.read_csv (#1740) + * replace empty with random in test to avoid nans + * Update diagnostics plots (#1736) + * Allow atop to change chunk shape (#1716) + * ENH: DataFrame.loc now supports 2d indexing (#1726) + * Correct shape when indexing with Ellipsis and None + * ENH: Add DataFrame.pivot_table (#1729) + * CLN: cleanup DataFrame class handling (#1727) + * ENH: Add DataFrame.combine_first (#1725) + * ENH: Add DataFrame all/any (#1724) + * micro-optimize _deps (#1722) + * A few small tweaks to da.Array.astype (#1721) + * BUG: Fixed metadata lookup failure in Accessor (#1706) + * Support auto-rechunking in stack and concatenate (#1717) + * Forward `get` kwarg in df.to_csv (#1715) + * Add rename support for multi-level columns (#1712) + * Update paid support section + * Add `drop` to reset_index (#1711) + * Cull dask.arrays on slicing (#1709) + * Update dd.read_* functions in docs + * WIP: Feature/dataframe aggregate (implements #1619) (#1678) + * Add da.round (#1708) + * Executor -> Client + * Add support of getitem for multilevel columns (#1697) + * Prepend optimization keywords with name of optimization (#1690) + * Add dd.read_table (#1682) + * Fix dd.pivot_table dtype to be deterministic (#1693) + * da.random with state is consistent across sizes (#1687) + 
* Remove `raises`, use pytest.raises instead (#1679) + * Remove unnecessary calls to list (#1681) + * Dataframe tree reductions (#1663) + * Add global optimizations to compute (#1675) + * TST: rename dataframe eq to assert_eq (#1674) + * ENH: Add DataFrame/Series.align (#1668) + * CLN: dataframe.io (#1664) + * ENH: Add DataFrame/Series clip_xxx (#1667) + * Clear divisions on single_partitions_merge (#1666) + * ENH: add dd.pivot_table (#1665) + * Typo in `use-cases`? (#1670) + * add distributed follow link doc page + * Dataframe elemwise (#1660) + * Windows file and endline test handling (#1661) + * remove old badges + * Fix #1656: failures when parallel testing (#1657) + * Remove use of multiprocessing.Manager (#1652) (#1653) + * A few fixes for `map_blocks` (#1654) + * Automatically expand chunking in atop (#1644) + * Add AppVeyor configuration (#1648) + * TST: move flake8 to travis script (#1655) + * CLN: Remove unused funcs (#1638) + * Implementing .size and groupby size method (#1627) (#1649) + * Use strides, shape, and offset in memmap tokenize (#1646) + * Validate scalar metadata is scalar (#1642) + * Convert readthedocs links for their .org -> .io migration for + hosted projects (#1639) + * CLN: little cleanup of dd.categorical (#1635) + * Signature of Array.transpose matches numpy (#1632) + * Error nicely when indexing Array with Array (#1629) + * ENH: add DataFrame.get_xtype_counts (#1634) + * PEP8: some fixes (#1633) +- changes from version 0.11.1: + * support uniform index partitions in set_index(sorted) (#1626) + * Groupby works with multiprocessing (#1625) + * Use a nonempty index in _maybe_partial_time_string + * Fix segfault in groupby-var + * Support Pandas 0.19.0 + * Deprecations (#1624) + * work-around for ddf.info() failing because of + https://github.com/pydata/pandas/issues/14368 (#1623) + * .str accessor needs to pass thru both args & kwargs (#1621) + * Ensure dtype is provided in additional tests (#1620) + * coerce rounded numbers to int in 
dask.array.ghost (#1618) + * Use assert_eq everywhere in dask.array tests (#1617) + * Update documentation (#1606) + * Support new_axes= keyword in atop (#1612) + * pass through node_attr and edge_attr in dot_graph (#1614) + * Add swapaxes to dask array (#1611) + * add clip to Array (#1610) + * Add atop(concatenate=False) keyword argument (#1609) + * Better error message on metadata inference failure (#1598) + * ENH/API: Enhanced Categorical Accessor (#1574) + * PEP8: dataframe fix except E127,E402,E501,E731 (#1601) + * ENH: dd.get_dummies for categorical Series (#1602) + * PEP8: some fixes (#1605) + * Fix da.learn tests for scikit-learn release (#1597) + * Suppress warnings in psutil (#1589) + * avoid more timeseries warnings (#1586) + * Support inplace operators in dataframe (#1585) + * Squash warnings in resample (#1583) + * expand imports for dask.distributed (#1580) + * Add indicator keyword to dd.merge (#1575) + * Error loudly if `nrows` used in read_csv (#1576) + * Add versioneer (#1569) + * Strengthen statement about gitter for developers in docs + * Raise IndexError on out of bounds slice. 
(#1579) + * ENH: Support Series in read_hdf (#1577) + * COMPAT/API: DataFrame.categorize missing values (#1578) + * Add `pipe` method to dask.dataframe (#1567) + * Sample from `read_bytes` ends on a delimiter (#1571) + * Remove mention of bag join in docs (#1568) + * Tokenize mmap works without filename (#1570) + * String accessor works with indexes (#1561) + * corrected links to documentation from Examples (#1557) + * Use conda-forge channel in travis (#1559) + * add s3fs to travis.yml (#1558) + * ENH: DataFrame.select_dtypes (#1556) + * Improve slicing performance (#1539) + * Check meta in `__init__` of _Frame + * Fix metadata in Series.getitem + * A few changes to `dask.delayed` (#1542) + * Fixed read_hdf example (#1544) + * add section on distributed computing with link to toc + * Fix spelling (#1535) + * Only fuse simple indexing with getarray backends (#1529) + * Deemphasize graphs in docs (#1531) + * Avoid pickle when tokenizing __main__ functions (#1527) + * Add changelog doc going up to dask 0.6.1 (2015-07-23). (#1526) + * update dataframe docs + * update index + * Update to highlight the use of glob based file naming option for + df exports (#1525) + * Add custom docstring to dd.to_csv, mentioning that one file per + partition is written (#1524) + * Run slow tests in Travis for all Python versions, even if coverage + check is disabled. (#1523) + * Unify example doc pages into one (#1520) + * Remove lambda/inner functions in dask.dataframe (#1516) + * Add documentation for dataframe metadata (#1514) + * "dd.map_partitions" works with scalar outputs (#1515) + * meta_nonempty returns types of correct size (#1513) + * add memory use note to tsqr docstring + * Fix slow consistent keyname test (#1510) + * Chunks check (#1504) + * Fix last 'line' in sample; prevents open quotes. 
(#1495) + * Create new threadpool when operating from thread (#1487) + * Add finalize- prefix to dask.delayed collections + * Move key-split from distributed to dask + * State that delayed values should be lists in bag.from_delayed + (#1490) + * Use lists in db.from_sequence (#1491) + * Implement user defined aggregations (#1483) + * Field access works with non-scalar fields (#1484) +- Update to 0.11.0 + * DataFrames now enforce knowing full metadata (columns, dtypes) + everywhere. Previously we would operate in an ambiguous state + when functions lost dtype information (such as apply). Now all + dataframes always know their dtypes and raise errors asking for + information if they are unable to infer (which they usually + can). Some internal attributes like _pd and _pd_nonempty have + been moved. + * The internals of the distributed scheduler have been refactored + to transition tasks between explicit states. This improves + resilience, reasoning about scheduling, plugin operation, and + logging. It also makes the scheduler code easier to understand + for newcomers. + * Breaking Changes + + The distributed.s3 and distributed.hdfs namespaces are gone. + Use protocols in normal methods like read_text('s3://...') + instead. + + Dask.array.reshape now errs in some cases where previously + it would have created a very large number of tasks +- update to version 0.10.2: + * raise informative error on merge(on=frame) + * Fix crash with -OO Python command line (#1388) + * [WIP] Read hdf partitioned (#1407) + * Add dask.array.digitize.
(#1409) + * Adding documentation to create dask DataFrame from HDF5 (#1405) + * Unify shuffle algorithms (#1404) + * dd.read_hdf: clear errors on exceeding row numbers (#1406) + * Rename `get_division` to `get_partition` + * Add nice error messages on import failures + * Use task-based shuffle in hash_joins (#1383) + * Fixed #1381: Reimplemented DataFrame.repartition(npartition=N) so + it doesn't require indexing and just coalesce existing partitions + without shuffling/balancing (#1396) + * Import visualize from dask.diagnostics in docs + * Backport `equal_nans` to older version of numpy + * Improve checks for dtype and shape in dask.array + * Progress bar process should be daemon + * LZMA may not be available in python 3 (#1391) + * dd.to_hdf: multiple files multiprocessing avoid locks (#1384) + * dir works with numeric column names + * Dataframe groupby works with numeric column names + * Use fsync when appending to partd + * Fix pickling issue in dataframe to_bag + * Add documentation for dask.dataframe.to_hdf + * Fixed a copy-paste typo in DataFrame.map_partitions docstring + * Fix 'visualize' import location in diagnostics documentation + (#1376) + * update cheat sheet (#1371) +- update to version 0.10.1: + * `inline` no longer removes keys (#1356) + * avoid c: in infer_storage_options (#1369) + * Protect reductions against empty partitions (#1361) + * Add doc examples for dask.array.histogram. (#1363) + * Fix typo in pip install requirements path (#1364) + * avoid unnecessary dependencies between save tasks in + dataframe.to_hdf (#1293) + * remove xfail mark for blosc missing const + * Add `anon=True` for read from s3 test + * `subs` doesn't needlessly compare keys and values + * Use pytest.importorskip instead of try/except/return pattern + * Fixes for bokeh 0.12.0 + * Multiprocess scheduler handles unpickling errors + * array.random with array-like parameters (#1327) + * Fixes issue #1337 (#1338) + * Remove dask runtime dependence on mock 2.7 backport.
+ * Load known but external protocols automatically (#1325) + * Add center argument to Series/DataFrame.rolling (#1280) + * Add Bag.random_sample method. (#1332) + * Correct docs install command and add missing required packages + (#1333) + * Mark the 4 slowest tests as slow to get a faster suite by + default. (#1334) + * Travis: Install mock package in Python 2.7. + * Automatic blocksize for read_csv based on available memory and + number of cores. + * Replace "Matthew Rocklin" with "Dask Development Team" (#1329) + * Support column assignment in DataFrame (#1322) + * Few travis fixes, pandas version >= 0.18.0 (#1314) + * Don't run hdf test if pytables package is not present. (#1323) + * Add delayed.compute to api docs. + * Support datetimes in DataFrame._build_pd (#1319) + * Test setting the index with datetime with timezones, which is a + pandas-defined dtype + * (#1315) + * Add s3fs to requirements (#1316) + * Pass dtype information through in Series.astype (#1320) + * Add draft of development guidelines (#1305) + * Skip tests needing optional package when it's not present. 
(#1318) + * DOC: Document DataFrame.categorize + * make dd.to_csv support writing to multiple csv files (#1303) + * quantiles for repartitioning (#1261) + * DOC: Minimal doc for get_sync (#1312) + * Pass through storage_options in db.read_text (#1304) + * Fixes #1237: correctly propagate storage_options through read_* + APIs and use urlsplit to automatically get remote connection + settings (#1269) + * TST: Travis build matrix to specify numpy/pandas ver (#1300) + * amend doc string to Bag.to_textfiles + * Return dask.Delayed when saving files with compute = false (#1286) + * Support empty or small dataframes in from_pandas (#1290) + * Add validation and tests for order breaking name_function (#1275) + * ENH: dataframe now supports partial string selection (#1278) + * Fix typo in spark-dask docs + * added note and verbose exception about CSV parsing errors (#1287) +- update to version 0.10.0: + * Add parametrization to merge tests + * Add more challenging types to nonempty_sample_df test + * Windows fixes + * TST: Fix coveralls badge (#1276) + * Sort index on shuffle (#1274) + * Update specification docs to reflect new spec. + * Add groupby docs (#1273) + * Update spark docs + * Rolling class receives normal arguments (unchecked other than + pandas call), stores at + * Reduce communication in rolling operations #1242 (#1270) + * Fix Shuffle (#1255) + * Work on earlier versions of Pandas + * Handle additional Pandas types + * Use non-empty fake dataframe in merge operations + * Add failing test for merge case + * Add utility function to create sample dataframe + * update release procedure + * amend doc string to Bag.to_textfiles (#1258) + * Drop Python 2.6 support (#1264) + * Clean DataFrame naming conventions (#1263) + * Fix some bugs in the rolling implementation. 
+ * Fix core.get to use new spec + * Make graph definition recursive + * Handle empty partitions in dask.bag.to_textfiles + * test index.min/max + * Add regression test for non-ndarray slicing + * Standardize dataframe keynames + * bump csv sample size to 256k (#1253) + * Switch tests to utils.tmpdir (#1251) + * Fix dot_graph filename split bug + * Correct documentation to reflect argument existing now. + * Allow non-zero axis for .rolling (for application over columns) + * Fix scheduler behavior for top-level lists + * Various spelling mistakes in docstrings, comments, exception + messages, and a filename + * Fix typo. (#1247) + * Fix tokenize in dask.delayed + * Remove unused imports, pep8 fixes + * Fix bug in slicing optimization + * Add Task Shuffle (#1186) + * Add bytes API (#1224) + * Add dask_key_name to docs, fix bug in methods + * Allow formatting in dask.dataframe.to_hdf path and key parameters + * Match pandas' exceptions a bit closer in the rolling API. Also, + correct computation f + * Add tests to package (#1231) + * Document visualize method (#1234) + * Skip new rolling API's tests if the pandas we have is too old. + * Improve df_or_series.rolling(...) implementation. + * Remove `iloc` property on `dask.dataframe` + * Support for the new pandas rolling API. + * test delayed names are different under kwargs + * Add Hussain Sultan to AUTHORS + * Add `optimize_graph` keyword to multiprocessing get + * Add `optimize_graph` keyword to `compute` + * Add dd.info() (#1213) + * Cleanup base tests + * Add groupby documentation stub + * pngmath is deprecated in sphinx 1.4 + * A few docfixes + * Extract dtype in dd.from_bcolz + * Throw NotImplementedError if old toolz.accumulate + * Add isnull and notnull for dataframe + * Add dask.bag.accumulate + * Fix categorical partitioning + * create single lock for glob read_hdf + * Fix failing from_url doctest + * Add missing api to bag docs + * Add Skipper Seabold to AUTHORS. 
+ * Don't use mutable default argument + * Fix typo + * Ensure to_task_dasks always returns a task + * Fix dir for dataframe objects + * Infer metadata in dd.from_delayed + * Fix some closure issues in dask.dataframe + * Add storage_options keyword to read_csv + * Define finalize function for dask.dataframe.Scalar + * py26 compatibility + * add stacked logos to docs + * test from-array names + * rename from_array tasks + * add atop to array docs + * Add motivation and example to delayed docs + * splat out delayed values in compute docs + * Fix optimize docs + * add html page with logos + * add dask logo to documentation images + * Few pep8 cleanups to dask.dataframe.groupby + * Groupby aggregate works with list of columns + * Use different names for input and output in from_array + * Don't enforce same column names + * don't write header for first block in csv + * Add var and std to DataFrame groupby (#1159) + * Move conda recipe to conda-forge (#1162) + * Use function names in map_blocks and elemwise (#1163) + * add hyphen to delayed name (#1161) + * Avoid shuffles when merging with Pandas objects (#1154) + * Add DataFrame.eval + * Ensure future imports + * Add db.Bag.unzip + * Guard against shape attributes that are not sequences + * Add dask.array.multinomial +- update to version 0.9.0: + * No upstream changelog +- update to version 0.8.2: + * No upstream changelog +- update to version 0.8.1: + * No upstream changelog +- update to version 0.8.0: + * No upstream changelog +- update to version 0.7.5: + * No upstream changelog +- update to version 0.7.5: + * No upstream changelog +- update to version 0.7.0: + * No upstream changelog +- update to version 0.6.1: + * No upstream changelog + +------------------------------------------------------------------- +Tue Jul 14 13:33:53 UTC 2015 - toddrme2178@gmail.com + +- Update to 0.6.0 + * No upstream changelog + +------------------------------------------------------------------- +Tue May 19 11:03:41 UTC 2015 - 
toddrme2178@gmail.com + +- Update to 0.5.0 + * No upstream changelog + +------------------------------------------------------------------- +Thu Apr 9 16:57:59 UTC 2015 - toddrme2178@gmail.com + +- Initial version + diff --git a/python-dask.spec b/python-dask.spec new file mode 100644 index 0000000..9853e75 --- /dev/null +++ b/python-dask.spec @@ -0,0 +1,402 @@ +# +# spec file +# +# Copyright (c) 2023 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%define psuffix %{nil} +%global flavor @BUILD_FLAVOR@%{nil} +%if "%{flavor}" == "test-py39" +%define psuffix -test-py39 +%define skip_python310 1 +%define skip_python311 1 +%bcond_without test +%endif +%if "%{flavor}" == "test-py310" +%define psuffix -test-py310 +%define skip_python39 1 +%define skip_python311 1 +%bcond_without test +%endif +%if "%{flavor}" == "test-py311" +%define psuffix -test-py311 +%define skip_python39 1 +%define skip_python310 1 +%bcond_without test +%endif +%if "%{flavor}" == "" +%bcond_with test +%endif + +%{?sle15_python_module_pythons} +Name: python-dask%{psuffix} +# ===> Note: python-dask MUST be updated in sync with python-distributed! 
<=== +Version: 2023.12.0 +Release: 0 +Summary: Minimal task scheduling abstraction +License: BSD-3-Clause +URL: https://dask.org +# SourceRepository: https://github.com/dask/dask +Source0: https://files.pythonhosted.org/packages/source/d/dask/dask-%{version}.tar.gz +BuildRequires: %{python_module base >= 3.9} +BuildRequires: %{python_module packaging >= 20.0} +BuildRequires: %{python_module pip} +BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module versioneer-toml >= 0.29} +BuildRequires: %{python_module wheel} +BuildRequires: fdupes +BuildRequires: python-rpm-macros +Requires: python-PyYAML >= 5.3.1 +Requires: python-click >= 8.1 +Requires: python-cloudpickle >= 1.5 +Requires: python-fsspec >= 2021.9 +Requires: python-importlib-metadata >= 4.13.0 +Requires: python-packaging >= 20.0 +Requires: python-partd >= 1.2.0 +Requires: python-toolz >= 0.10.0 +Requires(post): update-alternatives +Requires(postun):update-alternatives +Recommends: %{name}-array = %{version} +Recommends: %{name}-dataframe = %{version} +Recommends: %{name}-distributed = %{version} +Suggests: %{name}-complete = %{version} +Suggests: %{name}-diagnostics = %{version} +# SECTION https://docs.dask.org/en/stable/install.html#optional-dependencies +Suggests: python-SQLAlchemy >= 1.4.16 +Suggests: python-cityhash >= 0.2.4 +Suggests: python-fastparquet >= 0.8.2 +Suggests: python-gcsfs >= 2021.9.0 +Suggests: python-crick >= 0.0.3 +Suggests: python-cytoolz >= 0.10.1 +Suggests: python-dask-ml >= 1.4.0 +Suggests: python-fastavro >= 0.22.6 +Suggests: python-graphviz >= 0.8.4 +Suggests: python-h5py >= 2.10.0 +Suggests: python-psutil >= 0.5.7 +Suggests: python-pyarrow >= 14.0.1 +Suggests: python-matplotlib +Suggests: python-mimesis >= 5.3.0 +Suggests: python-mmh3 >= 2.5.1 +Suggests: python-sparse >= 0.12.0 +Suggests: python-s3fs >= 0.4.0 +Suggests: python-xxhash >= 1.4.1 +Suggests: python-zarr >= 2.12.0 +# /SECTION +Provides: %{name}-bag = %{version}-%{release} +Obsoletes: %{name}-bag < 
%{version}-%{release} +Provides: %{name}-delayed = %{version}-%{release} +Obsoletes: %{name}-delayed < %{version}-%{release} +Provides: %{name}-dot = %{version}-%{release} +Obsoletes: %{name}-dot < %{version}-%{release} +Provides: %{name}-multiprocessing = %{version}-%{release} +Obsoletes: %{name}-multiprocessing < %{version}-%{release} +BuildArch: noarch +%if %{with test} +# test that we specified all requirements correctly in the core +# and subpackages by only requiring dask-test (= [complete] + pytest) and optional extras +BuildRequires: %{python_module dask-test = %{version}} +# SECTION additional optionally tested (importorskip) packages +BuildRequires: %{python_module SQLAlchemy >= 1.4.16} +BuildRequires: %{python_module cachey} +BuildRequires: %{python_module fastparquet >= 0.8.0} +# optional zarr increases fsspec minimum to 0.8.4 if present +BuildRequires: %{python_module fsspec >= 0.8.4} +BuildRequires: %{python_module h5py} +BuildRequires: %{python_module ipython} +BuildRequires: %{python_module jsonschema} +BuildRequires: %{python_module matplotlib} +BuildRequires: %{python_module mimesis} +BuildRequires: %{python_module multipledispatch} +BuildRequires: %{python_module numba} +# https://github.com/dask/partd/issues/66, https://github.com/dask/dask/pull/10176 +BuildRequires: %{python_module partd >= 1.4.0} +# snappy required for using fastparquet +BuildRequires: %{python_module python-snappy} +BuildRequires: %{python_module requests} +BuildRequires: %{python_module scikit-image} +BuildRequires: %{python_module scipy} +BuildRequires: %{python_module sparse} +BuildRequires: %{python_module tables} +BuildRequires: %{python_module xarray} +BuildRequires: %{python_module zarr} +# /SECTION +%endif +%python_subpackages + +%description +A flexible library for parallel computing in Python. + +Dask is composed of two parts: +- Dynamic task scheduling optimized for computation.
This is similar to + Airflow, Luigi, Celery, or Make, but optimized for interactive + computational workloads. +- “Big Data” collections like parallel arrays, dataframes, and lists that + extend common interfaces like NumPy, Pandas, or Python iterators to + larger-than-memory or distributed environments. These parallel collections + run on top of dynamic task schedulers. + +%package complete +# This must have a Requires for dask and all the dask subpackages +Summary: All dask components +Requires: %{name} = %{version} +Requires: %{name}-array = %{version} +Requires: %{name}-dataframe = %{version} +Requires: %{name}-diagnostics = %{version} +Requires: %{name}-distributed = %{version} +Requires: python-lz4 >= 4.3.2 +Requires: python-pyarrow >= 7 +Provides: %{name}-all = %{version}-%{release} +Obsoletes: %{name}-all < %{version}-%{release} + +%description complete +A flexible library for parallel computing in Python. + +Dask is composed of two parts: +- Dynamic task scheduling optimized for computation. This is similar to + Airflow, Luigi, Celery, or Make, but optimized for interactive + computational workloads. +- “Big Data” collections like parallel arrays, dataframes, and lists that + extend common interfaces like NumPy, Pandas, or Python iterators to + larger-than-memory or distributed environments. These parallel collections + run on top of dynamic task schedulers. + +This package pulls in all the optional dask components. + +%package array +Summary: Numpy-like array data structure for dask +Requires: %{name} = %{version} +Requires: %{name}-delayed = %{version} +Requires: python-numpy >= 1.21 +Recommends: python-scipy + +%description array +A flexible library for parallel computing in Python. + +Dask is composed of two parts: +- Dynamic task scheduling optimized for computation. This is similar to + Airflow, Luigi, Celery, or Make, but optimized for interactive + computational workloads. 
+- “Big Data” collections like parallel arrays, dataframes, and lists that + extend common interfaces like NumPy, Pandas, or Python iterators to + larger-than-memory or distributed environments. These parallel collections + run on top of dynamic task schedulers. + +This package contains the dask array class. + +Dask arrays implement a subset of the NumPy interface on large +arrays using blocked algorithms and task scheduling. + +%package dataframe +Summary: Pandas-like DataFrame data structure for dask +Requires: %{name} = %{version} +Requires: %{name}-array = %{version} +Requires: %{name}-bag = %{version} +Requires: python-pandas >= 1.3 + +%description dataframe +A flexible library for parallel computing in Python. + +Dask is composed of two parts: +- Dynamic task scheduling optimized for computation. This is similar to + Airflow, Luigi, Celery, or Make, but optimized for interactive + computational workloads. +- “Big Data” collections like parallel arrays, dataframes, and lists that + extend common interfaces like NumPy, Pandas, or Python iterators to + larger-than-memory or distributed environments. These parallel collections + run on top of dynamic task schedulers. + +This package contains the dask DataFrame class. + +A Dask DataFrame is a large parallel dataframe composed of many +smaller Pandas dataframes, split along the index. These pandas +dataframes may live on disk for larger-than-memory computing +on a single machine, or on many different machines in a cluster. + +%package distributed +Summary: Interface with the distributed task scheduler in dask +Requires: %{name} = %{version} +# dask and distributed are always updated together +Requires: python-distributed = %{version} + +%description distributed +A flexible library for parallel computing in Python. + +Dask is composed of two parts: +- Dynamic task scheduling optimized for computation. This is similar to + Airflow, Luigi, Celery, or Make, but optimized for interactive + computational workloads. 
+- “Big Data” collections like parallel arrays, dataframes, and lists that + extend common interfaces like NumPy, Pandas, or Python iterators to + larger-than-memory or distributed environments. These parallel collections + run on top of dynamic task schedulers. + +This meta package pulls in the distributed module into the dask namespace. + +%package diagnostics +Summary: Diagnostics for dask +Requires: %{name} = %{version} +Requires: python-Jinja2 >= 2.10.3 +Requires: python-bokeh >= 3.1 + +%description diagnostics +A flexible library for parallel computing in Python. + +Dask is composed of two parts: +- Dynamic task scheduling optimized for computation. This is similar to + Airflow, Luigi, Celery, or Make, but optimized for interactive + computational workloads. +- “Big Data” collections like parallel arrays, dataframes, and lists that + extend common interfaces like NumPy, Pandas, or Python iterators to + larger-than-memory or distributed environments. These parallel collections + run on top of dynamic task schedulers. + +This package contains the dask.diagnostics module + +%package test +Summary: The test submodules of the python-dask package +Requires: %{name}-complete = %{version} +Requires: python-pandas +# SECTION pandas[test] +Requires: python-hypothesis +Requires: python-pytest-asyncio +# /SECTION +Requires: python-pre-commit +Requires: python-pytest +Requires: python-pytest-rerunfailures +Requires: python-pytest-timeout +Requires: python-pytest-xdist + +%description test +Dask is a flexible library for parallel computing in Python. +This subpackage provides the .test submodules in the sitelib required for +unit testing dask. 
+
+%prep
+%autosetup -p1 -n dask-%{version}
+# delete every pyproject.toml line mentioning addopts; presumably so that only
+# the pytest options passed in the check section take effect
+sed -i '/addopts/d' pyproject.toml
+
+%build
+%pyproject_wheel
+
+%install
+# nothing is installed when building one of the test flavors
+%if !%{with test}
+%pyproject_install
+# prepare the dask executable for the alternatives handling done in the
+# post and postun scriptlets
+%python_clone -a %{buildroot}%{_bindir}/dask
+%{python_expand # give SUSE specific install instructions
+sed -E -i '/Please either conda or pip install/,/python -m pip install/ {
+ s/either conda or pip//;
+ /conda install/ d;
+ s/python -m pip install "dask\[(.*)\]".*pip install/zypper in $python-dask-\1/
+ }' \
+ %{buildroot}%{$python_sitelib}/dask/distributed.py
+# array, bag and dataframe get a pointer to the bug tracker instead of an install hint
+sed -E -i '/Please either conda or pip install/,/python -m pip install/ c \
+ "Please file a bug report https://bugzilla.opensuse.org and\\n"\
+ "report the missing requirements."' \
+ %{buildroot}%{$python_sitelib}/dask/array/__init__.py \
+ %{buildroot}%{$python_sitelib}/dask/bag/__init__.py \
+ %{buildroot}%{$python_sitelib}/dask/dataframe/__init__.py
+}
+# presumably needed because the sed calls above edited already installed sources
+%{python_compileall}
+# deduplicate identical files inside each flavor sitelib
+%python_expand %fdupes %{buildroot}%{$python_sitelib}
+%endif
+
+%if %{with test}
+%check
+# move away from importpath
+mv dask dask.moved
+# donttest accumulates a pytest -k expression of tests to deselect
+# different seed or mimesis version
+donttest="(test_datasets and test_deterministic)"
+# upstreams test if their ci is up to date, irrelevant for obs
+donttest+=" or test_development_guidelines_matches_ci"
+# extra skips that only apply on 32-bit build hosts
+if [[ $(getconf LONG_BIT) -eq 32 ]]; then
+ # https://github.com/dask/dask/issues/8620
+ donttest+=" or test_query_with_meta"
+ donttest+=" or test_repartition_npartitions"
+ # NOTE(review): no upstream issue is recorded for the following two skips
+ donttest+=" or test_pandas_multiindex"
+ donttest+=" or test_categorize_info"
+fi
+# (rarely) flaky on obs
+donttest+=" or test_local_scheduler"
+donttest+=" or (test_threaded and test_interrupt)"
+# perhaps? rh#1968947#c4
+donttest+=" or test_select_from_select"
+# tries to get an IP address
+donttest+=" or test_map_partitions_df_input"
+# needs s3fs support in arrow
+donttest+=" or test_pyarrow_filesystem_option_real_data"
+# run the dask test suite by package name with one xdist worker per core,
+# leaving out network-marked tests and everything collected in donttest;
+# failing tests are retried up to 3 times with a 3 second pause
+%pytest --pyargs dask -n auto -r fE -m "not network" -k "not ($donttest)" --reruns 3 --reruns-delay 3
+%endif
+
+%post
+# register the dask executable as an alternative
+%python_install_alternative dask
+
+%postun
+# remove the dask alternative again
+%python_uninstall_alternative dask
+
+%if !%{with test}
+%files %{python_files}
+# main package: the whole dask tree minus the parts owned by the array,
+# dataframe and diagnostics subpackages and minus all tests directories
+%doc README.rst
+%license LICENSE.txt
+%python_alternative %{_bindir}/dask
+%{python_sitelib}/dask/
+%{python_sitelib}/dask-%{version}.dist-info
+%exclude %{python_sitelib}/dask/array/
+%exclude %{python_sitelib}/dask/dataframe/
+%exclude %{python_sitelib}/dask/diagnostics
+%exclude %{python_sitelib}/dask/tests
+%exclude %{python_sitelib}/dask/bag/tests
+%exclude %{python_sitelib}/dask/bytes/tests
+%exclude %{python_sitelib}/dask/widgets/tests
+# NOTE(review): the preamble declares delayed and dot as provided by this main
+# package, yet their cached bytecode is still excluded here - confirm intended
+%pycache_only %exclude %{python_sitelib}/dask/__pycache__/delayed*.pyc
+%pycache_only %exclude %{python_sitelib}/dask/__pycache__/dot.*
+
+%files %{python_files complete}
+# meta package, carries only the license
+%license LICENSE.txt
+
+%files %{python_files array}
+%license LICENSE.txt
+%{python_sitelib}/dask/array/
+%exclude %{python_sitelib}/dask/array/tests
+
+%files %{python_files dataframe}
+%license LICENSE.txt
+%{python_sitelib}/dask/dataframe/
+%exclude %{python_sitelib}/dask/dataframe/tests
+%exclude %{python_sitelib}/dask/dataframe/io/tests
+%exclude %{python_sitelib}/dask/dataframe/tseries/tests
+
+%files %{python_files distributed}
+# meta package, carries only the license
+%license LICENSE.txt
+
+%files %{python_files diagnostics}
+%license LICENSE.txt
+%{python_sitelib}/dask/diagnostics/
+%exclude %{python_sitelib}/dask/diagnostics/tests
+
+%files %{python_files test}
+# every tests directory excluded from the packages above ends up here
+%license LICENSE.txt
+%{python_sitelib}/dask/tests
+%{python_sitelib}/dask/bytes/tests
+%{python_sitelib}/dask/widgets/tests
+%{python_sitelib}/dask/array/tests
+%{python_sitelib}/dask/bag/tests
+%{python_sitelib}/dask/dataframe/tests
+%{python_sitelib}/dask/dataframe/io/tests
+%{python_sitelib}/dask/dataframe/tseries/tests
+%{python_sitelib}/dask/diagnostics/tests
+%endif
+
+%changelog