Accepting request 1159356 from devel:languages:python:numeric

OBS-URL: https://build.opensuse.org/request/show/1159356
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-xarray?expand=0&rev=47
2024-03-19 16:32:42 +00:00 · 2024-03-19 16:32:42 +00:00 · 47d22cc664
commit 47d22cc664
parent 201d95acff 2906ec4e42
3 changed files with 203 additions and 0 deletions
--- a/python-xarray.changes
+++ b/python-xarray.changes
@@ -1,3 +1,9 @@
+-------------------------------------------------------------------
+Mon Mar 18 19:47:16 UTC 2024 - Ben Greiner <code@bnavigator.de>
+
+- Add xarray-pr8797-tokenize.patch
+  * gh#pydata/xarray#8797 fixes gh#pydata/xarray#8788
+
 -------------------------------------------------------------------
 Fri Mar  1 21:04:08 UTC 2024 - Matej Cepl <mcepl@cepl.eu>

--- a/python-xarray.spec
+++ b/python-xarray.spec
@@ -37,6 +37,8 @@ Source:         https://files.pythonhosted.org/packages/source/x/xarray/xarray-%
 # PATCH-FEATURE-UPSTREAM local_dataset.patch gh#pydata/xarray#5377 mcepl@suse.com
 # fix xr.tutorial.open_dataset to work with the preloaded cache.
 Patch0:         local_dataset.patch
+# PATCH-FIX-UPSTREAM xarray-pr8797-tokenize.patch gh#pydata/xarray#8797 fixes gh#pydata/xarray#8788
+Patch1:         https://github.com/pydata/xarray/pull/8797.patch#/xarray-pr8797-tokenize.patch
 BuildRequires:  %{python_module base >= 3.9}
 BuildRequires:  %{python_module pip}
 BuildRequires:  %{python_module setuptools_scm}
--- a/xarray-pr8797-tokenize.patch
+++ b/xarray-pr8797-tokenize.patch
@@ -0,0 +1,195 @@
+From 4eb05f0f73c535455f457e650036c86cdfaf4aa2 Mon Sep 17 00:00:00 2001
+From: crusaderky <crusaderky@gmail.com>
+Date: Thu, 29 Feb 2024 12:21:18 +0000
+Subject: [PATCH] tokenize() should ignore difference between None and {} attrs
+
+---
+ xarray/core/dataarray.py    |  2 +-
+ xarray/core/dataset.py      |  8 ++++----
+ xarray/core/variable.py     |  6 ++++--
+ xarray/namedarray/core.py   |  7 +++----
+ xarray/namedarray/utils.py  |  4 ++--
+ xarray/tests/test_dask.py   | 35 ++++++++++++++++++++++++-----------
+ xarray/tests/test_sparse.py |  4 ----
+ 7 files changed, 38 insertions(+), 28 deletions(-)
+
+diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
+index c00fe1a9e6..aeb6b2217c 100644
+--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
+@@ -1070,7 +1070,7 @@ def reset_coords(
+         dataset[self.name] = self.variable
+         return dataset
+ 
+-    def __dask_tokenize__(self):
++    def __dask_tokenize__(self) -> object:
+         from dask.base import normalize_token
+ 
+         return normalize_token((type(self), self._variable, self._coords, self._name))
+diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
+index 884e302b8b..e1fd9e025f 100644
+--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
+@@ -694,7 +694,7 @@ def __init__(
+             data_vars, coords
+         )
+ 
+-        self._attrs = dict(attrs) if attrs is not None else None
++        self._attrs = dict(attrs) if attrs else None
+         self._close = None
+         self._encoding = None
+         self._variables = variables
+@@ -739,7 +739,7 @@ def attrs(self) -> dict[Any, Any]:
+ 
+     @attrs.setter
+     def attrs(self, value: Mapping[Any, Any]) -> None:
+-        self._attrs = dict(value)
++        self._attrs = dict(value) if value else None
+ 
+     @property
+     def encoding(self) -> dict[Any, Any]:
+@@ -856,11 +856,11 @@ def load(self, **kwargs) -> Self:
+ 
+         return self
+ 
+-    def __dask_tokenize__(self):
++    def __dask_tokenize__(self) -> object:
+         from dask.base import normalize_token
+ 
+         return normalize_token(
+-            (type(self), self._variables, self._coord_names, self._attrs)
++            (type(self), self._variables, self._coord_names, self._attrs or None)
+         )
+ 
+     def __dask_graph__(self):
+diff --git a/xarray/core/variable.py b/xarray/core/variable.py
+index cd0c022d70..315c46369b 100644
+--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
+@@ -2592,11 +2592,13 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False):
+         if not isinstance(self._data, PandasIndexingAdapter):
+             self._data = PandasIndexingAdapter(self._data)
+ 
+-    def __dask_tokenize__(self):
++    def __dask_tokenize__(self) -> object:
+         from dask.base import normalize_token
+ 
+         # Don't waste time converting pd.Index to np.ndarray
+-        return normalize_token((type(self), self._dims, self._data.array, self._attrs))
++        return normalize_token(
++            (type(self), self._dims, self._data.array, self._attrs or None)
++        )
+ 
+     def load(self):
+         # data is already loaded into memory for IndexVariable
+diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py
+index 2972269043..fd209bc273 100644
+--- a/xarray/namedarray/core.py
+++ b/xarray/namedarray/core.py
+@@ -511,7 +511,7 @@ def attrs(self) -> dict[Any, Any]:
+ 
+     @attrs.setter
+     def attrs(self, value: Mapping[Any, Any]) -> None:
+-        self._attrs = dict(value)
++        self._attrs = dict(value) if value else None
+ 
+     def _check_shape(self, new_data: duckarray[Any, _DType_co]) -> None:
+         if new_data.shape != self.shape:
+@@ -570,13 +570,12 @@ def real(
+             return real(self)
+         return self._new(data=self._data.real)
+ 
+-    def __dask_tokenize__(self) -> Hashable:
++    def __dask_tokenize__(self) -> object:
+         # Use v.data, instead of v._data, in order to cope with the wrappers
+         # around NetCDF and the like
+         from dask.base import normalize_token
+ 
+-        s, d, a, attrs = type(self), self._dims, self.data, self.attrs
+-        return normalize_token((s, d, a, attrs))  # type: ignore[no-any-return]
++        return normalize_token((type(self), self._dims, self.data, self._attrs or None))
+ 
+     def __dask_graph__(self) -> Graph | None:
+         if is_duck_dask_array(self._data):
+diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py
+index 0326a6173c..b82a80b546 100644
+--- a/xarray/namedarray/utils.py
+++ b/xarray/namedarray/utils.py
+@@ -218,7 +218,7 @@ def __eq__(self, other: ReprObject | Any) -> bool:
+     def __hash__(self) -> int:
+         return hash((type(self), self._value))
+ 
+-    def __dask_tokenize__(self) -> Hashable:
++    def __dask_tokenize__(self) -> object:
+         from dask.base import normalize_token
+ 
+-        return normalize_token((type(self), self._value))  # type: ignore[no-any-return]
++        return normalize_token((type(self), self._value))
+diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
+index 07bf773cc8..517fc0c2d6 100644
+--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
+@@ -299,17 +299,6 @@ def test_persist(self):
+         self.assertLazyAndAllClose(u + 1, v)
+         self.assertLazyAndAllClose(u + 1, v2)
+ 
+-    def test_tokenize_empty_attrs(self) -> None:
+-        # Issue #6970
+-        assert self.eager_var._attrs is None
+-        expected = dask.base.tokenize(self.eager_var)
+-        assert self.eager_var.attrs == self.eager_var._attrs == {}
+-        assert (
+-            expected
+-            == dask.base.tokenize(self.eager_var)
+-            == dask.base.tokenize(self.lazy_var.compute())
+-        )
+-
+     @requires_pint
+     def test_tokenize_duck_dask_array(self):
+         import pint
+@@ -1573,6 +1562,30 @@ def test_token_identical(obj, transform):
+     )
+ 
+ 
++@pytest.mark.parametrize(
++    "obj",
++    [
++        make_ds(),  # Dataset
++        make_ds().variables["c2"],  # Variable
++        make_ds().variables["x"],  # IndexVariable
++    ],
++)
++def test_tokenize_empty_attrs(obj):
++    """Issues #6970 and #8788"""
++    obj.attrs = {}
++    assert obj._attrs is None
++    a = dask.base.tokenize(obj)
++
++    assert obj.attrs == {}
++    assert obj._attrs == {}  # attrs getter changed None to dict
++    b = dask.base.tokenize(obj)
++    assert a == b
++
++    obj2 = obj.copy()
++    c = dask.base.tokenize(obj2)
++    assert a == c
++
++
+ def test_recursive_token():
+     """Test that tokenization is invoked recursively, and doesn't just rely on the
+     output of str()
+diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py
+index 289149bdd6..09c1281875 100644
+--- a/xarray/tests/test_sparse.py
+++ b/xarray/tests/test_sparse.py
+@@ -878,10 +878,6 @@ def test_dask_token():
+     import dask
+ 
+     s = sparse.COO.from_numpy(np.array([0, 0, 1, 2]))
+-
+-    # https://github.com/pydata/sparse/issues/300
+-    s.__dask_tokenize__ = lambda: dask.base.normalize_token(s.__dict__)
+-
+     a = DataArray(s)
+     t1 = dask.base.tokenize(a)
+     t2 = dask.base.tokenize(a)