- Update to 2.3.1

* This release includes some improvements and fixes to the future
    string data type (preview feature for the upcoming pandas 3.0)
  ## Improvements and fixes for the StringDtype
  * Comparisons between different string dtypes
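  * Index set operations (like union or intersection) now ignore
    the dtype of an empty RangeIndex or empty object-dtype Index
    when determining the dtype of the resulting Index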
  ## Bug fixes
  * Bug in DataFrameGroupBy.min(), DataFrameGroupBy.max(),
    Resampler.min(), Resampler.max() where all NA values of string
    dtype would return float instead of string dtype (GH 60810)
  * Bug in DataFrame.join() incorrectly downcasting object-dtype
    indexes (GH 61771)
  * Bug in DataFrame.sum() with axis=1, DataFrameGroupBy.sum() or
    SeriesGroupBy.sum() with skipna=True, and Resampler.sum() with
    all NA values of StringDtype resulted in 0 instead of the empty
    string "" (GH 60229)
  * Fixed bug in DataFrame.explode() and Series.explode() where
    methods would fail with dtype="str" (GH 61623)
  * Fixed bug in unpickling objects pickled in pandas versions
    pre-2.3.0 that used StringDtype (GH 61763)
- Release 2.3.0
  ## Enhancements
  * The semantics for the copy keyword in __array__ methods (i.e.
    called when using np.array() or np.asarray() on pandas objects)
    have been updated to work correctly with NumPy >= 2 (GH 57739)
  * Series.str.decode() result now has StringDtype when
    future.infer_string is True (GH 60709)
  * Series.to_hdf() and DataFrame.to_hdf() now round-trip with
    StringDtype (GH 60663)

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:numeric/python-pandas?expand=0&rev=146
This commit is contained in:
2025-07-11 21:51:16 +00:00
committed by Git OBS Bridge
commit 2d224fe122
22 changed files with 6538 additions and 0 deletions

23
.gitattributes vendored Normal file
View File

@@ -0,0 +1,23 @@
## Default LFS
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.osc

8
_constraints Normal file
View File

@@ -0,0 +1,8 @@
<constraints>
<hardware>
<physicalmemory>
<size unit="G">12</size>
</physicalmemory>
<processors>4</processors>
</hardware>
</constraints>

6
_multibuild Normal file
View File

@@ -0,0 +1,6 @@
<multibuild>
<package>test-py310</package>
<package>test-py311</package>
<package>test-py312</package>
<package>test-py313</package>
</multibuild>

16
_service Normal file
View File

@@ -0,0 +1,16 @@
<services>
<service name="tar_scm" mode="disabled">
<param name="url">https://github.com/pandas-dev/pandas.git</param>
<param name="scm">git</param>
<param name="revision">v2.3.1</param>
<param name="versionformat">@PARENT_TAG@</param>
<param name="versionrewrite-pattern">v(.*)</param>
<param name="filename">pandas</param>
<param name="package-meta">yes</param>
</service>
<service name="recompress" mode="disabled">
<param name="file">*.tar</param>
<param name="compression">gz</param>
</service>
<service name="set_version" mode="disabled" />
</services>

316
dropna.patch Normal file
View File

@@ -0,0 +1,316 @@
From 1e899afbd9ca20f4ce9d6f93e1f62c072be0ed23 Mon Sep 17 00:00:00 2001
From: Gen Sato <52241300+halogen22@users.noreply.github.com>
Date: Tue, 18 Mar 2025 01:33:40 +0900
Subject: [PATCH] BUG: .mode(dropna=False) doesn't work with nullable integers
(#61132)
* Fix dropna bug when mode
* Fix test cases
* Fix data type incompatible
---
doc/source/whatsnew/v3.0.0.rst | 1 +
pandas/_libs/hashtable_func_helper.pxi.in | 2 +-
pandas/core/algorithms.py | 12 +++---
pandas/core/arrays/base.py | 5 ++-
pandas/core/arrays/categorical.py | 2 +-
pandas/core/arrays/datetimelike.py | 2 +-
pandas/core/arrays/masked.py | 8 +---
pandas/core/series.py | 2 +-
pandas/tests/series/test_reductions.py | 23 +++++++++++
pandas/tests/test_algos.py | 47 +++++++++++++++--------
10 files changed, 71 insertions(+), 33 deletions(-)
Index: pandas-2.2.3/pandas/_libs/hashtable_func_helper.pxi.in
===================================================================
--- pandas-2.2.3.orig/pandas/_libs/hashtable_func_helper.pxi.in
+++ pandas-2.2.3/pandas/_libs/hashtable_func_helper.pxi.in
@@ -443,7 +443,7 @@ def mode(ndarray[htfunc_t] values, bint
if na_counter > 0:
res_mask = np.zeros(j+1, dtype=np.bool_)
- res_mask[j] = True
+ res_mask[j] = (na_counter == max_count)
return modes[:j + 1], res_mask
Index: pandas-2.2.3/pandas/core/algorithms.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/algorithms.py
+++ pandas-2.2.3/pandas/core/algorithms.py
@@ -1022,7 +1022,7 @@ def duplicated(
def mode(
values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None
-) -> ArrayLike:
+) -> tuple[np.ndarray, npt.NDArray[np.bool_]] | ExtensionArray:
"""
Returns the mode(s) of an array.
@@ -1035,7 +1035,7 @@ def mode(
Returns
-------
- np.ndarray or ExtensionArray
+ Union[Tuple[np.ndarray, npt.NDArray[np.bool_]], ExtensionArray]
"""
values = _ensure_arraylike(values, func_name="mode")
original = values
@@ -1049,8 +1049,10 @@ def mode(
values = _ensure_data(values)
npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
- if res_mask is not None:
- return npresult, res_mask # type: ignore[return-value]
+ if res_mask is None:
+ res_mask = np.zeros(npresult.shape, dtype=np.bool_)
+ else:
+ return npresult, res_mask
try:
npresult = np.sort(npresult)
@@ -1061,7 +1063,7 @@ def mode(
)
result = _reconstruct_data(npresult, original.dtype, original)
- return result
+ return result, res_mask
def rank(
Index: pandas-2.2.3/pandas/core/arrays/base.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/arrays/base.py
+++ pandas-2.2.3/pandas/core/arrays/base.py
@@ -2270,8 +2270,9 @@ class ExtensionArray:
Sorted, if possible.
"""
# error: Incompatible return value type (got "Union[ExtensionArray,
- # ndarray[Any, Any]]", expected "Self")
- return mode(self, dropna=dropna) # type: ignore[return-value]
+ # Tuple[np.ndarray, npt.NDArray[np.bool_]]", expected "Self")
+ result, _ = mode(self, dropna=dropna)
+ return result # type: ignore[return-value]
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if any(
Index: pandas-2.2.3/pandas/core/arrays/categorical.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/arrays/categorical.py
+++ pandas-2.2.3/pandas/core/arrays/categorical.py
@@ -2435,7 +2435,7 @@ class Categorical(NDArrayBackedExtension
if dropna:
mask = self.isna()
- res_codes = algorithms.mode(codes, mask=mask)
+ res_codes, _ = algorithms.mode(codes, mask=mask)
res_codes = cast(np.ndarray, res_codes)
assert res_codes.dtype == codes.dtype
res = self._from_backing_data(res_codes)
Index: pandas-2.2.3/pandas/core/arrays/datetimelike.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/arrays/datetimelike.py
+++ pandas-2.2.3/pandas/core/arrays/datetimelike.py
@@ -1646,7 +1646,7 @@ class DatetimeLikeArrayMixin( # type: i
if dropna:
mask = self.isna()
- i8modes = algorithms.mode(self.view("i8"), mask=mask)
+ i8modes, _ = algorithms.mode(self.view("i8"), mask=mask)
npmodes = i8modes.view(self._ndarray.dtype)
npmodes = cast(np.ndarray, npmodes)
return self._from_backing_data(npmodes)
Index: pandas-2.2.3/pandas/core/arrays/masked.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/arrays/masked.py
+++ pandas-2.2.3/pandas/core/arrays/masked.py
@@ -1105,12 +1105,8 @@ class BaseMaskedArray(OpsMixin, Extensio
return Series(arr, index=index, name="count", copy=False)
def _mode(self, dropna: bool = True) -> Self:
- if dropna:
- result = mode(self._data, dropna=dropna, mask=self._mask)
- res_mask = np.zeros(result.shape, dtype=np.bool_)
- else:
- result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
- result = type(self)(result, res_mask) # type: ignore[arg-type]
+ result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
+ result = type(self)(result, res_mask)
return result[result.argsort()]
@doc(ExtensionArray.equals)
Index: pandas-2.2.3/pandas/core/series.py
===================================================================
--- pandas-2.2.3.orig/pandas/core/series.py
+++ pandas-2.2.3/pandas/core/series.py
@@ -2328,7 +2328,7 @@ class Series(base.IndexOpsMixin, NDFrame
# TODO: Add option for bins like value_counts()
values = self._values
if isinstance(values, np.ndarray):
- res_values = algorithms.mode(values, dropna=dropna)
+ res_values, _ = algorithms.mode(values, dropna=dropna)
else:
res_values = values._mode(dropna=dropna)
Index: pandas-2.2.3/pandas/tests/series/test_reductions.py
===================================================================
--- pandas-2.2.3.orig/pandas/tests/series/test_reductions.py
+++ pandas-2.2.3/pandas/tests/series/test_reductions.py
@@ -51,6 +51,29 @@ def test_mode_nullable_dtype(any_numeric
tm.assert_series_equal(result, expected)
+def test_mode_nullable_dtype_edge_case(any_numeric_ea_dtype):
+ # GH##58926
+ ser = Series([1, 2, 3, 1], dtype=any_numeric_ea_dtype)
+ result = ser.mode(dropna=False)
+ expected = Series([1], dtype=any_numeric_ea_dtype)
+ tm.assert_series_equal(result, expected)
+
+ ser2 = Series([1, 1, 2, 3, pd.NA], dtype=any_numeric_ea_dtype)
+ result = ser2.mode(dropna=False)
+ expected = Series([1], dtype=any_numeric_ea_dtype)
+ tm.assert_series_equal(result, expected)
+
+ ser3 = Series([1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
+ result = ser3.mode(dropna=False)
+ expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
+ tm.assert_series_equal(result, expected)
+
+ ser4 = Series([1, 1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
+ result = ser4.mode(dropna=False)
+ expected = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
+ tm.assert_series_equal(result, expected)
+
+
def test_mode_infer_string():
# GH#56183
pytest.importorskip("pyarrow")
Index: pandas-2.2.3/pandas/tests/test_algos.py
===================================================================
--- pandas-2.2.3.orig/pandas/tests/test_algos.py
+++ pandas-2.2.3/pandas/tests/test_algos.py
@@ -1840,7 +1840,8 @@ class TestRank:
class TestMode:
def test_no_mode(self):
exp = Series([], dtype=np.float64, index=Index([], dtype=int))
- tm.assert_numpy_array_equal(algos.mode(np.array([])), exp.values)
+ result, _ = algos.mode(np.array([]))
+ tm.assert_numpy_array_equal(result, exp.values)
@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
def test_mode_single(self, dt):
@@ -1853,20 +1854,24 @@ class TestMode:
ser = Series(data_single, dtype=dt)
exp = Series(exp_single, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
ser = Series(data_multi, dtype=dt)
exp = Series(exp_multi, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_mode_obj_int(self):
exp = Series([1], dtype=int)
- tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
+ result, _ = algos.mode(exp.values)
+ tm.assert_numpy_array_equal(result, exp.values)
exp = Series(["a", "b", "c"], dtype=object)
- tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
+ result, _ = algos.mode(exp.values)
+ tm.assert_numpy_array_equal(result, exp.values)
@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
def test_number_mode(self, dt):
@@ -1878,12 +1883,14 @@ class TestMode:
ser = Series(data_single, dtype=dt)
exp = Series(exp_single, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
ser = Series(data_multi, dtype=dt)
exp = Series(exp_multi, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_strobj_mode(self):
@@ -1892,7 +1899,8 @@ class TestMode:
ser = Series(data, dtype="c")
exp = Series(exp, dtype="c")
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
@pytest.mark.parametrize("dt", [str, object])
@@ -1902,7 +1910,8 @@ class TestMode:
ser = Series(data, dtype=dt)
exp = Series(exp, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_datelike_mode(self):
@@ -1936,18 +1945,21 @@ class TestMode:
def test_mixed_dtype(self):
exp = Series(["foo"], dtype=object)
ser = Series([1, "foo", "foo"])
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_uint64_overflow(self):
exp = Series([2**63], dtype=np.uint64)
ser = Series([1, 2**63, 2**63], dtype=np.uint64)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
exp = Series([1, 2**63], dtype=np.uint64)
ser = Series([1, 2**63], dtype=np.uint64)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_categorical(self):
@@ -1969,15 +1981,18 @@ class TestMode:
def test_index(self):
idx = Index([1, 2, 3])
exp = Series([1, 2, 3], dtype=np.int64)
- tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+ result, _ = algos.mode(idx)
+ tm.assert_numpy_array_equal(result, exp.values)
idx = Index([1, "a", "a"])
exp = Series(["a"], dtype=object)
- tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+ result, _ = algos.mode(idx)
+ tm.assert_numpy_array_equal(result, exp.values)
idx = Index([1, 1, 2, 3, 3])
exp = Series([1, 3], dtype=np.int64)
- tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+ result, _ = algos.mode(idx)
+ tm.assert_numpy_array_equal(result, exp.values)
idx = Index(
["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],

3
pandas-2.2.2.tar.gz Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8f044538e419c7d5c03434c96b1439cbd88701dcd02d6a79b08947fbb656c2f4
size 50782448

3
pandas-2.2.3.tar.gz Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be944b80189d9bbcbd269aa5f43cfe9d607149a19473121aed8d51f5d56c4ff0
size 52331559

3
pandas-2.3.1.tar.gz Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b80d93d9d3b3863c33864b393ba1712d7bc0461720b61b799988e13f06c5ad37
size 403715895

View File

@@ -0,0 +1,40 @@
From 1828b62ee913da44ec4402642ef7baaafeb65677 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 15 Apr 2024 09:47:31 -1000
Subject: [PATCH] Backport PR #58268: CI/TST: Unxfail
test_slice_locs_negative_step Pyarrow test
---
pandas/tests/indexes/object/test_indexing.py | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py
index 443cacf94d239..ebf9dac715f8d 100644
--- a/pandas/tests/indexes/object/test_indexing.py
+++ b/pandas/tests/indexes/object/test_indexing.py
@@ -7,7 +7,6 @@
NA,
is_matching_na,
)
-from pandas.compat import pa_version_under16p0
import pandas.util._test_decorators as td
import pandas as pd
@@ -201,16 +200,7 @@ class TestSliceLocs:
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
],
)
- def test_slice_locs_negative_step(self, in_slice, expected, dtype, request):
- if (
- not pa_version_under16p0
- and dtype == "string[pyarrow_numpy]"
- and in_slice == slice("a", "a", -1)
- ):
- request.applymarker(
- pytest.mark.xfail(reason="https://github.com/apache/arrow/issues/40642")
- )
-
+ def test_slice_locs_negative_step(self, in_slice, expected, dtype):
index = Index(list("bcdxy"), dtype=dtype)
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)

View File

@@ -0,0 +1,71 @@
From 0cab756077f5291f8d6a7fcfacaf374f62b866a0 Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Mon, 29 Apr 2024 23:11:21 -0400
Subject: [PATCH 1/2] Remove deprecated plot_date calls
These were deprecated in Matplotlib 3.9.
---
pandas/tests/plotting/test_datetimelike.py | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
index 6b709522bab70..b91bde41bf4c4 100644
--- a/pandas/tests/plotting/test_datetimelike.py
+++ b/pandas/tests/plotting/test_datetimelike.py
@@ -1432,13 +1432,11 @@ def test_mpl_nopandas(self):
values1 = np.arange(10.0, 11.0, 0.5)
values2 = np.arange(11.0, 12.0, 0.5)
- kw = {"fmt": "-", "lw": 4}
-
_, ax = mpl.pyplot.subplots()
- ax.plot_date([x.toordinal() for x in dates], values1, **kw)
- ax.plot_date([x.toordinal() for x in dates], values2, **kw)
-
- line1, line2 = ax.get_lines()
+ line1, line2, = ax.plot(
+ [x.toordinal() for x in dates], values1, "-",
+ [x.toordinal() for x in dates], values2, "-",
+ linewidth=4)
exp = np.array([x.toordinal() for x in dates], dtype=np.float64)
tm.assert_numpy_array_equal(line1.get_xydata()[:, 0], exp)
From 6d6574c4e71e3bab91503f85c8aa80c927785865 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
<66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 30 Apr 2024 16:47:26 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
---
pandas/tests/plotting/test_datetimelike.py | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
index b91bde41bf4c4..4b4eeada58366 100644
--- a/pandas/tests/plotting/test_datetimelike.py
+++ b/pandas/tests/plotting/test_datetimelike.py
@@ -1433,10 +1433,18 @@ def test_mpl_nopandas(self):
values2 = np.arange(11.0, 12.0, 0.5)
_, ax = mpl.pyplot.subplots()
- line1, line2, = ax.plot(
- [x.toordinal() for x in dates], values1, "-",
- [x.toordinal() for x in dates], values2, "-",
- linewidth=4)
+ (
+ line1,
+ line2,
+ ) = ax.plot(
+ [x.toordinal() for x in dates],
+ values1,
+ "-",
+ [x.toordinal() for x in dates],
+ values2,
+ "-",
+ linewidth=4,
+ )
exp = np.array([x.toordinal() for x in dates], dtype=np.float64)
tm.assert_numpy_array_equal(line1.get_xydata()[:, 0], exp)

View File

@@ -0,0 +1,41 @@
From d36f6dac81b577504386b53357270d9f05a9bc89 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 14 May 2024 09:04:20 -1000
Subject: [PATCH] Backport PR #58719: CI: xfail test_to_xarray_index_types due
to new 2024.5 release
---
pandas/tests/generic/test_to_xarray.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py
index d8401a8b2ae3f..491f621783a76 100644
--- a/pandas/tests/generic/test_to_xarray.py
+++ b/pandas/tests/generic/test_to_xarray.py
@@ -9,6 +9,7 @@
date_range,
)
import pandas._testing as tm
+from pandas.util.version import Version
pytest.importorskip("xarray")
@@ -29,11 +30,17 @@ def df(self):
}
)
- def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
+ def test_to_xarray_index_types(self, index_flat, df, using_infer_string, request):
index = index_flat
# MultiIndex is tested in test_to_xarray_with_multiindex
if len(index) == 0:
pytest.skip("Test doesn't make sense for empty index")
+ import xarray
+
+ if Version(xarray.__version__) >= Version("2024.5"):
+ request.applymarker(
+ pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/9026")
+ )
from xarray import Dataset

View File

@@ -0,0 +1,29 @@
From d4e803caf7aabd464f6fb1d43ef39903911a3cec Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 3 Jul 2024 06:45:24 -1000
Subject: [PATCH] Backport PR #59168: TST: Address UserWarning in matplotlib
test
---
pandas/plotting/_matplotlib/core.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 2979903edf360..52382d9f7d572 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -893,7 +893,13 @@ def _make_legend(self) -> None:
elif self.subplots and self.legend:
for ax in self.axes:
if ax.get_visible():
- ax.legend(loc="best")
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore",
+ "No artists with labels found to put in legend.",
+ UserWarning,
+ )
+ ax.legend(loc="best")
@final
@staticmethod

View File

@@ -0,0 +1,174 @@
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 361998db8e..87d419e2db 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -111,6 +111,7 @@ ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES]
COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
STRING_DTYPES: list[Dtype] = [str, "str", "U"]
+COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"]
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 7c35dfdde9..10134c90f8 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1403,6 +1403,21 @@ def complex_dtype(request):
return request.param
+@pytest.fixture(params=tm.COMPLEX_FLOAT_DTYPES)
+def complex_or_float_dtype(request):
+ """
+ Parameterized fixture for complex and numpy float dtypes.
+
+ * complex
+ * 'complex64'
+ * 'complex128'
+ * float
+ * 'float32'
+ * 'float64'
+ """
+ return request.param
+
+
@pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES)
def any_signed_int_numpy_dtype(request):
"""
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
index b5861fbaeb..d642c37cea 100644
--- a/pandas/core/computation/expr.py
+++ b/pandas/core/computation/expr.py
@@ -31,7 +31,6 @@ from pandas.core.computation.ops import (
UNARY_OPS_SYMS,
BinOp,
Constant,
- Div,
FuncNode,
Op,
Term,
@@ -370,7 +369,7 @@ class BaseExprVisitor(ast.NodeVisitor):
"Add",
"Sub",
"Mult",
- None,
+ "Div",
"Pow",
"FloorDiv",
"Mod",
@@ -533,9 +532,6 @@ class BaseExprVisitor(ast.NodeVisitor):
left, right = self._maybe_downcast_constants(left, right)
return self._maybe_evaluate_binop(op, op_class, left, right)
- def visit_Div(self, node, **kwargs):
- return lambda lhs, rhs: Div(lhs, rhs)
-
def visit_UnaryOp(self, node, **kwargs):
op = self.visit(node.op)
operand = self.visit(node.operand)
diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py
index 95ac20ba39..ea8b1c0457 100644
--- a/pandas/core/computation/ops.py
+++ b/pandas/core/computation/ops.py
@@ -332,31 +332,6 @@ for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):
_binary_ops_dict.update(d)
-def _cast_inplace(terms, acceptable_dtypes, dtype) -> None:
- """
- Cast an expression inplace.
-
- Parameters
- ----------
- terms : Op
- The expression that should cast.
- acceptable_dtypes : list of acceptable numpy.dtype
- Will not cast if term's dtype in this list.
- dtype : str or numpy.dtype
- The dtype to cast to.
- """
- dt = np.dtype(dtype)
- for term in terms:
- if term.type in acceptable_dtypes:
- continue
-
- try:
- new_value = term.value.astype(dt)
- except AttributeError:
- new_value = dt.type(term.value)
- term.update(new_value)
-
-
def is_term(obj) -> bool:
return isinstance(obj, Term)
@@ -516,31 +491,6 @@ class BinOp(Op):
def isnumeric(dtype) -> bool:
return issubclass(np.dtype(dtype).type, np.number)
-
-class Div(BinOp):
- """
- Div operator to special case casting.
-
- Parameters
- ----------
- lhs, rhs : Term or Op
- The Terms or Ops in the ``/`` expression.
- """
-
- def __init__(self, lhs, rhs) -> None:
- super().__init__("/", lhs, rhs)
-
- if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
- raise TypeError(
- f"unsupported operand type(s) for {self.op}: "
- f"'{lhs.return_type}' and '{rhs.return_type}'"
- )
-
- # do not upcast float32s to float64 un-necessarily
- acceptable_dtypes = [np.float32, np.float64]
- _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64)
-
-
UNARY_OPS_SYMS = ("+", "-", "~", "not")
_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert)
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs))
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 17630f14b0..e8fad6b8cb 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -747,16 +747,26 @@ class TestTypeCasting:
@pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"])
# maybe someday... numexpr has too many upcasting rules now
# chain(*(np.core.sctypes[x] for x in ['uint', 'int', 'float']))
- @pytest.mark.parametrize("dt", [np.float32, np.float64])
@pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")])
- def test_binop_typecasting(self, engine, parser, op, dt, left_right):
- df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dt)
+ def test_binop_typecasting(
+ self, engine, parser, op, complex_or_float_dtype, left_right, request
+ ):
+ # GH#21374
+ dtype = complex_or_float_dtype
+ df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dtype)
left, right = left_right
s = f"{left} {op} {right}"
res = pd.eval(s, engine=engine, parser=parser)
- assert df.values.dtype == dt
- assert res.values.dtype == dt
- tm.assert_frame_equal(res, eval(s))
+ if dtype == "complex64" and engine == "numexpr":
+ mark = pytest.mark.xfail(
+ reason="numexpr issue with complex that are upcast "
+ "to complex 128 "
+ "https://github.com/pydata/numexpr/issues/492"
+ )
+ request.applymarker(mark)
+ assert df.values.dtype == dtype
+ assert res.values.dtype == dtype
+ tm.assert_frame_equal(res, eval(s), check_exact=False)
# -------------------------------------

View File

@@ -0,0 +1,23 @@
From f97f5e107f145fd09133d21cb1902c84c936754c Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 11 Dec 2024 13:20:10 -0800
Subject: [PATCH] Backport PR #60544: CI/TST: Use tm.external_error_raised for
test_from_arrow_respecting_given_dtype_unsafe
---
pandas/tests/extension/test_arrow.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 03ab7c7f1dad8..470ca0673c60e 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1637,7 +1637,7 @@ def test_from_arrow_respecting_given_dtype():
def test_from_arrow_respecting_given_dtype_unsafe():
array = pa.array([1.5, 2.5], type=pa.float64())
- with pytest.raises(pa.ArrowInvalid, match="Float value 1.5 was truncated"):
+ with tm.external_error_raised(pa.ArrowInvalid):
array.to_pandas(types_mapper={pa.float64(): ArrowDtype(pa.int64())}.get)

View File

@@ -0,0 +1,306 @@
From 78b63f87013c8d2fe98fc86ecf685b5cd20da3e1 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 17 Dec 2024 15:01:59 -0800
Subject: [PATCH 1/3] Backport PR #60584: TST: Address matplotlib 3.10
deprecation of vert=
---
pandas/plotting/_matplotlib/boxplot.py | 4 +-
pandas/plotting/_matplotlib/tools.py | 2 +-
pandas/tests/plotting/frame/test_frame.py | 41 ++++++++++++----
pandas/tests/plotting/test_boxplot_method.py | 50 +++++++++++++++-----
4 files changed, 74 insertions(+), 23 deletions(-)
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index d2b76decaa75d..f78327896908f 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -20,6 +20,7 @@
import pandas as pd
import pandas.core.common as com
+from pandas.util.version import Version
from pandas.io.formats.printing import pprint_thing
from pandas.plotting._matplotlib.core import (
@@ -54,7 +55,8 @@ def _set_ticklabels(ax: Axes, labels: list[str], is_vertical: bool, **kwargs) ->
ticks = ax.get_xticks() if is_vertical else ax.get_yticks()
if len(ticks) != len(labels):
i, remainder = divmod(len(ticks), len(labels))
- assert remainder == 0, remainder
+ if Version(mpl.__version__) < Version("3.10"):
+ assert remainder == 0, remainder
labels *= i
if is_vertical:
ax.set_xticklabels(labels, **kwargs)
diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py
index 898b5b25e7b01..98441c5afbaa4 100644
--- a/pandas/plotting/_matplotlib/tools.py
+++ b/pandas/plotting/_matplotlib/tools.py
@@ -57,7 +57,7 @@ def format_date_labels(ax: Axes, rot) -> None:
fig = ax.get_figure()
if fig is not None:
# should always be a Figure but can technically be None
- maybe_adjust_figure(fig, bottom=0.2)
+ maybe_adjust_figure(fig, bottom=0.2) # type: ignore[arg-type]
def table(
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
index 4ca4067214bbd..33366b4eabba5 100644
--- a/pandas/tests/plotting/frame/test_frame.py
+++ b/pandas/tests/plotting/frame/test_frame.py
@@ -1059,28 +1059,43 @@ def test_boxplot_series_positions(self, hist_df):
tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions)
assert len(ax.lines) == 7 * len(numeric_cols)
+ @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
+ @pytest.mark.xfail(
+ Version(mpl.__version__) >= Version("3.10"),
+ reason="Fails starting with matplotlib 3.10",
+ )
def test_boxplot_vertical(self, hist_df):
df = hist_df
numeric_cols = df._get_numeric_data().columns
labels = [pprint_thing(c) for c in numeric_cols]
# if horizontal, yticklabels are rotated
- ax = df.plot.box(rot=50, fontsize=8, vert=False)
+ kwargs = (
+ {"vert": False}
+ if Version(mpl.__version__) < Version("3.10")
+ else {"orientation": "horizontal"}
+ )
+ ax = df.plot.box(rot=50, fontsize=8, **kwargs)
_check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8)
_check_text_labels(ax.get_yticklabels(), labels)
assert len(ax.lines) == 7 * len(numeric_cols)
- @pytest.mark.filterwarnings("ignore:Attempt:UserWarning")
+ @pytest.mark.filterwarnings("ignore::UserWarning")
+ @pytest.mark.xfail(
+ Version(mpl.__version__) >= Version("3.10"),
+ reason="Fails starting with matplotlib version 3.10",
+ )
def test_boxplot_vertical_subplots(self, hist_df):
df = hist_df
numeric_cols = df._get_numeric_data().columns
labels = [pprint_thing(c) for c in numeric_cols]
+ kwargs = (
+ {"vert": False}
+ if Version(mpl.__version__) < Version("3.10")
+ else {"orientation": "horizontal"}
+ )
axes = _check_plot_works(
- df.plot.box,
- default_axes=True,
- subplots=True,
- vert=False,
- logx=True,
+ df.plot.box, default_axes=True, subplots=True, logx=True, **kwargs
)
_check_axes_shape(axes, axes_num=3, layout=(1, 3))
_check_ax_scales(axes, xaxis="log")
@@ -1088,12 +1103,22 @@ def test_boxplot_vertical_subplots(self, hist_df):
_check_text_labels(ax.get_yticklabels(), [label])
assert len(ax.lines) == 7
+ @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
+ @pytest.mark.xfail(
+ Version(mpl.__version__) >= Version("3.10"),
+ reason="Fails starting with matplotlib 3.10",
+ )
def test_boxplot_vertical_positions(self, hist_df):
df = hist_df
numeric_cols = df._get_numeric_data().columns
labels = [pprint_thing(c) for c in numeric_cols]
positions = np.array([3, 2, 8])
- ax = df.plot.box(positions=positions, vert=False)
+ kwargs = (
+ {"vert": False}
+ if Version(mpl.__version__) < Version("3.10")
+ else {"orientation": "horizontal"}
+ )
+ ax = df.plot.box(positions=positions, **kwargs)
_check_text_labels(ax.get_yticklabels(), labels)
tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions)
assert len(ax.lines) == 7 * len(numeric_cols)
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
index 76f7fa1f22eec..969ea76efd041 100644
--- a/pandas/tests/plotting/test_boxplot_method.py
+++ b/pandas/tests/plotting/test_boxplot_method.py
@@ -1,5 +1,7 @@
""" Test cases for .boxplot method """
+from __future__ import annotations
+
import itertools
import string
@@ -22,6 +24,7 @@
_check_ticks_props,
_check_visible,
)
+from pandas.util.version import Version
from pandas.io.formats.printing import pprint_thing
@@ -35,6 +38,17 @@ def _check_ax_limits(col, ax):
assert y_max >= col.max()
+if Version(mpl.__version__) < Version("3.10"):
+ verts: list[dict[str, bool | str]] = [{"vert": False}, {"vert": True}]
+else:
+ verts = [{"orientation": "horizontal"}, {"orientation": "vertical"}]
+
+
+@pytest.fixture(params=verts)
+def vert(request):
+ return request.param
+
+
class TestDataFramePlots:
def test_stacked_boxplot_set_axis(self):
# GH2980
@@ -315,7 +329,7 @@ def test_specified_props_kwd(self, props, expected):
assert result[expected][0].get_color() == "C1"
- @pytest.mark.parametrize("vert", [True, False])
+ @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
def test_plot_xlabel_ylabel(self, vert):
df = DataFrame(
{
@@ -325,11 +339,11 @@ def test_plot_xlabel_ylabel(self, vert):
}
)
xlabel, ylabel = "x", "y"
- ax = df.plot(kind="box", vert=vert, xlabel=xlabel, ylabel=ylabel)
+ ax = df.plot(kind="box", xlabel=xlabel, ylabel=ylabel, **vert)
assert ax.get_xlabel() == xlabel
assert ax.get_ylabel() == ylabel
- @pytest.mark.parametrize("vert", [True, False])
+ @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
def test_plot_box(self, vert):
# GH 54941
rng = np.random.default_rng(2)
@@ -338,14 +352,14 @@ def test_plot_box(self, vert):
xlabel, ylabel = "x", "y"
_, axs = plt.subplots(ncols=2, figsize=(10, 7), sharey=True)
- df1.plot.box(ax=axs[0], vert=vert, xlabel=xlabel, ylabel=ylabel)
- df2.plot.box(ax=axs[1], vert=vert, xlabel=xlabel, ylabel=ylabel)
+ df1.plot.box(ax=axs[0], xlabel=xlabel, ylabel=ylabel, **vert)
+ df2.plot.box(ax=axs[1], xlabel=xlabel, ylabel=ylabel, **vert)
for ax in axs:
assert ax.get_xlabel() == xlabel
assert ax.get_ylabel() == ylabel
mpl.pyplot.close()
- @pytest.mark.parametrize("vert", [True, False])
+ @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
def test_boxplot_xlabel_ylabel(self, vert):
df = DataFrame(
{
@@ -355,11 +369,11 @@ def test_boxplot_xlabel_ylabel(self, vert):
}
)
xlabel, ylabel = "x", "y"
- ax = df.boxplot(vert=vert, xlabel=xlabel, ylabel=ylabel)
+ ax = df.boxplot(xlabel=xlabel, ylabel=ylabel, **vert)
assert ax.get_xlabel() == xlabel
assert ax.get_ylabel() == ylabel
- @pytest.mark.parametrize("vert", [True, False])
+ @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
def test_boxplot_group_xlabel_ylabel(self, vert):
df = DataFrame(
{
@@ -369,14 +383,20 @@ def test_boxplot_group_xlabel_ylabel(self, vert):
}
)
xlabel, ylabel = "x", "y"
- ax = df.boxplot(by="group", vert=vert, xlabel=xlabel, ylabel=ylabel)
+ ax = df.boxplot(by="group", xlabel=xlabel, ylabel=ylabel, **vert)
for subplot in ax:
assert subplot.get_xlabel() == xlabel
assert subplot.get_ylabel() == ylabel
mpl.pyplot.close()
- @pytest.mark.parametrize("vert", [True, False])
- def test_boxplot_group_no_xlabel_ylabel(self, vert):
+ @pytest.mark.filterwarnings("ignore:set_ticklabels:UserWarning")
+ def test_boxplot_group_no_xlabel_ylabel(self, vert, request):
+ if Version(mpl.__version__) >= Version("3.10") and vert == {
+ "orientation": "horizontal"
+ }:
+ request.applymarker(
+ pytest.mark.xfail(reason=f"{vert} fails starting with matplotlib 3.10")
+ )
df = DataFrame(
{
"a": np.random.default_rng(2).standard_normal(10),
@@ -384,9 +404,13 @@ def test_boxplot_group_no_xlabel_ylabel(self, vert):
"group": np.random.default_rng(2).choice(["group1", "group2"], 10),
}
)
- ax = df.boxplot(by="group", vert=vert)
+ ax = df.boxplot(by="group", **vert)
for subplot in ax:
- target_label = subplot.get_xlabel() if vert else subplot.get_ylabel()
+ target_label = (
+ subplot.get_xlabel()
+ if vert == {"vert": True} or vert == {"orientation": "vertical"}
+ else subplot.get_ylabel()
+ )
assert target_label == pprint_thing(["group"])
mpl.pyplot.close()
From 8d4c506e4352341dff217f1658a4c1655031eef7 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 17 Dec 2024 15:15:51 -0800
Subject: [PATCH 2/3] Add missing import
---
pandas/plotting/_matplotlib/boxplot.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index f78327896908f..80f0349b205e6 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -7,6 +7,7 @@
)
import warnings
+import matplotlib as mpl
from matplotlib.artist import setp
import numpy as np
From 5b0e557934b771b5d4d5d100605d351cf48a3b10 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 17 Dec 2024 15:27:55 -0800
Subject: [PATCH 3/3] Ignore pre-commit check
---
pandas/tests/plotting/test_boxplot_method.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
index 969ea76efd041..e1b03a34086c0 100644
--- a/pandas/tests/plotting/test_boxplot_method.py
+++ b/pandas/tests/plotting/test_boxplot_method.py
@@ -408,7 +408,8 @@ def test_boxplot_group_no_xlabel_ylabel(self, vert, request):
for subplot in ax:
target_label = (
subplot.get_xlabel()
- if vert == {"vert": True} or vert == {"orientation": "vertical"}
+ if vert == {"vert": True} # noqa: PLR1714
+ or vert == {"orientation": "vertical"}
else subplot.get_ylabel()
)
assert target_label == pprint_thing(["group"])

316
pandas-pr61132-dropna.patch Normal file
View File

@@ -0,0 +1,316 @@
From 1e899afbd9ca20f4ce9d6f93e1f62c072be0ed23 Mon Sep 17 00:00:00 2001
From: Gen Sato <52241300+halogen22@users.noreply.github.com>
Date: Tue, 18 Mar 2025 01:33:40 +0900
Subject: [PATCH] BUG: .mode(dropna=False) doesn't work with nullable integers
(#61132)
* Fix dropna bug when mode
* Fix test cases
* Fix data type incompatible
---
doc/source/whatsnew/v3.0.0.rst | 1 +
pandas/_libs/hashtable_func_helper.pxi.in | 2 +-
pandas/core/algorithms.py | 12 +++---
pandas/core/arrays/base.py | 5 ++-
pandas/core/arrays/categorical.py | 2 +-
pandas/core/arrays/datetimelike.py | 2 +-
pandas/core/arrays/masked.py | 8 +---
pandas/core/series.py | 2 +-
pandas/tests/series/test_reductions.py | 23 +++++++++++
pandas/tests/test_algos.py | 47 +++++++++++++++--------
10 files changed, 71 insertions(+), 33 deletions(-)
Index: pandas-2.3.1/pandas/_libs/hashtable_func_helper.pxi.in
===================================================================
--- pandas-2.3.1.orig/pandas/_libs/hashtable_func_helper.pxi.in
+++ pandas-2.3.1/pandas/_libs/hashtable_func_helper.pxi.in
@@ -443,7 +443,7 @@ def mode(ndarray[htfunc_t] values, bint
if na_counter > 0:
res_mask = np.zeros(j+1, dtype=np.bool_)
- res_mask[j] = True
+ res_mask[j] = (na_counter == max_count)
return modes[:j + 1], res_mask
Index: pandas-2.3.1/pandas/core/algorithms.py
===================================================================
--- pandas-2.3.1.orig/pandas/core/algorithms.py
+++ pandas-2.3.1/pandas/core/algorithms.py
@@ -1022,7 +1022,7 @@ def duplicated(
def mode(
values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None
-) -> ArrayLike:
+) -> tuple[np.ndarray, npt.NDArray[np.bool_]] | ExtensionArray:
"""
Returns the mode(s) of an array.
@@ -1035,7 +1035,7 @@ def mode(
Returns
-------
- np.ndarray or ExtensionArray
+ Union[Tuple[np.ndarray, npt.NDArray[np.bool_]], ExtensionArray]
"""
values = _ensure_arraylike(values, func_name="mode")
original = values
@@ -1049,8 +1049,10 @@ def mode(
values = _ensure_data(values)
npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
- if res_mask is not None:
- return npresult, res_mask # type: ignore[return-value]
+ if res_mask is None:
+ res_mask = np.zeros(npresult.shape, dtype=np.bool_)
+ else:
+ return npresult, res_mask
try:
npresult = safe_sort(npresult)
@@ -1061,7 +1063,7 @@ def mode(
)
result = _reconstruct_data(npresult, original.dtype, original)
- return result
+ return result, res_mask
def rank(
Index: pandas-2.3.1/pandas/core/arrays/base.py
===================================================================
--- pandas-2.3.1.orig/pandas/core/arrays/base.py
+++ pandas-2.3.1/pandas/core/arrays/base.py
@@ -2270,8 +2270,9 @@ class ExtensionArray:
Sorted, if possible.
"""
# error: Incompatible return value type (got "Union[ExtensionArray,
- # ndarray[Any, Any]]", expected "Self")
- return mode(self, dropna=dropna) # type: ignore[return-value]
+ # Tuple[np.ndarray, npt.NDArray[np.bool_]]", expected "Self")
+ result, _ = mode(self, dropna=dropna)
+ return result # type: ignore[return-value]
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if any(
Index: pandas-2.3.1/pandas/core/arrays/categorical.py
===================================================================
--- pandas-2.3.1.orig/pandas/core/arrays/categorical.py
+++ pandas-2.3.1/pandas/core/arrays/categorical.py
@@ -2459,7 +2459,7 @@ class Categorical(NDArrayBackedExtension
if dropna:
mask = self.isna()
- res_codes = algorithms.mode(codes, mask=mask)
+ res_codes, _ = algorithms.mode(codes, mask=mask)
res_codes = cast(np.ndarray, res_codes)
assert res_codes.dtype == codes.dtype
res = self._from_backing_data(res_codes)
Index: pandas-2.3.1/pandas/core/arrays/datetimelike.py
===================================================================
--- pandas-2.3.1.orig/pandas/core/arrays/datetimelike.py
+++ pandas-2.3.1/pandas/core/arrays/datetimelike.py
@@ -1669,7 +1669,7 @@ class DatetimeLikeArrayMixin( # type: i
if dropna:
mask = self.isna()
- i8modes = algorithms.mode(self.view("i8"), mask=mask)
+ i8modes, _ = algorithms.mode(self.view("i8"), mask=mask)
npmodes = i8modes.view(self._ndarray.dtype)
npmodes = cast(np.ndarray, npmodes)
return self._from_backing_data(npmodes)
Index: pandas-2.3.1/pandas/core/arrays/masked.py
===================================================================
--- pandas-2.3.1.orig/pandas/core/arrays/masked.py
+++ pandas-2.3.1/pandas/core/arrays/masked.py
@@ -1124,12 +1124,8 @@ class BaseMaskedArray(OpsMixin, Extensio
return Series(arr, index=index, name="count", copy=False)
def _mode(self, dropna: bool = True) -> Self:
- if dropna:
- result = mode(self._data, dropna=dropna, mask=self._mask)
- res_mask = np.zeros(result.shape, dtype=np.bool_)
- else:
- result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
- result = type(self)(result, res_mask) # type: ignore[arg-type]
+ result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
+ result = type(self)(result, res_mask)
return result[result.argsort()]
@doc(ExtensionArray.equals)
Index: pandas-2.3.1/pandas/core/series.py
===================================================================
--- pandas-2.3.1.orig/pandas/core/series.py
+++ pandas-2.3.1/pandas/core/series.py
@@ -2337,7 +2337,7 @@ class Series(base.IndexOpsMixin, NDFrame
# TODO: Add option for bins like value_counts()
values = self._values
if isinstance(values, np.ndarray):
- res_values = algorithms.mode(values, dropna=dropna)
+ res_values, _ = algorithms.mode(values, dropna=dropna)
else:
res_values = values._mode(dropna=dropna)
Index: pandas-2.3.1/pandas/tests/series/test_reductions.py
===================================================================
--- pandas-2.3.1.orig/pandas/tests/series/test_reductions.py
+++ pandas-2.3.1/pandas/tests/series/test_reductions.py
@@ -51,6 +51,29 @@ def test_mode_nullable_dtype(any_numeric
tm.assert_series_equal(result, expected)
+def test_mode_nullable_dtype_edge_case(any_numeric_ea_dtype):
+ # GH##58926
+ ser = Series([1, 2, 3, 1], dtype=any_numeric_ea_dtype)
+ result = ser.mode(dropna=False)
+ expected = Series([1], dtype=any_numeric_ea_dtype)
+ tm.assert_series_equal(result, expected)
+
+ ser2 = Series([1, 1, 2, 3, pd.NA], dtype=any_numeric_ea_dtype)
+ result = ser2.mode(dropna=False)
+ expected = Series([1], dtype=any_numeric_ea_dtype)
+ tm.assert_series_equal(result, expected)
+
+ ser3 = Series([1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
+ result = ser3.mode(dropna=False)
+ expected = Series([pd.NA], dtype=any_numeric_ea_dtype)
+ tm.assert_series_equal(result, expected)
+
+ ser4 = Series([1, 1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype)
+ result = ser4.mode(dropna=False)
+ expected = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
+ tm.assert_series_equal(result, expected)
+
+
def test_mode_infer_string():
# GH#56183
pytest.importorskip("pyarrow")
Index: pandas-2.3.1/pandas/tests/test_algos.py
===================================================================
--- pandas-2.3.1.orig/pandas/tests/test_algos.py
+++ pandas-2.3.1/pandas/tests/test_algos.py
@@ -1855,7 +1855,8 @@ class TestRank:
class TestMode:
def test_no_mode(self):
exp = Series([], dtype=np.float64, index=Index([], dtype=int))
- tm.assert_numpy_array_equal(algos.mode(np.array([])), exp.values)
+ result, _ = algos.mode(np.array([]))
+ tm.assert_numpy_array_equal(result, exp.values)
@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
def test_mode_single(self, dt):
@@ -1868,20 +1869,24 @@ class TestMode:
ser = Series(data_single, dtype=dt)
exp = Series(exp_single, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
ser = Series(data_multi, dtype=dt)
exp = Series(exp_multi, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_mode_obj_int(self):
exp = Series([1], dtype=int)
- tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
+ result, _ = algos.mode(exp.values)
+ tm.assert_numpy_array_equal(result, exp.values)
exp = Series(["a", "b", "c"], dtype=object)
- tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
+ result, _ = algos.mode(exp.values)
+ tm.assert_numpy_array_equal(result, exp.values)
@pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
def test_number_mode(self, dt):
@@ -1893,12 +1898,14 @@ class TestMode:
ser = Series(data_single, dtype=dt)
exp = Series(exp_single, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
ser = Series(data_multi, dtype=dt)
exp = Series(exp_multi, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_strobj_mode(self):
@@ -1907,7 +1914,8 @@ class TestMode:
ser = Series(data, dtype="c")
exp = Series(exp, dtype="c")
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
@pytest.mark.parametrize("dt", [str, object])
@@ -1920,7 +1928,8 @@ class TestMode:
if using_infer_string and dt is str:
tm.assert_extension_array_equal(algos.mode(ser.values), exp.values)
else:
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_datelike_mode(self):
@@ -1954,18 +1963,21 @@ class TestMode:
def test_mixed_dtype(self):
exp = Series(["foo"], dtype=object)
ser = Series([1, "foo", "foo"])
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_uint64_overflow(self):
exp = Series([2**63], dtype=np.uint64)
ser = Series([1, 2**63, 2**63], dtype=np.uint64)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
exp = Series([1, 2**63], dtype=np.uint64)
ser = Series([1, 2**63], dtype=np.uint64)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ result, _ = algos.mode(ser.values)
+ tm.assert_numpy_array_equal(result, exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_categorical(self):
@@ -1987,15 +1999,18 @@ class TestMode:
def test_index(self):
idx = Index([1, 2, 3])
exp = Series([1, 2, 3], dtype=np.int64)
- tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+ result, _ = algos.mode(idx)
+ tm.assert_numpy_array_equal(result, exp.values)
idx = Index([1, "a", "a"])
exp = Series(["a"], dtype=object)
- tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+ result, _ = algos.mode(idx)
+ tm.assert_numpy_array_equal(result, exp.values)
idx = Index([1, 1, 2, 3, 3])
exp = Series([1, 3], dtype=np.int64)
- tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
+ result, _ = algos.mode(idx)
+ tm.assert_numpy_array_equal(result, exp.values)
idx = Index(
["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],

4304
python-pandas.changes Normal file

File diff suppressed because it is too large Load Diff

703
python-pandas.spec Normal file
View File

@@ -0,0 +1,703 @@
#
# spec file for package python-pandas
#
# Copyright (c) 2025 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
%global flavor @BUILD_FLAVOR@%{nil}
%{?sle15_python_module_pythons}
%if "%{flavor}" == ""
%define psuffix %{nil}
%bcond_with test
%else
%define psuffix -%{flavor}
%bcond_without test
%if "%{flavor}" != "test-py310"
%define skip_python310 1
%endif
%if "%{flavor}" != "test-py311"
%define skip_python311 1
%endif
%if "%{flavor}" != "test-py312"
%define skip_python312 1
%endif
%if "%{flavor}" != "test-py313"
%define skip_python313 1
%endif
# Skip empty buildsets on tumbleweed or flavors other than python311 on leap with sle15_python_module_pythons
%if "%{shrink:%{pythons}}" == "" || ("%pythons" == "python311" && 0%{?skip_python311})
ExclusiveArch: donotbuild
%define python_module() %flavor-not-enabled-in-buildset-for-suse-%{?suse_version}
%endif
%endif
# Only test the core functionality in Ring1 (Lettered Staging)
%bcond_with ringdisabled
# s3fs not available
%bcond_with aws
# pandas-gbq not available
%bcond_with gcp
# xlsb not available
%bcond_with xslb
%bcond_with consortium_standard
%bcond_with calamine
%bcond_with adbc
# depend/not depend on python-pyarrow and apache-arrow [bsc#1218592]
%bcond_without pyarrow
%if %{suse_version} <= 1500
# requires __has_builtin with keywords
%define gccver 13
%endif
Name: python-pandas%{psuffix}
# Set version through _service
Version: 2.3.1
Release: 0
Summary: Python data structures for data analysis, time series, and statistics
License: BSD-3-Clause
URL: https://pandas.pydata.org/
# SourceRepository: https://github.com/pandas-dev/pandas
# Must be created by cloning through `osc service runall`: gh#pandas-dev/pandas#54903, gh#pandas-dev/pandas#54907
Source0: pandas-%{version}.tar.gz
# PATCH-FIX-UPSTREAM pandas-pr61132-dropna.patch gh#pandas-dev/pandas#61132 BUG: .mode(dropna=False) doesn't work with nullable integers
Patch1: pandas-pr61132-dropna.patch
%if !%{with test}
BuildRequires: %{python_module Cython >= 3.0.5}
BuildRequires: %{python_module devel >= 3.9}
BuildRequires: %{python_module meson-python >= 0.13.1}
BuildRequires: %{python_module numpy-devel >= 1.26}
BuildRequires: %{python_module pip}
BuildRequires: %{python_module versioneer-toml}
BuildRequires: %{python_module wheel}
BuildRequires: fdupes
BuildRequires: gcc%{?gccver}-c++
BuildRequires: git-core
BuildRequires: (meson >= 1.2.1 with meson < 2)
%endif
BuildRequires: python-rpm-macros
Requires: python-python-dateutil >= 2.8.2
Requires: python-pytz >= 2020.1
Requires: timezone >= 2022a
Obsoletes: python-pandas-doc < %{version}
Provides: python-pandas-doc = %{version}
%if 0%{python_version_nodots} < 311
Requires: python-numpy >= 1.22.4
%else
%if 0%{python_version_nodots} == 311
Requires: python-numpy >= 1.23.2
%else
Requires: python-numpy >= 1.26
%endif
%endif
# SECTION extras
Recommends: python-pandas-performance
Recommends: python-pandas-pyarrow
Suggests: python-pandas-all
Suggests: python-pandas-clipboard
Suggests: python-pandas-compression
Suggests: python-pandas-computation
Suggests: python-pandas-excel
Suggests: python-pandas-fss
Suggests: python-pandas-hdf5
Suggests: python-pandas-html
Suggests: python-pandas-mysql
Suggests: python-pandas-output_formatting
Suggests: python-pandas-plot
Suggests: python-pandas-postgresql
Suggests: python-pandas-spss
Suggests: python-pandas-sql-other
Suggests: python-pandas-test
Suggests: python-pandas-xml
%{?with_aws:Suggests: python-pandas-aws}
%{?with_gcp:Suggests: python-pandas-gcp}
%{?with_pyarrow:Suggests: python-pandas-parquet}
%{?with_pyarrow:Suggests: python-pandas-feather}
# /SECTION
%if %{with test}
# required for sqlite3 tests
BuildRequires: %{pythons}
BuildRequires: %{python_module pandas-test = %{version}}
BuildRequires: memory-constraints
BuildRequires: xvfb-run
%if !%{with ringdisabled}
BuildRequires: %{python_module IPython}
BuildRequires: %{python_module dask-array}
BuildRequires: %{python_module dask-dataframe}
BuildRequires: %{python_module pandas-all = %{version}}
BuildRequires: %{python_module pandas-clipboard = %{version}}
BuildRequires: %{python_module pandas-compression = %{version}}
BuildRequires: %{python_module pandas-computation = %{version}}
BuildRequires: %{python_module pandas-excel = %{version}}
%{?with_pyarrow:BuildRequires: %{python_module pandas-feather = %{version}}}
BuildRequires: %{python_module pandas-fss = %{version}}
BuildRequires: %{python_module pandas-hdf5 = %{version}}
BuildRequires: %{python_module pandas-html = %{version}}
BuildRequires: %{python_module pandas-mysql = %{version}}
BuildRequires: %{python_module pandas-output_formatting = %{version}}
%{?with_pyarrow:BuildRequires: %{python_module pandas-parquet = %{version}}}
BuildRequires: %{python_module pandas-performance = %{version}}
BuildRequires: %{python_module pandas-plot = %{version}}
BuildRequires: %{python_module pandas-postgresql = %{version}}
%{?with_pyarrow:BuildRequires: %{python_module pandas-pyarrow = %{version}}}
BuildRequires: %{python_module pandas-spss = %{version}}
BuildRequires: %{python_module pandas-sql-other = %{version}}
BuildRequires: %{python_module pandas-xml = %{version}}
BuildRequires: xclip
%{?with_aws:BuildRequires: %{python_module pandas-aws = %{version}}}
%{?with_gcp:BuildRequires: %{python_module pandas-gcp = %{version}}}
%{?with_consortium_standard:BuildRequires: %{python_module pandas-consortium-standard = %{version}}}
%endif
%endif
%python_subpackages
%description
Pandas is a Python package providing data structures designed for
working with structured (tabular, multidimensional, potentially
heterogeneous) and time series data. It is a high-level building
block for doing data analysis in Python.
%package test
Summary: The python pandas[test] extra
Requires: python-hypothesis >= 6.46.1
Requires: python-pandas = %{version}
Requires: python-pytest >= 7.3.2
Requires: python-pytest-xdist >= 2.2.0
BuildArch: noarch
%description test
This package provides the [test] extra for python-pandas
%package pyarrow
Summary: The python pandas[pyarrow] extra
Requires: python-pandas = %{version}
Requires: python-pyarrow >= 10.0.1
BuildArch: noarch
%description pyarrow
This package provides the [pyarrow] extra for python-pandas
%package performance
Summary: The python pandas[performance] extra
Requires: python-Bottleneck >= 1.3.6
Requires: python-numba >= 0.56.4
Requires: python-numexpr >= 2.8.4
Requires: python-pandas = %{version}
BuildArch: noarch
%description performance
This package provides the [performance] extra for python-pandas
It is highly recommended to install this subpackage, as its dependencies
provide speed improvements, especially when working with large data sets.
%package computation
Summary: The python pandas[computation] extra
Requires: python-pandas = %{version}
Requires: python-scipy >= 1.10.0
Requires: python-xarray >= 2022.12.0
BuildArch: noarch
%description computation
This package provides the [computation] extra for python-pandas
%package fss
Summary: The python pandas[fss] extra
Requires: python-fsspec >= 2022.11
Requires: python-pandas = %{version}
BuildArch: noarch
%description fss
This package provides the [fss] extra for python-pandas
%package aws
Summary: The python pandas[aws] extra
Requires: python-pandas = %{version}
Requires: python-s3fs >= 2022.11
BuildArch: noarch
%description aws
This package provides the [aws] extra for python-pandas
%package gcp
Summary: The python pandas[gcp] extra
Requires: python-gcsfs >= 2022.11
Requires: python-pandas = %{version}
Requires: python-pandas-gbq >= 0.19.0
BuildArch: noarch
%description gcp
This package provides the [gcp] extra for python-pandas
%package excel
Summary: The python pandas[excel] extra
Requires: python-odfpy >= 1.4.1
Requires: python-openpyxl >= 3.1.0
Requires: python-pandas = %{version}
# pyxlsb is gated by the "xslb" build conditional declared near the top of this spec;
# the macro name must match that declaration or the requirement is never emitted
%{?with_xslb:Requires: python-pyxlsb >= 1.0.10}
Requires: python-XlsxWriter >= 3.0.5
Requires: python-xlrd >= 2.0.1
%{?with_calamine:Requires: python-calamine >= 0.1.7}
BuildArch: noarch
%description excel
This package provides the [excel] extra for python-pandas.
(Except for pyxlsb and calamine which are not available as openSUSE rpm package)
%package parquet
Summary: The python pandas[parquet] extra
Requires: python-pandas = %{version}
Requires: python-pyarrow >= 10.0.1
BuildArch: noarch
%description parquet
This package provides the [parquet] extra for python-pandas
%package feather
Summary: The python pandas[feather] extra
Requires: python-pandas = %{version}
Requires: python-pyarrow >= 10.0.1
BuildArch: noarch
%description feather
This package provides the [feather] extra for python-pandas
%package hdf5
Summary: The python pandas[hdf5] extra
Requires: python-blosc
Requires: python-pandas = %{version}
Requires: python-tables >= 3.8.0
BuildArch: noarch
%description hdf5
This package provides the [hdf5] extra for python-pandas
%package spss
Summary: The python pandas[spss] extra
Requires: python-pandas = %{version}
Requires: python-pyreadstat >= 1.2.0
BuildArch: noarch
%description spss
This package provides the [spss] extra for python-pandas
%package postgresql
Summary: The python pandas[postgresql] extra
Requires: python-SQLAlchemy >= 2.0.0
Requires: python-pandas = %{version}
Requires: python-psycopg2 >= 2.9.6
%{?with_adbc:Requires: python-adbc-driver-postgresql >= 0.8}
BuildArch: noarch
%description postgresql
This package provides the [postgresql] extra for python-pandas
%package mysql
Summary: The python pandas[mysql] extra
Requires: python-PyMySQL >= 1.0.2
Requires: python-SQLAlchemy >= 2.0.0
Requires: python-pandas = %{version}
BuildArch: noarch
%description mysql
This package provides the [mysql] extra for python-pandas
%package sql-other
Summary: The python pandas[sql-other] extra
Requires: python-SQLAlchemy >= 2.0.0
%{?with_adbc:Requires: python-adbc-driver-postgresql >= 0.8}
%{?with_adbc:Requires: python-adbc-driver-sqlite >= 0.8}
Requires: python-pandas = %{version}
BuildArch: noarch
%description sql-other
This package provides the [sql-other] extra for python-pandas
%package html
Summary: The python pandas[html] extra
Requires: python-beautifulsoup4 >= 4.11.2
Requires: python-html5lib >= 1.1
Requires: python-lxml >= 4.9.2
Requires: python-pandas = %{version}
BuildArch: noarch
%description html
This package provides the [html] extra for python-pandas
%package xml
Summary: The python pandas[xml] extra
Requires: python-lxml >= 4.9.2
Requires: python-pandas = %{version}
BuildArch: noarch
%description xml
This package provides the [xml] extra for python-pandas
%package plot
Summary: The python pandas[plot] extra
Requires: python-matplotlib >= 3.6.3
Requires: python-pandas = %{version}
BuildArch: noarch
%description plot
This package provides the [plot] extra for python-pandas
%package output_formatting
Summary: The python pandas[output_formatting] extra
Requires: python-Jinja2 >= 3.1.2
Requires: python-pandas = %{version}
Requires: python-tabulate >= 0.9.0
BuildArch: noarch
%description output_formatting
This package provides the [output_formatting] extra for python-pandas
%package clipboard
Summary: The python pandas[clipboard] extra
Requires: python-PyQt5 >= 5.15.9
Requires: python-QtPy >= 2.3.0
Requires: python-pandas = %{version}
BuildArch: noarch
%description clipboard
This package provides the [clipboard] extra for python-pandas
%package compression
Summary: The python pandas[compression] extra
Requires: python-pandas = %{version}
Requires: python-zstandard >= 0.19.0
BuildArch: noarch
%description compression
This package provides the [compression] extra for python-pandas
%package consortium-standard
Summary: The python pandas[consortium-standard] extra
Requires: python-dataframe-api-compat >= 0.1.7
Requires: python-pandas = %{version}
BuildArch: noarch
%description consortium-standard
This package provides the [consortium-standard] extra for python-pandas
%package all
# Meta subpackage mirroring the pandas[all] extra. Requirements wrapped in a
# with_* conditional are only emitted when that build conditional is enabled.
Summary: The python pandas[all] extra
Requires: python-Bottleneck >= 1.3.6
Requires: python-Jinja2 >= 3.1.2
Requires: python-PyMySQL >= 1.0.2
Requires: python-PyQt5 >= 5.15.9
Requires: python-QtPy >= 2.3.0
Requires: python-SQLAlchemy >= 2
Requires: python-XlsxWriter >= 3.0.5
Requires: python-beautifulsoup4 >= 4.11.2
%{?with_adbc:Requires: python-adbc-driver-postgresql >= 0.8}
%{?with_adbc:Requires: python-adbc-driver-sqlite >= 0.8}
Requires: python-blosc
%{?with_calamine:Requires: python-calamine >= 0.1.7}
%{?with_pyarrow:Requires: python-fastparquet >= 2022.12}
Requires: python-fsspec >= 2022.11
Requires: python-gcsfs >= 2022.11
Requires: python-html5lib >= 1.1
Requires: python-hypothesis >= 6.46.1
Requires: python-lxml >= 4.9.2
Requires: python-matplotlib >= 3.6.3
Requires: python-numba >= 0.56.4
Requires: python-numexpr >= 2.8.4
Requires: python-odfpy >= 1.4.1
Requires: python-openpyxl >= 3.1.0
Requires: python-pandas = %{version}
Requires: python-psycopg2 >= 2.9.6
%{?with_pyarrow:Requires: python-pyarrow >= 10.0.1}
Requires: python-pyreadstat >= 1.2.0
Requires: python-pytest >= 7.3.2
Requires: python-pytest-xdist >= 2.2.0
Requires: python-scipy >= 1.10.0
Requires: python-tables >= 3.8.0
Requires: python-tabulate >= 0.9
Requires: python-xarray >= 2022.12
Requires: python-xlrd >= 2.0.1
Requires: python-zstandard >= 0.19.0
# keep the s3fs minimum in sync with the aws subpackage
%{?with_aws:Requires: python-s3fs >= 2022.11}
%{?with_gcp:Requires: python-pandas-gbq >= 0.19}
%{?with_xslb:Requires: python-pyxlsb >= 1.0.10}
%{?with_consortium_standard: Requires: python-dataframe-api-compat >= 0.1.7}
BuildArch: noarch
%description all
This package provides most of the [all] extra for python-pandas.
Some requirements defined in the PyPI package are left out
because they are not available as openSUSE RPM packages:
* pandas-gbq
* pyxlsb
* s3fs
* dataframe-api-compat
* adbc-driver-postgresql
* adbc-driver-sqlite
* calamine
You can install them directly through `pip%{python_bin_suffix} install --user`, if needed.
%prep
# ATTENTION: unpack and generate _version_meson.py before any patches and modifications for a clean version
%setup -q -n pandas-%{version}
%if !%{with test}
# use the last one from the buildset: need versioneer installed
%python_expand genpython="%__$python"
${genpython} generate_version.py -o _version_meson.py
sed -i "s|'generate_version.py',|'${genpython}', 'generate_version.py',|" meson.build
# don't require the PyPI data only tzdata package, we use the timezone RPM package
sed -i '/dependencies = \[/,/\]/ {/tzdata.*>=/d}' pyproject.toml
%endif
%autopatch -p1
%build
# The wheel is only built in the non-test flavors.
%if !%{with test}
# When the gccver macro is defined, select that specific GCC version for
# both the C++ and the C compiler; otherwise these two lines expand to nothing.
%{?gccver:export CXX=g++-%{gccver}}
%{?gccver:export CC=gcc-%{gccver}}
# Add -fno-strict-aliasing on top of the flags provided by the optflags macro.
export CFLAGS="%{optflags} -fno-strict-aliasing"
%pyproject_wheel
%endif
%install
# Non-test flavors install the built wheel into the buildroot. Test flavors
# install nothing and only mirror the already installed package into the
# source tree (see the else branch).
%if !%{with test}
%pyproject_install
# For every flavor: drop the C sources and headers left under pandas/_libs,
# remove their entries from the dist-info RECORD, and hardlink duplicates.
# The dot in the sed pattern is matched literally via [.]; a bare "." matches
# any character, which also deleted unrelated RECORD lines where some
# character followed by "c" or "h" precedes a comma (e.g. a hash field
# ending in "c").
%{python_expand #
find %{buildroot}%{$python_sitearch}/pandas/_libs -name '*.[ch]' -delete
sed -i -e '/[.][ch],/d' %{buildroot}%{$python_sitearch}/pandas-%{version}.dist-info/RECORD
%fdupes %{buildroot}%{$python_sitearch}
}
%else
# Copy the installed package back into the source tree
# This is equivalent to build and install editable (pip install -e .), and the only way
# to have a passing test suite due to how the test collection works in pytest >= 7.
# Only works for separate python flavors in multibuild.
%python_expand cp -rf %{$python_sitearch}/pandas/* pandas/
%endif
%check
# The test suite only runs in the test flavors.
# SKIP_MARKERS collects pytest marker names and SKIP_TESTS collects test name
# expressions; both are joined with "or" and negated as a whole on the pytest
# command line at the end of this section (-m and -k respectively).
%if %{with test}
export LANG=en_US.UTF-8
export LC_ALL=en_US.UTF-8
export PYTHONDONTWRITEBYTECODE=1
# Workaround for pytest-xdist flaky collection order
# https://github.com/pytest-dev/pytest/issues/920
# https://github.com/pytest-dev/pytest/issues/1075
export PYTHONHASHSEED=1
# no network connection on obs
SKIP_MARKERS="network"
# clipboard not set up properly in build service without window manager
SKIP_MARKERS+=" or clipboard"
# skip tests which upstream marked for -n 1 only.
SKIP_MARKERS+=" or single_cpu"
# pytest-xdist worker crash
SKIP_TESTS="test_pivot_number_of_levels_larger_than_int32"
# no locally running database server
SKIP_TESTS+=" or psycopg2_engine or psycopg2_conn or pymysql_engine or pymysql_conn"
SKIP_TESTS+=" or test_psycopg2_schema_support"
SKIP_TESTS+=" or test_self_join_date_columns"
# expects a dirty git revision from git repo
SKIP_TESTS+=" or test_git_version"
# https://github.com/pandas-dev/pandas/pull/57391, proposed change is not necessarily the right one
%if "%{flavor}" == "test-py312" || "%{flavor}" == "test-py313"
SKIP_TESTS+=" or (test_scalar_unary and numexpr-pandas)"
%endif
# https://github.com/pandas-dev/pandas/pull/55901, not gonna merge this huge patch to fix one test failing with new timezone, will be included in 3.0
SKIP_TESTS+=" or test_array_inference[data7-expected7]"
# too new xarray, gh#pandas-dev/pandas#60109 backport too much
SKIP_TESTS+=" or (TestDataFrameToXArray and test_to_xarray_index_types)"
# xpass strict: our xarray seems to handle this fine
SKIP_TESTS+=" or (TestSeriesToXArray and test_to_xarray_index_types)"
%ifarch %{ix86} %{arm32}
# https://github.com/pandas-dev/pandas/issues/31856
SKIP_TESTS+=" or test_maybe_promote_int_with_int"
# rounding error
SKIP_TESTS+=" or (test_rolling_quantile_interpolation_options and data1 and linear and 0.1)"
# overflow
SKIP_TESTS+=" or test_large_string_pyarrow"
SKIP_TESTS+=" or test_pandas_nullable_with_missing_values"
SKIP_TESTS+=" or test_pandas_nullable_without_missing_values"
SKIP_TESTS+=" or (test_to_datetime and TestOrigin and test_epoch)"
SKIP_TESTS+=" or test_td_mul_numeric_ndarray_0d"
SKIP_TESTS+=" or test_get_indexer_non_unique_wrong_dtype"
# pyarrow read-only errors
SKIP_TESTS+=" or test_left_join_multi_index"
SKIP_TESTS+=" or test_join_on_single_col_dup_on_right"
# dtype mismatch
SKIP_TESTS+=" or test_frame_setitem_dask_array_into_new_col"
SKIP_TESTS+=" or test_get_indexer_arrow_dictionary_target"
# numba formats not supported on 32-bit
SKIP_TESTS+=" or numba"
%endif
%ifarch %{ix86}
# overflows on i586
SKIP_TESTS+=" or test_encode_non_c_locale"
# intp != int32 (still numpy 1)?
SKIP_TESTS+=" or test_ensure_platform_int"
# fails on i586 (was gcc10-skip-one-test.patch)
SKIP_TESTS+=" or test_merge_on_ints_floats_warning"
%endif
%ifarch ppc64 s390x
# big endian type issues
SKIP_TESTS+=" or test_astype"
SKIP_TESTS+=" or test_to_numpy_string"
SKIP_TESTS+=" or (test_construction and test_to_numpy)"
SKIP_TESTS+=" or test_to_records_index_name"
SKIP_TESTS+=" or test_to_records_dtype"
SKIP_TESTS+=" or test_to_records_dict_like"
SKIP_TESTS+=" or (test_c_parser_only and test_unsupported_dtype)"
SKIP_TESTS+=" or test_td_mul_td64_ndarray_invalid"
%endif
%ifnarch x86_64
# type and numeric precision issues, partially reported for arm and marked xfail upstream but not for e.g. ppc
SKIP_TESTS+=" or (test_astype and test_subtype_integer_errors)"
SKIP_TESTS+=" or (test_to_numeric and test_downcast_nullable_numeric and data12-UInt64-signed-UInt64)"
SKIP_TESTS+=" or (test_rolling and test_rolling_var_numerical_issues)"
SKIP_TESTS+=" or (test_groupby and test_groupby_numerical_stability_sum_mean)"
SKIP_TESTS+=" or (test_groupby and test_groupby_numerical_stability_cumsum)"
SKIP_TESTS+=" or (test_c_parser_only and test_float_precision_options)"
# run the slow tests only on x86_64
SKIP_MARKERS+=" or slow or db"
%endif
# The test collection consumes a lot of memory per worker. This sets %%jobs.
%limit_build -m 3072
# For every flavor: log which pandas is imported and its dependency versions,
# then run the suite under a virtual X server with one xdist worker per job.
# show_versions() writes its report to stdout itself and returns None, so it
# is called directly instead of being wrapped in print().
%{python_expand $python -c 'import pandas; print(pandas.__path__); pandas.show_versions()'
# cache: can't just say no cacheprovider, because one test checks for the --lf option of pytest-cache
xvfb-run pytest-%{$python_bin_suffix} -v -n %{jobs} -rsfE --dist=loadfile \
-o cache_dir=$PWD/.pytest_cache --cache-clear \
-m "not (${SKIP_MARKERS})" \
-k "not (${SKIP_TESTS})" \
pandas
}
%endif
# File lists are only declared in the non-test flavors.
%if !%{with test}
# Main package: the pandas module and its dist-info metadata.
%files %{python_files}
%license LICENSE
%doc README.md
%{python_sitearch}/pandas/
%{python_sitearch}/pandas-%{version}.dist-info
# The test subpackage and all extras subpackages below list only the license
# and the readme; they carry no files from the pandas module itself.
%files %{python_files test}
%license LICENSE
%doc README.md
# The extras subpackages are left out entirely when the ringdisabled
# condition is set.
%if !%{with ringdisabled}
%files %{python_files pyarrow}
%license LICENSE
%doc README.md
%files %{python_files performance}
%license LICENSE
%doc README.md
# computation extra: only for flavors with python_version_nodots >= 310
%if 0%{python_version_nodots} >= 310
%files %{python_files computation}
%license LICENSE
%doc README.md
%endif
%files %{python_files fss}
%license LICENSE
%doc README.md
# Optional extras, each guarded by its own build condition.
%if %{with aws}
%files %{python_files aws}
%license LICENSE
%doc README.md
%endif
%if %{with gcp}
%files %{python_files gcp}
%license LICENSE
%doc README.md
%endif
%files %{python_files excel}
%license LICENSE
%doc README.md
# parquet and feather are both tied to the pyarrow build condition.
%if %{with pyarrow}
%files %{python_files parquet}
%license LICENSE
%doc README.md
%endif
%if %{with pyarrow}
%files %{python_files feather}
%license LICENSE
%doc README.md
%endif
%files %{python_files hdf5}
%license LICENSE
%doc README.md
%files %{python_files spss}
%license LICENSE
%doc README.md
%files %{python_files postgresql}
%license LICENSE
%doc README.md
%files %{python_files mysql}
%license LICENSE
%doc README.md
%files %{python_files sql-other}
%license LICENSE
%doc README.md
%files %{python_files html}
%license LICENSE
%doc README.md
%files %{python_files xml}
%license LICENSE
%doc README.md
%files %{python_files plot}
%license LICENSE
%doc README.md
%files %{python_files output_formatting}
%license LICENSE
%doc README.md
%files %{python_files clipboard}
%license LICENSE
%doc README.md
%files %{python_files compression}
%license LICENSE
%doc README.md
%if %{with consortium_standard}
%files %{python_files consortium-standard}
%license LICENSE
%doc README.md
%endif
%files %{python_files all}
%license LICENSE
%doc README.md
%endif
%endif
%changelog

75
tests-npdev.patch Normal file
View File

@@ -0,0 +1,75 @@
From 2536d3a736eea96b9da8b774e671516eb8f25f4a Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 24 Apr 2024 07:26:56 -1000
Subject: [PATCH] CI: Fix npdev failures (#58389)
* CI: Fix npdev failures
* Use unique index, make array writable
* Update pandas/_libs/hashtable_class_helper.pxi.in
* Update pandas/tests/arrays/test_datetimelike.py
* Update pandas/tests/arrays/test_datetimelike.py
---
pandas/tests/arrays/test_datetimelike.py | 8 ++++++--
pandas/tests/extension/base/missing.py | 2 ++
pandas/tests/indexes/test_base.py | 4 ++--
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index 22c63af59a47c..3d8f8d791b763 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -661,7 +661,9 @@ def test_array_interface(self, datetime_index):
assert result is expected
tm.assert_numpy_array_equal(result, expected)
result = np.array(arr, dtype="datetime64[ns]")
- assert result is not expected
+ if not np_version_gt2:
+ # TODO: GH 57739
+ assert result is not expected
tm.assert_numpy_array_equal(result, expected)
# to object dtype
@@ -976,7 +978,9 @@ def test_array_interface(self, timedelta_index):
assert result is expected
tm.assert_numpy_array_equal(result, expected)
result = np.array(arr, dtype="timedelta64[ns]")
- assert result is not expected
+ if not np_version_gt2:
+ # TODO: GH 57739
+ assert result is not expected
tm.assert_numpy_array_equal(result, expected)
# to object dtype
diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
index 4b9234a9904a2..cee565d4f7c1e 100644
--- a/pandas/tests/extension/base/missing.py
+++ b/pandas/tests/extension/base/missing.py
@@ -27,7 +27,9 @@ def test_isna_returns_copy(self, data_missing, na_func):
expected = result.copy()
mask = getattr(result, na_func)()
if isinstance(mask.dtype, pd.SparseDtype):
+ # TODO: GH 57739
mask = np.array(mask)
+ mask.flags.writeable = True
mask[:] = True
tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 04858643d97b1..2e94961b673f8 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -71,8 +71,8 @@ def test_constructor_casting(self, index):
tm.assert_contains_all(arr, new_index)
tm.assert_index_equal(index, new_index)
- @pytest.mark.parametrize("index", ["string"], indirect=True)
- def test_constructor_copy(self, index, using_infer_string):
+ def test_constructor_copy(self, using_infer_string):
+ index = Index(list("abc"), name="name")
arr = np.array(index)
new_index = Index(arr, copy=True, name="name")
assert isinstance(new_index, Index)

55
tests-timedelta.patch Normal file
View File

@@ -0,0 +1,55 @@
From d0cb2056d0b27080b2f5cc0b88db8d263f684230 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 7 Aug 2024 10:49:25 -1000
Subject: [PATCH] COMPAT: Fix numpy 2.1 timedelta * DateOffset (#59441)
---
pandas/core/arrays/timedeltas.py | 8 ++++++++
pandas/tests/arithmetic/test_timedelta64.py | 8 +++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 15bfe442ca87f..83cc2871f5459 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -467,6 +467,10 @@ def __mul__(self, other) -> Self:
if is_scalar(other):
# numpy will accept float and int, raise TypeError for others
result = self._ndarray * other
+ if result.dtype.kind != "m":
+ # numpy >= 2.1 may not raise a TypeError
+ # and seems to dispatch to others.__rmul__?
+ raise TypeError(f"Cannot multiply with {type(other).__name__}")
freq = None
if self.freq is not None and not isna(other):
freq = self.freq * other
@@ -494,6 +498,10 @@ def __mul__(self, other) -> Self:
# numpy will accept float or int dtype, raise TypeError for others
result = self._ndarray * other
+ if result.dtype.kind != "m":
+ # numpy >= 2.1 may not raise a TypeError
+ # and seems to dispatch to others.__rmul__?
+ raise TypeError(f"Cannot multiply with {type(other).__name__}")
return type(self)._simple_new(result, dtype=result.dtype)
__rmul__ = __mul__
diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py
index 4583155502374..87e085fb22878 100644
--- a/pandas/tests/arithmetic/test_timedelta64.py
+++ b/pandas/tests/arithmetic/test_timedelta64.py
@@ -1460,7 +1460,13 @@ def test_td64arr_mul_int(self, box_with_array):
def test_td64arr_mul_tdlike_scalar_raises(self, two_hours, box_with_array):
rng = timedelta_range("1 days", "10 days", name="foo")
rng = tm.box_expected(rng, box_with_array)
- msg = "argument must be an integer|cannot use operands with types dtype"
+ msg = "|".join(
+ [
+ "argument must be an integer",
+ "cannot use operands with types dtype",
+ "Cannot multiply with",
+ ]
+ )
with pytest.raises(TypeError, match=msg):
rng * two_hours

22
timedelta.patch Normal file
View File

@@ -0,0 +1,22 @@
From 0b6cece3acda1ae6e4f582d8276851b02aeac1ea Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 25 Nov 2024 11:35:37 -0800
Subject: [PATCH] TST: Avoid hashing np.timedelta64 without unit (#60416)
---
pandas/tests/test_algos.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: pandas-2.2.3/pandas/tests/test_algos.py
===================================================================
--- pandas-2.2.3.orig/pandas/tests/test_algos.py
+++ pandas-2.2.3/pandas/tests/test_algos.py
@@ -1280,7 +1280,7 @@ class TestValueCounts:
result_dt = algos.value_counts(dt)
tm.assert_series_equal(result_dt, exp_dt)
- exp_td = Series({np.timedelta64(10000): 1}, name="count")
+ exp_td = Series([1], index=[np.timedelta64(10000)], name="count")
with tm.assert_produces_warning(FutureWarning, match=msg):
result_td = algos.value_counts(td)
tm.assert_series_equal(result_td, exp_td)