From 60349b6793e043adfc39460742a89f11165a65e50d921fb41a45e41f1dbedc28 Mon Sep 17 00:00:00 2001 From: Dirk Mueller Date: Sun, 8 Sep 2024 16:12:30 +0000 Subject: [PATCH] - Drop pandas-pr58720-xarray-dp.patch: It does no longer xfail OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:numeric/python-pandas?expand=0&rev=127 --- .gitattributes | 23 + .gitignore | 1 + _constraints | 8 + _multibuild | 5 + _service | 16 + pandas-2.2.2.tar.gz | 3 + pandas-pr58269-pyarrow16xpass.patch | 40 + pandas-pr58484-matplotlib.patch | 71 + pandas-pr58720-xarray-dp.patch | 41 + pandas-pr59175-matplotlib.patch | 29 + pandas-pr59353-np2eval.patch | 174 ++ python-pandas.changes | 4141 +++++++++++++++++++++++++++ python-pandas.spec | 703 +++++ 13 files changed, 5255 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 _constraints create mode 100644 _multibuild create mode 100644 _service create mode 100644 pandas-2.2.2.tar.gz create mode 100644 pandas-pr58269-pyarrow16xpass.patch create mode 100644 pandas-pr58484-matplotlib.patch create mode 100644 pandas-pr58720-xarray-dp.patch create mode 100644 pandas-pr59175-matplotlib.patch create mode 100644 pandas-pr59353-np2eval.patch create mode 100644 python-pandas.changes create mode 100644 python-pandas.spec diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm 
filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/_constraints b/_constraints new file mode 100644 index 0000000..c8f3460 --- /dev/null +++ b/_constraints @@ -0,0 +1,8 @@ + + + + 12 + + 4 + + diff --git a/_multibuild b/_multibuild new file mode 100644 index 0000000..6d8cafe --- /dev/null +++ b/_multibuild @@ -0,0 +1,5 @@ + + test-py310 + test-py311 + test-py312 + diff --git a/_service b/_service new file mode 100644 index 0000000..34fed70 --- /dev/null +++ b/_service @@ -0,0 +1,16 @@ + + + https://github.com/pandas-dev/pandas.git + git + v2.2.2 + @PARENT_TAG@ + v(.*) + pandas + yes + + + *.tar + gz + + + diff --git a/pandas-2.2.2.tar.gz b/pandas-2.2.2.tar.gz new file mode 100644 index 0000000..735fe58 --- /dev/null +++ b/pandas-2.2.2.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f044538e419c7d5c03434c96b1439cbd88701dcd02d6a79b08947fbb656c2f4 +size 50782448 diff --git a/pandas-pr58269-pyarrow16xpass.patch b/pandas-pr58269-pyarrow16xpass.patch new file mode 100644 index 0000000..620b2b7 --- /dev/null +++ b/pandas-pr58269-pyarrow16xpass.patch @@ -0,0 +1,40 @@ +From 1828b62ee913da44ec4402642ef7baaafeb65677 Mon Sep 17 00:00:00 2001 +From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> +Date: Mon, 15 Apr 2024 09:47:31 -1000 +Subject: [PATCH] Backport PR #58268: CI/TST: Unxfail + test_slice_locs_negative_step Pyarrow test + +--- + pandas/tests/indexes/object/test_indexing.py | 12 +----------- + 1 file changed, 1 
insertion(+), 11 deletions(-) + +diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py +index 443cacf94d239..ebf9dac715f8d 100644 +--- a/pandas/tests/indexes/object/test_indexing.py ++++ b/pandas/tests/indexes/object/test_indexing.py +@@ -7,7 +7,6 @@ + NA, + is_matching_na, + ) +-from pandas.compat import pa_version_under16p0 + import pandas.util._test_decorators as td + + import pandas as pd +@@ -201,16 +200,7 @@ class TestSliceLocs: + (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc] + ], + ) +- def test_slice_locs_negative_step(self, in_slice, expected, dtype, request): +- if ( +- not pa_version_under16p0 +- and dtype == "string[pyarrow_numpy]" +- and in_slice == slice("a", "a", -1) +- ): +- request.applymarker( +- pytest.mark.xfail(reason="https://github.com/apache/arrow/issues/40642") +- ) +- ++ def test_slice_locs_negative_step(self, in_slice, expected, dtype): + index = Index(list("bcdxy"), dtype=dtype) + + s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) diff --git a/pandas-pr58484-matplotlib.patch b/pandas-pr58484-matplotlib.patch new file mode 100644 index 0000000..1434543 --- /dev/null +++ b/pandas-pr58484-matplotlib.patch @@ -0,0 +1,71 @@ +From 0cab756077f5291f8d6a7fcfacaf374f62b866a0 Mon Sep 17 00:00:00 2001 +From: Elliott Sales de Andrade +Date: Mon, 29 Apr 2024 23:11:21 -0400 +Subject: [PATCH 1/2] Remove deprecated plot_date calls + +These were deprecated in Matplotlib 3.9. 
+--- + pandas/tests/plotting/test_datetimelike.py | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py +index 6b709522bab70..b91bde41bf4c4 100644 +--- a/pandas/tests/plotting/test_datetimelike.py ++++ b/pandas/tests/plotting/test_datetimelike.py +@@ -1432,13 +1432,11 @@ def test_mpl_nopandas(self): + values1 = np.arange(10.0, 11.0, 0.5) + values2 = np.arange(11.0, 12.0, 0.5) + +- kw = {"fmt": "-", "lw": 4} +- + _, ax = mpl.pyplot.subplots() +- ax.plot_date([x.toordinal() for x in dates], values1, **kw) +- ax.plot_date([x.toordinal() for x in dates], values2, **kw) +- +- line1, line2 = ax.get_lines() ++ line1, line2, = ax.plot( ++ [x.toordinal() for x in dates], values1, "-", ++ [x.toordinal() for x in dates], values2, "-", ++ linewidth=4) + + exp = np.array([x.toordinal() for x in dates], dtype=np.float64) + tm.assert_numpy_array_equal(line1.get_xydata()[:, 0], exp) + +From 6d6574c4e71e3bab91503f85c8aa80c927785865 Mon Sep 17 00:00:00 2001 +From: "pre-commit-ci[bot]" + <66853113+pre-commit-ci[bot]@users.noreply.github.com> +Date: Tue, 30 Apr 2024 16:47:26 +0000 +Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks + +for more information, see https://pre-commit.ci +--- + pandas/tests/plotting/test_datetimelike.py | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py +index b91bde41bf4c4..4b4eeada58366 100644 +--- a/pandas/tests/plotting/test_datetimelike.py ++++ b/pandas/tests/plotting/test_datetimelike.py +@@ -1433,10 +1433,18 @@ def test_mpl_nopandas(self): + values2 = np.arange(11.0, 12.0, 0.5) + + _, ax = mpl.pyplot.subplots() +- line1, line2, = ax.plot( +- [x.toordinal() for x in dates], values1, "-", +- [x.toordinal() for x in dates], values2, "-", +- linewidth=4) ++ ( ++ line1, ++ line2, ++ ) = ax.plot( 
++ [x.toordinal() for x in dates], ++ values1, ++ "-", ++ [x.toordinal() for x in dates], ++ values2, ++ "-", ++ linewidth=4, ++ ) + + exp = np.array([x.toordinal() for x in dates], dtype=np.float64) + tm.assert_numpy_array_equal(line1.get_xydata()[:, 0], exp) diff --git a/pandas-pr58720-xarray-dp.patch b/pandas-pr58720-xarray-dp.patch new file mode 100644 index 0000000..2bed97a --- /dev/null +++ b/pandas-pr58720-xarray-dp.patch @@ -0,0 +1,41 @@ +From d36f6dac81b577504386b53357270d9f05a9bc89 Mon Sep 17 00:00:00 2001 +From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> +Date: Tue, 14 May 2024 09:04:20 -1000 +Subject: [PATCH] Backport PR #58719: CI: xfail test_to_xarray_index_types due + to new 2024.5 release + +--- + pandas/tests/generic/test_to_xarray.py | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py +index d8401a8b2ae3f..491f621783a76 100644 +--- a/pandas/tests/generic/test_to_xarray.py ++++ b/pandas/tests/generic/test_to_xarray.py +@@ -9,6 +9,7 @@ + date_range, + ) + import pandas._testing as tm ++from pandas.util.version import Version + + pytest.importorskip("xarray") + +@@ -29,11 +30,17 @@ def df(self): + } + ) + +- def test_to_xarray_index_types(self, index_flat, df, using_infer_string): ++ def test_to_xarray_index_types(self, index_flat, df, using_infer_string, request): + index = index_flat + # MultiIndex is tested in test_to_xarray_with_multiindex + if len(index) == 0: + pytest.skip("Test doesn't make sense for empty index") ++ import xarray ++ ++ if Version(xarray.__version__) >= Version("2024.5"): ++ request.applymarker( ++ pytest.mark.xfail(reason="https://github.com/pydata/xarray/issues/9026") ++ ) + + from xarray import Dataset + diff --git a/pandas-pr59175-matplotlib.patch b/pandas-pr59175-matplotlib.patch new file mode 100644 index 0000000..80ea265 --- /dev/null +++ b/pandas-pr59175-matplotlib.patch @@ -0,0 +1,29 @@ +From 
d4e803caf7aabd464f6fb1d43ef39903911a3cec Mon Sep 17 00:00:00 2001 +From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> +Date: Wed, 3 Jul 2024 06:45:24 -1000 +Subject: [PATCH] Backport PR #59168: TST: Address UserWarning in matplotlib + test + +--- + pandas/plotting/_matplotlib/core.py | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py +index 2979903edf360..52382d9f7d572 100644 +--- a/pandas/plotting/_matplotlib/core.py ++++ b/pandas/plotting/_matplotlib/core.py +@@ -893,7 +893,13 @@ def _make_legend(self) -> None: + elif self.subplots and self.legend: + for ax in self.axes: + if ax.get_visible(): +- ax.legend(loc="best") ++ with warnings.catch_warnings(): ++ warnings.filterwarnings( ++ "ignore", ++ "No artists with labels found to put in legend.", ++ UserWarning, ++ ) ++ ax.legend(loc="best") + + @final + @staticmethod diff --git a/pandas-pr59353-np2eval.patch b/pandas-pr59353-np2eval.patch new file mode 100644 index 0000000..0a99a76 --- /dev/null +++ b/pandas-pr59353-np2eval.patch @@ -0,0 +1,174 @@ +diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py +index 361998db8e..87d419e2db 100644 +--- a/pandas/_testing/__init__.py ++++ b/pandas/_testing/__init__.py +@@ -111,6 +111,7 @@ ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES] + + COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"] + STRING_DTYPES: list[Dtype] = [str, "str", "U"] ++COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES] + + DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"] + TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"] +diff --git a/pandas/conftest.py b/pandas/conftest.py +index 7c35dfdde9..10134c90f8 100644 +--- a/pandas/conftest.py ++++ b/pandas/conftest.py +@@ -1403,6 +1403,21 @@ def complex_dtype(request): + return request.param + + 
++@pytest.fixture(params=tm.COMPLEX_FLOAT_DTYPES) ++def complex_or_float_dtype(request): ++ """ ++ Parameterized fixture for complex and numpy float dtypes. ++ ++ * complex ++ * 'complex64' ++ * 'complex128' ++ * float ++ * 'float32' ++ * 'float64' ++ """ ++ return request.param ++ ++ + @pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES) + def any_signed_int_numpy_dtype(request): + """ +diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py +index b5861fbaeb..d642c37cea 100644 +--- a/pandas/core/computation/expr.py ++++ b/pandas/core/computation/expr.py +@@ -31,7 +31,6 @@ from pandas.core.computation.ops import ( + UNARY_OPS_SYMS, + BinOp, + Constant, +- Div, + FuncNode, + Op, + Term, +@@ -370,7 +369,7 @@ class BaseExprVisitor(ast.NodeVisitor): + "Add", + "Sub", + "Mult", +- None, ++ "Div", + "Pow", + "FloorDiv", + "Mod", +@@ -533,9 +532,6 @@ class BaseExprVisitor(ast.NodeVisitor): + left, right = self._maybe_downcast_constants(left, right) + return self._maybe_evaluate_binop(op, op_class, left, right) + +- def visit_Div(self, node, **kwargs): +- return lambda lhs, rhs: Div(lhs, rhs) +- + def visit_UnaryOp(self, node, **kwargs): + op = self.visit(node.op) + operand = self.visit(node.operand) +diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py +index 95ac20ba39..ea8b1c0457 100644 +--- a/pandas/core/computation/ops.py ++++ b/pandas/core/computation/ops.py +@@ -332,31 +332,6 @@ for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict): + _binary_ops_dict.update(d) + + +-def _cast_inplace(terms, acceptable_dtypes, dtype) -> None: +- """ +- Cast an expression inplace. +- +- Parameters +- ---------- +- terms : Op +- The expression that should cast. +- acceptable_dtypes : list of acceptable numpy.dtype +- Will not cast if term's dtype in this list. +- dtype : str or numpy.dtype +- The dtype to cast to. 
+- """ +- dt = np.dtype(dtype) +- for term in terms: +- if term.type in acceptable_dtypes: +- continue +- +- try: +- new_value = term.value.astype(dt) +- except AttributeError: +- new_value = dt.type(term.value) +- term.update(new_value) +- +- + def is_term(obj) -> bool: + return isinstance(obj, Term) + +@@ -516,31 +491,6 @@ class BinOp(Op): + def isnumeric(dtype) -> bool: + return issubclass(np.dtype(dtype).type, np.number) + +- +-class Div(BinOp): +- """ +- Div operator to special case casting. +- +- Parameters +- ---------- +- lhs, rhs : Term or Op +- The Terms or Ops in the ``/`` expression. +- """ +- +- def __init__(self, lhs, rhs) -> None: +- super().__init__("/", lhs, rhs) +- +- if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type): +- raise TypeError( +- f"unsupported operand type(s) for {self.op}: " +- f"'{lhs.return_type}' and '{rhs.return_type}'" +- ) +- +- # do not upcast float32s to float64 un-necessarily +- acceptable_dtypes = [np.float32, np.float64] +- _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64) +- +- + UNARY_OPS_SYMS = ("+", "-", "~", "not") + _unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert) + _unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs)) +diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py +index 17630f14b0..e8fad6b8cb 100644 +--- a/pandas/tests/computation/test_eval.py ++++ b/pandas/tests/computation/test_eval.py +@@ -747,16 +747,26 @@ class TestTypeCasting: + @pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"]) + # maybe someday... 
numexpr has too many upcasting rules now + # chain(*(np.core.sctypes[x] for x in ['uint', 'int', 'float'])) +- @pytest.mark.parametrize("dt", [np.float32, np.float64]) + @pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")]) +- def test_binop_typecasting(self, engine, parser, op, dt, left_right): +- df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dt) ++ def test_binop_typecasting( ++ self, engine, parser, op, complex_or_float_dtype, left_right, request ++ ): ++ # GH#21374 ++ dtype = complex_or_float_dtype ++ df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dtype) + left, right = left_right + s = f"{left} {op} {right}" + res = pd.eval(s, engine=engine, parser=parser) +- assert df.values.dtype == dt +- assert res.values.dtype == dt +- tm.assert_frame_equal(res, eval(s)) ++ if dtype == "complex64" and engine == "numexpr": ++ mark = pytest.mark.xfail( ++ reason="numexpr issue with complex that are upcast " ++ "to complex 128 " ++ "https://github.com/pydata/numexpr/issues/492" ++ ) ++ request.applymarker(mark) ++ assert df.values.dtype == dtype ++ assert res.values.dtype == dtype ++ tm.assert_frame_equal(res, eval(s), check_exact=False) + + + # ------------------------------------- diff --git a/python-pandas.changes b/python-pandas.changes new file mode 100644 index 0000000..a8443b7 --- /dev/null +++ b/python-pandas.changes @@ -0,0 +1,4141 @@ +------------------------------------------------------------------- +Sun Sep 8 10:02:47 UTC 2024 - Ben Greiner + +- Drop pandas-pr58720-xarray-dp.patch: It does no longer xfail + +------------------------------------------------------------------- +Wed Aug 28 13:53:55 UTC 2024 - Ben Greiner + +- Skip overflowing tests on 32-bit + +------------------------------------------------------------------- +Sun Aug 25 21:22:36 UTC 2024 - Ben Greiner + +- Add pandas-pr59353-np2eval.patch + * gh#pandas-dev/pandas#59353 + * gh#pandas-dev/pandas#58548 + 
+------------------------------------------------------------------- +Thu Jul 11 09:51:36 UTC 2024 - Ben Greiner + +- Add pandas-pr59175-matplotlib.patch -- gh#pandas-dev/pandas#59175 + +------------------------------------------------------------------- +Sun May 12 17:57:39 UTC 2024 - Matej Cepl + +- Add pandas-pr58269-pyarrow16xpass.patch + (gh#pandas-dev/pandas!58269) +- Add pandas-pr58720-xarray-dp.patch + (gh#pandas-dev/pandas!58720), which makes pandas compatible + with the modern xarray +- Add pandas-pr58484-matplotlib.patch + (gh#pandas-dev/pandas!58484), which makes pandas compatible + with the modern matplotlib +- Skip also test_plot_scatter_shape (gh#pandas-dev/pandas#58851) + +------------------------------------------------------------------- +Thu May 9 23:44:42 UTC 2024 - Matej Cepl + +- Skip build on Python 3.10 ... too many dependencies are missing. + +------------------------------------------------------------------- +Tue Apr 30 18:08:56 UTC 2024 - Ben Greiner + +- Update to 2.2.2 + * Pandas 2.2.2 is now compatible with numpy 2.0 + * Pandas 2.2.2 is the first version of pandas that is generally + compatible with the upcoming numpy 2.0 release, and wheels for + pandas 2.2.2 will work with both numpy 1.x and 2.x. One major + caveat is that arrays created with numpy 2.0’s new StringDtype + will convert to object dtyped arrays upon Series/DataFrame + creation. Full support for numpy 2.0’s StringDtype is expected + to land in pandas 3.0. 
+ * As usual please report any bugs discovered to our issue tracker + ## Fixed regressions + * DataFrame.__dataframe__() was producing incorrect data buffers + when the a column’s type was a pandas nullable on with missing + values (GH 56702) + * DataFrame.__dataframe__() was producing incorrect data buffers + when the a column’s type was a pyarrow nullable on with missing + values (GH 57664) + * Avoid issuing a spurious DeprecationWarning when a custom + DataFrame or Series subclass method is called (GH 57553) + * Fixed regression in precision of to_datetime() with string and + unit input (GH 57051) + ## Bug fixes + * DataFrame.__dataframe__() was producing incorrect data buffers + when the column’s type was nullable boolean (GH 55332) + * DataFrame.__dataframe__() was showing bytemask instead of + bitmask for 'string[pyarrow]' validity buffer (GH 57762) + * DataFrame.__dataframe__() was showing non-null validity buffer + (instead of None) 'string[pyarrow]' without missing values (GH + 57761) + * DataFrame.to_sql() was failing to find the right table when + using the schema argument (GH 57539) +- Remove obsolete python39 multibuild +- Add pandas-pr58269-pyarrow16xpass.patch + gh#pandas-dev/pandas#58269 + +------------------------------------------------------------------- +Mon Mar 4 20:44:10 UTC 2024 - Ben Greiner + +- No xarrary for python 3.9 anymore: Remove from pandas[all] and + exclude pandas[computation]. Reenable testing to check it. + It will be skipped automatically when python39 is dropped from + Tumbleweed globally. 
+- Fix 15.x build: requires newer compiler +- Fix 15.x test skips: sle15_python_module_pythons needs to be + declared earlier + +------------------------------------------------------------------- +Fri Feb 23 20:04:59 UTC 2024 - Ben Greiner + +- Update to 2.2.1 + ## Enhancements + * Added pyarrow pip extra so users can install pandas and pyarrow + with pip with pip install pandas[pyarrow] (#54466) + ## Fixed regressions + * Fixed memory leak in `read_csv` (#57039) + * Fixed performance regression in `Series.combine_first` (#55845) + * Fixed regression causing overflow for near-minimum timestamps + (#57150) + * Fixed regression in `concat` changing long-standing behavior + that always sorted the non-concatenation axis when the axis was + a `DatetimeIndex` (#57006) + * Fixed regression in `merge_ordered` raising TypeError for + fill_method="ffill" and how="left" (#57010) + * Fixed regression in `pandas.testing.assert_series_equal` + defaulting to check_exact=True when checking the `Index` + (#57067) + * Fixed regression in `read_json` where an `Index` would be + returned instead of a `RangeIndex` (#57429) + * Fixed regression in `wide_to_long` raising an AttributeError + for string columns (#57066) + * Fixed regression in `.DataFrameGroupBy.idxmin`, + `.DataFrameGroupBy.idxmax`, `.SeriesGroupBy.idxmin`, + `.SeriesGroupBy.idxmax` ignoring the skipna argument (#57040) + * Fixed regression in `.DataFrameGroupBy.idxmin`, + `.DataFrameGroupBy.idxmax`, `.SeriesGroupBy.idxmin`, + `.SeriesGroupBy.idxmax` where values containing the minimum or + maximum value for the dtype could produce incorrect results + (#57040) + * Fixed regression in `CategoricalIndex.difference` raising + KeyError when other contains null values other than NaN + (#57318) + * Fixed regression in `DataFrame.groupby` raising ValueError when + grouping by a `Series` in some cases (#57276) + * Fixed regression in `DataFrame.loc` raising IndexError for + non-unique, masked dtype indexes where result has more 
than + 10,000 rows (#57027) + * Fixed regression in `DataFrame.loc` which was unnecessarily + throwing "incompatible dtype warning" when expanding with + partial row indexer and multiple columns (see PDEP6) (#56503) + * Fixed regression in `DataFrame.map` with na_action="ignore" not + being respected for NumPy nullable and `ArrowDtypes` (#57316) + * Fixed regression in `DataFrame.merge` raising ValueError for + certain types of 3rd-party extension arrays (#57316) + * Fixed regression in `DataFrame.query` with all NaT column with + object dtype (#57068) + * Fixed regression in `DataFrame.shift` raising AssertionError + for axis=1 and empty `DataFrame` (#57301) + * Fixed regression in `DataFrame.sort_index` not producing a + stable sort for a index with duplicates (#57151) + * Fixed regression in `DataFrame.to_dict` with orient='list' and + datetime or timedelta types returning integers (#54824) + * Fixed regression in `DataFrame.to_json` converting nullable + integers to floats (#57224) + * Fixed regression in `DataFrame.to_sql` when method="multi" is + passed and the dialect type is not Oracle (#57310) + * Fixed regression in `DataFrame.transpose` with nullable + extension dtypes not having F-contiguous data potentially + causing exceptions when used (#57315) + * Fixed regression in `DataFrame.update` emitting incorrect + warnings about downcasting (#57124) + * Fixed regression in `DataFrameGroupBy.idxmin`, + `DataFrameGroupBy.idxmax`, `SeriesGroupBy.idxmin`, + `SeriesGroupBy.idxmax` ignoring the skipna argument (#57040) + * Fixed regression in `DataFrameGroupBy.idxmin`, + `DataFrameGroupBy.idxmax`, `SeriesGroupBy.idxmin`, + `SeriesGroupBy.idxmax` where values containing the minimum or + maximum value for the dtype could produce incorrect results + (#57040) + * Fixed regression in `ExtensionArray.to_numpy` raising for + non-numeric masked dtypes (#56991) + * Fixed regression in `Index.join` raising TypeError when joining + an empty index to a non-empty index 
containing mixed dtype + values (#57048) + * Fixed regression in `Series.astype` introducing decimals when + converting from integer with missing values to string dtype + (#57418) + * Fixed regression in `Series.pct_change` raising a ValueError + for an empty `Series` (#57056) + * Fixed regression in `Series.to_numpy` when dtype is given as + float and the data contains NaNs (#57121) + * Fixed regression in addition or subtraction of `DateOffset` + objects with millisecond components to datetime64 `Index`, + `Series`, or `DataFrame` (#57529) + ## Bug fixes + * Fixed bug in `pandas.api.interchange.from_dataframe` which was + raising for Nullable integers (#55069) + * Fixed bug in `pandas.api.interchange.from_dataframe` which was + raising for empty inputs (#56700) + * Fixed bug in `pandas.api.interchange.from_dataframe` which + wasn't converting columns names to strings (#55069) + * Fixed bug in `DataFrame.__getitem__` for empty `DataFrame` with + Copy-on-Write enabled (#57130) + * Fixed bug in `PeriodIndex.asfreq` which was silently converting + frequencies which are not supported as period frequencies + instead of raising an error (#56945) + ## Note + * The DeprecationWarning that was raised when pandas was imported + without PyArrow being installed has been removed. This decision + was made because the warning was too noisy for too many users + and a lot of feedback was collected about the decision to make + PyArrow a required dependency. Pandas is currently considering + the decision whether or not PyArrow should be added as a hard + dependency in 3.0. Interested users can follow the discussion + here. 
+ * Added the argument skipna to `DataFrameGroupBy.first`, + `DataFrameGroupBy.last`, `SeriesGroupBy.first`, and + `SeriesGroupBy.last`; achieving skipna=False used to be + available via `DataFrameGroupBy.nth`, but the behavior was + changed in pandas 2.0.0 (#57019) + * Added the argument skipna to `Resampler.first`, + `Resampler.last` (#57019) +- Release notes for 2.2.0 + * For full changelog see + https://github.com/pandas-dev/pandas/blob/main/doc/source/whatsnew/v2.2.0.rst + ## Enhancements + * ADBC Driver support in to_sql and read_sql + * Create a pandas Series based on one or more conditions + * to_numpy for NumPy nullable and Arrow types converts to + suitable NumPy dtype + * Series.struct accessor for PyArrow structured data + * Series.list accessor for PyArrow list data + * Calamine engine for `read_excel` + ## Notable bug fixes + * `merge` and `DataFrame.join` now consistently follow documented + sort behavior + * `merge` and `DataFrame.join` no longer reorder levels when + levels differ + * Increased minimum versions for dependencies + ## Deprecations + * Chained assignment + * Deprecate aliases M, Q, Y, etc. in favour of ME, QE, YE, etc. 
+ for offsets + * Deprecated automatic downcasting +- Simplify flavor test setup: obs can evaluate %{shrink:} now + +------------------------------------------------------------------- +Tue Feb 6 08:59:26 UTC 2024 - Dirk Müller + +- enable py312 testing, remove py39 testing + +------------------------------------------------------------------- +Fri Jan 12 11:18:40 UTC 2024 - pgajdos@suse.com + +- have a possibility to not use pyarrow [bsc#1218592] + +------------------------------------------------------------------- +Wed Dec 13 16:35:35 UTC 2023 - Ben Greiner + +- Update to 2.1.4 + ## Fixed regressions + * Fixed regression when trying to read a pickled pandas DataFrame + from pandas 1.3 (GH 55137) + ## Bug fixes + * Bug in Series constructor raising DeprecationWarning when index + is a list of Series (GH 55228) + * Bug in Series when trying to cast date-like string inputs to + ArrowDtype of pyarrow.timestamp (GH 56266) + * Bug in DataFrame.apply() where passing raw=True ignored args + passed to the applied function (GH 55753) + * Bug in Index.__getitem__() returning wrong result for Arrow + dtypes and negative stepsize (GH 55832) + * Fixed bug in to_numeric() converting to extension dtype for + string[pyarrow_numpy] dtype (GH 56179) + * Fixed bug in DataFrameGroupBy.min() and DataFrameGroupBy.max() + not preserving extension dtype for empty object (GH 55619) + * Fixed bug in DataFrame.__setitem__() casting Index with + object-dtype to PyArrow backed strings when infer_string option + is set (GH 55638) + * Fixed bug in DataFrame.to_hdf() raising when columns have + StringDtype (GH 55088) + * Fixed bug in Index.insert() casting object-dtype to PyArrow + backed strings when infer_string option is set (GH 55638) + * Fixed bug in Series.__ne__() resulting in False for comparison + between NA and string value for dtype="string[pyarrow_numpy]" + (GH 56122) + * Fixed bug in Series.mode() not keeping object dtype when + infer_string is set (GH 56183) + * Fixed bug in 
Series.reset_index() not preserving object dtype + when infer_string is set (GH 56160) + * Fixed bug in Series.str.split() and Series.str.rsplit() when + pat=None for ArrowDtype with pyarrow.string (GH 56271) + * Fixed bug in Series.str.translate() losing object dtype when + string option is set (GH 56152) +- Go back to Cython0, it has NOT been unpinned by upstream released + version + * https://github.com/pandas-dev/pandas/blob/v2.1.4/pyproject.toml#L8 + * See also gh#jsonpickle/jsonpickle#460 + +------------------------------------------------------------------- +Fri Dec 1 03:25:23 UTC 2023 - Steve Kowalik + +- Update to 2.1.3: + * Reverted deprecation of fill_method=None in DataFrame.pct_change(), + Series.pct_change(), DataFrameGroupBy.pct_change(), and + SeriesGroupBy.pct_change(); the values 'backfill', 'bfill', 'pad', and + 'ffill' are still deprecated + * Fixed regressions + + Fixed infinite recursion from operations that return a new object on + some DataFrame subclasses + + Fixed regression in DataFrame.join() where result has missing values + and dtype is arrow backed string + + Fixed regression in rolling() where non-nanosecond index or on column + would produce incorrect results + + Fixed regression in DataFrame.resample() which was extrapolating back + to origin when origin was outside its bounds + + Fixed regression in DataFrame.sort_index() which was not sorting + correctly when the index was a sliced MultiIndex + + Fixed regression in DataFrameGroupBy.agg() and SeriesGroupBy.agg() + where if the option compute.use_numba was set to True, groupby methods + not supported by the numba engine would raise a TypeError + + Fixed performance regression with wide DataFrames, typically + involving methods where all columns were accessed individually + + Fixed regression in merge_asof() raising TypeError for by with + datetime and timedelta dtypes + + Fixed regression in read_parquet() when reading a file with a string + column consisting of more than 2 GB of 
string data and using the + "string" dtype + + Fixed regression in DataFrame.to_sql() not roundtripping datetime + columns correctly for sqlite when using detect_types + + Fixed regression in construction of certain DataFrame or Series + subclasses + * Bug fixes + + Bug in DatetimeIndex.diff() raising TypeError + + Bug in Index.isin() raising for Arrow backed string and None value + + Fix read_parquet() and read_feather() for CVE-2023-47248 + + Fixed bug in DataFrameGroupBy reductions not preserving object dtype + when infer_string is set + + Fixed bug in SeriesGroupBy.value_counts() returning incorrect dtype for + string columns + + Fixed bug in Categorical.equals() if other has arrow backed string dtype + + Fixed bug in DataFrame.__setitem__() not inferring string dtype for + zero-dimensional array with infer_string=True + + Fixed bug in DataFrame.idxmin() and DataFrame.idxmax() raising for + arrow dtypes + + Fixed bug in DataFrame.interpolate() raising incorrect error message + + Fixed bug in Index.insert() raising when inserting None into Index with + dtype="string[pyarrow_numpy]" + + Fixed bug in Series.all() and Series.any() not treating missing values + correctly for dtype="string[pyarrow_numpy]" + + Fixed bug in Series.floordiv() for ArrowDtype + + Fixed bug in Series.mode() not sorting values for arrow backed string + dtype + + Fixed bug in Series.rank() for string[pyarrow_numpy] dtype + + Fixed bug in Series.str.extractall() for ArrowDtype dtype being + converted to object + + Fixed bug where PDEP-6 warning about setting an item of an + incompatible dtype was being shown when creating a new conditional + column + + Silence Period[B] warnings introduced by GH 53446 during normal + plotting activity + + Fixed bug in Series constructor not inferring string dtype when NA is + the first value and infer_string is set +- Prepare for Python 3.12, include the flavor check. +- Unpin Cython, upstream has moved onto 3. 
+ +------------------------------------------------------------------- +Sat Oct 14 03:55:05 UTC 2023 - Bernhard Wiedemann + +- Fix random build failures + +------------------------------------------------------------------- +Sat Oct 7 20:22:18 UTC 2023 - Ben Greiner + +- Update to 2.1.1 + ## Fixed regressions + * Fixed regression in concat() when DataFrames have two + different extension dtypes (GH 54848) + * Fixed regression in merge() when merging over a PyArrow string + index (GH 54894) + * Fixed regression in read_csv() when usecols is given and dtypes + is a dict for engine="python" (GH 54868) + * Fixed regression in read_csv() when delim_whitespace is True + (GH 54918, GH 54931) + * Fixed regression in GroupBy.get_group() raising for axis=1 (GH + 54858) + * Fixed regression in DataFrame.__setitem__() raising + AssertionError when setting a Series with a partial MultiIndex + (GH 54875) + * Fixed regression in DataFrame.filter() not respecting the order + of elements for filter (GH 54980) + * Fixed regression in DataFrame.to_sql() not roundtripping + datetime columns correctly for sqlite (GH 54877) + * Fixed regression in DataFrameGroupBy.agg() when aggregating a + DataFrame with duplicate column names using a dictionary (GH + 55006) + * Fixed regression in MultiIndex.append() raising when appending + overlapping IntervalIndex levels (GH 54934) + * Fixed regression in Series.drop_duplicates() for PyArrow + strings (GH 54904) + * Fixed regression in Series.interpolate() raising when + fill_value was given (GH 54920) + * Fixed regression in Series.value_counts() raising for numeric + data if bins was specified (GH 54857) + * Fixed regression in comparison operations for PyArrow backed + columns not propagating exceptions correctly (GH 54944) + * Fixed regression when comparing a Series with datetime64 dtype + with None (GH 54870) + ## Bug fixes + * Fixed bug for ArrowDtype raising NotImplementedError for + fixed-size list (GH 55000) + * Fixed bug in 
DataFrame.stack() with future_stack=True and + columns a non-MultiIndex consisting of tuples (GH 54948) + * Fixed bug in Series.dt.tz() with ArrowDtype where a string was + returned instead of a tzinfo object (GH 55003) + * Fixed bug in Series.pct_change() and DataFrame.pct_change() + showing unnecessary FutureWarning (GH 54981) + ## Other + * Reverted the deprecation that disallowed Series.apply() + returning a DataFrame when the passed-in callable returns a + Series object (GH 52116) +- Drop pandas-pr55073-pyarrow13.patch merged upstream + +------------------------------------------------------------------- +Sun Sep 10 13:10:06 UTC 2023 - Ben Greiner + +- Fix test failures with pyarrow 13 + * Add pandas-pr55073-pyarrow13.patch + * gh#pandas-dev/pandas#55073 + * gh#pandas-dev/pandas#55048 + * gh#pandas-dev/pandas#55020 + +------------------------------------------------------------------- +Tue Sep 5 21:02:37 UTC 2023 - Ben Greiner + +- Use git cloned archive gh#pandas-dev/pandas#54907 + +------------------------------------------------------------------- +Thu Aug 31 14:51:11 UTC 2023 - Ben Greiner + +- Update to 2.1.0 + * https://pandas.pydata.org/pandas-docs/version/2.1.0/whatsnew/v2.1.0.html + * Avoid NumPy object dtype for strings by default + * DataFrame reductions preserve extension dtypes + * Copy-on-Write improvements + * New DataFrame.map() method and support for ExtensionArrays + * New implementation of DataFrame.stack() + * Other minor enhancements (see link above) + ## Backwards incompatible API changes + * pandas 2.1.0 supports Python 3.9 and higher + * Increased minimum versions for numpy 1.22.3 and some optional + dependencies + * arrays.PandasArray has been renamed NumpyExtensionArray and the + attached dtype name changed from PandasDtype to NumpyEADtype; + importing PandasArray still works until the next major version + (GH 53694) + ## Deprecations + * Deprecated silent upcasting in setitem-like Series operations + * Deprecated parsing datetimes 
with mixed time zones + * Other Deprecations (see link above) + ## More + * Performance Improvements (see link above) + * Bug fixes (see link above) +- Switch to meson build system + +------------------------------------------------------------------- +Sun Aug 13 21:50:37 UTC 2023 - Dirk Müller + +- update to 2.0.3: + * Bug in Timestamp.weekday() was returning incorrect results + before '0000-02-29' + * Fixed performance regression in merging on datetime-like columns + * Fixed regression when DataFrame.to_string() creates extra space + for string dtypes + * Bug in DataFrame.convert_dtype() and Series.convert_dtype() + when trying to convert ArrowDtype with dtype_backend="nullable_numpy" + * Bug in RangeIndex.union() when using sort=True with another + RangeIndex + * Bug in Series.reindex() when expanding a non-nanosecond datetime + or timedelta + * Bug in read_csv() when defining dtype with bool[pyarrow] for + the "c" and "python" engines + * Bug in Series.str.split() and Series.str.rsplit() with expand=True + * Bug in indexing methods (e.g. 
DataFrame.__getitem__()) where + taking the entire DataFrame/Series would raise an OverflowError + when Copy on Write was enabled and the length of the array was over + the maximum size a 32-bit integer can hold + * Bug when constructing a DataFrame with columns of an ArrowDtype + with a pyarrow.dictionary type that reindexes the data + * Bug when indexing a DataFrame or Series with an Index with a + timestamp ArrowDtype would raise an AttributeError +- drop pandas-fix-tests.patch (upstream) + +------------------------------------------------------------------- +Thu Jun 22 09:36:06 UTC 2023 - Guillaume GARDET + +- Fix tests on aarch64: + * pandas-fix-tests.patch + +------------------------------------------------------------------- +Sun Jun 11 19:51:26 UTC 2023 - Johannes Kastl + +- do not use %elif for python-numpy dependency condition + +------------------------------------------------------------------- +Wed Jun 7 09:13:48 UTC 2023 - Ben Greiner + +- Increase minimum memory constraints for tests + +------------------------------------------------------------------- +Sat May 27 13:18:13 UTC 2023 - Ben Greiner + +- Update to 2.0.2 + ## Fixed regressions + * Fixed performance regression in GroupBy.apply() (GH53195) + * Fixed regression in merge() on Windows when dtype is np.intc + (GH52451) + * Fixed regression in read_sql() dropping columns with duplicated + column names (GH53117) + * Fixed regression in DataFrame.loc() losing MultiIndex name when + enlarging object (GH53053) + * Fixed regression in DataFrame.to_string() printing a backslash + at the end of the first row of data, instead of headers, when + the DataFrame doesn’t fit the line width (GH53054) + * Fixed regression in MultiIndex.join() returning levels in wrong + order (GH53093) + ## Bug fixes + * Bug in arrays.ArrowExtensionArray incorrectly assigning dict + instead of list for .type with pyarrow.map_ and raising a + NotImplementedError with pyarrow.struct (GH53328) + * Bug in 
api.interchange.from_dataframe() was raising IndexError + on empty categorical data (GH53077) + * Bug in api.interchange.from_dataframe() was returning + DataFrame’s of incorrect sizes when called on slices (GH52824) + * Bug in api.interchange.from_dataframe() was unnecessarily + raising on bitmasks (GH49888) + * Bug in merge() when merging on datetime columns on different + resolutions (GH53200) + * Bug in read_csv() raising OverflowError for engine="pyarrow" + and parse_dates set (GH53295) + * Bug in to_datetime() was inferring format to contain "%H" + instead of "%I" if date contained “AM” / “PM” tokens (GH53147) + * Bug in DataFrame.convert_dtypes() ignores convert_* keywords + when set to False dtype_backend="pyarrow" (GH52872) + * Bug in DataFrame.convert_dtypes() losing timezone for tz-aware + dtypes and dtype_backend="pyarrow" (GH53382) + * Bug in DataFrame.sort_values() raising for PyArrow dictionary + dtype (GH53232) + * Bug in Series.describe() treating pyarrow-backed timestamps and + timedeltas as categorical data (GH53001) + * Bug in Series.rename() not making a lazy copy when + Copy-on-Write is enabled when a scalar is passed to it + (GH52450) + * Bug in pd.array() raising for NumPy array and pa.large_string + or pa.large_binary (GH52590) + * Bug in DataFrame.__getitem__() not preserving dtypes for + MultiIndex partial keys (GH51895) + ## Other + * Raised a better error message when calling + Series.dt.to_pydatetime() with ArrowDtype with pyarrow.date32 + or pyarrow.date64 type (GH52812) +- Release to 2.0.1 + ## Fixed regressions + * Fixed regression for subclassed Series when constructing from a + dictionary (GH52445) + * Fixed regression in SeriesGroupBy.agg() failing when grouping + with categorical data, multiple groupings, as_index=False, and + a list of aggregations (GH52760) + * Fixed regression in DataFrame.pivot() changing Index name of + input object (GH52629) + * Fixed regression in DataFrame.resample() raising on a DataFrame + with no 
columns (GH52484) + * Fixed regression in DataFrame.sort_values() not resetting index + when DataFrame is already sorted and ignore_index=True + (GH52553) + * Fixed regression in MultiIndex.isin() raising TypeError for + Generator (GH52568) + * Fixed regression in Series.describe() showing RuntimeWarning + for extension dtype Series with one element (GH52515) + * Fixed regression when adding a new column to a DataFrame when + the DataFrame.columns was a RangeIndex and the new key was + hashable but not a scalar (GH52652) + ## Bug fixes + * Bug in Series.dt.days that would overflow int32 number of days + (GH52391) + * Bug in arrays.DatetimeArray constructor returning an incorrect + unit when passed a non-nanosecond numpy datetime array + (GH52555) + * Bug in ArrowExtensionArray with duration dtype overflowing when + constructed from data containing numpy NaT (GH52843) + * Bug in Series.dt.round() when passing a freq of equal or higher + resolution compared to the Series would raise a + ZeroDivisionError (GH52761) + * Bug in Series.median() with ArrowDtype returning an approximate + median (GH52679) + * Bug in api.interchange.from_dataframe() was unnecessarily + raising on categorical dtypes (GH49889) + * Bug in api.interchange.from_dataframe() was unnecessarily + raising on large string dtypes (GH52795) + * Bug in pandas.testing.assert_series_equal() where + check_dtype=False would still raise for datetime or timedelta + types with different resolutions (GH52449) + * Bug in read_csv() casting PyArrow datetimes to NumPy when + dtype_backend="pyarrow" and parse_dates is set causing a + performance bottleneck in the process (GH52546) + * Bug in to_datetime() and to_timedelta() when trying to convert + numeric data with a ArrowDtype (GH52425) + * Bug in to_numeric() with errors='coerce' and + dtype_backend='pyarrow' with ArrowDtype data (GH52588) + * Bug in ArrowDtype.__from_arrow__() not respecting if dtype is + explicitly given (GH52533) + * Bug in 
DataFrame.describe() not respecting ArrowDtype in + include and exclude (GH52570) + * Bug in DataFrame.max() and related casting different Timestamp + resolutions always to nanoseconds (GH52524) + * Bug in Series.describe() not returning ArrowDtype with + pyarrow.float64 type with numeric data (GH52427) + * Bug in Series.dt.tz_localize() incorrectly localizing + timestamps with ArrowDtype (GH52677) + * Bug in arithmetic between np.datetime64 and np.timedelta64 NaT + scalars with units always returning nanosecond resolution + (GH52295) + * Bug in logical and comparison operations between ArrowDtype and + numpy masked types (e.g. "boolean") (GH52625) + * Fixed bug in merge() when merging with ArrowDtype on one and a + NumPy dtype on the other side (GH52406) + * Fixed segfault in Series.to_numpy() with null[pyarrow] dtype + (GH52443) + ## Other + * DataFrame created from empty dicts had columns of dtype object. + It is now a RangeIndex (GH52404) + * Series created from empty dicts had index of dtype object. It + is now a RangeIndex (GH52404) + * Implemented Series.str.split() and Series.str.rsplit() for + ArrowDtype with pyarrow.string (GH52401) + * Implemented most str accessor methods for ArrowDtype with + pyarrow.string (GH52401) + * Supplying a non-integer hashable key that tests False in + api.types.is_scalar() now raises a KeyError for + RangeIndex.get_loc(), like it does for Index.get_loc(). + Previously it raised an InvalidIndexError (GH52652). 
+- Release to 2.0.0 + ## Enhancements + * Installing optional dependencies with pip extras + * Index can now hold numpy numeric dtypes + * Argument dtype_backend , to return pyarrow-backed or + numpy-backed nullable dtypes + * Copy-on-Write improvements + * Other enhancements, see + https://pandas.pydata.org/pandas-docs/version/2.0.2/whatsnew/v2.0.0.html#other-enhancements + ## Notable bug fixes + * DataFrameGroupBy.cumsum() and DataFrameGroupBy.cumprod() + overflow instead of lossy casting to float + * DataFrameGroupBy.nth() and SeriesGroupBy.nth() now behave as + filtrations + ## Backwards incompatible API changes + * Construction with datetime64 or timedelta64 dtype with + unsupported resolution + * Value counts sets the resulting name to count + * Disallow astype conversion to non-supported + datetime64/timedelta64 dtypes + * UTC and fixed-offset timezones default to standard-library + tzinfo objects + * Empty DataFrames/Series will now default to have a RangeIndex + * DataFrame to LaTeX has a new render engine + * Increased minimum versions for dependencies + * Datetimes are now parsed with a consistent format + * Other API changes, see + https://pandas.pydata.org/pandas-docs/version/2.0.2/whatsnew/v2.0.0.html#other-api-changes + ## Deprecations + ## Removal of prior version deprecations/changes + ## Performance improvements + ## Bug fixes +- Drop python38 test flavor and start testing python311 which has + been missing since. 
+ +------------------------------------------------------------------- +Mon May 8 06:10:30 UTC 2023 - Johannes Kastl + +- add sle15_python_module_pythons + +------------------------------------------------------------------- +Wed Feb 8 18:28:19 UTC 2023 - Arun Persaud + +- specfile: + * update copyright year + * remove pandas-pr49886-fix-numpy-deprecations.patch, implemented upstream + +- update to version 1.5.3: + * Fixed regressions + + Fixed performance regression in Series.isin() when values is + empty (GH49839) + + Fixed regression in DataFrame.memory_usage() showing unnecessary + FutureWarning when DataFrame is empty (GH50066) + + Fixed regression in DataFrameGroupBy.transform() when used with + as_index=False (GH49834) + + Enforced reversion of color as an alias for c and size as an + alias for s in function DataFrame.plot.scatter() (GH49732) + + Fixed regression in SeriesGroupBy.apply() setting a name + attribute on the result if the result was a DataFrame (GH49907) + + Fixed performance regression in setting with the at() indexer + (GH49771) + + Fixed regression in the methods apply, agg, and transform when + used with NumPy functions that informed users to supply + numeric_only=True if the operation failed on non-numeric dtypes; + such columns must be dropped prior to using these methods + (GH50538) + + Fixed regression in to_datetime() raising ValueError when + parsing array of float containing np.nan (GH50237) + * Bug fixes + + Bug in the Copy-on-Write implementation losing track of views + when indexing a DataFrame with another DataFrame (GH50630) + + Bug in Styler.to_excel() leading to error when unrecognized + border-style (e.g. 
"hair") provided to Excel writers (GH48649) + + Bug in Series.quantile() emitting warning from NumPy when Series + has only NA values (GH50681) + + Bug when chaining several Styler.concat() calls, only the last + styler was concatenated (GH49207) + + Fixed bug when instantiating a DataFrame subclass inheriting + from typing.Generic that triggered a UserWarning on python 3.11 + (GH49649) + + Bug in pivot_table() with NumPy 1.24 or greater when the + DataFrame columns has nested elements (GH50342) + + Bug in pandas.testing.assert_series_equal() (and equivalent + assert_ functions) when having nested data and using numpy >= + 1.25 (GH50360) + * Other + + Note: If you are using DataFrame.to_sql(), read_sql(), + read_sql_table(), or read_sql_query() with SQLAlchemy 1.4.46 or + greater, you may see a sqlalchemy.exc.RemovedIn20Warning. These + warnings can be safely ignored for the SQLAlchemy 1.4.x releases + as pandas works toward compatibility with SQLAlchemy 2.0. + + Reverted deprecation (GH45324) of behavior of + Series.__getitem__() and Series.__setitem__() slicing with an + integer Index; this will remain positional (GH49612) + + A FutureWarning raised when attempting to set values inplace + with DataFrame.loc() or DataFrame.iloc() has been changed to a + DeprecationWarning (GH48673) + +------------------------------------------------------------------- +Fri Dec 23 16:22:18 UTC 2022 - Ben Greiner + +- Update to version 1.5.2 + ## Fixed regressions + * Fixed regression in MultiIndex.join() for extension array + dtypes (GH49277) + * Fixed regression in Series.replace() raising RecursionError + with numeric dtype and when specifying value=None (GH45725) + * Fixed regression in arithmetic operations for DataFrame with + MultiIndex columns with different dtypes (GH49769) + * Fixed regression in DataFrame.plot() preventing Colormap + instance from being passed using the colormap argument if + Matplotlib 3.6+ is used (GH49374) + * Fixed regression in date_range() returning 
an invalid set of + periods for CustomBusinessDay frequency and start date with + timezone (GH49441) + * Fixed performance regression in groupby operations (GH49676) + * Fixed regression in Timedelta constructor returning object of + wrong type when subclassing Timedelta (GH49579) + ## Bug fixes + * Bug in the Copy-on-Write implementation losing track of views + in certain chained indexing cases (GH48996) + * Fixed memory leak in Styler.to_excel() (GH49751) + ## Other + * Reverted color as an alias for c and size as an alias for s in + function DataFrame.plot.scatter() (GH49732) +- Add pandas-pr49886-fix-numpy-deprecations.patch + * gh#pandas-dev/pandas#49887 +- Move to PEP518 build + +------------------------------------------------------------------- +Sat Oct 22 16:10:11 UTC 2022 - Arun Persaud + +- update to version 1.5.1: + * Fixed regressions + + Fixed Regression in Series.__setitem__() casting None to NaN for + object dtype (GH48665) + + Fixed Regression in DataFrame.loc() when setting values as a + DataFrame with all True indexer (GH48701) + + Regression in read_csv() causing an EmptyDataError when using an + UTF-8 file handle that was already read from (GH48646) + + Regression in to_datetime() when utc=True and arg contained + timezone naive and aware arguments raised a ValueError (GH48678) + + Fixed regression in DataFrame.loc() raising FutureWarning when + setting an empty DataFrame (GH48480) + + Fixed regression in DataFrame.describe() raising TypeError when + result contains NA (GH48778) + + Fixed regression in DataFrame.plot() ignoring invalid colormap + for kind="scatter" (GH48726) + + Fixed regression in MultiIndex.values() resetting freq + attribute of underlying Index object (GH49054) + + Fixed performance regression in factorize() when na_sentinel is + not None and sort=False (GH48620) + + Fixed regression causing an AttributeError during warning + emitted if the provided table name in DataFrame.to_sql() and the + table name actually used in the 
database do not match (GH48733) + + Fixed regression in to_datetime() when arg was a date string + with nanosecond and format contained %f would raise a ValueError + (GH48767) + + Fixed regression in assert_frame_equal() raising for MultiIndex + with Categorical and check_like=True (GH48975) + + Fixed regression in DataFrame.fillna() replacing wrong values + for datetime64[ns] dtype and inplace=True (GH48863) + + Fixed DataFrameGroupBy.size() not returning a Series when axis=1 + (GH48738) + + Fixed Regression in DataFrameGroupBy.apply() when user defined + function is called on an empty dataframe (GH47985) + + Fixed regression in DataFrame.apply() when passing non-zero axis + via keyword argument (GH48656) + + Fixed regression in Series.groupby() and DataFrame.groupby() + when the grouper is a nullable data type (e.g. Int64) or a + PyArrow-backed string array, contains null values, and + dropna=False (GH48794) + + Fixed performance regression in Series.isin() with mismatching + dtypes (GH49162) + + Fixed regression in DataFrame.to_parquet() raising when file + name was specified as bytes (GH48944) + + Fixed regression in ExcelWriter where the book attribute could + no longer be set; however setting this attribute is now + deprecated and this ability will be removed in a future version + of pandas (GH48780) + + Fixed regression in DataFrame.corrwith() when computing + correlation on tied data with method="spearman" (GH48826) + * Bug fixes + + Bug in Series.__getitem__() not falling back to positional for + integer keys and boolean Index (GH48653) + + Bug in DataFrame.to_hdf() raising AssertionError with boolean + index (GH48667) + + Bug in assert_index_equal() for extension arrays with non + matching NA raising ValueError (GH48608) + + Bug in DataFrame.pivot_table() raising unexpected FutureWarning + when setting datetime column as index (GH48683) + + Bug in DataFrame.sort_values() emitting unnecessary + FutureWarning when called on DataFrame with boolean sparse + 
columns (GH48784) + + Bug in arrays.ArrowExtensionArray with a comparison operator to + an invalid object would not raise a NotImplementedError + (GH48833) + * Other + + Avoid showing deprecated signatures when introspecting functions + with warnings about arguments becoming keyword-only (GH48692) + +------------------------------------------------------------------- +Mon Sep 19 21:49:49 UTC 2022 - Arun Persaud + +- specfile: + * update required versions + +- update to version 1.5.0: + * long changelog, full version available at + https://pandas.pydata.org/pandas-docs/stable/whatsnew/v1.5.0.html# + +------------------------------------------------------------------- +Sat Sep 10 13:25:41 UTC 2022 - Arun Persaud + +- specfile: + * update required version + +- update to version 1.4.4: + * Fixed regressions + + Fixed regression in DataFrame.fillna() not working on a + DataFrame with a MultiIndex (GH47649) + + Fixed regression in taking NULL objects from a DataFrame causing + a segmentation violation. 
These NULL values are created by + numpy.empty_like() (GH46848) + + Fixed regression in concat() materializing the Index during + sorting even if the Index was already sorted (GH47501) + + Fixed regression in concat() or merge() handling of all-NaN + ExtensionArrays with custom attributes (GH47762) + + Fixed regression in calling bitwise numpy ufuncs (for example, + np.bitwise_and) on Index objects (GH46769) + + Fixed regression in cut() when using a datetime64 IntervalIndex + as bins (GH46218) + + Fixed regression in DataFrame.select_dtypes() where + include="number" included BooleanDtype (GH46870) + + Fixed regression in DataFrame.loc() raising error when indexing + with a NamedTuple (GH48124) + + Fixed regression in DataFrame.loc() not updating the cache + correctly after values were set (GH47867) + + Fixed regression in DataFrame.loc() not aligning index in some + cases when setting a DataFrame (GH47578) + + Fixed regression in DataFrame.loc() setting a length-1 array + like value to a single value in the DataFrame (GH46268) + + Fixed regression when slicing with DataFrame.loc() with + DatetimeIndex with a DateOffset object for its freq (GH46671) + + Fixed regression in setting None or non-string value into a + string-dtype Series using a mask (GH47628) + + Fixed regression in updating a DataFrame column through Series + __setitem__ (using chained assignment) not updating column + values inplace and using too much memory (GH47172) + + Fixed regression in DataFrame.select_dtypes() returning a view + on the original DataFrame (GH48090) + + Fixed regression using custom Index subclasses (for example, + used in xarray) with reset_index() or Index.insert() (GH47071) + + Fixed regression in intersection() when the DatetimeIndex has + dates crossing daylight savings time (GH46702) + + Fixed regression in merge() throwing an error when passing a + Series with a multi-level name (GH47946) + + Fixed regression in DataFrame.eval() creating a copy when + updating inplace 
(GH47449) + + Fixed regression where getting a row using DataFrame.iloc() with + SparseDtype would raise (GH46406) + * Bug fixes + + The FutureWarning raised when passing arguments (other than + filepath_or_buffer) as positional in read_csv() is now raised at + the correct stacklevel (GH47385) + + Bug in DataFrame.to_sql() when method was a callable that did + not return an int and would raise a TypeError (GH46891) + + Bug in DataFrameGroupBy.value_counts() where subset had no + effect (GH46383) + + Bug when getting values with DataFrame.loc() with a list of keys + causing an internal inconsistency that could lead to a + disconnect between frame.at[x, y] vs frame[y].loc[x] (GH22372) + + Bug in the Series.dt.strftime() accessor returning a float instead + of object dtype Series for all-NaT input, which also causes a + spurious deprecation warning (GH45858) + * Other + + The minimum version of Cython needed to compile pandas is now + 0.29.32 (GH47978) + +------------------------------------------------------------------- +Sat Jul 9 13:43:53 UTC 2022 - Arun Persaud + +- update to version 1.4.3: + * Behavior of concat with empty or all-NA DataFrame columns + The behavior change in version 1.4.0 to stop ignoring the data + type of empty or all-NA columns with float or object dtype in + concat() (Ignoring dtypes in concat with empty or all-NA columns) + has been reverted (GH45637). 
+ * Fixed regressions + + Fixed regression in DataFrame.replace() when the replacement value + was explicitly None when passed in a dictionary to to_replace also + casting other columns to object dtype even when there were no + values to replace (GH46634) + + Fixed regression in DataFrame.to_csv() raising error when + DataFrame contains extension dtype categorical column (GH46297, + GH46812) + + Fixed regression in representation of dtypes attribute of + MultiIndex (GH46900) + + Fixed regression when setting values with DataFrame.loc() updating + RangeIndex when index was set as new column and column was updated + afterwards (GH47128) + + Fixed regression in DataFrame.fillna() and DataFrame.update() + creating a copy when updating inplace (GH47188) + + Fixed regression in DataFrame.nsmallest() led to wrong results + when the sorting column has np.nan values (GH46589) + + Fixed regression in read_fwf() raising ValueError when widths was + specified with usecols (GH46580) + + Fixed regression in concat() not sorting columns for mixed column + names (GH47127) + + Fixed regression in Groupby.transform() and Groupby.agg() failing + with engine="numba" when the index was a MultiIndex (GH46867) + + Fixed regression in NaN comparison for Index operations where the + same object was compared (GH47105) + + Fixed regression in Styler.to_latex() and Styler.to_html() where + buf failed in combination with encoding (GH47053) + + Fixed regression in read_csv() with index_col=False identifying + first row as index names when header=None (GH46955) + + Fixed regression in DataFrameGroupBy.agg() when used with + list-likes or dict-likes and axis=1 that would give incorrect + results; now raises NotImplementedError (GH46995) + + Fixed regression in DataFrame.resample() and DataFrame.rolling() + when used with list-likes or dict-likes and axis=1 that would + raise an unintuitive error message; now raises NotImplementedError + (GH46904) + + Fixed regression in 
testing.assert_index_equal() when + check_order=False and Index has extension or object dtype + (GH47207) + + Fixed regression in read_excel() returning ints as floats on + certain input sheets (GH46988) + + Fixed regression in DataFrame.shift() when axis is columns and + fill_value is absent, freq is ignored (GH47039) + + Fixed regression in DataFrame.to_json() causing a segmentation + violation when DataFrame is created with an index parameter of the + type PeriodIndex (GH46683) + * Bug fixes + + Bug in pandas.eval(), DataFrame.eval() and DataFrame.query() + where passing empty local_dict or global_dict was treated as + passing None (GH47084) + + Most I/O methods no longer suppress OSError and ValueError when + closing file handles (GH47136) + * Other + + The minimum version of Cython needed to compile pandas is now + 0.29.30 (GH41935) + +------------------------------------------------------------------- +Tue Apr 5 13:40:30 UTC 2022 - Ben Greiner + +- Update to version 1.4.2 + * Fixed regression in DataFrame.drop() and Series.drop() when + Index had extension dtype and duplicates (GH45860) + * Fixed regression in read_csv() killing python process when + invalid file input was given for engine="c" (GH45957) + * Fixed memory performance regression in Series.fillna() when + called on a DataFrame column with inplace=True (GH46149) + * Provided an alternative solution for passing custom Excel + formats in Styler.to_excel(), which was a regression based on + stricter CSS validation. 
Examples available in the + documentation for Styler.format() (GH46152) + * Fixed regression in DataFrame.replace() when a replacement + value was also a target for replacement (GH46306) + * Fixed regression in DataFrame.replace() when the replacement + value was explicitly None when passed in a dictionary to + to_replace (GH45601, GH45836) + * Fixed regression when setting values with DataFrame.loc() + losing MultiIndex names if DataFrame was empty before (GH46317) + * Fixed regression when rendering boolean datatype columns with + Styler() (GH46384) + * Fixed regression in Groupby.rolling() with a frequency window + that would raise a ValueError even if the datetimes within each + group were monotonic (GH46061) + * Fix some cases for subclasses that define their _constructor + properties as general callables (GH46018) + * Fixed “longtable” formatting in Styler.to_latex() when + column_format is given in extended format (GH46037) + * Fixed incorrect rendering in Styler.format() with + hyperlinks="html" when the url contains a colon or other + special characters (GH46389) + * Improved error message in Rolling when window is a frequency + and NaT is in the rolling axis (GH46087) +- Copy back the installed package into the source tree + * mimics upstream's test setup of an editable install + * avoids conftest.py collection errors with pytest 7 + +------------------------------------------------------------------- +Sat Feb 12 23:29:24 UTC 2022 - Arun Persaud + +- update to version 1.4.1: + * Fixed regressions + + Regression in Series.mask() with inplace=True and PeriodDtype + and an incompatible other coercing to a common dtype instead of + raising (GH45546) + + Regression in assert_frame_equal() not respecting + check_flags=False (GH45554) + + Regression in DataFrame.loc() raising ValueError when indexing + (getting values) on a MultiIndex with one level (GH45779) + + Regression in Series.fillna() with downcast=False incorrectly + downcasting object dtype (GH45603) + 
+ Regression in api.types.is_bool_dtype() raising an + AttributeError when evaluating a categorical Series (GH45615) + + Regression in DataFrame.iat() setting values leading to not + propagating correctly in subsequent lookups (GH45684) + + Regression when setting values with DataFrame.loc() losing Index + name if DataFrame was empty before (GH45621) + + Regression in join() with overlapping IntervalIndex raising an + InvalidIndexError (GH45661) + + Regression when setting values with Series.loc() raising with + all False indexer and Series on the right hand side (GH45778) + + Regression in read_sql() with a DBAPI2 connection that is not an + instance of sqlite3.Connection incorrectly requiring SQLAlchemy + be installed (GH45660) + + Regression in DateOffset when constructing with an integer + argument with no keywords (e.g. pd.DateOffset(n)) would behave + like datetime.timedelta(days=0) (GH45643, GH45890) + * Bug fixes + + Fixed segfault in DataFrame.to_json() when dumping tz-aware + datetimes in Python 3.10 (GH42130) + + Stopped emitting unnecessary FutureWarning in + DataFrame.sort_values() with sparse columns (GH45618) + + Fixed window aggregations in DataFrame.rolling() and + Series.rolling() to skip over unused elements (GH45647) + + Fixed builtin highlighters in Styler to be responsive to NA with + nullable dtypes (GH45804) + + Bug in apply() with axis=1 raising an erroneous ValueError + (GH45912) + * Other + + Reverted performance speedup of DataFrame.corr() for + method=pearson to fix precision regression (GH45640, GH42761) + +------------------------------------------------------------------- +Tue Jan 25 19:26:46 UTC 2022 - Ben Greiner + +- Skip more tests on non-intel architectures + boo#1167730 + +------------------------------------------------------------------- +Sun Jan 23 11:52:29 UTC 2022 - Ben Greiner + +- Update to version 1.4.0 + * https://pandas.pydata.org/docs/whatsnew/v1.4.0.html + * Enhancements + - Improved warning messages + - Index can 
hold arbitrary ExtensionArrays + - Enhancements in Styler + - Multi-threaded CSV reading with a new CSV Engine based on + pyarrow + - Rank function for rolling and expanding windows + - Groupby positional indexing + - DataFrame.from_dict and DataFrame.to_dict have new 'tight' + option + * Notable bug fixes + - Inconsistent date string parsing + - Ignoring dtypes in concat with empty or all-NA columns + - Null-values are no longer coerced to NaN-value in + value_counts and mode + - mangle_dupe_cols in read_csv no longer renames unique columns + conflicting with target names + - unstack and pivot_table no longer raises ValueError for + result that would exceed int32 limit + - groupby.apply consistent transform detection + * API changes + - Index.get_indexer_for() no longer accepts keyword arguments + (other than target); in the past these would be silently + ignored if the index was not unique (GH42310) + - Change in the position of the min_rows argument in + DataFrame.to_string() due to change in the docstring + (GH44304) + - Reduction operations for DataFrame or Series now raising a + ValueError when None is passed for skipna (GH44178) + - read_csv() and read_html() no longer raising an error when + one of the header rows consists only of Unnamed: columns + (GH13054) + - Changed the name attribute of several holidays in + USFederalHolidayCalendar to match official federal holiday + names. 
+ * Deprecations + - Deprecated Int64Index, UInt64Index & Float64Index + - Deprecated DataFrame.append and Series.append +- Split out test runs into separate flavors, optimize memory usage + in pytest-xdist runs + +------------------------------------------------------------------- +Tue Jan 4 21:56:55 UTC 2022 - Ben Greiner + +- Update to version 1.3.5 + * Fixed regression in Series.equals() when comparing floats with + dtype object to None (GH44190) + * Fixed regression in merge_asof() raising error when array was + supplied as join key (GH42844) + * Fixed regression when resampling DataFrame with DateTimeIndex + with empty groups and uint8, uint16 or uint32 columns + incorrectly raising RuntimeError (GH43329) + * Fixed regression in creating a DataFrame from a timezone-aware + Timestamp scalar near a Daylight Savings Time transition + (GH42505) + * Fixed performance regression in read_csv() (GH44106) + * Fixed regression in Series.duplicated() and + Series.drop_duplicates() when Series has Categorical dtype with + boolean categories (GH44351) + * Fixed regression in GroupBy.sum() with timedelta64[ns] dtype + containing NaT failing to treat that value as NA (GH42659) + * Fixed regression in RollingGroupby.cov() and + RollingGroupby.corr() when other had the same shape as each + group would incorrectly return superfluous groups in the result + (GH42915) + +------------------------------------------------------------------- +Wed Oct 20 12:29:00 UTC 2021 - Guillaume GARDET + +- Update to version 1.3.4 + * Fixed regression in DataFrame.convert_dtypes() incorrectly + converts byte strings to strings (GH43183) + * Fixed regression in GroupBy.agg() where it was failing + silently with mixed data types along axis=1 and MultiIndex (GH43209) + * Fixed regression in merge() with integer and NaN keys + failing with outer merge (GH43550) + * Fixed regression in DataFrame.corr() raising ValueError with + method="spearman" on 32-bit platforms (GH43588) + * Fixed performance
regression in MultiIndex.equals() (GH43549) + * Fixed performance regression in GroupBy.first() and GroupBy.last() + with StringDtype (GH41596) + * Fixed regression in Series.cat.reorder_categories() failing to + update the categories on the Series (GH43232) + * Fixed regression in Series.cat.categories() setter failing to + update the categories on the Series (GH43334) + * Fixed regression in read_csv() raising UnicodeDecodeError exception + when memory_map=True (GH43540) + * Fixed regression in DataFrame.explode() raising AssertionError + when column is any scalar which is not a string (GH43314) + * Fixed regression in Series.aggregate() attempting to pass args + and kwargs multiple times to the user supplied func in certain cases (GH43357) + * Fixed regression when iterating over a DataFrame.groupby.rolling + object causing the resulting DataFrames to have an incorrect index if the input groupings were not sorted (GH43386) + * Fixed regression in DataFrame.groupby.rolling.cov() and + DataFrame.groupby.rolling.corr() computing incorrect results if the + input groupings were not sorted (GH43386) + * Fixed bug in pandas.DataFrame.groupby.rolling() and + pandas.api.indexers.FixedForwardWindowIndexer leading to + segfaults and window endpoints being mixed across groups (GH43267) + * Fixed bug in GroupBy.mean() with datetimelike values + including NaT values returning incorrect results (GH43132) + * Fixed bug in Series.aggregate() not passing the first args + to the user supplied func in certain cases (GH43357) + * Fixed memory leaks in Series.rolling.quantile() and + Series.rolling.median() (GH43339) + +------------------------------------------------------------------- +Mon Sep 20 18:28:29 UTC 2021 - Ben Greiner + +- Update to version 1.3.3 + * Fixed regression in DataFrame constructor failing to broadcast + for defined Index and len one list of Timestamp (GH42810) + * Fixed regression in GroupBy.agg() incorrectly raising in some + cases (GH42390) + * Fixed 
regression in GroupBy.apply() where nan values were + dropped even with dropna=False (GH43205) + * Fixed regression in GroupBy.quantile() which was failing with + pandas.NA (GH42849) + * Fixed regression in merge() where on columns with + ExtensionDtype or bool data types were cast to object in right + and outer merge (GH40073) + * Fixed regression in RangeIndex.where() and RangeIndex.putmask() + raising AssertionError when result did not represent a + RangeIndex (GH43240) + * Fixed regression in read_parquet() where the fastparquet engine + would not work properly with fastparquet 0.7.0 (GH43075) + * Fixed regression in DataFrame.loc.__setitem__() raising + ValueError when setting array as cell value (GH43422) + * Fixed regression in is_list_like() where objects with __iter__ + set to None would be identified as iterable (GH43373) + * Fixed regression in DataFrame.__getitem__() raising error for + slice of DatetimeIndex when index is non monotonic (GH43223) + * Fixed regression in Resampler.aggregate() when used after + column selection would raise if func is a list of aggregation + functions (GH42905) + * Fixed regression in DataFrame.corr() where Kendall correlation + would produce incorrect results for columns with repeated + values (GH43401) + * Fixed regression in DataFrame.groupby() where aggregation on + columns with object types dropped results on those columns + (GH42395, GH43108) + * Fixed regression in Series.fillna() raising TypeError when + filling float Series with list-like fill value having a dtype + which couldn’t cast losslessly (like float32 filled with + float64) (GH43424) + * Fixed regression in read_csv() raising AttributeError when the + file handle is a tempfile.SpooledTemporaryFile object + (GH43439) + * Fixed performance regression in core.window.ewm.
+ ExponentialMovingWindow.mean() (GH42333) + * Performance improvement for DataFrame.__setitem__() when the + key or value is not a DataFrame, or key is not list-like + (GH43274) + * Fixed bug in DataFrameGroupBy.agg() and DataFrameGroupBy. + transform() with engine="numba" where index data was not being + correctly passed into func (GH43133) +- Release 1.3.2 + * Performance regression in DataFrame.isin() and Series.isin() + for nullable data types (GH42714) + * Regression in updating values of Series using boolean index, + created by using DataFrame.pop() (GH42530) + * Regression in DataFrame.from_records() with empty records + (GH42456) + * Fixed regression in DataFrame.shift() where TypeError occurred + when shifting DataFrame created by concatenation of slices and + fills with values (GH42719) + * Regression in DataFrame.agg() when the func argument returned + lists and axis=1 (GH42727) + * Regression in DataFrame.drop() does nothing if MultiIndex has + duplicates and indexer is a tuple or list of tuples (GH42771) + * Fixed regression where read_csv() raised a ValueError when + parameters names and prefix were both set to None (GH42387) + * Fixed regression in comparisons between Timestamp object and + datetime64 objects outside the implementation bounds for + nanosecond datetime64 (GH42794) + * Fixed regression in Styler.highlight_min() and Styler. 
+ highlight_max() where pandas.NA was not successfully ignored + (GH42650) + * Fixed regression in concat() where copy=False was not honored + in axis=1 Series concatenation (GH42501) + * Regression in Series.nlargest() and Series.nsmallest() with + nullable integer or float dtype (GH42816) + * Fixed regression in Series.quantile() with Int64Dtype (GH42626) + * Fixed regression in Series.groupby() and DataFrame.groupby() + where supplying the by argument with a Series named with a + tuple would incorrectly raise (GH42731) + * Bug in read_excel() modifies the dtypes dictionary when reading + a file with duplicate columns (GH42462) + * 1D slices over extension types turn into N-dimensional slices + over ExtensionArrays (GH42430) + * Fixed bug in Series.rolling() and DataFrame.rolling() not + calculating window bounds correctly for the first row when + center=True and window is an offset that covers all the rows + (GH42753) + * Styler.hide_columns() now hides the index name header row as + well as column headers (GH42101) + * Styler.set_sticky() has amended CSS to control the column/index + names and ensure the correct sticky positions (GH42537) + * Bug in de-serializing datetime indexes in PYTHONOPTIMIZED mode + (GH42866) + +------------------------------------------------------------------- +Tue Aug 17 09:09:49 UTC 2021 - Fabian Vogt + +- Drop suggests of python-numba (pulls in LLVM10) and python-QtPy + (pulls in Qt3D, python-qt5 is enough) to make the TW DVD fit again + +------------------------------------------------------------------- +Thu Aug 12 13:04:09 UTC 2021 - Ben Greiner + +- Update to version 1.3.1 + Fixed regressions + * Pandas could not be built on PyPy (GH42355) + * DataFrame constructed with an older version of pandas could not + be unpickled (GH42345) + * Performance regression in constructing a DataFrame from a + dictionary of dictionaries (GH42248) + * Fixed regression in DataFrame.agg() dropping values when the + DataFrame had an Extension Array 
dtype, a duplicate index, and + axis=1 (GH42380) + * Fixed regression in DataFrame.astype() changing the order of + noncontiguous data (GH42396) + * Performance regression in DataFrame in reduction operations + requiring casting such as DataFrame.mean() on integer data + (GH38592) + * Performance regression in DataFrame.to_dict() and Series.to_dict() + when orient argument is one of “records”, “dict”, or “split” + (GH42352) + * Fixed regression in indexing with a list subclass incorrectly + raising TypeError (GH42433, GH42461) + * Fixed regression in DataFrame.isin() and Series.isin() raising + TypeError with nullable data containing at least one missing + value (GH42405) + * Regression in concat() between objects with bool dtype and + integer dtype casting to object instead of to integer (GH42092) + * Bug in Series constructor not accepting a dask.Array (GH38645) + * Fixed regression for SettingWithCopyWarning displaying + incorrect stacklevel (GH42570) + * Fixed regression for merge_asof() raising KeyError when one of + the by columns is in the index (GH34488) + * Fixed regression in to_datetime() returning pd.NaT for inputs + that produce duplicated values, when cache=True (GH42259) + * Fixed regression in SeriesGroupBy.value_counts() that resulted + in an IndexError when called on a Series with one row (GH42618) + * Fixed bug in DataFrame.transpose() dropping values when the + DataFrame had an Extension Array dtype and a duplicate index + (GH42380) + * Fixed bug in DataFrame.to_xml() raising KeyError when called + with index=False and an offset index (GH42458) + * Fixed bug in Styler.set_sticky() not handling index names + correctly for single index columns case (GH42537) + * Fixed bug in DataFrame.copy() failing to consolidate blocks in + the result (GH42579) + +------------------------------------------------------------------- +Thu Jul 22 01:54:09 UTC 2021 - Arun Persaud + +- specfile: + * update requirements + * README.rst ->README.md + +- update to version
1.3.0: + * long changelog, see https://pandas.pydata.org/pandas-docs/stable/whatsnew/v1.3.0.html + +- changes from version 1.2.5: + * Fixed regression in concat() between two DataFrame where one has + an Index that is all-None and the other is DatetimeIndex + incorrectly raising (GH40841) + * Fixed regression in DataFrame.sum() and DataFrame.prod() when + min_count and numeric_only are both given (GH41074) + * Fixed regression in read_csv() when using memory_map=True with a + non-UTF8 encoding (GH40986) + * Fixed regression in DataFrame.replace() and Series.replace() when + the values to replace is a NumPy float array (GH40371) + * Fixed regression in ExcelFile() when a corrupt file is opened but + not closed (GH41778) + * Fixed regression in DataFrame.astype() with dtype=str failing to + convert NaN in categorical columns (GH41797) +- Unpack some files required for testing + +------------------------------------------------------------------- +Mon May 3 01:33:01 UTC 2021 - Arun Persaud + +- update to version 1.2.4: + * Fixed regressions + + Fixed regression in DataFrame.sum() when min_count greater than + the DataFrame shape was passed resulted in a ValueError + (GH39738) + + Fixed regression in DataFrame.to_json() raising AttributeError + when run on PyPy (GH39837) + + Fixed regression in (in)equality comparison of pd.NaT with a + non-datetimelike numpy array returning a scalar instead of an + array (GH40722) + + Fixed regression in DataFrame.where() not returning a copy in + the case of an all True condition (GH39595) + + Fixed regression in DataFrame.replace() raising IndexError when + regex was a multi-key dictionary (GH39338) + + Fixed regression in repr of floats in an object column not + respecting float_format when printed in the console or outputted + through DataFrame.to_string(), DataFrame.to_html(), and + DataFrame.to_latex() (GH40024) + + Fixed regression in NumPy ufuncs such as np.add not passing + through all arguments for DataFrame (GH40662) +
+------------------------------------------------------------------- +Wed Mar 3 19:10:52 UTC 2021 - Arun Persaud + +- update to version 1.2.3: + * Fixed regressions + + Fixed regression in to_excel() raising KeyError when giving + duplicate columns with columns attribute (GH39695) + + Fixed regression in nullable integer unary ops propagating mask + on assignment (GH39943) + + Fixed regression in DataFrame.__setitem__() not aligning + DataFrame on right-hand side for boolean indexer (GH39931) + + Fixed regression in to_json() failing to use compression with + URL-like paths that are internally opened in binary mode or with + user-provided file objects that are opened in binary mode + (GH39985) + + Fixed regression in Series.sort_index() and + DataFrame.sort_index(), which exited with an ungraceful error + when having kwarg ascending=None passed. Passing ascending=None + is still considered invalid, and the improved error message + suggests a proper usage (ascending must be a boolean or a + list-like of boolean) (GH39434) + + Fixed regression in DataFrame.transform() and Series.transform() + giving incorrect column labels when passed a dictionary with a + mix of list and non-list values (GH40018) + +------------------------------------------------------------------- +Sun Feb 14 20:53:06 UTC 2021 - Ben Greiner + +- Update to version 1.2.2 + * https://pandas.pydata.org/docs/whatsnew/v1.2.2.html + * fixed regressions and bugfixes +- Update to version 1.2.1 + * https://pandas.pydata.org/docs/whatsnew/v1.2.1.html + * fixed regressions and bugfixes + * Calling NumPy ufuncs on non-aligned DataFrames + * The deprecated attributes _AXIS_NAMES and _AXIS_NUMBERS of + DataFrame and Series will no longer show up in dir or inspect. 
+ getmembers calls (GH38740) + * Bumped minimum fastparquet version to 0.4.0 to avoid + AttributeError from numba (GH38344) + * Bumped minimum pymysql version to 0.8.1 to avoid test failures + (GH38344) + * Added reference to backwards incompatible check_freq arg of + testing.assert_frame_equal() and testing.assert_series_equal() + in pandas 1.1.0 whats new (GH34050) +- Update to version 1.2.0 + * https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + * WARNING: + The xlwt package for writing old-style .xls excel files is + no longer maintained. The xlrd package is now only for reading + old-style .xls files. + Previously, the default argument engine=None to read_excel() + would result in using the xlrd engine in many cases, including + new Excel 2007+ (.xlsx) files. If openpyxl is installed, many + of these cases will now default to using the openpyxl engine. + See the read_excel() documentation for more details. + Thus, it is strongly encouraged to install openpyxl to read + Excel 2007+ (.xlsx) files. Please do not report issues when + using ``xlrd`` to read ``.xlsx`` files. This is no longer + supported, switch to using openpyxl instead. + Attempting to use the xlwt engine will raise a FutureWarning + unless the option io.excel.xls.writer is set to "xlwt". While + this option is now deprecated and will also raise a + FutureWarning, it can be globally set and the warning + suppressed. Users are recommended to write .xlsx files using + the openpyxl engine instead. 
+ Enhancements + * Optionally disallow duplicate labels + * Passing arguments to fsspec backends + * Support for binary file handles in to_csv + * Support for short caption and table position in to_latex + * Change in default floating precision for read_csv and + read_table + * Experimental nullable data types for float data + * Index/column name preservation when aggregating + * GroupBy supports EWM operations directly + Deprecations + * https://pandas.pydata.org/docs/whatsnew/v1.2.0.html#deprecations +- Skip python36 build: New minimum supported Python is 3.7.1 +- Only Suggest instead of Recommend optional dependencies. Nobody + wants to pull in all of those packages by default. +- Remove pandas-pytest.ini +- Rework test deselection +- Limit to 4 pytest-xdist workers, as collection consumes a lot of + memory + +------------------------------------------------------------------- +Fri Oct 30 22:30:53 UTC 2020 - Arun Persaud + +- update to version 1.1.4: + * Fixed regressions + + Fixed regression in read_csv() raising a ValueError when names + was of type dict_keys (GH36928) + + Fixed regression in read_csv() with more than 1M rows and + specifying an index_col argument (GH37094) + + Fixed regression where attempting to mutate a DateOffset object + would no longer raise an AttributeError (GH36940) + + Fixed regression where DataFrame.agg() would fail with TypeError + when passed positional arguments to be passed on to the + aggregation function (GH36948).
+ + Fixed regression in RollingGroupby with sort=False not being + respected (GH36889) + + Fixed regression in Series.astype() converting None to "nan" + when casting to string (GH36904) + + Fixed regression in Series.rank() method failing for read-only + data (GH37290) + + Fixed regression in RollingGroupby causing a segmentation fault + with Index of dtype object (GH36727) + + Fixed regression in DataFrame.resample(...).apply(...)() raised + AttributeError when input was a DataFrame and only a Series was + evaluated (GH36951) + + Fixed regression in DataFrame.groupby(..).std() with nullable + integer dtype (GH37415) + + Fixed regression in PeriodDtype comparing both equal and unequal + to its string representation (GH37265) + + Fixed regression where slicing DatetimeIndex raised + AssertionError on irregular time series with pd.NaT or on + unsorted indices (GH36953 and GH35509) + + Fixed regression in certain offsets (pd.offsets.Day() and below) + no longer being hashable (GH37267) + + Fixed regression in StataReader which required chunksize to be + manually set when using an iterator to read a dataset (GH37280) + + Fixed regression in setitem with DataFrame.iloc() which raised + error when trying to set a value while filtering with a boolean + list (GH36741) + + Fixed regression in setitem with a Series getting aligned before + setting the values (GH37427) + + Fixed regression in MultiIndex.is_monotonic_increasing returning + wrong results with NaN in at least one of the levels (GH37220) + + Fixed regression in inplace arithmetic operation on a Series not + updating the parent DataFrame (GH36373) + * Bug fixes + + Bug causing groupby(...).sum() and similar to not preserve + metadata (GH29442) + + Bug in Series.isin() and DataFrame.isin() raising a ValueError + when the target was read-only (GH37174) + + Bug in GroupBy.fillna() that introduced a performance regression + after 1.0.5 (GH36757) + + Bug in DataFrame.info() was raising a KeyError when the + DataFrame 
has integer column names (GH37245) + + Bug in DataFrameGroupby.apply() would drop a CategoricalIndex + when grouped on (GH35792) + +------------------------------------------------------------------- +Mon Oct 5 20:11:59 UTC 2020 - Arun Persaud + +- specfile: + * updated cython version + +- update to version 1.1.3: + * Development Changes + + The minimum version of Cython is now the most recent bug-fix + version (0.29.21) (GH36296). + * Fixed regressions + + Fixed regression in DataFrame.agg(), DataFrame.apply(), + Series.agg(), and Series.apply() where internal suffix is + exposed to the users when no relabelling is applied (GH36189) + + Fixed regression in IntegerArray unary plus and minus operations + raising a TypeError (GH36063) + + Fixed regression when adding a timedelta_range() to a Timestamp + raised a ValueError (GH35897) + + Fixed regression in Series.__getitem__() incorrectly raising + when the input was a tuple (GH35534) + + Fixed regression in Series.__getitem__() incorrectly raising + when the input was a frozenset (GH35747) + + Fixed regression in modulo of Index, Series and DataFrame using + numexpr using C not Python semantics (GH36047, GH36526) + + Fixed regression in read_excel() with engine="odf" caused + UnboundLocalError in some cases where cells had nested child + nodes (GH36122, GH35802) + + Fixed regression in DataFrame.replace() inconsistent replace + when using a float in the replace method (GH35376) + + Fixed regression in Series.loc() on a Series with a MultiIndex + containing Timestamp raising InvalidIndexError (GH35858) + + Fixed regression in DataFrame and Series comparisons between + numeric arrays and strings (GH35700, GH36377) + + Fixed regression in DataFrame.apply() with raw=True and + user-function returning string (GH35940) + + Fixed regression when setting empty DataFrame column to a Series + in preserving name of index in frame (GH36527) + + Fixed regression in Period incorrect value for ordinal over the + maximum timestamp 
(GH36430) + + Fixed regression in read_table() raised ValueError when + delim_whitespace was set to True (GH35958) + + Fixed regression in Series.dt.normalize() when normalizing + pre-epoch dates the result was shifted one day (GH36294) + * Bug fixes + + Bug in read_spss() where passing a pathlib.Path as path would + raise a TypeError (GH33666) + + Bug in Series.str.startswith() and Series.str.endswith() with + category dtype not propagating na parameter (GH36241) + + Bug in Series constructor where integer overflow would occur for + sufficiently large scalar inputs when an index was provided + (GH36291) + + Bug in DataFrame.sort_values() raising an AttributeError when + sorting on a key that casts column to categorical dtype + (GH36383) + + Bug in DataFrame.stack() raising a ValueError when stacking + MultiIndex columns based on position when the levels had + duplicate names (GH36353) + + Bug in Series.astype() showing too much precision when casting + from np.float32 to string dtype (GH36451) + + Bug in Series.isin() and DataFrame.isin() when using NaN and a + row length above 1,000,000 (GH22205) + + Bug in cut() raising a ValueError when passed a Series of labels + with ordered=False (GH36603) + * Other + + Reverted enhancement added in pandas-1.1.0 where + timedelta_range() infers a frequency when passed start, stop, + and periods (GH32377) + +------------------------------------------------------------------- +Sat Sep 12 19:56:08 UTC 2020 - Arun Persaud + +- update to version 1.1.2: + * Fixed regressions + + Regression in DatetimeIndex.intersection() incorrectly raising + AssertionError when intersecting against a list (GH35876) + + Fix regression in updating a column inplace (e.g. 
using + df['col'].fillna(.., inplace=True)) (GH35731) + + Fix regression in DataFrame.append() mixing tz-aware and + tz-naive datetime columns (GH35460) + + Performance regression for RangeIndex.format() (GH35712) + + Regression where MultiIndex.get_loc() would return a slice + spanning the full index when passed an empty list (GH35878) + + Fix regression in invalid cache after an indexing operation; + this can manifest when setting which does not update the data + (GH35521) + + Regression in DataFrame.replace() where a TypeError would be + raised when attempting to replace elements of type Interval + (GH35931) + + Fix regression in pickle roundtrip of the closed attribute of + IntervalIndex (GH35658) + + Fixed regression in DataFrameGroupBy.agg() where a ValueError: + buffer source array is read-only would be raised when the + underlying array is read-only (GH36014) + + Fixed regression in Series.groupby.rolling() number of levels of + MultiIndex in input was compressed to one (GH36018) + + Fixed regression in DataFrameGroupBy on an empty DataFrame + (GH36197) + * Bug fixes + + Bug in DataFrame.eval() with object dtype column binary + operations (GH35794) + + Bug in Series constructor raising a TypeError when constructing + sparse datetime64 dtypes (GH35762) + + Bug in DataFrame.apply() with result_type="reduce" returning + with incorrect index (GH35683) + + Bug in Series.astype() and DataFrame.astype() not respecting the + errors argument when set to "ignore" for extension dtypes + (GH35471) + + Bug in DateTimeIndex.format() and PeriodIndex.format() with + name=True setting the first item to "None" where it should be "" + (GH35712) + + Bug in Float64Index.__contains__() incorrectly raising TypeError + instead of returning False (GH35788) + + Bug in Series constructor incorrectly raising a TypeError when + passed an ordered set (GH36044) + + Bug in Series.dt.isocalendar() and DatetimeIndex.isocalendar() + that returned incorrect year for certain dates (GH36032) + 
+ Bug in DataFrame indexing returning an incorrect Series in some + cases when the series has been altered and a cache not + invalidated (GH33675) + + Bug in DataFrame.corr() causing subsequent indexing lookups to + be incorrect (GH35882) + + Bug in import_optional_dependency() returning incorrect package + names in cases where package name is different from import name + (GH35948) + + Bug when setting empty DataFrame column to a Series in + preserving name of index in frame (GH31368) + * Other + + factorize() now supports na_sentinel=None to include NaN in the + uniques of the values and remove dropna keyword which was + unintentionally exposed to public facing API in 1.1 version from + factorize() (GH35667) + + DataFrame.plot() and Series.plot() raise UserWarning about usage + of FixedFormatter and FixedLocator (GH35684 and GH35945) + +------------------------------------------------------------------- +Sat Sep 5 15:42:53 UTC 2020 - Arun Persaud + +- specfile: + * updated versions of some requirements, require numpy during build + * removed pandas-pr34991-npconstructor.patch, included upstream + * removed sed commands that are not needed anymore + * skip test to see if pandas is installed + +- update to version 1.1.1: + * Fixed regressions + + Fixed regression in CategoricalIndex.format() where, when + stringified scalars had different lengths, the shorter string + would be right-filled with spaces, so it had the same length as + the longest string (GH35439) + + Fixed regression in Series.truncate() when trying to truncate a + single-element series (GH35544) + + Fixed regression where DataFrame.to_numpy() would raise a + RuntimeError for mixed dtypes when converting to str (GH35455) + + Fixed regression where read_csv() would raise a ValueError when + pandas.options.mode.use_inf_as_na was set to True (GH35493) + + Fixed regression where pandas.testing.assert_series_equal() + would raise an error when non-numeric dtypes were passed with + check_exact=True 
(GH35446) + + Fixed regression in .groupby(..).rolling(..) where column + selection was ignored (GH35486) + + Fixed regression where DataFrame.interpolate() would raise a + TypeError when the DataFrame was empty (GH35598) + + Fixed regression in DataFrame.shift() with axis=1 and + heterogeneous dtypes (GH35488) + + Fixed regression in DataFrame.diff() with read-only data + (GH35559) + + Fixed regression in .groupby(..).rolling(..) where a segfault + would occur with center=True and an odd number of values + (GH35552) + + Fixed regression in DataFrame.apply() where functions that + altered the input in-place only operated on a single row + (GH35462) + + Fixed regression in DataFrame.reset_index() would raise a + ValueError on empty DataFrame with a MultiIndex with a + datetime64 dtype level (GH35606, GH35657) + + Fixed regression where pandas.merge_asof() would raise a + UnboundLocalError when left_index, right_index and tolerance + were set (GH35558) + + Fixed regression in .groupby(..).rolling(..) where a custom + BaseIndexer would be ignored (GH35557) + + Fixed regression in DataFrame.replace() and Series.replace() + where compiled regular expressions would be ignored during + replacement (GH35680) + + Fixed regression in aggregate() where a list of functions would + produce the wrong results if at least one of the functions did + not aggregate (GH35490) + + Fixed memory usage issue when instantiating large + pandas.arrays.StringArray (GH35499) + * Bug fixes + + Bug in Styler whereby cell_ids argument had no effect due to + other recent changes (GH35588) (GH35663) + + Bug in pandas.testing.assert_series_equal() and + pandas.testing.assert_frame_equal() where extension dtypes were + not ignored when check_dtypes was set to False (GH35715) + + Bug in to_timedelta() fails when arg is a Series with Int64 + dtype containing null values (GH35574) + + Bug in .groupby(..).rolling(..) 
where passing closed with column + selection would raise a ValueError (GH35549) + + Bug in DataFrame constructor failing to raise ValueError in some + cases when data and index have mismatched lengths (GH33437) + +- changes from version 1.1.0: + * Enhancements + + KeyErrors raised by loc specify missing labels + + All dtypes can now be converted to "StringDtype" + + Non-monotonic PeriodIndex Partial String Slicing + + Comparing two `DataFrame` or two `Series` and summarizing the + differences + + Allow NA in groupby key + + Sorting with keys + + Fold argument support in Timestamp constructor + + Parsing timezone-aware format with different timezones in + to_datetime + + Grouper and resample now supports the arguments origin and + offset + + fsspec now used for filesystem handling + * see + https://pandas.pydata.org/pandas-docs/stable/whatsnew/v1.1.0.html + for complete list + +------------------------------------------------------------------- +Wed Jul 22 10:04:49 UTC 2020 - Benjamin Greiner + +- support newest numpy by removing old test + gh#pandas-dev/pandas#34991 pandas-pr34991-npconstructor.patch +- move testing to multibuild flavor +- run slow tests only on x86_64 +- replace gcc10-skip-one-test.patch with pytest -k deselection +- tidy SKIP_TESTS declarations +- add pandas-pytest.ini as pytest.ini in order to support the + custom marks and filter some warnings +- remove random hash seed + +------------------------------------------------------------------- +Tue Jun 30 13:03:14 UTC 2020 - Matej Cepl + +- Skip test_raw_roundtrip on i586 + +------------------------------------------------------------------- +Wed Jun 24 01:52:29 UTC 2020 - Todd R + +- Update to version 1.0.5 + * Fixed regressions + + Fix regression in read_parquet() when reading from file-like objects (GH34467). + + Fix regression in reading from public S3 buckets (GH34626). 
+ Note this disables the ability to read Parquet files from + directories on S3 again (GH26388, GH34632), which was added + in the 1.0.4 release, but is now targeted for pandas 1.1.0. + + Fixed regression in replace() raising an AssertionError when replacing values in an extension dtype with values of a different dtype (GH34530) + * Bug fixes + + Fixed building from source with Python 3.8 fetching the wrong version of NumPy + +------------------------------------------------------------------- +Sat May 30 23:39:38 UTC 2020 - Arun Persaud + +- update to version 1.0.4: + * Fixed regressions + + Fix regression where :meth:`Series.isna` and + :meth:`DataFrame.isna` would raise for categorical dtype when + pandas.options.mode.use_inf_as_na was set to True + (:issue:`33594`) + + Fix regression in :meth:`GroupBy.first` and :meth:`GroupBy.last` + where None is not preserved in object dtype (:issue:`32800`) + + Fix regression in DataFrame reductions using numeric_only=True + and ExtensionArrays (:issue:`33256`). + + Fix performance regression in memory_usage(deep=True) for object + dtype (:issue:`33012`) + + Fix regression where :meth:`Categorical.replace` would replace + with NaN whenever the new value and replacement value were equal + (:issue:`33288`) + + Fix regression where an ordered :class:`Categorical` containing + only NaN values would raise rather than returning NaN when + taking the minimum or maximum (:issue:`33450`) + + Fix regression in :meth:`DataFrameGroupBy.agg` with dictionary + input losing ExtensionArray dtypes (:issue:`32194`) + + Fix to preserve the ability to index with the "nearest" method + with xarray's CFTimeIndex, an :class:`Index` subclass + (pydata/xarray#3751, :issue:`32905`). 
+ + Fix regression in :meth:`DataFrame.describe` raising TypeError: + unhashable type: 'dict' (:issue:`32409`) + + Fix regression in :meth:`DataFrame.replace` casts columns to + object dtype if items in to_replace not in values + (:issue:`32988`) + + Fix regression in :meth:`Series.groupby` would raise ValueError + when grouping by :class:`PeriodIndex` level (:issue:`34010`) + + Fix regression in :meth:`GroupBy.rolling.apply` ignores args and + kwargs parameters (:issue:`33433`) + + Fix regression in error message with np.min or np.max on + unordered :class:`Categorical` (:issue:`33115`) + + Fix regression in :meth:`DataFrame.loc` and :meth:`Series.loc` + throwing an error when a datetime64[ns, tz] value is provided + (:issue:`32395`) + * Bug fixes + + Bug in :meth:`SeriesGroupBy.first`, :meth:`SeriesGroupBy.last`, + :meth:`SeriesGroupBy.min`, and :meth:`SeriesGroupBy.max` + returning floats when applied to nullable Booleans + (:issue:`33071`) + + Bug in :meth:`Rolling.min` and :meth:`Rolling.max`: Growing + memory usage after multiple calls when using a fixed window + (:issue:`30726`) + + Bug in :meth:`~DataFrame.to_parquet` was not raising + PermissionError when writing to a private s3 bucket with invalid + creds. (:issue:`27679`) + + Bug in :meth:`~DataFrame.to_csv` was silently failing when + writing to an invalid s3 bucket. (:issue:`32486`) + + Bug in :meth:`read_parquet` was raising a FileNotFoundError when + passed an s3 directory path. (:issue:`26388`) + + Bug in :meth:`~DataFrame.to_parquet` was throwing an + AttributeError when writing a partitioned parquet file to s3 + (:issue:`27596`) + + Bug in :meth:`GroupBy.quantile` causes the quantiles to be + shifted when the by axis contains NaN (:issue:`33200`, + :issue:`33569`) + +------------------------------------------------------------------- +Mon May 25 20:21:59 UTC 2020 - Martin Liška + +- Add gcc10-skip-one-test.patch in order to fix a failing test-case + on i586. 
+ +------------------------------------------------------------------- +Sat Mar 28 16:42:49 UTC 2020 - Arun Persaud + +- update to 1.0.3: + * Fixed regressions + + Fixed regression in resample.agg when the underlying data is + non-writeable (GH31710) + + Fixed regression in DataFrame exponentiation with reindexing + (GH32685) +- Increase memory _constraints to 8GB RAM. + +------------------------------------------------------------------- +Mon Mar 16 07:12:34 UTC 2020 - Tomáš Chvátal + +- Skip i586 failing tests with upstream ticket + +------------------------------------------------------------------- +Fri Mar 13 00:13:11 UTC 2020 - Hans-Peter Jansen + +- Update to 1.0.2: + * see https://pandas.pydata.org/pandas-docs/stable/whatsnew/v1.0.2.html +- Add pyperclip and Jinja2 as test dependencies + +------------------------------------------------------------------- +Mon Mar 9 15:19:33 UTC 2020 - Dirk Mueller + +- Update to 1.0.1: + * see https://pandas.pydata.org/pandas-docs/stable/whatsnew/v1.0.1.html + * see https://pandas.pydata.org/pandas-docs/stable/whatsnew/v1.0.0.html + +------------------------------------------------------------------- +Tue Jan 14 12:28:49 UTC 2020 - Tomáš Chvátal + +- Skip one test that fails on 32bit: test_encode_non_c_locale + +------------------------------------------------------------------- +Mon Nov 11 01:59:25 UTC 2019 - Steve Kowalik + +- Update to version 0.25.3 + + Support Python 3.8 + + Bug fixes + > Indexing + * Fix regression in DataFrame.reindex() not following the limit argument + * Fix regression in RangeIndex.get_indexer() for decreasing RangeIndex + where target values may be improperly identified as missing/present + > I/O + * Fix regression in notebook display where tags were missing for + DataFrame.index values + * Regression in to_csv() where writing a Series or DataFrame indexed by + an IntervalIndex would incorrectly raise a TypeError + * Fix to_csv() with ExtensionArray with list-like values + > 
Groupby/resample/rolling + * Bug incorrectly raising an IndexError when passing a list of quantiles + to pandas.core.groupby.DataFrameGroupBy.quantile() + * Bug in pandas.core.groupby.GroupBy.shift(), + pandas.core.groupby.GroupBy.bfill() and + pandas.core.groupby.GroupBy.ffill() where timezone information would + be dropped + * Bug in DataFrameGroupBy.quantile() where NA values in the grouping + could cause segfaults or incorrect results + +------------------------------------------------------------------- +Fri Sep 20 09:40:08 UTC 2019 - Tomáš Chvátal + +- Use xdist to run tests in threads, it takes ages otherwise + +------------------------------------------------------------------- +Wed Aug 28 15:32:47 UTC 2019 - Todd R + +- Update to version 0.25.1 + + Bug fixes + > Categorical + * Bug in :meth:`Categorical.fillna` that would replace all values, not just those that are ``NaN`` + > Datetimelike + * Bug in :func:`to_datetime` where passing a timezone-naive :class:`DatetimeArray` or :class:`DatetimeIndex` and ``utc=True`` would incorrectly return a timezone-naive result + * Bug in :meth:`Period.to_timestamp` where a :class:`Period` outside the :class:`Timestamp` implementation bounds (roughly 1677-09-21 to 2262-04-11) would return an incorrect :class:`Timestamp` instead of raising ``OutOfBoundsDatetime`` + * Bug in iterating over :class:`DatetimeIndex` when the underlying data is read-only + > Timezones + * Bug in :class:`Index` where a numpy object array with a timezone aware :class:`Timestamp` and ``np.nan`` would not return a :class:`DatetimeIndex` + > Numeric + * Bug in :meth:`Series.interpolate` when using a timezone aware :class:`DatetimeIndex` + * Bug when printing negative floating point complex numbers would raise an ``IndexError`` + * Bug where :class:`DataFrame` arithmetic operators such as :meth:`DataFrame.mul` with a :class:`Series` with axis=1 would raise an ``AttributeError`` on :class:`DataFrame` larger than the minimum threshold to invoke numexpr 
+ * Bug in :class:`DataFrame` arithmetic where missing values in results were incorrectly masked with ``NaN`` instead of ``Inf`` + > Conversion + * Improved the warnings for the deprecated methods :meth:`Series.real` and :meth:`Series.imag` + > Interval + * Bug in :class:`IntervalIndex` where `dir(obj)` would raise ``ValueError`` + > Indexing + * Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` + * Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. + * Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns. + * Fix regression in ``.ix`` fallback with an ``IntervalIndex``. + > Missing + * Bug in :func:`pandas.isnull` or :func:`pandas.isna` when the input is a type e.g. ``type(pandas.Series())`` + > I/O + * Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 + * Better error message when a negative header is passed in :func:`pandas.read_csv` + * Follow the ``min_rows`` display option (introduced in v0.25.0) correctly in the HTML repr in the notebook. + > Plotting + * Added a ``pandas_plotting_backends`` entrypoint group for registering plot backends. See :ref:`extending.plotting-backends` for more. + * Fixed the re-instatement of Matplotlib datetime converters after calling + :meth:`pandas.plotting.deregister_matplotlib_converters`. + * Fix compatibility issue with matplotlib when passing a pandas ``Index`` to a plot call. 
+ > Groupby/resample/rolling + * Fixed regression in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` raising when multiple quantiles are given + * Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information + * Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers + * Bug in windowing over read-only arrays + * Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed + > Reshaping + * A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one + * Bug :meth:`merge_asof` could not merge :class:`Timedelta` objects when passing `tolerance` kwarg + * Bug in :meth:`DataFrame.crosstab` when ``margins`` set to ``True`` and ``normalize`` is not ``False``, an error is raised. + * :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified + * Bug in :meth:`DataFrame.join` raising with readonly arrays + > Sparse + * Bug in reductions for :class:`Series` with Sparse dtypes + > Other + * Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when replacing timezone-aware timestamps using a dict-like replacer + * Bug in :meth:`Series.rename` when using a custom type indexer. Now any value that isn't callable or dict-like is treated as a scalar. + +------------------------------------------------------------------- +Mon Jul 22 15:36:34 UTC 2019 - Todd R + +- Update to Version 0.25.0 + + Warning + * Starting with the 0.25.x series of releases, pandas only supports Python 3.5.3 and higher. + * The minimum supported Python version will be bumped to 3.6 in a future release. + * Panel has been fully removed.
For N-D labeled data structures, please + use xarray + * read_pickle read_msgpack are only guaranteed backwards compatible back to + pandas version 0.20.3 + + Enhancements + * Groupby aggregation with relabeling + Pandas has added special groupby behavior, known as "named aggregation", for naming the + output columns when applying multiple aggregation functions to specific columns. + * Groupby Aggregation with multiple lambdas + You can now provide multiple lambda functions to a list-like aggregation in + pandas.core.groupby.GroupBy.agg. + * Better repr for MultiIndex + Printing of MultiIndex instances now shows tuples of each row and ensures + that the tuple items are vertically aligned, so it's now easier to understand + the structure of the MultiIndex. + * Shorter truncated repr for Series and DataFrame + Currently, the default display options of pandas ensure that when a Series + or DataFrame has more than 60 rows, its repr gets truncated to this maximum + of 60 rows (the display.max_rows option). However, this still gives + a repr that takes up a large part of the vertical screen estate. Therefore, + a new option display.min_rows is introduced with a default of 10 which + determines the number of rows showed in the truncated repr: + * Json normalize with max_level param support + json_normalize normalizes the provided input dict to all + nested levels. The new max_level parameter provides more control over + which level to end normalization. + * Series.explode to split list-like values to rows + Series and DataFrame have gained the DataFrame.explode methods to transform + list-likes to individual rows. + * DataFrame.plot keywords logy, logx and loglog can now accept the value 'sym' for symlog scaling. 
+ * Added support for ISO week year format ('%G-%V-%u') when parsing datetimes using to_datetime + * Indexing of DataFrame and Series now accepts zerodim np.ndarray + * Timestamp.replace now supports the fold argument to disambiguate DST transition times + * DataFrame.at_time and Series.at_time now support datetime.time objects with timezones + * DataFrame.pivot_table now accepts an observed parameter which is passed to underlying calls to DataFrame.groupby to speed up grouping categorical data. + * Series.str has gained Series.str.casefold method to removes all case distinctions present in a string + * DataFrame.set_index now works for instances of abc.Iterator, provided their output is of the same length as the calling frame + * DatetimeIndex.union now supports the sort argument. The behavior of the sort parameter matches that of Index.union + * RangeIndex.union now supports the sort argument. If sort=False an unsorted Int64Index is always returned. sort=None is the default and returns a monotonically increasing RangeIndex if possible or a sorted Int64Index if not + * TimedeltaIndex.intersection now also supports the sort keyword + * DataFrame.rename now supports the errors argument to raise errors when attempting to rename nonexistent keys + * Added api.frame.sparse for working with a DataFrame whose values are sparse + * RangeIndex has gained ~RangeIndex.start, ~RangeIndex.stop, and ~RangeIndex.step attributes + * datetime.timezone objects are now supported as arguments to timezone methods and constructors + * DataFrame.query and DataFrame.eval now supports quoting column names with backticks to refer to names with spaces + * merge_asof now gives a more clear error message when merge keys are categoricals that are not equal + * pandas.core.window.Rolling supports exponential (or Poisson) window type + * Error message for missing required imports now includes the original import error's text + * DatetimeIndex and TimedeltaIndex now have a mean method + * 
DataFrame.describe now formats integer percentiles without decimal point + * Added support for reading SPSS .sav files using read_spss + * Added new option plotting.backend to be able to select a plotting backend different than the existing matplotlib one. Use pandas.set_option('plotting.backend', '<backend-module>') where <backend-module> is a library implementing the pandas plotting API + * Interval, IntervalIndex, and ~arrays.IntervalArray have gained an ~Interval.is_empty attribute denoting if the given interval(s) are empty + + Backwards incompatible API changes + * Indexing with date strings with UTC offsets + Indexing a DataFrame or Series with a DatetimeIndex with a + date string with a UTC offset would previously ignore the UTC offset. Now, the UTC offset + is respected in indexing. + * MultiIndex constructed from levels and codes + Constructing a MultiIndex with NaN levels or codes value < -1 was allowed previously. + Now, construction with codes value < -1 is not allowed and NaN levels' corresponding codes + would be reassigned as -1. + * Groupby.apply on DataFrame evaluates first group only once + The implementation of DataFrameGroupBy.apply() + previously evaluated the supplied function consistently twice on the first group + to infer if it is safe to use a fast code path. Particularly for functions with + side effects, this was an undesired behavior and may have led to surprises. + * Concatenating sparse values + When passed DataFrames whose values are sparse, concat will now return a + Series or DataFrame with sparse values, rather than a SparseDataFrame . + * The .str-accessor performs stricter type checks + Due to the lack of more fine-grained dtypes, Series.str so far only checked whether the data was + of object dtype. Series.str will now infer the dtype data *within* the Series; in particular, + 'bytes'-only data will raise an exception (except for Series.str.decode, Series.str.get, + Series.str.len, Series.str.slice).
+ * Categorical dtypes are preserved during groupby + Previously, columns that were categorical, but not the groupby key(s) would be converted to object dtype during groupby operations. Pandas now will preserve these dtypes. + * Incompatible Index type unions + When performing Index.union operations between objects of incompatible dtypes, + the result will be a base Index of dtype object. This behavior holds true for + unions between Index objects that previously would have been prohibited. The dtype + of empty Index objects will now be evaluated before performing union operations + rather than simply returning the other Index object. Index.union can now be + considered commutative, such that A.union(B) == B.union(A) . + * DataFrame groupby ffill/bfill no longer return group labels + The methods ffill, bfill, pad and backfill of + DataFrameGroupBy + previously included the group labels in the return value, which was + inconsistent with other groupby transforms. Now only the filled values + are returned. + * DataFrame describe on an empty categorical / object column will return top and freq + When calling DataFrame.describe with an empty categorical / object + column, the 'top' and 'freq' columns were previously omitted, which was inconsistent with + the output for non-empty columns. Now the 'top' and 'freq' columns will always be included, + with numpy.nan in the case of an empty DataFrame + * __str__ methods now call __repr__ rather than vice versa + Pandas has until now mostly defined string representations in a Pandas objects's + __str__/__unicode__/__bytes__ methods, and called __str__ from the __repr__ + method, if a specific __repr__ method is not found. This is not needed for Python3. + In Pandas 0.25, the string representations of Pandas objects are now generally + defined in __repr__, and calls to __str__ in general now pass the call on to + the __repr__, if a specific __str__ method doesn't exist, as is standard for Python. 
+ This change is backward compatible for direct usage of Pandas, but if you subclass + Pandas objects *and* give your subclasses specific __str__/__repr__ methods, + you may have to adjust your __str__/__repr__ methods . + * Indexing an IntervalIndex with Interval objects + Indexing methods for IntervalIndex have been modified to require exact matches only for Interval queries. + IntervalIndex methods previously matched on any overlapping Interval. Behavior with scalar points, e.g. querying + with an integer, is unchanged . + * Binary ufuncs on Series now align + Applying a binary ufunc like numpy.power now aligns the inputs + when both are Series . + * Categorical.argsort now places missing values at the end + Categorical.argsort now places missing values at the end of the array, making it + consistent with NumPy and the rest of pandas . + * Column order is preserved when passing a list of dicts to DataFrame + Starting with Python 3.7 the key-order of dict is guaranteed. In practice, this has been true since + Python 3.6. The DataFrame constructor now treats a list of dicts in the same way as + it does a list of OrderedDict, i.e. preserving the order of the dicts. + This change applies only when pandas is running on Python>=3.6 . + * Increased minimum versions for dependencies + * DatetimeTZDtype will now standardize pytz timezones to a common timezone instance + * Timestamp and Timedelta scalars now implement the to_numpy method as aliases to Timestamp.to_datetime64 and Timedelta.to_timedelta64, respectively. + * Timestamp.strptime will now raise a NotImplementedError + * Comparing Timestamp with unsupported objects now returns NotImplemented instead of raising TypeError.
This implies that unsupported rich comparisons are delegated to the other object, and are now consistent with Python 3 behavior for datetime objects + * Bug in DatetimeIndex.snap which didn't preserve the name of the input Index + * The arg argument in pandas.core.groupby.DataFrameGroupBy.agg has been renamed to func + * The arg argument in pandas.core.window._Window.aggregate has been renamed to func + * Most Pandas classes had a __bytes__ method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 + * The .str-accessor has been disabled for 1-level MultiIndex, use MultiIndex.to_flat_index if necessary + * Removed support of gtk package for clipboards + * Using an unsupported version of Beautiful Soup 4 will now raise an ImportError instead of a ValueError + * Series.to_excel and DataFrame.to_excel will now raise a ValueError when saving timezone aware data. + * ExtensionArray.argsort places NA values at the end of the sorted array. + * DataFrame.to_hdf and Series.to_hdf will now raise a NotImplementedError when saving a MultiIndex with extension data types for a fixed format. + * Passing duplicate names in read_csv will now raise a ValueError + + Deprecations + * Sparse subclasses + The SparseSeries and SparseDataFrame subclasses are deprecated. Their functionality is better-provided + by a Series or DataFrame with sparse values. + * msgpack format + The msgpack format is deprecated as of 0.25 and will be removed in a future version. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. + * The deprecated .ix[] indexer now raises a more visible FutureWarning instead of DeprecationWarning . + * Deprecated the units=M (months) and units=Y (year) parameters for units of pandas.to_timedelta, pandas.Timedelta and pandas.TimedeltaIndex + * pandas.concat has deprecated the join_axes-keyword.
Instead, use DataFrame.reindex or DataFrame.reindex_like on the result or on the inputs + * The SparseArray.values attribute is deprecated. You can use np.asarray(...) or + the SparseArray.to_dense method instead . + * The functions pandas.to_datetime and pandas.to_timedelta have deprecated the box keyword. Instead, use to_numpy or Timestamp.to_datetime64 or Timedelta.to_timedelta64. + * The DataFrame.compound and Series.compound methods are deprecated and will be removed in a future version . + * The internal attributes _start, _stop and _step attributes of RangeIndex have been deprecated. + Use the public attributes ~RangeIndex.start, ~RangeIndex.stop and ~RangeIndex.step instead . + * The Series.ftype, Series.ftypes and DataFrame.ftypes methods are deprecated and will be removed in a future version. + Instead, use Series.dtype and DataFrame.dtypes . + * The Series.get_values, DataFrame.get_values, Index.get_values, + SparseArray.get_values and Categorical.get_values methods are deprecated. + One of np.asarray(..) or ~Series.to_numpy can be used instead . + * The 'outer' method on NumPy ufuncs, e.g. np.subtract.outer has been deprecated on Series objects. Convert the input to an array with Series.array first + * Timedelta.resolution is deprecated and replaced with Timedelta.resolution_string. In a future version, Timedelta.resolution will be changed to behave like the standard library datetime.timedelta.resolution + * read_table has been undeprecated. + * Index.dtype_str is deprecated. + * Series.imag and Series.real are deprecated. + * Series.put is deprecated. + * Index.item and Series.item is deprecated. + * The default value ordered=None in ~pandas.api.types.CategoricalDtype has been deprecated in favor of ordered=False. When converting between categorical types ordered=True must be explicitly passed in order to be preserved. + * Index.contains is deprecated. Use key in index (__contains__) instead . + * DataFrame.get_dtype_counts is deprecated. 
+ * Categorical.ravel will return a Categorical instead of a np.ndarray + + Removal of prior version deprecations/changes + * Removed Panel + * Removed the previously deprecated sheetname keyword in read_excel + * Removed the previously deprecated TimeGrouper + * Removed the previously deprecated parse_cols keyword in read_excel + * Removed the previously deprecated pd.options.html.border + * Removed the previously deprecated convert_objects + * Removed the previously deprecated select method of DataFrame and Series + * Removed the previously deprecated behavior of Series treated as list-like in ~Series.cat.rename_categories + * Removed the previously deprecated DataFrame.reindex_axis and Series.reindex_axis + * Removed the previously deprecated behavior of altering column or index labels with Series.rename_axis or DataFrame.rename_axis + * Removed the previously deprecated tupleize_cols keyword argument in read_html, read_csv, and DataFrame.to_csv + * Removed the previously deprecated DataFrame.from_csv and Series.from_csv + * Removed the previously deprecated raise_on_error keyword argument in DataFrame.where and DataFrame.mask + * Removed the previously deprecated ordered and categories keyword arguments in astype + * Removed the previously deprecated cdate_range + * Removed the previously deprecated True option for the dropna keyword argument in SeriesGroupBy.nth + * Removed the previously deprecated convert keyword argument in Series.take and DataFrame.take + + Performance improvements + * Significant speedup in SparseArray initialization that benefits most operations, fixing performance regression introduced in v0.20.0 + * DataFrame.to_stata() is now faster when outputting data with any string or non-native endian columns + * Improved performance of Series.searchsorted.
The speedup is especially large when the dtype is + int8/int16/int32 and the searched key is within the integer bounds for the dtype + * Improved performance of pandas.core.groupby.GroupBy.quantile + * Improved performance of slicing and other selected operation on a RangeIndex + * RangeIndex now performs standard lookup without instantiating an actual hashtable, hence saving memory + * Improved performance of read_csv by faster tokenizing and faster parsing of small float numbers + * Improved performance of read_csv by faster parsing of N/A and boolean values + * Improved performance of IntervalIndex.is_monotonic, IntervalIndex.is_monotonic_increasing and IntervalIndex.is_monotonic_decreasing by removing conversion to MultiIndex + * Improved performance of DataFrame.to_csv when writing datetime dtypes + * Improved performance of read_csv by much faster parsing of MM/YYYY and DD/MM/YYYY datetime formats + * Improved performance of nanops for dtypes that cannot store NaNs. Speedup is particularly prominent for Series.all and Series.any + * Improved performance of Series.map for dictionary mappers on categorical series by mapping the categories instead of mapping all values + * Improved performance of IntervalIndex.intersection + * Improved performance of read_csv by faster concatenating date columns without extra conversion to string for integer/float zero and float NaN; by faster checking the string for the possibility of being a date + * Improved performance of IntervalIndex.is_unique by removing conversion to MultiIndex + * Restored performance of DatetimeIndex.__iter__ by re-enabling specialized code path + * Improved performance when building MultiIndex with at least one CategoricalIndex level + * Improved performance by removing the need for a garbage collect when checking for SettingWithCopyWarning + * For to_datetime changed default value of cache parameter to True + * Improved performance of DatetimeIndex and PeriodIndex slicing given non-unique, monotonic 
data . + * Improved performance of pd.read_json for index-oriented data. + * Improved performance of MultiIndex.shape . + + Bug fixes + > Categorical + * Bug in DataFrame.at and Series.at that would raise exception if the index was a CategoricalIndex + * Fixed bug in comparison of ordered Categorical that contained missing values with a scalar which sometimes incorrectly resulted in True + * Bug in DataFrame.dropna when the DataFrame has a CategoricalIndex containing Interval objects incorrectly raised a TypeError + > Datetimelike + * Bug in to_datetime which would raise an (incorrect) ValueError when called with a date far into the future and the format argument specified instead of raising OutOfBoundsDatetime + * Bug in to_datetime which would raise InvalidIndexError: Reindexing only valid with uniquely valued Index objects when called with cache=True, with arg including at least two different elements from the set {None, numpy.nan, pandas.NaT} + * Bug in DataFrame and Series where timezone aware data with dtype='datetime64[ns]' was not cast to naive + * Improved Timestamp type checking in various datetime functions to prevent exceptions when using a subclassed datetime + * Bug in Series and DataFrame repr where np.datetime64('NaT') and np.timedelta64('NaT') with dtype=object would be represented as NaN + * Bug in to_datetime which does not replace the invalid argument with NaT when errors is set to coerce + * Bug in adding DateOffset with nonzero month to DatetimeIndex would raise ValueError + * Bug in to_datetime which raises unhandled OverflowError when called with mix of invalid dates and NaN values with format='%Y%m%d' and errors='coerce' + * Bug in isin for datetimelike indexes; DatetimeIndex, TimedeltaIndex and PeriodIndex where the levels parameter was ignored.
+ * Bug in to_datetime which raises TypeError for format='%Y%m%d' when called for invalid integer dates with length >= 6 digits with errors='ignore' + * Bug when comparing a PeriodIndex against a zero-dimensional numpy array + * Bug in constructing a Series or DataFrame from a numpy datetime64 array with a non-ns unit and out-of-bound timestamps generating rubbish data, which will now correctly raise an OutOfBoundsDatetime error . + * Bug in date_range with unnecessary OverflowError being raised for very large or very small dates + * Bug where adding Timestamp to a np.timedelta64 object would raise instead of returning a Timestamp + * Bug where comparing a zero-dimensional numpy array containing a np.datetime64 object to a Timestamp would incorrectly raise TypeError + * Bug in to_datetime which would raise ValueError: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True when called with cache=True, with arg including datetime strings with different offset + > Timedelta + * Bug in TimedeltaIndex.intersection where for non-monotonic indices in some cases an empty Index was returned when in fact an intersection existed + * Bug with comparisons between Timedelta and NaT raising TypeError + * Bug when adding or subtracting a BusinessHour to a Timestamp with the resulting time landing in a following or prior day respectively + * Bug when comparing a TimedeltaIndex against a zero-dimensional numpy array + > Timezones + * Bug in DatetimeIndex.to_frame where timezone aware data would be converted to timezone naive data + * Bug in to_datetime with utc=True and datetime strings that would apply previously parsed UTC offsets to subsequent arguments + * Bug in Timestamp.tz_localize and Timestamp.tz_convert does not propagate freq + * Bug in Series.at where setting Timestamp with timezone raises TypeError + * Bug in DataFrame.update when updating with timezone aware data would return timezone naive data + * Bug in to_datetime where an uninformative
RuntimeError was raised when passing a naive Timestamp with datetime strings with mixed UTC offsets + * Bug in to_datetime with unit='ns' would drop timezone information from the parsed argument + * Bug in DataFrame.join where joining a timezone aware index with a timezone aware column would result in a column of NaN + * Bug in date_range where ambiguous or nonexistent start or end times were not handled by the ambiguous or nonexistent keywords respectively + * Bug in DatetimeIndex.union when combining a timezone aware and timezone unaware DatetimeIndex + * Bug when applying a numpy reduction function (e.g. numpy.minimum) to a timezone aware Series + > Numeric + * Bug in to_numeric in which large negative numbers were being improperly handled + * Bug in to_numeric in which numbers were being coerced to float, even though errors was not coerce + * Bug in to_numeric in which invalid values for errors were being allowed + * Bug in format in which floating point complex numbers were not being formatted to proper display precision and trimming + * Bug in error messages in DataFrame.corr and Series.corr. Added the possibility of using a callable. + * Bug in Series.divmod and Series.rdivmod which would raise an (incorrect) ValueError rather than return a pair of Series objects as result + * Raises a helpful exception when a non-numeric index is sent to interpolate with methods which require numeric index. + * Bug in ~pandas.eval when comparing floats with scalar operators, for example: x < -0.1 + * Fixed bug where casting all-boolean array to integer extension array failed + * Bug in divmod with a Series object containing zeros incorrectly raising AttributeError + * Inconsistency in Series floor-division (//) and divmod filling positive//zero with NaN instead of Inf + > Conversion + * Bug in DataFrame.astype() when passing a dict of columns and types the errors parameter was ignored. 
+ > Strings + * Bug in the __name__ attribute of several methods of Series.str, which were set incorrectly + * Improved error message when passing Series of wrong dtype to Series.str.cat + > Interval + * Construction of Interval is restricted to numeric, Timestamp and Timedelta endpoints + * Fixed bug in Series/DataFrame not displaying NaN in IntervalIndex with missing values + * Bug in IntervalIndex.get_loc where a KeyError would be incorrectly raised for a decreasing IntervalIndex + * Bug in Index constructor where passing mixed closed Interval objects would result in a ValueError instead of an object dtype Index + > Indexing + * Improved exception message when calling DataFrame.iloc with a list of non-numeric objects . + * Improved exception message when calling .iloc or .loc with a boolean indexer with different length . + * Bug in KeyError exception message when indexing a MultiIndex with a non-existent key not displaying the original key . + * Bug in .iloc and .loc with a boolean indexer not raising an IndexError when too few items are passed . + * Bug in DataFrame.loc and Series.loc where KeyError was not raised for a MultiIndex when the key was less than or equal to the number of levels in the MultiIndex . + * Bug in which DataFrame.append produced an erroneous warning indicating that a KeyError will be thrown in the future when the data to be appended contains new columns . + * Bug in which DataFrame.to_csv caused a segfault for a reindexed data frame, when the indices were single-level MultiIndex .
+ * Fixed bug where assigning a arrays.PandasArray to a pandas.core.frame.DataFrame would raise error + * Allow keyword arguments for callable local reference used in the DataFrame.query string + * Fixed a KeyError when indexing a MultiIndex level with a list containing exactly one label, which is missing + * Bug which produced AttributeError on partial matching Timestamp in a MultiIndex + * Bug in Categorical and CategoricalIndex with Interval values when using the in operator (__contains__) with objects that are not comparable to the values in the Interval + * Bug in DataFrame.loc and DataFrame.iloc on a DataFrame with a single timezone-aware datetime64[ns] column incorrectly returning a scalar instead of a Series + * Bug in CategoricalIndex and Categorical incorrectly raising ValueError instead of TypeError when a list is passed using the in operator (__contains__) + * Bug in setting a new value in a Series with a Timedelta object incorrectly casting the value to an integer + * Bug in Series setting a new key (__setitem__) with a timezone-aware datetime incorrectly raising ValueError + * Bug in DataFrame.iloc when indexing with a read-only indexer + * Bug in Series setting an existing tuple key (__setitem__) with timezone-aware datetime values incorrectly raising TypeError + > Missing + * Fixed misleading exception message in Series.interpolate if argument order is required, but omitted .
+ * Fixed class type displayed in exception message in DataFrame.dropna if invalid axis parameter passed + * A ValueError will now be thrown by DataFrame.fillna when limit is not a positive integer + > MultiIndex + * Bug in which incorrect exception raised by Timedelta when testing the membership of MultiIndex + > I/O + * Bug in DataFrame.to_html() where values were truncated using display options instead of outputting the full content + * Fixed bug in missing text when using to_clipboard if copying utf-16 characters in Python 3 on Windows + * Bug in read_json for orient='table' when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema + * Bug in read_json for orient='table' and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema + * Bug in read_json for orient='table' and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema + * Bug in json_normalize for errors='ignore' where missing values in the input data, were filled in resulting DataFrame with the string "nan" instead of numpy.nan + * DataFrame.to_html now raises TypeError when using an invalid type for the classes parameter instead of AssertionError + * Bug in DataFrame.to_string and DataFrame.to_latex that would lead to incorrect output when the header keyword is used + * Bug in read_csv not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ + * Improved performance in pandas.read_stata and pandas.io.stata.StataReader when converting columns that have missing values + * Bug in DataFrame.to_html where header numbers would ignore display options when rounding + * Bug in read_hdf where reading a table from an HDF5 file written directly with PyTables fails with a ValueError when using a sub-selection via the start or stop arguments + * Bug 
in read_hdf not properly closing store after a KeyError is raised + * Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds + * Improved pandas.read_stata and pandas.io.stata.StataReader to read incorrectly formatted 118 format files saved by Stata + * Improved the col_space parameter in DataFrame.to_html to accept a string so CSS length values can be set correctly + * Fixed bug in loading objects from S3 that contain # characters in the URL + * Adds use_bqstorage_api parameter to read_gbq to speed up downloads of large data frames. This feature requires version 0.10.0 of the pandas-gbq library as well as the google-cloud-bigquery-storage and fastavro libraries. + * Fixed memory leak in DataFrame.to_json when dealing with numeric data + * Bug in read_json where date strings with Z were not converted to a UTC timezone + * Added cache_dates=True parameter to read_csv, which allows to cache unique dates when they are parsed + * DataFrame.to_excel now raises a ValueError when the caller's dimensions exceed the limitations of Excel + * Fixed bug in pandas.read_csv where a BOM would result in incorrect parsing using engine='python' + * read_excel now raises a ValueError when input is of type pandas.io.excel.ExcelFile and engine param is passed since pandas.io.excel.ExcelFile has an engine defined + * Bug while selecting from HDFStore with where='' specified . + * Fixed bug in DataFrame.to_excel() where custom objects (i.e. 
PeriodIndex) inside merged cells were not being converted into types safe for the Excel writer + * Bug in read_hdf where reading a timezone aware DatetimeIndex would raise a TypeError + * Bug in to_msgpack and read_msgpack which would raise a ValueError rather than a FileNotFoundError for an invalid path + * Fixed bug in DataFrame.to_parquet which would raise a ValueError when the dataframe had no columns + * Allow parsing of PeriodDtype columns when using read_csv + > Plotting + * Fixed bug where api.extensions.ExtensionArray could not be used in matplotlib plotting + * Bug in an error message in DataFrame.plot. Improved the error message if non-numerics are passed to DataFrame.plot + * Bug in incorrect ticklabel positions when plotting an index that are non-numeric / non-datetime + * Fixed bug causing plots of PeriodIndex timeseries to fail if the frequency is a multiple of the frequency rule code + * Fixed bug when plotting a DatetimeIndex with datetime.timezone.utc timezone + > Groupby/resample/rolling + * Bug in pandas.core.resample.Resampler.agg with a timezone aware index where OverflowError would raise when passing a list of functions + * Bug in pandas.core.groupby.DataFrameGroupBy.nunique in which the names of column levels were lost + * Bug in pandas.core.groupby.GroupBy.agg when applying an aggregation function to timezone aware data + * Bug in pandas.core.groupby.GroupBy.first and pandas.core.groupby.GroupBy.last where timezone information would be dropped + * Bug in pandas.core.groupby.GroupBy.size when grouping only NA values + * Bug in Series.groupby where observed kwarg was previously ignored + * Bug in Series.groupby where using groupby with a MultiIndex Series with a list of labels equal to the length of the series caused incorrect grouping + * Ensured that ordering of outputs in groupby aggregation functions is consistent across all versions of Python + * Ensured that result group order is correct when grouping on an ordered Categorical and 
specifying observed=True + * Bug in pandas.core.window.Rolling.min and pandas.core.window.Rolling.max that caused a memory leak + * Bug in pandas.core.window.Rolling.count and pandas.core.window.Expanding.count was previously ignoring the axis keyword + * Bug in pandas.core.groupby.GroupBy.idxmax and pandas.core.groupby.GroupBy.idxmin with datetime column would return incorrect dtype + * Bug in pandas.core.groupby.GroupBy.cumsum, pandas.core.groupby.GroupBy.cumprod, pandas.core.groupby.GroupBy.cummin and pandas.core.groupby.GroupBy.cummax with categorical column having absent categories, would return incorrect result or segfault + * Bug in pandas.core.groupby.GroupBy.nth where NA values in the grouping would return incorrect results + * Bug in pandas.core.groupby.SeriesGroupBy.transform where transforming an empty group would raise a ValueError + * Bug in pandas.core.frame.DataFrame.groupby where passing a pandas.core.groupby.grouper.Grouper would return incorrect groups when using the .groups accessor + * Bug in pandas.core.groupby.GroupBy.agg where incorrect results are returned for uint64 columns. 
+ * Bug in pandas.core.window.Rolling.median and pandas.core.window.Rolling.quantile where MemoryError is raised with empty window + * Bug in pandas.core.window.Rolling.median and pandas.core.window.Rolling.quantile where incorrect results are returned with closed='left' and closed='neither' + * Improved pandas.core.window.Rolling, pandas.core.window.Window and pandas.core.window.EWM functions to exclude nuisance columns from results instead of raising errors and raise a DataError only if all columns are nuisance + * Bug in pandas.core.window.Rolling.max and pandas.core.window.Rolling.min where incorrect results are returned with an empty variable window + * Raise a helpful exception when an unsupported weighted window function is used as an argument of pandas.core.window.Window.aggregate + > Reshaping + * Bug in pandas.merge adds a string of None, if None is assigned in suffixes instead of remain the column name as-is . + * Bug in merge when merging by index name would sometimes result in an incorrectly numbered index (missing index values are now assigned NA) + * to_records now accepts dtypes to its column_dtypes parameter + * Bug in concat where order of OrderedDict (and dict in Python 3.6+) is not respected, when passed in as objs argument + * Bug in pivot_table where columns with NaN values are dropped even if dropna argument is False, when the aggfunc argument contains a list + * Bug in concat where the resulting freq of two DatetimeIndex with the same freq would be dropped . + * Bug in merge where merging with equivalent Categorical dtypes was raising an error + * bug in DataFrame instantiating with a dict of iterators or generators (e.g. pd.DataFrame({'A': reversed(range(3))})) raised an error . + * Bug in DataFrame instantiating with a range (e.g. pd.DataFrame(range(3))) raised an error . 
+ * Bug in DataFrame constructor when passing non-empty tuples would cause a segmentation fault + * Bug in Series.apply failed when the series is a timezone aware DatetimeIndex + * Bug in pandas.cut where large bins could incorrectly raise an error due to an integer overflow + * Bug in DataFrame.sort_index where an error is thrown when a multi-indexed DataFrame is sorted on all levels with the initial level sorted last + * Bug in Series.nlargest treats True as smaller than False + * Bug in DataFrame.pivot_table with a IntervalIndex as pivot index would raise TypeError + * Bug in which DataFrame.from_dict ignored order of OrderedDict when orient='index' . + * Bug in DataFrame.transpose where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ValueError + * Bug in pivot_table when pivoting a timezone aware column as the values would remove timezone information + * Bug in merge_asof when specifying multiple by columns where one is datetime64[ns, tz] dtype + > Sparse + * Significant speedup in SparseArray initialization that benefits most operations, fixing performance regression introduced in v0.20.0 + * Bug in SparseFrame constructor where passing None as the data would cause default_fill_value to be ignored + * Bug in SparseDataFrame when adding a column in which the length of values does not match length of index, AssertionError is raised instead of raising ValueError + * Introduce a better error message in Series.sparse.from_coo so it returns a TypeError for inputs that are not coo matrices + * Bug in numpy.modf on a SparseArray. Now a tuple of SparseArray is returned . + > Build Changes + * Fix install error with PyPy on macOS + > ExtensionArray + * Bug in factorize when passing an ExtensionArray with a custom na_sentinel . + * Series.count miscounts NA values in ExtensionArrays + * Added Series.__array_ufunc__ to better handle NumPy ufuncs applied to Series backed by extension arrays . 
+ * Keyword argument deep has been removed from ExtensionArray.copy + > Other + * Removed unused C functions from vendored UltraJSON implementation + * Allow Index and RangeIndex to be passed to numpy min and max functions + * Use actual class name in repr of empty objects of a Series subclass . + * Bug in DataFrame where passing an object array of timezone-aware datetime objects would incorrectly raise ValueError +- Remove upstream-included pandas-tests-memory.patch + +------------------------------------------------------------------- +Sat Mar 16 22:35:08 UTC 2019 - Arun Persaud + +- specfile: + * require pytest-mock + +- update to version 0.24.2: + * Fixed Regressions + + Fixed regression in DataFrame.all() and DataFrame.any() where + bool_only=True was ignored (GH25101) + + Fixed issue in DataFrame construction with passing a mixed list + of mixed types could segfault. (GH25075) + + Fixed regression in DataFrame.apply() causing RecursionError + when dict-like classes were passed as argument. (GH25196) + + Fixed regression in DataFrame.replace() where regex=True was + only replacing patterns matching the start of the string + (GH25259) + + Fixed regression in DataFrame.duplicated(), where empty + dataframe was not returning a boolean dtyped Series. (GH25184) + + Fixed regression in Series.min() and Series.max() where + numeric_only=True was ignored when the Series contained + Categorical data (GH25299) + + Fixed regression in subtraction between Series objects with + datetime64[ns] dtype incorrectly raising OverflowError when the + Series on the right contains null values (GH25317) + + Fixed regression in TimedeltaIndex where np.sum(index) + incorrectly returned a zero-dimensional object instead of a + scalar (GH25282) + + Fixed regression in IntervalDtype construction where passing an + incorrect string with ‘Interval’ as a prefix could result in a + RecursionError.
(GH25338) + + Fixed regression in creating a period-dtype array from a + read-only NumPy array of period objects. (GH25403) + + Fixed regression in Categorical, where constructing it from a + categorical Series and an explicit categories= that differed + from that in the Series created an invalid object which could + trigger segfaults. (GH25318) + + Fixed regression in to_timedelta() losing precision when + converting floating data to Timedelta data (GH25077). + + Fixed pip installing from source into an environment without + NumPy (GH25193) + + Fixed regression in DataFrame.replace() where large strings of + numbers would be coerced into int64, causing an OverflowError + (GH25616) + + Fixed regression in factorize() when passing a custom + na_sentinel value with sort=True (GH25409). + + Fixed regression in DataFrame.to_csv() writing duplicate line + endings with gzip compress (GH25311) + * Bug Fixes + + I/O + o Better handling of terminal printing when the terminal + dimensions are not known (GH25080) + o Bug in reading a HDF5 table-format DataFrame created in Python + 2, in Python 3 (GH24925) + o Bug in reading a JSON with orient='table' generated by + DataFrame.to_json() with index=False (GH25170) + o Bug where float indexes could have misaligned values when + printing (GH25061) + + Reshaping + o Bug in transform() where applying a function to a timezone aware + column would return a timezone naive result (GH24198) + o Bug in DataFrame.join() when joining on a timezone aware + DatetimeIndex (GH23931) + o Visualization + o Bug in Series.plot() where a secondary y axis could not be set + to log scale (GH25545) + + Other + o Bug in Series.is_unique() where single occurrences of NaN were + not considered unique (GH25180) + o Bug in merge() when merging an empty DataFrame with an Int64 + column or a non-empty DataFrame with an Int64 column that is all + NaN (GH25183) + o Bug in IntervalTree where a RecursionError occurs upon + construction due to an overflow when 
adding endpoints, which + also causes IntervalIndex to crash during indexing operations + (GH25485) + o Bug in Series.size raising for some extension-array-backed + Series, rather than returning the size (GH25580) + o Bug in resampling raising for nullable integer-dtype columns + (GH25580) + +------------------------------------------------------------------- +Fri Feb 22 10:22:38 UTC 2019 - Tomáš Chvátal + +- Add patch to fix testrun on 32bit: + https://github.com/pandas-dev/pandas/issues/25384 + * pandas-tests-memory.patch + +------------------------------------------------------------------- +Thu Feb 21 10:45:17 UTC 2019 - Tomáš Chvátal + +- Add requirement for at least 4 GB of physical memory + +------------------------------------------------------------------- +Tue Feb 19 14:31:25 UTC 2019 - Tomáš Chvátal + +- Do not delete tests, they are used even by other inheriting packages + for their testing +- Execute tests + +------------------------------------------------------------------- +Tue Feb 5 22:16:08 UTC 2019 - Todd R + +- Update to 0.24.1 + * The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`). + The default *behavior*, however, remains the same + * Fixed regression in :meth:`DataFrame.to_dict` with ``records`` orient raising an + ``AttributeError`` when the ``DataFrame`` contained more than 255 columns, or + wrongly converting column names that were not valid python identifiers (:issue:`24939`, :issue:`24940`). + * Fixed regression in :func:`read_sql` when passing certain queries with MySQL/pymysql (:issue:`24988`). + * Fixed regression in :class:`Index.intersection` incorrectly sorting the values by default (:issue:`24959`). + * Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`). 
+ * Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`) + * Fixed regression in :func:`to_timedelta` with `box=False` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`) + * Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`) + * Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`) + * Fixed the warning for implicitly registered matplotlib converters not showing. See :ref:`whatsnew_0211.converters` for more (:issue:`24963`). + * Fixed AttributeError when printing a DataFrame's HTML repr after accessing the IPython config object (:issue:`25036`) + +------------------------------------------------------------------- +Mon Jan 28 15:46:08 UTC 2019 - Todd R + +- Update to 0.24.0 + Highlights include: + * Optional Integer NA Support + * New APIs for accessing the array backing a Series or Index + * A new top-level method for creating arrays + * Store Interval and Period data in a Series or DataFrame + * Support for joining on two MultiIndexes + +------------------------------------------------------------------- +Wed Aug 8 16:26:30 UTC 2018 - jengelh@inai.de + +- Ensure neutrality of description. Remove future visions. + Use noun phrase in summary. 
+ +------------------------------------------------------------------- +Sat Aug 4 19:07:22 UTC 2018 - toddrme2178@gmail.com + +- Update to 0.23.4 + * Python 3.7 with Windows gave all missing values for rolling variance calculations (:issue:`21813`) + * Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) + * Bug in ``roll_quantile`` caused a memory leak when calling ``.rolling(...).quantile(q)`` with ``q`` in (0,1) (:issue:`21965`) + * Bug in :func:`Series.clip` and :func:`DataFrame.clip` cannot accept list-like threshold containing ``NaN`` (:issue:`19992`) + +------------------------------------------------------------------- +Sat Jul 14 01:59:02 UTC 2018 - arun@gmx.de + +- update to version 0.23.3: + * This release fixes a build issue with the sdist for Python 3.7 + (GH21785) There are no other changes. + +------------------------------------------------------------------- +Sat Jul 7 17:09:22 UTC 2018 - arun@gmx.de + +- update to version 0.23.2: + * Fixed Regressions + + Fixed regression in to_csv() when handling file-like object + incorrectly (GH21471) + + Re-allowed duplicate level names of a MultiIndex. Accessing a + level that has a duplicate name by name still raises an error + (GH19029). + + Bug in both DataFrame.first_valid_index() and + Series.first_valid_index() raised for a row index having + duplicate values (GH21441) + + Fixed printing of DataFrames with hierarchical columns with long + names (GH21180) + + Fixed regression in reindex() and groupby() with a MultiIndex or + multiple keys that contains categorical datetime-like values + (GH21390). 
+ + Fixed regression in unary negative operations with object dtype + (GH21380) + + Bug in Timestamp.ceil() and Timestamp.floor() when timestamp is + a multiple of the rounding frequency (GH21262) + + Fixed regression in to_clipboard() that defaulted to copying + dataframes with space delimited instead of tab delimited + (GH21104) + * Build Changes + + The source and binary distributions no longer include test data + files, resulting in smaller download sizes. Tests relying on + these data files will be skipped when using + pandas.test(). (GH19320) + * Bug Fixes + * Conversion + + Bug in constructing Index with an iterator or generator + (GH21470) + + Bug in Series.nlargest() for signed and unsigned integer dtypes + when the minimum value is present (GH21426) + * Indexing + + Bug in Index.get_indexer_non_unique() with categorical key + (GH21448) + + Bug in comparison operations for MultiIndex where error was + raised on equality / inequality comparison involving a + MultiIndex with nlevels == 1 (GH21149) + + Bug in DataFrame.drop() behaviour is not consistent for unique + and non-unique indexes (GH21494) + + Bug in DataFrame.duplicated() with a large number of columns + causing a ‘maximum recursion depth exceeded’ (GH21524). 
+ * I/O + + Bug in read_csv() that caused it to incorrectly raise an error + when nrows=0, low_memory=True, and index_col was not None + (GH21141) + + Bug in json_normalize() when formatting the record_prefix with + integer columns (GH21536) + * Categorical + + Bug in rendering Series with Categorical dtype in rare + conditions under Python 2.7 (GH21002) + * Timezones + + Bug in Timestamp and DatetimeIndex where passing a Timestamp + localized after a DST transition would return a datetime before + the DST transition (GH20854) + + Bug in comparing DataFrames with tz-aware DatetimeIndex + columns with a DST transition that raised a KeyError (GH19970) + * Timedelta + + Bug in Timedelta where non-zero timedeltas shorter than 1 + microsecond were considered False (GH21484) + +------------------------------------------------------------------- +Wed Jun 13 17:45:54 UTC 2018 - toddrme2178@gmail.com + +- Update to 0.23.1 + + Fixed Regressions + * Reverted change to comparing a Series holding datetimes and a datetime.date object + * Reverted the ability of to_sql() to perform multivalue inserts as this caused regression in certain cases (GH21103). In the future this will be made configurable. + * Fixed regression in the DatetimeIndex.date and DatetimeIndex.time attributes in case of timezone-aware data: DatetimeIndex.time returned a tz-aware time instead of tz-naive (GH21267) and DatetimeIndex.date returned incorrect date when the input date has a non-UTC timezone (GH21230). + * Fixed regression in pandas.io.json.json_normalize() when called with None values in nested levels in JSON, and to not drop keys with value as None (GH21158, GH21356).
+ * Bug in to_csv() causes encoding error when compression and encoding are specified (GH21241, GH21118) + * Bug preventing pandas from being importable with -OO optimization (GH21071) + * Bug in Categorical.fillna() incorrectly raising a TypeError when value the individual categories are iterable and value is an iterable (GH21097, GH19788) + * Fixed regression in constructors coercing NA values like None to strings when passing dtype=str (GH21083) + * Regression in pivot_table() where an ordered Categorical with missing values for the pivot’s index would give a mis-aligned result (GH21133) + * Fixed regression in merging on boolean index/columns (GH21119). + + Performance Improvements + * Improved performance of CategoricalIndex.is_monotonic_increasing(), CategoricalIndex.is_monotonic_decreasing() and CategoricalIndex.is_monotonic() (GH21025) + * Improved performance of CategoricalIndex.is_unique() (GH21107) + + Bug fixes + * Groupby/Resample/Rolling + > Bug in DataFrame.agg() where applying multiple aggregation functions to a DataFrame with duplicated column names would cause a stack overflow (GH21063) + > Bug in pandas.core.groupby.GroupBy.ffill() and pandas.core.groupby.GroupBy.bfill() where the fill within a grouping would not always be applied as intended due to the implementations’ use of a non-stable sort (GH21207) + > Bug in pandas.core.groupby.GroupBy.rank() where results did not scale to 100% when specifying method='dense' and pct=True + > Bug in pandas.DataFrame.rolling() and pandas.Series.rolling() which incorrectly accepted a 0 window size rather than raising (GH21286) + * Data-type specific + > Bug in Series.str.replace() where the method throws TypeError on Python 3.5.2 (:issue: 21078) + > Bug in Timedelta: where passing a float with a unit would prematurely round the float precision (:issue: 14156) + > Bug in pandas.testing.assert_index_equal() which raised AssertionError incorrectly, when comparing two CategoricalIndex objects with param 
check_categorical=False (GH19776) + * Sparse + > Bug in SparseArray.shape which previously only returned the shape SparseArray.sp_values (GH21126) + * Indexing + > Bug in Series.reset_index() where appropriate error was not raised with an invalid level name (GH20925) + > Bug in interval_range() when start/periods or end/periods are specified with float start or end (GH21161) + > Bug in MultiIndex.set_names() where error raised for a MultiIndex with nlevels == 1 (GH21149) + > Bug in IntervalIndex constructors where creating an IntervalIndex from categorical data was not fully supported (GH21243, issue:21253) + > Bug in MultiIndex.sort_index() which was not guaranteed to sort correctly with level=1; this was also causing data misalignment in particular DataFrame.stack() operations (GH20994, GH20945, GH21052) + * Plotting + > New keywords (sharex, sharey) to turn on/off sharing of x/y-axis by subplots generated with pandas.DataFrame().groupby().boxplot() (:issue: 20968) + * I/O + > Bug in IO methods specifying compression='zip' which produced uncompressed zip archives (GH17778, GH21144) + > Bug in DataFrame.to_stata() which prevented exporting DataFrames to buffers and most file-like objects (GH21041) + > Bug in read_stata() and StataReader which did not correctly decode utf-8 strings on Python 3 from Stata 14 files (dta version 118) (GH21244) + > Bug in IO JSON read_json() reading empty JSON schema with orient='table' back to DataFrame caused an error (GH21287) + * Reshaping + > Bug in concat() where error was raised in concatenating Series with numpy scalar and tuple names (GH21015) + > Bug in concat() warning message providing the wrong guidance for future behavior (GH21101) + * Other + > Tab completion on Index in IPython no longer outputs deprecation warnings (GH21125) + > Bug preventing pandas being used on Windows without C++ redistributable installed (GH21106) + + + +------------------------------------------------------------------- +Mon May 21 17:50:23 UTC 
2018 - toddrme2178@gmail.com + +- Update dependencies + +------------------------------------------------------------------- +Thu May 17 12:28:44 UTC 2018 - tchvatal@suse.com + +- Update to 0.23.0: + * Round-trippable JSON format with ‘table’ orient. + * Instantiation from dicts respects order for Python 3.6+. + * Dependent column arguments for assign. + * Merging / sorting on a combination of columns and index levels. + * Extending Pandas with custom types. + * Excluding unobserved categories from groupby. + * Changes to make output shape of DataFrame.apply consistent. + +------------------------------------------------------------------- +Thu May 17 12:06:17 UTC 2018 - tchvatal@suse.com + +- Do not bother generating pandas doc if it is already in both + html and pdf provided by upstream, just point to the URL + +------------------------------------------------------------------- +Thu Jan 11 11:18:48 UTC 2018 - tchvatal@suse.com + +- Drop commented code to allow us py3 only build + +------------------------------------------------------------------- +Wed Jan 3 22:41:40 UTC 2018 - arun@gmx.de + +- specfile: + * update copyright year + +- update to version 0.22.0: + * Pandas 0.22.0 changes the handling of empty and all-NA sums and + products. The summary is that + + The sum of an empty or all-NA Series is now 0 + + The product of an empty or all-NA Series is now 1 + + We’ve added a min_count parameter to .sum() and .prod() + controlling the minimum number of valid values for the result to + be valid. If fewer than min_count non-NA values are present, the + result is NA. The default is 0. To return NaN, the 0.21 + behavior, use min_count=1. + +------------------------------------------------------------------- +Sat Dec 16 23:04:54 UTC 2017 - arun@gmx.de + +- update to version 0.21.1: + * Highlights include: + + Temporarily restore matplotlib datetime plotting + functionality. 
This should resolve issues for users who + implicitly relied on pandas to plot datetimes with + matplotlib. See here. + + Improvements to the Parquet IO functions introduced in + 0.21.0. See here. + * Improvements to the Parquet IO functionality + + DataFrame.to_parquet() will now write non-default indexes when + the underlying engine supports it. The indexes will be preserved + when reading back in with read_parquet() (GH18581). + + read_parquet() now allows to specify the columns to read from a + parquet file (GH18154) + + read_parquet() now allows to specify kwargs which are passed to + the respective engine (GH18216) + * Other Enhancements + + Timestamp.timestamp() is now available in Python 2.7. (GH17329) + + Grouper and TimeGrouper now have a friendly repr output + (GH18203). + * Deprecations + + pandas.tseries.register has been renamed to + pandas.plotting.register_matplotlib_converters() (GH18301) + * Performance Improvements + + Improved performance of plotting large series/dataframes + (GH18236). + * Conversion + + Bug in TimedeltaIndex subtraction could incorrectly overflow + when NaT is present (GH17791) + + Bug in DatetimeIndex subtracting datetimelike from DatetimeIndex + could fail to overflow (GH18020) + + Bug in IntervalIndex.copy() when copying an IntervalIndex with + non-default closed (GH18339) + + Bug in DataFrame.to_dict() where columns of datetime that are + tz-aware were not converted to required arrays when used with + orient='records', raising TypeError (GH18372) + + Bug in DateTimeIndex and date_range() where mismatching tz-aware + start and end timezones would not raise an err if end.tzinfo is + None (GH18431) + + Bug in Series.fillna() which raised when passed a long integer + on Python 2 (GH18159).
+ * Indexing + + Bug in a boolean comparison of a datetime.datetime and a + datetime64[ns] dtype Series (GH17965) + + Bug where a MultiIndex with more than a million records was not + raising AttributeError when trying to access a missing attribute + (GH18165) + + Bug in IntervalIndex constructor when a list of intervals is + passed with non-default closed (GH18334) + + Bug in Index.putmask when an invalid mask passed (GH18368) + + Bug in masked assignment of a timedelta64[ns] dtype Series, + incorrectly coerced to float (GH18493) + * I/O + + Bug in class:~pandas.io.stata.StataReader not converting + date/time columns with display formatting addressed + (GH17990). Previously columns with display formatting were + normally left as ordinal numbers and not converted to datetime + objects. + + Bug in read_csv() when reading a compressed UTF-16 encoded file + (GH18071) + + Bug in read_csv() for handling null values in index columns when + specifying na_filter=False (GH5239) + + Bug in read_csv() when reading numeric category fields with high + cardinality (GH18186) + + Bug in DataFrame.to_csv() when the table had MultiIndex columns, + and a list of strings was passed in for header (GH5539) + + Bug in parsing integer datetime-like columns with specified + format in read_sql (GH17855). 
+ + Bug in DataFrame.to_msgpack() when serializing data of the + numpy.bool_ datatype (GH18390) + + Bug in read_json() not decoding when reading line delimited + JSON from S3 (GH17200) + + Bug in pandas.io.json.json_normalize() to avoid modification of + meta (GH18610) + + Bug in to_latex() where repeated multi-index values were not + printed even though a higher level index differed from the + previous row (GH14484) + + Bug when reading NaN-only categorical columns in HDFStore + (GH18413) + + Bug in DataFrame.to_latex() with longtable=True where a latex + multicolumn always spanned over three columns (GH17959) + * Plotting + + Bug in DataFrame.plot() and Series.plot() with DatetimeIndex + where a figure generated by them is not pickleable in Python 3 + (GH18439) + * Groupby/Resample/Rolling + + Bug in DataFrame.resample(...).apply(...) when there is a + callable that returns different columns (GH15169) + + Bug in DataFrame.resample(...) when there is a time change (DST) + and resampling frequency is 12h or higher (GH15549) + + Bug in pd.DataFrameGroupBy.count() when counting over a + datetimelike column (GH13393) + + Bug in rolling.var where calculation is inaccurate with a + zero-valued array (GH18430) + * Reshaping + + Error message in pd.merge_asof() for key datatype mismatch now + includes datatype of left and right key (GH18068) + + Bug in pd.concat when empty and non-empty DataFrames or Series + are concatenated (GH18178 GH18187) + + Bug in DataFrame.filter(...)
when unicode is passed as a + condition in Python 2 (GH13101) + + Bug when merging empty DataFrames when np.seterr(divide='raise') + is set (GH17776) + * Numeric + + Bug in pd.Series.rolling.skew() and rolling.kurt() with all + equal values has floating issue (GH18044) + + Bug in TimedeltaIndex subtraction could incorrectly overflow + when NaT is present (GH17791) + + Bug in DatetimeIndex subtracting datetimelike from DatetimeIndex + could fail to overflow (GH18020) + * Categorical + + Bug in DataFrame.astype() where casting to ‘category’ on an + empty DataFrame causes a segmentation fault (GH18004) + + Error messages in the testing module have been improved when + items have different CategoricalDtype (GH18069) + + CategoricalIndex can now correctly take a + pd.api.types.CategoricalDtype as its dtype (GH18116) + + Bug in Categorical.unique() returning read-only codes array when + all categories were NaN (GH18051) + + Bug in DataFrame.groupby(axis=1) with a CategoricalIndex + (GH18432) + * String + + Series.str.split() will now propagate NaN values across all + expanded columns instead of None (GH18450) + +------------------------------------------------------------------- +Mon Oct 30 06:05:48 UTC 2017 - arun@gmx.de + +- specfile: + * updated minimum numpy version to 1.9.0 (see setup.py) + +- update to version 0.21.0: + * Highlights include: + + Integration with Apache Parquet, including a new top-level + read_parquet() function and DataFrame.to_parquet() method, see + here. + + New user-facing pandas.api.types.CategoricalDtype for specifying + categoricals independent of the data, see here. + + The behavior of sum and prod on all-NaN Series/DataFrames is now + consistent and no longer depends on whether bottleneck is + installed, see here. + + Compatibility fixes for pypy, see here. + + Additions to the drop, reindex and rename API to make them more + consistent, see here.
+ + Addition of the new methods DataFrame.infer_objects (see here) + and GroupBy.pipe (see here). + + Indexing with a list of labels, where one or more of the labels + is missing, is deprecated and will raise a KeyError in a future + version, see here. + * full list at http://pandas.pydata.org/pandas-docs/stable/whatsnew.html + +------------------------------------------------------------------- +Sat Sep 23 21:12:48 UTC 2017 - arun@gmx.de + +- update to version 0.20.3: + * bug fix release, see http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#v0-20-3-july-7-2017 + for complete changelog + +- changes from version 0.20.2: + * bug fix release, see http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#v0-20-2-june-4-2017 + for complete changelog + +------------------------------------------------------------------- +Thu May 18 01:07:08 UTC 2017 - toddrme2178@gmail.com + +- Update to version 0.20.1 + Highlights include: + * New ``.agg()`` API for Series/DataFrame similar to the + groupby-rolling-resample API's + * Integration with the ``feather-format``, including a new + top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` + method + * The ``.ix`` indexer has been deprecated + * ``Panel`` has been deprecated + * Addition of an ``IntervalIndex`` and ``Interval`` scalar type + * Improved user API when grouping by index levels in ``.groupby()`` + * Improved support for ``UInt64`` dtypes + * A new orient for JSON serialization, ``orient='table'``, that + uses the Table Schema spec and that gives the possibility for + a more interactive repr in the Jupyter Notebook + * Experimental support for exporting styled DataFrames + (``DataFrame.style``) to Excel + * Window binary corr/cov operations now return a MultiIndexed + ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now + deprecated + * Support for S3 handling now uses ``s3fs`` + * Google BigQuery support now uses the ``pandas-gbq`` library + 
+------------------------------------------------------------------- +Mon May 8 03:37:27 UTC 2017 - toddrme2178@gmail.com + +- Fix dateutil dependency + +------------------------------------------------------------------- +Tue Apr 25 18:39:03 UTC 2017 - toddrme2178@gmail.com + +- Implement single-spec version. + +------------------------------------------------------------------- +Thu Mar 30 15:00:41 UTC 2017 - toddrme2178@gmail.com + +- update to version 0.19.2: + * Enhancements + The pd.merge_asof(), added in 0.19.0, gained some improvements: + + pd.merge_asof() gained left_index/right_index and + left_by/right_by arguments (GH14253) + + pd.merge_asof() can take multiple columns in by parameter and + has specialized dtypes for better performance (GH13936) + * Performance Improvements + + Performance regression with PeriodIndex (GH14822) + + Performance regression in indexing with getitem (GH14930) + + Improved performance of .replace() (GH12745) + + Improved performance Series creation with a datetime index and + dictionary data (GH14894) + * Bug Fixes + + Compat with python 3.6 for pickling of some offsets (GH14685) + + Compat with python 3.6 for some indexing exception types + (GH14684, GH14689) + + Compat with python 3.6 for deprecation warnings in the test + suite (GH14681) + + Compat with python 3.6 for Timestamp pickles (GH14689) + + Compat with dateutil==2.6.0; segfault reported in the testing + suite (GH14621) + + Allow nanoseconds in Timestamp.replace as a kwarg (GH14621) + + Bug in pd.read_csv in which aliasing was being done for + na_values when passed in as a dictionary (GH14203) + + Bug in pd.read_csv in which column indices for a dict-like + na_values were not being respected (GH14203) + + Bug in pd.read_csv where reading files fails, if the number of + headers is equal to the number of lines in the file (GH14515) + + Bug in pd.read_csv for the Python engine in which an unhelpful + error message was being raised when multi-char delimiters were + not
being respected with quotes (GH14582) + + Fix bugs (GH14734, GH13654) in pd.read_sas and + pandas.io.sas.sas7bdat.SAS7BDATReader that caused problems when + reading a SAS file incrementally. + + Bug in pd.read_csv for the Python engine in which an unhelpful + error message was being raised when skipfooter was not being + respected by Python’s CSV library (GH13879) + + Bug in .fillna() in which timezone aware datetime64 values were + incorrectly rounded (GH14872) + + Bug in .groupby(..., sort=True) of a non-lexsorted MultiIndex + when grouping with multiple levels (GH14776) + + Bug in pd.cut with negative values and a single bin (GH14652) + + Bug in pd.to_numeric where a 0 was not unsigned on a + downcast='unsigned' argument (GH14401) + + Bug in plotting regular and irregular timeseries using shared + axes (sharex=True or ax.twinx()) (GH13341, GH14322). + + Bug in not propagating exceptions in parsing invalid datetimes, + noted in python 3.6 (GH14561) + + Bug in resampling a DatetimeIndex in local TZ, covering a DST + change, which would raise AmbiguousTimeError (GH14682) + + Bug in indexing that transformed RecursionError into KeyError or + IndexingError (GH14554) + + Bug in HDFStore when writing a MultiIndex when using + data_columns=True (GH14435) + + Bug in HDFStore.append() when writing a Series and passing a + min_itemsize argument containing a value for the index (GH11412) + + Bug when writing to a HDFStore in table format with a + min_itemsize value for the index and without asking to append + (GH10381) + + Bug in Series.groupby.nunique() raising an IndexError for an + empty Series (GH12553) + + Bug in DataFrame.nlargest and DataFrame.nsmallest when the index + had duplicate values (GH13412) + + Bug in clipboard functions on linux with python2 with unicode + and separators (GH13747) + + Bug in clipboard functions on Windows 10 and python 3 (GH14362, + GH12807) + + Bug in .to_clipboard() and Excel compat (GH12529) + + Bug in DataFrame.combine_first() for
integer columns (GH14687). + + Bug in pd.read_csv() in which the dtype parameter was not being + respected for empty data (GH14712) + + Bug in pd.read_csv() in which the nrows parameter was not being + respected for large input when using the C engine for parsing + (GH7626) + + Bug in pd.merge_asof() could not handle timezone-aware + DatetimeIndex when a tolerance was specified (GH14844) + + Explicit check in to_stata and StataWriter for out-of-range + values when writing doubles (GH14618) + + Bug in .plot(kind='kde') which did not drop missing values to + generate the KDE Plot, instead generating an empty + plot. (GH14821) + + Bug in unstack() if called with a list of column(s) as an + argument, regardless of the dtypes of all columns, they get + coerced to object (GH11847) +- update to version 0.19.1: + * Performance Improvements + + Fixed performance regression in factorization of Period data + (GH14338) + + Fixed performance regression in Series.asof(where) when where is + a scalar (GH14461) + + Improved performance in DataFrame.asof(where) when where is a + scalar (GH14461) + + Improved performance in .to_json() when lines=True (GH14408) + + Improved performance in certain types of loc indexing with a + MultiIndex (GH14551). + * Bug Fixes + + Source installs from PyPI will now again work without cython + installed, as in previous versions (GH14204) + + Compat with Cython 0.25 for building (GH14496) + + Fixed regression where user-provided file handles were closed in + read_csv (c engine) (GH14418). + + Fixed regression in DataFrame.quantile when missing values where + present in some columns (GH14357). + + Fixed regression in Index.difference where the freq of a + DatetimeIndex was incorrectly set (GH14323) + + Added back pandas.core.common.array_equivalent with a + deprecation warning (GH14555). 
+ + Bug in pd.read_csv for the C engine in which quotation marks + were improperly parsed in skipped rows (GH14459) + + Bug in pd.read_csv for Python 2.x in which Unicode quote + characters were no longer being respected (GH14477) + + Fixed regression in Index.append when categorical indices were + appended (GH14545). + + Fixed regression in pd.DataFrame where constructor fails when + given dict with None value (GH14381) + + Fixed regression in DatetimeIndex._maybe_cast_slice_bound when + index is empty (GH14354). + + Bug in localizing an ambiguous timezone when a boolean is passed + (GH14402) + + Bug in TimedeltaIndex addition with a Datetime-like object where + addition overflow in the negative direction was not being caught + (GH14068, GH14453) + + Bug in string indexing against data with object Index may raise + AttributeError (GH14424) + + Correctly raise ValueError on empty input to pd.eval() and + df.query() (GH13139) + + Bug in RangeIndex.intersection when result is an empty set + (GH14364). + + Bug in groupby-transform broadcasting that could cause incorrect + dtype coercion (GH14457) + + Bug in Series.__setitem__ which allowed mutating read-only + arrays (GH14359).
+ + Bug in DataFrame.insert where multiple calls with duplicate + columns can fail (GH14291) + + pd.merge() will raise ValueError with non-boolean parameters in + passed boolean type arguments (GH14434) + + Bug in Timestamp where dates very near the minimum (1677-09) + could underflow on creation (GH14415) + + Bug in pd.concat where names of the keys were not propagated to + the resulting MultiIndex (GH14252) + + Bug in pd.concat where axis cannot take string parameters 'rows' + or 'columns' (GH14369) + + Bug in pd.concat with dataframes heterogeneous in length and + tuple keys (GH14438) + + Bug in MultiIndex.set_levels where illegal level values were + still set after raising an error (GH13754) + + Bug in DataFrame.to_json where lines=True and a value contained + a } character (GH14391) + + Bug in df.groupby causing an AttributeError when grouping a + single index frame by a column and the index level + (GH14327) + + Bug in df.groupby where TypeError raised when + pd.Grouper(key=...) is passed in a list (GH14334) + + Bug in pd.pivot_table may raise TypeError or ValueError when + index or columns is not scalar and values is not specified + (GH14380) + +------------------------------------------------------------------- +Sun Oct 23 01:32:23 UTC 2016 - toddrme2178@gmail.com + +- update to version 0.19.0: + (long changelog, see http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#v0-19-0-october-2-2016) + * Highlights include: + + merge_asof() for asof-style time-series joining + + .rolling() is now time-series aware + + read_csv() now supports parsing Categorical data + + A function union_categorical() has been added for combining + categoricals + + PeriodIndex now has its own period dtype, and changed to be more + consistent with other Index classes + + Sparse data structures gained enhanced support of int and bool + dtypes + + Comparison operations with Series no longer ignores the index, + see here for an overview of the API changes.
+ + Introduction of a pandas development API for utility functions + + Deprecation of Panel4D and PanelND. We recommend to represent + these types of n-dimensional data with the xarray package. + + Removal of the previously deprecated modules pandas.io.data, + pandas.io.wb, pandas.tools.rplot. +- specfile: + * require python3-Cython + * Split documentation into own subpackage to speed up build. + * Remove buildrequires for optional dependencies to speed up build. +- Remove unneeded patches: + * 0001_disable_experimental_msgpack_big_endian.patch + * 0001_respect_byteorder_in_statareader.patch + +------------------------------------------------------------------- +Tue Jul 12 16:44:48 UTC 2016 - antoine.belvire@laposte.net + +- Update to 0.18.1: + * .groupby(...) has been enhanced to provide convenient syntax + when working with .rolling(..), .expanding(..) and + .resample(..) per group. + * pd.to_datetime() has gained the ability to assemble dates + from a DataFrame. + * Method chaining improvements. + * Custom business hour offset. + * Many bug fixes in the handling of sparse. + * Expanded the Tutorials section with a feature on modern pandas, + courtesy of @TomAugsb (GH13045). +- Changes from 0.18.0: + * Moving and expanding window functions are now methods on Series + and DataFrame, similar to .groupby. + * Adding support for a RangeIndex as a specialized form of the + Int64Index for memory savings. + * API breaking change to the .resample method to make it more + .groupby like. + * Removal of support for positional indexing with floats, which + was deprecated since 0.14.0. This will now raise a TypeError. + * The .to_xarray() function has been added for compatibility with + the xarray package. + * The read_sas function has been enhanced to read sas7bdat files. + * Addition of the .str.extractall() method, and API changes to + the .str.extract() method and .str.cat() method. + * pd.test() top-level nose test runner is available (GH4327).
+ +------------------------------------------------------------------- +Fri Feb 26 13:13:58 UTC 2016 - tbechtold@suse.com + +- Require python-python-dateutil. package was renamed + +------------------------------------------------------------------- +Tue Feb 9 17:01:02 UTC 2016 - aplanas@suse.com + +- Add 0001_respect_byteorder_in_statareader.patch + Fix StataReader in big endian architectures + https://github.com/pydata/pandas/issues/11282 +- Add 0001_disable_experimental_msgpack_big_endian.patch + Skip experimental msgpack test in big endian systems + +------------------------------------------------------------------- +Wed Feb 3 15:27:31 UTC 2016 - aplanas@suse.com + +- Remove non-needed BuildRequires +- Update Requires from documentation +- Update Recommends from documentation +- Add tests in %check section + +------------------------------------------------------------------- +Mon Nov 30 09:56:31 UTC 2015 - toddrme2178@gmail.com + +- update to version 0.17.1: + (for full changelog see http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#v0-17-1-november-21-2015) + Highlights include: + * Support for Conditional HTML Formatting, see here + * Releasing the GIL on the csv reader & other ops, see here + * Fixed regression in DataFrame.drop_duplicates from 0.16.2, causing + incorrect results on integer values (GH11376) + +------------------------------------------------------------------- +Mon Oct 12 09:28:25 UTC 2015 - toddrme2178@gmail.com + +- update to version 0.17.0: + (for full changelog see http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#v0-17-0-october-9-2015) + Highlights: + * Release the Global Interpreter Lock (GIL) on some cython + operations, see here + * Plotting methods are now available as attributes of the .plot + accessor, see here + * The sorting API has been revamped to remove some long-time + inconsistencies, see here + * Support for a datetime64[ns] with timezones as a first-class + dtype, see here + * The default for 
to_datetime will now be to raise when presented + with unparseable formats, previously this would return the + original input. Also, date parse functions now return consistent + results. See here + * The default for dropna in HDFStore has changed to False, to store + by default all rows even if they are all NaN, see here + * Datetime accessor (dt) now supports Series.dt.strftime to generate + formatted strings for datetime-likes, and Series.dt.total_seconds + to generate each duration of the timedelta in seconds. See here + * Period and PeriodIndex can handle multiplied freq like 3D, which + corresponds to a 3-day span. See here + * Development installed versions of pandas will now have PEP440 + compliant version strings (GH9518) + * Development support for benchmarking with the Air Speed Velocity + library (GH8361) + * Support for reading SAS xport files, see here + * Documentation comparing SAS to pandas, see here + * Removal of the automatic TimeSeries broadcasting, deprecated since + 0.8.0, see here + * Display format with plain text can optionally align with Unicode + East Asian Width, see here + * Compatibility with Python 3.5 (GH11097) + * Compatibility with matplotlib 1.5.0 (GH11111) + +------------------------------------------------------------------- +Mon Jun 29 11:06:30 UTC 2015 - toddrme2178@gmail.com + +- update to version 0.16.2: + (see http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#v0-16-2-june-12-2015) + * Highlights + + A new pipe method + + Documentation on how to use numba with pandas + * Enhancements + + Added rsplit to Index/Series StringMethods (GH10303) + + Removed the hard-coded size limits on the DataFrame HTML + representation in the IPython notebook, and leave this to + IPython itself (only for IPython v3.0 or greater). This + eliminates the duplicate scroll bars that appeared in the + notebook with large frames (GH10231).
+ + Note that the notebook has a toggle output scrolling feature to + limit the display of very large frames (by clicking left of the + output). You can also configure the way DataFrames are displayed + using the pandas options, see here here. + + axis parameter of DataFrame.quantile now accepts also index and + column. (GH9543) + * API Changes + + Holiday now raises NotImplementedError if both offset and + observance are used in the constructor instead of returning an + incorrect result (GH10217). + * Performance Improvements + + Improved Series.resample performance with dtype=datetime64[ns] + (GH7754) + + Increase performance of str.split when expand=True (GH10081) + * Bug Fixes + + Bug in Series.hist raises an error when a one row Series was + given (GH10214) + + Bug where HDFStore.select modifies the passed columns list + (GH7212) + + Bug in Categorical repr with display.width of None in Python 3 + (GH10087) + + Bug in to_json with certain orients and a CategoricalIndex would + segfault (GH10317) + + Bug where some of the nan funcs do not have consistent return + dtypes (GH10251) + + Bug in DataFrame.quantile on checking that a valid axis was + passed (GH9543) + + Bug in groupby.apply aggregation for Categorical not preserving + categories (GH10138) + + Bug in to_csv where date_format is ignored if the datetime is + fractional (GH10209) + + Bug in DataFrame.to_json with mixed data types (GH10289) + + Bug in cache updating when consolidating (GH10264) + + Bug in mean() where integer dtypes can overflow (GH10172) + + Bug where Panel.from_dict does not set dtype when specified + (GH10058) + + Bug in Index.union raises AttributeError when passing + array-likes. (GH10149) + + Bug in Timestamp‘s’ microsecond, quarter, dayofyear, week and + daysinmonth properties return np.int type, not built-in + int. (GH10050) + + Bug in NaT raises AttributeError when accessing to daysinmonth, + dayofweek properties. 
(GH10096) + + Bug in Index repr when using the max_seq_items=None setting + (GH10182). + + Bug in getting timezone data with dateutil on various platforms + ( GH9059, GH8639, GH9663, GH10121) + + Bug in displaying datetimes with mixed frequencies; display ‘ms’ + datetimes to the proper precision. (GH10170) + + Bug in setitem where type promotion is applied to the entire + block (GH10280) + + Bug in Series arithmetic methods may incorrectly hold names + (GH10068) + + Bug in GroupBy.get_group when grouping on multiple keys, one of + which is categorical. (GH10132) + + Bug in DatetimeIndex and TimedeltaIndex names are lost after + timedelta arithmetics ( GH9926) + + Bug in DataFrame construction from nested dict with datetime64 + (GH10160) + + Bug in Series construction from dict with datetime64 keys + (GH9456) + + Bug in Series.plot(label="LABEL") not correctly setting the + label (GH10119) + + Bug in plot not defaulting to matplotlib axes.grid setting + (GH9792) + + Bug causing strings containing an exponent, but no decimal to be + parsed as int instead of float in engine='python' for the read_csv + parser (GH9565) + + Bug in Series.align resets name when fill_value is specified + (GH10067) + + Bug in read_csv causing index name not to be set on an empty + DataFrame (GH10184) + + Bug in SparseSeries.abs resets name (GH10241) + + Bug in TimedeltaIndex slicing may reset freq (GH10292) + + Bug in GroupBy.get_group raises ValueError when group key + contains NaT (GH6992) + + Bug in SparseSeries constructor ignores input data name + (GH10258) + + Bug in Categorical.remove_categories causing a ValueError when + removing the NaN category if underlying dtype is floating-point + (GH10156) + + Bug where infer_freq infers timerule (WOM-5XXX) unsupported by + to_offset (GH9425) + + Bug in DataFrame.to_hdf() where table format would raise a + seemingly unrelated error for invalid (non-string) column + names. This is now explicitly forbidden. 
(GH9057) + + Bug to handle masking empty DataFrame (GH10126). + + Bug where MySQL interface could not handle numeric table/column + names (GH10255) + + Bug in read_csv with a date_parser that returned a datetime64 + array of other time resolution than [ns] (GH10245) + + Bug in Panel.apply when the result has ndim=0 (GH10332) + + Bug in read_hdf where auto_close could not be passed (GH9327). + + Bug in read_hdf where open stores could not be used (GH10330). + + Bug in adding empty DataFrames, now results in a DataFrame + that .equals an empty DataFrame (GH10181). + + Bug in to_hdf and HDFStore which did not check that complib + choices were valid (GH4582, GH8874). + +------------------------------------------------------------------- +Tue May 19 09:18:50 UTC 2015 - toddrme2178@gmail.com + +- Update to version 0.16.1 + * Highlights + - Support for a ``CategoricalIndex``, a category based index + - New section on how-to-contribute to pandas + - Revised "Merge, join, and concatenate" documentation, + including graphical examples to make it easier to understand + each operation + - New method sample for drawing random samples from Series, + DataFrames and Panels. + - The default Index printing has changed to a more uniform + format + - BusinessHour datetime-offset is now supported + * Enhancements + - BusinessHour offset is now supported, which represents + business hours starting from 09:00 - 17:00 on BusinessDay by + default. + - DataFrame.diff now takes an axis parameter that determines the + direction of differencing + - Allow clip, clip_lower, and clip_upper to accept array-like + arguments as thresholds (This is a regression from 0.11.0). + These methods now have an axis parameter which determines + how the Series or DataFrame will be aligned with the + threshold(s).
+ - DataFrame.mask() and Series.mask() now support same keywords + as where + - drop function can now accept errors keyword to suppress + ValueError raised when any of label does not exist in the + target data. + - Allow conversion of values with dtype datetime64 or timedelta64 + to strings using astype(str) + - get_dummies function now accepts sparse keyword. If set to + True, the return DataFrame is sparse, e.g. SparseDataFrame. + - Period now accepts datetime64 as value input. + - Allow timedelta string conversion when leading zero is + missing from time definition, ie 0:00:00 vs 00:00:00. + - Allow Panel.shift with axis='items' + - Trying to write an excel file now raises NotImplementedError + if the DataFrame has a MultiIndex instead of writing a broken + Excel file. + - Allow Categorical.add_categories to accept Series or np.array. + - Add/delete str/dt/cat accessors dynamically from __dir__. + - Add normalize as a dt accessor method. + - DataFrame and Series now have _constructor_expanddim property + as overridable constructor for one higher dimensionality + data. This should be used only when it is really needed + - pd.lib.infer_dtype now returns 'bytes' in Python 3 where + appropriate. + - We introduce a CategoricalIndex, a new type of index object + that is useful for supporting indexing with duplicates. This + is a container around a Categorical (introduced in v0.15.0) + and allows efficient indexing and storage of an index with a + large number of duplicated elements. Prior to 0.16.1, + setting the index of a DataFrame/Series with a category + dtype would convert this to regular object-based Index. + - Series, DataFrames, and Panels now have a new method: + pandas.DataFrame.sample. The method accepts a specific number + of rows or columns to return, or a fraction of the total + number or rows or columns. 
It also has options for sampling + with or without replacement, for passing in a column for + weights for non-uniform sampling, and for setting seed values + to facilitate replication. + - The following new methods are accessible via .str accessor to + apply the function to each value. + + capitalize() + + swapcase() + + normalize() + + partition() + + rpartition() + + index() + + rindex() + + translate() + - Added StringMethods (.str accessor) to Index + - split now takes expand keyword to specify whether to expand + dimensionality. return_type is deprecated. + * API changes + - When passing in an ax to df.plot( ..., ax=ax), the sharex + kwarg will now default to False. + - Add support for separating years and quarters using dashes, + for example 2014-Q1. + - pandas.DataFrame.assign now inserts new columns in + alphabetical order. Previously the order was arbitrary. + - By default, read_csv and read_table will now try to infer + the compression type based on the file extension. Set + compression=None to restore the previous behavior + (no decompression). + - The string representation of Index and its sub-classes have + now been unified. These will show a single-line display if + there are few values; a wrapped multi-line display for a lot + of values (but less than display.max_seq_items; if lots of + items > display.max_seq_items) will show a truncated display + (the head and tail of the data). The formatting for + MultiIndex is unchanged (a multi-line wrapped display). The + display width responds to the option display.max_seq_items, + which is defaulted to 100.
+ * Deprecations + - Series.str.split's return_type keyword was removed in favor + of expand + * Performance Improvements + - Improved csv write performance with mixed dtypes, including + datetimes by up to 5x + - Improved csv write performance generally by 2x + - Improved the performance of pd.lib.max_len_string_array + by 5-7x + * Bug Fixes + - Bug where labels did not appear properly in the legend of + DataFrame.plot(), passing label= arguments works, and Series + indices are no longer mutated. + - Bug in json serialization causing a segfault when a frame had + zero length. + - Bug in read_csv where missing trailing delimiters would cause + segfault. + - Bug in retaining index name on appending + - Bug in scatter_matrix draws unexpected axis ticklabels + - Fixed bug in StataWriter resulting in changes to input + DataFrame upon save. + - Bug in transform causing length mismatch when null entries + were present and a fast aggregator was being used + - Bug in equals causing false negatives when block order + differed + - Bug in grouping with multiple pd.Grouper where one is + non-time based + - Bug in read_sql_table error when reading postgres table with + timezone + - Bug in DataFrame slicing may not retain metadata + - Bug where TimedeltaIndex were not properly serialized in fixed + HDFStore + - Bug with TimedeltaIndex constructor ignoring name when given + another TimedeltaIndex as data. + - Bug in DataFrameFormatter._get_formatted_index with not + applying max_colwidth to the DataFrame index + - Bug in .loc with a read-only ndarray data source + - Bug in groupby.apply() that would raise if a passed user + defined function either returned only None (for all input). + - Always use temporary files in pytables tests + - Bug in plotting continuously using secondary_y may not show + legend properly.
+ - Bug in DataFrame.plot(kind="hist") results in TypeError when + DataFrame contains non-numeric columns + - Bug where repeated plotting of DataFrame with a DatetimeIndex + may raise TypeError + - Bug in setup.py that would allow an incompat cython version + to build + - Bug in plotting secondary_y incorrectly attaches right_ax + property to secondary axes specifying itself recursively. + - Bug in Series.quantile on empty Series of type Datetime or + Timedelta + - Bug in where causing incorrect results when upcasting was + required + - Bug in FloatArrayFormatter where decision boundary for + displaying "small" floats in decimal format is off by one + order of magnitude for a given display.precision + - Fixed bug where DataFrame.plot() raised an error when both + color and style keywords were passed and there was no color + symbol in the style strings + - Not showing a DeprecationWarning on combining list-likes with + an Index + - Bug in read_csv and read_table when using skip_rows parameter + if blank lines are present. + - Bug in read_csv() interprets index_col=True as 1 + - Bug in index equality comparisons using == failing on + Index/MultiIndex type incompatibility + - Bug in which SparseDataFrame could not take nan as a column + name + - Bug in to_msgpack and read_msgpack zlib and blosc compression + support + - Bug GroupBy.size doesn't attach index name properly if + grouped by TimeGrouper + - Bug causing an exception in slice assignments because + length_of_indexer returns wrong results + - Bug in csv parser causing lines with initial whitespace plus + one non-space character to be skipped. + - Bug in C csv parser causing spurious NaNs when data started + with newline followed by whitespace. 
+ - Bug causing elements with a null group to spill into the + final group when grouping by a Categorical + - Bug where .iloc and .loc behavior is not consistent on empty + dataframes + - Bug in invalid attribute access on a TimedeltaIndex + incorrectly raised ValueError instead of AttributeError + - Bug in unequal comparisons between categorical data and a + scalar, which was not in the categories (e.g. + Series(Categorical(list("abc"), ordered=True)) > "d". This + returned False for all elements, but now raises a TypeError. + Equality comparisons also now return False for == and True + for !=. + - Bug in DataFrame __setitem__ when right hand side is a + dictionary + - Bug in where when dtype is datetime64/timedelta64, but dtype + of other is not + - Bug in MultiIndex.sortlevel() results in unicode level name + breaks + - Bug in which groupby.transform incorrectly enforced output + dtypes to match input dtypes. + - Bug in DataFrame constructor when columns parameter is set, + and data is an empty list + - Bug in bar plot with log=True raises TypeError if all values + are less than 1 + - Bug in horizontal bar plot ignores log=True + - Bug in PyTables queries that did not return proper results + using the index + - Bug where dividing a dataframe containing values of type + Decimal by another Decimal would raise. + - Bug where using DataFrames asfreq would remove the name of + the index. + - Bug causing extra index point when resample BM/BQ + - Changed caching in AbstractHolidayCalendar to be at the + instance level rather than at the class level as the latter + can result in unexpected behaviour. 
+ - Fixed latex output for multi-indexed dataframes + - Bug causing an exception when setting an empty range using + DataFrame.loc + - Bug in hiding ticklabels with subplots and shared axes when + adding a new plot to an existing grid of axes + - Bug in transform and filter when grouping on a categorical + variable + - Bug in transform when groups are equal in number and dtype to + the input index + - Google BigQuery connector now imports dependencies on a + per-method basis. + - Updated BigQuery connector to no longer use deprecated + oauth2client.tools.run() + - Bug in subclassed DataFrame. It may not return the correct + class, when slicing or subsetting it. + - Bug in .median() where non-float null values are not handled + correctly + - Bug in Series.fillna() where it raises if a numerically + convertible string is given + +------------------------------------------------------------------- +Tue Mar 24 12:44:20 UTC 2015 - toddrme2178@gmail.com + +- update to version 0.16.0: + * Highlights: + - DataFrame.assign method + - Series.to_coo/from_coo methods to interact with scipy.sparse + - Backwards incompatible change to Timedelta to conform the .seconds + attribute with datetime.timedelta + - Changes to the .loc slicing API to conform with the behavior of .ix + - Changes to the default for ordering in the Categorical constructor + - Enhancement to the .str accessor to make string operations easier + - The pandas.tools.rplot, pandas.sandbox.qtpandas and pandas.rpy + modules are deprecated. We refer users to external packages like + seaborn, pandas-qt and rpy2 for similar or equivalent functionality + * New features + - Inspired by dplyr's mutate verb, DataFrame has a new assign method. + - Added SparseSeries.to_coo and SparseSeries.from_coo methods for + converting to and from scipy.sparse.coo_matrix instances. + - Following new methods are accessible via .str accessor to apply the + function to each value.
This is intended to make it more consistent with + standard methods on strings: isalnum(), isalpha(), isdigit(), isdigit(), + isspace(), islower(), isupper(), istitle(), isnumeric(), isdecimal(), + find(), rfind(), ljust(), rjust(), zfill() + - Reindex now supports method='nearest' for frames or series with a + monotonic increasing or decreasing index. + - The read_excel() function's sheetname argument now accepts a list and + None, to get multiple or all sheets respectively. If more than one sheet + is specified, a dictionary is returned. + - Allow Stata files to be read incrementally with an iterator; support for + long strings in Stata files. + - Paths beginning with ~ will now be expanded to begin with the user's home + directory. + - Added time interval selection in get_data_yahoo. + - Added Timestamp.to_datetime64() to complement Timedelta.to_timedelta64(). + - tseries.frequencies.to_offset() now accepts Timedelta as input. + - Lag parameter was added to the autocorrelation method of Series, defaults + to lag-1 autocorrelation. + - Timedelta will now accept nanoseconds keyword in constructor. + - SQL code now safely escapes table and column names. + - Added auto-complete for Series.str., Series.dt. and + Series.cat.. + - Index.get_indexer now supports method='pad' and method='backfill' even + for any target array, not just monotonic targets. + - Index.asof now works on all index types. + - A verbose argument has been augmented in io.read_excel(), defaults to + False. Set to True to print sheet names as they are parsed. + - Added days_in_month (compatibility alias daysinmonth) property to + Timestamp, DatetimeIndex, Period, PeriodIndex, and Series.dt. + - Added decimal option in to_csv to provide formatting for non-'.' decimal + separators + - Added normalize option for Timestamp to normalized to midnight + - Added example for DataFrame import to R using HDF5 file and rhdf5 + library. 
+ * Backwards incompatible API changes + - In v0.16.0, we are restoring the API to match that of datetime.timedelta. + Further, the component values are still available through the .components + accessor. This affects the .seconds and .microseconds accessors, and + removes the .hours, .minutes, .milliseconds accessors. These changes + affect TimedeltaIndex and the Series .dt accessor as well. + - The behavior of a small sub-set of edge cases for using .loc have + changed. Furthermore we have improved the content of the error messages + that are raised: + + Slicing with .loc where the start and/or stop bound is not found in + the index is now allowed; this previously would raise a KeyError. This + makes the behavior the same as .ix in this case. This change is only + for slicing, not when indexing with a single label. + + Allow slicing with float-like values on an integer index for .ix. + Previously this was only enabled for .loc: + + Provide a useful exception for indexing with an invalid type for that + index when using .loc. For example trying to use .loc on an index of + type DatetimeIndex or PeriodIndex or TimedeltaIndex, with an integer + (or a float). + - In prior versions, Categoricals that had an unspecified ordering + (meaning no ordered keyword was passed) were defaulted as ordered + Categoricals. Going forward, the ordered keyword in the Categorical + constructor will default to False. Ordering must now be explicit. + Furthermore, previously you *could* change the ordered attribute of a + Categorical by just setting the attribute, e.g. cat.ordered=True; This is + now deprecated and you should use cat.as_ordered() or cat.as_unordered(). + These will by default return a **new** object and not modify the + existing object. + - Index.duplicated now returns np.array(dtype=bool) rather than + Index(dtype=object) containing bool values. 
+ - DataFrame.to_json now returns accurate type serialisation for each column + for frames of mixed dtype + - DatetimeIndex, PeriodIndex and TimedeltaIndex.summary now output the same + format. + - TimedeltaIndex.freqstr now output the same string format as + DatetimeIndex. + - Bar and horizontal bar plots no longer add a dashed line along the info + axis. The prior style can be achieved with matplotlib's axhline or + axvline methods. + - Series accessors .dt, .cat and .str now raise AttributeError instead of + TypeError if the series does not contain the appropriate type of data. + This follows Python's built-in exception hierarchy more closely and + ensures that tests like hasattr(s, 'cat') are consistent on both Python + 2 and 3. + - Series now supports bitwise operation for integral types. Previously even + if the input dtypes were integral, the output dtype was coerced to bool. + - During division involving a Series or DataFrame, 0/0 and 0//0 now give + np.nan instead of np.inf. + - Series.value_counts and Series.describe for categorical data will now + put NaN entries at the end. + - Series.describe for categorical data will now give counts and frequencies + of 0, not NaN, for unused categories + - Due to a bug fix, looking up a partial string label with + DatetimeIndex.asof now includes values that match the string, even if + they are after the start of the partial string label. Old behavior: + * Deprecations + - The rplot trellis plotting interface is deprecated and will be removed + in a future version. We refer to external packages like + seaborn for similar but more refined functionality. + - The pandas.sandbox.qtpandas interface is deprecated and will be removed + in a future version. + We refer users to the external package pandas-qt. + - The pandas.rpy interface is deprecated and will be removed in a future + version.
+ Similar functionality can be accessed through the rpy2 project + - Adding DatetimeIndex/PeriodIndex to another DatetimeIndex/PeriodIndex is + being deprecated as a set-operation. This will be changed to a TypeError + in a future version. .union() should be used for the union set operation. + - Subtracting DatetimeIndex/PeriodIndex from another + DatetimeIndex/PeriodIndex is being deprecated as a set-operation. This + will be changed to an actual numeric subtraction yielding a + TimeDeltaIndex in a future version. .difference() should be used for + the differencing set operation. + * Removal of prior version deprecations/changes + - DataFrame.pivot_table and crosstab's rows and cols keyword arguments were + removed in favor + of index and columns + - DataFrame.to_excel and DataFrame.to_csv cols keyword argument was removed + in favor of columns + - Removed convert_dummies in favor of get_dummies + - Removed value_range in favor of describe + * Performance Improvements + - Fixed a performance regression for .loc indexing with an array or + list-like. + - DataFrame.to_json 30x performance improvement for mixed dtype frames.
+ - Performance improvements in MultiIndex.duplicated by working with labels + instead of values + - Improved the speed of nunique by calling unique instead of value_counts + - Performance improvement of up to 10x in DataFrame.count and + DataFrame.dropna by taking advantage of homogeneous/heterogeneous dtypes + appropriately + - Performance improvement of up to 20x in DataFrame.count when using a + MultiIndex and the level keyword argument + - Performance and memory usage improvements in merge when key space exceeds + int64 bounds + - Performance improvements in multi-key groupby + - Performance improvements in MultiIndex.sortlevel + - Performance and memory usage improvements in DataFrame.duplicated + - Cythonized Period + - Decreased memory usage on to_hdf + * Bug Fixes + - Changed .to_html to remove leading/trailing spaces in table body + - Fixed issue using read_csv on s3 with Python 3 + - Fixed compatibility issue in DatetimeIndex affecting architectures where + numpy.int_ defaults to numpy.int32 + - Bug in Panel indexing with an object-like + - Bug in the returned Series.dt.components index was reset to the default + index + - Bug in Categorical.__getitem__/__setitem__ with listlike input getting + incorrect results from indexer coercion + - Bug in partial setting with a DatetimeIndex + - Bug in groupby for integer and datetime64 columns when applying an + aggregator that caused the value to be + changed when the number was sufficiently large + - Fixed bug in to_sql when mapping a Timestamp object column (datetime + column with timezone info) to the appropriate sqlalchemy type. + - Fixed bug in to_sql dtype argument not accepting an instantiated + SQLAlchemy type. + - Bug in .loc partial setting with a np.datetime64 + - Incorrect dtypes inferred on datetimelike looking Series & on .xs slices + - Items in Categorical.unique() (and s.unique() if s is of dtype category) + now appear in the order in which they are originally found, not in sorted + order. 
This is now consistent with the behavior for other dtypes in pandas. + - Fixed bug on big endian platforms which produced incorrect results in + StataReader. + - Bug in MultiIndex.has_duplicates when having many levels causes an + indexer overflow + - Bug in pivot and unstack where nan values would break index alignment + - Bug in left join on multi-index with sort=True or null values. + - Bug in MultiIndex where inserting new keys would fail. + - Bug in groupby when key space exceeds int64 bounds. + - Bug in unstack with TimedeltaIndex or DatetimeIndex and nulls. + - Bug in rank where comparing floats with tolerance will cause inconsistent + behaviour. + - Fixed character encoding bug in read_stata and StataReader when loading + data from a URL. + - Bug in adding offsets.Nano to other offsets raises TypeError + - Bug in DatetimeIndex iteration, related to, fixed in + - Bugs in resample around DST transitions. This required fixing offset + classes so they behave correctly on DST transitions. + - Bug in binary operator method (eg .mul()) alignment with integer levels. + - Bug in boxplot, scatter and hexbin plot may show an unnecessary warning + - Bug in subplot with layout kw may show unnecessary warning + - Bug in using grouper functions that need passed thru arguments (e.g. + axis), when using wrapped function (e.g. fillna), + - DataFrame now properly supports simultaneous copy and dtype arguments in + constructor + - Bug in read_csv when using skiprows on a file with CR line endings with + the c engine. + - isnull now detects NaT in PeriodIndex + - Bug in groupby .nth() with a multiple column groupby + - Bug in DataFrame.where and Series.where coerce numerics to string + incorrectly + - Bug in DataFrame.where and Series.where raise ValueError when string + list-like is passed.
+ - Accessing Series.str methods with non-string values now raises + TypeError instead of producing incorrect results + - Bug in DatetimeIndex.__contains__ when index has duplicates and is not + monotonic increasing + - Fixed division by zero error for Series.kurt() when all values are equal + - Fixed issue in the xlsxwriter engine where it added a default 'General' + format to cells if no other format was applied. This prevented other + row or column formatting being applied. + - Fixes issue with index_col=False when usecols is also specified in + read_csv. + - Bug where wide_to_long would modify the input stubnames list + - Bug in to_sql not storing float64 values using double precision. + - SparseSeries and SparsePanel now accept zero argument constructors (same + as their non-sparse counterparts). + - Regression in merging Categorical and object dtypes + - Bug in read_csv with buffer overflows with certain malformed input files + - Bug in groupby MultiIndex with missing pair + - Fixed bug in Series.groupby where grouping on MultiIndex levels would + ignore the sort argument + - Fix bug in DataFrame.Groupby where sort=False is ignored in the case of + Categorical columns. + - Fixed bug with reading CSV files from Amazon S3 on python 3 raising a + TypeError + - Bug in the Google BigQuery reader where the 'jobComplete' key may be + present but False in the query results + - Bug in Series.value_counts with excluding NaN for categorical type + Series with dropna=True + - Fixed missing numeric_only option for DataFrame.std/var/sem + - Support constructing Panel or Panel4D with scalar data + - Series text representation disconnected from `max_rows`/`max_columns`. + - Series number formatting inconsistent when truncated.
+ - A Spurious SettingWithCopy Warning was generated when setting a new item + in a frame in some cases + + +------------------------------------------------------------------- +Mon Jan 12 13:46:26 UTC 2015 - toddrme2178@gmail.com + +- update to version 0.15.2: + * API changes: + - Indexing in MultiIndex beyond lex-sort depth is now supported, + though a lexically sorted index will have a better + performance. (GH2646) + - Bug in unique of Series with category dtype, which returned all + categories regardless whether they were "used" or not (see + GH8559 for the discussion). Previous behaviour was to return all + categories. + - Series.all and Series.any now support the level and skipna + parameters. Series.all, Series.any, Index.all, and Index.any no + longer support the out and keepdims parameters, which existed + for compatibility with ndarray. Various index types no longer + support the all and any aggregation functions and will now raise + TypeError. (GH8302). + - Allow equality comparisons of Series with a categorical dtype + and object dtype; previously these would raise TypeError + (GH8938) + - Bug in NDFrame: conflicting attribute/column names now behave + consistently between getting and setting. Previously, when both + a column and attribute named y existed, data.y would return the + attribute, while data.y = z would update the column (GH8994) + - Timestamp('now') is now equivalent to Timestamp.now() in that it + returns the local time rather than UTC. Also, Timestamp('today') + is now equivalent to Timestamp.today() and both have tz as a + possible argument. (GH9000) + - Fix negative step support for label-based slices (GH8753) + * Enhancements: + - Added ability to export Categorical data to Stata (GH8633). See + here for limitations of categorical variables exported to Stata + data files. + - Added flag order_categoricals to StataReader and read_stata to + select whether to order imported categorical data (GH8836). 
See + here for more information on importing categorical variables + from Stata data files. + - Added ability to export Categorical data to/from HDF5 + (GH7621). Queries work the same as if it was an object + array. However, the category dtyped data is stored in a more + efficient manner. See here for an example and caveats + w.r.t. prior versions of pandas. + - Added support for searchsorted() on Categorical class (GH8420). + - Added the ability to specify the SQL type of columns when + writing a DataFrame to a database (GH8778). For example, + specifying to use the sqlalchemy String type instead of the + default Text type for string columns. + - Series.all and Series.any now support the level and skipna + parameters (GH8302). + - Panel now supports the all and any aggregation + functions. (GH8302). + - Added support for utcfromtimestamp(), fromtimestamp(), and + combine() on Timestamp class (GH5351). + - Added Google Analytics (pandas.io.ga) basic documentation + (GH8835). + - Timedelta arithmetic returns NotImplemented in unknown cases, + allowing extensions by custom classes (GH8813). + - Timedelta now supports arithmetic with numpy.ndarray objects of + the appropriate dtype (numpy 1.8 or newer only) (GH8884). + - Added Timedelta.to_timedelta64() method to the public API + (GH8884). + - Added gbq.generate_bq_schema() function to the gbq module + (GH8325). + - Series now works with map objects the same way as generators + (GH8909). + - Added context manager to HDFStore for automatic closing + (GH8791). + - to_datetime gains an exact keyword to allow for a format to not + require an exact match for a provided format string (if it's + False). exact defaults to True (meaning that exact matching is + still the default) (GH8904) + - Added axvlines boolean option to parallel_coordinates plot + function, determines whether vertical lines will be printed, + default is True + - Added ability to read table footers to read_html (GH8552).
+ - to_sql now infers datatypes of non-NA values for columns that + contain NA values and have dtype object (GH8778). + * Performance: + - Reduce memory usage when skiprows is an integer in read_csv + (GH8681) + - Performance boost for to_datetime conversions with a passed + format=, and the exact=False (GH8904) + * Bug fixes: + - Bug in concat of Series with category dtype which were coercing + to object. (GH8641) + - Bug in Timestamp-Timestamp not returning a Timedelta type and + datelike-datelike ops with timezones (GH8865) + - Made consistent a timezone mismatch exception (either tz + operated with None or incompatible timezone), will now return + TypeError rather than ValueError (a couple of edge cases only), + (GH8865) + - Bug in using a pd.Grouper(key=...) with no level/axis or level + only (GH8795, GH8866) + - Report a TypeError when invalid/no parameters are passed in a + groupby (GH8015) + - Bug in packaging pandas with py2app/cx_Freeze (GH8602, GH8831) + - Bug in groupby signatures that didn’t include *args or **kwargs + (GH8733). + - io.data.Options now raises RemoteDataError when no expiry dates + are available from Yahoo and when it receives no data from Yahoo + (GH8761), (GH8783). + - Unclear error message in csv parsing when passing dtype and + names and the parsed data is a different data type (GH8833) + - Bug in slicing a multi-index with an empty list and at least one + boolean indexer (GH8781) + - io.data.Options now raises RemoteDataError when no expiry dates + are available from Yahoo (GH8761). + - Timedelta kwargs may now be numpy ints and floats (GH8757). + - Fixed several outstanding bugs for Timedelta arithmetic and + comparisons (GH8813, GH5963, GH5436).
+ - sql_schema now generates dialect appropriate CREATE TABLE + statements (GH8697) + - slice string method now takes step into account (GH8754) + - Bug in BlockManager where setting values with different type + would break block integrity (GH8850) + - Bug in DatetimeIndex when using time object as key (GH8667) + - Bug in merge where how='left' and sort=False would not preserve + left frame order (GH7331) + - Bug in MultiIndex.reindex where reindexing at level would not + reorder labels (GH4088) + - Bug in certain operations with dateutil timezones, manifesting + with dateutil 2.3 (GH8639) + - Regression in DatetimeIndex iteration with a Fixed/Local offset + timezone (GH8890) + - Bug in to_datetime when parsing a nanoseconds using the %f + format (GH8989) + - io.data.Options now raises RemoteDataError when no expiry dates + are available from Yahoo and when it receives no data from Yahoo + (GH8761), (GH8783). + - Fix: The font size was only set on x axis if vertical or the y + axis if horizontal. (GH8765) + - Fixed division by 0 when reading big csv files in python 3 + (GH8621) + - Bug in outputting a MultiIndex with to_html,index=False which + would add an extra column (GH8452) + - Imported categorical variables from Stata files retain the + ordinal information in the underlying data (GH8836). + - Defined .size attribute across NDFrame objects to provide compat + with numpy >= 1.9.1; buggy with np.array_split (GH8846) + - Skip testing of histogram plots for matplotlib <= 1.2 (GH8648). + - Bug where get_data_google returned object dtypes (GH3995) + - Bug in DataFrame.stack(..., dropna=False) when the DataFrame’s + columns is a MultiIndex whose labels do not reference all its + levels.
(GH8844) + - Bug in that Option context applied on __enter__ (GH8514) + - Bug in resample that causes a ValueError when resampling across + multiple days and the last offset is not calculated from the + start of the range (GH8683) + - Bug where DataFrame.plot(kind='scatter') fails when checking if + an np.array is in the DataFrame (GH8852) + - Bug in pd.infer_freq/DataFrame.inferred_freq that prevented + proper sub-daily frequency inference when the index contained + DST days (GH8772). + - Bug where index name was still used when plotting a series with + use_index=False (GH8558). + - Bugs when trying to stack multiple columns, when some (or all) + of the level names are numbers (GH8584). + - Bug in MultiIndex where __contains__ returns wrong result if + index is not lexically sorted or unique (GH7724) + - BUG CSV: fix problem with trailing whitespace in skipped rows, + (GH8679), (GH8661), (GH8983) + - Regression in Timestamp does not parse ‘Z’ zone designator for + UTC (GH8771) + - Bug in StataWriter the produces writes strings with 244 + characters irrespective of actual size (GH8969) + - Fixed ValueError raised by cummin/cummax when datetime64 Series + contains NaT. (GH8965) + - Bug in Datareader returns object dtype if there are missing + values (GH8980) + - Bug in plotting if sharex was enabled and index was a + timeseries, would show labels on multiple axes (GH3964). + - Bug where passing a unit to the TimedeltaIndex constructor + applied the to nano-second conversion twice. (GH9011). + - Bug in plotting of a period-like array (GH9012) +- Update copyright year + +------------------------------------------------------------------- +Sun Nov 9 15:40:36 UTC 2014 - toddrme2178@gmail.com + +- Updated to version 0.15.1: + + API changes + - Represent ``MultiIndex`` labels with a dtype that utilizes memory based + on the level size. 
+ - ``groupby`` with ``as_index=False`` will not add erroneous extra columns + to result (:issue:`8582`): + - ``groupby`` will not erroneously exclude columns if the column name + conflicts with the grouper name (:issue:`8112`): + - ``concat`` permits a wider variety of iterables of pandas objects to be + passed as the first parameter (:issue:`8645`): + - ``s.dt.hour`` and other ``.dt`` accessors will now return ``np.nan`` for + missing values (rather than previously -1), (:issue:`8689`) + - support for slicing with monotonic decreasing indexes, even if ``start`` + or ``stop`` is not found in the index (:issue:`7860`): + - added Index properties `is_monotonic_increasing` and + `is_monotonic_decreasing` (:issue:`8680`). + - pandas now also registers the ``datetime64`` dtype in matplotlib's units + registry to plot such values as datetimes. + + Enhancements + - Added option to select columns when importing Stata files (:issue:`7935`) + - Qualify memory usage in ``DataFrame.info()`` by adding ``+`` if it is a + lower bound (:issue:`8578`) + - Raise errors in certain aggregation cases where an argument such as + ``numeric_only`` is not handled (:issue:`8592`). + - Added support for 3-character ISO and non-standard country codes in + :func:``io.wb.download()`` (:issue:`8482`) + - :ref:`World Bank data requests ` now will warn/raise + based on an ``errors`` argument, as well as a list of hard-coded country + codes and the World Bank's JSON response. + - Added option to ``Series.str.split()`` to return a ``DataFrame`` rather + than a ``Series`` (:issue:`8428`) + - Added option to ``df.info(null_counts=None|True|False)`` to override the + default display options and force showing of the null-counts + (:issue:`8701`) + + Bug Fixes + - Bug in unpickling of a ``CustomBusinessDay`` object (:issue:`8591`) + - Bug in coercing ``Categorical`` to a records array, e.g.
+ ``df.to_records()`` (:issue:`8626`) + - Bug in ``Categorical`` not created properly with ``Series.to_frame()`` + (:issue:`8626`) + - Bug in coercing in astype of a ``Categorical`` of a passed + ``pd.Categorical`` (this now raises ``TypeError`` correctly), + (:issue:`8626`) + - Bug in ``cut``/``qcut`` when using ``Series`` and ``retbins=True`` + (:issue:`8589`) + - Bug in writing Categorical columns to an SQL database with ``to_sql`` + (:issue:`8624`). + - Bug in comparing ``Categorical`` of datetime raising when being compared + to a scalar datetime (:issue:`8687`) + - Bug in selecting from a ``Categorical`` with ``.iloc`` (:issue:`8623`) + - Bug in groupby-transform with a Categorical (:issue:`8623`) + - Bug in duplicated/drop_duplicates with a Categorical (:issue:`8623`) + - Bug in ``Categorical`` reflected comparison operator raising if the first + argument was a numpy array scalar (e.g. np.int64) (:issue:`8658`) + - Bug in Panel indexing with a list-like (:issue:`8710`) + - Compat issue is ``DataFrame.dtypes`` when + ``options.mode.use_inf_as_null`` is True (:issue:`8722`) + - Bug in ``read_csv``, ``dialect`` parameter would not take a string + (:issue: `8703`) + - Bug in slicing a multi-index level with an empty-list (:issue:`8737`) + - Bug in numeric index operations of add/sub with Float/Index Index with + numpy arrays (:issue:`8608`) + - Bug in setitem with empty indexer and unwanted coercion of dtypes + (:issue:`8669`) + - Bug in ix/loc block splitting on setitem (manifests with integer-like + dtypes, e.g. datetime64) (:issue:`8607`) + - Bug when doing label based indexing with integers not found in the index + for non-unique but monotonic indexes (:issue:`8680`). + - Bug when indexing a Float64Index with ``np.nan`` on numpy 1.7 + (:issue:`8980`). 
+ - Fix ``shape`` attribute for ``MultiIndex`` (:issue:`8609`) + - Bug in ``GroupBy`` where a name conflict between the grouper and columns + would break ``groupby`` operations (:issue:`7115`, :issue:`8112`) + - Fixed a bug where plotting a column ``y`` and specifying a label would + mutate the index name of the original DataFrame (:issue:`8494`) + - Fix regression in plotting of a DatetimeIndex directly with matplotlib + (:issue:`8614`). + - Bug in ``date_range`` where partially-specified dates would incorporate + current date (:issue:`6961`) + - Bug in Setting by indexer to a scalar value with a mixed-dtype `Panel4d` + was failing (:issue:`8702`) + - Bug where ``DataReader``'s would fail if one of the symbols passed was + invalid. Now returns data for valid symbols and np.nan for invalid + (:issue:`8494`) + - Bug in ``get_quote_yahoo`` that wouldn't allow non-float return values + (:issue:`5229`). + +------------------------------------------------------------------- +Mon Oct 20 10:42:30 UTC 2014 - toddrme2178@gmail.com + +- Update to 0.15.0, highlights: + - Drop support for numpy < 1.7.0 + - The Categorical type was integrated as a first-class + pandas type + - New scalar type Timedelta, and a new index type TimedeltaIndex + - New DataFrame default display for df.info() to + include memory usage + - New datetimelike properties accessor .dt for Series + - Split indexing documentation into Indexing and Selecting Data and + MultiIndex / Advanced Indexing + - Split out string methods documentation into Working with Text Data + - read_csv will now by default ignore blank lines when parsing + - API change in using Indexes in set operations + - Internal refactoring of the Index class to no longer + sub-class ndarray + - dropping support for PyTables less than version 3.0.0, + and numexpr less than version 2.1 +- Update minimum dependency versions of + python-numpy, python-tables, and python-numexpr + +------------------------------------------------------------------- 
+Tue Jul 15 12:31:13 UTC 2014 - toddrme2178@gmail.com + +- Update to 0.14.1, highlights: + - New methods :meth:`~pandas.DataFrame.select_dtypes` to select columns + based on the dtype and :meth:`~pandas.Series.sem` to calculate the + standard error of the mean. + - Support for dateutil timezones (see :ref:`docs `). + - Support for ignoring full line comments in the :func:`~pandas.read_csv` + text parser. + - New documentation section on :ref:`Options and Settings `. + - Lots of bug fixes. + +------------------------------------------------------------------- +Sun Jun 1 07:41:11 UTC 2014 - toddrme2178@gmail.com + +- Update to 0.14.0, highlights: + * Officially support Python 3.4 + * SQL interfaces updated to use sqlalchemy + * Display interface changes + * MultiIndexing Using Slicers + * Ability to join a singly-indexed DataFrame with a multi-indexed DataFrame + * More consistency in groupby results and more flexible groupby specifications + * Holiday calendars are now supported in CustomBusinessDay + * Several improvements in plotting functions, including: hexbin, area and pie plots + * Performance doc section on I/O operations, See Here +- Added python-SQLAlchemy dependency + +------------------------------------------------------------------- +Fri Mar 7 04:11:36 UTC 2014 - arun@gmx.de + +- updated to 0.13.1 + + 500 lines worth of Changelog entries, so too long:) For a complete + list see: http://pandas.pydata.org/pandas-docs/dev/release.html + +------------------------------------------------------------------- +Mon Oct 21 21:59:47 UTC 2013 - toddrme2178@gmail.com + +- Update to 0.12.0 + * Integrated JSON reading and writing with the read_json + functions and methods like DataFrame.to_json. + * New HTML table reading function read_html which will use either + lxml or BeautifulSoup under the hood. + * Support for reading and writing STATA format files. 
+- Add all optional dependencies as Recommends +- Build and install documentation + +------------------------------------------------------------------- +Mon May 6 06:01:46 UTC 2013 - highwaystar.ru@gmail.com + +- added Recommends: python-tables +- update to 0.11.0 + * New precision indexing fields loc, iloc, at, and iat, to reduce + occasional ambiguity in the catch-all hitherto ix method. + * Expanded support for NumPy data types in DataFrame + * NumExpr integration to accelerate various operator evaluation + * New Cookbook and 10 minutes to pandas pages in the documentation + by Jeff Reback + * Improved DataFrame to CSV exporting performance + +------------------------------------------------------------------- +Tue Jun 19 20:29:31 UTC 2012 - scorot@free.fr + +- remove unneeded python-Pygments and python-Sphinx from build + requirements + +------------------------------------------------------------------- +Tue Jun 19 20:23:50 UTC 2012 - scorot@free.fr + +- remove duplicates +- fix bytecode inconsistent mtime + +------------------------------------------------------------------- +Wed Jun 13 20:45:39 UTC 2012 - scorot@free.fr + +- use proper commands instead of deprecated macro +- remove unneeded -O1 and --skip-build flags from the install + command line +- set install prefix with %%{_prefix} instead of hard coded path + +------------------------------------------------------------------- +Wed Jun 13 18:41:46 UTC 2012 - scorot@free.fr + +- add %%py_compile macro in order to fix byte code mtime + inconsistency + +------------------------------------------------------------------- +Tue Jun 12 21:03:07 UTC 2012 - scorot@free.fr + +- spec file reformatting + +------------------------------------------------------------------- +Tue Jun 12 20:46:31 UTC 2012 - scorot@free.fr + +- first package + diff --git a/python-pandas.spec b/python-pandas.spec new file mode 100644 index 0000000..07d8d52 --- /dev/null +++ b/python-pandas.spec @@ -0,0 +1,703 @@ +# +# spec file for
package python-pandas +# +# Copyright (c) 2024 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%global flavor @BUILD_FLAVOR@%{nil} +%{?sle15_python_module_pythons} + +%if "%{flavor}" == "" +%define psuffix %{nil} +%bcond_with test +%else +%define psuffix -%{flavor} +%bcond_without test +%if "%{flavor}" != "test-py310" +%define skip_python310 1 +%endif +%if "%{flavor}" != "test-py311" +%define skip_python311 1 +%endif +%if "%{flavor}" != "test-py312" +%define skip_python312 1 +%endif +# Skip empty buildsets, last one is for sle15_python_module_pythons +%if "%{shrink:%{pythons}}" == "" || ("%pythons" == "python311" && 0%{?skip_python311}) +ExclusiveArch: donotbuild +%define python_module() %flavor-not-enabled-in-buildset-for-suse-%{?suse_version} +%endif +%endif + +# Only test the core functionality in Ring1 (Lettered Staging) +%bcond_with ringdisabled +# s3fs not available +%bcond_with aws +# pandas-gbq not available +%bcond_with gcp +# xlsb not available +%bcond_with xslb +%bcond_with consortium_standard +%bcond_with calamine +%bcond_with adbc +# depend/not depend on python-pyarrow and apache-arrow [bsc#1218592] +%bcond_without pyarrow + +%if %{suse_version} <= 1500 +# requires __has_builtin with keywords +%define gccver 13 +%endif +Name: python-pandas%{psuffix} +# Set version through _service +Version: 2.2.2 +Release: 0 +Summary: Python data structures for data analysis, 
time series, and statistics +License: BSD-3-Clause +URL: https://pandas.pydata.org/ +# SourceRepository: https://github.com/pandas-dev/pandas +# Must be created by cloning through `osc service runall`: gh#pandas-dev/pandas#54903, gh#pandas-dev/pandas#54907 +Source0: pandas-%{version}.tar.gz +# PATCH-FIX-UPSTREAM pandas-pr58269-pyarrow16xpass.patch -- gh#pandas-dev/pandas#58269 +Patch0: pandas-pr58269-pyarrow16xpass.patch +# PATCH-FIX-UPSTREAM pandas-pr58484-matplotlib.patch -- gh#pandas-dev/pandas#58484 mcepl@suse.com, make pandas compatible with the modern matplotlib +Patch2: pandas-pr58484-matplotlib.patch +# PATCH-FIX-UPSTREAM pandas-pr59175-matplotlib.patch -- gh#pandas-dev/pandas#59175 +Patch3: https://github.com/pandas-dev/pandas/pull/59175.patch#/pandas-pr59175-matplotlib.patch +# PATCH-FIX-UPSTREAM pandas-pr59353-np2eval.patch -- gh#pandas-dev/pandas#59144 backported to 2.2, no new tests, see gh#pandas-dev/pandas#58548, gh#pandas-dev/pandas#59353 +Patch4: pandas-pr59353-np2eval.patch +%if !%{with test} +BuildRequires: %{python_module Cython >= 3.0.5} +BuildRequires: %{python_module devel >= 3.9} +BuildRequires: %{python_module meson-python >= 0.13.1} +BuildRequires: %{python_module numpy-devel >= 1.26} +BuildRequires: %{python_module pip} +BuildRequires: %{python_module versioneer-toml} +BuildRequires: %{python_module wheel} +BuildRequires: fdupes +BuildRequires: gcc%{?gccver}-c++ +BuildRequires: git-core +BuildRequires: meson >= 1.2.1 +%endif +BuildRequires: python-rpm-macros +Requires: python-python-dateutil >= 2.8.2 +Requires: python-pytz >= 2020.1 +Requires: timezone >= 2022a +Obsoletes: python-pandas-doc < %{version} +Provides: python-pandas-doc = %{version} +%if 0%{python_version_nodots} < 311 +Requires: python-numpy >= 1.22.4 +%else +%if 0%{python_version_nodots} == 311 +Requires: python-numpy >= 1.23.2 +%else +Requires: python-numpy >= 1.26 +%endif +%endif +# SECTION extras +Recommends: python-pandas-performance +Recommends: python-pandas-pyarrow 
+Suggests: python-pandas-all +Suggests: python-pandas-clipboard +Suggests: python-pandas-compression +Suggests: python-pandas-computation +Suggests: python-pandas-excel +Suggests: python-pandas-fss +Suggests: python-pandas-hdf5 +Suggests: python-pandas-html +Suggests: python-pandas-mysql +Suggests: python-pandas-output_formatting +Suggests: python-pandas-plot +Suggests: python-pandas-postgresql +Suggests: python-pandas-spss +Suggests: python-pandas-sql-other +Suggests: python-pandas-test +Suggests: python-pandas-xml +%{?with_aws:Suggests: python-pandas-aws} +%{?with_gcp:Suggests: python-pandas-gcp} +%{?with_pyarrow:Suggests: python-pandas-parquet} +%{?with_pyarrow:Suggests: python-pandas-feather} +# /SECTION +%if %{with test} +# required for sqlite3 tests +BuildRequires: %{pythons} +BuildRequires: %{python_module pandas-test = %{version}} +BuildRequires: memory-constraints +BuildRequires: xvfb-run +%if !%{with ringdisabled} +BuildRequires: %{python_module IPython} +BuildRequires: %{python_module dask-array} +BuildRequires: %{python_module dask-dataframe} +BuildRequires: %{python_module pandas-all = %{version}} +BuildRequires: %{python_module pandas-clipboard = %{version}} +BuildRequires: %{python_module pandas-compression = %{version}} +BuildRequires: %{python_module pandas-computation = %{version}} +BuildRequires: %{python_module pandas-excel = %{version}} +%{?with_pyarrow:BuildRequires: %{python_module pandas-feather = %{version}}} +BuildRequires: %{python_module pandas-fss = %{version}} +BuildRequires: %{python_module pandas-hdf5 = %{version}} +BuildRequires: %{python_module pandas-html = %{version}} +BuildRequires: %{python_module pandas-mysql = %{version}} +BuildRequires: %{python_module pandas-output_formatting = %{version}} +%{?with_pyarrow:BuildRequires: %{python_module pandas-parquet = %{version}}} +BuildRequires: %{python_module pandas-performance = %{version}} +BuildRequires: %{python_module pandas-plot = %{version}} +BuildRequires: %{python_module 
pandas-postgresql = %{version}} +%{?with_pyarrow:BuildRequires: %{python_module pandas-pyarrow = %{version}}} +BuildRequires: %{python_module pandas-spss = %{version}} +BuildRequires: %{python_module pandas-sql-other = %{version}} +BuildRequires: %{python_module pandas-xml = %{version}} +BuildRequires: xclip +%{?with_aws:BuildRequires: %{python_module pandas-aws = %{version}}} +%{?with_gcp:BuildRequires: %{python_module pandas-gcp = %{version}}} +%{?with_consortium_standard:BuildRequires: %{python_module pandas-consortium-standard = %{version}}} +%endif +%endif +%python_subpackages + +%description +Pandas is a Python package providing data structures designed for +working with structured (tabular, multidimensional, potentially +heterogeneous) and time series data. It is a high-level building +block for doing data analysis in Python. + +%package test +Summary: The python pandas[test] extra +Requires: python-hypothesis >= 6.46.1 +Requires: python-pandas = %{version} +Requires: python-pytest >= 7.3.2 +Requires: python-pytest-xdist >= 2.2.0 +BuildArch: noarch + +%description test +This package provides the [test] extra for python-pandas + +%package pyarrow +Summary: The python pandas[pyarrow] extra +Requires: python-pandas = %{version} +Requires: python-pyarrow >= 10.0.1 +BuildArch: noarch + +%description pyarrow +This package provides the [pyarrow] extra for python-pandas + +%package performance +Summary: The python pandas[performance] extra +Requires: python-Bottleneck >= 1.3.6 +Requires: python-numba >= 0.56.4 +Requires: python-numexpr >= 2.8.4 +Requires: python-pandas = %{version} +BuildArch: noarch + +%description performance +This package provides the [performance] extra for python-pandas + +It is highly recommended to install this subpackage, as its dependencies +provide speed improvements, especially when working with large data sets. 
+ +%package computation +Summary: The python pandas[computation] extra +Requires: python-pandas = %{version} +Requires: python-scipy >= 1.10.0 +Requires: python-xarray >= 2022.12.0 +BuildArch: noarch + +%description computation +This package provides the [computation] extra for python-pandas + +%package fss +Summary: The python pandas[fss] extra +Requires: python-fsspec >= 2022.11 +Requires: python-pandas = %{version} +BuildArch: noarch + +%description fss +This package provides the [fss] extra for python-pandas + +%package aws +Summary: The python pandas[aws] extra +Requires: python-pandas = %{version} +Requires: python-s3fs >= 2022.11 +BuildArch: noarch + +%description aws +This package provides the [aws] extra for python-pandas + +%package gcp +Summary: The python pandas[gcp] extra +Requires: python-gcsfs >= 2022.11 +Requires: python-pandas = %{version} +Requires: python-pandas-gbq >= 0.19.0 +BuildArch: noarch + +%description gcp +This package provides the [gcp] extra for python-pandas + +%package excel +Summary: The python pandas[excel] extra +Requires: python-odfpy >= 1.4.1 +Requires: python-openpyxl >= 3.1.0 +Requires: python-pandas = %{version} +%{?with_xslb:Requires: python-pyxlsb >= 1.0.10} +Requires: python-XlsxWriter >= 3.0.5 +Requires: python-xlrd >= 2.0.1 +%{?with_calamine:Requires: python-calamine >= 0.1.7} +BuildArch: noarch + +%description excel +This package provides the [excel] extra for python-pandas.
+(Except for pyxlsb and calamine which are not available as openSUSE rpm package) + +%package parquet +Summary: The python pandas[parquet] extra +Requires: python-pandas = %{version} +Requires: python-pyarrow >= 10.0.1 +BuildArch: noarch + +%description parquet +This package provides the [parquet] extra for python-pandas + +%package feather +Summary: The python pandas[feather] extra +Requires: python-pandas = %{version} +Requires: python-pyarrow >= 10.0.1 +BuildArch: noarch + +%description feather +This package provides the [feather] extra for python-pandas + +%package hdf5 +Summary: The python pandas[hdf5] extra +Requires: python-blosc +Requires: python-pandas = %{version} +Requires: python-tables >= 3.8.0 +BuildArch: noarch + +%description hdf5 +This package provides the [hdf5] extra for python-pandas + +%package spss +Summary: The python pandas[spss] extra +Requires: python-pandas = %{version} +Requires: python-pyreadstat >= 1.2.0 +BuildArch: noarch + +%description spss +This package provides the [spss] extra for python-pandas + +%package postgresql +Summary: The python pandas[postgresql] extra +Requires: python-SQLAlchemy >= 2.0.0 +Requires: python-pandas = %{version} +Requires: python-psycopg2 >= 2.9.6 +%{?with_adbc:Requires: python-adbc-driver-postgresql >= 0.8} +BuildArch: noarch + +%description postgresql +This package provides the [postgresql] extra for python-pandas + +%package mysql +Summary: The python pandas[mysql] extra +Requires: python-PyMySQL >= 1.0.2 +Requires: python-SQLAlchemy >= 2.0.0 +Requires: python-pandas = %{version} +BuildArch: noarch + +%description mysql +This package provides the [mysql] extra for python-pandas + +%package sql-other +Summary: The python pandas[sql-other] extra +Requires: python-SQLAlchemy >= 2.0.0 +%{?with_adbc:Requires: python-adbc-driver-postgresql >= 0.8} +%{?with_adbc:Requires: python-adbc-driver-sqlite >= 0.8} +Requires: python-pandas = %{version} +BuildArch: noarch + +%description sql-other +This package provides 
the [sql-other] extra for python-pandas + +%package html +Summary: The python pandas[html] extra +Requires: python-beautifulsoup4 >= 4.11.2 +Requires: python-html5lib >= 1.1 +Requires: python-lxml >= 4.9.2 +Requires: python-pandas = %{version} +BuildArch: noarch + +%description html +This package provides the [html] extra for python-pandas + +%package xml +Summary: The python pandas[xml] extra +Requires: python-lxml >= 4.9.2 +Requires: python-pandas = %{version} +BuildArch: noarch + +%description xml +This package provides the [xml] extra for python-pandas + +%package plot +Summary: The python pandas[plot] extra +Requires: python-matplotlib >= 3.6.3 +Requires: python-pandas = %{version} +BuildArch: noarch + +%description plot +This package provides the [plot] extra for python-pandas + +%package output_formatting +Summary: The python pandas[output_formatting] extra +Requires: python-Jinja2 >= 3.1.2 +Requires: python-pandas = %{version} +Requires: python-tabulate >= 0.9.0 +BuildArch: noarch + +%description output_formatting +This package provides the [output_formatting] extra for python-pandas + +%package clipboard +Summary: The python pandas[clipboard] extra +Requires: python-PyQt5 >= 5.15.9 +Requires: python-QtPy >= 2.3.0 +Requires: python-pandas = %{version} +BuildArch: noarch + +%description clipboard +This package provides the [clipboard] extra for python-pandas + +%package compression +Summary: The python pandas[compression] extra +Requires: python-pandas = %{version} +Requires: python-zstandard >= 0.19.0 +BuildArch: noarch + +%description compression +This package provides the [compression] extra for python-pandas + +%package consortium-standard +Summary: The python pandas[consortium-standard] extra +Requires: python-dataframe-api-compat >= 0.1.7 +Requires: python-pandas = %{version} +BuildArch: noarch + +%description consortium-standard +This package provides the [consortium-standard] extra for python-pandas + +%package all +Summary: The python pandas[all] 
extra +Requires: python-Bottleneck >= 1.3.6 +Requires: python-Jinja2 >= 3.1.2 +Requires: python-PyMySQL >= 1.0.2 +Requires: python-PyQt5 >= 5.15.9 +Requires: python-QtPy >= 2.3.0 +Requires: python-SQLAlchemy >= 2 +Requires: python-XlsxWriter >= 3.0.5 +Requires: python-beautifulsoup4 >= 4.11.2 +%{?with_adbc:Requires: python-adbc-driver-postgresql >= 0.8} +%{?with_adbc:Requires: python-adbc-driver-sqlite >= 0.8} +Requires: python-blosc +%{?with_calamine:Requires: python-calamine >= 0.1.7} +%{?with_pyarrow:Requires: python-fastparquet >= 2022.12} +Requires: python-fsspec >= 2022.11 +Requires: python-gcsfs >= 2022.11 +Requires: python-html5lib >= 1.1 +Requires: python-hypothesis >= 6.46.1 +Requires: python-lxml >= 4.9.2 +Requires: python-matplotlib >= 3.6.3 +Requires: python-numba >= 0.56.4 +Requires: python-numexpr >= 2.8.4 +Requires: python-odfpy >= 1.4.1 +Requires: python-openpyxl >= 3.1.0 +Requires: python-pandas = %{version} +Requires: python-psycopg2 >= 2.9.6 +%{?with_pyarrow:Requires: python-pyarrow >= 10.0.1} +Requires: python-pyreadstat >= 1.2.0 +Requires: python-pytest >= 7.3.2 +Requires: python-pytest-xdist >= 2.2.0 +Requires: python-scipy >= 1.10.0 +Requires: python-tables >= 3.8.0 +Requires: python-tabulate >= 0.9 +Requires: python-xarray >= 2022.12 +Requires: python-xlrd >= 2.0.1 +Requires: python-zstandard >= 0.19.0 +%{?with_aws:Requires: python-s3fs >= 2022.11} +%{?with_gcp:Requires: python-pandas-gbq >= 0.19} +%{?with_xslb:Requires: python-pyxlsb >= 1.0.10} +%{?with_consortium_standard: Requires: python-dataframe-api-compat >= 0.1.7} +BuildArch: noarch + +%description all +This package provides most of the [all] extra for python-pandas + +Some requirements defined in the PyPI package are left out +because they are not available as openSUSE RPM packages: + + * pandas-gbq + * pyxlsb + * s3fs + * dataframe-api-compat + * adbc-driver-postgresql + * adbc-driver-sqlite + * calamine + +You can install them directly through `pip%{python_bin_suffix} install
--user`, if needed. + +%prep +# ATTENTION: unpack and generate _version_meson.py before any patches and modifications for a clean version +%setup -q -n pandas-%{version} +%if !%{with test} +# use the last one from the buildset: need versioneer installed +%python_expand genpython="%__$python" +${genpython} generate_version.py -o _version_meson.py +sed -i "s|'generate_version.py',|'${genpython}', 'generate_version.py',|" meson.build +# don't require the PyPI data only tzdata package, we use the timezone RPM package +sed -i '/dependencies = \[/,/\]/ {/tzdata.*>=/d}' pyproject.toml +%endif +%autopatch -p1 + +%build +%if !%{with test} +%{?gccver:export CXX=g++-%{gccver}} +%{?gccver:export CC=gcc-%{gccver}} +export CFLAGS="%{optflags} -fno-strict-aliasing" +%pyproject_wheel +%endif + +%install +%if !%{with test} +%pyproject_install +%{python_expand # +find %{buildroot}%{$python_sitearch}/pandas/_libs -name '*.[ch]' -delete +sed -i -e '/[.][ch],/d' %{buildroot}%{$python_sitearch}/pandas-%{version}.dist-info/RECORD +%fdupes %{buildroot}%{$python_sitearch} +} +%else +# Copy the installed package back into the source tree +# This is equivalent to build and install editable (pip install -e .), and the only way +# to have a passing test suite due to how the test collection works in pytest >= 7. +# Only works for separate python flavors in multibuild. +%python_expand cp -rf %{$python_sitearch}/pandas/* pandas/ +%endif + +%check +%if %{with test} +export LANG=en_US.UTF-8 +export LC_ALL=en_US.UTF-8 +export PYTHONDONTWRITEBYTECODE=1 +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=1 +# no network connection on obs +SKIP_MARKERS="network" +# clipboard not set up properly in build service without window manager +SKIP_MARKERS+=" or clipboard" +# skip tests which upstream marked for -n 1 only.
+SKIP_MARKERS+=" or single_cpu" +# pytest-xdist worker crash +SKIP_TESTS="test_pivot_number_of_levels_larger_than_int32" +# no locally running database server +SKIP_TESTS+=" or psycopg2_engine or psycopg2_conn or pymysql_engine or pymysql_conn" +SKIP_TESTS+=" or test_psycopg2_schema_support" +SKIP_TESTS+=" or test_self_join_date_columns" +# expects a dirty git revision from git repo +SKIP_TESTS+=" or test_git_version" +%if "%{flavor}" == "test-py312" +# https://github.com/pandas-dev/pandas/pull/57391, proposed change is not necessarily the right one +SKIP_TESTS+=" or (test_scalar_unary and numexpr-pandas)" +%endif +# Numpy2: unexpected 'np.str_(...)' in error message +SKIP_TESTS+=" or test_group_subplot_invalid_column_name" + +%ifarch %{ix86} %{arm32} +# https://github.com/pandas-dev/pandas/issues/31856 +SKIP_TESTS+=" or test_maybe_promote_int_with_int" +# rounding error +SKIP_TESTS+=" or (test_rolling_quantile_interpolation_options and data1 and linear and 0.1)" +# overflow +SKIP_TESTS+=" or test_large_string_pyarrow" +SKIP_TESTS+=" or test_pandas_nullable_with_missing_values" +SKIP_TESTS+=" or test_pandas_nullable_without_missing_values" +SKIP_TESTS+=" or (test_to_datetime and TestOrigin and test_epoch)" +SKIP_TESTS+=" or test_td_mul_numeric_ndarray_0d" +SKIP_TESTS+=" or test_get_indexer_non_unique_wrong_dtype" +# pyarrow read-only errors +SKIP_TESTS+=" or test_left_join_multi_index" +SKIP_TESTS+=" or test_join_on_single_col_dup_on_right" +# dtype mismatch +SKIP_TESTS+=" or test_frame_setitem_dask_array_into_new_col" +SKIP_TESTS+=" or test_get_indexer_arrow_dictionary_target" +# numba formats not supported on 32-bit +SKIP_TESTS+=" or numba" +%endif +%ifarch %{ix86} +# overflows on i586 +SKIP_TESTS+=" or test_encode_non_c_locale" +# intp != int32 (still numpy 1)? 
+SKIP_TESTS+=" or test_ensure_platform_int" +# fails on i586 (was gcc10-skip-one-test.patch) +SKIP_TESTS+=" or test_merge_on_ints_floats_warning" +%endif +%ifarch ppc64 s390x +# big endian type issues +SKIP_TESTS+=" or test_astype" +SKIP_TESTS+=" or test_to_numpy_string" +SKIP_TESTS+=" or (test_construction and test_to_numpy)" +SKIP_TESTS+=" or test_to_records_index_name" +SKIP_TESTS+=" or test_to_records_dtype" +SKIP_TESTS+=" or test_to_records_dict_like" +SKIP_TESTS+=" or (test_c_parser_only and test_unsupported_dtype)" +SKIP_TESTS+=" or test_td_mul_td64_ndarray_invalid" +%endif +%ifnarch x86_64 +# type and numeric precision issues, partially reported for arm and marked xfail upstream but not for e.g. ppc +SKIP_TESTS+=" or (test_astype and test_subtype_integer_errors)" +SKIP_TESTS+=" or (test_to_numeric and test_downcast_nullable_numeric and data12-UInt64-signed-UInt64)" +SKIP_TESTS+=" or (test_rolling and test_rolling_var_numerical_issues)" +SKIP_TESTS+=" or (test_groupby and test_groupby_numerical_stability_sum_mean)" +SKIP_TESTS+=" or (test_groupby and test_groupby_numerical_stability_cumsum)" +SKIP_TESTS+=" or (test_c_parser_only and test_float_precision_options)" +# run the slow tests only on x86_64 +SKIP_MARKERS+=" or slow or db" +%endif + +# The test collection consumes a lot of memory per worker. This sets %%jobs. 
+%limit_build -m 3072 + +%{python_expand $python -c 'import pandas; print(pandas.__path__); print(pandas.show_versions())' +# cache: can't just say no cacheprovider, because one test checks for the --lf option of pytest-cache +xvfb-run pytest-%{$python_bin_suffix} -v -n %{jobs} -rsfE --dist=loadfile \ + -o cache_dir=$PWD/.pytest_cache --cache-clear \ + -m "not (${SKIP_MARKERS})" \ + -k "not (${SKIP_TESTS})" \ + pandas +} +%endif + +%if !%{with test} +%files %{python_files} +%license LICENSE +%doc README.md +%{python_sitearch}/pandas/ +%{python_sitearch}/pandas-%{version}.dist-info + +%files %{python_files test} +%license LICENSE +%doc README.md + +%if !%{with ringdisabled} +%files %{python_files pyarrow} +%license LICENSE +%doc README.md + +%files %{python_files performance} +%license LICENSE +%doc README.md + +%if 0%{python_version_nodots} >= 310 +%files %{python_files computation} +%license LICENSE +%doc README.md +%endif + +%files %{python_files fss} +%license LICENSE +%doc README.md + +%if %{with aws} +%files %{python_files aws} +%license LICENSE +%doc README.md +%endif + +%if %{with gcp} +%files %{python_files gcp} +%license LICENSE +%doc README.md +%endif + +%files %{python_files excel} +%license LICENSE +%doc README.md + +%if %{with pyarrow} +%files %{python_files parquet} +%license LICENSE +%doc README.md +%endif + +%if %{with pyarrow} +%files %{python_files feather} +%license LICENSE +%doc README.md +%endif + +%files %{python_files hdf5} +%license LICENSE +%doc README.md + +%files %{python_files spss} +%license LICENSE +%doc README.md + +%files %{python_files postgresql} +%license LICENSE +%doc README.md + +%files %{python_files mysql} +%license LICENSE +%doc README.md + +%files %{python_files sql-other} +%license LICENSE +%doc README.md + +%files %{python_files html} +%license LICENSE +%doc README.md + +%files %{python_files xml} +%license LICENSE +%doc README.md + +%files %{python_files plot} +%license LICENSE +%doc README.md + +%files %{python_files 
output_formatting} +%license LICENSE +%doc README.md + +%files %{python_files clipboard} +%license LICENSE +%doc README.md + +%files %{python_files compression} +%license LICENSE +%doc README.md + +%if %{with consortium_standard} +%files %{python_files consortium-standard} +%license LICENSE +%doc README.md +%endif + +%files %{python_files all} +%license LICENSE +%doc README.md +%endif +%endif + +%changelog