From bd58ced90568bf3c63cc80e88190896e36258104521cfb522393dd6d37221a42 Mon Sep 17 00:00:00 2001 From: Benjamin Greiner Date: Fri, 8 Sep 2023 07:19:13 +0000 Subject: [PATCH] Accepting request 1109687 from home:bnavigator:branches:devel:languages:python:numeric MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update to 13.0.0 ## Compatibility notes: * The default format version for Parquet has been bumped from 2.4 to 2.6 GH-35746. In practice, this means that nanosecond timestamps now preserve their resolution instead of being converted to microseconds. * Support for Python 3.7 is dropped GH-34788 ## New features: * Conversion to non-nano datetime64 for pandas >= 2.0 is now supported GH-33321 * Write page index is now supported GH-36284 * Bindings for reading JSON format in Dataset are added GH-34216 * keys_sorted property of MapType is now exposed GH-35112 ## Other improvements: * Common Python functionality between Table and RecordBatch classes has been consolidated (GH-36129, GH-35415, GH-35390, GH-34979, GH-34868, GH-31868) * Some functionality for FixedShapeTensorType has been improved (__reduce__ GH-36038, picklability GH-35599) * Pyarrow scalars can now be accepted in the array constructor GH-21761 * DataFrame Interchange Protocol implementation and usage is now documented GH-33980 * Conversion between Arrow and Pandas for map/pydict now has enhanced support GH-34729 * Usability of pc.map_lookup / MapLookupOptions is improved GH-36045 * zero_copy_only keyword can now also be accepted in ChunkedArray.to_numpy() GH-34787 * Python C++ codebase now has linter support in Archery and the CI GH-35485 ## Relevant bug fixes: * __array__ numpy conversion for Table and RecordBatch is now corrected so that np.asarray(pa.Table) doesn’t return a transposed result GH-34886 * parquet.write_to_dataset doesn’t create empty files for non-observed dictionary (category) values anymore GH-23870 * Dataset writer now also correctly follows 
default Parquet version of 2.6 GH-36537 * Comparing pyarrow.dataset.Partitioning with other type is now correctly handled GH-36659 * Pickling of pyarrow.dataset PartitioningFactory objects is now supported GH-34884 * None schema is now disallowed in parquet writer GH-35858 * pa.FixedShapeTensorArray.to_numpy_ndarray is not failing on sliced arrays GH-35573 * Halffloat type is now supported in the conversion from Arrow list to pandas GH-36168 * __from_arrow__ is now also implemented for Array.to_pandas for pandas extension data types GH-36096 - Add pyarrow-pr37481-pandas2.1.patch gh#apache/arrow#37481 OBS-URL: https://build.opensuse.org/request/show/1109687 OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:numeric/python-pyarrow?expand=0&rev=13 --- apache-arrow-12.0.1.tar.gz | 3 -- apache-arrow-13.0.0.tar.gz | 3 ++ pyarrow-pr37481-pandas2.1.patch | 32 +++++++++++++++++++ python-pyarrow.changes | 55 +++++++++++++++++++++++++++++++++ python-pyarrow.rpmlintrc | 4 +-- python-pyarrow.spec | 6 ++-- 6 files changed, 95 insertions(+), 8 deletions(-) delete mode 100644 apache-arrow-12.0.1.tar.gz create mode 100644 apache-arrow-13.0.0.tar.gz create mode 100644 pyarrow-pr37481-pandas2.1.patch diff --git a/apache-arrow-12.0.1.tar.gz b/apache-arrow-12.0.1.tar.gz deleted file mode 100644 index f0a0304..0000000 --- a/apache-arrow-12.0.1.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f01b76a42ceb30409e7b1953ef64379297dd0c08502547cae6aaafd2c4a4d92e -size 19854518 diff --git a/apache-arrow-13.0.0.tar.gz b/apache-arrow-13.0.0.tar.gz new file mode 100644 index 0000000..271e4eb --- /dev/null +++ b/apache-arrow-13.0.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c27e6a517c750f29c3e6b264836e31251bb8e978dbbf11316680ca3eb8ebda +size 20216422 diff --git a/pyarrow-pr37481-pandas2.1.patch b/pyarrow-pr37481-pandas2.1.patch new file mode 100644 index 0000000..aca2341 --- /dev/null +++ 
b/pyarrow-pr37481-pandas2.1.patch @@ -0,0 +1,32 @@ +From e454be85598b84631f41f8fd33afcff24cec5578 Mon Sep 17 00:00:00 2001 +From: Dane Pitkin +Date: Wed, 30 Aug 2023 16:36:29 -0400 +Subject: [PATCH] Bump pandas version that contains regression for pandas issue + 50127 + +--- + python/pyarrow/tests/test_pandas.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py +index ef6ddd09933c9..67502af443302 100644 +--- a/python/pyarrow/tests/test_pandas.py ++++ b/python/pyarrow/tests/test_pandas.py +@@ -457,7 +457,7 @@ def test_mixed_column_names(self): + preserve_index=True) + + def test_binary_column_name(self): +- if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"): ++ if Version("2.0.0") <= Version(pd.__version__) < Version("2.2.0"): + # TODO: regression in pandas, hopefully fixed in next version + # https://issues.apache.org/jira/browse/ARROW-18394 + # https://github.com/pandas-dev/pandas/issues/50127 +@@ -3083,7 +3083,7 @@ def _fully_loaded_dataframe_example(): + + @pytest.mark.parametrize('columns', ([b'foo'], ['foo'])) + def test_roundtrip_with_bytes_unicode(columns): +- if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"): ++ if Version("2.0.0") <= Version(pd.__version__) < Version("2.2.0"): + # TODO: regression in pandas, hopefully fixed in next version + # https://issues.apache.org/jira/browse/ARROW-18394 + # https://github.com/pandas-dev/pandas/issues/50127 diff --git a/python-pyarrow.changes b/python-pyarrow.changes index 6c700a0..4ed430e 100644 --- a/python-pyarrow.changes +++ b/python-pyarrow.changes @@ -1,3 +1,58 @@ +------------------------------------------------------------------- +Thu Aug 31 18:43:55 UTC 2023 - Ben Greiner + +- Update to 13.0.0 + ## Compatibility notes: + * The default format version for Parquet has been bumped from 2.4 + to 2.6 GH-35746. 
In practice, this means that nanosecond + timestamps now preserve its resolution instead of being + converted to microseconds. + * Support for Python 3.7 is dropped GH-34788 + ## New features: + * Conversion to non-nano datetime64 for pandas >= 2.0 is now + supported GH-33321 + * Write page index is now supported GH-36284 + * Bindings for reading JSON format in Dataset are added GH-34216 + * keys_sorted property of MapType is now exposed GH-35112 + ## Other improvements: + * Common python functionality between Table and RecordBatch + classes has been consolidated ( GH-36129, GH-35415, GH-35390, + GH-34979, GH-34868, GH-31868) + * Some functionality for FixedShapeTensorType has been improved + (__reduce__ GH-36038, picklability GH-35599) + * Pyarrow scalars can now be accepted in the array constructor + GH-21761 + * DataFrame Interchange Protocol implementation and usage is now + documented GH-33980 + * Conversion between Arrow and Pandas for map/pydict now has + enhanced support GH-34729 + * Usability of pc.map_lookup / MapLookupOptions is improved + GH-36045 + * zero_copy_only keyword can now also be accepted in + ChunkedArray.to_numpy() GH-34787 + * Python C++ codebase now has linter support in Archery and the + CI GH-35485 + ## Relevant bug fixes: + * __array__ numpy conversion for Table and RecordBatch is now + corrected so that np.asarray(pa.Table) doesn’t return a + transposed result GH-34886 + * parquet.write_to_dataset doesn’t create empty files for + non-observed dictionary (category) values anymore GH-23870 + * Dataset writer now also correctly follows default Parquet + version of 2.6 GH-36537 + * Comparing pyarrow.dataset.Partitioning with other type is now + correctly handled GH-36659 + * Pickling of pyarrow.dataset PartitioningFactory objects is now + supported GH-34884 + * None schema is now disallowed in parquet writer GH-35858 + * pa.FixedShapeTensorArray.to_numpy_ndarray is not failing on + sliced arrays GH-35573 + * Halffloat type is now supported 
in the conversion from Arrow + list to pandas GH-36168 + * __from_arrow__ is now also implemented for Array.to_pandas for + pandas extension data types GH-36096 +- Add pyarrow-pr37481-pandas2.1.patch gh#apache/arrow#37481 + ------------------------------------------------------------------- Fri Aug 25 12:52:17 UTC 2023 - Ben Greiner diff --git a/python-pyarrow.rpmlintrc b/python-pyarrow.rpmlintrc index 33ca7eb..fc787ae 100644 --- a/python-pyarrow.rpmlintrc +++ b/python-pyarrow.rpmlintrc @@ -1,4 +1,2 @@ -# header files in arch dependent sitearch -addFilter("devel.* no-binary") # empty but necessary module file -addFilter("zero-length .*__init__") \ No newline at end of file +addFilter("zero-length .*__init__") diff --git a/python-pyarrow.spec b/python-pyarrow.spec index 04d5216..821756e 100644 --- a/python-pyarrow.spec +++ b/python-pyarrow.spec @@ -19,7 +19,7 @@ %bcond_with xsimd %define plainpython python Name: python-pyarrow -Version: 12.0.1 +Version: 13.0.0 Release: 0 Summary: Python library for Apache Arrow License: Apache-2.0 AND BSD-3-Clause AND BSD-2-Clause AND MIT @@ -27,8 +27,10 @@ Group: Development/Languages/Python URL: https://arrow.apache.org/ Source0: https://github.com/apache/arrow/archive/apache-arrow-%{version}.tar.gz Source99: python-pyarrow.rpmlintrc +# PATCH-FIX-UPSTREAM pyarrow-pr37481-pandas2.1.patch gh#apache/arrow#37481 +Patch0: pyarrow-pr37481-pandas2.1.patch BuildRequires: %{python_module Cython >= 0.29.31 with %python-Cython < 3} -BuildRequires: %{python_module devel} +BuildRequires: %{python_module devel >= 3.8} BuildRequires: %{python_module numpy-devel >= 1.16.6} BuildRequires: %{python_module pip} BuildRequires: %{python_module setuptools_scm}