From 1c13c94cc4dc2bf76f4fcc4d2e70d58283c98977 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Schr=C3=B6ter?= Date: Fri, 5 Jan 2024 10:05:27 +0100 Subject: [PATCH] Sync from SUSE:ALP:Source:Standard:1.0 python-pyarrow revision 6ec570a7419e065d40c7d80e89e12abd --- .gitattributes | 23 +++ _constraints | 11 ++ apache-arrow-14.0.1.tar.gz | 3 + python-pyarrow.changes | 333 +++++++++++++++++++++++++++++++++++++ python-pyarrow.rpmlintrc | 2 + python-pyarrow.spec | 163 ++++++++++++++++++ 6 files changed, 535 insertions(+) create mode 100644 .gitattributes create mode 100644 _constraints create mode 100644 apache-arrow-14.0.1.tar.gz create mode 100644 python-pyarrow.changes create mode 100644 python-pyarrow.rpmlintrc create mode 100644 python-pyarrow.spec diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..fecc750 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/_constraints b/_constraints new file mode 100644 index 0000000..dc3d920 --- /dev/null +++ b/_constraints @@ -0,0 +1,11 @@ + + + + 9 + + + 9 + + + + diff --git a/apache-arrow-14.0.1.tar.gz b/apache-arrow-14.0.1.tar.gz new file mode 100644 index 0000000..0f6665f --- /dev/null +++ b/apache-arrow-14.0.1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48e54a09d58168bc04d86b13e7dab04f0aaba18a6f7e4dadf3e9c7bb835c8f1 +size 20634558 diff --git a/python-pyarrow.changes b/python-pyarrow.changes new file mode 100644 index 0000000..04548c7 --- /dev/null +++ b/python-pyarrow.changes @@ -0,0 +1,333 @@ +------------------------------------------------------------------- +Tue Nov 14 23:29:03 UTC 2023 - Ondřej Súkup + +- Fix cve in changelog + +------------------------------------------------------------------- +Tue Nov 14 09:28:23 UTC 2023 - Ondřej Súkup + +- Update to 14.0.1 +- drop pyarrow-pr37481-pandas2.1.patch +- fixes boo#1216991 CVE-2023-47248 + * GH-38431 - [Python][CI] Update fs.type_name checks for s3fs tests + * GH-38607 - [Python] Disable PyExtensionType autoload +- update to 14.0.0 + * very long list of changes can be found here: + https://arrow.apache.org/release/14.0.0.html + +------------------------------------------------------------------- +Thu Aug 31 18:43:55 UTC 2023 - Ben Greiner + +- Update to 13.0.0 + ## Compatibility notes: + * The default format version for Parquet has been bumped from 2.4 + to 2.6 GH-35746. In practice, this means that nanosecond + timestamps now preserve its resolution instead of being + converted to microseconds. 
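A minimal sketch of the format-version change described above (illustrative only, not part of the packaged sources; the file names are placeholders): with the new 2.6 default a nanosecond timestamp column survives a Parquet round trip, while explicitly requesting the old 2.4 format coerces it to microseconds.

    import pyarrow as pa
    import pyarrow.parquet as pq

    table = pa.table({"ts": pa.array([0, 1_000_000], type=pa.timestamp("ns"))})

    pq.write_table(table, "ns.parquet")                 # format version 2.6 is now the default
    print(pq.read_table("ns.parquet").schema)           # ts: timestamp[ns] -- resolution preserved

    pq.write_table(table, "us.parquet", version="2.4")  # the previous default
    print(pq.read_table("us.parquet").schema)           # ts: timestamp[us] -- coerced as before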
+ * Support for Python 3.7 is dropped GH-34788 + ## New features: + * Conversion to non-nano datetime64 for pandas >= 2.0 is now + supported GH-33321 + * Write page index is now supported GH-36284 + * Bindings for reading JSON format in Dataset are added GH-34216 + * keys_sorted property of MapType is now exposed GH-35112 + ## Other improvements: + * Common python functionality between Table and RecordBatch + classes has been consolidated ( GH-36129, GH-35415, GH-35390, + GH-34979, GH-34868, GH-31868) + * Some functionality for FixedShapeTensorType has been improved + (__reduce__ GH-36038, picklability GH-35599) + * Pyarrow scalars can now be accepted in the array constructor + GH-21761 + * DataFrame Interchange Protocol implementation and usage is now + documented GH-33980 + * Conversion between Arrow and Pandas for map/pydict now has + enhanced support GH-34729 + * Usability of pc.map_lookup / MapLookupOptions is improved + GH-36045 + * zero_copy_only keyword can now also be accepted in + ChunkedArray.to_numpy() GH-34787 + * Python C++ codebase now has linter support in Archery and the + CI GH-35485 + ## Relevant bug fixes: + * __array__ numpy conversion for Table and RecordBatch is now + corrected so that np.asarray(pa.Table) doesn’t return a + transposed result GH-34886 + * parquet.write_to_dataset doesn’t create empty files for + non-observed dictionary (category) values anymore GH-23870 + * Dataset writer now also correctly follows default Parquet + version of 2.6 GH-36537 + * Comparing pyarrow.dataset.Partitioning with other type is now + correctly handled GH-36659 + * Pickling of pyarrow.dataset PartitioningFactory objects is now + supported GH-34884 + * None schema is now disallowed in parquet writer GH-35858 + * pa.FixedShapeTensorArray.to_numpy_ndarray is not failing on + sliced arrays GH-35573 + * Halffloat type is now supported in the conversion from Arrow + list to pandas GH-36168 + * __from_arrow__ is now also implemented for Array.to_pandas for + pandas extension data types GH-36096 +- Add pyarrow-pr37481-pandas2.1.patch gh#apache/arrow#37481 + +------------------------------------------------------------------- +Fri Aug 25 12:52:17 UTC 2023 - Ben Greiner + +- Limit to Cython < 3 + +------------------------------------------------------------------- +Mon Jun 12 12:22:31 UTC 2023 - Ben Greiner + +- Update to 12.0.1 + ## Bug Fixes + * [GH-35389] - [Python] Fix coalesce_keys=False option in join + operation (#35505) + * [GH-35821] - [Python][CI] Skip extension type test failing with + pandas 2.0.2 (#35822) + * [GH-35845] - [CI][Python] Fix usage of assert_frame_equal in + test_hdfs.py (#35842) + ## New Features and Improvements + * [GH-35329] - [Python] Address pandas.types.is_sparse deprecation + (#35366) +- Drop pyarrow-pr35822-pandas2-extensiontype.patch + +------------------------------------------------------------------- +Wed Jun 7 07:39:44 UTC 2023 - Ben Greiner + +- Skip invalid pandas 2 test + * pyarrow-pr35822-pandas2-extensiontype.patch + * gh#apache/arrow#35822 + * gh#apache/arrow#35839 + +------------------------------------------------------------------- +Thu May 18 07:28:28 UTC 2023 - Ben Greiner + +- Update to 12.0.0 + ## Compatibility notes: + * Plasma has been removed in this release (GH-33243). In + addition, the deprecated serialization module in PyArrow was + also removed (GH-29705). IPC (Inter-Process Communication) + functionality of pyarrow or the standard library pickle should + be used instead. 
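As a rough illustration of the recommendation above (a sketch, not part of this patch): a table can be round-tripped through Arrow IPC in place of the removed serialization module.

    import pyarrow as pa

    table = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]})

    # Write the table to an in-memory Arrow IPC stream ...
    sink = pa.BufferOutputStream()
    with pa.ipc.new_stream(sink, table.schema) as writer:
        writer.write_table(table)
    buf = sink.getvalue()

    # ... and read it back; the Arrow buffers are reused rather than re-serialized.
    reader = pa.ipc.open_stream(buf)
    assert reader.read_all().equals(table)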
+ * The deprecated use_async keyword has been removed from the + dataset module (GH-30774) + * Minimum Cython version to build PyArrow from source has been + raised to 0.29.31 (GH-34933). In addition, PyArrow can now be + compiled using Cython 3 (GH-34564). + ## New features: + * A new pyarrow.acero module with initial bindings for the Acero + execution engine has been added (GH-33976) + * A new canonical extension type for fixed shaped tensor data has + been defined. This is exposed in PyArrow as the + FixedShapeTensorType (GH-34882, GH-34956) + * Run-End Encoded arrays binding has been implemented (GH-34686, + GH-34568) + * Method is_nan has been added to Array, ChunkedArray and + Expression (GH-34154) + * Dataframe interchange protocol has been implemented for + RecordBatch (GH-33926) + ## Other improvements: + * Extension arrays can now be concatenated (GH-31868) + * get_partition_keys helper function is implemented in the + dataset module to access the partitioning field’s key/value + from the partition expression of a certain dataset fragment + (GH-33825) + * PyArrow Array objects can now be accepted by the pa.array() + constructor (GH-34411) + * The default row group size when writing parquet files has been + changed (GH-34280) + * RecordBatch has the select() method implemented (GH-34359) + * New method drop_column on the pyarrow.Table supports passing a + single column as a string (GH-33377) + * User-defined tabular functions, which are a user-functions + implemented in Python that return a stateful stream of tabular + data, are now also supported (GH-32916) + * Arrow Archery tool now includes linting of the Cython files + (GH-31905) + * Breaking Change: Reorder output fields of “group_by” node so + that keys/segment keys come before aggregates (GH-33616) + ## Relevant bug fixes: + * Acero can now detect and raise an error in case a join + operation needs too much bytes of key data (GH-34474) + * Fix for converting non-sequence object in pa.array() (GH-34944) + * Fix erroneous table conversion to pandas if table includes an + extension array that does not implement to_pandas_dtype + (GH-34906) + * Reading from a closed ArrayStreamBatchReader now returns + invalid status instead of segfaulting (GH-34165) + * array() now returns pyarrow.Array and not pyarrow.ChunkedArray + for columns with __arrow_array__ method and only one chunk so + that the conversion of pandas dataframe with categorical column + of dtype string[pyarrow] does not fail (GH-33727) + * Custom type mapper in to_pandas now converts index dtypes + together with column dtypes (GH-34283) + +------------------------------------------------------------------- +Wed Mar 29 13:25:55 UTC 2023 - Ben Greiner + +- Fix tests expecting the jemalloc backend which was disabled in + the apache-arrow package + +------------------------------------------------------------------- +Sun Mar 12 05:31:32 UTC 2023 - Ben Greiner + +- Update to v11.0.0 + * [Python][Doc] Add five more numpydoc checks to CI (#15214) + * [Python][CI][Doc] Enable numpydoc check PR03 (#13983) + * [Python] Expose flag to enable/disable storing Arrow schema in Parquet metadata (#13000) + * [Python] Add support for reading record batch custom metadata API (#13041) + * [Python] Add lazy Dataset.filter() method (#13409) + * [Python] ParquetDataset to still take legacy code path when old filesystem is passed (#15269) + * [Python] Switch default and deprecate use_legacy_dataset=True in ParquetDataset (#14052) + * [Python] Support lazy Dataset.filter + * [Python] Order of 
columns in pyarrow.feather.read_table (#14528) + * [Python] Construct MapArray from sequence of dicts (instead of list of tuples) (#14547) + * [Python] Unify CMakeLists.txt in python/ (#14925) + * [C++][Python] Implement list_slice kernel (#14395) + * [C++][Python] Enable struct_field kernel to accept string field names (#14495) + * [Python][C++] Add use\_threads to run\_substrait\_query + * [Python][Docs] adding info about TableGroupBy.aggregation with empty list (#14482) + * [Python] DataFrame Interchange Protocol for pyarrow Table + * [Python] Drop older versions of Pandas (<1.0) (#14631) + * [Python] Pass Cmake args to Python CPP + * [Docs][Python] Improve docs for S3FileSystem (#14599) + * [Python] Add missing value accessor to temporal types (#14746) + * [Python] Expose time32/time64 scalar values (#14637) + * [Python] Remove gcc 4.9 compatibility code (#14602) + * [C++][Python] Support slicing to end in list_slice kernel (#14749) + * [C++][Python] Support step >= 1 in list_slice kernel (#14696) + * [Release][Python] Upload .wheel/.tar.gz for release not RC (#14708) + * [Python] Expose Scalar.validate() (#15149) + * [Python] PyArrow C++ header files no longer always included in installed pyarrow (#14656) + * [Doc][Python] Update note about bundling Arrow C++ on Windows (#14660) + * [Python] Reduce warnings during tests (#14729) + * [Python] Expose reading a schema from an IPC message (#14831) + * [Python] Expose QuotingStyle to Python (#14722) + * [Python] Add (Chunked)Array sort() method (#14781) + * [Python] Dataset.sort_by (#14976) + * [Python] Avoid dependency on exec plan in Table.sort_by to fix minimal tests (#15268) + * [Python] Remove auto generated pyarrow_api.h and pyarrow_lib.h (#15219) + * [Python] Error if datetime.timedelta to pyarrow.duration conversion overflows (#13718) + * [Python] to_pandas fails with FixedOffset timezones when timestamp_as_object is used (#14448) + * [Python] Pass **kwargs in read_feather to to_pandas() (#14492) + * [Python] Add python test for decimals to csv (#14525) + * [Python] Test that reading of timedelta is stable (read_feather/to_pandas) (#14531) + * [C++][Python] Improve s3fs error message when wrong region (#14601) + * [Python][C++] Adding support for IpcWriteOptions to the dataset ipc file writer (#14414) + * [Python] Support passing create_dir thru pq.write_to_dataset (#14459) + * [CI][Python] Fix pandas master/nightly build failure related to timedelta (#14460) + * [Python] Fix writing files with multi-byte characters in file name (#14764) + * [Python] Handle pytest 8 deprecations about pytest.warns(None) + * [Python] Remove ARROW_BUILD_DIR in building pyarrow C++ (#14498) + * [Python] Honor default memory pool in Dataset scanning (#14516) + * [Python] Fully support filesystem in parquet.write_metadata (#14574) + * [Python] Check schema argument type in RecordBatchReader.from_batches (#14583) + * [Python][Docs] PyArrow table join docstring typos for left and right suffix arguments (#14591) + * [Python] pass back time types with correct type class (#14633) + * [Python] Support filesystem parameter in ParquetFile (#14717) + * [Python][Docs] Add missing CMAKE_PREFIX_PATH to allow setup.py CMake invocations to find Arrow CMake package (#14586) + * [Python][CI] Add DYLD_LIBRARY_PATH to avoid requiring PYARROW_BUNDLE_ARROW_CPP on macOS job (#14643) + * [Python] Don't crash when schema=None in FlightClient.do_put (#14698) + * [Python] Change warnings to _warnings in _plasma_store_entry_point (#14695) + * [CI][Python] Update nightly 
test-conda-python-3.7-pandas-0.24 to pandas >= 1.0 (#14714) + * [CI][Python] Update spark test modules to match spark master (#14715) + * [Python] Fix test_s3fs_wrong_region; set anonymous=True (#14716) + * [Python][CI] Fix nightly job using pandas dev (temporarily skip tests) (#15048) + * [Python] Quadratic memory usage of Table.to\_pandas with nested data + * [Python] Fix pyarrow.get_libraries() order (#14944) + * [Python] Fix segfault for dataset ORC write (#15049) + * [Python][Docs] Update docstring for pyarrow.decompress (#15061) + * [Python][CI] Dask nightly tests are failing due to fsspec bug (#15065) + * [C++][Python][FlightRPC] Make DoAction truly streaming (#15118) + * [Benchmarking][Python] Set ARROW_INSTALL_NAME_RPATH=ON for benchmark builds (#15123) + * [Python][macOS] Use `@rpath` for libarrow_python.dylib (#15143) + * [Python] Docstring test failure (#15186) + * [Python] Don't use target_include_directories() for imported target (#33606) + * [Python] Make CSV cancellation test more robust + * [Python][CI] Python sdist installation fails with latest setuptools 58.5 + * [Python] Missing bindings for existing\_data\_behavior makes it impossible to maintain old behavior + * [Python] update trove classifiers to include Python 3.10 + * [Release][Python] Use python -m pytest + * [Python][C++] Non-deterministic segfault in "AMD64 MacOS 10.15 Python 3.7" build + * [Python][Doc] Clarify what should be expected if read_table is passed an empty list of columns + * [Python][Packaging] Set deployment target to 10.13 for universal2 wheels + * [Python] Fix crash in take/filter of empty ExtensionArray + * [Python] Move marks from fixtures to individual tests/params + * [Python][CI] Requiring s3fs >= 2021.8 + * [Python] Allow writing datasets using a partitioning that only specifies field_names + * [Python] Table.from_arrays should raise an error when array is empty but names is not + * [Python][Packaging] Pin minimum setuptools version for the macos wheels + * [Python][Doc] Document nullable dtypes handling and usage of types_mapper in to_pandas conversion + * [C++][Python] Fix unique/value_counts on empty dictionary arrays + * [Python][CI] Fix tests using OrcFileFormat for Python 3.6 + orc not built + * [Python] Fix FlightClient.do_action + * [Python][Docs] Fix usage of sync scanner in dataset writing docs + * [Packaging][Python] Python 3.9 installation fails in macOS wheel build + * [CI][Python] Fix Spark integration failures + * [Python] Fix version constraints in pyproject.toml + * [Packaging][Python] Disable windows wheel testing for python 3.6 + * [Python][C++] Segfault with read\_json when a field is missing + * [Python] Support for set/list columns when converting from Pandas + * [Python] Support converting nested sets when converting to arrow + * [Python] Make filesystems compatible with fsspec + * [C++][Python][R] Consolidate coalesce/fill_null + * [Python][Doc] Document the fsspec wrapper for pyarrow.fs filesystems + * [Python] Support core-site.xml default filesystem. 
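A short sketch of the fsspec wrapper referenced in the notes above (illustrative; assumes the optional fsspec package is installed next to pyarrow):

    import fsspec
    from pyarrow.fs import FSSpecHandler, PyFileSystem

    # Wrap an fsspec filesystem (the local one here) so it can be passed to any
    # pyarrow API that expects a pyarrow.fs.FileSystem, e.g. dataset or parquet readers.
    fs = PyFileSystem(FSSpecHandler(fsspec.filesystem("file")))
    print(fs.get_file_info("/tmp"))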
+ * [Python] Improve HadoopFileSystem docstring + * [Python][Doc] Document missing pandas to arrow conversions + * [Python] Make SubTreeFileSystem print method more informative + * [Doc][Python] Improve documentation regarding dealing with memory mapped files + * [C++][Python] Implement a new scalar function: list_element + * [Python] Allow creating RecordBatch from Python dict + * [Python] Update HadoopFileSystem docs to clarify setting CLASSPATH env variable is required + * [Python] Improve documentation on what 'use_threads' does in 'read_feather' + * [C++][Python] Improve consistency of explicit C++ types in PyArrow files + * [Doc][Python] Improve PyArrow documentation for new users + * [C++][Python] Add CSV convert option to change decimal point + * [Python][Packaging] Build M1 wheels for python 3.8 + * [Release][Python] Verify python 3.8 macOS arm64 wheel + * [Doc][Python] Switch ipc/io doc to use context managers + * [Python] Mention alternative deprecation message for ParquetDataset.partitions + * [C++][Python] Implement ExtensionScalar + * [Packaging][Python] Skip test_cancellation test case on M1 + * [Python][FlightRPC] pyarrow client do_put close method after write_table did not throw flight error + * [Packaging][Python] Define --with-lg-page for jemalloc in the arm manylinux builds + * [Python] Fix docstrings + * [Python] Expose copy_files in pyarrow.fs + * [Doc][Python] Add a recipe on how to save partitioned datasets to the Cookbook + * [Python] Update deprecated pytest yield_fixture functions + * [Python] Support for MapType with Fields + * [Python][Docs] Improve filesystem documentation + * [Python] Add dataset mark to test_parquet_dataset_deprecated_properties + * . [Python] Preview data when printing tables + * [C++][Python] Column projection pushdown for ORC dataset reading + use liborc for column selection + * [C++][Python] Add support for new MonthDayNano Interval Type + * [Doc][Python] Add documentation for unify_schemas + * [C++][Python] Implement C data interface support for extension types + * [Python] Allow more than numpy.array as masks when creating arrays + * [Python] Correct TimestampScalar.as_py() and DurationScalar.as_py() docstrings + * [Python] Migrate Python ORC bindings to use new Result-based APIs + * [Python] Support tuples in unify_schemas + * [C++][Python] Not providing a sort_key in the "select_k_unstable" kernel crashes + * [C++][Python] Support cast of naive timestamps to strings + * [Python] Update kernel categories in compute doc to match C++ + * [C++][Python][R] Implement count distinct kernel + * [Python] Allow unsigned integer index type in dictionary() type factory function + * [Python] Missing Python tests for compute kernels + * [Python][CI] Add support for python 3.10 + * [C++][Python] Improve error message when trying use SyncScanner when requiring async + * [Python] Extend CompressedInputStream to work with paths, strings and files + * [Packaging][Python] Enable NEON SIMD optimization for M1 wheels + * [C++][Python] Use std::move() explicitly for g++ 4.8.5 + * [Python][Packaging] Use numpy 1.21.3 to build python 3.10 wheels for macOS and windows +- Build via PEP517 + +------------------------------------------------------------------- +Mon Aug 22 07:06:44 UTC 2022 - John Vandenberg + +- Update to v9.0.0 + +------------------------------------------------------------------- +Mon Jan 21 03:51:32 UTC 2019 - Todd R + +- Initial version for v0.13.0 diff --git a/python-pyarrow.rpmlintrc b/python-pyarrow.rpmlintrc new file mode 100644 index 
0000000..91f1ca4 --- /dev/null +++ b/python-pyarrow.rpmlintrc @@ -0,0 +1,2 @@ +# empty but necessary module file +addFilter("zero-length .*__init__") diff --git a/python-pyarrow.spec b/python-pyarrow.spec new file mode 100644 index 0000000..4dfed96 --- /dev/null +++ b/python-pyarrow.spec @@ -0,0 +1,163 @@ +# +# spec file for package python-pyarrow +# +# Copyright (c) 2023 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%bcond_with xsimd +%define plainpython python +Name: python-pyarrow +Version: 14.0.1 +Release: 0 +Summary: Python library for Apache Arrow +License: Apache-2.0 AND BSD-3-Clause AND BSD-2-Clause AND MIT +URL: https://arrow.apache.org/ +Source0: https://github.com/apache/arrow/archive/apache-arrow-%{version}.tar.gz +Source99: python-pyarrow.rpmlintrc +BuildRequires: %{python_module Cython >= 0.29.31 with %python-Cython < 3} +BuildRequires: %{python_module devel >= 3.8} +BuildRequires: %{python_module numpy-devel >= 1.16.6} +BuildRequires: %{python_module pip} +BuildRequires: %{python_module setuptools_scm} +BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module wheel} +BuildRequires: apache-arrow-acero-devel-static = %{version} +BuildRequires: apache-arrow-dataset-devel-static = %{version} +BuildRequires: apache-arrow-devel = %{version} +BuildRequires: apache-arrow-devel-static = %{version} +BuildRequires: apache-parquet-devel = %{version} +BuildRequires: apache-parquet-devel-static = %{version} +BuildRequires: cmake +BuildRequires: fdupes +BuildRequires: gcc-c++ +BuildRequires: libzstd-devel-static +BuildRequires: openssl-devel +BuildRequires: pkgconfig +BuildRequires: python-rpm-macros +BuildRequires: cmake(re2) +BuildRequires: pkgconfig(bzip2) >= 1.0.8 +BuildRequires: pkgconfig(gmock) >= 1.10 +BuildRequires: pkgconfig(gtest) >= 1.10 +Requires: python-numpy >= 1.16.6 +# SECTION test requirements +BuildRequires: %{python_module hypothesis} +BuildRequires: %{python_module pandas} +BuildRequires: %{python_module pytest-lazy-fixture} +BuildRequires: %{python_module pytest-xdist} +BuildRequires: %{python_module pytest} +# /SECTION +%python_subpackages + +%description +Python library for Apache Arrow. + +Apache Arrow defines a language-independent columnar +memory format for flat and hierarchical data, organized +for efficient analytic operations on modern hardware like +CPUs and GPUs. The Arrow memory format also supports +zero-copy reads for lightning-fast data access without +serialization overhead. + +Arrow's libraries implement the format and provide building +blocks for a range of use cases, including high performance +analytics. Many popular projects use Arrow to ship columnar +data efficiently or as the basis for analytic engines. 
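For orientation, a minimal usage sketch of what the packaged module provides (illustrative only, not shipped by this spec): a columnar table plus a zero-copy NumPy view of one of its columns.

    import pyarrow as pa

    table = pa.table({"id": [1, 2, 3], "value": [0.1, 0.2, 0.3]})
    print(table.schema)

    # A fixed-width column without nulls can be viewed as a NumPy array without copying.
    values = table.column("value").chunk(0)
    print(values.to_numpy(zero_copy_only=True))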
+ +%package devel +Summary: Python library for Apache Arrow - header files +Requires: python-Cython +Requires: python-pyarrow = %{version} +Requires: %plainpython(abi) = %python_version +Supplements: (python-devel and python-pyarrow) + +%description devel +Python library for Apache Arrow. + +This package provides the header files within the python +platlib for consuming modules using cythonization. + +%prep +%autosetup -p1 -n arrow-apache-arrow-%{version} +# we disabled the jemalloc backend in apache-arrow +sed -i 's/should_have_jemalloc = sys.platform == "linux"/should_have_jemalloc = False/' python/pyarrow/tests/test_memory.py + +%build +pushd python +export CFLAGS="%{optflags}" +export PYARROW_BUILD_TYPE=relwithdebinfo +export PYARROW_BUILD_VERBOSE=1 +%{?_smp_build_ncpus:export PYARROW_PARALLEL=%{_smp_build_ncpus}} +export PYARROW_WITH_HDFS=1 +export PYARROW_WITH_DATASET=1 +export PYARROW_WITH_PARQUET=1 +export PYARROW_WITH_PARQUET_ENCRYPTION=0 +export PYARROW_PARQUET_USE_SHARED=1 +# x86_64-v1 does not have the advanced SIMD instructions. TW is stuck on it, we can't have -v3 through hwcaps as non-lib. +export PYARROW_CMAKE_OPTIONS=" \ +%ifarch aarch64 + -DARROW_SIMD_LEVEL:STRING=%{?with_xsimd:NEON}%{!?with_xsimd:NONE} \ +%else + -DARROW_SIMD_LEVEL:STRING="NONE" \ +%endif + -DARROW_RUNTIME_SIMD_LEVEL:STRING=%{?with_xsimd:MAX}%{!?with_xsimd:NONE} \ +" +%pyproject_wheel +popd + +%install +pushd python +%pyproject_install +%python_expand %fdupes %{buildroot}%{$python_sitearch} +popd + +%check +# Unexpected additional warning +donttest="test_env_var" +# flaky +donttest="$donttest or test_total_bytes_allocated" +%ifarch %{ix86} %{arm32} +# tests conversion to 64bit datatypes +donttest="$donttest or test_conversion" +donttest="$donttest or test_dictionary_to_numpy" +donttest="$donttest or test_foreign_buffer" +donttest="$donttest or test_from_numpy_nested" +donttest="$donttest or test_integer_limits" +donttest="$donttest or test_memory_map_large_seeks" +donttest="$donttest or test_primitive_serialization" +donttest="$donttest or test_python_file_large_seeks" +donttest="$donttest or test_schema_sizeof" +%endif +%pytest_arch --pyargs pyarrow -n auto -k "not ($donttest)" +%pytest_arch --pyargs pyarrow -n auto -k "$donttest" || : + +%files %{python_files} +%doc README.md +%license LICENSE.txt NOTICE.txt +%{python_sitearch}/pyarrow +%exclude %{python_sitearch}/pyarrow/include +%exclude %{python_sitearch}/pyarrow/src +%exclude %{python_sitearch}/pyarrow/lib.h +%exclude %{python_sitearch}/pyarrow/lib_api.h +%{python_sitearch}/pyarrow-%{version}.dist-info + +%files %{python_files devel} +%doc README.md +%license LICENSE.txt NOTICE.txt +%{python_sitearch}/pyarrow/include +%{python_sitearch}/pyarrow/src +%{python_sitearch}/pyarrow/lib.h +%{python_sitearch}/pyarrow/lib_api.h + +%changelog
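To make the jemalloc-related adjustment in %prep concrete, a small sketch (not part of the spec) of how the active allocator backend can be checked at runtime; the names printed depend on how the distribution apache-arrow package was built:

    import pyarrow as pa

    print(pa.default_memory_pool().backend_name)  # e.g. "mimalloc" or "system", not "jemalloc" here

    try:
        pa.jemalloc_memory_pool()                 # only available if Arrow C++ was built with jemalloc
    except NotImplementedError:
        print("jemalloc allocator not available in this build")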