Sync from SUSE:SLFO:Main python-fastparquet revision 136ce88c10ae31bf7d1802e80f6b9f42
This commit is contained in:
commit
0ea456fc73
23
.gitattributes
vendored
Normal file
23
.gitattributes
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
## Default LFS
|
||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bsp filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gem filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lzma filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.obscpio filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.oxt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.png filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rpm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tbz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tbz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ttf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.txz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.whl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
BIN
fastparquet-2023.10.1.tar.gz
(Stored with Git LFS)
Normal file
BIN
fastparquet-2023.10.1.tar.gz
(Stored with Git LFS)
Normal file
Binary file not shown.
514
python-fastparquet.changes
Normal file
514
python-fastparquet.changes
Normal file
@ -0,0 +1,514 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Jan 22 12:41:30 UTC 2024 - Daniel Garcia <daniel.garcia@suse.com>
|
||||||
|
|
||||||
|
- Do not run tests in s390x, bsc#1218603
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Dec 5 12:23:32 UTC 2023 - Dirk Müller <dmueller@suse.com>
|
||||||
|
|
||||||
|
- update to 2023.10.0:
|
||||||
|
* Datetime units in empty() with tz (#893)
|
||||||
|
* Fewer inplace decompressions for V2 pages (#890
|
||||||
|
* Allow writing categorical column with no categories (#888)
|
||||||
|
* Fixes for new numpy (#886)
|
||||||
|
* RLE bools and DELTA for v1 pages (#885, 883)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Sep 11 21:29:16 UTC 2023 - Dirk Müller <dmueller@suse.com>
|
||||||
|
|
||||||
|
- update to 2023.8.0:
|
||||||
|
* More general timestamp units (#874)
|
||||||
|
* ReadTheDocs V2 (#871)
|
||||||
|
* Better roundtrip dtypes (#861, 859)
|
||||||
|
* No convert when computing bytes-per-item for str (#858)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sat Jul 1 20:05:36 UTC 2023 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- update to version 2023.7.0:
|
||||||
|
* Add test case for reading non-pandas parquet file (#870)
|
||||||
|
* Extra field when cloning ParquetFile (#866)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Apr 28 08:10:46 UTC 2023 - Dirk Müller <dmueller@suse.com>
|
||||||
|
|
||||||
|
- update to 2023.4.0:
|
||||||
|
* allow loading categoricals even if not so in the pandas metadata,
|
||||||
|
when a column is dict-encodedand we only have one row-group (#863)
|
||||||
|
 * apply dtype to the columns names series, even when selecting no
|
||||||
|
columns (#861, 859)
|
||||||
|
 * don't make strings while estimating bye column size (#858)
|
||||||
|
 * handle upstream depr (#857, 856)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Feb 9 15:55:08 UTC 2023 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- update to version 2023.2.0:
|
||||||
|
* revert one-level set of filters (#852)
|
||||||
|
* full size dict for decoding V2 pages (#850)
|
||||||
|
* infer_object_encoding fix (#847)
|
||||||
|
* row filtering with V2 pages (#845)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Feb 8 18:25:03 UTC 2023 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- specfile:
|
||||||
|
* remove fastparquet-pr835.patch, implemented upstream
|
||||||
|
|
||||||
|
- update to version 2023.1.0:
|
||||||
|
* big improvement to write speed
|
||||||
|
* paging support for bigger row-groups
|
||||||
|
* pandas 2.0 support
|
||||||
|
* delta for big-endian architecture
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Jan 2 20:38:49 UTC 2023 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- Update to 2022.12.0
|
||||||
|
* check all int32 values before passing to thrift writer
|
||||||
|
* fix type of num_rows to i64 for big single file
|
||||||
|
- Release 2022.11.0
|
||||||
|
* Switch to calver
|
||||||
|
* Speed up loading of nullable types
|
||||||
|
* Allow schema evolution by addition of columns
|
||||||
|
* Allow specifying dtypes of output
|
||||||
|
* update to scm versioning
|
||||||
|
* fixes to row filter, statistics and tests
|
||||||
|
* support pathlib.Paths
|
||||||
|
* JSON encoder options
|
||||||
|
- Drop fastparquet-pr813-updatefixes.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Dec 23 09:18:39 UTC 2022 - Guillaume GARDET <guillaume.gardet@opensuse.org>
|
||||||
|
|
||||||
|
- Add patch to fox the test test_delta_from_def_2 on
|
||||||
|
aarch64, armv7 and ppc64le:
|
||||||
|
* fastparquet-pr835.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Oct 28 15:47:41 UTC 2022 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- Update to 0.8.3
|
||||||
|
* improved key/value handling and rejection of bad types
|
||||||
|
* fix regression in consolidate_cats (caught in dask tests)
|
||||||
|
- Release 0.8.2
|
||||||
|
* datetime indexes initialised to 0 to prevent overflow from
|
||||||
|
randommemory
|
||||||
|
* case from csv_to_parquet where stats exists but has not nulls
|
||||||
|
entry
|
||||||
|
* define len and bool for ParquetFile
|
||||||
|
* maintain int types of optional data tha came from pandas
|
||||||
|
* fix for delta encoding
|
||||||
|
- Add fastparquet-pr813-updatefixes.patch gh#dask/fastparquet#813
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Apr 26 11:02:27 UTC 2022 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- Update to 0.8.1
|
||||||
|
* fix critical buffer overflow crash for large number of columns
|
||||||
|
and long column names
|
||||||
|
* metadata handling
|
||||||
|
* thrift int32 for list
|
||||||
|
* avoid error storing NaNs in column stats
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sat Jan 29 21:36:38 UTC 2022 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- Update to 0.8.0
|
||||||
|
* our own cythonic thrift implementation (drop thrift dependency)
|
||||||
|
* more in-place dataset editing ad reordering
|
||||||
|
* python 3.10 support
|
||||||
|
* fixes for multi-index and pandas types
|
||||||
|
- Clean test skips
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sun Jan 16 13:34:53 UTC 2022 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- Clean specfile from unused python36 conditionals
|
||||||
|
- Require thrift 0.15.0 (+patch) for Python 3.10 compatibility
|
||||||
|
* gh#dask/fastparquet#514
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sat Nov 27 20:34:53 UTC 2021 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- update to version 0.7.2:
|
||||||
|
* Ability to remove row-groups in-place for multifile datasets
|
||||||
|
* Accept pandas nullable Float type
|
||||||
|
* allow empty strings and fix min/max when there is no data
|
||||||
|
* make writing statistics optional
|
||||||
|
* row selection in to_pandas()
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sun Aug 8 15:13:55 UTC 2021 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- Update to version 0.7.1
|
||||||
|
* Back compile for older versions of numpy
|
||||||
|
* Make pandas nullable types opt-out. The old behaviour (casting
|
||||||
|
to float) is still available with ParquetFile(...,
|
||||||
|
pandas_nulls=False).
|
||||||
|
* Fix time field regression: IsAdjustedToUTC will be False when
|
||||||
|
there is no timezone
|
||||||
|
* Micro improvements to the speed of ParquetFile creation by
|
||||||
|
using simple simple string ops instead of regex and
|
||||||
|
regularising filenames once at the start. Effects datasets with
|
||||||
|
many files.
|
||||||
|
- Release 0.7.0
|
||||||
|
* This version institutes major, breaking changes, listed here,
|
||||||
|
and incremental fixes and additions.
|
||||||
|
* Reading a directory without a _metadata summary file now works
|
||||||
|
by providing only the directory, instead of a list of
|
||||||
|
constituent files. This change also makes direct of use of
|
||||||
|
fsspec filesystems, if given, to be able to load the footer
|
||||||
|
metadata areas of the files concurrently, if the storage
|
||||||
|
backend supports it, and not directly instantiating
|
||||||
|
intermediate ParquetFile instances
|
||||||
|
* row-level filtering of the data. Whereas previously, only full
|
||||||
|
row-groups could be excluded on the basis of their parquet
|
||||||
|
metadata statistics (if present), filtering can now be done
|
||||||
|
within row-groups too. The syntax is the same as before,
|
||||||
|
allowing for multiple column expressions to be combined with
|
||||||
|
AND|OR, depending on the list structure. This mechanism
|
||||||
|
requires two passes: one to load the columns needed to create
|
||||||
|
the boolean mask, and another to load the columns actually
|
||||||
|
needed in the output. This will not be faster, and may be
|
||||||
|
slower, but in some cases can save significant memory
|
||||||
|
footprint, if a small fraction of rows are considered good and
|
||||||
|
the columns for the filter expression are not in the output.
|
||||||
|
Not currently supported for reading with DataPageV2.
|
||||||
|
* DELTA integer encoding (read-only): experimentally working,
|
||||||
|
but we only have one test file to verify against, since it is
|
||||||
|
not trivial to persuade Spark to produce files encoded this
|
||||||
|
way. DELTA can be extremely compact a representation for
|
||||||
|
slowly varying and/or monotonically increasing integers.
|
||||||
|
* nanosecond resolution times: the new extended "logical" types
|
||||||
|
system supports nanoseconds alongside the previous millis and
|
||||||
|
micros. We now emit these for the default pandas time type,
|
||||||
|
and produce full parquet schema including both "converted" and
|
||||||
|
"logical" type information. Note that all output has
|
||||||
|
isAdjustedToUTC=True, i.e., these are timestamps rather than
|
||||||
|
local time. The time-zone is stored in the metadata, as
|
||||||
|
before, and will be successfully recreated only in fastparquet
|
||||||
|
and (py)arrow. Otherwise, the times will appear to be UTC. For
|
||||||
|
compatibility with Spark, you may still want to use
|
||||||
|
times="int96" when writing.
|
||||||
|
* DataPageV2 writing: now we support both reading and writing.
|
||||||
|
For writing, can be enabled with the environment variable
|
||||||
|
FASTPARQUET_DATAPAGE_V2, or module global fastparquet.writer.
|
||||||
|
DATAPAGE_VERSION and is off by default. It will become on by
|
||||||
|
default in the future. In many cases, V2 will result in better
|
||||||
|
read performance, because the data and page headers are
|
||||||
|
encoded separately, so data can be directly read into the
|
||||||
|
output without addition allocation/copies. This feature is
|
||||||
|
considered experimental, but we believe it working well for
|
||||||
|
most use cases (i.e., our test suite) and should be readable
|
||||||
|
by all modern parquet frameworks including arrow and spark.
|
||||||
|
* pandas nullable types: pandas supports "masked" extension
|
||||||
|
arrays for types that previously could not support NULL at
|
||||||
|
all: ints and bools. Fastparquet used to cast such columns to
|
||||||
|
float, so that we could represent NULLs as NaN; now we use the
|
||||||
|
new(er) masked types by default. This means faster reading of
|
||||||
|
such columns, as there is no conversion. If the metadata
|
||||||
|
guarantees that there are no nulls, we still use the
|
||||||
|
non-nullable variant unless the data was written with
|
||||||
|
fastparquet/pyarrow, and the metadata indicates that the
|
||||||
|
original datatype was nullable. We already handled writing of
|
||||||
|
nullable columns.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue May 18 14:41:46 UTC 2021 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- Update to version 0.6.3
|
||||||
|
* no release notes
|
||||||
|
* new requirement: cramjam instead of separate compression libs
|
||||||
|
and their bindings
|
||||||
|
* switch from numba to Cython
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Feb 12 14:50:18 UTC 2021 - Dirk Müller <dmueller@suse.com>
|
||||||
|
|
||||||
|
- skip python 36 build
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Feb 4 17:50:32 UTC 2021 - Jan Engelhardt <jengelh@inai.de>
|
||||||
|
|
||||||
|
- Use of "+=" in %check warrants bash as buildshell.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Wed Feb 3 21:43:10 UTC 2021 - Ben Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- Skip the import without warning test gh#dask/fastparquet#558
|
||||||
|
- Apply the Cepl-Strangelove-Parameter to pytest
|
||||||
|
(--import-mode append)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sat Jan 2 21:04:30 UTC 2021 - Benjamin Greiner <code@bnavigator.de>
|
||||||
|
|
||||||
|
- update to version 0.5
|
||||||
|
* no changelog
|
||||||
|
- update test suite setup -- install the .test module
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sat Jul 18 18:13:53 UTC 2020 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- specfile:
|
||||||
|
* update requirements: version numbers and added packaging
|
||||||
|
|
||||||
|
- update to version 0.4.1:
|
||||||
|
* nulls, fixes #504
|
||||||
|
* deps: Add missing dependency on packaging. (#502)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Jul 9 14:04:10 UTC 2020 - Marketa Calabkova <mcalabkova@suse.com>
|
||||||
|
|
||||||
|
- Update to 0.4.0
|
||||||
|
* Changed RangeIndex private methods to public ones
|
||||||
|
* Use the python executable used to run the code
|
||||||
|
* Add support for Python 3.8
|
||||||
|
* support for numba > 0.48
|
||||||
|
- drop upstreamed patch use-python-exec.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Apr 6 06:54:36 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
|
||||||
|
|
||||||
|
- Add patch to use sys.executable and not call py2 binary directly:
|
||||||
|
* use-python-exec.patch
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon Apr 6 06:50:26 UTC 2020 - Tomáš Chvátal <tchvatal@suse.com>
|
||||||
|
|
||||||
|
- Update to 0.3.3:
|
||||||
|
* no upstream changelog
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Oct 25 17:50:50 UTC 2019 - Todd R <toddrme2178@gmail.com>
|
||||||
|
|
||||||
|
- Drop broken python 2 support.
|
||||||
|
- Testing fixes
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sat Aug 3 15:10:41 UTC 2019 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- update to version 0.3.2:
|
||||||
|
* Only calculate dataset stats once (#453)
|
||||||
|
* Fixes #436 (#452)
|
||||||
|
* Fix a crash if trying to read a file whose created_by value is not
|
||||||
|
set
|
||||||
|
* COMPAT: Fix for pandas DeprecationWarning (#446)
|
||||||
|
* Apply timezone to index (#439)
|
||||||
|
* Handle NaN partition values (#438)
|
||||||
|
* Pandas meta (#431)
|
||||||
|
* Only strip _metadata from end of file path (#430)
|
||||||
|
* Simple nesting fix (#428)
|
||||||
|
* Disallow bad tz on save, warn on load (#427)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Jul 30 14:23:21 UTC 2019 - Todd R <toddrme2178@gmail.com>
|
||||||
|
|
||||||
|
- Fix spurious test failure
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Mon May 20 15:12:11 CEST 2019 - Matej Cepl <mcepl@suse.com>
|
||||||
|
|
||||||
|
- Clean up SPEC file.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Tue Apr 30 14:28:46 UTC 2019 - Todd R <toddrme2178@gmail.com>
|
||||||
|
|
||||||
|
- update to 0.3.1
|
||||||
|
* Add schema == (__eq__) and != (__ne__) methods and tests.
|
||||||
|
* Fix item iteration for decimals
|
||||||
|
* List missing columns in error message
|
||||||
|
* Fix tz being None case
|
||||||
|
- Update to 0.3.0
|
||||||
|
* Squash some warnings and import failures
|
||||||
|
* Improvements to in and not in operators
|
||||||
|
* Fixes because pandas released
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sat Jan 26 17:05:09 UTC 2019 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- specfile:
|
||||||
|
* update copyright year
|
||||||
|
|
||||||
|
- update to version 0.2.1:
|
||||||
|
* Compat for pandas 0.24.0 refactor (#390)
|
||||||
|
* Change OverflowError message when failing on large pages (#387)
|
||||||
|
* Allow for changes in dictionary while reading a row-group column
|
||||||
|
(#367)
|
||||||
|
* Correct pypi project names for compression libraries (#385)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Nov 22 22:47:24 UTC 2018 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- update to version 0.2.0:
|
||||||
|
* Don't mutate column list input (#383) (#384)
|
||||||
|
* Add optional requirements to extras_require (#380)
|
||||||
|
* Fix "broken link to parquet-format page" (#377)
|
||||||
|
* Add .c file to repo
|
||||||
|
* Handle rows split across 2 pages in the case of a map (#369)
|
||||||
|
* Fixes 370 (#371)
|
||||||
|
* Handle multi-page maps (#368)
|
||||||
|
* Handle zero-column files. Closes #361. (#363)
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Sun Sep 30 16:22:56 UTC 2018 - Arun Persaud <arun@gmx.de>
|
||||||
|
|
||||||
|
- specfile:
|
||||||
|
* update url
|
||||||
|
* make %files section more specific
|
||||||
|
|
||||||
|
- update to version 0.1.6:
|
||||||
|
* Restrict what categories get passed through (#358)
|
||||||
|
* Deep digging for multi-indexes (#356)
|
||||||
|
* allow_empty is the default in >=zstandard-0.9 (#355)
|
||||||
|
* Remove setup_requires from setup.py (#345)
|
||||||
|
* Fixed error if a certain partition is empty, when writing a
|
||||||
|
partioned (#347)
|
||||||
|
* Allow UTF8 column names to be read (#342)
|
||||||
|
* readd test file
|
||||||
|
* Allow for NULL converted type (#340)
|
||||||
|
* Robust partition names (#336)
|
||||||
|
* Fix accidental multiindex
|
||||||
|
* Read multi indexes (#331)
|
||||||
|
* Allow reading from any file-like (#330)
|
||||||
|
* change `parquet-format` link to apache repo (#328)
|
||||||
|
* Remove extra space from api.py (#325)
|
||||||
|
* numba bool fun (#324)
|
||||||
|
|
||||||
|
- changes from version 0.1.5:
|
||||||
|
* Fix _dtypes to be more efficient, to work with files with lots of
|
||||||
|
columns (#318)
|
||||||
|
* Buildfix (#313)
|
||||||
|
* Use LZ4 block compression for compatibility with parquet-cpp
|
||||||
|
(#314) (#315)
|
||||||
|
* Fix typo in ParquetFile docstring (#312)
|
||||||
|
* Remove annoying print() when reading file with CategoricalDtype
|
||||||
|
index (#311)
|
||||||
|
* Allow lists of multi-file data-sets (#309)
|
||||||
|
* Acceleate dataframe.empty for small/medium sizes (#307)
|
||||||
|
* Include dictionary page in column size (#306)
|
||||||
|
* Fix for selecting columns which were used for partitioning (#304)
|
||||||
|
* Remove occurances of np.fromstring (#303)
|
||||||
|
* Add support for zstandard compression (#296)
|
||||||
|
* Int96time order (#298)
|
||||||
|
|
||||||
|
- changes from version 0.1.4:
|
||||||
|
* Add handling of keyword arguments for compressor (#294)
|
||||||
|
* Fix setup.py duplication (#295)
|
||||||
|
* Integrate pytest with setup.py (#293)
|
||||||
|
* Get setup.py pytest to work. (#287)
|
||||||
|
* Add LZ4 support (#292)
|
||||||
|
* Update for forthcoming thrift release (#281)
|
||||||
|
* If timezones are in pandas metadata, assign columns as required
|
||||||
|
(#285)
|
||||||
|
* Pandas import (#284)
|
||||||
|
* Copy FMDs instead of mutate (#279)
|
||||||
|
* small fixes (#278)
|
||||||
|
* fixes to get benchmark to work (#276)
|
||||||
|
* backwards compat with Dask
|
||||||
|
* Fix test_time_millis on Windows (#275)
|
||||||
|
* join paths os-independently (#271)
|
||||||
|
* Adds int32 support for object encoding (#268)
|
||||||
|
* Fix a couple small typos in documentation (#267)
|
||||||
|
* Partition order should be sorted (#265)
|
||||||
|
* COMPAT: Update thrift (#264)
|
||||||
|
* Speedups result (#253)
|
||||||
|
* Remove thrift_copy
|
||||||
|
* Define `__copy__` on thrift structures
|
||||||
|
* Update rtd deps
|
||||||
|
|
||||||
|
- changes from version 0.1.3:
|
||||||
|
* More care over append when partitioning multiple columns
|
||||||
|
* Sep for windows cats filtering
|
||||||
|
* Move pytest imports to tests/ remove requirememnt
|
||||||
|
* Special-case only zeros
|
||||||
|
* Cope with partition values like "07"
|
||||||
|
* fix for s3
|
||||||
|
* Fix for list of paths rooted in the current directory
|
||||||
|
* add test
|
||||||
|
* Explicit file opens
|
||||||
|
* update docstring
|
||||||
|
* Refactor partition interpretation
|
||||||
|
* py2 fix
|
||||||
|
* Error in test changed
|
||||||
|
* Better error messages when failed to cnovert on write
|
||||||
|
|
||||||
|
- changes from version 0.1.2:
|
||||||
|
* Revert accidental removal of s3 import
|
||||||
|
* Move thrift things together, and make thrift serializer for pickle
|
||||||
|
* COMPAT: for new pandas CategoricalDtype
|
||||||
|
* Fixup for backwards seeking.
|
||||||
|
* Fix some test failures
|
||||||
|
* Protptype version using thrift instead of thriftpy
|
||||||
|
* Not all mergers have cats
|
||||||
|
* Revert accidental deletion
|
||||||
|
* remove warnings
|
||||||
|
* Sort keys in json for metadata
|
||||||
|
* Check column chunks for categories sizes
|
||||||
|
* Account for partition dir names with numbers
|
||||||
|
* Fix map/list doc
|
||||||
|
* Catch more stats errors
|
||||||
|
* Prevent pandas auto-names being given to index
|
||||||
|
|
||||||
|
- changes from version 0.1.1:
|
||||||
|
* Add workaround for single-value-partition
|
||||||
|
* update test
|
||||||
|
* Simplify and fix for py2
|
||||||
|
* Use thrift encoding on statistics strings
|
||||||
|
* remove redundant SNAPPY from supported compressions list
|
||||||
|
* Fix statistics
|
||||||
|
* lists again
|
||||||
|
* Always convert int96 to times
|
||||||
|
* Update docs
|
||||||
|
* attribute typo
|
||||||
|
* Fix definition level
|
||||||
|
* Add test, clean columns
|
||||||
|
* Allow optional->optional lists and maps
|
||||||
|
* Flatten schema to enable loading of non-repeated columns
|
||||||
|
* Remove extra file
|
||||||
|
* Fix py2
|
||||||
|
* Fix "in" filter to cope with strings that could be numbers
|
||||||
|
* Allow pip install without NumPy or Cython
|
||||||
|
|
||||||
|
- changes from version 0.1.0:
|
||||||
|
* Add ParquetFile attribute documentation
|
||||||
|
* Fix tests
|
||||||
|
* Enable append to an empty dataset
|
||||||
|
* More warning words and check on partition_on
|
||||||
|
* Do not fail stats if there are no row-groups
|
||||||
|
* Fix "numpy_dtype"->"numpy_type
|
||||||
|
* "in" was checking range not exact membership of set
|
||||||
|
* If metadata gives index, put in columns
|
||||||
|
* Fix pytest warning
|
||||||
|
* Fail on ordering dict statistics
|
||||||
|
* Fix stats filter
|
||||||
|
* clean test
|
||||||
|
* Fix ImportWarning on Python 3.6+
|
||||||
|
* TEST: added updated test file for special strings used in filters
|
||||||
|
* fix links
|
||||||
|
* [README]: indicate dependency on LLVM 4.0.x.
|
||||||
|
* Filter stats had unfortunate converted_type check
|
||||||
|
* Ignore exceptions in val_to_num
|
||||||
|
* Also for TODAY
|
||||||
|
* Very special case for partition: NOW should be kept as string
|
||||||
|
* Allow partition_on; fix category nuls
|
||||||
|
* Remove old category key/values on writing
|
||||||
|
* Implement writing pandas metadata and auto-setting cats/index
|
||||||
|
* Pandas compatability
|
||||||
|
* Test and fix for filter on single file
|
||||||
|
* Do not attempt to recurse into schema elements with zero childrean
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu Jun 7 20:41:31 UTC 2018 - jengelh@inai.de
|
||||||
|
|
||||||
|
- Fixup grammar./Replace future aims with what it does now.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu May 3 14:07:08 UTC 2018 - toddrme2178@gmail.com
|
||||||
|
|
||||||
|
- Use %license tag
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Thu May 25 12:19:26 UTC 2017 - toddrme2178@gmail.com
|
||||||
|
|
||||||
|
- Initial version
|
92
python-fastparquet.spec
Normal file
92
python-fastparquet.spec
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
#
|
||||||
|
# spec file for package python-fastparquet
|
||||||
|
#
|
||||||
|
# Copyright (c) 2024 SUSE LLC
|
||||||
|
#
|
||||||
|
# All modifications and additions to the file contributed by third parties
|
||||||
|
# remain the property of their copyright owners, unless otherwise agreed
|
||||||
|
# upon. The license for this file, and modifications and additions to the
|
||||||
|
# file, is the same license as for the pristine package itself (unless the
|
||||||
|
# license for the pristine package is not an Open Source License, in which
|
||||||
|
# case the license is the MIT License). An "Open Source License" is a
|
||||||
|
# license that conforms to the Open Source Definition (Version 1.9)
|
||||||
|
# published by the Open Source Initiative.
|
||||||
|
|
||||||
|
# Please submit bugfixes or comments via https://bugs.opensuse.org/
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
%{?sle15_python_module_pythons}
|
||||||
|
Name: python-fastparquet
|
||||||
|
Version: 2023.10.1
|
||||||
|
Release: 0
|
||||||
|
Summary: Python support for Parquet file format
|
||||||
|
License: Apache-2.0
|
||||||
|
URL: https://github.com/dask/fastparquet/
|
||||||
|
# Use GitHub archive, because it containts the test modules and data, requires setting version manuall for setuptools_scm
|
||||||
|
Source: https://github.com/dask/fastparquet/archive/%{version}.tar.gz#/fastparquet-%{version}.tar.gz
|
||||||
|
BuildRequires: %{python_module Cython >= 0.29.23}
|
||||||
|
BuildRequires: %{python_module base >= 3.8}
|
||||||
|
BuildRequires: %{python_module cramjam >= 2.3.0}
|
||||||
|
# version requirement not declared for runtime, but necessary for tests.
|
||||||
|
BuildRequires: %{python_module fsspec >= 2021.6.0}
|
||||||
|
BuildRequires: %{python_module numpy-devel >= 1.20.3}
|
||||||
|
BuildRequires: %{python_module packaging}
|
||||||
|
BuildRequires: %{python_module pandas >= 1.5.0}
|
||||||
|
BuildRequires: %{python_module pip}
|
||||||
|
BuildRequires: %{python_module pytest-asyncio}
|
||||||
|
BuildRequires: %{python_module pytest-xdist}
|
||||||
|
BuildRequires: %{python_module pytest}
|
||||||
|
BuildRequires: %{python_module python-lzo}
|
||||||
|
BuildRequires: %{python_module setuptools_scm > 1.5.4}
|
||||||
|
BuildRequires: %{python_module setuptools}
|
||||||
|
BuildRequires: %{python_module wheel}
|
||||||
|
BuildRequires: fdupes
|
||||||
|
BuildRequires: git-core
|
||||||
|
BuildRequires: python-rpm-macros
|
||||||
|
Requires: python-cramjam >= 2.3.0
|
||||||
|
Requires: python-fsspec
|
||||||
|
Requires: python-numpy >= 1.20.3
|
||||||
|
Requires: python-packaging
|
||||||
|
Requires: python-pandas >= 1.5.0
|
||||||
|
Recommends: python-python-lzo
|
||||||
|
%python_subpackages
|
||||||
|
|
||||||
|
%description
|
||||||
|
This is a Python implementation of the parquet format
|
||||||
|
for integrating it into python-based Big Data workflows.
|
||||||
|
|
||||||
|
%prep
|
||||||
|
%autosetup -p1 -n fastparquet-%{version}
|
||||||
|
# remove pytest-runner from setup_requires
|
||||||
|
sed -i "s/'pytest-runner',//" setup.py
|
||||||
|
# this is not meant for setup.py
|
||||||
|
sed -i "s/oldest-supported-numpy/numpy/" setup.py
|
||||||
|
# the tests import the fastparquet.test module and we need to import from sitearch, so install it.
|
||||||
|
sed -i -e "s/^\s*packages=\[/&'fastparquet.test', /" -e "/exclude_package_data/ d" setup.py
|
||||||
|
|
||||||
|
%build
|
||||||
|
export CFLAGS="%{optflags}"
|
||||||
|
export SETUPTOOLS_SCM_PRETEND_VERSION=%{version}
|
||||||
|
%pyproject_wheel
|
||||||
|
|
||||||
|
%install
|
||||||
|
%pyproject_install
|
||||||
|
%python_expand rm -v %{buildroot}%{$python_sitearch}/fastparquet/{speedups,cencoding}.c
|
||||||
|
%python_expand %fdupes %{buildroot}%{$python_sitearch}
|
||||||
|
|
||||||
|
%check
|
||||||
|
%ifarch s390x
|
||||||
|
# Test suite is not working correctly in s390x so not running it.
|
||||||
|
echo "Not running tests for s390x"
|
||||||
|
%else
|
||||||
|
%pytest_arch --pyargs fastparquet --import-mode append -n auto
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%files %{python_files}
|
||||||
|
%doc README.rst
|
||||||
|
%license LICENSE
|
||||||
|
%{python_sitearch}/fastparquet
|
||||||
|
%{python_sitearch}/fastparquet-%{version}*-info
|
||||||
|
|
||||||
|
%changelog
|
Loading…
Reference in New Issue
Block a user