diff --git a/fastparquet-2022.12.0.tar.gz b/fastparquet-2022.12.0.tar.gz deleted file mode 100644 index 0c361cf..0000000 --- a/fastparquet-2022.12.0.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e098493e2e87f8e004c3fc9b4e94ef57df9d7656fd8aa1a5626b6e6f3ff2b8e -size 28903475 diff --git a/fastparquet-2023.2.0.tar.gz b/fastparquet-2023.2.0.tar.gz new file mode 100644 index 0000000..18db672 --- /dev/null +++ b/fastparquet-2023.2.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6224c7fda7d900e4334b339c4376b58ad2c27892fe7faa67ae28c380100fbe24 +size 28902717 diff --git a/fastparquet-pr835.patch b/fastparquet-pr835.patch deleted file mode 100644 index fabf7fa..0000000 --- a/fastparquet-pr835.patch +++ /dev/null @@ -1,183 +0,0 @@ -From f035a2d7b37e2cbdef1a99bc6130be7e4afcf35f Mon Sep 17 00:00:00 2001 -From: Martin Durant -Date: Thu, 22 Dec 2022 12:57:32 -0500 -Subject: [PATCH 1/5] more print - ---- - fastparquet/cencoding.pyx | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx -index 4ab48be6..c151aff9 100644 ---- a/fastparquet/cencoding.pyx -+++ b/fastparquet/cencoding.pyx -@@ -225,6 +225,7 @@ cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth, - stop += 8 - else: - o.write_int((data >> stop) & mask) -+ print("bitpack value", (data >> stop) & mask, data, stop, mask) - stop -= bitwidth - count -= 1 - -@@ -239,11 +240,13 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o): - const uint8_t[:] bitwidths - uint8_t bitwidth - values_per_miniblock = block_size // miniblock_per_block -+ print("\nstart", count, value, values_per_miniblock) - while True: - min_delta = zigzag_long(read_unsigned_var_int(file_obj)) - bitwidths = file_obj.read(miniblock_per_block) - for i in range(miniblock_per_block): - bitwidth = bitwidths[i] -+ print("\n miniblock", i, "width", bitwidth) - if bitwidth: - temp = o.loc - if count > 1: -@@ 
-253,6 +256,7 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o): - for j in range(values_per_miniblock): - temp = o.read_int() - o.loc -= 4 -+ print("miniblock value", value) - o.write_int(value) - value += min_delta + temp - count -= 1 -@@ -260,6 +264,7 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o): - return - else: - for j in range(values_per_miniblock): -+ print("miniblock value", value) - o.write_int(value) - value += min_delta - count -= 1 - -From c453e140355055be1077f2c99b24785444b4ab20 Mon Sep 17 00:00:00 2001 -From: Martin Durant -Date: Thu, 22 Dec 2022 13:02:27 -0500 -Subject: [PATCH 2/5] more - ---- - fastparquet/cencoding.pyx | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx -index c151aff9..6c2051c6 100644 ---- a/fastparquet/cencoding.pyx -+++ b/fastparquet/cencoding.pyx -@@ -256,7 +256,7 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o): - for j in range(values_per_miniblock): - temp = o.read_int() - o.loc -= 4 -- print("miniblock value", value) -+ print("miniblock value (bw)", value) - o.write_int(value) - value += min_delta + temp - count -= 1 - -From 76b8c4fefe703d980b72ad37841bb2e76b9da590 Mon Sep 17 00:00:00 2001 -From: Martin Durant -Date: Thu, 22 Dec 2022 13:32:27 -0500 -Subject: [PATCH 3/5] more - ---- - fastparquet/cencoding.pyx | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx -index 6c2051c6..9335b52a 100644 ---- a/fastparquet/cencoding.pyx -+++ b/fastparquet/cencoding.pyx -@@ -223,6 +223,7 @@ cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth, - if stop < 0: - data = ((data & 0X00FFFFFFFFFFFFFF) << 8) | file_obj.read_byte() - stop += 8 -+ print("bin stop", bin(stop), bin(data)) - else: - o.write_int((data >> stop) & mask) - print("bitpack value", (data >> stop) & mask, data, stop, mask) - -From bbb32d8ea7f2cb97a817a22c0357f1a023f79b42 Mon Sep 17 
00:00:00 2001 -From: Martin Durant -Date: Thu, 22 Dec 2022 13:39:14 -0500 -Subject: [PATCH 4/5] trial - ---- - fastparquet/cencoding.pyx | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx -index 9335b52a..1be0a001 100644 ---- a/fastparquet/cencoding.pyx -+++ b/fastparquet/cencoding.pyx -@@ -18,7 +18,7 @@ cdef extern from "string.h": - from cpython cimport ( - PyBytes_FromStringAndSize, PyBytes_GET_SIZE, PyUnicode_DecodeUTF8, - ) --from libc.stdint cimport uint8_t, uint32_t, int32_t, uint64_t, int64_t -+from libc.stdint cimport int8_t, uint8_t, uint32_t, int32_t, uint64_t, int64_t - - - cpdef void read_rle(NumpyIO file_obj, int32_t header, int32_t bit_width, NumpyIO o, int32_t itemsize=4): -@@ -217,7 +217,7 @@ cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth, - NumpyIO o, uint64_t count, uint8_t itemsize=4): - cdef: - uint64_t data = 0 -- char stop = -bitwidth -+ int8_t stop = -bitwidth - uint64_t mask = 0XFFFFFFFFFFFFFFFF >> (64 - bitwidth) - while count > 0: - if stop < 0: - -From 43e34e28b2e108f178b05bba8c109e2b131f5fc2 Mon Sep 17 00:00:00 2001 -From: Martin Durant -Date: Thu, 22 Dec 2022 13:43:11 -0500 -Subject: [PATCH 5/5] fix - ---- - fastparquet/cencoding.pyx | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx -index 1be0a001..90ba15db 100644 ---- a/fastparquet/cencoding.pyx -+++ b/fastparquet/cencoding.pyx -@@ -223,10 +223,8 @@ cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth, - if stop < 0: - data = ((data & 0X00FFFFFFFFFFFFFF) << 8) | file_obj.read_byte() - stop += 8 -- print("bin stop", bin(stop), bin(data)) - else: - o.write_int((data >> stop) & mask) -- print("bitpack value", (data >> stop) & mask, data, stop, mask) - stop -= bitwidth - count -= 1 - -@@ -241,13 +239,11 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o): - const uint8_t[:] bitwidths - uint8_t bitwidth - 
values_per_miniblock = block_size // miniblock_per_block -- print("\nstart", count, value, values_per_miniblock) - while True: - min_delta = zigzag_long(read_unsigned_var_int(file_obj)) - bitwidths = file_obj.read(miniblock_per_block) - for i in range(miniblock_per_block): - bitwidth = bitwidths[i] -- print("\n miniblock", i, "width", bitwidth) - if bitwidth: - temp = o.loc - if count > 1: -@@ -257,7 +253,6 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o): - for j in range(values_per_miniblock): - temp = o.read_int() - o.loc -= 4 -- print("miniblock value (bw)", value) - o.write_int(value) - value += min_delta + temp - count -= 1 -@@ -265,7 +260,6 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o): - return - else: - for j in range(values_per_miniblock): -- print("miniblock value", value) - o.write_int(value) - value += min_delta - count -= 1 diff --git a/python-fastparquet.changes b/python-fastparquet.changes index 188af17..c68257c 100644 --- a/python-fastparquet.changes +++ b/python-fastparquet.changes @@ -1,3 +1,24 @@ +------------------------------------------------------------------- +Thu Feb 9 15:55:08 UTC 2023 - Arun Persaud + +- update to version 2023.2.0: + * revert one-level set of filters (#852) + * full size dict for decoding V2 pages (#850) + * infer_object_encoding fix (#847) + * row filtering with V2 pages (#845) + +------------------------------------------------------------------- +Wed Feb 8 18:25:03 UTC 2023 - Arun Persaud + +- specfile: + * remove fastparquet-pr835.patch, implemented upstream + +- update to version 2023.1.0: + * big improvement to write speed + * paging support for bigger row-groups + * pandas 2.0 support + * delta for big-endian architecture + ------------------------------------------------------------------- Mon Jan 2 20:38:49 UTC 2023 - Ben Greiner diff --git a/python-fastparquet.spec b/python-fastparquet.spec index c5d6927..c33c0d8 100644 --- a/python-fastparquet.spec +++ b/python-fastparquet.spec 
@@ -17,16 +17,14 @@ Name: python-fastparquet -Version: 2022.12.0 +Version: 2023.2.0 Release: 0 Summary: Python support for Parquet file format License: Apache-2.0 URL: https://github.com/dask/fastparquet/ # Use GitHub archive, because it contains the test modules and data, requires setting version manually for setuptools_scm Source: https://github.com/dask/fastparquet/archive/%{version}.tar.gz#/fastparquet-%{version}.tar.gz -# PATCH-FIX-UPSTREAM fastparquet-pr835.patch gh#dask/fastparquet#835 -Patch2: fastparquet-pr835.patch -BuildRequires: %{python_module Cython} +BuildRequires: %{python_module Cython >= 0.29.23} BuildRequires: %{python_module base >= 3.8} BuildRequires: %{python_module cramjam >= 2.3.0} # version requirement not declared for runtime, but necessary for tests.