1
0

Accepting request 1045047 from devel:languages:python:numeric

OBS-URL: https://build.opensuse.org/request/show/1045047
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-fastparquet?expand=0&rev=24
This commit is contained in:
Dominique Leuenberger 2022-12-24 13:52:02 +00:00 committed by Git OBS Bridge
commit 0c11a78ae2
3 changed files with 192 additions and 0 deletions

183
fastparquet-pr835.patch Normal file
View File

@ -0,0 +1,183 @@
From f035a2d7b37e2cbdef1a99bc6130be7e4afcf35f Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Thu, 22 Dec 2022 12:57:32 -0500
Subject: [PATCH 1/5] more print
---
fastparquet/cencoding.pyx | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx
index 4ab48be6..c151aff9 100644
--- a/fastparquet/cencoding.pyx
+++ b/fastparquet/cencoding.pyx
@@ -225,6 +225,7 @@ cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth,
stop += 8
else:
o.write_int((data >> stop) & mask)
+ print("bitpack value", (data >> stop) & mask, data, stop, mask)
stop -= bitwidth
count -= 1
@@ -239,11 +240,13 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o):
const uint8_t[:] bitwidths
uint8_t bitwidth
values_per_miniblock = block_size // miniblock_per_block
+ print("\nstart", count, value, values_per_miniblock)
while True:
min_delta = zigzag_long(read_unsigned_var_int(file_obj))
bitwidths = file_obj.read(miniblock_per_block)
for i in range(miniblock_per_block):
bitwidth = bitwidths[i]
+ print("\n miniblock", i, "width", bitwidth)
if bitwidth:
temp = o.loc
if count > 1:
@@ -253,6 +256,7 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o):
for j in range(values_per_miniblock):
temp = o.read_int()
o.loc -= 4
+ print("miniblock value", value)
o.write_int(value)
value += min_delta + temp
count -= 1
@@ -260,6 +264,7 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o):
return
else:
for j in range(values_per_miniblock):
+ print("miniblock value", value)
o.write_int(value)
value += min_delta
count -= 1
From c453e140355055be1077f2c99b24785444b4ab20 Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Thu, 22 Dec 2022 13:02:27 -0500
Subject: [PATCH 2/5] more
---
fastparquet/cencoding.pyx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx
index c151aff9..6c2051c6 100644
--- a/fastparquet/cencoding.pyx
+++ b/fastparquet/cencoding.pyx
@@ -256,7 +256,7 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o):
for j in range(values_per_miniblock):
temp = o.read_int()
o.loc -= 4
- print("miniblock value", value)
+ print("miniblock value (bw)", value)
o.write_int(value)
value += min_delta + temp
count -= 1
From 76b8c4fefe703d980b72ad37841bb2e76b9da590 Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Thu, 22 Dec 2022 13:32:27 -0500
Subject: [PATCH 3/5] more
---
fastparquet/cencoding.pyx | 1 +
1 file changed, 1 insertion(+)
diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx
index 6c2051c6..9335b52a 100644
--- a/fastparquet/cencoding.pyx
+++ b/fastparquet/cencoding.pyx
@@ -223,6 +223,7 @@ cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth,
if stop < 0:
data = ((data & 0X00FFFFFFFFFFFFFF) << 8) | file_obj.read_byte()
stop += 8
+ print("bin stop", bin(stop), bin(data))
else:
o.write_int((data >> stop) & mask)
print("bitpack value", (data >> stop) & mask, data, stop, mask)
From bbb32d8ea7f2cb97a817a22c0357f1a023f79b42 Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Thu, 22 Dec 2022 13:39:14 -0500
Subject: [PATCH 4/5] trial
---
fastparquet/cencoding.pyx | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx
index 9335b52a..1be0a001 100644
--- a/fastparquet/cencoding.pyx
+++ b/fastparquet/cencoding.pyx
@@ -18,7 +18,7 @@ cdef extern from "string.h":
from cpython cimport (
PyBytes_FromStringAndSize, PyBytes_GET_SIZE, PyUnicode_DecodeUTF8,
)
-from libc.stdint cimport uint8_t, uint32_t, int32_t, uint64_t, int64_t
+from libc.stdint cimport int8_t, uint8_t, uint32_t, int32_t, uint64_t, int64_t
cpdef void read_rle(NumpyIO file_obj, int32_t header, int32_t bit_width, NumpyIO o, int32_t itemsize=4):
@@ -217,7 +217,7 @@ cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth,
NumpyIO o, uint64_t count, uint8_t itemsize=4):
cdef:
uint64_t data = 0
- char stop = -bitwidth
+ int8_t stop = -bitwidth
uint64_t mask = 0XFFFFFFFFFFFFFFFF >> (64 - bitwidth)
while count > 0:
if stop < 0:
From 43e34e28b2e108f178b05bba8c109e2b131f5fc2 Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Thu, 22 Dec 2022 13:43:11 -0500
Subject: [PATCH 5/5] fix
---
fastparquet/cencoding.pyx | 6 ------
1 file changed, 6 deletions(-)
diff --git a/fastparquet/cencoding.pyx b/fastparquet/cencoding.pyx
index 1be0a001..90ba15db 100644
--- a/fastparquet/cencoding.pyx
+++ b/fastparquet/cencoding.pyx
@@ -223,10 +223,8 @@ cdef void delta_read_bitpacked(NumpyIO file_obj, uint8_t bitwidth,
if stop < 0:
data = ((data & 0X00FFFFFFFFFFFFFF) << 8) | file_obj.read_byte()
stop += 8
- print("bin stop", bin(stop), bin(data))
else:
o.write_int((data >> stop) & mask)
- print("bitpack value", (data >> stop) & mask, data, stop, mask)
stop -= bitwidth
count -= 1
@@ -241,13 +239,11 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o):
const uint8_t[:] bitwidths
uint8_t bitwidth
values_per_miniblock = block_size // miniblock_per_block
- print("\nstart", count, value, values_per_miniblock)
while True:
min_delta = zigzag_long(read_unsigned_var_int(file_obj))
bitwidths = file_obj.read(miniblock_per_block)
for i in range(miniblock_per_block):
bitwidth = bitwidths[i]
- print("\n miniblock", i, "width", bitwidth)
if bitwidth:
temp = o.loc
if count > 1:
@@ -257,7 +253,6 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o):
for j in range(values_per_miniblock):
temp = o.read_int()
o.loc -= 4
- print("miniblock value (bw)", value)
o.write_int(value)
value += min_delta + temp
count -= 1
@@ -265,7 +260,6 @@ cpdef void delta_binary_unpack(NumpyIO file_obj, NumpyIO o):
return
else:
for j in range(values_per_miniblock):
- print("miniblock value", value)
o.write_int(value)
value += min_delta
count -= 1

View File

@ -1,3 +1,10 @@
-------------------------------------------------------------------
Fri Dec 23 09:18:39 UTC 2022 - Guillaume GARDET <guillaume.gardet@opensuse.org>
- Add patch to fix the test test_delta_from_def_2 on
aarch64, armv7 and ppc64le:
* fastparquet-pr835.patch
-------------------------------------------------------------------
Fri Oct 28 15:47:41 UTC 2022 - Ben Greiner <code@bnavigator.de>

View File

@ -25,6 +25,8 @@ URL: https://github.com/dask/fastparquet/
Source: https://github.com/dask/fastparquet/archive/%{version}.tar.gz#/fastparquet-%{version}.tar.gz
# PATCH-FIX-UPSTREAM fastparquet-pr813-updatefixes.patch gh#dask/fastparquet#813
Patch1: fastparquet-pr813-updatefixes.patch
# PATCH-FIX-UPSTREAM fastparquet-pr835.patch gh#dask/fastparquet#835
Patch2: fastparquet-pr835.patch
BuildRequires: %{python_module Cython}
BuildRequires: %{python_module base >= 3.8}
BuildRequires: %{python_module cramjam >= 2.3.0}