python-hdf5storage/hdf5storage-pr134-numpy2.patch
Dirk Mueller df11e96f83 - Unpin numpy 2
* Add hdf5storage-pr134-numpy2.patch
  * gh#frejanordsiek/hdf5storage#134 (backported)
- Make it noarch again

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:numeric/python-hdf5storage?expand=0&rev=12
2024-09-09 08:32:36 +00:00

227 lines
11 KiB
Diff

From 9814bc28874a56757e16479186523b2b77d5c553 Mon Sep 17 00:00:00 2001
From: Jesse R Codling <codling@umich.edu>
Date: Wed, 14 Aug 2024 12:34:47 -0400
Subject: [PATCH 2/3] Numpy 2.0: Remove all np.unicode_ for np.str_
---
doc/source/storage_format.rst | 6 ++--
pyproject.toml | 2 +-
hdf5storage/Marshallers.py | 20 +++++++------
hdf5storage/__init__.py | 6 ++--
hdf5storage/utilities.py | 40 ++++++++++++-------------
tests/asserts.py | 14 ++++-----
tests/make_randoms.py | 4 +--
tests/test_dict_like_storage_methods.py | 6 ++--
tests/test_str_conv_utils.py | 8 ++---
tests/test_string_utf16_conversion.py | 4 +--
tests/test_write_readback.py | 6 ++--
11 files changed, 59 insertions(+), 57 deletions(-)
Index: hdf5storage-0.1.19/tests/make_randoms.py
===================================================================
--- hdf5storage-0.1.19.orig/tests/make_randoms.py
+++ hdf5storage-0.1.19/tests/make_randoms.py
@@ -156,7 +156,7 @@ def random_numpy(shape, dtype, allow_nan
chars = random_str_some_unicode(length)
else:
chars = random_str_ascii(length)
- data[index] = np.unicode_(chars)
+ data[index] = np.str_(chars)
return data
elif dtype == 'object':
data = np.zeros(shape=shape, dtype='object')
Index: hdf5storage-0.1.19/tests/test_string_utf16_conversion.py
===================================================================
--- hdf5storage-0.1.19.orig/tests/test_string_utf16_conversion.py
+++ hdf5storage-0.1.19/tests/test_string_utf16_conversion.py
@@ -44,12 +44,12 @@ import pytest
# convert_numpy_str_to_utf16 option is set.
#
# * str
-# * numpy.unicode_ scalars
+# * numpy.str_ scalars
if sys.hexversion < 0x3000000:
- tps_tuple = (unicode, np.unicode_)
+ tps_tuple = (unicode, np.str_)
else:
- tps_tuple = (str, np.unicode_)
+ tps_tuple = (str, np.str_)
@pytest.mark.parametrize("tp", tps_tuple)
Index: hdf5storage-0.1.19/hdf5storage/Marshallers.py
===================================================================
--- hdf5storage-0.1.19.orig/hdf5storage/Marshallers.py
+++ hdf5storage-0.1.19/hdf5storage/Marshallers.py
@@ -480,7 +480,7 @@ class NumpyScalarArrayMarshaller(TypeMar
'MATLAB_int_decode',
'MATLAB_fields'])
# As np.str_ is the unicode type string in Python 3 and the bare
- # bytes string in Python 2, we have to use np.unicode_ which is
+ # bytes string in Python 2, we used np.unicode_ (removed in NumPy 2; now np.str_), which is
# or points to the unicode one in both versions.
self.types = [np.ndarray, np.matrix,
np.chararray, np.core.records.recarray,
@@ -489,7 +489,7 @@ class NumpyScalarArrayMarshaller(TypeMar
np.int8, np.int16, np.int32, np.int64,
np.float32, np.float64,
np.complex64, np.complex128,
- np.bytes_, np.unicode_, np.object_]
+ np.bytes_, np.str_, np.object_]
self._numpy_types = list(self.types)
# Using Python 3 type strings.
self.python_type_strings = ['numpy.ndarray', 'numpy.matrix',
@@ -525,7 +525,7 @@ class NumpyScalarArrayMarshaller(TypeMar
np.complex64: 'single',
np.complex128: 'double',
np.bytes_: 'char',
- np.unicode_: 'char',
+ np.str_: 'char',
np.object_: 'cell'}
# Make a dict to look up the opposite direction (given a matlab
@@ -542,7 +542,7 @@ class NumpyScalarArrayMarshaller(TypeMar
'int64': np.int64,
'single': np.float32,
'double': np.float64,
- 'char': np.unicode_,
+ 'char': np.str_,
'cell': np.object_,
'canonical empty': np.float64,
'struct': np.object_}
@@ -601,18 +601,7 @@ class NumpyScalarArrayMarshaller(TypeMar
raise NotImplementedError( \
'Can''t write non-ASCII numpy.bytes_.')
- # As of 2013-12-13, h5py cannot write numpy.str_ (UTF-32
- # encoding) types (its numpy.unicode_ in Python 2, which is an
- # alias for it in Python 3). If the option is set to try to
- # convert them to UTF-16, then an attempt at the conversion is
- # made. If no conversion is to be done, the conversion throws an
- # exception (a UTF-32 character had no UTF-16 equivalent), or a
- # UTF-32 character gets turned into a UTF-16 doublet (the
- # increase in the number of columns will be by a factor more
- # than the length of the strings); then it will be simply
- # converted to uint32's byte for byte instead.
-
- if data.dtype.type == np.unicode_:
+ if data.dtype.type == np.str_:
new_data = None
if options.convert_numpy_str_to_utf16:
try:
@@ -620,7 +609,7 @@ class NumpyScalarArrayMarshaller(TypeMar
data_to_store)
except:
pass
- if new_data is None or (type(data_to_store) == np.unicode_ \
+ if new_data is None or (type(data_to_store) == np.str_ \
and len(data_to_store) != len(new_data)) \
or (isinstance(data_to_store, np.ndarray) \
and new_data.shape[-1] != data_to_store.shape[-1] \
@@ -1049,7 +1038,7 @@ class NumpyScalarArrayMarshaller(TypeMar
str_attrs[attr_name] = value
elif isinstance(value, bytes):
str_attrs[attr_name] = value.decode()
- elif isinstance(value, np.unicode_):
+ elif isinstance(value, np.str_):
str_attrs[attr_name] = str(value)
elif isinstance(value, np.bytes_):
str_attrs[attr_name] = value.decode()
@@ -1313,7 +1302,7 @@ class NumpyScalarArrayMarshaller(TypeMar
elif underlying_type.startswith('str') \
or matlab_class == 'char':
if underlying_type == 'str':
- data = np.unicode_('')
+ data = np.str_('')
elif underlying_type.startswith('str'):
data = convert_to_numpy_str(data, \
length=int(underlying_type[3:])//32)
@@ -1344,7 +1333,7 @@ class NumpyScalarArrayMarshaller(TypeMar
data = data.flatten()[0]
elif underlying_type.startswith('str'):
if python_empty == 1:
- data = np.unicode_('')
+ data = np.str_('')
elif isinstance(data, np.ndarray):
data = data.flatten()[0]
else:
@@ -1511,7 +1500,7 @@ class PythonStringMarshaller(NumpyScalar
if (sys.hexversion >= 0x03000000 and isinstance(data, str)) \
or (sys.hexversion < 0x03000000 \
and isinstance(data, unicode)):
- cdata = np.unicode_(data)
+ cdata = np.str_(data)
else:
cdata = np.bytes_(data)
Index: hdf5storage-0.1.19/hdf5storage/utilities.py
===================================================================
--- hdf5storage-0.1.19.orig/hdf5storage/utilities.py
+++ hdf5storage-0.1.19/hdf5storage/utilities.py
@@ -408,7 +408,7 @@ def convert_to_str(data):
# assuming it is in UTF-8. Otherwise, data has to be returned as is.
if isinstance(data, (np.ndarray, np.uint8, np.uint16, np.uint32,
- np.bytes_, np.unicode_)):
+ np.bytes_, np.str_)):
if data.dtype.name == 'uint8':
return numpy_to_bytes(data.flatten()).decode('UTF-8')
elif data.dtype.name == 'uint16':
@@ -477,7 +477,7 @@ def convert_to_numpy_str(data, length=No
"""
# The method of conversion depends on its type.
- if isinstance(data, np.unicode_) or (isinstance(data, np.ndarray) \
+ if isinstance(data, np.str_) or (isinstance(data, np.ndarray) \
and data.dtype.char == 'U'):
# It is already an np.str_ or array of them, so nothing needs to
# be done.
@@ -486,16 +486,16 @@ def convert_to_numpy_str(data, length=No
or (sys.hexversion < 0x03000000 \
and isinstance(data, unicode)):
# Easily converted through constructor.
- return np.unicode_(data)
+ return np.str_(data)
elif isinstance(data, (bytes, bytearray, np.bytes_)):
# All of them can be decoded and then passed through the
# constructor.
- return np.unicode_(data.decode('UTF-8'))
+ return np.str_(data.decode('UTF-8'))
elif isinstance(data, (np.uint8, np.uint16)):
# They are single UTF-8 or UTF-16 scalars, and are easily
# converted to a UTF-8 string and then passed through the
# constructor.
- return np.unicode_(convert_to_str(data))
+ return np.str_(convert_to_str(data))
elif isinstance(data, np.uint32):
# It is just the uint32 version of the character, so it just
# needs to be have the dtype essentially changed by having its
@@ -507,7 +507,7 @@ def convert_to_numpy_str(data, length=No
new_data = np.zeros(shape=data.shape,
dtype='U' + str(data.dtype.itemsize))
for index, x in np.ndenumerate(data):
- new_data[index] = np.unicode_(x.decode('UTF-8'))
+ new_data[index] = np.str_(x.decode('UTF-8'))
return new_data
elif isinstance(data, np.ndarray) \
and data.dtype.name in ('uint8', 'uint16', 'uint32'):
@@ -559,7 +559,7 @@ def convert_to_numpy_str(data, length=No
dtype=new_data.dtype,
buffer=numpy_to_bytes(chunk))[()]
else:
- new_data[i] = np.unicode_(convert_to_str(chunk))
+ new_data[i] = np.str_(convert_to_str(chunk))
# Only thing is left is to reshape it.
return new_data.reshape(tuple(new_shape))
@@ -896,7 +896,7 @@ def get_attribute_string(target, name):
return value
elif isinstance(value, bytes):
return value.decode()
- elif isinstance(value, np.unicode_):
+ elif isinstance(value, np.str_):
return str(value)
elif isinstance(value, np.bytes_):
return value.decode()