python-numcodecs/numcodecs-pr417-raggednumpy.patch

152 lines
4.8 KiB
Diff

diff --git a/numcodecs/json.py b/numcodecs/json.py
index 670f223..b803a77 100644
--- a/numcodecs/json.py
+++ b/numcodecs/json.py
@@ -54,7 +54,10 @@ class JSON(Codec):
self._decoder = _json.JSONDecoder(**self._decoder_config)
def encode(self, buf):
- buf = np.asarray(buf)
+ try:
+ buf = np.asarray(buf)
+ except ValueError:
+ buf = np.asarray(buf, dtype=object)
items = buf.tolist()
items.extend((buf.dtype.str, buf.shape))
return self._encoder.encode(items).encode(self._text_encoding)
diff --git a/numcodecs/msgpacks.py b/numcodecs/msgpacks.py
index 026f583..6556498 100644
--- a/numcodecs/msgpacks.py
+++ b/numcodecs/msgpacks.py
@@ -52,7 +52,10 @@ class MsgPack(Codec):
self.raw = raw
def encode(self, buf):
- buf = np.asarray(buf)
+ try:
+ buf = np.asarray(buf)
+ except ValueError:
+ buf = np.asarray(buf, dtype=object)
items = buf.tolist()
items.extend((buf.dtype.str, buf.shape))
return msgpack.packb(items, use_bin_type=self.use_bin_type,
diff --git a/numcodecs/tests/test_json.py b/numcodecs/tests/test_json.py
index 7e8fcd6..8dac2b4 100644
--- a/numcodecs/tests/test_json.py
+++ b/numcodecs/tests/test_json.py
@@ -2,7 +2,7 @@ import itertools
import numpy as np
-
+import pytest
from numcodecs.json import JSON
from numcodecs.tests.common import (check_config, check_repr, check_encode_decode_array,
@@ -53,21 +53,23 @@ def test_backwards_compatibility():
check_backwards_compatibility(JSON.codec_id, arrays, codecs)
-def test_non_numpy_inputs():
+@pytest.mark.parametrize(
+ "input_data, dtype",
+ [
+ ([0, 1], None),
+ ([[0, 1], [2, 3]], None),
+ ([[0], [1], [2, 3]], object),
+ ([[[0, 0]], [[1, 1]], [[2, 3]]], None),
+ (["1"], None),
+ (["11", "11"], None),
+ (["11", "1", "1"], None),
+ ([{}], None),
+ ([{"key": "value"}, ["list", "of", "strings"]], object),
+ ]
+)
+def test_non_numpy_inputs(input_data, dtype):
# numpy will infer a range of different shapes and dtypes for these inputs.
# Make sure that round-tripping through encode preserves this.
- data = [
- [0, 1],
- [[0, 1], [2, 3]],
- [[0], [1], [2, 3]],
- [[[0, 0]], [[1, 1]], [[2, 3]]],
- ["1"],
- ["11", "11"],
- ["11", "1", "1"],
- [{}],
- [{"key": "value"}, ["list", "of", "strings"]],
- ]
- for input_data in data:
- for codec in codecs:
- output_data = codec.decode(codec.encode(input_data))
- assert np.array_equal(np.array(input_data), output_data)
+ for codec in codecs:
+ output_data = codec.decode(codec.encode(input_data))
+ assert np.array_equal(np.array(input_data, dtype=dtype), output_data)
diff --git a/numcodecs/tests/test_msgpacks.py b/numcodecs/tests/test_msgpacks.py
index 6aeadcf..d76aa12 100644
--- a/numcodecs/tests/test_msgpacks.py
+++ b/numcodecs/tests/test_msgpacks.py
@@ -2,6 +2,7 @@ import unittest
import numpy as np
+import pytest
try:
@@ -52,30 +53,32 @@ def test_backwards_compatibility():
check_backwards_compatibility(codec.codec_id, arrays, [codec])
-def test_non_numpy_inputs():
+@pytest.mark.parametrize(
+ "input_data, dtype",
+ [
+ ([0, 1], None),
+ ([[0, 1], [2, 3]], None),
+ ([[0], [1], [2, 3]], object),
+ ([[[0, 0]], [[1, 1]], [[2, 3]]], None),
+ (["1"], None),
+ (["11", "11"], None),
+ (["11", "1", "1"], None),
+ ([{}], None),
+ ([{"key": "value"}, ["list", "of", "strings"]], object),
+ ([b"1"], None),
+ ([b"11", b"11"], None),
+ ([b"11", b"1", b"1"], None),
+ ([{b"key": b"value"}, [b"list", b"of", b"strings"]], object),
+ ]
+)
+def test_non_numpy_inputs(input_data, dtype):
codec = MsgPack()
# numpy will infer a range of different shapes and dtypes for these inputs.
# Make sure that round-tripping through encode preserves this.
- data = [
- [0, 1],
- [[0, 1], [2, 3]],
- [[0], [1], [2, 3]],
- [[[0, 0]], [[1, 1]], [[2, 3]]],
- ["1"],
- ["11", "11"],
- ["11", "1", "1"],
- [{}],
- [{"key": "value"}, ["list", "of", "strings"]],
- [b"1"],
- [b"11", b"11"],
- [b"11", b"1", b"1"],
- [{b"key": b"value"}, [b"list", b"of", b"strings"]],
- ]
- for input_data in data:
- actual = codec.decode(codec.encode(input_data))
- expect = np.array(input_data)
- assert expect.shape == actual.shape
- assert np.array_equal(expect, actual)
+ actual = codec.decode(codec.encode(input_data))
+ expect = np.array(input_data, dtype=dtype)
+ assert expect.shape == actual.shape
+ assert np.array_equal(expect, actual)
def test_encode_decode_shape_dtype_preserved():