Compare commits
1 Commits
| Author | SHA256 | Date | |
|---|---|---|---|
|
|
a4630a2beb |
523
0001-Bug-503241-s390x-Support-z17-changes-to-the-NNPA-ins.patch
Normal file
523
0001-Bug-503241-s390x-Support-z17-changes-to-the-NNPA-ins.patch
Normal file
@@ -0,0 +1,523 @@
|
||||
From 24b634e8ce04de70d4aa6c61a12149df223f9c68 Mon Sep 17 00:00:00 2001
|
||||
From: Andreas Arnez <arnez@linux.ibm.com>
|
||||
Date: Wed, 25 Jun 2025 16:35:04 +0200
|
||||
Subject: [PATCH] Bug 503241 - s390x: Support z17 changes to the NNPA
|
||||
instruction
|
||||
|
||||
This adds support for the NNPA enhancements that are implemented with z17.
|
||||
---
|
||||
coregrind/m_extension/extension-s390x.c | 346 ++++++++++++++----------
|
||||
1 file changed, 207 insertions(+), 139 deletions(-)
|
||||
|
||||
diff --git a/coregrind/m_extension/extension-s390x.c b/coregrind/m_extension/extension-s390x.c
|
||||
index 85b99ad08..98b825d9b 100644
|
||||
--- a/coregrind/m_extension/extension-s390x.c
|
||||
+++ b/coregrind/m_extension/extension-s390x.c
|
||||
@@ -301,11 +301,17 @@ typedef enum {
|
||||
S390_NNPA_MAX = 0x15,
|
||||
S390_NNPA_LOG = 0x20,
|
||||
S390_NNPA_EXP = 0x21,
|
||||
+ S390_NNPA_SQRT = 0x22,
|
||||
+ S390_NNPA_INVSQRT = 0x23,
|
||||
S390_NNPA_RELU = 0x31,
|
||||
S390_NNPA_TANH = 0x32,
|
||||
S390_NNPA_SIGMOID = 0x33,
|
||||
S390_NNPA_SOFTMAX = 0x34,
|
||||
+ S390_NNPA_GELU = 0x35,
|
||||
S390_NNPA_BATCHNORM = 0x40,
|
||||
+ S390_NNPA_MOMENTS = 0x41,
|
||||
+ S390_NNPA_LAYERNORM = 0x42,
|
||||
+ S390_NNPA_NORM = 0x43,
|
||||
S390_NNPA_MAXPOOL2D = 0x50,
|
||||
S390_NNPA_AVGPOOL2D = 0x51,
|
||||
S390_NNPA_LSTMACT = 0x60,
|
||||
@@ -313,6 +319,9 @@ typedef enum {
|
||||
S390_NNPA_CONVOLUTION = 0x70,
|
||||
S390_NNPA_MATMUL_OP = 0x71,
|
||||
S390_NNPA_MATMUL_OP_BCAST23 = 0x72,
|
||||
+ S390_NNPA_MATMUL_OP_BCAST1 = 0x73,
|
||||
+ S390_NNPA_TRANSFORM = 0xf0,
|
||||
+ S390_NNPA_REDUCE = 0xf1,
|
||||
} s390_nnpa_function_t;
|
||||
|
||||
/* Suported NNPA functions */
|
||||
@@ -321,30 +330,51 @@ static const ULong NNPA_functions[] = {
|
||||
S390_SETBIT(S390_NNPA_SUB) | S390_SETBIT(S390_NNPA_MUL) |
|
||||
S390_SETBIT(S390_NNPA_DIV) | S390_SETBIT(S390_NNPA_MIN) |
|
||||
S390_SETBIT(S390_NNPA_MAX) | S390_SETBIT(S390_NNPA_LOG) |
|
||||
- S390_SETBIT(S390_NNPA_EXP) | S390_SETBIT(S390_NNPA_RELU) |
|
||||
+ S390_SETBIT(S390_NNPA_EXP) | S390_SETBIT(S390_NNPA_SQRT) |
|
||||
+ S390_SETBIT(S390_NNPA_INVSQRT) | S390_SETBIT(S390_NNPA_RELU) |
|
||||
S390_SETBIT(S390_NNPA_TANH) | S390_SETBIT(S390_NNPA_SIGMOID) |
|
||||
- S390_SETBIT(S390_NNPA_SOFTMAX)),
|
||||
- (S390_SETBIT(S390_NNPA_BATCHNORM) | S390_SETBIT(S390_NNPA_MAXPOOL2D) |
|
||||
- S390_SETBIT(S390_NNPA_AVGPOOL2D) | S390_SETBIT(S390_NNPA_LSTMACT) |
|
||||
- S390_SETBIT(S390_NNPA_GRUACT) | S390_SETBIT(S390_NNPA_CONVOLUTION) |
|
||||
- S390_SETBIT(S390_NNPA_MATMUL_OP) |
|
||||
- S390_SETBIT(S390_NNPA_MATMUL_OP_BCAST23)),
|
||||
+ S390_SETBIT(S390_NNPA_SOFTMAX) | S390_SETBIT(S390_NNPA_GELU)),
|
||||
+ (S390_SETBIT(S390_NNPA_BATCHNORM) | S390_SETBIT(S390_NNPA_MOMENTS) |
|
||||
+ S390_SETBIT(S390_NNPA_LAYERNORM) | S390_SETBIT(S390_NNPA_NORM) |
|
||||
+ S390_SETBIT(S390_NNPA_MAXPOOL2D) | S390_SETBIT(S390_NNPA_AVGPOOL2D) |
|
||||
+ S390_SETBIT(S390_NNPA_LSTMACT) | S390_SETBIT(S390_NNPA_GRUACT) |
|
||||
+ S390_SETBIT(S390_NNPA_CONVOLUTION) | S390_SETBIT(S390_NNPA_MATMUL_OP) |
|
||||
+ S390_SETBIT(S390_NNPA_MATMUL_OP_BCAST23) |
|
||||
+ S390_SETBIT(S390_NNPA_MATMUL_OP_BCAST1)),
|
||||
+ 0,
|
||||
+ (S390_SETBIT(S390_NNPA_TRANSFORM) | S390_SETBIT(S390_NNPA_REDUCE)),
|
||||
};
|
||||
|
||||
/* Supported parameter block formats */
|
||||
static const ULong NNPA_ipbf[] = {
|
||||
- (S390_SETBIT(0)),
|
||||
+ (S390_SETBIT(0) | S390_SETBIT(1)),
|
||||
};
|
||||
|
||||
/* Supported data types and data layout formats */
|
||||
+enum {
|
||||
+ S390_NNPA_TYPE_1 = 0, // data type 1 (16 bit)
|
||||
+ S390_NNPA_TYPE_BFP32 = 6,
|
||||
+ S390_NNPA_TYPE_INT8 = 8,
|
||||
+ S390_NNPA_TYPE_INT32 = 10,
|
||||
+};
|
||||
+
|
||||
+enum {
|
||||
+ S390_NNPA_4D_FEATURE_TENSOR = 0,
|
||||
+ S390_NNPA_4D_KERNEL_TENSOR = 1,
|
||||
+ S390_NNPA_4D_WEIGHTS_TENSOR = 2,
|
||||
+ S390_NNPA_4D_GENERIC_TENSOR = 31,
|
||||
+};
|
||||
+
|
||||
static const ULong NNPA_dtypes_layouts[] = {
|
||||
/* Data types */
|
||||
- (S390_SETBIT(0) | // data type 1 (16 bit)
|
||||
+ (S390_SETBIT(S390_NNPA_TYPE_1) | S390_SETBIT(S390_NNPA_TYPE_BFP32) |
|
||||
+ S390_SETBIT(S390_NNPA_TYPE_INT8) | S390_SETBIT(S390_NNPA_TYPE_INT32) |
|
||||
|
||||
/* Data layout formats */
|
||||
- S390_SETBIT(32 + 0) | // 4D-feature tensor
|
||||
- S390_SETBIT(32 + 1) // 4D-kernel tensor
|
||||
- ),
|
||||
+ S390_SETBIT(32 + S390_NNPA_4D_FEATURE_TENSOR) |
|
||||
+ S390_SETBIT(32 + S390_NNPA_4D_KERNEL_TENSOR) |
|
||||
+ S390_SETBIT(32 + S390_NNPA_4D_WEIGHTS_TENSOR) |
|
||||
+ S390_SETBIT(32 + S390_NNPA_4D_GENERIC_TENSOR)),
|
||||
};
|
||||
|
||||
static const ULong NNPA_conversions[] = {
|
||||
@@ -360,10 +390,15 @@ struct s390_NNPA_parms_qaf {
|
||||
UInt mdis;
|
||||
ULong mts;
|
||||
ULong conversions;
|
||||
- ULong reserved2[22];
|
||||
+ ULong reserved2;
|
||||
+ UInt mdnis[4];
|
||||
+ struct {
|
||||
+ ULong reserved[19];
|
||||
+ } reserved3;
|
||||
};
|
||||
|
||||
-struct s390_NNPA_tensor0 {
|
||||
+/* Tensor descriptor, common for all data-layout formats */
|
||||
+struct s390_NNPA_tensor {
|
||||
UChar layout;
|
||||
UChar dtype;
|
||||
UShort reserved1;
|
||||
@@ -372,21 +407,21 @@ struct s390_NNPA_tensor0 {
|
||||
ULong address;
|
||||
};
|
||||
|
||||
-struct s390_NNPA_parms0 {
|
||||
- ULong pbvn : 16;
|
||||
- ULong mvn : 8;
|
||||
- ULong ribm : 24;
|
||||
- ULong reserved0 : 15;
|
||||
- ULong cf : 1;
|
||||
- ULong reserved1[6];
|
||||
- ULong save_area_address;
|
||||
- struct s390_NNPA_tensor0 out[2];
|
||||
- struct s390_NNPA_tensor0 reserved2[2];
|
||||
- struct s390_NNPA_tensor0 in[3];
|
||||
- ULong reserved3[12];
|
||||
- UInt param[5];
|
||||
- UInt reserved4;
|
||||
- ULong reserved5[13];
|
||||
+/* Parameter block format 0 or 1 */
|
||||
+struct s390_NNPA_parms {
|
||||
+ ULong pbvn : 16;
|
||||
+ ULong mvn : 8;
|
||||
+ ULong ribm : 24;
|
||||
+ ULong reserved0 : 15;
|
||||
+ ULong cf : 1;
|
||||
+ ULong reserved1[6];
|
||||
+ ULong save_area_address;
|
||||
+ struct s390_NNPA_tensor out[2];
|
||||
+ struct s390_NNPA_tensor reserved2[2];
|
||||
+ struct s390_NNPA_tensor in[3];
|
||||
+ ULong reserved3[12];
|
||||
+ UInt param[16];
|
||||
+ ULong reserved4[8];
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -418,135 +453,145 @@ static const char* const s390_NNPA_errmsg_access[s390_NNPA_message_n] = {
|
||||
|
||||
struct s390_NNPA_mem_dimensions {
|
||||
UChar layout;
|
||||
- ULong dim[5]; // total dimensions
|
||||
- ULong used[4]; // used dimensions, without padding
|
||||
- ULong step[5];
|
||||
- ULong last_dim4_size;
|
||||
+ ULong dim[4];
|
||||
+ ULong total_size;
|
||||
+ ULong used_sticks; // occupied sticks per next-higher dimension
|
||||
+ ULong stick_fill;
|
||||
+ ULong last_stick_fill;
|
||||
};
|
||||
|
||||
-/* Determine the 5 dimensions used to represent the tensor data in memory */
|
||||
+/* Determine the dimensions used to represent the tensor data in memory */
|
||||
static enum ExtensionError
|
||||
-NNPA_tensor0_size(const struct s390_NNPA_tensor0* t,
|
||||
- UInt msg_idx,
|
||||
- struct s390_NNPA_mem_dimensions* out_md)
|
||||
+NNPA_tensor_size(const struct s390_NNPA_tensor* t,
|
||||
+ UInt msg_idx,
|
||||
+ struct s390_NNPA_mem_dimensions* out_md)
|
||||
{
|
||||
struct s390_NNPA_mem_dimensions md;
|
||||
ULong elem_size;
|
||||
+ ULong eps;
|
||||
|
||||
- md.layout = t->layout;
|
||||
- if (t->dtype == 0)
|
||||
+ switch (t->dtype) {
|
||||
+ case S390_NNPA_TYPE_INT8:
|
||||
+ elem_size = 1;
|
||||
+ break;
|
||||
+ case S390_NNPA_TYPE_1:
|
||||
elem_size = 2;
|
||||
- else
|
||||
+ break;
|
||||
+ case S390_NNPA_TYPE_BFP32:
|
||||
+ case S390_NNPA_TYPE_INT32:
|
||||
+ elem_size = 4;
|
||||
+ break;
|
||||
+ default:
|
||||
return INSN_ERR(s390_NNPA_errmsg_dtype[msg_idx]);
|
||||
+ }
|
||||
+ eps = 128 / elem_size;
|
||||
|
||||
+ md.layout = t->layout;
|
||||
switch (t->layout) {
|
||||
- case 0: // 4D-feature tensor
|
||||
- md.dim[0] = md.used[0] = t->dim4;
|
||||
- md.dim[1] = md.used[1] = (t->dim1 + 63) / 64;
|
||||
- md.dim[2] = md.used[2] = t->dim3;
|
||||
- md.dim[3] = (t->dim2 + 31) / 32 * 32;
|
||||
- md.used[3] = t->dim2;
|
||||
- md.dim[4] = 64;
|
||||
- md.last_dim4_size = elem_size * (t->dim1 % 64);
|
||||
+ case S390_NNPA_4D_FEATURE_TENSOR:
|
||||
+ md.dim[0] = t->dim4;
|
||||
+ md.dim[1] = (t->dim1 + eps - 1) / eps;
|
||||
+ md.used_sticks = t->dim2;
|
||||
+ goto common_tensor_dimensions;
|
||||
+ case S390_NNPA_4D_KERNEL_TENSOR:
|
||||
+ md.dim[0] = (t->dim1 + eps - 1) / eps;
|
||||
+ md.dim[1] = t->dim4;
|
||||
+ md.used_sticks = t->dim2;
|
||||
+ goto common_tensor_dimensions;
|
||||
+ case S390_NNPA_4D_WEIGHTS_TENSOR:
|
||||
+ elem_size *= 2;
|
||||
+ eps /= 2;
|
||||
+ md.dim[0] = t->dim4;
|
||||
+ md.dim[1] = (t->dim1 + eps - 1) / eps;
|
||||
+ md.used_sticks = (t->dim2 + 1) / 2;
|
||||
+ common_tensor_dimensions:
|
||||
+ md.dim[2] = t->dim3;
|
||||
+ md.dim[3] = (md.used_sticks + 31) / 32 * 32;
|
||||
+ md.stick_fill = elem_size * (t->dim1 >= eps ? eps : t->dim1);
|
||||
+ md.last_stick_fill = elem_size * ((t->dim1 - 1) % eps + 1);
|
||||
break;
|
||||
- case 1: // 4D-kernel tensor
|
||||
- md.dim[0] = md.used[0] = (t->dim1 + 63) / 64;
|
||||
- md.dim[1] = md.used[1] = t->dim4;
|
||||
- md.dim[2] = md.used[2] = t->dim3;
|
||||
- md.dim[3] = (t->dim2 + 31) / 32 * 32;
|
||||
- md.used[3] = t->dim2;
|
||||
- md.dim[4] = 64;
|
||||
- md.last_dim4_size = elem_size * (t->dim1 % 64);
|
||||
+ case S390_NNPA_4D_GENERIC_TENSOR:
|
||||
+ md.dim[0] = t->dim4;
|
||||
+ md.dim[1] = t->dim3;
|
||||
+ md.dim[2] = t->dim2;
|
||||
+ md.dim[3] = t->dim1;
|
||||
+ eps = 1;
|
||||
break;
|
||||
default:
|
||||
return INSN_ERR(s390_NNPA_errmsg_layout[msg_idx]);
|
||||
}
|
||||
- md.step[4] = elem_size * md.dim[4];
|
||||
- md.step[3] = md.step[4] * md.dim[3];
|
||||
- md.step[2] = md.step[3] * md.dim[2];
|
||||
- md.step[1] = md.step[2] * md.dim[1];
|
||||
- md.step[0] = md.step[1] * md.dim[0]; // total size
|
||||
- *out_md = md;
|
||||
+ md.total_size =
|
||||
+ elem_size * eps * md.dim[3] * md.dim[2] * md.dim[1] * md.dim[0];
|
||||
+ *out_md = md;
|
||||
return ExtErr_OK;
|
||||
}
|
||||
|
||||
-/* Determine the size of the non-pad elements in the last dimension */
|
||||
-static ULong NNPA_mem_dim4_size(const struct s390_NNPA_mem_dimensions* md,
|
||||
- ULong d0,
|
||||
- ULong d1)
|
||||
-{
|
||||
- switch (md->layout) {
|
||||
- case 0: // 4D-feature tensor
|
||||
- return d1 + 1 == md->dim[1] ? md->last_dim4_size : md->step[4];
|
||||
- case 1: // 4D-kernel tensor
|
||||
- return d0 + 1 == md->dim[0] ? md->last_dim4_size : md->step[4];
|
||||
- }
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static enum ExtensionError NNPA_pre_read_tensor0(
|
||||
- ThreadState* tst, UInt msg_idx, const struct s390_NNPA_tensor0* t)
|
||||
+/* Track a tensor's memory regions with PRE_MEM_READ or POST_MEM_WRITE */
|
||||
+static enum ExtensionError NNPA_track_tensor(ThreadState* tst,
|
||||
+ UInt msg_idx,
|
||||
+ const struct s390_NNPA_tensor* t,
|
||||
+ Bool do_write)
|
||||
{
|
||||
struct s390_NNPA_mem_dimensions md;
|
||||
enum ExtensionError ret;
|
||||
+ ULong addr = t->address;
|
||||
|
||||
- ret = NNPA_tensor0_size(t, msg_idx, &md);
|
||||
+ ret = NNPA_tensor_size(t, msg_idx, &md);
|
||||
if (ret != ExtErr_OK)
|
||||
return ret;
|
||||
|
||||
- for (ULong d0 = 0; d0 < md.used[0]; d0++) {
|
||||
- for (ULong d1 = 0; d1 < md.used[1]; d1++) {
|
||||
- for (ULong d2 = 0; d2 < md.used[2]; d2++) {
|
||||
- for (ULong d3 = 0; d3 < md.used[3]; d3++) {
|
||||
- ULong addr = t->address + d0 * md.step[1] + d1 * md.step[2] +
|
||||
- d2 * md.step[3] + d3 * md.step[4];
|
||||
- ULong len = NNPA_mem_dim4_size(&md, d0, d1);
|
||||
- PRE_MEM_READ(tst, s390_NNPA_errmsg_access[msg_idx], addr, len);
|
||||
+ switch (md.layout) {
|
||||
+ case S390_NNPA_4D_FEATURE_TENSOR:
|
||||
+ case S390_NNPA_4D_KERNEL_TENSOR:
|
||||
+ case S390_NNPA_4D_WEIGHTS_TENSOR:
|
||||
+ for (ULong d0 = 0; d0 < md.dim[0]; d0++) {
|
||||
+ for (ULong d1 = 0; d1 < md.dim[1]; d1++) {
|
||||
+ ULong len;
|
||||
+ switch (md.layout) {
|
||||
+ case S390_NNPA_4D_FEATURE_TENSOR:
|
||||
+ case S390_NNPA_4D_WEIGHTS_TENSOR:
|
||||
+ len = d1 + 1 == md.dim[1] ? md.last_stick_fill : md.stick_fill;
|
||||
+ break;
|
||||
+ case S390_NNPA_4D_KERNEL_TENSOR:
|
||||
+ len = d0 + 1 == md.dim[0] ? md.last_stick_fill : md.stick_fill;
|
||||
+ break;
|
||||
}
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
- return ExtErr_OK;
|
||||
-}
|
||||
-
|
||||
-static UWord NNPA_pre_write_tensor0(ThreadState* tst,
|
||||
- UInt msg_idx,
|
||||
- const struct s390_NNPA_tensor0* t)
|
||||
-{
|
||||
- struct s390_NNPA_mem_dimensions md;
|
||||
- enum ExtensionError ret;
|
||||
-
|
||||
- ret = NNPA_tensor0_size(t, msg_idx, &md);
|
||||
- if (ret != ExtErr_OK)
|
||||
- return ret;
|
||||
-
|
||||
- PRE_MEM_WRITE(tst, "NNPA(out_tensor)", t->address, md.step[0]);
|
||||
- return ExtErr_OK;
|
||||
-}
|
||||
-
|
||||
-static void NNPA_post_write_tensor0(ThreadState* tst,
|
||||
- UInt msg_idx,
|
||||
- const struct s390_NNPA_tensor0* t)
|
||||
-{
|
||||
- struct s390_NNPA_mem_dimensions md;
|
||||
- enum ExtensionError ret;
|
||||
-
|
||||
- ret = NNPA_tensor0_size(t, msg_idx, &md);
|
||||
- if (ret != ExtErr_OK)
|
||||
- return;
|
||||
-
|
||||
- for (ULong d0 = 0; d0 < md.used[0]; d0++) {
|
||||
- for (ULong d1 = 0; d1 < md.used[1]; d1++) {
|
||||
- for (ULong d2 = 0; d2 < md.used[2]; d2++) {
|
||||
- for (ULong d3 = 0; d3 < md.used[3]; d3++) {
|
||||
- ULong addr = t->address + d0 * md.step[1] + d1 * md.step[2] +
|
||||
- d2 * md.step[3] + d3 * md.step[4];
|
||||
- ULong len = NNPA_mem_dim4_size(&md, d0, d1);
|
||||
- POST_MEM_WRITE(tst, addr, len);
|
||||
+ for (ULong d2 = 0; d2 < md.dim[2]; d2++) {
|
||||
+ for (ULong d3 = 0; d3 < md.used_sticks; d3++) {
|
||||
+ if (md.layout == S390_NNPA_4D_WEIGHTS_TENSOR &&
|
||||
+ d3 == md.used_sticks - 1 && t->dim2 % 2 != 0) {
|
||||
+ // even elements only
|
||||
+ for (ULong i = 0; i < len - 1; i += 2) {
|
||||
+ if (do_write) {
|
||||
+ POST_MEM_WRITE(tst, addr + i, 1);
|
||||
+ } else {
|
||||
+ PRE_MEM_READ(tst, s390_NNPA_errmsg_access[msg_idx],
|
||||
+ addr + i, 1);
|
||||
+ }
|
||||
+ }
|
||||
+ } else if (do_write) {
|
||||
+ POST_MEM_WRITE(tst, addr, len);
|
||||
+ } else {
|
||||
+ PRE_MEM_READ(tst, s390_NNPA_errmsg_access[msg_idx], addr,
|
||||
+ len);
|
||||
+ }
|
||||
+ addr += 128;
|
||||
+ }
|
||||
+ addr += 128 * (md.dim[3] - md.used_sticks);
|
||||
}
|
||||
}
|
||||
}
|
||||
+ break;
|
||||
+ case S390_NNPA_4D_GENERIC_TENSOR:
|
||||
+ if (do_write) {
|
||||
+ POST_MEM_WRITE(tst, t->address, md.total_size);
|
||||
+ } else {
|
||||
+ PRE_MEM_READ(tst, s390_NNPA_errmsg_access[msg_idx], t->address,
|
||||
+ md.total_size);
|
||||
+ }
|
||||
+ break;
|
||||
}
|
||||
+ return ExtErr_OK;
|
||||
}
|
||||
|
||||
static enum ExtensionError do_extension_NNPA(ThreadState* tst, ULong variant)
|
||||
@@ -571,16 +616,21 @@ static enum ExtensionError do_extension_NNPA(ThreadState* tst, ULong variant)
|
||||
NNPA_dtypes_layouts, sizeof(NNPA_dtypes_layouts));
|
||||
s390_filter_functions(&parms->conversions, sizeof(ULong),
|
||||
NNPA_conversions, sizeof(NNPA_conversions));
|
||||
+ // Clear reserved fields
|
||||
+ parms->reserved1 = 0;
|
||||
+ parms->reserved2 = 0;
|
||||
+ parms->reserved3 = (__typeof__(parms->reserved3)){0};
|
||||
} else {
|
||||
- struct s390_NNPA_parms0* parms = (void*)parms_addr;
|
||||
- const struct s390_NNPA_parms0 orig_parms = *parms;
|
||||
- ULong save_area_size = 0;
|
||||
- UInt in_tensors;
|
||||
- UInt out_tensors;
|
||||
+ struct s390_NNPA_parms* parms = (void*)parms_addr;
|
||||
+ const struct s390_NNPA_parms orig_parms = *parms;
|
||||
+ ULong save_area_size = 0;
|
||||
+ UInt in_tensors;
|
||||
+ UInt out_tensors;
|
||||
+ enum ExtensionError retval;
|
||||
|
||||
parms_len = 4096;
|
||||
PRE_MEM_READ(tst, "NNPA(parms)", parms_addr,
|
||||
- sizeof(struct s390_NNPA_parms0));
|
||||
+ sizeof(struct s390_NNPA_parms));
|
||||
if (parms->cf) {
|
||||
PRE_MEM_READ(tst, "NNPA(parms.csb)", parms_addr + 512,
|
||||
parms_len - 512);
|
||||
@@ -594,28 +644,39 @@ static enum ExtensionError do_extension_NNPA(ThreadState* tst, ULong variant)
|
||||
case S390_NNPA_DIV:
|
||||
case S390_NNPA_MIN:
|
||||
case S390_NNPA_MAX:
|
||||
+ case S390_NNPA_NORM:
|
||||
in_tensors = 2;
|
||||
out_tensors = 1;
|
||||
break;
|
||||
case S390_NNPA_LOG:
|
||||
case S390_NNPA_EXP:
|
||||
+ case S390_NNPA_SQRT:
|
||||
+ case S390_NNPA_INVSQRT:
|
||||
case S390_NNPA_RELU:
|
||||
case S390_NNPA_TANH:
|
||||
case S390_NNPA_SIGMOID:
|
||||
+ case S390_NNPA_GELU:
|
||||
in_tensors = 1;
|
||||
out_tensors = 1;
|
||||
break;
|
||||
case S390_NNPA_SOFTMAX:
|
||||
+ case S390_NNPA_REDUCE:
|
||||
in_tensors = 1;
|
||||
out_tensors = 1;
|
||||
save_area_size = 8192;
|
||||
break;
|
||||
case S390_NNPA_BATCHNORM:
|
||||
+ case S390_NNPA_LAYERNORM:
|
||||
in_tensors = 3;
|
||||
out_tensors = 1;
|
||||
break;
|
||||
+ case S390_NNPA_MOMENTS:
|
||||
+ in_tensors = 1;
|
||||
+ out_tensors = 2;
|
||||
+ break;
|
||||
case S390_NNPA_MAXPOOL2D:
|
||||
case S390_NNPA_AVGPOOL2D:
|
||||
+ case S390_NNPA_TRANSFORM:
|
||||
in_tensors = 1;
|
||||
out_tensors = 1;
|
||||
break;
|
||||
@@ -627,6 +688,7 @@ static enum ExtensionError do_extension_NNPA(ThreadState* tst, ULong variant)
|
||||
case S390_NNPA_CONVOLUTION:
|
||||
case S390_NNPA_MATMUL_OP:
|
||||
case S390_NNPA_MATMUL_OP_BCAST23:
|
||||
+ case S390_NNPA_MATMUL_OP_BCAST1:
|
||||
in_tensors = 3;
|
||||
out_tensors = 1;
|
||||
break;
|
||||
@@ -635,16 +697,20 @@ static enum ExtensionError do_extension_NNPA(ThreadState* tst, ULong variant)
|
||||
}
|
||||
|
||||
for (UInt i = 0; i < in_tensors; i++) {
|
||||
- enum ExtensionError retval =
|
||||
- NNPA_pre_read_tensor0(tst, s390_NNPA_message_in + i, &parms->in[i]);
|
||||
+ retval = NNPA_track_tensor(tst, s390_NNPA_message_in + i,
|
||||
+ &parms->in[i], False);
|
||||
if (retval != ExtErr_OK)
|
||||
return retval;
|
||||
}
|
||||
for (UInt i = 0; i < out_tensors; i++) {
|
||||
- enum ExtensionError retval = NNPA_pre_write_tensor0(
|
||||
- tst, s390_NNPA_message_out + i, &parms->out[i]);
|
||||
+ UInt msg_idx = s390_NNPA_message_out + i;
|
||||
+ struct s390_NNPA_mem_dimensions md;
|
||||
+
|
||||
+ retval = NNPA_tensor_size(&parms->out[i], msg_idx, &md);
|
||||
if (retval != ExtErr_OK)
|
||||
return retval;
|
||||
+ PRE_MEM_WRITE(tst, s390_NNPA_errmsg_access[msg_idx],
|
||||
+ parms->out[i].address, md.total_size);
|
||||
}
|
||||
if (save_area_size != 0) {
|
||||
PRE_MEM_WRITE(tst, "NNPA(save_area)", parms->save_area_address,
|
||||
@@ -653,8 +719,10 @@ static enum ExtensionError do_extension_NNPA(ThreadState* tst, ULong variant)
|
||||
cc = do_NNPA_insn(&gpr0, parms_addr);
|
||||
if (cc == 0) {
|
||||
for (UInt i = 0; i < out_tensors; i++) {
|
||||
- NNPA_post_write_tensor0(tst, s390_NNPA_message_out + i,
|
||||
- &orig_parms.out[i]);
|
||||
+ retval = NNPA_track_tensor(tst, s390_NNPA_message_out + i,
|
||||
+ &orig_parms.out[i], True);
|
||||
+ if (retval != ExtErr_OK)
|
||||
+ return retval;
|
||||
}
|
||||
}
|
||||
}
|
||||
--
|
||||
2.51.0
|
||||
|
||||
BIN
valgrind-3.24.0.tar.bz2
LFS
BIN
valgrind-3.24.0.tar.bz2
LFS
Binary file not shown.
@@ -1,11 +0,0 @@
|
||||
-----BEGIN PGP SIGNATURE-----
|
||||
|
||||
iQEzBAABCAAdFiEEEnaKlnlZkBB6DS/f/FfjzKzZmngFAmckY48ACgkQ/FfjzKzZ
|
||||
mnh5AQgApprk2Em9+SG4jhXp3c7NxcBD8OCwKHWg8TI+l7myUKGfBdUyikMulTAE
|
||||
rtmzyoHhCCUZFgUf/85/NBw0w1BJn9VKSWLBRNhFMq9YCKSpU65bz1dMF5h3EviH
|
||||
H9itE1zv+F0qd3OLZ7hLXnl2KjhI7zBBHgT7NFA1Xb8akm3LeL70igvxrius4TIi
|
||||
+G0i3vbzxEkmeb0/uTLmfMrJD7P7hJ9WtPRTMCx2Pn/VS0ln670WSIJf74Zag2aa
|
||||
v4nTKd+jiCbYb5L+ZjbjFvuAbKCUs6t6/DSgn3A0G90HNCkea0jK7eCfZCmxeAAz
|
||||
ee2dSKYSfnM3caxq9E3dSBQvFtsiFg==
|
||||
=8BwH
|
||||
-----END PGP SIGNATURE-----
|
||||
BIN
valgrind-3.25.1.tar.bz2
LFS
Normal file
BIN
valgrind-3.25.1.tar.bz2
LFS
Normal file
Binary file not shown.
11
valgrind-3.25.1.tar.bz2.asc
Normal file
11
valgrind-3.25.1.tar.bz2.asc
Normal file
@@ -0,0 +1,11 @@
|
||||
-----BEGIN PGP SIGNATURE-----
|
||||
|
||||
iQEzBAABCAAdFiEEEnaKlnlZkBB6DS/f/FfjzKzZmngFAmgsZEoACgkQ/FfjzKzZ
|
||||
mnh+Pwf/UuzK/s83KgJuQFDPhc6vwQd4KYpeMWnXwzBRQgko72s9c3mmnl4UgK0t
|
||||
JkdsuRqTCYpOo32G5Yr2mr7ukZFW0KSVv+jOF/D9Lg+IxZQ4Z20iQQAZNGwxUXi9
|
||||
I8OA+UYr52lCl0hjOubYLTUydmWPzqzuECqru/7JZeDZxs2z1YhDUIaA/g9E73yn
|
||||
mMTPJYUA0J/SsNn1Qo0YxuFZSQo4761Wjjn4CmosGf9w1oZiGMkB/dH3ANtdglhB
|
||||
FXW5ndpOiytdIzxHB20Wez/qcWBX52nrjzh9UFiyAnDYFDa1usMxLd7XOaVYqusW
|
||||
Ky9j2UvFQShhPiKgqlBcjiqF3hNuMg==
|
||||
=7GVO
|
||||
-----END PGP SIGNATURE-----
|
||||
@@ -1,3 +1,53 @@
|
||||
-------------------------------------------------------------------
|
||||
Fri Sep 19 14:11:07 UTC 2025 - Dirk Müller <dmueller@suse.com>
|
||||
|
||||
- update to 3.25.1:
|
||||
* 503098 Incorrect NAN-boxing for float registers in RISC-V
|
||||
* 503641 close_range syscalls started failing with 3.25.0
|
||||
* 503914 mount syscall param filesystemtype may be NULL
|
||||
* 504177 FILE DESCRIPTORS banner shows when closing some
|
||||
inherited fds
|
||||
* 504265 FreeBSD: missing syscall wrappers for fchroot and
|
||||
setcred
|
||||
* 504466 Double close causes SEGV
|
||||
* The valgrind gdbserver now supports the GDB remote protocol
|
||||
packet 'x addr,len' (available in GDB release >= 16).
|
||||
The x packet can reduce the time taken by GDB to read
|
||||
memory from valgrind.
|
||||
* Valgrind now supports zstd compressed debug sections.
|
||||
* The Linux Test Project (ltp) is integrated in the testsuite;
|
||||
try 'make ltpchecks' (this will take a while and will point out
|
||||
various missing syscalls and valgrind crashes!)
|
||||
* Added RISCV64 support for Linux, specifically for the RV64GC
|
||||
instruction set.
|
||||
* Numerous bug fixes for Illumos, in particular fixed a
|
||||
Valgrind crash whenever a signal handler was called.
|
||||
* The --track-fds=yes and --track-fds=all options now treat all
|
||||
inherited file descriptors the same as 0, 1, 2
|
||||
(stdin/out/err).
|
||||
And when the stdin/out/err descriptors are reassigned they
|
||||
are now treated as normal (non-inherited) file descriptors.
|
||||
* A new option --modify-fds=high can be used together with
|
||||
--track-fds=yes to create new file descriptors with the
|
||||
highest possible number (and then decreasing) instead of always
|
||||
using the lowest possible number (which is required by POSIX).
|
||||
This will help catch issues where a file descriptor number
|
||||
might normally be reused between a close and another open call.
|
||||
* There is a change to warnings about calls to
|
||||
pthread_cond_signal and pthread_cond_broadcast when the
|
||||
associated mutex is unlocked. Previously
|
||||
Helgrind would always warn about this. Now this error is
|
||||
controlled by a command line option,
|
||||
--check-cond-signal-mutex=yes|no. The default is
|
||||
no. This change has been made because some C and C++
|
||||
standard libraries use
|
||||
pthread_cond_signal/pthread_cond_broadcast in this way.
|
||||
Users are obliged to use suppressions if they wish to avoid this
|
||||
noise.
|
||||
- add
|
||||
0001-Bug-503241-s390x-Support-z17-changes-to-the-NNPA-ins.patch
|
||||
(jsc#PED-13403)
|
||||
|
||||
-------------------------------------------------------------------
|
||||
Thu Jan 30 12:53:08 UTC 2025 - Dirk Müller <dmueller@suse.com>
|
||||
|
||||
@@ -84,7 +134,7 @@ Thu Jan 30 12:53:08 UTC 2025 - Dirk Müller <dmueller@suse.com>
|
||||
-------------------------------------------------------------------
|
||||
Sun Apr 28 15:47:13 UTC 2024 - Dirk Müller <dmueller@suse.com>
|
||||
|
||||
- update to 3.23.0 (jsc#PED-10260):
|
||||
- update to 3.23.0 (jsc#PED-10260) (bsc#1243671):
|
||||
* Added ARM64 support for FreeBSD.
|
||||
* ARM64 now supports dotprod instructions (sdot/udot).
|
||||
* AMD64 better supports code build with -march=x86-64-v3.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# spec file for package valgrind
|
||||
#
|
||||
# Copyright (c) 2025 SUSE LLC
|
||||
# Copyright (c) 2025 SUSE LLC and contributors
|
||||
#
|
||||
# All modifications and additions to the file contributed by third parties
|
||||
# remain the property of their copyright owners, unless otherwise agreed
|
||||
@@ -29,7 +29,7 @@
|
||||
%endif
|
||||
%bcond_without docs
|
||||
Name: valgrind%{?psuffix}
|
||||
Version: 3.24.0
|
||||
Version: 3.25.1
|
||||
Release: 0
|
||||
Summary: Memory Management Debugger
|
||||
License: GFDL-1.2-only AND GPL-2.0-or-later
|
||||
@@ -43,6 +43,7 @@ Source2: valgrind.keyring
|
||||
Patch0: valgrind.xen.patch
|
||||
# bko#276780 missing implementation for PINSRD
|
||||
Patch1: VEX-x86-pinsrd.patch
|
||||
Patch2: 0001-Bug-503241-s390x-Support-z17-changes-to-the-NNPA-ins.patch
|
||||
Patch10: dhat-use-datadir.patch
|
||||
BuildRequires: automake
|
||||
BuildRequires: pkgconfig
|
||||
@@ -373,6 +374,12 @@ VALGRIND_LIB=$PWD/.in_place VALGRIND_LIB_INNER=$PWD/.in_place ./coregrind/valgri
|
||||
%{_libexecdir}/valgrind/powerpc-altivec32l.xml
|
||||
%{_libexecdir}/valgrind/powerpc-altivec64l-valgrind.xml
|
||||
%{_libexecdir}/valgrind/powerpc-altivec64l.xml
|
||||
%{_libexecdir}/valgrind/riscv64-cpu.xml
|
||||
%{_libexecdir}/valgrind/riscv64-cpu-valgrind-s*.xml
|
||||
%{_libexecdir}/valgrind/riscv64-fpu-valgrind-s*.xml
|
||||
%{_libexecdir}/valgrind/riscv64-fpu.xml
|
||||
%{_libexecdir}/valgrind/riscv64-linux.xml
|
||||
%{_libexecdir}/valgrind/riscv64-linux-valgrind.xml
|
||||
|
||||
%else
|
||||
|
||||
|
||||
Reference in New Issue
Block a user