140 lines
4.9 KiB
Diff
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
|
Date: Wed Feb 12 09:04:22 2025 +0100
|
|
Subject: Restore the non-vectorized code from before PR4880 for POWER8 (sgemv_t.c)
|
|
Patch-mainline: Not yet
|
|
Git-repo: https://github.com/OpenMathLib/OpenBLAS
|
|
Git-commit: 98b5ef929cfc98f2f3c236966830276c255118d2
|
|
References: bsc#1239134
|
|
|
|
|
|
Signed-off-by: Egbert Eich <eich@suse.de>
|
|
---
|
|
kernel/power/sgemv_t.c | 23 +++++++++++++++++++----
|
|
1 file changed, 19 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/kernel/power/sgemv_t.c b/kernel/power/sgemv_t.c
|
|
index e133c815c..ed0a24230 100644
|
|
--- a/kernel/power/sgemv_t.c
|
|
+++ b/kernel/power/sgemv_t.c
|
|
@@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
temp7 += v_x[i] * va7[i];
|
|
}
|
|
|
|
-
|
|
+ #if defined(POWER8)
|
|
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
|
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
|
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
|
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
|
+
|
|
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
|
|
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
|
|
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
|
|
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
|
|
+ #else
|
|
register __vector float t0, t1, t2, t3;
|
|
register __vector float a = { alpha, alpha, alpha, alpha };
|
|
__vector float *v_y = (__vector float*) y;
|
|
@@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
|
|
v_y[0] += a * temp0;
|
|
v_y[1] += a * temp4;
|
|
-
|
|
+#endif
|
|
}
|
|
|
|
|
|
@@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
temp2 += v_x[i] * va2[i];
|
|
temp3 += v_x[i] * va3[i];
|
|
}
|
|
-
|
|
+ #if defined(POWER8)
|
|
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
|
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
|
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
|
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
|
+ #else
|
|
register __vector float t0, t1, t2, t3;
|
|
register __vector float a = { alpha, alpha, alpha, alpha };
|
|
__vector float *v_y = (__vector float*) y;
|
|
@@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
temp0 += temp1 + temp2 + temp3;
|
|
|
|
v_y[0] += a * temp0;
|
|
-
|
|
+#endif
|
|
}
|
|
|
|
|
|
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
|
|
Date: Wed Feb 12 09:07:20 2025 +0100
|
|
Subject: Restore the non-vectorized code from before PR4880 for POWER8 (sgemv_t_8.c)
|
|
Patch-mainline: Not yet
|
|
Git-repo: https://github.com/OpenMathLib/OpenBLAS
|
|
Git-commit: 81eed868b68c72ea1868663902f0904dc1b22326
|
|
References: bsc#1239134
|
|
|
|
|
|
Signed-off-by: Egbert Eich <eich@suse.de>
|
|
---
|
|
kernel/power/sgemv_t_8.c | 24 ++++++++++++++++++++----
|
|
1 file changed, 20 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/kernel/power/sgemv_t_8.c b/kernel/power/sgemv_t_8.c
|
|
index f21f6eb7d..b30bb1137 100644
|
|
--- a/kernel/power/sgemv_t_8.c
|
|
+++ b/kernel/power/sgemv_t_8.c
|
|
@@ -99,7 +99,17 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
temp7 += vx1* va7_1 + vx2 * va7_2;
|
|
}
|
|
|
|
-
|
|
+ #if defined(POWER8)
|
|
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
|
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
|
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
|
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
|
+
|
|
+ y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]);
|
|
+ y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]);
|
|
+ y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]);
|
|
+ y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]);
|
|
+ #else
|
|
register __vector float t0, t1, t2, t3;
|
|
register __vector float a = { alpha, alpha, alpha, alpha };
|
|
__vector float *v_y = (__vector float*) y;
|
|
@@ -126,7 +136,7 @@ static void sgemv_kernel_8x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
|
|
v_y[0] += a * temp0;
|
|
v_y[1] += a * temp4;
|
|
-
|
|
+#endif
|
|
}
|
|
|
|
|
|
@@ -153,7 +163,13 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
temp2 += v_x[i] * va2[i] + v_x[i+1] * va2[i+1];
|
|
temp3 += v_x[i] * va3[i] + v_x[i+1] * va3[i+1];
|
|
}
|
|
-
|
|
+
|
|
+ #if defined(POWER8)
|
|
+ y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]);
|
|
+ y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]);
|
|
+ y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]);
|
|
+ y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]);
|
|
+ #else
|
|
register __vector float t0, t1, t2, t3;
|
|
register __vector float a = { alpha, alpha, alpha, alpha };
|
|
__vector float *v_y = (__vector float*) y;
|
|
@@ -169,7 +185,7 @@ static void sgemv_kernel_8x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|
temp0 += temp1 + temp2 + temp3;
|
|
|
|
v_y[0] += a * temp0;
|
|
-
|
|
+#endif
|
|
}
|
|
|
|
|