- u_gallivm-correct-channel-shift-logic-on-big-endian.patch:

* instead of reverse-applying a change on s390x
    ("U_draw-use-SoA-fetch-not-AoS-one.patch"), address the
    issue with a real fix (bsc#1032272, fdo#100613)

OBS-URL: https://build.opensuse.org/package/show/X11:XOrg/Mesa?expand=0&rev=607
This commit is contained in:
Stefan Dirsch 2017-04-28 13:08:39 +00:00 committed by Git OBS Bridge
parent c554e12bef
commit a77ad11291
4 changed files with 52 additions and 142 deletions

View File

@ -1,3 +1,11 @@
-------------------------------------------------------------------
Fri Apr 28 12:26:02 UTC 2017 - sndirsch@suse.com
- u_gallivm-correct-channel-shift-logic-on-big-endian.patch:
* instead of reverse applying a change on s390x
("U_draw-use-SoA-fetch-not-AoS-one.patch") address the
issue by a real fix (bsc#1032272, fdo#100613)
-------------------------------------------------------------------
Thu Apr 27 09:54:43 UTC 2017 - sndirsch@suse.com

View File

@ -88,8 +88,7 @@ Patch32: archlinux_glvnd-fix-gl-dot-pc.patch
Patch33: archlinux_0001-EGL-Implement-the-libglvnd-interface-for-EGL-v2.patch
Patch34: archlinux_0002-fixup-EGL-Implement-the-libglvnd-interface-for-EGL-v.patch
Patch35: fedora_0001-glxglvnddispatch-Add-missing-dispatch-for-GetDriverC.patch
# reverse-apply this to fix OpenGL support on s390x (bsc#1032272)
Patch40: U_draw-use-SoA-fetch-not-AoS-one.patch
Patch40: u_gallivm-correct-channel-shift-logic-on-big-endian.patch
BuildRequires: autoconf >= 2.60
BuildRequires: automake
@ -663,10 +662,7 @@ rm -rf docs/README.{VMS,WIN32,OS2}
%patch35 -p1
%endif
# reverse-apply this patch to fix OpenGL support on s390x (bsc#1032272)
%ifarch s390x
%patch40 -R -p1
%endif
%patch40 -p1
# Remove requires to libglvnd0/libglvnd-devel from baselibs.conf when
# disabling libglvnd build; ugly ...

View File

@ -1,136 +0,0 @@
From e827d9175675aaa6cfc0b981e2a80685fb7b3a74 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 21 Dec 2016 04:43:07 +0100
Subject: [PATCH] draw: use SoA fetch, not AoS one
Now that there's some SoA fetch which never falls back, we should always get
results which are better or at least not worse (something like rgba32f will
stay the same).
For cases which get way better, think something like R16_UNORM with 8-wide
vectors: this was 8 sign-extend fetches, 8 cvt, 8 muls, followed by
a couple of shuffles to stitch things together (if it is smart enough,
6 unpacks) and then a (8-wide) transpose (not sure if llvm could even
optimize the shuffles + transpose, since the 16bit values were actually
sign-extended to 128bit before being cast to a float vec, so that would be
another 8 unpacks). Now that is just 8 fetches (directly inserted into
vector, albeit there's one 128bit insert needed), 1 cvt, 1 mul.
v2: ditch the old AoS code instead of just disabling it.
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
src/gallium/auxiliary/draw/draw_llvm.c | 71 +++++++++++-----------------------
1 file changed, 23 insertions(+), 48 deletions(-)
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 19b75a5003..8952dc8d3b 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -713,39 +713,6 @@ fetch_instanced(struct gallivm_state *gallivm,
static void
-convert_to_soa(struct gallivm_state *gallivm,
- LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32],
- LLVMValueRef dst_soa[TGSI_NUM_CHANNELS],
- const struct lp_type soa_type)
-{
- unsigned j, k;
- struct lp_type aos_channel_type = soa_type;
-
- LLVMValueRef aos_channels[TGSI_NUM_CHANNELS];
- unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS;
-
- debug_assert(TGSI_NUM_CHANNELS == 4);
- debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0);
-
- aos_channel_type.length >>= 1;
-
- for (j = 0; j < TGSI_NUM_CHANNELS; ++j) {
- LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 };
-
- assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH);
-
- for (k = 0; k < pixels_per_channel; ++k) {
- channel[k] = src_aos[j + TGSI_NUM_CHANNELS * k];
- }
-
- aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel);
- }
-
- lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa);
-}
-
-
-static void
fetch_vector(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
struct lp_type vs_type,
@@ -755,11 +722,10 @@ fetch_vector(struct gallivm_state *gallivm,
LLVMValueRef *inputs,
LLVMValueRef indices)
{
- LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context blduivec;
+ struct lp_type fetch_type = vs_type;
LLVMValueRef offset, valid_mask;
- LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32];
unsigned i;
lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
@@ -783,28 +749,37 @@ fetch_vector(struct gallivm_state *gallivm,
}
/*
- * Note: we probably really want to use SoA fetch, not AoS one (albeit
- * for most formats it will amount to the same as this isn't very
- * optimized). But looks dangerous since it assumes alignment.
+ * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
+ * This should always produce better code.
*/
- for (i = 0; i < vs_type.length; i++) {
- LLVMValueRef offset1, elem;
- elem = lp_build_const_int32(gallivm, i);
- offset1 = LLVMBuildExtractElement(builder, offset, elem, "");
- aos_fetch[i] = lp_build_fetch_rgba_aos(gallivm, format_desc,
- lp_float32_vec4_type(),
- FALSE, map_ptr, offset1,
- zero, zero, NULL);
+ /* The type handling is annoying here... */
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+ format_desc->channel[0].pure_integer) {
+ if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
+ }
+ else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
+ }
}
- convert_to_soa(gallivm, aos_fetch, inputs, vs_type);
+
+ lp_build_fetch_rgba_soa(gallivm, format_desc,
+ fetch_type, FALSE, map_ptr, offset,
+ blduivec.zero, blduivec.zero,
+ NULL, inputs);
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+ inputs[i] = LLVMBuildBitCast(builder, inputs[i],
+ lp_build_vec_type(gallivm, vs_type), "");
+ }
+
+ /* out-of-bound fetches return all zeros */
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
inputs[i] = LLVMBuildBitCast(builder, inputs[i],
lp_build_vec_type(gallivm, vs_type), "");
-
}
}
--
2.12.0

View File

@ -0,0 +1,42 @@
From ab0e1e38ab9e8b23602af21d1b43e98afbf80e45 Mon Sep 17 00:00:00 2001
From: Ray Strode <rstrode@redhat.com>
Date: Fri, 21 Apr 2017 15:22:14 -0400
Subject: [PATCH] gallivm: correct channel shift logic on big endian
lp_build_fetch_rgba_soa fetches a texel from a texture.
Part of that process involves first gathering the element
together from memory into a packed format, and then breaking
out the individual color channels into separate, parallel
arrays.
The code fails to account for endianness when reading the packed
values.
This commit attempts to correct the problem by reversing the order
in which the packed values are read on big-endian systems.
---
src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 98eb694..22c19b1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
for (i = 0; i < format_desc->nr_channels; i++) {
struct util_format_channel_description chan_desc = format_desc->channel[i];
unsigned blockbits = type.width;
- unsigned vec_nr = chan_desc.shift / type.width;
+ unsigned vec_nr;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ vec_nr = (format_desc->block.bits - (chan_desc.shift + chan_desc.size)) / type.width;
+#else
+ vec_nr = chan_desc.shift / type.width;
+#endif
chan_desc.shift %= type.width;
output[i] = lp_build_extract_soa_chan(&bld,
--
1.8.3.1