diff --git a/Mesa.changes b/Mesa.changes index ba37e34..e948f86 100644 --- a/Mesa.changes +++ b/Mesa.changes @@ -1,3 +1,11 @@ +------------------------------------------------------------------- +Fri Apr 28 12:26:02 UTC 2017 - sndirsch@suse.com + +- u_gallivm-correct-channel-shift-logic-on-big-endian.patch: + * instead of reverse applying a change on s390x + ("U_draw-use-SoA-fetch-not-AoS-one.patch") address the + issue by a real fix (bsc#1032272, fdo#100613) + ------------------------------------------------------------------- Thu Apr 27 09:54:43 UTC 2017 - sndirsch@suse.com diff --git a/Mesa.spec b/Mesa.spec index 0a2a23f..7892cea 100644 --- a/Mesa.spec +++ b/Mesa.spec @@ -88,8 +88,7 @@ Patch32: archlinux_glvnd-fix-gl-dot-pc.patch Patch33: archlinux_0001-EGL-Implement-the-libglvnd-interface-for-EGL-v2.patch Patch34: archlinux_0002-fixup-EGL-Implement-the-libglvnd-interface-for-EGL-v.patch Patch35: fedora_0001-glxglvnddispatch-Add-missing-dispatch-for-GetDriverC.patch -# reverse-apply this to fix OpenGL support on s390x (bsc#1032272) -Patch40: U_draw-use-SoA-fetch-not-AoS-one.patch +Patch40: u_gallivm-correct-channel-shift-logic-on-big-endian.patch BuildRequires: autoconf >= 2.60 BuildRequires: automake @@ -663,10 +662,7 @@ rm -rf docs/README.{VMS,WIN32,OS2} %patch35 -p1 %endif -# reverse-apply this patch to fix OpenGL support on s390x (bsc#1032272) -%ifarch s390x -%patch40 -R -p1 -%endif +%patch40 -p1 # Remove requires to libglvnd0/libglvnd-devel from baselibs.conf when # disabling libglvnd build; ugly ... 
diff --git a/U_draw-use-SoA-fetch-not-AoS-one.patch b/U_draw-use-SoA-fetch-not-AoS-one.patch deleted file mode 100644 index 8d1bc51..0000000 --- a/U_draw-use-SoA-fetch-not-AoS-one.patch +++ /dev/null @@ -1,136 +0,0 @@ -From e827d9175675aaa6cfc0b981e2a80685fb7b3a74 Mon Sep 17 00:00:00 2001 -From: Roland Scheidegger -Date: Wed, 21 Dec 2016 04:43:07 +0100 -Subject: [PATCH] draw: use SoA fetch, not AoS one - -Now that there's some SoA fetch which never falls back, we should always get -results which are better or at least not worse (something like rgba32f will -stay the same). - -For cases which get way better, think something like R16_UNORM with 8-wide -vectors: this was 8 sign-extend fetches, 8 cvt, 8 muls, followed by -a couple of shuffles to stitch things together (if it is smart enough, -6 unpacks) and then a (8-wide) transpose (not sure if llvm could even -optimize the shuffles + transpose, since the 16bit values were actually -sign-extended to 128bit before being cast to a float vec, so that would be -another 8 unpacks). Now that is just 8 fetches (directly inserted into -vector, albeit there's one 128bit insert needed), 1 cvt, 1 mul. - -v2: ditch the old AoS code instead of just disabling it. 
- -Reviewed-by: Jose Fonseca ---- - src/gallium/auxiliary/draw/draw_llvm.c | 71 +++++++++++----------------------- - 1 file changed, 23 insertions(+), 48 deletions(-) - -diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c -index 19b75a5003..8952dc8d3b 100644 ---- a/src/gallium/auxiliary/draw/draw_llvm.c -+++ b/src/gallium/auxiliary/draw/draw_llvm.c -@@ -713,39 +713,6 @@ fetch_instanced(struct gallivm_state *gallivm, - - - static void --convert_to_soa(struct gallivm_state *gallivm, -- LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32], -- LLVMValueRef dst_soa[TGSI_NUM_CHANNELS], -- const struct lp_type soa_type) --{ -- unsigned j, k; -- struct lp_type aos_channel_type = soa_type; -- -- LLVMValueRef aos_channels[TGSI_NUM_CHANNELS]; -- unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS; -- -- debug_assert(TGSI_NUM_CHANNELS == 4); -- debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0); -- -- aos_channel_type.length >>= 1; -- -- for (j = 0; j < TGSI_NUM_CHANNELS; ++j) { -- LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 }; -- -- assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH); -- -- for (k = 0; k < pixels_per_channel; ++k) { -- channel[k] = src_aos[j + TGSI_NUM_CHANNELS * k]; -- } -- -- aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel); -- } -- -- lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa); --} -- -- --static void - fetch_vector(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - struct lp_type vs_type, -@@ -755,11 +722,10 @@ fetch_vector(struct gallivm_state *gallivm, - LLVMValueRef *inputs, - LLVMValueRef indices) - { -- LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); - LLVMBuilderRef builder = gallivm->builder; - struct lp_build_context blduivec; -+ struct lp_type fetch_type = vs_type; - LLVMValueRef offset, valid_mask; -- LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32]; - unsigned 
i; - - lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type)); -@@ -783,28 +749,37 @@ fetch_vector(struct gallivm_state *gallivm, - } - - /* -- * Note: we probably really want to use SoA fetch, not AoS one (albeit -- * for most formats it will amount to the same as this isn't very -- * optimized). But looks dangerous since it assumes alignment. -+ * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches. -+ * This should always produce better code. - */ -- for (i = 0; i < vs_type.length; i++) { -- LLVMValueRef offset1, elem; -- elem = lp_build_const_int32(gallivm, i); -- offset1 = LLVMBuildExtractElement(builder, offset, elem, ""); - -- aos_fetch[i] = lp_build_fetch_rgba_aos(gallivm, format_desc, -- lp_float32_vec4_type(), -- FALSE, map_ptr, offset1, -- zero, zero, NULL); -+ /* The type handling is annoying here... */ -+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && -+ format_desc->channel[0].pure_integer) { -+ if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { -+ fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length); -+ } -+ else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { -+ fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length); -+ } - } -- convert_to_soa(gallivm, aos_fetch, inputs, vs_type); -+ -+ lp_build_fetch_rgba_soa(gallivm, format_desc, -+ fetch_type, FALSE, map_ptr, offset, -+ blduivec.zero, blduivec.zero, -+ NULL, inputs); - - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { -+ inputs[i] = LLVMBuildBitCast(builder, inputs[i], -+ lp_build_vec_type(gallivm, vs_type), ""); -+ } -+ -+ /* out-of-bound fetches return all zeros */ -+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, ""); - inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, ""); - inputs[i] = LLVMBuildBitCast(builder, inputs[i], - lp_build_vec_type(gallivm, vs_type), ""); -- - } - } - --- -2.12.0 - diff --git 
a/u_gallivm-correct-channel-shift-logic-on-big-endian.patch b/u_gallivm-correct-channel-shift-logic-on-big-endian.patch new file mode 100644 index 0000000..f348f79 --- /dev/null +++ b/u_gallivm-correct-channel-shift-logic-on-big-endian.patch @@ -0,0 +1,42 @@ +From ab0e1e38ab9e8b23602af21d1b43e98afbf80e45 Mon Sep 17 00:00:00 2001 +From: Ray Strode +Date: Fri, 21 Apr 2017 15:22:14 -0400 +Subject: [PATCH] gallivm: correct channel shift logic on big endian + +lp_build_fetch_rgba_soa fetches a texel from a texture. +Part of that process involves first gathering the element +together from memory into a packed format, and then breaking +out the individual color channels into separate, parallel +arrays. + +The code fails to account for endianness when reading the packed +values. + +This commit attempts to correct the problem by reversing the order +the packed values are read on big endian systems. +--- + src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +index 98eb694..22c19b1 100644 +--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c ++++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +@@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, + for (i = 0; i < format_desc->nr_channels; i++) { + struct util_format_channel_description chan_desc = format_desc->channel[i]; + unsigned blockbits = type.width; +- unsigned vec_nr = chan_desc.shift / type.width; ++ unsigned vec_nr; ++ ++#ifdef PIPE_ARCH_BIG_ENDIAN ++ vec_nr = (format_desc->block.bits - (chan_desc.shift + chan_desc.size)) / type.width; ++#else ++ vec_nr = chan_desc.shift / type.width; ++#endif + chan_desc.shift %= type.width; + + output[i] = lp_build_extract_soa_chan(&bld, +-- +1.8.3.1 +