diff --git a/Mesa.changes b/Mesa.changes index 321a7e7..6ce5cbf 100644 --- a/Mesa.changes +++ b/Mesa.changes @@ -1,3 +1,10 @@ +------------------------------------------------------------------- +Fri Apr 7 14:51:09 UTC 2017 - sndirsch@suse.com + +- U_draw-use-SoA-fetch-not-AoS-one.patch + * reverse-apply this patch to fix OpenGL support on s390x + (bsc#1032272) + ------------------------------------------------------------------- Wed Apr 5 11:32:26 UTC 2017 - afaerber@suse.de diff --git a/Mesa.spec b/Mesa.spec index 3344262..d3167fb 100644 --- a/Mesa.spec +++ b/Mesa.spec @@ -90,6 +90,8 @@ Patch32: archlinux_glvnd-fix-gl-dot-pc.patch Patch33: archlinux_0001-EGL-Implement-the-libglvnd-interface-for-EGL-v2.patch Patch34: archlinux_0002-fixup-EGL-Implement-the-libglvnd-interface-for-EGL-v.patch Patch35: fedora_0001-glxglvnddispatch-Add-missing-dispatch-for-GetDriverC.patch +# reverse-apply this to fix OpenGL support on s390x (bsc#1032272) +Patch40: U_draw-use-SoA-fetch-not-AoS-one.patch # Nouveau multithreading workarounds from https://github.com/imirkin/mesa/commits/locking Patch61: N_01-WIP-nouveau-add-locking.patch @@ -667,6 +669,9 @@ rm -rf docs/README.{VMS,WIN32,OS2} %patch35 -p1 %endif +# reverse-apply this patch to fix OpenGL support on s390x (bsc#1032272) +%patch40 -R -p1 + %if %{use_broken_nouveau_locking_patches} %patch61 -p1 %patch62 -p1 diff --git a/U_draw-use-SoA-fetch-not-AoS-one.patch b/U_draw-use-SoA-fetch-not-AoS-one.patch new file mode 100644 index 0000000..8d1bc51 --- /dev/null +++ b/U_draw-use-SoA-fetch-not-AoS-one.patch @@ -0,0 +1,136 @@ +From e827d9175675aaa6cfc0b981e2a80685fb7b3a74 Mon Sep 17 00:00:00 2001 +From: Roland Scheidegger +Date: Wed, 21 Dec 2016 04:43:07 +0100 +Subject: [PATCH] draw: use SoA fetch, not AoS one + +Now that there's some SoA fetch which never falls back, we should always get +results which are better or at least not worse (something like rgba32f will +stay the same). + +For cases which get way better, think something like R16_UNORM with 8-wide +vectors: this was 8 sign-extend fetches, 8 cvt, 8 muls, followed by +a couple of shuffles to stitch things together (if it is smart enough, +6 unpacks) and then a (8-wide) transpose (not sure if llvm could even +optimize the shuffles + transpose, since the 16bit values were actually +sign-extended to 128bit before being cast to a float vec, so that would be +another 8 unpacks). Now that is just 8 fetches (directly inserted into +vector, albeit there's one 128bit insert needed), 1 cvt, 1 mul. + +v2: ditch the old AoS code instead of just disabling it. + +Reviewed-by: Jose Fonseca +--- + src/gallium/auxiliary/draw/draw_llvm.c | 71 +++++++++++----------------------- + 1 file changed, 23 insertions(+), 48 deletions(-) + +diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c +index 19b75a5003..8952dc8d3b 100644 +--- a/src/gallium/auxiliary/draw/draw_llvm.c ++++ b/src/gallium/auxiliary/draw/draw_llvm.c +@@ -713,39 +713,6 @@ fetch_instanced(struct gallivm_state *gallivm, + + + static void +-convert_to_soa(struct gallivm_state *gallivm, +- LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32], +- LLVMValueRef dst_soa[TGSI_NUM_CHANNELS], +- const struct lp_type soa_type) +-{ +- unsigned j, k; +- struct lp_type aos_channel_type = soa_type; +- +- LLVMValueRef aos_channels[TGSI_NUM_CHANNELS]; +- unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS; +- +- debug_assert(TGSI_NUM_CHANNELS == 4); +- debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0); +- +- aos_channel_type.length >>= 1; +- +- for (j = 0; j < TGSI_NUM_CHANNELS; ++j) { +- LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 }; +- +- assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH); +- +- for (k = 0; k < pixels_per_channel; ++k) { +- channel[k] = src_aos[j + TGSI_NUM_CHANNELS * k]; +- } +- +- aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel); +- } +- +- lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa); +-} +- +- +-static void + fetch_vector(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + struct lp_type vs_type, +@@ -755,11 +722,10 @@ fetch_vector(struct gallivm_state *gallivm, + LLVMValueRef *inputs, + LLVMValueRef indices) + { +- LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context blduivec; ++ struct lp_type fetch_type = vs_type; + LLVMValueRef offset, valid_mask; +- LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32]; + unsigned i; + + lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type)); +@@ -783,28 +749,37 @@ fetch_vector(struct gallivm_state *gallivm, + } + + /* +- * Note: we probably really want to use SoA fetch, not AoS one (albeit +- * for most formats it will amount to the same as this isn't very +- * optimized). But looks dangerous since it assumes alignment. ++ * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches. ++ * This should always produce better code. + */ +- for (i = 0; i < vs_type.length; i++) { +- LLVMValueRef offset1, elem; +- elem = lp_build_const_int32(gallivm, i); +- offset1 = LLVMBuildExtractElement(builder, offset, elem, ""); + +- aos_fetch[i] = lp_build_fetch_rgba_aos(gallivm, format_desc, +- lp_float32_vec4_type(), +- FALSE, map_ptr, offset1, +- zero, zero, NULL); ++ /* The type handling is annoying here... */ ++ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && ++ format_desc->channel[0].pure_integer) { ++ if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { ++ fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length); ++ } ++ else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { ++ fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length); ++ } + } +- convert_to_soa(gallivm, aos_fetch, inputs, vs_type); ++ ++ lp_build_fetch_rgba_soa(gallivm, format_desc, ++ fetch_type, FALSE, map_ptr, offset, ++ blduivec.zero, blduivec.zero, ++ NULL, inputs); + + for (i = 0; i < TGSI_NUM_CHANNELS; i++) { ++ inputs[i] = LLVMBuildBitCast(builder, inputs[i], ++ lp_build_vec_type(gallivm, vs_type), ""); ++ } ++ ++ /* out-of-bound fetches return all zeros */ ++ for (i = 0; i < TGSI_NUM_CHANNELS; i++) { + inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, ""); + inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, ""); + inputs[i] = LLVMBuildBitCast(builder, inputs[i], + lp_build_vec_type(gallivm, vs_type), ""); +- + } + } + +-- +2.12.0 +