Accepting request 487587 from X11:XOrg

- only reverse-apply 'U_draw-use-SoA-fetch-not-AoS-one.patch' on 
  s390x (bsc#1032272)

- build wayland on Leap >= 42.3
- separate package 'Mesa-dri-nouveau' on Leap

- removed broken locking patches for nouveau DRI driver
  * N_01-WIP-nouveau-add-locking.patch
  * N_02-nouveau-more-locking-make-sure-that-fence-work-is-always-done-with-the-push-mutex-acquired.patch
  * N_03-nv30-locking-fixes.patch
  * N_04-nv50-Fix-double-lock-in-nv50_hw_sm_get_query_result.patch
  * N_05-Use-nv50_render_condition-in-nv50_blitctx_post_blit.patch

- let Mesa require Mesa-libGL1 for a libglvnd build (bsc#1033708)

- U_draw-use-SoA-fetch-not-AoS-one.patch 
  * reverse-apply this patch to fix OpenGL support on s390x
    (bsc#1032272)

OBS-URL: https://build.opensuse.org/request/show/487587
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/Mesa?expand=0&rev=287
This commit is contained in:
Yuchen Lin 2017-04-17 08:20:20 +00:00 committed by Git OBS Bridge
commit 1d3dddd2bf
8 changed files with 188 additions and 2058 deletions

View File

@@ -1,3 +1,37 @@
-------------------------------------------------------------------
Wed Apr 12 13:41:30 UTC 2017 - sndirsch@suse.com
- only reverse-apply 'U_draw-use-SoA-fetch-not-AoS-one.patch' on
s390x (bsc#1032272)
-------------------------------------------------------------------
Wed Apr 12 12:31:34 UTC 2017 - sndirsch@suse.com
- build wayland on Leap >= 42.3
- separate package 'Mesa-dri-nouveau' on Leap
-------------------------------------------------------------------
Wed Apr 12 12:15:27 UTC 2017 - sndirsch@suse.com
- removed broken locking patches for nouveau DRI driver
* N_01-WIP-nouveau-add-locking.patch
* N_02-nouveau-more-locking-make-sure-that-fence-work-is-always-done-with-the-push-mutex-acquired.patch
* N_03-nv30-locking-fixes.patch
* N_04-nv50-Fix-double-lock-in-nv50_hw_sm_get_query_result.patch
* N_05-Use-nv50_render_condition-in-nv50_blitctx_post_blit.patch
-------------------------------------------------------------------
Wed Apr 12 09:44:35 UTC 2017 - sndirsch@suse.com
- let Mesa require Mesa-libGL1 for a libglvnd build (bsc#1033708)
-------------------------------------------------------------------
Fri Apr 7 14:51:09 UTC 2017 - sndirsch@suse.com
- U_draw-use-SoA-fetch-not-AoS-one.patch
* reverse-apply this patch to fix OpenGL support on s390x
(bsc#1032272)
-------------------------------------------------------------------
Wed Apr 5 11:32:26 UTC 2017 - afaerber@suse.de

View File

@@ -16,10 +16,6 @@
#
# Only enable the Nouveau locking patches if you know what you're doing.
# They may fix KDE on Nouveau. They may also deadlock your userland.
%define use_broken_nouveau_locking_patches 0
%define libglvnd 0
%if 0%{?suse_version} >= 1330
%define libglvnd 1
@@ -90,13 +86,8 @@ Patch32: archlinux_glvnd-fix-gl-dot-pc.patch
Patch33: archlinux_0001-EGL-Implement-the-libglvnd-interface-for-EGL-v2.patch
Patch34: archlinux_0002-fixup-EGL-Implement-the-libglvnd-interface-for-EGL-v.patch
Patch35: fedora_0001-glxglvnddispatch-Add-missing-dispatch-for-GetDriverC.patch
# Nouveau multithreading workarounds from https://github.com/imirkin/mesa/commits/locking
Patch61: N_01-WIP-nouveau-add-locking.patch
Patch62: N_02-nouveau-more-locking-make-sure-that-fence-work-is-always-done-with-the-push-mutex-acquired.patch
Patch63: N_03-nv30-locking-fixes.patch
Patch64: N_04-nv50-Fix-double-lock-in-nv50_hw_sm_get_query_result.patch
Patch65: N_05-Use-nv50_render_condition-in-nv50_blitctx_post_blit.patch
# reverse-apply this to fix OpenGL support on s390x (bsc#1032272)
Patch40: U_draw-use-SoA-fetch-not-AoS-one.patch
BuildRequires: autoconf >= 2.60
BuildRequires: automake
@@ -163,9 +154,9 @@ BuildRequires: libelf-devel
%endif
%endif
# Requirements for wayland bumped up from 17.0
%if 0%{?suse_version} > 1320
BuildRequires: pkgconfig(wayland-client)
BuildRequires: pkgconfig(wayland-server)
%if 0%{?suse_version} > 1320 || (0%{?sle_version} >= 120300 && 0%{?is_opensuse})
BuildRequires: pkgconfig(wayland-client) >= 1.11
BuildRequires: pkgconfig(wayland-server) >= 1.11
%endif
%ifarch aarch64 %arm ppc64 ppc64le s390x %ix86 x86_64
BuildRequires: llvm-devel
@@ -178,6 +169,7 @@ BuildRequires: llvm-clang-devel
%endif
%if 0%{?libglvnd}
Requires: Mesa-libGL1 = %{version}
Requires: libglvnd0 >= 0.1.0
%endif
@@ -214,7 +206,7 @@ Obsoletes: Mesa-devel-static < %{version}
Obsoletes: xorg-x11-Mesa-devel < %{version}
Provides: Mesa-libIndirectGL-devel = %{version}
Obsoletes: Mesa-libIndirectGL-devel < %{version}
%if 0%{?suse_version} > 1320
%if 0%{?suse_version} > 1320 || (0%{?sle_version} >= 120300 && 0%{?is_opensuse})
Requires: libwayland-egl-devel
%endif
@@ -399,8 +391,8 @@ applications using the OpenGL|ES 3.x APIs.
%package -n libOSMesa8
Summary: Mesa Off-screen rendering extension
Group: System/Libraries
# Wrongly named package shipped .so.8
Group: System/Libraries
Obsoletes: libOSMesa9 < %{version}-%{release}
Provides: libOSMesa9 = %{version}-%{release}
@@ -506,8 +498,8 @@ implementation of Mesa.
%package libd3d
Summary: Mesa Direct3D9 state tracker
Group: System/Libraries
# Manually provide d3d library (bnc#918294)
Group: System/Libraries
%ifarch x86_64 s390x ppc64le aarch64
Provides: d3dadapter9.so.1()(64bit)
%else
@@ -667,12 +659,9 @@ rm -rf docs/README.{VMS,WIN32,OS2}
%patch35 -p1
%endif
%if %{use_broken_nouveau_locking_patches}
%patch61 -p1
%patch62 -p1
%patch63 -p1
%patch64 -p1
%patch65 -p1
# reverse-apply this patch to fix OpenGL support on s390x (bsc#1032272)
%ifarch s390x
%patch40 -R -p1
%endif
# Remove requires to libglvnd0/libglvnd-devel from baselibs.conf when
@@ -683,7 +672,7 @@ grep -v libglvnd $RPM_SOURCE_DIR/baselibs.conf > $RPM_SOURCE_DIR/temp && \
%endif
%build
%if 0%{?suse_version} > 1320
%if 0%{?suse_version} > 1320 || (0%{?sle_version} >= 120300 && 0%{?is_opensuse})
egl_platforms=x11,drm,wayland
%else
egl_platforms=x11,drm
@@ -854,10 +843,12 @@ done
%config %{_sysconfdir}/drirc
%dir %{_libdir}/dri
%{_libdir}/dri/*_dri.so
%if 0%{?is_opensuse}
%ifarch %ix86 x86_64 aarch64 %arm ppc64 ppc64le
%exclude %{_libdir}/dri/nouveau_dri.so
%exclude %{_libdir}/dri/nouveau_vieux_dri.so
%endif
%endif
%if 0%{with_opencl}
# only built with opencl
%dir %{_libdir}/gallium-pipe/
@@ -953,7 +944,7 @@ done
%{_libdir}/libOSMesa.so
%{_libdir}/pkgconfig/osmesa.pc
%if 0%{?suse_version} > 1320
%if 0%{?suse_version} > 1320 || (0%{?sle_version} >= 120300 && 0%{?is_opensuse})
%files -n libwayland-egl1
%defattr(-,root,root)
%{_libdir}/libwayland-egl.so.1*
@@ -1049,11 +1040,13 @@ done
%{_includedir}/GL/internal
%{_libdir}/pkgconfig/dri.pc
%if 0%{?is_opensuse}
%ifarch %ix86 x86_64 aarch64 %arm ppc64 ppc64le
%files -n Mesa-dri-nouveau
%{_libdir}/dri/nouveau_dri.so
%{_libdir}/dri/nouveau_vieux_dri.so
%endif
%endif
%files devel
%defattr(-,root,root)

File diff suppressed because it is too large. (Load Diff)

View File

@@ -1,96 +0,0 @@
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon Jun 6 20:30:48 2016 -0400
Subject: [PATCH 2/5]nouveau: more locking - make sure that fence work is always done with the push mutex acquired
Patch-mainline: N/A
References: boo#997171
Signed-off-by: Max Staudt <mstaudt@suse.de>
Cherry-picked from 2733e5483e1c2b80e4b0ae21187ec5e3e1579397
at https://github.com/imirkin/mesa.git
Signed-off-by: Max Staudt <mstaudt@suse.de>
---
src/gallium/drivers/nouveau/nouveau_buffer.c | 4 ++++
src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 7 +++++--
src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 5 ++++-
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 5 ++++-
4 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index b54c19b..a5a06cf 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -80,6 +80,8 @@ release_allocation(struct nouveau_mm_allocation **mm,
inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
+ if (buf->fence)
+ pipe_mutex_lock(buf->fence->screen->push_mutex);
if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
buf->bo = NULL;
@@ -89,6 +91,8 @@ nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
if (buf->mm)
release_allocation(&buf->mm, buf->fence);
+ if (buf->fence)
+ pipe_mutex_unlock(buf->fence->screen->push_mutex);
if (buf->domain == NOUVEAU_BO_VRAM)
NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index 7450119..38e4faf 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -163,10 +163,13 @@ nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
{
struct nv50_miptree *mt = nv50_miptree(pt);
- if (mt->base.fence && mt->base.fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
+ if (mt->base.fence && mt->base.fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
+ pipe_mutex_lock(nouveau_screen(pscreen)->push_mutex);
nouveau_fence_work(mt->base.fence, nouveau_fence_unref_bo, mt->base.bo);
- else
+ pipe_mutex_unlock(nouveau_screen(pscreen)->push_mutex);
+ } else {
nouveau_bo_ref(NULL, &mt->base.bo);
+ }
nouveau_fence_ref(NULL, &mt->base.fence);
nouveau_fence_ref(NULL, &mt->base.fence_wr);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 9067bcc..d2ad72e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -56,9 +56,12 @@ nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
if (hq->mm) {
if (hq->state == NV50_HW_QUERY_STATE_READY)
nouveau_mm_free(hq->mm);
- else
+ else {
+ pipe_mutex_lock(screen->base.push_mutex);
nouveau_fence_work(screen->base.fence.current,
nouveau_mm_free_work, hq->mm);
+ pipe_mutex_unlock(screen->base.push_mutex);
+ }
}
}
if (size) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index f2584cb..4b51a67 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -48,9 +48,12 @@ nvc0_hw_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q,
if (hq->mm) {
if (hq->state == NVC0_HW_QUERY_STATE_READY)
nouveau_mm_free(hq->mm);
- else
+ else {
+ pipe_mutex_lock(screen->base.push_mutex);
nouveau_fence_work(screen->base.fence.current,
nouveau_mm_free_work, hq->mm);
+ pipe_mutex_unlock(screen->base.push_mutex);
+ }
}
}
if (size) {

View File

@@ -1,120 +0,0 @@
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue Jun 21 22:59:50 2016 -0400
Subject: [PATCH 3/5]nv30 locking fixes
Patch-mainline: N/A
References: boo#997171
Signed-off-by: Max Staudt <mstaudt@suse.de>
Cherry-picked from 940b3a773f264f3f52574160f0d06c48f8e8aeb2
at https://github.com/imirkin/mesa.git
Signed-off-by: Max Staudt <mstaudt@suse.de>
---
src/gallium/drivers/nouveau/nv30/nv30_draw.c | 20 ++++++++++++++++++--
src/gallium/drivers/nouveau/nv30/nv30_fragprog.c | 4 ++++
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
index 7b0d074..1c71534 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
@@ -127,6 +127,8 @@ nv30_render_draw_elements(struct vbuf_render *render,
struct nouveau_pushbuf *push = nv30->screen->base.pushbuf;
unsigned i;
+ pipe_mutex_lock(nv30->screen->base.push_mutex);
+
BEGIN_NV04(push, NV30_3D(VTXBUF(0)), r->vertex_info.num_attribs);
for (i = 0; i < r->vertex_info.num_attribs; i++) {
PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP,
@@ -134,8 +136,10 @@ nv30_render_draw_elements(struct vbuf_render *render,
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, ~0, false))
+ if (!nv30_state_validate(nv30, ~0, false)) {
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
return;
+ }
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
PUSH_DATA (push, r->prim);
@@ -160,6 +164,8 @@ nv30_render_draw_elements(struct vbuf_render *render,
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
PUSH_RESET(push, BUFCTX_VTXTMP);
+
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
}
static void
@@ -172,6 +178,8 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
unsigned ps = fn + (pn ? 1 : 0);
unsigned i;
+ pipe_mutex_lock(nv30->screen->base.push_mutex);
+
BEGIN_NV04(push, NV30_3D(VTXBUF(0)), r->vertex_info.num_attribs);
for (i = 0; i < r->vertex_info.num_attribs; i++) {
PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP,
@@ -179,8 +187,10 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, ~0, false))
+ if (!nv30_state_validate(nv30, ~0, false)) {
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
return;
+ }
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
PUSH_DATA (push, r->prim);
@@ -197,6 +207,8 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
PUSH_RESET(push, BUFCTX_VTXTMP);
+
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
}
static void
@@ -383,6 +395,8 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv30_render_validate(nv30);
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
+
if (nv30->draw_dirty & NV30_NEW_VIEWPORT)
draw_set_viewport_states(draw, 0, 1, &nv30->viewport);
if (nv30->draw_dirty & NV30_NEW_RASTERIZER)
@@ -448,6 +462,8 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (transfer[i])
pipe_buffer_unmap(pipe, transfer[i]);
+ pipe_mutex_lock(nv30->screen->base.push_mutex);
+
nv30->draw_dirty = 0;
nv30_state_release(nv30);
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
index 6de61bc..fd21f99 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
@@ -38,6 +38,8 @@ nv30_fragprog_upload(struct nv30_context *nv30)
struct nv30_fragprog *fp = nv30->fragprog.program;
struct pipe_context *pipe = &nv30->base.pipe;
+ pipe_mutex_unlock(nv->screen->push_mutex);
+
if (unlikely(!fp->buffer))
fp->buffer = pipe_buffer_create(pipe->screen, 0, 0, fp->insn_len * 4);
@@ -60,6 +62,8 @@ nv30_fragprog_upload(struct nv30_context *nv30)
if (nv04_resource(fp->buffer)->domain != NOUVEAU_BO_VRAM)
nouveau_buffer_migrate(nv, nv04_resource(fp->buffer), NOUVEAU_BO_VRAM);
+
+ pipe_mutex_lock(nv->screen->push_mutex);
}
void

View File

@@ -1,25 +0,0 @@
From: Max Staudt <mstaudt@suse.de>
Date: Wed Oct 5 18:49:41 2016 +0200
Subject: [PATCH 4/5]nv50: Fix double lock in nv50_hw_sm_get_query_result()
Patch-mainline: N/A
References: boo#997171
Signed-off-by: Max Staudt <mstaudt@suse.de>
Signed-off-by: Max Staudt <mstaudt@suse.de>
---
src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index 31445eb..acc64ac 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -351,7 +351,7 @@ nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq,
pipe_mutex_lock(nv50->screen->base.push_mutex);
ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count);
- pipe_mutex_lock(nv50->screen->base.push_mutex);
+ pipe_mutex_unlock(nv50->screen->base.push_mutex);
if (!ret)
return false;

View File

@@ -1,29 +0,0 @@
From: Max Staudt <mstaudt@suse.de>
Date: Wed Oct 5 18:51:38 2016 +0200
Subject: [PATCH 5/5]Use nv50_render_condition() in nv50_blitctx_post_blit()
Patch-mainline: N/A
References: boo#997171
Signed-off-by: Max Staudt <mstaudt@suse.de>
Analogous to what happens in nvc0_blitctx_post_blit()
Signed-off-by: Max Staudt <mstaudt@suse.de>
---
src/gallium/drivers/nouveau/nv50/nv50_surface.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index d6b9de0..36cd72b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -1328,8 +1328,8 @@ nv50_blitctx_post_blit(struct nv50_blitctx *blit)
nv50->samplers[2][1] = blit->saved.sampler[1];
if (nv50->cond_query && !blit->render_condition_enable)
- nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query,
- nv50->cond_cond, nv50->cond_mode);
+ nv50_render_condition(&nv50->base.pipe, nv50->cond_query,
+ nv50->cond_cond, nv50->cond_mode);
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);

View File

@@ -0,0 +1,136 @@
From e827d9175675aaa6cfc0b981e2a80685fb7b3a74 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 21 Dec 2016 04:43:07 +0100
Subject: [PATCH] draw: use SoA fetch, not AoS one
Now that there's some SoA fetch which never falls back, we should always get
results which are better or at least not worse (something like rgba32f will
stay the same).
For cases which get way better, think something like R16_UNORM with 8-wide
vectors: this was 8 sign-extend fetches, 8 cvt, 8 muls, followed by
a couple of shuffles to stitch things together (if it is smart enough,
6 unpacks) and then a (8-wide) transpose (not sure if llvm could even
optimize the shuffles + transpose, since the 16bit values were actually
sign-extended to 128bit before being cast to a float vec, so that would be
another 8 unpacks). Now that is just 8 fetches (directly inserted into
vector, albeit there's one 128bit insert needed), 1 cvt, 1 mul.
v2: ditch the old AoS code instead of just disabling it.
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
src/gallium/auxiliary/draw/draw_llvm.c | 71 +++++++++++-----------------------
1 file changed, 23 insertions(+), 48 deletions(-)
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 19b75a5003..8952dc8d3b 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -713,39 +713,6 @@ fetch_instanced(struct gallivm_state *gallivm,
static void
-convert_to_soa(struct gallivm_state *gallivm,
- LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32],
- LLVMValueRef dst_soa[TGSI_NUM_CHANNELS],
- const struct lp_type soa_type)
-{
- unsigned j, k;
- struct lp_type aos_channel_type = soa_type;
-
- LLVMValueRef aos_channels[TGSI_NUM_CHANNELS];
- unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS;
-
- debug_assert(TGSI_NUM_CHANNELS == 4);
- debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0);
-
- aos_channel_type.length >>= 1;
-
- for (j = 0; j < TGSI_NUM_CHANNELS; ++j) {
- LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 };
-
- assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH);
-
- for (k = 0; k < pixels_per_channel; ++k) {
- channel[k] = src_aos[j + TGSI_NUM_CHANNELS * k];
- }
-
- aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel);
- }
-
- lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa);
-}
-
-
-static void
fetch_vector(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
struct lp_type vs_type,
@@ -755,11 +722,10 @@ fetch_vector(struct gallivm_state *gallivm,
LLVMValueRef *inputs,
LLVMValueRef indices)
{
- LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context blduivec;
+ struct lp_type fetch_type = vs_type;
LLVMValueRef offset, valid_mask;
- LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32];
unsigned i;
lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
@@ -783,28 +749,37 @@ fetch_vector(struct gallivm_state *gallivm,
}
/*
- * Note: we probably really want to use SoA fetch, not AoS one (albeit
- * for most formats it will amount to the same as this isn't very
- * optimized). But looks dangerous since it assumes alignment.
+ * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
+ * This should always produce better code.
*/
- for (i = 0; i < vs_type.length; i++) {
- LLVMValueRef offset1, elem;
- elem = lp_build_const_int32(gallivm, i);
- offset1 = LLVMBuildExtractElement(builder, offset, elem, "");
- aos_fetch[i] = lp_build_fetch_rgba_aos(gallivm, format_desc,
- lp_float32_vec4_type(),
- FALSE, map_ptr, offset1,
- zero, zero, NULL);
+ /* The type handling is annoying here... */
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+ format_desc->channel[0].pure_integer) {
+ if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
+ }
+ else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
+ }
}
- convert_to_soa(gallivm, aos_fetch, inputs, vs_type);
+
+ lp_build_fetch_rgba_soa(gallivm, format_desc,
+ fetch_type, FALSE, map_ptr, offset,
+ blduivec.zero, blduivec.zero,
+ NULL, inputs);
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+ inputs[i] = LLVMBuildBitCast(builder, inputs[i],
+ lp_build_vec_type(gallivm, vs_type), "");
+ }
+
+ /* out-of-bound fetches return all zeros */
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
inputs[i] = LLVMBuildBitCast(builder, inputs[i],
lp_build_vec_type(gallivm, vs_type), "");
-
}
}
--
2.12.0