Mesa/u_mesa-8.0-llvmpipe-shmget.patch

218 lines
6.4 KiB
Diff
Raw Normal View History

From c617fb498b2315efdccd799b8efb7a18a758fb36 Mon Sep 17 00:00:00 2001
From: Adam Jackson <ajax@redhat.com>
Date: Thu, 22 Mar 2012 09:29:19 +0000
Subject: [PATCHv4] glx: Use ShmGetImage if possible.
v2: Adam Jackson <ajax@redhat.com>
Fix image pitch bug.
v3: Adam Jackson <ajax@redhat.com>
Rediff for 8.1
v4: Stefan Brüns <stefan.bruens@rwth-aachen.de>
The patch handles a failing XShmAttach with a special error handler, but in
case of an error it still called XShmDetach unconditionally and without any
error-handler protection. As there was no XSync after the detach, the resulting
X error only surfaced later, on the first call to a function that triggers an
XSync.
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=917687
Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=807205
---
NOTE:
v3 has not been applied on Fedora for a while (since 9.0):
http://pkgs.fedoraproject.org/cgit/mesa.git/commit/?h=f18&id=9058f5a
# this fastpath is:
# - broken with swrast classic
# - broken on 24bpp
# - not a huge win anyway
# - ABI-broken wrt upstream
# - eventually obsoleted by vgem
#
# dear ajax: fix this one way or the other
#patch9 -p1 -b .shmget
#patch12 -p1 -b .16bpp
v4 solves an issue on openSUSE:12.2.
---
src/gallium/state_trackers/dri/sw/drisw.c | 11 ---
src/glx/drisw_glx.c | 114 +++++++++++++++++++++++++++++-
2 files changed, 113 insertions(+), 12 deletions(-)
diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c
index 41f66d5..28beb80 100644
--- a/src/gallium/state_trackers/dri/sw/drisw.c
+++ b/src/gallium/state_trackers/dri/sw/drisw.c
@@ -252,8 +252,6 @@ drisw_update_tex_buffer(struct dri_drawable *drawable,
struct pipe_transfer *transfer;
char *map;
int x, y, w, h;
- int ximage_stride, line;
- int cpp = util_format_get_blocksize(res->format);
get_drawable_info(dPriv, &x, &y, &w, &h);
@@ -265,15 +263,6 @@ drisw_update_tex_buffer(struct dri_drawable *drawable,
/* Copy the Drawable content to the mapped texture buffer */
get_image(dPriv, x, y, w, h, map);
- /* The pipe transfer has a pitch rounded up to the nearest 64 pixels.
- get_image() has a pitch rounded up to 4 bytes. */
- ximage_stride = ((w * cpp) + 3) & -4;
- for (line = h-1; line; --line) {
- memmove(&map[line * transfer->stride],
- &map[line * ximage_stride],
- ximage_stride);
- }
-
pipe_transfer_unmap(pipe, transfer);
}
diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c
index 0583cd1..5643f15 100644
--- a/src/glx/drisw_glx.c
+++ b/src/glx/drisw_glx.c
@@ -24,6 +24,9 @@
#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
#include <X11/Xlib.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <X11/extensions/XShm.h>
#include "glxclient.h"
#include <dlfcn.h>
#include "dri_common.h"
@@ -206,6 +209,96 @@ swrastPutImage(__DRIdrawable * draw, int op,
ximage->data = NULL;
}
+/* Set to 1 by shm_handler() when an X error is reported while it is installed. */
+static int shm_error = 0;
+
+/*
+ * X error handler installed temporarily around XShmAttach().  It only records
+ * that an error occurred (in shm_error) and returns 0; the display and the
+ * error event themselves are not inspected.
+ */
+static int
+shm_handler(Display *d, XErrorEvent *e)
+{
+   (void) d;
+   (void) e;
+
+   shm_error = 1;
+   return 0;
+}
+
+/*
+ * Round value up to a multiple of alignment using a bit mask.  The result is
+ * only a true multiple when alignment is a power of two.
+ */
+static int
+align(int value, int alignment)
+{
+   const int mask = alignment - 1;
+
+   return (value + mask) & ~mask;
+}
+
+/*
+ * Slight fast path.  Short of changing how texture memory is allocated, we
+ * have two options for getting the pixels out.  GetImage is clamped by the
+ * server's write buffer size, so you end up doing lots of relatively small
+ * requests (128k each or so), with two memcpys: down into the kernel, and
+ * then back up.  ShmGetImage is one big blit into the shm segment (which
+ * could be GPU DMA, in principle) and then another one here.
+ *
+ * read: unused here.
+ * data: destination buffer; rows are written with a pitch of the image's
+ *       byte width rounded up to 256 bytes.
+ * prp:  drawable whose ximage (width/height/bytes_per_line already set by the
+ *       caller) and X drawable are read from.
+ *
+ * Returns 1 if the pixels were fetched through MIT-SHM and copied into data,
+ * 0 otherwise, in which case the caller is expected to fall back to the
+ * protocol path.  ximage->data and ximage->obdata are NULL on return.
+ *
+ * shm_error is not cleared here, so once an attach has failed the fast path
+ * is skipped on every later call.
+ */
+static Bool
+swrastShmGetImage(__DRIdrawable *read, char *data, struct drisw_drawable *prp)
+{
+   __GLXDRIdrawable *pread = &(prp->base);
+   Display *dpy = pread->psc->dpy;
+   XImage *ximage = prp->ximage;
+   /* Widen before multiplying so large images do not overflow int. */
+   unsigned long image_size =
+      (unsigned long) ximage->height * ximage->bytes_per_line;
+   Bool ret = 0;
+   Bool attached = 0;
+   XShmSegmentInfo seg = { 0, -1, (void *)-1, 0 };
+   int (*old_handler)(Display *, XErrorEvent *);
+   char *src;
+   int dst_width;
+   int i;
+
+   if (shm_error || !XShmQueryExtension(dpy))
+      goto out;
+
+   /*
+    * image setup.  The segment is created owner-only (0600): it carries the
+    * drawable's pixels and must not be readable or writable by other users.
+    */
+   seg.shmid = shmget(IPC_PRIVATE, image_size, IPC_CREAT | 0600);
+   if (seg.shmid < 0)
+      goto out;
+
+   seg.shmaddr = shmat(seg.shmid, NULL, 0);
+   if (seg.shmaddr == (void *)-1)
+      goto out;
+
+   /*
+    * Flush pending requests first so that any error caught by shm_handler
+    * belongs to the attach, then sync again so the error (if any) arrives
+    * while our handler is still installed.
+    */
+   XSync(dpy, 0);
+   old_handler = XSetErrorHandler(shm_handler);
+   XShmAttach(dpy, &seg);
+   XSync(dpy, 0);
+   XSetErrorHandler(old_handler);
+   if (shm_error)
+      goto out;
+   attached = 1;
+
+   ximage->data = seg.shmaddr;
+   ximage->obdata = &seg;
+   if (!XShmGetImage(dpy, pread->xDrawable, ximage, 0, 0, -1))
+      goto out;
+
+   /*
+    * ShmGetImage doesn't actually pay attention to ->bytes_per_line.
+    * We have to compensate for this somewhere since llvmpipe's natural
+    * tile width is 64.  Do it here so we don't have to undo it with a
+    * bunch of memmove in the driver.
+    */
+   src = ximage->data;
+   dst_width = align(ximage->width * ximage->bits_per_pixel / 8, 256);
+   for (i = 0; i < ximage->height; i++) {
+      memcpy(data, src, ximage->bytes_per_line);
+      data += dst_width;
+      src += ximage->bytes_per_line;
+   }
+   ret = 1;
+
+out:
+   ximage->obdata = NULL;
+   ximage->data = NULL;
+   /*
+    * Only detach a segment the server actually attached.  Detaching after a
+    * failed shmget()/shmat()/XShmAttach() would send a request for an invalid
+    * segment and raise an unprotected X error later on.
+    */
+   if (attached)
+      XShmDetach(dpy, &seg);
+   if (seg.shmaddr != (void *)-1)
+      shmdt(seg.shmaddr);
+   if (seg.shmid > -1)
+      shmctl(seg.shmid, IPC_RMID, NULL);
+   return ret;
+}
+
static void
swrastGetImage(__DRIdrawable * read,
int x, int y, int w, int h,
@@ -220,13 +313,32 @@ swrastGetImage(__DRIdrawable * read,
readable = pread->xDrawable;
ximage = prp->ximage;
- ximage->data = data;
ximage->width = w;
ximage->height = h;
ximage->bytes_per_line = bytes_per_line(w * ximage->bits_per_pixel, 32);
+ /* XXX check dimensions, if any caller ever sub-images */
+ if (swrastShmGetImage(read, data, prp))
+ return;
+
+ /* shm failed, fall back to protocol */
+ ximage->data = data;
+
XGetSubImage(dpy, readable, x, y, w, h, ~0L, ZPixmap, ximage, 0, 0);
+ do {
+ int dst_width = align(ximage->width * ximage->bits_per_pixel / 8, 256);
+ int line;
+
+ if (dst_width != ximage->bytes_per_line) {
+ for (line = ximage->height-1; line; line--) {
+ memmove(&data[dst_width * line],
+ &data[ximage->bytes_per_line * line],
+ dst_width);
+ }
+ }
+ } while (0);
+
ximage->data = NULL;
}
--
1.8.1.4