From c617fb498b2315efdccd799b8efb7a18a758fb36 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Thu, 22 Mar 2012 09:29:19 +0000 Subject: [PATCHv4] glx: Use ShmGetImage if possible. v2: Adam Jackson Fix image pitch bug. v3: Adam Jackson Rediff for 8.1 v4: Stefan Brüns The patch handles failing XShmAttach with a special error handler, but in case of an error it called XShmDetach unconditionally and unprotected. As there is no XSync it failed later on the first call to a function causing a XSync. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=917687 Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=807205 --- NOTE: v3 has not been applied on Fedora for a while (since 9.0): http://pkgs.fedoraproject.org/cgit/mesa.git/commit/?h=f18&id=9058f5a # this fastpath is: # - broken with swrast classic # - broken on 24bpp # - not a huge win anyway # - ABI-broken wrt upstream # - eventually obsoleted by vgem # # dear ajax: fix this one way or the other #patch9 -p1 -b .shmget #patch12 -p1 -b .16bpp v4 solves an issue on openSUSE:12.2. --- src/gallium/state_trackers/dri/sw/drisw.c | 11 --- src/glx/drisw_glx.c | 114 +++++++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 12 deletions(-) diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c index 41f66d5..28beb80 100644 --- a/src/gallium/state_trackers/dri/sw/drisw.c +++ b/src/gallium/state_trackers/dri/sw/drisw.c @@ -252,8 +252,6 @@ drisw_update_tex_buffer(struct dri_drawable *drawable, struct pipe_transfer *transfer; char *map; int x, y, w, h; - int ximage_stride, line; - int cpp = util_format_get_blocksize(res->format); get_drawable_info(dPriv, &x, &y, &w, &h); @@ -265,15 +263,6 @@ drisw_update_tex_buffer(struct dri_drawable *drawable, /* Copy the Drawable content to the mapped texture buffer */ get_image(dPriv, x, y, w, h, map); - /* The pipe transfer has a pitch rounded up to the nearest 64 pixels. - get_image() has a pitch rounded up to 4 bytes. 
 */ - ximage_stride = ((w * cpp) + 3) & -4; - for (line = h-1; line; --line) { - memmove(&map[line * transfer->stride], - &map[line * ximage_stride], - ximage_stride); - } - pipe_transfer_unmap(pipe, transfer); } diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c index 0583cd1..5643f15 100644 --- a/src/glx/drisw_glx.c +++ b/src/glx/drisw_glx.c @@ -24,6 +24,9 @@ #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) #include <X11/Xlib.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <X11/extensions/XShm.h> #include "glxclient.h" #include <dlfcn.h> #include "dri_common.h" @@ -206,6 +209,96 @@ swrastPutImage(__DRIdrawable * draw, int op, ximage->data = NULL; } +static int shm_error = 0; + +static int +shm_handler(Display *d, XErrorEvent *e) +{ + shm_error = 1; + return 0; +} + +static int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + +/* + * Slight fast path. Short of changing how texture memory is allocated, we + * have two options for getting the pixels out. GetImage is clamped by the + * server's write buffer size, so you end up doing lots of relatively small + * requests (128k each or so), with two memcpys: down into the kernel, and + * then back up. ShmGetImage is one big blit into the shm segment (which + * could be GPU DMA, in principle) and then another one here. 
+ */ +static Bool +swrastShmGetImage(__DRIdrawable *read, char *data, struct drisw_drawable *prp) +{ + __GLXDRIdrawable *pread = &(prp->base); + Display *dpy = pread->psc->dpy; + XImage *ximage = prp->ximage; + unsigned long image_size = (unsigned long)ximage->height * ximage->bytes_per_line; /* widen before multiplying so a large image cannot overflow int */ + Bool ret = 0; + XShmSegmentInfo seg = { 0, -1, (void *)-1, 0 }; /* shmseg 0, shmid -1, shmaddr -1 mean "not acquired"; tested at out: */ + int (*old_handler)(Display *, XErrorEvent *); + + if (shm_error || !XShmQueryExtension(dpy)) + goto out; + + /* image setup: owner-only segment, 0777 would let any local user read or overwrite the pixels */ + seg.shmid = shmget(IPC_PRIVATE, image_size, IPC_CREAT | 0600); + if (seg.shmid < 0) + goto out; + + seg.shmaddr = shmat(seg.shmid, NULL, 0); + if (seg.shmaddr == (void *)-1) + goto out; + + XSync(dpy, 0); /* drain earlier requests so their errors do not reach shm_handler */ + old_handler = XSetErrorHandler(shm_handler); + XShmAttach(dpy, &seg); + XSync(dpy, 0); /* force the attach result (and any error) to arrive while shm_handler is installed */ + XSetErrorHandler(old_handler); + if (shm_error) + goto out; + + ximage->data = seg.shmaddr; + ximage->obdata = &seg; + if (!XShmGetImage(dpy, pread->xDrawable, ximage, 0, 0, -1)) + goto out; + + /* + * ShmGetImage doesn't actually pay attention to ->bytes_per_line. + * We have to compensate for this somewhere since llvmpipe's natural + * tile width is 64. Do it here so we don't have to undo it with a + * bunch of memmove in the driver. 
+ */ + do { + int i; + char *src = ximage->data; + int dst_width = align(ximage->width * ximage->bits_per_pixel / 8, 256); + + for (i = 0; i < ximage->height; i++) { + memcpy(data, src, ximage->bytes_per_line); + data += dst_width; + src += ximage->bytes_per_line; + } + } while (0); + ret = 1; + +out: + ximage->obdata = NULL; + ximage->data = NULL; + if (shm_error == 0 && seg.shmseg != 0) /* detach only if XShmAttach ran (it assigns shmseg) and the server accepted it */ + XShmDetach(dpy, &seg); + if (seg.shmaddr != (void *)-1) + shmdt(seg.shmaddr); + if (seg.shmid > -1) + shmctl(seg.shmid, IPC_RMID, NULL); + return ret; +} + static void swrastGetImage(__DRIdrawable * read, int x, int y, int w, int h, @@ -220,13 +313,32 @@ swrastGetImage(__DRIdrawable * read, readable = pread->xDrawable; ximage = prp->ximage; - ximage->data = data; ximage->width = w; ximage->height = h; ximage->bytes_per_line = bytes_per_line(w * ximage->bits_per_pixel, 32); + /* XXX check dimensions, if any caller ever sub-images */ + if (swrastShmGetImage(read, data, prp)) + return; + + /* shm failed, fall back to protocol */ + ximage->data = data; + XGetSubImage(dpy, readable, x, y, w, h, ~0L, ZPixmap, ximage, 0, 0); + do { + int dst_width = align(ximage->width * ximage->bits_per_pixel / 8, 256); + int line; + + if (dst_width != ximage->bytes_per_line) { + for (line = ximage->height-1; line > 0; line--) { /* bottom-up so unmoved rows are not overwritten; "> 0" also ends the loop when height is 0 */ + memmove(&data[dst_width * line], + &data[ximage->bytes_per_line * line], + ximage->bytes_per_line); + } + } + } while (0); + ximage->data = NULL; } -- 1.8.1.4