From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat Jun 4 19:26:26 2016 -0400
Subject: [PATCH 1/5]WIP nouveau: add locking
Patch-mainline: N/A
References: boo#997171
Signed-off-by: Max Staudt <mstaudt@suse.de>

Cherry-picked from 2e6b0e24cfb0f467e8b6d6f394730442a72dcdaf
                at https://github.com/imirkin/mesa.git

Signed-off-by: Max Staudt <mstaudt@suse.de>

Conflicts:
	src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
---
 src/gallium/drivers/nouveau/nouveau_buffer.c       | 29 +++++++++--
 src/gallium/drivers/nouveau/nouveau_fence.c        | 18 +++++++
 src/gallium/drivers/nouveau/nouveau_fence.h        |  5 +-
 src/gallium/drivers/nouveau/nouveau_screen.c       | 12 ++++-
 src/gallium/drivers/nouveau/nouveau_screen.h       |  3 ++
 src/gallium/drivers/nouveau/nv30/nv30_clear.c      | 22 +++++++--
 src/gallium/drivers/nouveau/nv30/nv30_context.c    |  6 +++
 src/gallium/drivers/nouveau/nv30/nv30_miptree.c    | 13 ++++-
 src/gallium/drivers/nouveau/nv30/nv30_query.c      |  9 +++-
 src/gallium/drivers/nouveau/nv30/nv30_vbo.c        |  5 ++
 src/gallium/drivers/nouveau/nv50/nv50_compute.c    |  4 ++
 src/gallium/drivers/nouveau/nv50/nv50_context.c    | 11 +++++
 src/gallium/drivers/nouveau/nv50/nv50_context.h    |  5 ++
 src/gallium/drivers/nouveau/nv50/nv50_query.c      | 14 +++++-
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c   | 20 ++++++--
 .../drivers/nouveau/nv50/nv50_query_hw_sm.c        |  8 ++++
 src/gallium/drivers/nouveau/nv50/nv50_surface.c    | 43 +++++++++++++++--
 src/gallium/drivers/nouveau/nv50/nv50_transfer.c   |  6 +++
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c        |  5 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    |  6 +++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c    | 15 ++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  5 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c      | 14 +++++-
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   | 16 ++++++-
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c        |  8 ++++
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c    | 49 ++++++++++++++++---
 src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c   | 56 +++++++++++++++++-----
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c        |  4 ++
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c    |  4 ++
 30 files changed, 372 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 2db538c..b54c19b 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -380,6 +380,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
                             struct pipe_transfer **ptransfer)
 {
    struct nouveau_context *nv = nouveau_context(pipe);
+   struct nouveau_screen *screen = nv->screen;
    struct nv04_resource *buf = nv04_resource(resource);
    struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
    uint8_t *map;
@@ -427,14 +428,19 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
                buf->data = NULL;
             }
             nouveau_transfer_staging(nv, tx, false);
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_transfer_read(nv, tx);
+            pipe_mutex_unlock(screen->push_mutex);
          } else {
             /* The buffer is currently idle. Create a staging area for writes,
              * and make sure that the cached data is up-to-date. */
             if (usage & PIPE_TRANSFER_WRITE)
                nouveau_transfer_staging(nv, tx, true);
-            if (!buf->data)
+            if (!buf->data) {
+               pipe_mutex_lock(screen->push_mutex);
                nouveau_buffer_cache(nv, buf);
+               pipe_mutex_unlock(screen->push_mutex);
+            }
          }
       }
       return buf->data ? (buf->data + box->x) : tx->map;
@@ -479,7 +485,9 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
       if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
          /* Discarding was not possible, must sync because
           * subsequent transfers might use UNSYNCHRONIZED. */
+         pipe_mutex_lock(screen->push_mutex);
          nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
+         pipe_mutex_unlock(screen->push_mutex);
       } else
       if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
          /* The whole range is being discarded, so it doesn't matter what was
@@ -488,10 +496,13 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
          map = tx->map;
       } else
       if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
-         if (usage & PIPE_TRANSFER_DONTBLOCK)
+         if (usage & PIPE_TRANSFER_DONTBLOCK) {
             map = NULL;
-         else
+         } else {
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
+            pipe_mutex_unlock(screen->push_mutex);
+         }
       } else {
          /* It is expected that the returned buffer be a representation of the
           * data in question, so we must copy it over from the buffer. */
@@ -515,9 +526,13 @@ nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
 {
    struct nouveau_transfer *tx = nouveau_transfer(transfer);
    struct nv04_resource *buf = nv04_resource(transfer->resource);
+   struct nouveau_screen *screen = nouveau_context(pipe)->screen;

-   if (tx->map)
+   if (tx->map) {
+      pipe_mutex_lock(screen->push_mutex);
       nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
+      pipe_mutex_unlock(screen->push_mutex);
+   }

    util_range_add(&buf->valid_buffer_range,
                   tx->base.box.x + box->x,
@@ -537,11 +552,15 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
    struct nouveau_context *nv = nouveau_context(pipe);
    struct nouveau_transfer *tx = nouveau_transfer(transfer);
    struct nv04_resource *buf = nv04_resource(transfer->resource);
+   struct nouveau_screen *screen = nouveau_context(pipe)->screen;

    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
       if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
-         if (tx->map)
+         if (tx->map) {
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+            pipe_mutex_unlock(screen->push_mutex);
+         }

          util_range_add(&buf->valid_buffer_range,
                         tx->base.box.x, tx->base.box.x + tx->base.box.width);
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index 691553a..9cbfc2a 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -71,12 +71,14 @@ nouveau_fence_emit(struct nouveau_fence *fence)

    ++fence->ref;

+   pipe_mutex_lock(screen->fence.list_mutex);
    if (screen->fence.tail)
       screen->fence.tail->next = fence;
    else
       screen->fence.head = fence;

    screen->fence.tail = fence;
+   pipe_mutex_unlock(screen->fence.list_mutex);

    screen->fence.emit(&screen->base, &fence->sequence);

@@ -90,6 +92,9 @@ nouveau_fence_del(struct nouveau_fence *fence)
    struct nouveau_fence *it;
    struct nouveau_screen *screen = fence->screen;

+   /* XXX This can race against fence_update. But fence_update can also call
+    * into this, so ... be have to be careful.
+    */
    if (fence->state == NOUVEAU_FENCE_STATE_EMITTED ||
        fence->state == NOUVEAU_FENCE_STATE_FLUSHED) {
       if (fence == screen->fence.head) {
@@ -123,6 +128,7 @@ nouveau_fence_update(struct nouveau_screen *screen, bool flushed)
       return;
    screen->fence.sequence_ack = sequence;

+   pipe_mutex_lock(screen->fence.list_mutex);
    for (fence = screen->fence.head; fence; fence = next) {
       next = fence->next;
       sequence = fence->sequence;
@@ -144,6 +150,7 @@ nouveau_fence_update(struct nouveau_screen *screen, bool flushed)
          if (fence->state == NOUVEAU_FENCE_STATE_EMITTED)
             fence->state = NOUVEAU_FENCE_STATE_FLUSHED;
    }
+   pipe_mutex_unlock(screen->fence.list_mutex);
 }

 #define NOUVEAU_FENCE_MAX_SPINS (1 << 31)
@@ -198,18 +205,27 @@ nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debu
    uint32_t spins = 0;
    int64_t start = 0;

+   /* Fast-path for the case where the fence is already signaled to avoid
+    * messing around with mutexes and timing.
+    */
+   if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
+      return true;
+
    if (debug && debug->debug_message)
       start = os_time_get_nano();

    if (!nouveau_fence_kick(fence))
       return false;

+   pipe_mutex_unlock(screen->push_mutex);
+
    do {
       if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
          if (debug && debug->debug_message)
             pipe_debug_message(debug, PERF_INFO,
                                "stalled %.3f ms waiting for fence",
                                (os_time_get_nano() - start) / 1000000.f);
+         pipe_mutex_lock(screen->push_mutex);
          return true;
       }
       if (!spins)
@@ -227,6 +243,8 @@ nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debu
                 fence->sequence,
                 screen->fence.sequence_ack, screen->fence.sequence);

+   pipe_mutex_lock(screen->push_mutex);
+
    return false;
 }

diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
index f10016d..98d021e 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -2,6 +2,7 @@
 #ifndef __NOUVEAU_FENCE_H__
 #define __NOUVEAU_FENCE_H__

+#include "util/u_atomic.h"
 #include "util/u_inlines.h"
 #include "util/list.h"

@@ -47,10 +48,10 @@ static inline void
 nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
 {
    if (fence)
-      ++fence->ref;
+      p_atomic_inc(&fence->ref);

    if (*ref) {
-      if (--(*ref)->ref == 0)
+      if (p_atomic_dec_zero(&(*ref)->ref))
          nouveau_fence_del(*ref);
    }

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 2c421cc..634fdc3 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -73,10 +73,14 @@ nouveau_screen_fence_finish(struct pipe_screen *screen,
                             struct pipe_fence_handle *pfence,
                             uint64_t timeout)
 {
+   bool ret;
    if (!timeout)
       return nouveau_fence_signalled(nouveau_fence(pfence));

-   return nouveau_fence_wait(nouveau_fence(pfence), NULL);
+   pipe_mutex_lock(nouveau_screen(screen)->push_mutex);
+   ret = nouveau_fence_wait(nouveau_fence(pfence), NULL);
+   pipe_mutex_unlock(nouveau_screen(screen)->push_mutex);
+   return ret;
 }


@@ -153,6 +157,9 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
    if (nv_dbg)
       nouveau_mesa_debug = atoi(nv_dbg);

+   pipe_mutex_init(screen->push_mutex);
+   pipe_mutex_init(screen->fence.list_mutex);
+
    /* These must be set before any failure is possible, as the cleanup
     * paths assume they're responsible for deleting them.
     */
@@ -253,6 +260,9 @@ nouveau_screen_fini(struct nouveau_screen *screen)
    nouveau_device_del(&screen->device);
    nouveau_drm_del(&screen->drm);
    close(fd);
+
+   pipe_mutex_destroy(screen->push_mutex);
+   pipe_mutex_destroy(screen->fence.list_mutex);
 }

 static void
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 28c4760..28c8620 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -3,6 +3,7 @@

 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
+#include "os/os_thread.h"

 #ifdef DEBUG
 # define NOUVEAU_ENABLE_DRIVER_STATISTICS
@@ -22,6 +23,7 @@ struct nouveau_screen {
    struct nouveau_object *channel;
    struct nouveau_client *client;
    struct nouveau_pushbuf *pushbuf;
+   pipe_mutex push_mutex;

    int refcount;

@@ -39,6 +41,7 @@ struct nouveau_screen {
       struct nouveau_fence *head;
       struct nouveau_fence *tail;
       struct nouveau_fence *current;
+      pipe_mutex list_mutex;
       u32 sequence;
       u32 sequence_ack;
       void (*emit)(struct pipe_screen *, u32 *sequence);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
index c8fa38e..3a72354 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -58,8 +58,11 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers,
    struct pipe_framebuffer_state *fb = &nv30->framebuffer;
    uint32_t colr = 0, zeta = 0, mode = 0;

-   if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, true))
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
+   if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, true)) {
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
+   }

    if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
       colr  = pack_rgba(fb->cbufs[0]->format, color->f);
@@ -96,6 +99,7 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers,
    PUSH_DATA (push, mode);

    nv30_state_release(nv30);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }

 static void
@@ -125,11 +129,15 @@ nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,
       rt_format |= NV30_3D_RT_FORMAT_TYPE_LINEAR;
    }

+   pipe_mutex_lock(nv30->screen->base.push_mutex);
+
    refn.bo = mt->base.bo;
    refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
    if (nouveau_pushbuf_space(push, 16, 1, 0) ||
-       nouveau_pushbuf_refn (push, &refn, 1))
+       nouveau_pushbuf_refn (push, &refn, 1)) {
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
+   }

    BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
    PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0);
@@ -154,6 +162,8 @@ nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,
                     NV30_3D_CLEAR_BUFFERS_COLOR_B |
                     NV30_3D_CLEAR_BUFFERS_COLOR_A);

+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
+
    nv30->dirty |= NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR;
 }

@@ -189,11 +199,15 @@ nv30_clear_depth_stencil(struct pipe_context *pipe, struct pipe_surface *ps,
    if (buffers & PIPE_CLEAR_STENCIL)
       mode |= NV30_3D_CLEAR_BUFFERS_STENCIL;

+   pipe_mutex_lock(nv30->screen->base.push_mutex);
+
    refn.bo = mt->base.bo;
    refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
    if (nouveau_pushbuf_space(push, 32, 1, 0) ||
-       nouveau_pushbuf_refn (push, &refn, 1))
+       nouveau_pushbuf_refn (push, &refn, 1)) {
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
+   }

    BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
    PUSH_DATA (push, 0);
@@ -219,6 +233,8 @@ nv30_clear_depth_stencil(struct pipe_context *pipe, struct pipe_surface *ps,
    BEGIN_NV04(push, NV30_3D(CLEAR_BUFFERS), 1);
    PUSH_DATA (push, mode);

+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
+
    nv30->dirty |= NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR;
 }

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c
index 3ed0889..fbc4136 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c
@@ -201,6 +201,8 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    if (!nv30)
       return NULL;

+   pipe_mutex_lock(screen->base.push_mutex);
+
    nv30->screen = screen;
    nv30->base.screen = &screen->base;
    nv30->base.copy_data = nv30_transfer_copy_data;
@@ -226,6 +228,7 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    ret = nouveau_bufctx_new(nv30->base.client, 64, &nv30->bufctx);
    if (ret) {
       nv30_context_destroy(pipe);
+      pipe_mutex_unlock(screen->base.push_mutex);
       return NULL;
    }

@@ -259,10 +262,13 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    nv30->blitter = util_blitter_create(pipe);
    if (!nv30->blitter) {
       nv30_context_destroy(pipe);
+      pipe_mutex_unlock(screen->base.push_mutex);
       return NULL;
    }

    nouveau_context_init_vdec(&nv30->base);

+   pipe_mutex_unlock(screen->base.push_mutex);
+
    return pipe;
 }
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index c6f6965..389bfaa 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -130,10 +130,12 @@ nv30_resource_copy_region(struct pipe_context *pipe,
    struct nv30_context *nv30 = nv30_context(pipe);
    struct nv30_rect src, dst;

+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    if (dstres->target == PIPE_BUFFER && srcres->target == PIPE_BUFFER) {
       nouveau_copy_buffer(&nv30->base,
                           nv04_resource(dstres), dstx,
                           nv04_resource(srcres), src_box->x, src_box->width);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
    }

@@ -143,6 +145,7 @@ nv30_resource_copy_region(struct pipe_context *pipe,
                        src_box->width, src_box->height, &dst);

    nv30_transfer_rect(nv30, NEAREST, &src, &dst);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }

 static void
@@ -163,6 +166,7 @@ nv30_resource_resolve(struct nv30_context *nv30,
    y1 = src.y1;

    /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    for (y = src.y0; y < y1; y += h) {
       h = y1 - y;
       if (h > 1024)
@@ -193,6 +197,7 @@ nv30_resource_resolve(struct nv30_context *nv30,
          nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
       }
    }
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }

 void
@@ -308,8 +313,12 @@ nv30_miptree_transfer_map(struct pipe_context *pipe, struct pipe_resource *pt,
    tx->tmp.y1     = tx->tmp.h;
    tx->tmp.z      = 0;

-   if (usage & PIPE_TRANSFER_READ)
+   if (usage & PIPE_TRANSFER_READ) {
+      pipe_mutex_lock(nv30->screen->base.push_mutex);
       nv30_transfer_rect(nv30, NEAREST, &tx->img, &tx->tmp);
+      PUSH_KICK(nv30->base.pushbuf);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
+   }

    if (tx->tmp.bo->map) {
       *ptransfer = &tx->base;
@@ -340,11 +349,13 @@ nv30_miptree_transfer_unmap(struct pipe_context *pipe,
    struct nv30_transfer *tx = nv30_transfer(ptx);

    if (ptx->usage & PIPE_TRANSFER_WRITE) {
+      pipe_mutex_lock(nv30->screen->base.push_mutex);
       nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img);

       /* Allow the copies above to finish executing before freeing the source */
       nouveau_fence_work(nv30->screen->base.fence.current,
                          nouveau_fence_unref_bo, tx->tmp.bo);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
    } else {
       nouveau_bo_ref(NULL, &tx->tmp.bo);
    }
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c
index aa9a12f..a047e15 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_query.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c
@@ -152,6 +152,7 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
    struct nv30_query *q = nv30_query(pq);
    struct nouveau_pushbuf *push = nv30->base.pushbuf;

+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_TIME_ELAPSED:
       q->qo[0] = nv30_query_object_new(nv30->screen);
@@ -161,7 +162,7 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
       }
       break;
    case PIPE_QUERY_TIMESTAMP:
-      return true;
+      break;
    default:
       BEGIN_NV04(push, NV30_3D(QUERY_RESET), 1);
       PUSH_DATA (push, q->report);
@@ -172,6 +173,7 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
       BEGIN_NV04(push, SUBC_3D(q->enable), 1);
       PUSH_DATA (push, 1);
    }
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
    return true;
 }

@@ -183,6 +185,7 @@ nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
    struct nv30_query *q = nv30_query(pq);
    struct nouveau_pushbuf *push = nv30->base.pushbuf;

+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    q->qo[1] = nv30_query_object_new(screen);
    if (q->qo[1]) {
       BEGIN_NV04(push, NV30_3D(QUERY_GET), 1);
@@ -194,6 +197,7 @@ nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
       PUSH_DATA (push, 0);
    }
    PUSH_KICK (push);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
    return true;
 }

@@ -248,9 +252,11 @@ nv40_query_render_condition(struct pipe_context *pipe,
    nv30->render_cond_mode = mode;
    nv30->render_cond_cond = condition;

+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    if (!pq) {
       BEGIN_NV04(push, SUBC_3D(0x1e98), 1);
       PUSH_DATA (push, 0x01000000);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
    }

@@ -262,6 +268,7 @@ nv40_query_render_condition(struct pipe_context *pipe,

    BEGIN_NV04(push, SUBC_3D(0x1e98), 1);
    PUSH_DATA (push, 0x02000000 | q->qo[1]->hw->start);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }

 static void
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
index bc9b9a1..8e3fdee 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -563,6 +563,8 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    if (nv30->vbo_push_hint != !!nv30->vbo_fifo)
       nv30->dirty |= NV30_NEW_ARRAYS;

+   pipe_mutex_lock(nv30->screen->base.push_mutex);
+
    push->user_priv = &nv30->bufctx;
    if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
       nv30_update_user_vbufs(nv30);
@@ -570,10 +572,12 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    nv30_state_validate(nv30, ~0, true);
    if (nv30->draw_flags) {
       nv30_render_vbo(pipe, info);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
    } else
    if (nv30->vbo_fifo) {
       nv30_push_vbo(nv30, info);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
    }

@@ -630,6 +634,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)

    nv30_state_release(nv30);
    nv30_release_user_vbufs(nv30);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }

 void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index d781f6f..3c174e4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -249,9 +249,11 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    struct nv50_program *cp = nv50->compprog;
    bool ret;

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    ret = !nv50_state_validate_cp(nv50, ~0);
    if (ret) {
       NOUVEAU_ERR("Failed to launch grid !\n");
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }

@@ -284,6 +286,8 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);

+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    /* bind a compute shader clobbers fragment shader state */
    nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
 }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 5af0e9b..8a148fc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -37,7 +37,9 @@ nv50_flush(struct pipe_context *pipe,
    if (fence)
       nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);

+   pipe_mutex_lock(screen->push_mutex);
    PUSH_KICK(screen->pushbuf);
+   pipe_mutex_unlock(screen->push_mutex);

    nouveau_context_update_frame_stats(nouveau_context(pipe));
 }
@@ -47,10 +49,12 @@ nv50_texture_barrier(struct pipe_context *pipe)
 {
    struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;

+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
    BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);
    BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
    PUSH_DATA (push, 0x20);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
 }

 static void
@@ -107,6 +111,7 @@ nv50_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
       data_words = string_words;
    else
       data_words = string_words + !!(len & 3);
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
    BEGIN_NI04(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
    if (string_words)
       PUSH_DATAp(push, str, string_words);
@@ -115,6 +120,7 @@ nv50_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
       memcpy(&data, &str[string_words * 4], len & 3);
       PUSH_DATA (push, data);
    }
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
 }

 void
@@ -291,6 +297,8 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
       return NULL;
    pipe = &nv50->base.pipe;

+   pipe_mutex_lock(screen->base.push_mutex);
+
    if (!nv50_blitctx_create(nv50))
       goto out_err;

@@ -391,9 +399,12 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)

    util_dynarray_init(&nv50->global_residents);

+   pipe_mutex_unlock(screen->base.push_mutex);
+
    return pipe;

 out_err:
+   pipe_mutex_unlock(screen->base.push_mutex);
    if (nv50->bufctx_3d)
       nouveau_bufctx_del(&nv50->bufctx_3d);
    if (nv50->bufctx_cp)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 2317fa2..b7963a4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -217,6 +217,11 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
 /* nv50_draw.c */
 extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);

+/* nv50_query.c */
+void nv50_render_condition(struct pipe_context *pipe,
+                           struct pipe_query *pq,
+                           boolean condition, uint mode);
+
 /* nv50_shader_state.c */
 void nv50_vertprog_validate(struct nv50_context *);
 void nv50_gmtyprog_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 9a1397a..c90e20e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -70,7 +70,7 @@ nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
    return q->funcs->get_query_result(nv50_context(pipe), q, wait, result);
 }

-static void
+void
 nv50_render_condition(struct pipe_context *pipe,
                       struct pipe_query *pq,
                       boolean condition, uint mode)
@@ -145,6 +145,16 @@ nv50_render_condition(struct pipe_context *pipe,
 }

 static void
+nv50_render_condition_locked(struct pipe_context *pipe,
+                             struct pipe_query *pq,
+                             boolean condition, uint mode)
+{
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
+   nv50_render_condition(pipe, pq, condition, mode);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
+}
+
+static void
 nv50_set_active_query_state(struct pipe_context *pipe, boolean enable)
 {
 }
@@ -160,7 +170,7 @@ nv50_init_query_functions(struct nv50_context *nv50)
    pipe->end_query = nv50_end_query;
    pipe->get_query_result = nv50_get_query_result;
    pipe->set_active_query_state = nv50_set_active_query_state;
-   pipe->render_condition = nv50_render_condition;
+   pipe->render_condition = nv50_render_condition_locked;
    nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
 }

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 727b509..9067bcc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -129,6 +129,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
 {
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    struct nv50_hw_query *hq = nv50_hw_query(q);
+   bool ret = true;

    if (hq->funcs && hq->funcs->begin_query)
       return hq->funcs->begin_query(nv50, hq);
@@ -154,6 +155,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
    if (!hq->is64bit)
       hq->data[0] = hq->sequence++; /* the previously used one */

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -193,10 +195,13 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
       break;
    default:
       assert(0);
-      return false;
+      ret = false;
+      break;
    }
-   hq->state = NV50_HW_QUERY_STATE_ACTIVE;
-   return true;
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+   if (ret)
+      hq->state = NV50_HW_QUERY_STATE_ACTIVE;
+   return ret;
 }

 static void
@@ -212,6 +217,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)

    hq->state = NV50_HW_QUERY_STATE_ENDED;

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -264,6 +270,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
       assert(0);
       break;
    }
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
    if (hq->is64bit)
       nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
 }
@@ -286,16 +293,21 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
       nv50_hw_query_update(q);

    if (hq->state != NV50_HW_QUERY_STATE_READY) {
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       if (!wait) {
          /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
          if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
             hq->state = NV50_HW_QUERY_STATE_FLUSHED;
             PUSH_KICK(nv50->base.pushbuf);
          }
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return false;
       }
-      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
+      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) {
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return false;
+      }
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
    }
    hq->state = NV50_HW_QUERY_STATE_READY;

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index bcfba9f..31445eb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -176,6 +176,7 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
       return false;
    }

+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 4);
    PUSH_SPACE(push, 4 * 4);

@@ -208,6 +209,7 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
       BEGIN_NV04(push, NV50_CP(MP_PM_SET(c)), 1);
       PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }

@@ -237,6 +239,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
       screen->pm.prog = prog;
    }

+   pipe_mutex_lock(screen->base.push_mutex);
    /* disable all counting */
    PUSH_SPACE(push, 8);
    for (c = 0; c < 4; c++) {
@@ -260,6 +263,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
    PUSH_SPACE(push, 2);
    BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);
+   pipe_mutex_unlock(screen->base.push_mutex);

    pipe->bind_compute_state(pipe, screen->pm.prog);
    input[0] = hq->bo->offset + hq->base_offset;
@@ -276,6 +280,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)

    nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);

+   pipe_mutex_lock(screen->base.push_mutex);
    /* re-active other counters */
    PUSH_SPACE(push, 8);
    mask = 0;
@@ -302,6 +307,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
                     | cfg->ctr[i].unit | cfg->ctr[i].mode);
       }
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
 }

 static inline bool
@@ -343,7 +349,9 @@ nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq,

    cfg = nv50_hw_sm_query_get_cfg(nv50, hq);

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count);
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    if (!ret)
       return false;

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 61dec3f..d6b9de0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -204,10 +204,13 @@ nv50_resource_copy_region(struct pipe_context *pipe,
    bool m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
       nouveau_copy_buffer(&nv50->base,
                           nv04_resource(dst), dstx,
                           nv04_resource(src), src_box->x, src_box->width);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }

@@ -247,6 +250,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
          else
             srect.base += src_mt->layer_stride;
       }
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }

@@ -270,6 +274,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
          break;
    }
    nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }

 static void
@@ -288,14 +293,18 @@ nv50_clear_render_target(struct pipe_context *pipe,

    assert(dst->texture->target != PIPE_BUFFER);

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
    PUSH_DATAf(push, color->f[0]);
    PUSH_DATAf(push, color->f[1]);
    PUSH_DATAf(push, color->f[2]);
    PUSH_DATAf(push, color->f[3]);

-   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }

    PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);

@@ -353,6 +362,8 @@ nv50_clear_render_target(struct pipe_context *pipe,
    BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
    PUSH_DATA (push, nv50->cond_condmode);

+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }

@@ -376,6 +387,8 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
    assert(dst->texture->target != PIPE_BUFFER);
    assert(nouveau_bo_memtype(bo)); /* ZETA cannot be linear */

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (clear_flags & PIPE_CLEAR_DEPTH) {
       BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1);
       PUSH_DATAf(push, depth);
@@ -388,8 +401,10 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
       mode |= NV50_3D_CLEAR_BUFFERS_S;
    }

-   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }

    PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);

@@ -436,6 +451,8 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
    BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
    PUSH_DATA (push, nv50->cond_condmode);

+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }

@@ -524,9 +541,12 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
    unsigned i, j, k;
    uint32_t mode = 0;

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
-   if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER))
+   if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }

    /* We have to clear ALL of the layers, not up to the min number of layers
     * of any attachment. */
@@ -592,6 +612,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
    /* restore the array mode */
    BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
    PUSH_DATA (push, nv50->rt_array_mode);
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }

 static void
@@ -719,14 +740,18 @@ nv50_clear_buffer(struct pipe_context *pipe,

    assert(size % data_size == 0);

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (offset & 0xff) {
       unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
       assert(fixup_size % data_size == 0);
       nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
       offset += fixup_size;
       size -= fixup_size;
-      if (!size)
+      if (!size) {
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return;
+      }
    }

    elements = size / data_size;
@@ -742,8 +767,10 @@ nv50_clear_buffer(struct pipe_context *pipe,
    PUSH_DATAf(push, color.f[2]);
    PUSH_DATAf(push, color.f[3]);

-   if (nouveau_pushbuf_space(push, 32, 1, 0))
+   if (nouveau_pushbuf_space(push, 32, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }

    PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);

@@ -798,6 +825,8 @@ nv50_clear_buffer(struct pipe_context *pipe,
                              data, data_size);
    }

+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }

@@ -1700,6 +1729,8 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
         info->src.box.height != -info->dst.box.height))
       eng3d = true;

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (nv50->screen->num_occlusion_queries_active) {
       BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
       PUSH_DATA (push, 0);
@@ -1714,6 +1745,8 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
       BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
       PUSH_DATA (push, 1);
    }
+
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }

 static void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
index 86a8c15..f5c7c57 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -304,6 +304,7 @@ nv50_miptree_transfer_map(struct pipe_context *pctx,
       unsigned base = tx->rect[0].base;
       unsigned z = tx->rect[0].z;
       unsigned i;
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       for (i = 0; i < box->depth; ++i) {
          nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0],
                                  tx->nblocksx, tx->nblocksy);
@@ -313,6 +314,9 @@ nv50_miptree_transfer_map(struct pipe_context *pctx,
             tx->rect[0].base += mt->layer_stride;
          tx->rect[1].base += size;
       }
+      /* Kick these reads out so we don't have to reacquire a lock below */
+      PUSH_KICK(nv50->base.pushbuf);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       tx->rect[0].z = z;
       tx->rect[0].base = base;
       tx->rect[1].base = 0;
@@ -349,6 +353,7 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
    unsigned i;

    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       for (i = 0; i < tx->base.box.depth; ++i) {
          nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1],
                                  tx->nblocksx, tx->nblocksy);
@@ -362,6 +367,7 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
       /* Allow the copies above to finish executing before freeing the source */
       nouveau_fence_work(nv50->screen->base.fence.current,
                          nouveau_fence_unref_bo, tx->rect[1].bo);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
    } else {
       nouveau_bo_ref(NULL, &tx->rect[1].bo);
    }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index a11cdf8..f73329c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -767,6 +767,8 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    bool tex_dirty = false;
    int s;

+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
    nv50->vb_elt_first = info->min_index + info->index_bias;
    nv50->vb_elt_limit = info->max_index - info->min_index;
@@ -827,6 +829,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nv50_push_vbo(nv50, info);
       push->kick_notify = nv50_default_kick_notify;
       nouveau_pushbuf_bufctx(push, NULL);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }

@@ -886,4 +889,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    nv50_release_user_vbufs(nv50);

    nouveau_pushbuf_bufctx(push, NULL);
+
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 59bbe1e..7511819 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -388,13 +388,17 @@ void
 nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_program *cp = nvc0->compprog;
    int ret;

+   pipe_mutex_lock(screen->base.push_mutex);
+
    ret = !nvc0_state_validate_cp(nvc0, ~0);
    if (ret) {
       NOUVEAU_ERR("Failed to launch grid !\n");
+      pipe_mutex_unlock(screen->base.push_mutex);
       return;
    }

@@ -462,4 +466,6 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
    nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
    nvc0->images_dirty[5] |= nvc0->images_valid[5];
+
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 1c5f954..12d5b0e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -38,7 +38,9 @@ nvc0_flush(struct pipe_context *pipe,
    if (fence)
       nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);

+   pipe_mutex_lock(screen->push_mutex);
    PUSH_KICK(nvc0->base.pushbuf); /* fencing handled in kick_notify */
+   pipe_mutex_unlock(screen->push_mutex);

    nouveau_context_update_frame_stats(&nvc0->base);
 }
@@ -48,8 +50,10 @@ nvc0_texture_barrier(struct pipe_context *pipe)
 {
    struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf;

+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
    IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
    IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0);
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }

 static void
@@ -59,6 +63,8 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    int i, s;

+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
+
    if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
       for (i = 0; i < nvc0->num_vtxbufs; ++i) {
          if (!nvc0->vtxbuf[i].buffer)
@@ -108,6 +114,8 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
       nvc0->cb_dirty = true;
    if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_INDEX_BUFFER))
       nvc0->base.vbo_dirty = true;
+
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }

 static void
@@ -124,6 +132,7 @@ nvc0_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
       data_words = string_words;
    else
       data_words = string_words + !!(len & 3);
+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
    BEGIN_NIC0(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
    if (string_words)
       PUSH_DATAp(push, str, string_words);
@@ -132,6 +141,7 @@ nvc0_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
       memcpy(&data, &str[string_words * 4], len & 3);
       PUSH_DATA (push, data);
    }
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }

 static void
@@ -362,6 +372,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
       return NULL;
    pipe = &nvc0->base.pipe;

+   pipe_mutex_lock(screen->base.push_mutex);
+
    if (!nvc0_blitctx_create(nvc0))
       goto out_err;

@@ -465,9 +477,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)

    util_dynarray_init(&nvc0->global_residents);

+   pipe_mutex_unlock(screen->base.push_mutex);
+
    return pipe;

 out_err:
+   pipe_mutex_unlock(screen->base.push_mutex);
    if (nvc0) {
       if (nvc0->bufctx_3d)
          nouveau_bufctx_del(&nvc0->bufctx_3d);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 6e1548d..ff5467c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -293,6 +293,11 @@ uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
                                     uint32_t label);
 void nvc0_program_init_tcp_empty(struct nvc0_context *);

+/* nvc0_query.c */
+void nvc0_render_condition(struct pipe_context *pipe,
+                           struct pipe_query *pq,
+                           boolean condition, uint mode);
+
 /* nvc0_shader_state.c */
 void nvc0_vertprog_validate(struct nvc0_context *);
 void nvc0_tctlprog_validate(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 91fb72f..55d1dc1 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -92,7 +92,7 @@ nvc0_get_query_result_resource(struct pipe_context *pipe,
                                        index, resource, offset);
 }

-static void
+void
 nvc0_render_condition(struct pipe_context *pipe,
                       struct pipe_query *pq,
                       boolean condition, uint mode)
@@ -161,6 +161,16 @@ nvc0_render_condition(struct pipe_context *pipe,
    PUSH_DATA (push, hq->bo->offset + hq->offset);
 }

+static void
+nvc0_render_condition_locked(struct pipe_context *pipe,
+                             struct pipe_query *pq,
+                             boolean condition, uint mode)
+{
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
+   nvc0_render_condition(pipe, pq, condition, mode);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
+}
+
 int
 nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
                                   unsigned id,
@@ -272,6 +282,6 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
    pipe->get_query_result = nvc0_get_query_result;
    pipe->get_query_result_resource = nvc0_get_query_result_resource;
    pipe->set_active_query_state = nvc0_set_active_query_state;
-   pipe->render_condition = nvc0_render_condition;
+   pipe->render_condition = nvc0_render_condition_locked;
    nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 4c34593..f2584cb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -154,6 +154,7 @@ nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    }
    hq->sequence++;

+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -198,6 +199,7 @@ nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    default:
       break;
    }
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
    hq->state = NVC0_HW_QUERY_STATE_ACTIVE;
    return ret;
 }
@@ -221,6 +223,7 @@ nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    }
    hq->state = NVC0_HW_QUERY_STATE_ENDED;

+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -276,6 +279,7 @@ nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    default:
       break;
    }
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
    if (hq->is64bit)
       nouveau_fence_ref(nvc0->screen->base.fence.current, &hq->fence);
 }
@@ -298,16 +302,21 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
       nvc0_hw_query_update(nvc0->screen->base.client, q);

    if (hq->state != NVC0_HW_QUERY_STATE_READY) {
+      pipe_mutex_lock(nvc0->screen->base.push_mutex);
       if (!wait) {
          if (hq->state != NVC0_HW_QUERY_STATE_FLUSHED) {
             hq->state = NVC0_HW_QUERY_STATE_FLUSHED;
             /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
             PUSH_KICK(nvc0->base.pushbuf);
          }
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return false;
       }
-      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
+      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->screen->base.client)) {
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return false;
+      }
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
    }
    hq->state = NVC0_HW_QUERY_STATE_READY;
@@ -374,6 +383,8 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,

    assert(!hq->funcs || !hq->funcs->get_query_result);

+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (index == -1) {
       /* TODO: Use a macro to write the availability of the query */
       if (hq->state != NVC0_HW_QUERY_STATE_READY)
@@ -382,6 +393,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
       nvc0->base.push_cb(&nvc0->base, buf, offset,
                          result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
                          ready);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }

@@ -469,6 +481,8 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
                            4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
    }

+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
+
    if (buf->mm) {
       nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
       nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 27cbbc4..d416e00 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -1662,6 +1662,7 @@ nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
       return false;
    }

+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 4);
    PUSH_SPACE(push, 4 * 8 * + 6);

@@ -1710,6 +1711,7 @@ nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
      BEGIN_NVC0(push, NVE4_CP(MP_PM_SET(c)), 1);
      PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }

@@ -1733,6 +1735,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
       return false;
    }

+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 8);
    PUSH_SPACE(push, 8 * 8 + 2);

@@ -1779,6 +1782,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
       BEGIN_NVC0(push, NVC0_CP(MP_PM_SET(c)), 1);
       PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }

@@ -1866,6 +1870,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
    if (unlikely(!screen->pm.prog))
       screen->pm.prog = nvc0_hw_sm_get_program(screen);

+   pipe_mutex_lock(screen->base.push_mutex);
    /* disable all counting */
    PUSH_SPACE(push, 8);
    for (c = 0; c < 8; ++c)
@@ -1893,6 +1898,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)

    /* upload input data for the compute shader which reads MP counters */
    nvc0_hw_sm_upload_input(nvc0, hq);
+   pipe_mutex_unlock(screen->base.push_mutex);

    pipe->bind_compute_state(pipe, screen->pm.prog);
    for (i = 0; i < 3; i++) {
@@ -1906,6 +1912,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)

    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

+   pipe_mutex_lock(screen->base.push_mutex);
    /* re-activate other counters */
    PUSH_SPACE(push, 16);
    mask = 0;
@@ -1930,6 +1937,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
          PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
       }
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
 }

 static inline bool
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 6541241..1d280d8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -491,7 +491,9 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
        * _current_ one, and remove both.
        */
       nouveau_fence_ref(screen->base.fence.current, &current);
+      pipe_mutex_lock(screen->base.push_mutex);
       nouveau_fence_wait(current, NULL);
+      pipe_mutex_unlock(screen->base.push_mutex);
       nouveau_fence_ref(NULL, &current);
       nouveau_fence_ref(NULL, &screen->base.fence.current);
    }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index a177569..67bd183 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -206,11 +206,14 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
    bool m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;

+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
       nouveau_copy_buffer(&nvc0->base,
                           nv04_resource(dst), dstx,
                           nv04_resource(src), src_box->x, src_box->width);
       NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
    NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1);
@@ -251,6 +254,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
          else
             srect.base += src_mt->layer_stride;
       }
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }

@@ -273,6 +277,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
          break;
    }
    nouveau_bufctx_reset(nvc0->bufctx, 0);
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }

 static void
@@ -290,8 +295,12 @@ nvc0_clear_render_target(struct pipe_context *pipe,

    assert(dst->texture->target != PIPE_BUFFER);

-   if (!PUSH_SPACE(push, 32 + sf->depth))
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
+   if (!PUSH_SPACE(push, 32 + sf->depth)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }

    PUSH_REFN (push, res->bo, res->domain | NOUVEAU_BO_WR);

@@ -354,6 +363,8 @@ nvc0_clear_render_target(struct pipe_context *pipe,
    IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);

    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }

 static void
@@ -539,8 +550,11 @@ nvc0_clear_buffer(struct pipe_context *pipe,

    assert(size % data_size == 0);

+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (data_size == 12) {
       nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }

@@ -550,8 +564,10 @@ nvc0_clear_buffer(struct pipe_context *pipe,
       nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
       offset += fixup_size;
       size -= fixup_size;
-      if (!size)
+      if (!size) {
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return;
+      }
    }

    elements = size / data_size;
@@ -561,8 +577,10 @@ nvc0_clear_buffer(struct pipe_context *pipe,
       width &= ~0xff;
    assert(width > 0);

-   if (!PUSH_SPACE(push, 40))
+   if (!PUSH_SPACE(push, 40)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }

    PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);

@@ -610,6 +628,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
    }

    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }

 static void
@@ -631,8 +651,11 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,

    assert(dst->texture->target != PIPE_BUFFER);

-   if (!PUSH_SPACE(push, 32 + sf->depth))
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+   if (!PUSH_SPACE(push, 32 + sf->depth)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }

    PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR);

@@ -679,6 +702,8 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
    IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);

    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }

 void
@@ -692,9 +717,13 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers,
    unsigned i, j, k;
    uint32_t mode = 0;

+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
-   if (!nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER))
+   if (!nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }

    if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
       BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
@@ -753,6 +782,8 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers,
                     (j << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
       }
    }
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }


@@ -1146,8 +1177,8 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
    nvc0->samplers_dirty[4] |= 3;

    if (nvc0->cond_query && !blit->render_condition_enable)
-      nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
-                                       nvc0->cond_cond, nvc0->cond_mode);
+      nvc0_render_condition(&nvc0->base.pipe, nvc0->cond_query,
+                            nvc0->cond_cond, nvc0->cond_mode);

    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
@@ -1606,6 +1637,8 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
         info->src.box.height != -info->dst.box.height))
       eng3d = true;

+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (nvc0->screen->num_occlusion_queries_active)
       IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);

@@ -1617,6 +1650,8 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
    if (nvc0->screen->num_occlusion_queries_active)
       IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);

+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
+
    NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
 }

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index 14fb53c..6cb39a9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -342,16 +342,18 @@ nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
    return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr, &nvc0->base.debug);
 }

-void *
-nvc0_miptree_transfer_map(struct pipe_context *pctx,
-                          struct pipe_resource *res,
-                          unsigned level,
-                          unsigned usage,
-                          const struct pipe_box *box,
-                          struct pipe_transfer **ptransfer)
+static void *
+nvc0_miptree_transfer_map_unlocked(
+      struct pipe_context *pctx,
+      struct pipe_resource *res,
+      unsigned level,
+      unsigned usage,
+      const struct pipe_box *box,
+      struct pipe_transfer **ptransfer)
 {
    struct nvc0_context *nvc0 = nvc0_context(pctx);
-   struct nouveau_device *dev = nvc0->screen->base.device;
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nouveau_device *dev = screen->base.device;
    struct nv50_miptree *mt = nv50_miptree(res);
    struct nvc0_transfer *tx;
    uint32_t size;
@@ -465,9 +467,29 @@ nvc0_miptree_transfer_map(struct pipe_context *pctx,
    return tx->rect[1].bo->map;
 }

-void
-nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
-                            struct pipe_transfer *transfer)
+void *
+nvc0_miptree_transfer_map(
+      struct pipe_context *pctx,
+      struct pipe_resource *res,
+      unsigned level,
+      unsigned usage,
+      const struct pipe_box *box,
+      struct pipe_transfer **ptransfer)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct nvc0_screen *screen = nvc0->screen;
+
+   pipe_mutex_lock(screen->base.push_mutex);
+   void *ret = nvc0_miptree_transfer_map_unlocked(
+         pctx, res, level, usage, box, ptransfer);
+   pipe_mutex_unlock(screen->base.push_mutex);
+
+   return ret;
+}
+
+static void
+nvc0_miptree_transfer_unmap_unlocked(struct pipe_context *pctx,
+                                     struct pipe_transfer *transfer)
 {
    struct nvc0_context *nvc0 = nvc0_context(pctx);
    struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
@@ -507,6 +529,18 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
    FREE(tx);
 }

+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *transfer)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct nvc0_screen *screen = nvc0->screen;
+
+   pipe_mutex_lock(screen->base.push_mutex);
+   nvc0_miptree_transfer_unmap_unlocked(pctx, transfer);
+   pipe_mutex_unlock(screen->base.push_mutex);
+}
+
 /* This happens rather often with DTD9/st. */
 static void
 nvc0_cb_push(struct nouveau_context *nv,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 94274bc..3d20c68 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -940,6 +940,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    struct nvc0_screen *screen = nvc0->screen;
    int s;

+   pipe_mutex_lock(screen->base.push_mutex);
+
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
    nvc0->vb_elt_first = info->min_index + info->index_bias;
    nvc0->vb_elt_limit = info->max_index - info->min_index;
@@ -1033,6 +1035,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nvc0_push_vbo(nvc0, info);
       push->kick_notify = nvc0_default_kick_notify;
       nouveau_pushbuf_bufctx(push, NULL);
+      pipe_mutex_unlock(screen->base.push_mutex);
       return;
    }

@@ -1085,4 +1088,5 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    nvc0_release_user_vbufs(nvc0);

    nouveau_pushbuf_bufctx(push, NULL);
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index f2e608d..d172d73 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -535,12 +535,15 @@ void
 nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nve4_cp_launch_desc *desc;
    uint64_t desc_gpuaddr;
    struct nouveau_bo *desc_bo;
    int ret;

+   pipe_mutex_lock(screen->base.push_mutex);
+
    desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
    if (!desc) {
       ret = -1;
@@ -621,6 +624,7 @@ out:
       NOUVEAU_ERR("Failed to launch grid !\n");
    nouveau_scratch_done(&nvc0->base);
    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
+   pipe_mutex_unlock(screen->base.push_mutex);
 }