Add flags to io_writev and introduce io_flush as an optional callback in QIOChannelClass, allowing subclasses to implement zero copy writes.

How to use them:
- Write data using qio_channel_writev*(..., QIO_CHANNEL_WRITE_FLAG_ZERO_COPY),
- Wait for write completion with qio_channel_flush().

Notes:
As some zero copy write implementations work asynchronously, it's recommended to keep the write buffer untouched until qio_channel_flush() returns, to avoid the risk of sending an updated buffer instead of the buffer state at the time of the write.

As the io_flush callback is optional, if a subclass does not implement it, then:
- io_flush will return 0 without changing anything.

Also, some functions like qio_channel_writev_full_all() were adapted to receive a flags parameter. That allows shared code between zero copy and non-zero copy writev, and also makes it easier to implement new flags.

Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20220513062836.965425-3-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
		
			
				
	
	
		
			264 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			264 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Communication channel between QEMU and remote device process
 | |
|  *
 | |
|  * Copyright © 2018, 2021 Oracle and/or its affiliates.
 | |
|  *
 | |
|  * This work is licensed under the terms of the GNU GPL, version 2 or later.
 | |
|  * See the COPYING file in the top-level directory.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #include "qemu/osdep.h"
 | |
| 
 | |
| #include "qemu/module.h"
 | |
| #include "hw/remote/mpqemu-link.h"
 | |
| #include "qapi/error.h"
 | |
| #include "qemu/iov.h"
 | |
| #include "qemu/error-report.h"
 | |
| #include "qemu/main-loop.h"
 | |
| #include "io/channel.h"
 | |
| #include "sysemu/iothread.h"
 | |
| #include "trace.h"
 | |
| 
 | |
| /*
 | |
|  * Send message over the ioc QIOChannel.
 | |
|  * This function is safe to call from:
 | |
|  * - main loop in co-routine context. Will block the main loop if not in
 | |
|  *   co-routine context;
 | |
|  * - vCPU thread with no co-routine context and if the channel is not part
 | |
|  *   of the main loop handling;
 | |
|  * - IOThread within co-routine context, outside of co-routine context
 | |
|  *   will block IOThread;
 | |
|  * Returns true if no errors were encountered, false otherwise.
 | |
|  */
 | |
| bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp)
 | |
| {
 | |
|     bool iolock = qemu_mutex_iothread_locked();
 | |
|     bool iothread = qemu_in_iothread();
 | |
|     struct iovec send[2] = {};
 | |
|     int *fds = NULL;
 | |
|     size_t nfds = 0;
 | |
|     bool ret = false;
 | |
| 
 | |
|     send[0].iov_base = msg;
 | |
|     send[0].iov_len = MPQEMU_MSG_HDR_SIZE;
 | |
| 
 | |
|     send[1].iov_base = (void *)&msg->data;
 | |
|     send[1].iov_len = msg->size;
 | |
| 
 | |
|     if (msg->num_fds) {
 | |
|         nfds = msg->num_fds;
 | |
|         fds = msg->fds;
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Dont use in IOThread out of co-routine context as
 | |
|      * it will block IOThread.
 | |
|      */
 | |
|     assert(qemu_in_coroutine() || !iothread);
 | |
| 
 | |
|     /*
 | |
|      * Skip unlocking/locking iothread lock when the IOThread is running
 | |
|      * in co-routine context. Co-routine context is asserted above
 | |
|      * for IOThread case.
 | |
|      * Also skip lock handling while in a co-routine in the main context.
 | |
|      */
 | |
|     if (iolock && !iothread && !qemu_in_coroutine()) {
 | |
|         qemu_mutex_unlock_iothread();
 | |
|     }
 | |
| 
 | |
|     if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send),
 | |
|                                     fds, nfds, 0, errp)) {
 | |
|         ret = true;
 | |
|     } else {
 | |
|         trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds);
 | |
|     }
 | |
| 
 | |
|     if (iolock && !iothread && !qemu_in_coroutine()) {
 | |
|         /* See above comment why skip locking here. */
 | |
|         qemu_mutex_lock_iothread();
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Read message from the ioc QIOChannel.
 | |
|  * This function is safe to call from:
 | |
|  * - From main loop in co-routine context. Will block the main loop if not in
 | |
|  *   co-routine context;
 | |
|  * - From vCPU thread with no co-routine context and if the channel is not part
 | |
|  *   of the main loop handling;
 | |
|  * - From IOThread within co-routine context, outside of co-routine context
 | |
|  *   will block IOThread;
 | |
|  */
 | |
| static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds,
 | |
|                            size_t *nfds, Error **errp)
 | |
| {
 | |
|     struct iovec iov = { .iov_base = buf, .iov_len = len };
 | |
|     bool iolock = qemu_mutex_iothread_locked();
 | |
|     bool iothread = qemu_in_iothread();
 | |
|     int ret = -1;
 | |
| 
 | |
|     /*
 | |
|      * Dont use in IOThread out of co-routine context as
 | |
|      * it will block IOThread.
 | |
|      */
 | |
|     assert(qemu_in_coroutine() || !iothread);
 | |
| 
 | |
|     if (iolock && !iothread && !qemu_in_coroutine()) {
 | |
|         qemu_mutex_unlock_iothread();
 | |
|     }
 | |
| 
 | |
|     ret = qio_channel_readv_full_all_eof(ioc, &iov, 1, fds, nfds, errp);
 | |
| 
 | |
|     if (iolock && !iothread && !qemu_in_coroutine()) {
 | |
|         qemu_mutex_lock_iothread();
 | |
|     }
 | |
| 
 | |
|     return (ret <= 0) ? ret : iov.iov_len;
 | |
| }
 | |
| 
 | |
| bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp)
 | |
| {
 | |
|     ERRP_GUARD();
 | |
|     g_autofree int *fds = NULL;
 | |
|     size_t nfds = 0;
 | |
|     ssize_t len;
 | |
|     bool ret = false;
 | |
| 
 | |
|     len = mpqemu_read(ioc, msg, MPQEMU_MSG_HDR_SIZE, &fds, &nfds, errp);
 | |
|     if (len <= 0) {
 | |
|         goto fail;
 | |
|     } else if (len != MPQEMU_MSG_HDR_SIZE) {
 | |
|         error_setg(errp, "Message header corrupted");
 | |
|         goto fail;
 | |
|     }
 | |
| 
 | |
|     if (msg->size > sizeof(msg->data)) {
 | |
|         error_setg(errp, "Invalid size for message");
 | |
|         goto fail;
 | |
|     }
 | |
| 
 | |
|     if (!msg->size) {
 | |
|         goto copy_fds;
 | |
|     }
 | |
| 
 | |
|     len = mpqemu_read(ioc, &msg->data, msg->size, NULL, NULL, errp);
 | |
|     if (len <= 0) {
 | |
|         goto fail;
 | |
|     }
 | |
|     if (len != msg->size) {
 | |
|         error_setg(errp, "Unable to read full message");
 | |
|         goto fail;
 | |
|     }
 | |
| 
 | |
| copy_fds:
 | |
|     msg->num_fds = nfds;
 | |
|     if (nfds > G_N_ELEMENTS(msg->fds)) {
 | |
|         error_setg(errp,
 | |
|                    "Overflow error: received %zu fds, more than max of %d fds",
 | |
|                    nfds, REMOTE_MAX_FDS);
 | |
|         goto fail;
 | |
|     }
 | |
|     if (nfds) {
 | |
|         memcpy(msg->fds, fds, nfds * sizeof(int));
 | |
|     }
 | |
| 
 | |
|     ret = true;
 | |
| 
 | |
| fail:
 | |
|     if (*errp) {
 | |
|         trace_mpqemu_recv_io_error(msg->cmd, msg->size, nfds);
 | |
|     }
 | |
|     while (*errp && nfds) {
 | |
|         close(fds[nfds - 1]);
 | |
|         nfds--;
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Send msg and wait for a reply with command code RET_MSG.
 | |
|  * Returns the message received of size u64 or UINT64_MAX
 | |
|  * on error.
 | |
|  * Called from VCPU thread in non-coroutine context.
 | |
|  * Used by the Proxy object to communicate to remote processes.
 | |
|  */
 | |
| uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev,
 | |
|                                          Error **errp)
 | |
| {
 | |
|     MPQemuMsg msg_reply = {0};
 | |
|     uint64_t ret = UINT64_MAX;
 | |
| 
 | |
|     assert(!qemu_in_coroutine());
 | |
| 
 | |
|     QEMU_LOCK_GUARD(&pdev->io_mutex);
 | |
|     if (!mpqemu_msg_send(msg, pdev->ioc, errp)) {
 | |
|         return ret;
 | |
|     }
 | |
| 
 | |
|     if (!mpqemu_msg_recv(&msg_reply, pdev->ioc, errp)) {
 | |
|         return ret;
 | |
|     }
 | |
| 
 | |
|     if (!mpqemu_msg_valid(&msg_reply) || msg_reply.cmd != MPQEMU_CMD_RET) {
 | |
|         error_setg(errp, "ERROR: Invalid reply received for command %d",
 | |
|                          msg->cmd);
 | |
|         return ret;
 | |
|     }
 | |
| 
 | |
|     return msg_reply.data.u64;
 | |
| }
 | |
| 
 | |
| bool mpqemu_msg_valid(MPQemuMsg *msg)
 | |
| {
 | |
|     if (msg->cmd >= MPQEMU_CMD_MAX || msg->cmd < 0) {
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /* Verify FDs. */
 | |
|     if (msg->num_fds >= REMOTE_MAX_FDS) {
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     if (msg->num_fds > 0) {
 | |
|         for (int i = 0; i < msg->num_fds; i++) {
 | |
|             if (fcntl(msg->fds[i], F_GETFL) == -1) {
 | |
|                 return false;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|      /* Verify message specific fields. */
 | |
|     switch (msg->cmd) {
 | |
|     case MPQEMU_CMD_SYNC_SYSMEM:
 | |
|         if (msg->num_fds == 0 || msg->size != sizeof(SyncSysmemMsg)) {
 | |
|             return false;
 | |
|         }
 | |
|         break;
 | |
|     case MPQEMU_CMD_PCI_CFGWRITE:
 | |
|     case MPQEMU_CMD_PCI_CFGREAD:
 | |
|         if (msg->size != sizeof(PciConfDataMsg)) {
 | |
|             return false;
 | |
|         }
 | |
|         break;
 | |
|     case MPQEMU_CMD_BAR_WRITE:
 | |
|     case MPQEMU_CMD_BAR_READ:
 | |
|         if ((msg->size != sizeof(BarAccessMsg)) || (msg->num_fds != 0)) {
 | |
|             return false;
 | |
|         }
 | |
|         break;
 | |
|     case MPQEMU_CMD_SET_IRQFD:
 | |
|         if (msg->size || (msg->num_fds != 2)) {
 | |
|             return false;
 | |
|         }
 | |
|         break;
 | |
|     default:
 | |
|         break;
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 |