3704 lines
98 KiB
Diff
3704 lines
98 KiB
Diff
2007-03-13 Gwenole Beauchesne <gbeauchesne@mandriva.com>
|
|
|
|
	* Merge in KVM rev 4486. Requires kernel >= 2.6.17-12mdv.
|
|
|
|
================================================================================
|
|
--- qemu-0.9.0/Makefile.target
|
|
+++ qemu-0.9.0/Makefile.target
|
|
@@ -1,5 +1,9 @@
|
|
+CFLAGS=
|
|
+LDFLAGS=
|
|
+
|
|
include config.mak
|
|
|
|
+LDFLAGS_BASE:=$(LDFLAGS)
|
|
TARGET_BASE_ARCH:=$(TARGET_ARCH)
|
|
ifeq ($(TARGET_ARCH), x86_64)
|
|
TARGET_BASE_ARCH:=i386
|
|
@@ -227,8 +231,8 @@
|
|
OBJS+= libqemu.a
|
|
|
|
# cpu emulator library
|
|
-LIBOBJS=exec.o kqemu.o translate-op.o translate-all.o cpu-exec.o\
|
|
- translate.o op.o
|
|
+LIBOBJS=exec.o kqemu.o qemu-kvm.o translate-op.o translate-all.o cpu-exec.o\
|
|
+ translate.o op.o
|
|
ifdef CONFIG_SOFTFLOAT
|
|
LIBOBJS+=fpu/softfloat.o
|
|
else
|
|
@@ -365,6 +369,13 @@
|
|
# PCI network cards
|
|
VL_OBJS+= ne2000.o rtl8139.o pcnet.o
|
|
|
|
+# KVM layer
|
|
+ifeq ($(USE_KVM), yes)
|
|
+VL_OBJS+= kvmctl.o
|
|
+# PCI Hypercall
|
|
+VL_OBJS+= hypercall.o
|
|
+endif
|
|
+
|
|
ifeq ($(TARGET_BASE_ARCH), i386)
|
|
# Hardware support
|
|
VL_OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o $(AUDIODRV)
|
|
@@ -429,7 +440,7 @@
|
|
VL_OBJS+=$(addprefix slirp/, $(SLIRP_OBJS))
|
|
endif
|
|
|
|
-VL_LDFLAGS=
|
|
+VL_LDFLAGS=$(LDFLAGS_BASE)
|
|
# specific flags are needed for non soft mmu emulator
|
|
ifdef CONFIG_STATIC
|
|
VL_LDFLAGS+=-static
|
|
@@ -440,7 +451,7 @@
|
|
ifndef CONFIG_DARWIN
|
|
ifndef CONFIG_WIN32
|
|
ifndef CONFIG_SOLARIS
|
|
-VL_LIBS=-lutil -lrt
|
|
+VL_LIBS=-lutil -lrt -luuid
|
|
endif
|
|
endif
|
|
endif
|
|
@@ -462,7 +473,7 @@
|
|
SDL_LIBS := $(filter-out -mwindows, $(SDL_LIBS)) -mconsole
|
|
endif
|
|
|
|
-$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a
|
|
+$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a $(DEPLIBS)
|
|
$(CC) $(VL_LDFLAGS) -o $@ $^ $(LIBS) $(SDL_LIBS) $(COCOA_LIBS) $(VL_LIBS)
|
|
|
|
cocoa.o: cocoa.m
|
|
@@ -521,6 +532,9 @@
|
|
cpu-exec.o: cpu-exec.c
|
|
$(CC) $(HELPER_CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) -c -o $@ $<
|
|
|
|
+qemu-kvm.o: qemu-kvm.c
|
|
+ $(CC) $(HELPER_CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) -c -o $@ $<
|
|
+
|
|
# Note: this is a workaround. The real fix is to avoid compiling
|
|
# cpu_signal_handler() in cpu-exec.c.
|
|
signal.o: signal.c
|
|
--- qemu-0.9.0/configure
|
|
+++ qemu-0.9.0/configure
|
|
@@ -89,7 +89,9 @@
|
|
bsd="no"
|
|
linux="no"
|
|
kqemu="no"
|
|
+kvm="no"
|
|
profiler="no"
|
|
+kernel_path=""
|
|
cocoa="no"
|
|
check_gfx="yes"
|
|
check_gcc="yes"
|
|
@@ -114,6 +116,7 @@
|
|
oss="yes"
|
|
if [ "$cpu" = "i386" -o "$cpu" = "x86_64" ] ; then
|
|
kqemu="yes"
|
|
+ kvm="yes"
|
|
fi
|
|
;;
|
|
NetBSD)
|
|
@@ -141,6 +144,7 @@
|
|
linux_user="yes"
|
|
if [ "$cpu" = "i386" -o "$cpu" = "x86_64" ] ; then
|
|
kqemu="yes"
|
|
+ kvm="yes"
|
|
fi
|
|
;;
|
|
esac
|
|
@@ -232,8 +236,12 @@
|
|
;;
|
|
--disable-kqemu) kqemu="no"
|
|
;;
|
|
+ --enable-kvm) kvm="yes"
|
|
+ ;;
|
|
--enable-profiler) profiler="yes"
|
|
;;
|
|
+ --kernel-path=*) kernel_path="$optarg"
|
|
+ ;;
|
|
--enable-cocoa) cocoa="yes" ; coreaudio="yes" ; sdl="no"
|
|
;;
|
|
--disable-gfx-check) check_gfx="no"
|
|
@@ -277,6 +285,8 @@
|
|
echo ""
|
|
echo "kqemu kernel acceleration support:"
|
|
echo " --disable-kqemu disable kqemu support"
|
|
+echo " --kernel-path=PATH set the kernel path (configure probes it)"
|
|
+echo " --enable-kvm enable kernel virtual machine support"
|
|
echo ""
|
|
echo "Advanced options (experts only):"
|
|
echo " --source-path=PATH path of source code [$source_path]"
|
|
@@ -623,6 +633,7 @@
|
|
fi
|
|
echo "FMOD support $fmod $fmod_support"
|
|
echo "kqemu support $kqemu"
|
|
+echo "kvm support $kvm"
|
|
echo "Documentation $build_docs"
|
|
[ ! -z "$uname_release" ] && \
|
|
echo "uname -r $uname_release"
|
|
@@ -857,6 +868,13 @@
|
|
interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"`
|
|
echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h
|
|
|
|
+configure_kvm() {
|
|
+ if test $kvm = "yes" -a "$target_softmmu" = "yes" -a $cpu = "$target_cpu" ; then
|
|
+ echo "#define USE_KVM 1" >> $config_h
|
|
+ echo "USE_KVM=yes" >> $config_mak
|
|
+ fi
|
|
+}
|
|
+
|
|
if test "$target_cpu" = "i386" ; then
|
|
echo "TARGET_ARCH=i386" >> $config_mak
|
|
echo "#define TARGET_ARCH \"i386\"" >> $config_h
|
|
@@ -864,6 +882,7 @@
|
|
if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "i386" ; then
|
|
echo "#define USE_KQEMU 1" >> $config_h
|
|
fi
|
|
+ configure_kvm
|
|
elif test "$target_cpu" = "arm" -o "$target_cpu" = "armeb" ; then
|
|
echo "TARGET_ARCH=arm" >> $config_mak
|
|
echo "#define TARGET_ARCH \"arm\"" >> $config_h
|
|
@@ -895,6 +914,7 @@
|
|
if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "x86_64" ; then
|
|
echo "#define USE_KQEMU 1" >> $config_h
|
|
fi
|
|
+ configure_kvm
|
|
elif test "$target_cpu" = "mips" -o "$target_cpu" = "mipsel" ; then
|
|
echo "TARGET_ARCH=mips" >> $config_mak
|
|
echo "#define TARGET_ARCH \"mips\"" >> $config_h
|
|
--- qemu-0.9.0/cpu-all.h
|
|
+++ qemu-0.9.0/cpu-all.h
|
|
@@ -834,6 +834,7 @@
|
|
extern int phys_ram_fd;
|
|
extern uint8_t *phys_ram_base;
|
|
extern uint8_t *phys_ram_dirty;
|
|
+extern uint8_t *bios_mem;
|
|
|
|
/* physical memory access */
|
|
#define TLB_INVALID_MASK (1 << 3)
|
|
--- qemu-0.9.0/cpu-exec.c
|
|
+++ qemu-0.9.0/cpu-exec.c
|
|
@@ -35,6 +35,11 @@
|
|
#include <sys/ucontext.h>
|
|
#endif
|
|
|
|
+#ifdef USE_KVM
|
|
+#include "qemu-kvm.h"
|
|
+extern int kvm_allowed;
|
|
+#endif
|
|
+
|
|
int tb_invalidated_flag;
|
|
|
|
//#define DEBUG_EXEC
|
|
@@ -401,6 +406,12 @@
|
|
}
|
|
#endif
|
|
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ kvm_cpu_exec(env);
|
|
+ longjmp(env->jmp_env, 1);
|
|
+ }
|
|
+#endif
|
|
T0 = 0; /* force lookup of first TB */
|
|
for(;;) {
|
|
#if defined(__sparc__) && !defined(HOST_SOLARIS)
|
|
--- qemu-0.9.0/exec.c
|
|
+++ qemu-0.9.0/exec.c
|
|
@@ -69,6 +69,10 @@
|
|
#define TARGET_PHYS_ADDR_SPACE_BITS 32
|
|
#endif
|
|
|
|
+#ifdef USE_KVM
|
|
+extern int kvm_allowed;
|
|
+#endif
|
|
+
|
|
TranslationBlock tbs[CODE_GEN_MAX_BLOCKS];
|
|
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
|
|
int nb_tbs;
|
|
@@ -82,6 +86,7 @@
|
|
int phys_ram_fd;
|
|
uint8_t *phys_ram_base;
|
|
uint8_t *phys_ram_dirty;
|
|
+uint8_t *bios_mem;
|
|
static int in_migration;
|
|
|
|
CPUState *first_cpu;
|
|
@@ -1044,6 +1049,11 @@
|
|
if (env->nb_breakpoints >= MAX_BREAKPOINTS)
|
|
return -1;
|
|
env->breakpoints[env->nb_breakpoints++] = pc;
|
|
+
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed)
|
|
+ kvm_update_debugger(env);
|
|
+#endif
|
|
|
|
breakpoint_invalidate(env, pc);
|
|
return 0;
|
|
@@ -1067,6 +1077,11 @@
|
|
if (i < env->nb_breakpoints)
|
|
env->breakpoints[i] = env->breakpoints[env->nb_breakpoints];
|
|
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed)
|
|
+ kvm_update_debugger(env);
|
|
+#endif
|
|
+
|
|
breakpoint_invalidate(env, pc);
|
|
return 0;
|
|
#else
|
|
@@ -1085,6 +1100,10 @@
|
|
/* XXX: only flush what is necessary */
|
|
tb_flush(env);
|
|
}
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed)
|
|
+ kvm_update_debugger(env);
|
|
+#endif
|
|
#endif
|
|
}
|
|
|
|
@@ -1425,6 +1444,9 @@
|
|
{
|
|
int r=0;
|
|
|
|
+#ifdef USE_KVM
|
|
+ r = kvm_physical_memory_set_dirty_tracking(enable);
|
|
+#endif
|
|
in_migration = enable;
|
|
return r;
|
|
}
|
|
--- qemu-0.9.0/hw/cirrus_vga.c
|
|
+++ qemu-0.9.0/hw/cirrus_vga.c
|
|
@@ -28,6 +28,9 @@
|
|
*/
|
|
#include "vl.h"
|
|
#include "vga_int.h"
|
|
+#ifndef _WIN32
|
|
+#include <sys/mman.h>
|
|
+#endif
|
|
|
|
/*
|
|
* TODO:
|
|
@@ -231,6 +234,10 @@
|
|
int cirrus_linear_io_addr;
|
|
int cirrus_linear_bitblt_io_addr;
|
|
int cirrus_mmio_io_addr;
|
|
+#ifdef USE_KVM
|
|
+ unsigned long cirrus_lfb_addr;
|
|
+ unsigned long cirrus_lfb_end;
|
|
+#endif
|
|
uint32_t cirrus_addr_mask;
|
|
uint32_t linear_mmio_mask;
|
|
uint8_t cirrus_shadow_gr0;
|
|
@@ -267,6 +274,10 @@
|
|
int last_hw_cursor_y_end;
|
|
int real_vram_size; /* XXX: suppress that */
|
|
CPUWriteMemoryFunc **cirrus_linear_write;
|
|
+#ifdef USE_KVM
|
|
+ unsigned long map_addr;
|
|
+ unsigned long map_end;
|
|
+#endif
|
|
} CirrusVGAState;
|
|
|
|
typedef struct PCICirrusVGAState {
|
|
@@ -2525,6 +2536,48 @@
|
|
cirrus_linear_bitblt_writel,
|
|
};
|
|
|
|
+#ifdef USE_KVM
|
|
+
|
|
+#include "qemu-kvm.h"
|
|
+
|
|
+extern kvm_context_t kvm_context;
|
|
+
|
|
+static void *set_vram_mapping(unsigned long begin, unsigned long end)
|
|
+{
|
|
+ void *vram_pointer = NULL;
|
|
+
|
|
+ /* align begin and end address */
|
|
+ begin = begin & TARGET_PAGE_MASK;
|
|
+ end = begin + VGA_RAM_SIZE;
|
|
+ end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
|
|
+
|
|
+ vram_pointer = kvm_create_phys_mem(kvm_context, begin, end - begin, 1,
|
|
+ 1, 1);
|
|
+
|
|
+ if (vram_pointer == NULL) {
|
|
+ printf("set_vram_mapping: cannot allocate memory: %m\n");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ memset(vram_pointer, 0, end - begin);
|
|
+
|
|
+ return vram_pointer;
|
|
+}
|
|
+
|
|
+static int unset_vram_mapping(unsigned long begin, unsigned long end)
|
|
+{
|
|
+ /* align begin and end address */
|
|
+ end = begin + VGA_RAM_SIZE;
|
|
+ begin = begin & TARGET_PAGE_MASK;
|
|
+ end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
|
|
+
|
|
+ kvm_destroy_phys_mem(kvm_context, begin, end - begin);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
/* Compute the memory access functions */
|
|
static void cirrus_update_memory_access(CirrusVGAState *s)
|
|
{
|
|
@@ -2543,11 +2596,45 @@
|
|
|
|
mode = s->gr[0x05] & 0x7;
|
|
if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && s->cirrus_lfb_addr && s->cirrus_lfb_end &&
|
|
+ !s->map_addr) {
|
|
+ void *vram_pointer, *old_vram;
|
|
+
|
|
+ vram_pointer = set_vram_mapping(s->cirrus_lfb_addr,
|
|
+ s->cirrus_lfb_end);
|
|
+ if (!vram_pointer)
|
|
+ fprintf(stderr, "NULL vram_pointer\n");
|
|
+ else {
|
|
+ old_vram = vga_update_vram((VGAState *)s, vram_pointer,
|
|
+ VGA_RAM_SIZE);
|
|
+ qemu_free(old_vram);
|
|
+ }
|
|
+ s->map_addr = s->cirrus_lfb_addr;
|
|
+ s->map_end = s->cirrus_lfb_end;
|
|
+ }
|
|
+#endif
|
|
s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
|
|
s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
|
|
s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
|
|
} else {
|
|
generic_io:
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && s->cirrus_lfb_addr && s->cirrus_lfb_end &&
|
|
+ s->map_addr) {
|
|
+ int error;
|
|
+ void *old_vram = NULL;
|
|
+
|
|
+ error = unset_vram_mapping(s->cirrus_lfb_addr,
|
|
+ s->cirrus_lfb_end);
|
|
+ if (!error)
|
|
+ old_vram = vga_update_vram((VGAState *)s, NULL,
|
|
+ VGA_RAM_SIZE);
|
|
+ if (old_vram)
|
|
+ munmap(old_vram, s->map_addr - s->map_end);
|
|
+ s->map_addr = s->map_end = 0;
|
|
+ }
|
|
+#endif
|
|
s->cirrus_linear_write[0] = cirrus_linear_writeb;
|
|
s->cirrus_linear_write[1] = cirrus_linear_writew;
|
|
s->cirrus_linear_write[2] = cirrus_linear_writel;
|
|
@@ -2946,6 +3033,13 @@
|
|
qemu_put_be32s(f, &s->hw_cursor_y);
|
|
/* XXX: we do not save the bitblt state - we assume we do not save
|
|
the state when the blitter is active */
|
|
+
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) { /* XXX: KVM images ought to be loadable in QEMU */
|
|
+ qemu_put_be32s(f, &s->real_vram_size);
|
|
+ qemu_put_buffer(f, s->vram_ptr, s->real_vram_size);
|
|
+ }
|
|
+#endif
|
|
}
|
|
|
|
static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
|
|
@@ -2996,6 +3090,22 @@
|
|
qemu_get_be32s(f, &s->hw_cursor_x);
|
|
qemu_get_be32s(f, &s->hw_cursor_y);
|
|
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ int real_vram_size;
|
|
+ qemu_get_be32s(f, &real_vram_size);
|
|
+ if (real_vram_size != s->real_vram_size) {
|
|
+ if (real_vram_size > s->real_vram_size)
|
|
+ real_vram_size = s->real_vram_size;
|
|
+ printf("%s: REAL_VRAM_SIZE MISMATCH !!!!!! SAVED=%d CURRENT=%d",
|
|
+ __FUNCTION__, real_vram_size, s->real_vram_size);
|
|
+ }
|
|
+ qemu_get_buffer(f, s->vram_ptr, real_vram_size);
|
|
+ cirrus_update_memory_access(s);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+
|
|
/* force refresh */
|
|
s->graphic_mode = -1;
|
|
cirrus_update_bank_ptr(s, 0);
|
|
@@ -3151,6 +3261,17 @@
|
|
/* XXX: add byte swapping apertures */
|
|
cpu_register_physical_memory(addr, s->vram_size,
|
|
s->cirrus_linear_io_addr);
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ s->cirrus_lfb_addr = addr;
|
|
+ s->cirrus_lfb_end = addr + VGA_RAM_SIZE;
|
|
+
|
|
+ if (s->map_addr && (s->cirrus_lfb_addr != s->map_addr) &&
|
|
+ (s->cirrus_lfb_end != s->map_end))
|
|
+ printf("cirrus vga map change while on lfb mode\n");
|
|
+ }
|
|
+#endif
|
|
+
|
|
cpu_register_physical_memory(addr + 0x1000000, 0x400000,
|
|
s->cirrus_linear_bitblt_io_addr);
|
|
}
|
|
--- qemu-0.9.0/hw/hypercall.c
|
|
+++ qemu-0.9.0/hw/hypercall.c
|
|
@@ -0,0 +1,302 @@
|
|
+/*
|
|
+ * QEMU-KVM Hypercall emulation
|
|
+ *
|
|
+ * Copyright (c) 2003-2004 Fabrice Bellard
|
|
+ * Copyright (c) 2006 Qumranet
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to deal
|
|
+ * in the Software without restriction, including without limitation the rights
|
|
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
+ * copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
+ * THE SOFTWARE.
|
|
+ */
|
|
+#include "vl.h"
|
|
+#include "hypercall.h"
|
|
+#include <stddef.h>
|
|
+
|
|
+int use_hypercall_dev = 0;
|
|
+
|
|
+typedef struct VmChannelCharDriverState {
|
|
+ CharDriverState *vmchannel_hd;
|
|
+ uint32_t deviceid;
|
|
+} VmChannelCharDriverState;
|
|
+
|
|
+static VmChannelCharDriverState vmchannel_hds[MAX_VMCHANNEL_DEVICES];
|
|
+
|
|
+typedef struct HypercallState {
|
|
+ uint32_t hcr;
|
|
+ uint32_t hsr;
|
|
+ uint32_t txsize;
|
|
+ uint32_t txbuff;
|
|
+ uint32_t rxsize;
|
|
+ uint8_t RxBuff[HP_MEM_SIZE];
|
|
+ uint8_t txbufferaccu[HP_MEM_SIZE];
|
|
+ int txbufferaccu_offset;
|
|
+ int irq;
|
|
+ PCIDevice *pci_dev;
|
|
+ uint32_t index;
|
|
+} HypercallState;
|
|
+
|
|
+HypercallState *pHypercallStates[MAX_VMCHANNEL_DEVICES] = {NULL};
|
|
+
|
|
+//#define HYPERCALL_DEBUG 1
|
|
+
|
|
+static void hp_reset(HypercallState *s)
|
|
+{
|
|
+ s->hcr = 0;
|
|
+ s->hsr = 0;
|
|
+ s->txsize = 0;
|
|
+ s->txbuff = 0;
|
|
+ s->rxsize= 0;
|
|
+ s->txbufferaccu_offset = 0;
|
|
+}
|
|
+
|
|
+static void hypercall_update_irq(HypercallState *s);
|
|
+
|
|
+
|
|
+static void hp_ioport_write(void *opaque, uint32_t addr, uint32_t val)
|
|
+{
|
|
+ HypercallState *s = opaque;
|
|
+
|
|
+#ifdef HYPERCALL_DEBUG
|
|
+ printf("%s: addr=0x%x, val=0x%x\n", __FUNCTION__, addr, val);
|
|
+#endif
|
|
+ addr &= 0xff;
|
|
+
|
|
+ switch(addr)
|
|
+ {
|
|
+ case HCR_REGISTER:
|
|
+ {
|
|
+ s->hcr = val;
|
|
+ if (s->hcr & HCR_DI)
|
|
+ hypercall_update_irq(s);
|
|
+ if (val & HCR_GRS){
|
|
+ hp_reset(s);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ case HP_TXSIZE:
|
|
+ {
|
|
+ // handle the case when the we are being called when txsize is not 0
|
|
+ if (s->txsize != 0) {
|
|
+ printf("txsize is being set, but txsize is not 0!!!\n");
|
|
+ }
|
|
+ if (val > HP_MEM_SIZE) {
|
|
+ printf("txsize is larger than allowed by hw!!!\n");
|
|
+ }
|
|
+ s->txsize = val;
|
|
+ s->txbufferaccu_offset = 0;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ case HP_TXBUFF:
|
|
+ {
|
|
+ if (s->txsize == 0) {
|
|
+ printf("error with txbuff!!!\n");
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ s->txbufferaccu[s->txbufferaccu_offset] = val;
|
|
+ s->txbufferaccu_offset++;
|
|
+ if (s->txbufferaccu_offset >= s->txsize) {
|
|
+ qemu_chr_write(vmchannel_hds[s->index].vmchannel_hd, s->txbufferaccu, s->txsize);
|
|
+ s->txbufferaccu_offset = 0;
|
|
+ s->txsize = 0;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ {
|
|
+ printf("hp_ioport_write to unhandled address!!!\n");
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static uint32_t hp_ioport_read(void *opaque, uint32_t addr)
|
|
+{
|
|
+ HypercallState *s = opaque;
|
|
+ int ret;
|
|
+
|
|
+ addr &= 0xff;
|
|
+#ifdef HYPERCALL_DEBUG
|
|
+ // Since HSR_REGISTER is being repeatedly read in the guest ISR we don't print it
|
|
+ if (addr != HSR_REGISTER)
|
|
+ printf("%s: addr=0x%x\n", __FUNCTION__, addr);
|
|
+#endif
|
|
+
|
|
+ if (addr >= offsetof(HypercallState, RxBuff) )
|
|
+ {
|
|
+ int RxBuffOffset = addr - (offsetof(HypercallState, RxBuff));
|
|
+ ret = s->RxBuff[RxBuffOffset];
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ switch (addr)
|
|
+ {
|
|
+ case HSR_REGISTER:
|
|
+ ret = s->hsr;
|
|
+ if (ret & HSR_VDR) {
|
|
+ s->hsr &= ~HSR_VDR;
|
|
+ }
|
|
+ break;
|
|
+ case HP_RXSIZE:
|
|
+ ret = s->rxsize;
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ ret = 0x00;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/***********************************************************/
|
|
+/* PCI Hypercall definitions */
|
|
+
|
|
+typedef struct PCIHypercallState {
|
|
+ PCIDevice dev;
|
|
+ HypercallState hp;
|
|
+} PCIHypercallState;
|
|
+
|
|
+static void hp_map(PCIDevice *pci_dev, int region_num,
|
|
+ uint32_t addr, uint32_t size, int type)
|
|
+{
|
|
+ PCIHypercallState *d = (PCIHypercallState *)pci_dev;
|
|
+ HypercallState *s = &d->hp;
|
|
+
|
|
+ register_ioport_write(addr, 0x100, 1, hp_ioport_write, s);
|
|
+ register_ioport_read(addr, 0x100, 1, hp_ioport_read, s);
|
|
+
|
|
+}
|
|
+
|
|
+
|
|
+static void hypercall_update_irq(HypercallState *s)
|
|
+{
|
|
+ /* PCI irq */
|
|
+ pci_set_irq(s->pci_dev, 0, !(s->hcr & HCR_DI));
|
|
+}
|
|
+
|
|
+void pci_hypercall_single_init(PCIBus *bus, uint32_t deviceid, uint32_t index)
|
|
+{
|
|
+ PCIHypercallState *d;
|
|
+ HypercallState *s;
|
|
+ uint8_t *pci_conf;
|
|
+ char name[sizeof("HypercallX")];
|
|
+
|
|
+#ifdef HYPERCALL_DEBUG
|
|
+ printf("%s\n", __FUNCTION__);
|
|
+#endif
|
|
+
|
|
+ // If the vmchannel wasn't initialized, we don't want the Hypercall device in the guest
|
|
+ if (use_hypercall_dev == 0) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ d = (PCIHypercallState *)pci_register_device(bus,
|
|
+ name, sizeof(PCIHypercallState),
|
|
+ -1,
|
|
+ NULL, NULL);
|
|
+
|
|
+ pci_conf = d->dev.config;
|
|
+ pci_conf[0x00] = 0x02; // Qumranet vendor ID 0x5002
|
|
+ pci_conf[0x01] = 0x50;
|
|
+ pci_conf[0x02] = deviceid & 0x00ff;
|
|
+ pci_conf[0x03] = (deviceid & 0xff00) >> 8;
|
|
+
|
|
+ pci_conf[0x09] = 0x00; // ProgIf
|
|
+ pci_conf[0x0a] = 0x00; // SubClass
|
|
+ pci_conf[0x0b] = 0x05; // BaseClass
|
|
+
|
|
+ pci_conf[0x0e] = 0x00; // header_type
|
|
+ pci_conf[0x3d] = 1; // interrupt pin 0
|
|
+
|
|
+ pci_register_io_region(&d->dev, 0, 0x100,
|
|
+ PCI_ADDRESS_SPACE_IO, hp_map);
|
|
+ s = &d->hp;
|
|
+ pHypercallStates[index] = s;
|
|
+ s->index = index;
|
|
+ s->irq = 16; /* PCI interrupt */
|
|
+ s->pci_dev = (PCIDevice *)d;
|
|
+
|
|
+ hp_reset(s);
|
|
+}
|
|
+
|
|
+void pci_hypercall_init(PCIBus *bus)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ // loop devices & call pci_hypercall_single_init with device id's
|
|
+ for(i = 0; i < MAX_VMCHANNEL_DEVICES; i++){
|
|
+ if (vmchannel_hds[i].vmchannel_hd) {
|
|
+ pci_hypercall_single_init(bus, vmchannel_hds[i].deviceid, i);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static int vmchannel_can_read(void *opaque)
|
|
+{
|
|
+ return 128;
|
|
+}
|
|
+
|
|
+static void vmchannel_event(void *opaque, int event)
|
|
+{
|
|
+
|
|
+#ifdef HYPERCALL_DEBUG
|
|
+ // if index is to be used outside the printf, take it out of the #ifdef block!
|
|
+ long index = (long)opaque;
|
|
+ printf("%s index:%ld, got event %i\n", __FUNCTION__, index, event);
|
|
+#endif
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+// input from vmchannel outside caller
|
|
+static void vmchannel_read(void *opaque, const uint8_t *buf, int size)
|
|
+{
|
|
+ int i;
|
|
+ long index = (long)opaque;
|
|
+
|
|
+#ifdef HYPERCALL_DEBUG
|
|
+ printf("vmchannel_read buf size:%d\n", size);
|
|
+#endif
|
|
+
|
|
+ // if the hypercall device is in interrupts disabled state, don't accept the data
|
|
+ if (pHypercallStates[index]->hcr & HCR_DI) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ for(i = 0; i < size; i++) {
|
|
+ pHypercallStates[index]->RxBuff[i] = buf[i];
|
|
+ }
|
|
+ pHypercallStates[index]->rxsize = size;
|
|
+ pHypercallStates[index]->hsr = HSR_VDR;
|
|
+ hypercall_update_irq(pHypercallStates[index]);
|
|
+}
|
|
+
|
|
+void vmchannel_init(CharDriverState *hd, uint32_t deviceid, uint32_t index)
|
|
+{
|
|
+#ifdef HYPERCALL_DEBUG
|
|
+ printf("vmchannel_init, index=%d, deviceid=0x%x\n", index, deviceid);
|
|
+#endif
|
|
+
|
|
+ vmchannel_hds[index].deviceid = deviceid;
|
|
+ vmchannel_hds[index].vmchannel_hd = hd;
|
|
+
|
|
+ use_hypercall_dev = 1;
|
|
+ qemu_chr_add_handlers(vmchannel_hds[index].vmchannel_hd, vmchannel_can_read, vmchannel_read,
|
|
+ vmchannel_event, (void *)(long)index);
|
|
+}
|
|
--- qemu-0.9.0/hw/hypercall.h
|
|
+++ qemu-0.9.0/hw/hypercall.h
|
|
@@ -0,0 +1,45 @@
|
|
+/*
|
|
+ * QEMU-KVM Hypercall emulation
|
|
+ *
|
|
+ * Copyright (c) 2003-2004 Fabrice Bellard
|
|
+ * Copyright (c) 2006 Qumranet
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to deal
|
|
+ * in the Software without restriction, including without limitation the rights
|
|
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
+ * copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
+ * THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#define HCR_REGISTER 0x00 // Hypercall Command Register WR
|
|
+#define HSR_REGISTER 0x04 // Hypercall Status Register RD
|
|
+#define HP_TXSIZE 0x08
|
|
+#define HP_TXBUFF 0x0c
|
|
+#define HP_RXSIZE 0x10
|
|
+#define HP_RXBUFF 0x14
|
|
+
|
|
+// HCR_REGISTER commands
|
|
+#define HCR_DI 1 // disable interrupts
|
|
+#define HCR_EI 2 // enable interrupts
|
|
+#define HCR_GRS 4 // Global reset
|
|
+#define HCR_RESET (HCR_GRS|HCR_DI)
|
|
+
|
|
+
|
|
+// Bits in HSR_REGISTER
|
|
+#define HSR_VDR 0x01 // vmchannel Data is ready to be read
|
|
+
|
|
+#define HP_MEM_SIZE 0xE0
|
|
+
|
|
+
|
|
--- qemu-0.9.0/hw/pc.c
|
|
+++ qemu-0.9.0/hw/pc.c
|
|
@@ -22,6 +22,10 @@
|
|
* THE SOFTWARE.
|
|
*/
|
|
#include "vl.h"
|
|
+#ifdef USE_KVM
|
|
+#include "qemu-kvm.h"
|
|
+extern int kvm_allowed;
|
|
+#endif
|
|
|
|
/* output Bochs bios info messages */
|
|
//#define DEBUG_BIOS
|
|
@@ -444,6 +448,11 @@
|
|
nb_ne2k++;
|
|
}
|
|
|
|
+#ifdef USE_KVM
|
|
+extern kvm_context_t kvm_context;
|
|
+extern int kvm_allowed;
|
|
+#endif
|
|
+
|
|
/* PC hardware initialisation */
|
|
static void pc_init1(int ram_size, int vga_ram_size, int boot_device,
|
|
DisplayState *ds, const char **fd_filename, int snapshot,
|
|
@@ -511,6 +520,11 @@
|
|
/* setup basic memory access */
|
|
cpu_register_physical_memory(0xc0000, 0x10000,
|
|
vga_bios_offset | IO_MEM_ROM);
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed)
|
|
+ memcpy(phys_ram_base + 0xc0000, phys_ram_base + vga_bios_offset,
|
|
+ 0x10000);
|
|
+#endif
|
|
|
|
/* map the last 128KB of the BIOS in ISA space */
|
|
isa_bios_size = bios_size;
|
|
@@ -522,6 +536,26 @@
|
|
isa_bios_size,
|
|
(bios_offset + bios_size - isa_bios_size) | IO_MEM_ROM);
|
|
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed)
|
|
+ memcpy(phys_ram_base + 0x100000 - isa_bios_size,
|
|
+ phys_ram_base + (bios_offset + bios_size - isa_bios_size),
|
|
+ isa_bios_size);
|
|
+#endif
|
|
+
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ bios_mem = kvm_create_phys_mem(kvm_context, (uint32_t)(-bios_size),
|
|
+ bios_size, 2, 0, 1);
|
|
+ if (!bios_mem)
|
|
+ exit(1);
|
|
+ memcpy(bios_mem, phys_ram_base + bios_offset, bios_size);
|
|
+
|
|
+ cpu_register_physical_memory(phys_ram_size - KVM_EXTRA_PAGES * 4096, KVM_EXTRA_PAGES * 4096,
|
|
+ (phys_ram_size - KVM_EXTRA_PAGES * 4096) | IO_MEM_ROM);
|
|
+ }
|
|
+#endif
|
|
+
|
|
option_rom_offset = 0;
|
|
for (i = 0; i < nb_option_roms; i++) {
|
|
int offset = bios_offset + bios_size + option_rom_offset;
|
|
@@ -718,6 +752,11 @@
|
|
}
|
|
}
|
|
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ pci_hypercall_init(pci_bus);
|
|
+ }
|
|
+#endif
|
|
if (pci_enabled) {
|
|
pci_piix3_ide_init(pci_bus, bs_table, piix3_devfn + 1);
|
|
} else {
|
|
--- qemu-0.9.0/hw/vga.c
|
|
+++ qemu-0.9.0/hw/vga.c
|
|
@@ -1373,6 +1373,26 @@
|
|
}
|
|
}
|
|
|
|
+#ifdef USE_KVM
|
|
+
|
|
+#include "kvmctl.h"
|
|
+extern kvm_context_t kvm_context;
|
|
+
|
|
+static int bitmap_get_dirty(unsigned long *bitmap, unsigned nr)
|
|
+{
|
|
+ unsigned word = nr / ((sizeof bitmap[0]) * 8);
|
|
+ unsigned bit = nr % ((sizeof bitmap[0]) * 8);
|
|
+
|
|
+ //printf("%x -> %ld\n", nr, (bitmap[word] >> bit) & 1);
|
|
+ return (bitmap[word] >> bit) & 1;
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
+#ifdef USE_KVM
|
|
+extern int kvm_allowed;
|
|
+#endif
|
|
+
|
|
/*
|
|
* graphic modes
|
|
*/
|
|
@@ -1385,6 +1405,20 @@
|
|
uint32_t v, addr1, addr;
|
|
vga_draw_line_func *vga_draw_line;
|
|
|
|
+#ifdef USE_KVM
|
|
+
|
|
+ /* HACK ALERT */
|
|
+#define BITMAP_SIZE ((8*1024*1024) / 4096 / 8 / sizeof(long))
|
|
+ unsigned long bitmap[BITMAP_SIZE];
|
|
+ int r;
|
|
+
|
|
+ if (kvm_allowed) {
|
|
+ r = kvm_get_dirty_pages(kvm_context, 1, &bitmap);
|
|
+ if (r < 0)
|
|
+ fprintf(stderr, "kvm: get_dirty_pages returned %d\n", r);
|
|
+ }
|
|
+#endif
|
|
+
|
|
full_update |= update_basic_params(s);
|
|
|
|
s->get_resolution(s, &width, &height);
|
|
@@ -1491,10 +1525,20 @@
|
|
update = full_update |
|
|
cpu_physical_memory_get_dirty(page0, VGA_DIRTY_FLAG) |
|
|
cpu_physical_memory_get_dirty(page1, VGA_DIRTY_FLAG);
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ update |= bitmap_get_dirty(bitmap, (page0 - s->vram_offset) >> TARGET_PAGE_BITS);
|
|
+ update |= bitmap_get_dirty(bitmap, (page1 - s->vram_offset) >> TARGET_PAGE_BITS);
|
|
+ }
|
|
+#endif
|
|
if ((page1 - page0) > TARGET_PAGE_SIZE) {
|
|
/* if wide line, can use another page */
|
|
update |= cpu_physical_memory_get_dirty(page0 + TARGET_PAGE_SIZE,
|
|
VGA_DIRTY_FLAG);
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed)
|
|
+ update |= bitmap_get_dirty(bitmap, (page0 - s->vram_offset) >> TARGET_PAGE_BITS);
|
|
+#endif
|
|
}
|
|
/* explicit invalidation for the hardware cursor */
|
|
update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
|
|
@@ -1751,6 +1795,7 @@
|
|
}
|
|
}
|
|
|
|
+/* when used on xen/kvm environment, the vga_ram_base is not used */
|
|
void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base,
|
|
unsigned long vga_ram_offset, int vga_ram_size)
|
|
{
|
|
@@ -1781,7 +1826,14 @@
|
|
|
|
vga_reset(s);
|
|
|
|
+#ifndef USE_KVM
|
|
s->vram_ptr = vga_ram_base;
|
|
+#else
|
|
+ if (kvm_allowed)
|
|
+ s->vram_ptr = qemu_malloc(vga_ram_size);
|
|
+ else
|
|
+ s->vram_ptr = vga_ram_base;
|
|
+#endif
|
|
s->vram_offset = vga_ram_offset;
|
|
s->vram_size = vga_ram_size;
|
|
s->ds = ds;
|
|
@@ -1909,6 +1961,31 @@
|
|
return 0;
|
|
}
|
|
|
|
+void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size)
|
|
+{
|
|
+ uint8_t *old_pointer;
|
|
+
|
|
+ if (s->vram_size != vga_ram_size) {
|
|
+ fprintf(stderr, "No support to change vga_ram_size\n");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ if (!vga_ram_base) {
|
|
+ vga_ram_base = qemu_malloc(vga_ram_size);
|
|
+ if (!vga_ram_base) {
|
|
+ fprintf(stderr, "reallocate error\n");
|
|
+ return NULL;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* XXX lock needed? */
|
|
+ memcpy(vga_ram_base, s->vram_ptr, vga_ram_size);
|
|
+ old_pointer = s->vram_ptr;
|
|
+ s->vram_ptr = vga_ram_base;
|
|
+
|
|
+ return old_pointer;
|
|
+}
|
|
+
|
|
/********************************************************/
|
|
/* vga screen dump */
|
|
|
|
--- qemu-0.9.0/hw/vga_int.h
|
|
+++ qemu-0.9.0/hw/vga_int.h
|
|
@@ -174,5 +174,6 @@
|
|
unsigned int color0, unsigned int color1,
|
|
unsigned int color_xor);
|
|
|
|
+void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size);
|
|
extern const uint8_t sr_mask[8];
|
|
extern const uint8_t gr_mask[16];
|
|
--- qemu-0.9.0/kvm.h
|
|
+++ qemu-0.9.0/kvm.h
|
|
@@ -0,0 +1,247 @@
|
|
+#ifndef __LINUX_KVM_H
|
|
+#define __LINUX_KVM_H
|
|
+
|
|
+/*
|
|
+ * Userspace interface for /dev/kvm - kernel based virtual machine
|
|
+ *
|
|
+ * Note: this interface is considered experimental and may change without
|
|
+ * notice.
|
|
+ */
|
|
+
|
|
+#include <asm/types.h>
|
|
+#include <linux/ioctl.h>
|
|
+
|
|
+#define KVM_API_VERSION 4
|
|
+
|
|
+/*
|
|
+ * Architectural interrupt line count, and the size of the bitmap needed
|
|
+ * to hold them.
|
|
+ */
|
|
+#define KVM_NR_INTERRUPTS 256
|
|
+#define KVM_IRQ_BITMAP_SIZE_BYTES ((KVM_NR_INTERRUPTS + 7) / 8)
|
|
+#define KVM_IRQ_BITMAP_SIZE(type) (KVM_IRQ_BITMAP_SIZE_BYTES / sizeof(type))
|
|
+
|
|
+
|
|
+/* for KVM_CREATE_MEMORY_REGION */
|
|
+struct kvm_memory_region {
|
|
+ __u32 slot;
|
|
+ __u32 flags;
|
|
+ __u64 guest_phys_addr;
|
|
+ __u64 memory_size; /* bytes */
|
|
+};
|
|
+
|
|
+/* for kvm_memory_region::flags */
|
|
+#define KVM_MEM_LOG_DIRTY_PAGES 1UL
|
|
+
|
|
+
|
|
+#define KVM_EXIT_TYPE_FAIL_ENTRY 1
|
|
+#define KVM_EXIT_TYPE_VM_EXIT 2
|
|
+
|
|
+enum kvm_exit_reason {
|
|
+ KVM_EXIT_UNKNOWN = 0,
|
|
+ KVM_EXIT_EXCEPTION = 1,
|
|
+ KVM_EXIT_IO = 2,
|
|
+ KVM_EXIT_CPUID = 3,
|
|
+ KVM_EXIT_DEBUG = 4,
|
|
+ KVM_EXIT_HLT = 5,
|
|
+ KVM_EXIT_MMIO = 6,
|
|
+ KVM_EXIT_IRQ_WINDOW_OPEN = 7,
|
|
+ KVM_EXIT_SHUTDOWN = 8,
|
|
+};
|
|
+
|
|
+/* for KVM_RUN */
|
|
+struct kvm_run {
|
|
+ /* in */
|
|
+ __u32 emulated; /* skip current instruction */
|
|
+ __u32 mmio_completed; /* mmio request completed */
|
|
+ __u8 request_interrupt_window;
|
|
+ __u8 padding1[7];
|
|
+
|
|
+ /* out */
|
|
+ __u32 exit_type;
|
|
+ __u32 exit_reason;
|
|
+ __u32 instruction_length;
|
|
+ __u8 ready_for_interrupt_injection;
|
|
+ __u8 if_flag;
|
|
+ __u16 padding2;
|
|
+
|
|
+ /* in (pre_kvm_run), out (post_kvm_run) */
|
|
+ __u64 cr8;
|
|
+ __u64 apic_base;
|
|
+
|
|
+ union {
|
|
+ /* KVM_EXIT_UNKNOWN */
|
|
+ struct {
|
|
+ __u32 hardware_exit_reason;
|
|
+ } hw;
|
|
+ /* KVM_EXIT_EXCEPTION */
|
|
+ struct {
|
|
+ __u32 exception;
|
|
+ __u32 error_code;
|
|
+ } ex;
|
|
+ /* KVM_EXIT_IO */
|
|
+ struct {
|
|
+#define KVM_EXIT_IO_IN 0
|
|
+#define KVM_EXIT_IO_OUT 1
|
|
+ __u8 direction;
|
|
+ __u8 size; /* bytes */
|
|
+ __u8 string;
|
|
+ __u8 string_down;
|
|
+ __u8 rep;
|
|
+ __u8 pad;
|
|
+ __u16 port;
|
|
+ __u64 count;
|
|
+ union {
|
|
+ __u64 address;
|
|
+ __u32 value;
|
|
+ };
|
|
+ } io;
|
|
+ struct {
|
|
+ } debug;
|
|
+ /* KVM_EXIT_MMIO */
|
|
+ struct {
|
|
+ __u64 phys_addr;
|
|
+ __u8 data[8];
|
|
+ __u32 len;
|
|
+ __u8 is_write;
|
|
+ } mmio;
|
|
+ };
|
|
+};
|
|
+
|
|
+/* for KVM_GET_REGS and KVM_SET_REGS */
|
|
+struct kvm_regs {
|
|
+ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
|
|
+ __u64 rax, rbx, rcx, rdx;
|
|
+ __u64 rsi, rdi, rsp, rbp;
|
|
+ __u64 r8, r9, r10, r11;
|
|
+ __u64 r12, r13, r14, r15;
|
|
+ __u64 rip, rflags;
|
|
+};
|
|
+
|
|
+struct kvm_segment {
|
|
+ __u64 base;
|
|
+ __u32 limit;
|
|
+ __u16 selector;
|
|
+ __u8 type;
|
|
+ __u8 present, dpl, db, s, l, g, avl;
|
|
+ __u8 unusable;
|
|
+ __u8 padding;
|
|
+};
|
|
+
|
|
+struct kvm_dtable {
|
|
+ __u64 base;
|
|
+ __u16 limit;
|
|
+ __u16 padding[3];
|
|
+};
|
|
+
|
|
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
|
|
+struct kvm_sregs {
|
|
+ /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
|
|
+ struct kvm_segment cs, ds, es, fs, gs, ss;
|
|
+ struct kvm_segment tr, ldt;
|
|
+ struct kvm_dtable gdt, idt;
|
|
+ __u64 cr0, cr2, cr3, cr4, cr8;
|
|
+ __u64 efer;
|
|
+ __u64 apic_base;
|
|
+ __u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)];
|
|
+};
|
|
+
|
|
+struct kvm_msr_entry {
|
|
+ __u32 index;
|
|
+ __u32 reserved;
|
|
+ __u64 data;
|
|
+};
|
|
+
|
|
+/* for KVM_GET_MSRS and KVM_SET_MSRS */
|
|
+struct kvm_msrs {
|
|
+ __u32 nmsrs; /* number of msrs in entries */
|
|
+ __u32 pad;
|
|
+
|
|
+ struct kvm_msr_entry entries[0];
|
|
+};
|
|
+
|
|
+/* for KVM_GET_MSR_INDEX_LIST */
|
|
+struct kvm_msr_list {
|
|
+ __u32 nmsrs; /* number of msrs in entries */
|
|
+ __u32 indices[0];
|
|
+};
|
|
+
|
|
+/* for KVM_TRANSLATE */
|
|
+struct kvm_translation {
|
|
+ /* in */
|
|
+ __u64 linear_address;
|
|
+
|
|
+ /* out */
|
|
+ __u64 physical_address;
|
|
+ __u8 valid;
|
|
+ __u8 writeable;
|
|
+ __u8 usermode;
|
|
+ __u8 pad[5];
|
|
+};
|
|
+
|
|
+/* for KVM_INTERRUPT */
|
|
+struct kvm_interrupt {
|
|
+ /* in */
|
|
+ __u32 irq;
|
|
+};
|
|
+
|
|
+struct kvm_breakpoint {
|
|
+ __u32 enabled;
|
|
+ __u32 padding;
|
|
+ __u64 address;
|
|
+};
|
|
+
|
|
+/* for KVM_DEBUG_GUEST */
|
|
+struct kvm_debug_guest {
|
|
+	/* in */
|
|
+ __u32 enabled;
|
|
+ __u32 pad;
|
|
+ struct kvm_breakpoint breakpoints[4];
|
|
+ __u32 singlestep;
|
|
+};
|
|
+
|
|
+/* for KVM_GET_DIRTY_LOG */
|
|
+struct kvm_dirty_log {
|
|
+ __u32 slot;
|
|
+ __u32 padding;
|
|
+ union {
|
|
+ void __user *dirty_bitmap; /* one bit per page */
|
|
+ __u64 padding;
|
|
+ };
|
|
+};
|
|
+
|
|
+#define KVMIO 0xAE
|
|
+
|
|
+/*
|
|
+ * ioctls for /dev/kvm fds:
|
|
+ */
|
|
+#define KVM_GET_API_VERSION _IO(KVMIO, 1)
|
|
+#define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */
|
|
+#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list)
|
|
+
|
|
+/*
|
|
+ * ioctls for VM fds
|
|
+ */
|
|
+#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region)
|
|
+/*
|
|
+ * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
|
|
+ * a vcpu fd.
|
|
+ */
|
|
+#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int)
|
|
+#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log)
|
|
+
|
|
+/*
|
|
+ * ioctls for vcpu fds
|
|
+ */
|
|
+#define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run)
|
|
+#define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs)
|
|
+#define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs)
|
|
+#define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs)
|
|
+#define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs)
|
|
+#define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation)
|
|
+#define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt)
|
|
+#define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest)
|
|
+#define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs)
|
|
+#define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs)
|
|
+
|
|
+#endif
|
|
--- qemu-0.9.0/kvmctl.c
|
|
+++ qemu-0.9.0/kvmctl.c
|
|
@@ -0,0 +1,809 @@
|
|
+/*
|
|
+ * Kernel-based Virtual Machine control library
|
|
+ *
|
|
+ * This library provides an API to control the kvm hardware virtualization
|
|
+ * module.
|
|
+ *
|
|
+ * Copyright (C) 2006 Qumranet
|
|
+ *
|
|
+ * Authors:
|
|
+ *
|
|
+ * Avi Kivity <avi@qumranet.com>
|
|
+ * Yaniv Kamay <yaniv@qumranet.com>
|
|
+ *
|
|
+ * This work is licensed under the GNU LGPL license, version 2.
|
|
+ */
|
|
+
|
|
+#include <unistd.h>
|
|
+#include <fcntl.h>
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+#include <sys/mman.h>
|
|
+#include <string.h>
|
|
+#include <errno.h>
|
|
+#include <sys/ioctl.h>
|
|
+#include "kvmctl.h"
|
|
+
|
|
+#define EXPECTED_KVM_API_VERSION 4
|
|
+
|
|
+#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
|
|
+#error libkvm: userspace and kernel version mismatch
|
|
+#endif
|
|
+
|
|
+#define PAGE_SIZE 4096ul
|
|
+
|
|
+/* FIXME: share this number with kvm */
|
|
+/* FIXME: or dynamically alloc/realloc regions */
|
|
+#define KVM_MAX_NUM_MEM_REGIONS 4u
|
|
+
|
|
+/**
|
|
+ * \brief The KVM context
|
|
+ *
|
|
+ * The verbose KVM context
|
|
+ */
|
|
+struct kvm_context {
|
|
+	/// File descriptor for /dev/kvm
|
|
+ int fd;
|
|
+ int vm_fd;
|
|
+ int vcpu_fd[1];
|
|
+ /// Callbacks that KVM uses to emulate various unvirtualizable functionality
|
|
+ struct kvm_callbacks *callbacks;
|
|
+ void *opaque;
|
|
+ /// A pointer to the memory used as the physical memory for the guest
|
|
+ void *physical_memory;
|
|
+ /// is dirty pages logging enabled for all regions or not
|
|
+ int dirty_pages_log_all;
|
|
+ /// memory regions parameters
|
|
+ struct kvm_memory_region mem_regions[KVM_MAX_NUM_MEM_REGIONS];
|
|
+};
|
|
+
|
|
+struct translation_cache {
|
|
+ unsigned long linear;
|
|
+ void *physical;
|
|
+};
|
|
+
|
|
+static void translation_cache_init(struct translation_cache *tr)
|
|
+{
|
|
+ tr->physical = 0;
|
|
+}
|
|
+
|
|
+static int translate(kvm_context_t kvm, int vcpu, struct translation_cache *tr,
|
|
+ unsigned long linear, void **physical)
|
|
+{
|
|
+ unsigned long page = linear & ~(PAGE_SIZE-1);
|
|
+ unsigned long offset = linear & (PAGE_SIZE-1);
|
|
+
|
|
+ if (!(tr->physical && tr->linear == page)) {
|
|
+ struct kvm_translation kvm_tr;
|
|
+ int r;
|
|
+
|
|
+ kvm_tr.linear_address = page;
|
|
+
|
|
+ r = ioctl(kvm->vcpu_fd[vcpu], KVM_TRANSLATE, &kvm_tr);
|
|
+ if (r == -1)
|
|
+ return -errno;
|
|
+
|
|
+ if (!kvm_tr.valid)
|
|
+ return -EFAULT;
|
|
+
|
|
+ tr->linear = page;
|
|
+ tr->physical = kvm->physical_memory + kvm_tr.physical_address;
|
|
+ }
|
|
+ *physical = tr->physical + offset;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * memory regions parameters
|
|
+ */
|
|
+static void kvm_memory_region_save_params(kvm_context_t kvm,
|
|
+ struct kvm_memory_region *mem)
|
|
+{
|
|
+ if (!mem || (mem->slot >= KVM_MAX_NUM_MEM_REGIONS)) {
|
|
+ fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
|
|
+ return;
|
|
+ }
|
|
+ kvm->mem_regions[mem->slot] = *mem;
|
|
+}
|
|
+
|
|
+static void kvm_memory_region_clear_params(kvm_context_t kvm, int regnum)
|
|
+{
|
|
+ if (regnum >= KVM_MAX_NUM_MEM_REGIONS) {
|
|
+ fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
|
|
+ return;
|
|
+ }
|
|
+ kvm->mem_regions[regnum].memory_size = 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * dirty pages logging control
|
|
+ */
|
|
+static int kvm_dirty_pages_log_change(kvm_context_t kvm, int regnum, __u32 flag)
|
|
+{
|
|
+ int r;
|
|
+ struct kvm_memory_region *mem;
|
|
+
|
|
+ if (regnum >= KVM_MAX_NUM_MEM_REGIONS) {
|
|
+ fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
|
|
+ return 1;
|
|
+ }
|
|
+ mem = &kvm->mem_regions[regnum];
|
|
+ if (mem->memory_size == 0) /* not used */
|
|
+ return 0;
|
|
+ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) /* log already enabled */
|
|
+ return 0;
|
|
+ mem->flags |= flag; /* temporary turn on flag */
|
|
+ r = ioctl(kvm->vm_fd, KVM_SET_MEMORY_REGION, mem);
|
|
+ mem->flags &= ~flag; /* back to previous value */
|
|
+ if (r == -1) {
|
|
+ fprintf(stderr, "%s: %m\n", __FUNCTION__);
|
|
+ }
|
|
+ return r;
|
|
+}
|
|
+
|
|
+static int kvm_dirty_pages_log_change_all(kvm_context_t kvm, __u32 flag)
|
|
+{
|
|
+ int i, r;
|
|
+
|
|
+ for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
|
|
+ r = kvm_dirty_pages_log_change(kvm, i, flag);
|
|
+ }
|
|
+ return r;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Enable dirty page logging for all memory regions
|
|
+ */
|
|
+int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
|
|
+{
|
|
+ if (kvm->dirty_pages_log_all)
|
|
+ return 0;
|
|
+ kvm->dirty_pages_log_all = 1;
|
|
+ return kvm_dirty_pages_log_change_all(kvm, KVM_MEM_LOG_DIRTY_PAGES);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Enable dirty page logging only for memory regions that were created with
|
|
+ * dirty logging enabled (disable for all other memory regions).
|
|
+ */
|
|
+int kvm_dirty_pages_log_reset(kvm_context_t kvm)
|
|
+{
|
|
+ if (!kvm->dirty_pages_log_all)
|
|
+ return 0;
|
|
+ kvm->dirty_pages_log_all = 0;
|
|
+ return kvm_dirty_pages_log_change_all(kvm, 0);
|
|
+}
|
|
+
|
|
+
|
|
+kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
|
|
+ void *opaque)
|
|
+{
|
|
+ int fd;
|
|
+ kvm_context_t kvm;
|
|
+ int r;
|
|
+
|
|
+ fd = open("/dev/kvm", O_RDWR);
|
|
+ if (fd == -1) {
|
|
+ perror("open /dev/kvm");
|
|
+ return NULL;
|
|
+ }
|
|
+ r = ioctl(fd, KVM_GET_API_VERSION, 0);
|
|
+ if (r == -1) {
|
|
+ fprintf(stderr, "kvm kernel version too old\n");
|
|
+ goto out_close;
|
|
+ }
|
|
+ if (r < EXPECTED_KVM_API_VERSION) {
|
|
+ fprintf(stderr, "kvm kernel version too old\n");
|
|
+ goto out_close;
|
|
+ }
|
|
+ if (r > EXPECTED_KVM_API_VERSION) {
|
|
+ fprintf(stderr, "kvm userspace version too old\n");
|
|
+ goto out_close;
|
|
+ }
|
|
+ kvm = malloc(sizeof(*kvm));
|
|
+ kvm->fd = fd;
|
|
+ kvm->vm_fd = -1;
|
|
+ kvm->callbacks = callbacks;
|
|
+ kvm->opaque = opaque;
|
|
+ kvm->dirty_pages_log_all = 0;
|
|
+ memset(&kvm->mem_regions, 0, sizeof(kvm->mem_regions));
|
|
+
|
|
+ return kvm;
|
|
+ out_close:
|
|
+ close(fd);
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+void kvm_finalize(kvm_context_t kvm)
|
|
+{
|
|
+ if (kvm->vcpu_fd[0] != -1)
|
|
+ close(kvm->vcpu_fd[0]);
|
|
+ if (kvm->vm_fd != -1)
|
|
+ close(kvm->vm_fd);
|
|
+ close(kvm->fd);
|
|
+ free(kvm);
|
|
+}
|
|
+
|
|
+int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
|
|
+{
|
|
+ unsigned long dosmem = 0xa0000;
|
|
+ unsigned long exmem = 0xc0000;
|
|
+ int fd = kvm->fd;
|
|
+ int r;
|
|
+ struct kvm_memory_region low_memory = {
|
|
+ .slot = 3,
|
|
+ .memory_size = memory < dosmem ? memory : dosmem,
|
|
+ .guest_phys_addr = 0,
|
|
+ };
|
|
+ struct kvm_memory_region extended_memory = {
|
|
+ .slot = 0,
|
|
+ .memory_size = memory < exmem ? 0 : memory - exmem,
|
|
+ .guest_phys_addr = exmem,
|
|
+ };
|
|
+
|
|
+ kvm->vcpu_fd[0] = -1;
|
|
+
|
|
+ fd = ioctl(fd, KVM_CREATE_VM, 0);
|
|
+ if (fd == -1) {
|
|
+ fprintf(stderr, "kvm_create_vm: %m\n");
|
|
+ return -1;
|
|
+ }
|
|
+ kvm->vm_fd = fd;
|
|
+
|
|
+ /* 640K should be enough. */
|
|
+ r = ioctl(fd, KVM_SET_MEMORY_REGION, &low_memory);
|
|
+ if (r == -1) {
|
|
+ fprintf(stderr, "kvm_create_memory_region: %m\n");
|
|
+ return -1;
|
|
+ }
|
|
+ if (extended_memory.memory_size) {
|
|
+ r = ioctl(fd, KVM_SET_MEMORY_REGION, &extended_memory);
|
|
+ if (r == -1) {
|
|
+ fprintf(stderr, "kvm_create_memory_region: %m\n");
|
|
+ return -1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ kvm_memory_region_save_params(kvm, &low_memory);
|
|
+ kvm_memory_region_save_params(kvm, &extended_memory);
|
|
+
|
|
+ *vm_mem = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
|
|
+ if (*vm_mem == MAP_FAILED) {
|
|
+ fprintf(stderr, "mmap: %m\n");
|
|
+ return -1;
|
|
+ }
|
|
+ kvm->physical_memory = *vm_mem;
|
|
+
|
|
+ r = ioctl(fd, KVM_CREATE_VCPU, 0);
|
|
+ if (r == -1) {
|
|
+ fprintf(stderr, "kvm_create_vcpu: %m\n");
|
|
+ return -1;
|
|
+ }
|
|
+ kvm->vcpu_fd[0] = r;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start,
|
|
+ unsigned long len, int slot, int log, int writable)
|
|
+{
|
|
+ void *ptr;
|
|
+ int r;
|
|
+ int fd = kvm->vm_fd;
|
|
+ int prot = PROT_READ;
|
|
+ struct kvm_memory_region memory = {
|
|
+ .slot = slot,
|
|
+ .memory_size = len,
|
|
+ .guest_phys_addr = phys_start,
|
|
+ .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
|
|
+ };
|
|
+
|
|
+ r = ioctl(fd, KVM_SET_MEMORY_REGION, &memory);
|
|
+ if (r == -1)
|
|
+ return 0;
|
|
+
|
|
+ kvm_memory_region_save_params(kvm, &memory);
|
|
+
|
|
+ if (writable)
|
|
+ prot |= PROT_WRITE;
|
|
+
|
|
+ ptr = mmap(0, len, prot, MAP_SHARED, fd, phys_start);
|
|
+ if (ptr == MAP_FAILED)
|
|
+ return 0;
|
|
+ return ptr;
|
|
+}
|
|
+
|
|
+void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start,
|
|
+ unsigned long len)
|
|
+{
|
|
+ //for each memory region in (phys_start, phys_start+len) do
|
|
+ // kvm_memory_region_clear_params(kvm, region);
|
|
+ kvm_memory_region_clear_params(kvm, 0); /* avoid compiler warning */
|
|
+ printf("kvm_destroy_phys_mem: implement me\n");
|
|
+ exit(1);
|
|
+}
|
|
+
|
|
+
|
|
+int kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
|
|
+{
|
|
+ int r;
|
|
+ struct kvm_dirty_log log = {
|
|
+ .slot = slot,
|
|
+ };
|
|
+
|
|
+ log.dirty_bitmap = buf;
|
|
+
|
|
+ r = ioctl(kvm->vm_fd, KVM_GET_DIRTY_LOG, &log);
|
|
+ if (r == -1)
|
|
+ return -errno;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int more_io(struct kvm_run *run, int first_time)
|
|
+{
|
|
+ if (!run->io.rep)
|
|
+ return first_time;
|
|
+ else
|
|
+ return run->io.count != 0;
|
|
+}
|
|
+
|
|
+static int handle_io(kvm_context_t kvm, struct kvm_run *run, int vcpu)
|
|
+{
|
|
+ uint16_t addr = run->io.port;
|
|
+ struct kvm_regs regs;
|
|
+ int first_time = 1;
|
|
+ int delta;
|
|
+ struct translation_cache tr;
|
|
+ int _in = (run->io.direction == KVM_EXIT_IO_IN);
|
|
+ int r;
|
|
+
|
|
+ translation_cache_init(&tr);
|
|
+
|
|
+ if (run->io.string || _in) {
|
|
+ r = ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, ®s);
|
|
+ if (r == -1)
|
|
+ return -errno;
|
|
+ }
|
|
+
|
|
+ delta = run->io.string_down ? -run->io.size : run->io.size;
|
|
+
|
|
+ while (more_io(run, first_time)) {
|
|
+ void *value_addr;
|
|
+
|
|
+ if (!run->io.string) {
|
|
+ if (_in)
|
|
+ value_addr = ®s.rax;
|
|
+ else
|
|
+ value_addr = &run->io.value;
|
|
+ } else {
|
|
+ r = translate(kvm, vcpu, &tr, run->io.address,
|
|
+ &value_addr);
|
|
+ if (r) {
|
|
+ fprintf(stderr, "failed translating I/O address %llx\n",
|
|
+ run->io.address);
|
|
+ return r;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ switch (run->io.direction) {
|
|
+ case KVM_EXIT_IO_IN: {
|
|
+ switch (run->io.size) {
|
|
+ case 1: {
|
|
+ uint8_t value;
|
|
+ r = kvm->callbacks->inb(kvm->opaque, addr, &value);
|
|
+ *(uint8_t *)value_addr = value;
|
|
+ break;
|
|
+ }
|
|
+ case 2: {
|
|
+ uint16_t value;
|
|
+ r = kvm->callbacks->inw(kvm->opaque, addr, &value);
|
|
+ *(uint16_t *)value_addr = value;
|
|
+ break;
|
|
+ }
|
|
+ case 4: {
|
|
+ uint32_t value;
|
|
+ r = kvm->callbacks->inl(kvm->opaque, addr, &value);
|
|
+ *(uint32_t *)value_addr = value;
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ fprintf(stderr, "bad I/O size %d\n", run->io.size);
|
|
+ return -EMSGSIZE;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ case KVM_EXIT_IO_OUT:
|
|
+ switch (run->io.size) {
|
|
+ case 1:
|
|
+ r = kvm->callbacks->outb(kvm->opaque, addr,
|
|
+ *(uint8_t *)value_addr);
|
|
+ break;
|
|
+ case 2:
|
|
+ r = kvm->callbacks->outw(kvm->opaque, addr,
|
|
+ *(uint16_t *)value_addr);
|
|
+ break;
|
|
+ case 4:
|
|
+ r = kvm->callbacks->outl(kvm->opaque, addr,
|
|
+ *(uint32_t *)value_addr);
|
|
+ break;
|
|
+ default:
|
|
+ fprintf(stderr, "bad I/O size %d\n", run->io.size);
|
|
+ return -EMSGSIZE;
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
|
|
+ return -EPROTO;
|
|
+ }
|
|
+ if (run->io.string) {
|
|
+ run->io.address += delta;
|
|
+ switch (run->io.direction) {
|
|
+ case KVM_EXIT_IO_IN: regs.rdi += delta; break;
|
|
+ case KVM_EXIT_IO_OUT: regs.rsi += delta; break;
|
|
+ }
|
|
+ if (run->io.rep) {
|
|
+ --regs.rcx;
|
|
+ --run->io.count;
|
|
+ }
|
|
+ }
|
|
+ first_time = 0;
|
|
+ if (r) {
|
|
+ int savedret = r;
|
|
+ r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, ®s);
|
|
+ if (r == -1)
|
|
+ return -errno;
|
|
+
|
|
+ return savedret;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (run->io.string || _in) {
|
|
+ r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, ®s);
|
|
+ if (r == -1)
|
|
+ return -errno;
|
|
+
|
|
+ }
|
|
+
|
|
+ run->emulated = 1;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int handle_debug(kvm_context_t kvm, struct kvm_run *run, int vcpu)
|
|
+{
|
|
+ return kvm->callbacks->debug(kvm->opaque, vcpu);
|
|
+}
|
|
+
|
|
+int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
|
|
+{
|
|
+ return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, regs);
|
|
+}
|
|
+
|
|
+int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
|
|
+{
|
|
+ return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, regs);
|
|
+}
|
|
+
|
|
+int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
|
|
+{
|
|
+ return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_SREGS, sregs);
|
|
+}
|
|
+
|
|
+int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
|
|
+{
|
|
+ return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SREGS, sregs);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Returns available msr list. User must free.
|
|
+ */
|
|
+struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm)
|
|
+{
|
|
+ struct kvm_msr_list sizer, *msrs;
|
|
+ int r, e;
|
|
+
|
|
+ sizer.nmsrs = 0;
|
|
+ r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, &sizer);
|
|
+ if (r == -1 && errno != E2BIG)
|
|
+ return 0;
|
|
+ msrs = malloc(sizeof *msrs + sizer.nmsrs * sizeof *msrs->indices);
|
|
+ if (!msrs) {
|
|
+ errno = ENOMEM;
|
|
+ return 0;
|
|
+ }
|
|
+ msrs->nmsrs = sizer.nmsrs;
|
|
+ r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, msrs);
|
|
+ if (r == -1) {
|
|
+ e = errno;
|
|
+ free(msrs);
|
|
+ errno = e;
|
|
+ return 0;
|
|
+ }
|
|
+ return msrs;
|
|
+}
|
|
+
|
|
+int kvm_get_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
|
|
+ int n)
|
|
+{
|
|
+ struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
|
|
+ int r, e;
|
|
+
|
|
+ if (!kmsrs) {
|
|
+ errno = ENOMEM;
|
|
+ return -1;
|
|
+ }
|
|
+ kmsrs->nmsrs = n;
|
|
+ memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
|
|
+ r = ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MSRS, kmsrs);
|
|
+ e = errno;
|
|
+ memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
|
|
+ free(kmsrs);
|
|
+ errno = e;
|
|
+ return r;
|
|
+}
|
|
+
|
|
+int kvm_set_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
|
|
+ int n)
|
|
+{
|
|
+ struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
|
|
+ int r, e;
|
|
+
|
|
+ if (!kmsrs) {
|
|
+ errno = ENOMEM;
|
|
+ return -1;
|
|
+ }
|
|
+ kmsrs->nmsrs = n;
|
|
+ memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
|
|
+ r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MSRS, kmsrs);
|
|
+ e = errno;
|
|
+ free(kmsrs);
|
|
+ errno = e;
|
|
+ return r;
|
|
+}
|
|
+
|
|
+static void print_seg(FILE *file, const char *name, struct kvm_segment *seg)
|
|
+{
|
|
+ fprintf(stderr,
|
|
+ "%s %04x (%08llx/%08x p %d dpl %d db %d s %d type %x l %d"
|
|
+ " g %d avl %d)\n",
|
|
+ name, seg->selector, seg->base, seg->limit, seg->present,
|
|
+ seg->dpl, seg->db, seg->s, seg->type, seg->l, seg->g,
|
|
+ seg->avl);
|
|
+}
|
|
+
|
|
+static void print_dt(FILE *file, const char *name, struct kvm_dtable *dt)
|
|
+{
|
|
+ fprintf(stderr, "%s %llx/%x\n", name, dt->base, dt->limit);
|
|
+}
|
|
+
|
|
+void kvm_show_regs(kvm_context_t kvm, int vcpu)
|
|
+{
|
|
+ int fd = kvm->vcpu_fd[vcpu];
|
|
+ struct kvm_regs regs;
|
|
+ struct kvm_sregs sregs;
|
|
+ int r;
|
|
+
|
|
+ r = ioctl(fd, KVM_GET_REGS, ®s);
|
|
+ if (r == -1) {
|
|
+ perror("KVM_GET_REGS");
|
|
+ return;
|
|
+ }
|
|
+ fprintf(stderr,
|
|
+ "rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
|
|
+ "rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n"
|
|
+ "r8 %016llx r9 %016llx r10 %016llx r11 %016llx\n"
|
|
+ "r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n"
|
|
+ "rip %016llx rflags %08llx\n",
|
|
+ regs.rax, regs.rbx, regs.rcx, regs.rdx,
|
|
+ regs.rsi, regs.rdi, regs.rsp, regs.rbp,
|
|
+ regs.r8, regs.r9, regs.r10, regs.r11,
|
|
+ regs.r12, regs.r13, regs.r14, regs.r15,
|
|
+ regs.rip, regs.rflags);
|
|
+ r = ioctl(fd, KVM_GET_SREGS, &sregs);
|
|
+ if (r == -1) {
|
|
+ perror("KVM_GET_SREGS");
|
|
+ return;
|
|
+ }
|
|
+ print_seg(stderr, "cs", &sregs.cs);
|
|
+ print_seg(stderr, "ds", &sregs.ds);
|
|
+ print_seg(stderr, "es", &sregs.es);
|
|
+ print_seg(stderr, "ss", &sregs.ss);
|
|
+ print_seg(stderr, "fs", &sregs.fs);
|
|
+ print_seg(stderr, "gs", &sregs.gs);
|
|
+ print_seg(stderr, "tr", &sregs.tr);
|
|
+ print_seg(stderr, "ldt", &sregs.ldt);
|
|
+ print_dt(stderr, "gdt", &sregs.gdt);
|
|
+ print_dt(stderr, "idt", &sregs.idt);
|
|
+ fprintf(stderr, "cr0 %llx cr2 %llx cr3 %llx cr4 %llx cr8 %llx"
|
|
+ " efer %llx\n",
|
|
+ sregs.cr0, sregs.cr2, sregs.cr3, sregs.cr4, sregs.cr8,
|
|
+ sregs.efer);
|
|
+}
|
|
+
|
|
+static int handle_cpuid(kvm_context_t kvm, struct kvm_run *run, int vcpu)
|
|
+{
|
|
+ struct kvm_regs regs;
|
|
+ uint32_t orig_eax;
|
|
+ uint64_t rax, rbx, rcx, rdx;
|
|
+ int r;
|
|
+
|
|
+ kvm_get_regs(kvm, vcpu, ®s);
|
|
+ orig_eax = regs.rax;
|
|
+ rax = regs.rax;
|
|
+ rbx = regs.rbx;
|
|
+ rcx = regs.rcx;
|
|
+ rdx = regs.rdx;
|
|
+ r = kvm->callbacks->cpuid(kvm->opaque, &rax, &rbx, &rcx, &rdx);
|
|
+ regs.rax = rax;
|
|
+ regs.rbx = rbx;
|
|
+ regs.rcx = rcx;
|
|
+ regs.rdx = rdx;
|
|
+ if (orig_eax == 1)
|
|
+ regs.rdx &= ~(1ull << 12); /* disable mtrr support */
|
|
+ kvm_set_regs(kvm, vcpu, ®s);
|
|
+ run->emulated = 1;
|
|
+ return r;
|
|
+}
|
|
+
|
|
+static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run)
|
|
+{
|
|
+ unsigned long addr = kvm_run->mmio.phys_addr;
|
|
+ void *data = kvm_run->mmio.data;
|
|
+ int r = -1;
|
|
+
|
|
+ if (kvm_run->mmio.is_write) {
|
|
+ switch (kvm_run->mmio.len) {
|
|
+ case 1:
|
|
+ r = kvm->callbacks->writeb(kvm->opaque, addr, *(uint8_t *)data);
|
|
+ break;
|
|
+ case 2:
|
|
+ r = kvm->callbacks->writew(kvm->opaque, addr, *(uint16_t *)data);
|
|
+ break;
|
|
+ case 4:
|
|
+ r = kvm->callbacks->writel(kvm->opaque, addr, *(uint32_t *)data);
|
|
+ break;
|
|
+ case 8:
|
|
+ r = kvm->callbacks->writeq(kvm->opaque, addr, *(uint64_t *)data);
|
|
+ break;
|
|
+ }
|
|
+ } else {
|
|
+ switch (kvm_run->mmio.len) {
|
|
+ case 1:
|
|
+ r = kvm->callbacks->readb(kvm->opaque, addr, (uint8_t *)data);
|
|
+ break;
|
|
+ case 2:
|
|
+ r = kvm->callbacks->readw(kvm->opaque, addr, (uint16_t *)data);
|
|
+ break;
|
|
+ case 4:
|
|
+ r = kvm->callbacks->readl(kvm->opaque, addr, (uint32_t *)data);
|
|
+ break;
|
|
+ case 8:
|
|
+ r = kvm->callbacks->readq(kvm->opaque, addr, (uint64_t *)data);
|
|
+ break;
|
|
+ }
|
|
+ kvm_run->mmio_completed = 1;
|
|
+ }
|
|
+ return r;
|
|
+}
|
|
+
|
|
+static int handle_io_window(kvm_context_t kvm, struct kvm_run *kvm_run)
|
|
+{
|
|
+ return kvm->callbacks->io_window(kvm->opaque);
|
|
+}
|
|
+
|
|
+static int handle_halt(kvm_context_t kvm, struct kvm_run *kvm_run, int vcpu)
|
|
+{
|
|
+ return kvm->callbacks->halt(kvm->opaque, vcpu);
|
|
+}
|
|
+
|
|
+static int handle_shutdown(kvm_context_t kvm, struct kvm_run *kvm_run,
|
|
+ int vcpu)
|
|
+{
|
|
+ return kvm->callbacks->shutdown(kvm->opaque, vcpu);
|
|
+}
|
|
+
|
|
+int try_push_interrupts(kvm_context_t kvm)
|
|
+{
|
|
+ return kvm->callbacks->try_push_interrupts(kvm->opaque);
|
|
+}
|
|
+
|
|
+static void post_kvm_run(kvm_context_t kvm, struct kvm_run *kvm_run)
|
|
+{
|
|
+ kvm->callbacks->post_kvm_run(kvm->opaque, kvm_run);
|
|
+}
|
|
+
|
|
+static void pre_kvm_run(kvm_context_t kvm, struct kvm_run *kvm_run)
|
|
+{
|
|
+ kvm->callbacks->pre_kvm_run(kvm->opaque, kvm_run);
|
|
+}
|
|
+
|
|
+int kvm_run(kvm_context_t kvm, int vcpu)
|
|
+{
|
|
+ int r;
|
|
+ int fd = kvm->vcpu_fd[vcpu];
|
|
+ struct kvm_run kvm_run = {
|
|
+ .emulated = 0,
|
|
+ .mmio_completed = 0,
|
|
+ };
|
|
+
|
|
+again:
|
|
+ kvm_run.request_interrupt_window = try_push_interrupts(kvm);
|
|
+ pre_kvm_run(kvm, &kvm_run);
|
|
+ r = ioctl(fd, KVM_RUN, &kvm_run);
|
|
+ post_kvm_run(kvm, &kvm_run);
|
|
+
|
|
+ kvm_run.emulated = 0;
|
|
+ kvm_run.mmio_completed = 0;
|
|
+ if (r == -1 && errno != EINTR) {
|
|
+ r = -errno;
|
|
+ printf("kvm_run: %m\n");
|
|
+ return r;
|
|
+ }
|
|
+ if (r == -1) {
|
|
+ r = handle_io_window(kvm, &kvm_run);
|
|
+ goto more;
|
|
+ }
|
|
+ switch (kvm_run.exit_type) {
|
|
+ case KVM_EXIT_TYPE_FAIL_ENTRY:
|
|
+ fprintf(stderr, "kvm_run: failed entry, reason %u\n",
|
|
+ kvm_run.exit_reason & 0xffff);
|
|
+ return -ENOEXEC;
|
|
+ break;
|
|
+ case KVM_EXIT_TYPE_VM_EXIT:
|
|
+ switch (kvm_run.exit_reason) {
|
|
+ case KVM_EXIT_UNKNOWN:
|
|
+ fprintf(stderr, "unhandled vm exit: 0x%x\n",
|
|
+ kvm_run.hw.hardware_exit_reason);
|
|
+ kvm_show_regs(kvm, vcpu);
|
|
+ abort();
|
|
+ break;
|
|
+ case KVM_EXIT_EXCEPTION:
|
|
+ fprintf(stderr, "exception %d (%x)\n",
|
|
+ kvm_run.ex.exception,
|
|
+ kvm_run.ex.error_code);
|
|
+ kvm_show_regs(kvm, vcpu);
|
|
+ abort();
|
|
+ break;
|
|
+ case KVM_EXIT_IO:
|
|
+ r = handle_io(kvm, &kvm_run, vcpu);
|
|
+ break;
|
|
+ case KVM_EXIT_CPUID:
|
|
+ r = handle_cpuid(kvm, &kvm_run, vcpu);
|
|
+ break;
|
|
+ case KVM_EXIT_DEBUG:
|
|
+ r = handle_debug(kvm, &kvm_run, vcpu);
|
|
+ break;
|
|
+ case KVM_EXIT_MMIO:
|
|
+ r = handle_mmio(kvm, &kvm_run);
|
|
+ break;
|
|
+ case KVM_EXIT_HLT:
|
|
+ r = handle_halt(kvm, &kvm_run, vcpu);
|
|
+ break;
|
|
+ case KVM_EXIT_IRQ_WINDOW_OPEN:
|
|
+ break;
|
|
+ case KVM_EXIT_SHUTDOWN:
|
|
+ r = handle_shutdown(kvm, &kvm_run, vcpu);
|
|
+ break;
|
|
+ default:
|
|
+ fprintf(stderr, "unhandled vm exit: 0x%x\n", kvm_run.exit_reason);
|
|
+ kvm_show_regs(kvm, vcpu);
|
|
+ abort();
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+more:
|
|
+ if (!r)
|
|
+ goto again;
|
|
+ return r;
|
|
+}
|
|
+
|
|
+int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq)
|
|
+{
|
|
+ struct kvm_interrupt intr;
|
|
+
|
|
+ intr.irq = irq;
|
|
+ return ioctl(kvm->vcpu_fd[vcpu], KVM_INTERRUPT, &intr);
|
|
+}
|
|
+
|
|
+int kvm_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_debug_guest *dbg)
|
|
+{
|
|
+ return ioctl(kvm->vcpu_fd[vcpu], KVM_DEBUG_GUEST, dbg);
|
|
+}
|
|
--- qemu-0.9.0/kvmctl.h
|
|
+++ qemu-0.9.0/kvmctl.h
|
|
@@ -0,0 +1,269 @@
|
|
+/** \file kvmctl.h
|
|
+ * libkvm API
|
|
+ */
|
|
+
|
|
+#ifndef KVMCTL_H
|
|
+#define KVMCTL_H
|
|
+
|
|
+#define __user /* temporary, until installed via make headers_install */
|
|
+#include "kvm.h"
|
|
+#include <stdint.h>
|
|
+
|
|
+struct kvm_context;
|
|
+
|
|
+typedef struct kvm_context *kvm_context_t;
|
|
+
|
|
+/*!
|
|
+ * \brief KVM callbacks structure
|
|
+ *
|
|
+ * This structure holds pointers to various functions that KVM will call
|
|
+ * when it encounters something that cannot be virtualized, such as
|
|
+ * accessing hardware devices via MMIO or regular IO.
|
|
+ */
|
|
+struct kvm_callbacks {
|
|
+ int (*cpuid)(void *opaque,
|
|
+ uint64_t *rax, uint64_t *rbx, uint64_t *rcx, uint64_t *rdx);
|
|
+ /// For 8bit IO reads from the guest (Usually when executing 'inb')
|
|
+ int (*inb)(void *opaque, uint16_t addr, uint8_t *data);
|
|
+ /// For 16bit IO reads from the guest (Usually when executing 'inw')
|
|
+ int (*inw)(void *opaque, uint16_t addr, uint16_t *data);
|
|
+ /// For 32bit IO reads from the guest (Usually when executing 'inl')
|
|
+ int (*inl)(void *opaque, uint16_t addr, uint32_t *data);
|
|
+ /// For 8bit IO writes from the guest (Usually when executing 'outb')
|
|
+ int (*outb)(void *opaque, uint16_t addr, uint8_t data);
|
|
+ /// For 16bit IO writes from the guest (Usually when executing 'outw')
|
|
+ int (*outw)(void *opaque, uint16_t addr, uint16_t data);
|
|
+ /// For 32bit IO writes from the guest (Usually when executing 'outl')
|
|
+ int (*outl)(void *opaque, uint16_t addr, uint32_t data);
|
|
+ /// For 8bit memory reads from unmapped memory (For MMIO devices)
|
|
+ int (*readb)(void *opaque, uint64_t addr, uint8_t *data);
|
|
+ /// For 16bit memory reads from unmapped memory (For MMIO devices)
|
|
+ int (*readw)(void *opaque, uint64_t addr, uint16_t *data);
|
|
+ /// For 32bit memory reads from unmapped memory (For MMIO devices)
|
|
+ int (*readl)(void *opaque, uint64_t addr, uint32_t *data);
|
|
+ /// For 64bit memory reads from unmapped memory (For MMIO devices)
|
|
+ int (*readq)(void *opaque, uint64_t addr, uint64_t *data);
|
|
+ /// For 8bit memory writes to unmapped memory (For MMIO devices)
|
|
+ int (*writeb)(void *opaque, uint64_t addr, uint8_t data);
|
|
+ /// For 16bit memory writes to unmapped memory (For MMIO devices)
|
|
+ int (*writew)(void *opaque, uint64_t addr, uint16_t data);
|
|
+ /// For 32bit memory writes to unmapped memory (For MMIO devices)
|
|
+ int (*writel)(void *opaque, uint64_t addr, uint32_t data);
|
|
+ /// For 64bit memory writes to unmapped memory (For MMIO devices)
|
|
+ int (*writeq)(void *opaque, uint64_t addr, uint64_t data);
|
|
+ int (*debug)(void *opaque, int vcpu);
|
|
+ /*!
|
|
+ * \brief Called when the VCPU issues an 'hlt' instruction.
|
|
+ *
|
|
+ * Typically, you should yield here to prevent 100% CPU utilization
|
|
+ * on the host CPU.
|
|
+ */
|
|
+ int (*halt)(void *opaque, int vcpu);
|
|
+ int (*shutdown)(void *opaque, int vcpu);
|
|
+ int (*io_window)(void *opaque);
|
|
+ int (*try_push_interrupts)(void *opaque);
|
|
+ void (*post_kvm_run)(void *opaque, struct kvm_run *kvm_run);
|
|
+ void (*pre_kvm_run)(void *opaque, struct kvm_run *kvm_run);
|
|
+};
|
|
+
|
|
+/*!
|
|
+ * \brief Create new KVM context
|
|
+ *
|
|
+ * This creates a new kvm_context. A KVM context is a small area of data that
|
|
+ * holds information about the KVM instance that gets created by this call.\n
|
|
+ * This should always be your first call to KVM.
|
|
+ *
|
|
+ * \param callbacks Pointer to a valid kvm_callbacks structure
|
|
+ * \param opaque Not used
|
|
+ * \return NULL on failure
|
|
+ */
|
|
+kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
|
|
+ void *opaque);
|
|
+
|
|
+/*!
|
|
+ * \brief Cleanup the KVM context
|
|
+ *
|
|
+ * Should always be called when closing down KVM.\n
|
|
+ * Exception: If kvm_init() fails, this function should not be called, as the
|
|
+ * context would be invalid
|
|
+ *
|
|
+ * \param kvm Pointer to the kvm_context that is to be freed
|
|
+ */
|
|
+void kvm_finalize(kvm_context_t kvm);
|
|
+
|
|
+/*!
|
|
+ * \brief Create new virtual machine
|
|
+ *
|
|
+ * This creates a new virtual machine, maps physical RAM to it, and creates a
|
|
+ * virtual CPU for it.\n
|
|
+ * \n
|
|
+ * Memory gets mapped for addresses 0->0xA0000, 0xC0000->phys_mem_bytes
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param phys_mem_bytes The amount of physical ram you want the VM to have
|
|
+ * \param phys_mem This pointer will be set to point to the memory that
|
|
+ * kvm_create allocates for physical RAM
|
|
+ * \return 0 on success
|
|
+ */
|
|
+int kvm_create(kvm_context_t kvm,
|
|
+ unsigned long phys_mem_bytes,
|
|
+ void **phys_mem);
|
|
+
|
|
+/*!
|
|
+ * \brief Start the VCPU
|
|
+ *
|
|
+ * This starts the VCPU and virtualization is started.\n
|
|
+ * \n
|
|
+ * This function will not return until any of these conditions are met:
|
|
+ * - An IO/MMIO handler does not return "0"
|
|
+ * - An exception that neither the guest OS, nor KVM can handle occurs
|
|
+ *
|
|
+ * \note This function will call the callbacks registered in kvm_init()
|
|
+ * to emulate those functions
|
|
+ * \note If you at any point want to interrupt the VCPU, kvm_run() will
|
|
+ * listen to the EINTR signal. This allows you to simulate external interrupts
|
|
+ * and asynchronous IO.
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param vcpu Which virtual CPU should be started
|
|
+ * \return 0 on success, but you really shouldn't expect this function to
|
|
+ * return except for when an error has occurred, or when you have sent it
|
|
+ * an EINTR signal.
|
|
+ */
|
|
+int kvm_run(kvm_context_t kvm, int vcpu);
|
|
+
|
|
+/*!
|
|
+ * \brief Read VCPU registers
|
|
+ *
|
|
+ * This gets the GP registers from the VCPU and outputs them
|
|
+ * into a kvm_regs structure
|
|
+ *
|
|
+ * \note This function returns a \b copy of the VCPUs registers.\n
|
|
+ * If you wish to modify the VCPUs GP registers, you should call kvm_set_regs()
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param vcpu Which virtual CPU should get dumped
|
|
+ * \param regs Pointer to a kvm_regs which will be populated with the VCPUs
|
|
+ * registers values
|
|
+ * \return 0 on success
|
|
+ */
|
|
+int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs);
|
|
+
|
|
+/*!
|
|
+ * \brief Write VCPU registers
|
|
+ *
|
|
+ * This sets the GP registers on the VCPU from a kvm_regs structure
|
|
+ *
|
|
+ * \note When this function returns, the regs pointer and the data it points to
|
|
+ * can be discarded
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param vcpu Which virtual CPU should get dumped
|
|
+ * \param regs Pointer to a kvm_regs which will be populated with the VCPUs
|
|
+ * registers values
|
|
+ * \return 0 on success
|
|
+ */
|
|
+int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs);
|
|
+
|
|
+/*!
|
|
+ * \brief Read VCPU system registers
|
|
+ *
|
|
+ * This gets the non-GP registers from the VCPU and outputs them
|
|
+ * into a kvm_sregs structure
|
|
+ *
|
|
+ * \note This function returns a \b copy of the VCPUs registers.\n
|
|
+ * If you wish to modify the VCPUs non-GP registers, you should call
|
|
+ * kvm_set_sregs()
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param vcpu Which virtual CPU should get dumped
|
|
+ * \param regs Pointer to a kvm_sregs which will be populated with the VCPUs
|
|
+ * registers values
|
|
+ * \return 0 on success
|
|
+ */
|
|
+int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *regs);
|
|
+
|
|
+/*!
|
|
+ * \brief Write VCPU system registers
|
|
+ *
|
|
+ * This sets the non-GP registers on the VCPU from a kvm_sregs structure
|
|
+ *
|
|
+ * \note When this function returns, the regs pointer and the data it points to
|
|
+ * can be discarded
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param vcpu Which virtual CPU should get dumped
|
|
+ * \param regs Pointer to a kvm_sregs which will be populated with the VCPUs
|
|
+ * registers values
|
|
+ * \return 0 on success
|
|
+ */
|
|
+int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *regs);
|
|
+
|
|
+struct kvm_msr_list *kvm_get_msr_list(kvm_context_t);
|
|
+int kvm_get_msrs(kvm_context_t, int vcpu, struct kvm_msr_entry *msrs, int n);
|
|
+int kvm_set_msrs(kvm_context_t, int vcpu, struct kvm_msr_entry *msrs, int n);
|
|
+
|
|
+/*!
|
|
+ * \brief Simulate an external vectored interrupt
|
|
+ *
|
|
+ * This allows you to simulate an external vectored interrupt.
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param vcpu Which virtual CPU should get dumped
|
|
+ * \param irq Vector number
|
|
+ * \return 0 on success
|
|
+ */
|
|
+int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq);
|
|
+int kvm_guest_debug(kvm_context_t, int vcpu, struct kvm_debug_guest *dbg);
|
|
+
|
|
+/*!
|
|
+ * \brief Dump all VCPU information
|
|
+ *
|
|
+ * This dumps \b all the information that KVM has about a virtual CPU, namely:
|
|
+ * - GP Registers
|
|
+ * - System registers (selectors, descriptors, etc)
|
|
+ * - VMCS Data
|
|
+ * - MSRS
|
|
+ * - Pending interrupts
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param vcpu Which virtual CPU should get dumped
|
|
+ * \return 0 on success
|
|
+ */
|
|
+int kvm_dump_vcpu(kvm_context_t kvm, int vcpu);
|
|
+
|
|
+/*!
|
|
+ * \brief Dump VCPU registers
|
|
+ *
|
|
+ * This dumps some of the information that KVM has about a virtual CPU, namely:
|
|
+ * - GP Registers
|
|
+ *
|
|
+ * A much more verbose version of this is available as kvm_dump_vcpu()
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ * \param vcpu Which virtual CPU should get dumped
|
|
+ * \return 0 on success
|
|
+ */
|
|
+void kvm_show_regs(kvm_context_t kvm, int vcpu);
|
|
+
|
|
+void *kvm_create_phys_mem(kvm_context_t, unsigned long phys_start,
|
|
+ unsigned long len, int slot, int log, int writable);
|
|
+void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start,
|
|
+ unsigned long len);
|
|
+int kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
|
|
+
|
|
+/*!
|
|
+ * \brief Enable dirty-pages-logging for all memory regions
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ */
|
|
+int kvm_dirty_pages_log_enable_all(kvm_context_t kvm);
|
|
+
|
|
+/*!
|
|
+ * \brief Disable dirty-page-logging for some memory regions
|
|
+ *
|
|
+ * Disable dirty-pages-logging for those memory regions that were
|
|
+ * created with dirty-page-logging disabled.
|
|
+ *
|
|
+ * \param kvm Pointer to the current kvm_context
|
|
+ */
|
|
+int kvm_dirty_pages_log_reset(kvm_context_t kvm);
|
|
+#endif
|
|
--- qemu-0.9.0/migration.c
|
|
+++ qemu-0.9.0/migration.c
|
|
@@ -24,6 +24,9 @@
|
|
|
|
#include "vl.h"
|
|
#include "qemu_socket.h"
|
|
+#ifdef USE_KVM
|
|
+#include "qemu-kvm.h"
|
|
+#endif
|
|
|
|
#include <sys/wait.h>
|
|
|
|
@@ -172,6 +175,10 @@
|
|
int dirty_count = 0;
|
|
|
|
for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
|
|
+ continue;
|
|
+#endif
|
|
if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
|
|
dirty_count++;
|
|
}
|
|
@@ -186,6 +193,11 @@
|
|
if (migrate_write_buffer(s))
|
|
return;
|
|
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && !*s->has_error)
|
|
+ *s->has_error = kvm_update_dirty_pages_log();
|
|
+#endif
|
|
+
|
|
if (migrate_check_convergence(s) || *s->has_error) {
|
|
qemu_del_timer(s->timer);
|
|
qemu_free_timer(s->timer);
|
|
@@ -195,6 +207,11 @@
|
|
}
|
|
|
|
while (s->addr < phys_ram_size) {
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && (s->addr>=0xa0000) && (s->addr<0xc0000)) /* do not access video-addresses */
|
|
+ s->addr = 0xc0000;
|
|
+#endif
|
|
+
|
|
if (cpu_physical_memory_get_dirty(s->addr, MIGRATION_DIRTY_FLAG)) {
|
|
uint32_t value = cpu_to_be32(s->addr);
|
|
|
|
@@ -254,6 +271,10 @@
|
|
fcntl(s->fd, F_SETFL, O_NONBLOCK);
|
|
|
|
for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
|
|
+ continue;
|
|
+#endif
|
|
if (!cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
|
|
cpu_physical_memory_set_dirty(addr);
|
|
}
|
|
@@ -723,6 +744,10 @@
|
|
unsigned int sum;
|
|
|
|
for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
|
|
+ continue;
|
|
+#endif
|
|
sum = calc_page_checksum(addr);
|
|
qemu_put_be32(f, addr);
|
|
qemu_put_be32(f, sum);
|
|
@@ -737,6 +762,10 @@
|
|
int num_errors = 0;
|
|
|
|
for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
|
|
+ continue;
|
|
+#endif
|
|
sum = calc_page_checksum(addr);
|
|
raddr = qemu_get_be32(f);
|
|
rsum = qemu_get_be32(f);
|
|
--- qemu-0.9.0/qemu-kvm.c
|
|
+++ qemu-0.9.0/qemu-kvm.c
|
|
@@ -0,0 +1,793 @@
|
|
+
|
|
+#include "config.h"
|
|
+#include "config-host.h"
|
|
+
|
|
+#ifdef USE_KVM
|
|
+
|
|
+#include "exec.h"
|
|
+
|
|
+#include "qemu-kvm.h"
|
|
+#include <kvmctl.h>
|
|
+#include <string.h>
|
|
+
|
|
+#define MSR_IA32_TSC 0x10
|
|
+
|
|
+extern void perror(const char *s);
|
|
+
|
|
+int kvm_allowed = 1;
|
|
+kvm_context_t kvm_context;
|
|
+static struct kvm_msr_list *kvm_msr_list;
|
|
+static int kvm_has_msr_star;
|
|
+
|
|
+#define NR_CPU 16
|
|
+static CPUState *saved_env[NR_CPU];
|
|
+
|
|
+static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index,
|
|
+ uint64_t data)
|
|
+{
|
|
+ entry->index = index;
|
|
+ entry->data = data;
|
|
+}
|
|
+
|
|
+/* returns 0 on success, non-0 on failure */
|
|
+static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
|
|
+{
|
|
+ switch (entry->index) {
|
|
+ case MSR_IA32_SYSENTER_CS:
|
|
+ env->sysenter_cs = entry->data;
|
|
+ break;
|
|
+ case MSR_IA32_SYSENTER_ESP:
|
|
+ env->sysenter_esp = entry->data;
|
|
+ break;
|
|
+ case MSR_IA32_SYSENTER_EIP:
|
|
+ env->sysenter_eip = entry->data;
|
|
+ break;
|
|
+ case MSR_STAR:
|
|
+ env->star = entry->data;
|
|
+ break;
|
|
+#ifdef TARGET_X86_64
|
|
+ case MSR_CSTAR:
|
|
+ env->cstar = entry->data;
|
|
+ break;
|
|
+ case MSR_KERNELGSBASE:
|
|
+ env->kernelgsbase = entry->data;
|
|
+ break;
|
|
+ case MSR_FMASK:
|
|
+ env->fmask = entry->data;
|
|
+ break;
|
|
+ case MSR_LSTAR:
|
|
+ env->lstar = entry->data;
|
|
+ break;
|
|
+#endif
|
|
+ case MSR_IA32_TSC:
|
|
+ env->tsc = entry->data;
|
|
+ break;
|
|
+ default:
|
|
+ printf("Warning unknown msr index 0x%x\n", entry->index);
|
|
+ return 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#ifdef TARGET_X86_64
|
|
+#define MSR_COUNT 9
|
|
+#else
|
|
+#define MSR_COUNT 5
|
|
+#endif
|
|
+
|
|
+static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
|
|
+{
|
|
+ lhs->selector = rhs->selector;
|
|
+ lhs->base = rhs->base;
|
|
+ lhs->limit = rhs->limit;
|
|
+ lhs->type = 3;
|
|
+ lhs->present = 1;
|
|
+ lhs->dpl = 3;
|
|
+ lhs->db = 0;
|
|
+ lhs->s = 1;
|
|
+ lhs->l = 0;
|
|
+ lhs->g = 0;
|
|
+ lhs->avl = 0;
|
|
+ lhs->unusable = 0;
|
|
+}
|
|
+
|
|
+static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
|
|
+{
|
|
+ unsigned flags = rhs->flags;
|
|
+ lhs->selector = rhs->selector;
|
|
+ lhs->base = rhs->base;
|
|
+ lhs->limit = rhs->limit;
|
|
+ lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
|
|
+ lhs->present = (flags & DESC_P_MASK) != 0;
|
|
+ lhs->dpl = rhs->selector & 3;
|
|
+ lhs->db = (flags >> DESC_B_SHIFT) & 1;
|
|
+ lhs->s = (flags & DESC_S_MASK) != 0;
|
|
+ lhs->l = (flags >> DESC_L_SHIFT) & 1;
|
|
+ lhs->g = (flags & DESC_G_MASK) != 0;
|
|
+ lhs->avl = (flags & DESC_AVL_MASK) != 0;
|
|
+ lhs->unusable = 0;
|
|
+}
|
|
+
|
|
+static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
|
|
+{
|
|
+ lhs->selector = rhs->selector;
|
|
+ lhs->base = rhs->base;
|
|
+ lhs->limit = rhs->limit;
|
|
+ lhs->flags =
|
|
+ (rhs->type << DESC_TYPE_SHIFT)
|
|
+ | (rhs->present * DESC_P_MASK)
|
|
+ | (rhs->dpl << DESC_DPL_SHIFT)
|
|
+ | (rhs->db << DESC_B_SHIFT)
|
|
+ | (rhs->s * DESC_S_MASK)
|
|
+ | (rhs->l << DESC_L_SHIFT)
|
|
+ | (rhs->g * DESC_G_MASK)
|
|
+ | (rhs->avl * DESC_AVL_MASK);
|
|
+}
|
|
+
|
|
+/* the reset values of qemu are not compatible to SVM
|
|
+ * this function is used to fix the segment descriptor values */
|
|
+static void fix_realmode_dataseg(struct kvm_segment *seg)
|
|
+{
|
|
+ seg->type = 0x02;
|
|
+ seg->present = 1;
|
|
+ seg->s = 1;
|
|
+}
|
|
+
|
|
+static void load_regs(CPUState *env)
|
|
+{
|
|
+ struct kvm_regs regs;
|
|
+ struct kvm_sregs sregs;
|
|
+ struct kvm_msr_entry msrs[MSR_COUNT];
|
|
+ int rc, n;
|
|
+
|
|
+ /* hack: save env */
|
|
+ if (!saved_env[0])
|
|
+ saved_env[0] = env;
|
|
+
|
|
+ regs.rax = env->regs[R_EAX];
|
|
+ regs.rbx = env->regs[R_EBX];
|
|
+ regs.rcx = env->regs[R_ECX];
|
|
+ regs.rdx = env->regs[R_EDX];
|
|
+ regs.rsi = env->regs[R_ESI];
|
|
+ regs.rdi = env->regs[R_EDI];
|
|
+ regs.rsp = env->regs[R_ESP];
|
|
+ regs.rbp = env->regs[R_EBP];
|
|
+#ifdef TARGET_X86_64
|
|
+ regs.r8 = env->regs[8];
|
|
+ regs.r9 = env->regs[9];
|
|
+ regs.r10 = env->regs[10];
|
|
+ regs.r11 = env->regs[11];
|
|
+ regs.r12 = env->regs[12];
|
|
+ regs.r13 = env->regs[13];
|
|
+ regs.r14 = env->regs[14];
|
|
+ regs.r15 = env->regs[15];
|
|
+#endif
|
|
+
|
|
+ regs.rflags = env->eflags;
|
|
+ regs.rip = env->eip;
|
|
+
|
|
+ kvm_set_regs(kvm_context, 0, ®s);
|
|
+
|
|
+ memcpy(sregs.interrupt_bitmap, env->kvm_interrupt_bitmap, sizeof(sregs.interrupt_bitmap));
|
|
+
|
|
+ if ((env->eflags & VM_MASK)) {
|
|
+ set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
|
|
+ set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
|
|
+ set_v8086_seg(&sregs.es, &env->segs[R_ES]);
|
|
+ set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
|
|
+ set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
|
|
+ set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
|
|
+ } else {
|
|
+ set_seg(&sregs.cs, &env->segs[R_CS]);
|
|
+ set_seg(&sregs.ds, &env->segs[R_DS]);
|
|
+ set_seg(&sregs.es, &env->segs[R_ES]);
|
|
+ set_seg(&sregs.fs, &env->segs[R_FS]);
|
|
+ set_seg(&sregs.gs, &env->segs[R_GS]);
|
|
+ set_seg(&sregs.ss, &env->segs[R_SS]);
|
|
+
|
|
+ if (env->cr[0] & CR0_PE_MASK) {
|
|
+ /* force ss cpl to cs cpl */
|
|
+ sregs.ss.selector = (sregs.ss.selector & ~3) |
|
|
+ (sregs.cs.selector & 3);
|
|
+ sregs.ss.dpl = sregs.ss.selector & 3;
|
|
+ }
|
|
+
|
|
+ if (!(env->cr[0] & CR0_PG_MASK)) {
|
|
+ fix_realmode_dataseg(&sregs.ds);
|
|
+ fix_realmode_dataseg(&sregs.es);
|
|
+ fix_realmode_dataseg(&sregs.fs);
|
|
+ fix_realmode_dataseg(&sregs.gs);
|
|
+ fix_realmode_dataseg(&sregs.ss);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ set_seg(&sregs.tr, &env->tr);
|
|
+ set_seg(&sregs.ldt, &env->ldt);
|
|
+
|
|
+ sregs.idt.limit = env->idt.limit;
|
|
+ sregs.idt.base = env->idt.base;
|
|
+ sregs.gdt.limit = env->gdt.limit;
|
|
+ sregs.gdt.base = env->gdt.base;
|
|
+
|
|
+ sregs.cr0 = env->cr[0];
|
|
+ sregs.cr2 = env->cr[2];
|
|
+ sregs.cr3 = env->cr[3];
|
|
+ sregs.cr4 = env->cr[4];
|
|
+
|
|
+ sregs.apic_base = cpu_get_apic_base(env);
|
|
+ sregs.efer = env->efer;
|
|
+ sregs.cr8 = cpu_get_apic_tpr(env);
|
|
+
|
|
+ kvm_set_sregs(kvm_context, 0, &sregs);
|
|
+
|
|
+ /* msrs */
|
|
+ n = 0;
|
|
+ set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
|
|
+ set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
|
|
+ set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
|
|
+ if (kvm_has_msr_star)
|
|
+ set_msr_entry(&msrs[n++], MSR_STAR, env->star);
|
|
+ set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
|
|
+#ifdef TARGET_X86_64
|
|
+ set_msr_entry(&msrs[n++], MSR_CSTAR, env->cstar);
|
|
+ set_msr_entry(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
|
|
+ set_msr_entry(&msrs[n++], MSR_FMASK, env->fmask);
|
|
+ set_msr_entry(&msrs[n++], MSR_LSTAR , env->lstar);
|
|
+#endif
|
|
+
|
|
+ rc = kvm_set_msrs(kvm_context, 0, msrs, n);
|
|
+ if (rc == -1)
|
|
+ perror("kvm_set_msrs FAILED");
|
|
+}
|
|
+
|
|
+
|
|
+static void save_regs(CPUState *env)
|
|
+{
|
|
+ struct kvm_regs regs;
|
|
+ struct kvm_sregs sregs;
|
|
+ struct kvm_msr_entry msrs[MSR_COUNT];
|
|
+ uint32_t hflags;
|
|
+ uint32_t i, n, rc;
|
|
+
|
|
+ kvm_get_regs(kvm_context, 0, ®s);
|
|
+
|
|
+ env->regs[R_EAX] = regs.rax;
|
|
+ env->regs[R_EBX] = regs.rbx;
|
|
+ env->regs[R_ECX] = regs.rcx;
|
|
+ env->regs[R_EDX] = regs.rdx;
|
|
+ env->regs[R_ESI] = regs.rsi;
|
|
+ env->regs[R_EDI] = regs.rdi;
|
|
+ env->regs[R_ESP] = regs.rsp;
|
|
+ env->regs[R_EBP] = regs.rbp;
|
|
+#ifdef TARGET_X86_64
|
|
+ env->regs[8] = regs.r8;
|
|
+ env->regs[9] = regs.r9;
|
|
+ env->regs[10] = regs.r10;
|
|
+ env->regs[11] = regs.r11;
|
|
+ env->regs[12] = regs.r12;
|
|
+ env->regs[13] = regs.r13;
|
|
+ env->regs[14] = regs.r14;
|
|
+ env->regs[15] = regs.r15;
|
|
+#endif
|
|
+
|
|
+ env->eflags = regs.rflags;
|
|
+ env->eip = regs.rip;
|
|
+
|
|
+ kvm_get_sregs(kvm_context, 0, &sregs);
|
|
+
|
|
+ memcpy(env->kvm_interrupt_bitmap, sregs.interrupt_bitmap, sizeof(env->kvm_interrupt_bitmap));
|
|
+
|
|
+ get_seg(&env->segs[R_CS], &sregs.cs);
|
|
+ get_seg(&env->segs[R_DS], &sregs.ds);
|
|
+ get_seg(&env->segs[R_ES], &sregs.es);
|
|
+ get_seg(&env->segs[R_FS], &sregs.fs);
|
|
+ get_seg(&env->segs[R_GS], &sregs.gs);
|
|
+ get_seg(&env->segs[R_SS], &sregs.ss);
|
|
+
|
|
+ get_seg(&env->tr, &sregs.tr);
|
|
+ get_seg(&env->ldt, &sregs.ldt);
|
|
+
|
|
+ env->idt.limit = sregs.idt.limit;
|
|
+ env->idt.base = sregs.idt.base;
|
|
+ env->gdt.limit = sregs.gdt.limit;
|
|
+ env->gdt.base = sregs.gdt.base;
|
|
+
|
|
+ env->cr[0] = sregs.cr0;
|
|
+ env->cr[2] = sregs.cr2;
|
|
+ env->cr[3] = sregs.cr3;
|
|
+ env->cr[4] = sregs.cr4;
|
|
+
|
|
+ cpu_set_apic_base(env, sregs.apic_base);
|
|
+
|
|
+ env->efer = sregs.efer;
|
|
+ cpu_set_apic_tpr(env, sregs.cr8);
|
|
+
|
|
+#define HFLAG_COPY_MASK ~( \
|
|
+ HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
|
|
+ HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
|
|
+ HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
|
|
+ HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
|
|
+
|
|
+
|
|
+
|
|
+ hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
|
|
+ hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
|
|
+ hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
|
|
+ (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
|
|
+ hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
|
|
+ hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
|
|
+ (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
|
|
+
|
|
+ if (env->efer & MSR_EFER_LMA) {
|
|
+ hflags |= HF_LMA_MASK;
|
|
+ }
|
|
+
|
|
+ if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
|
|
+ hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
|
|
+ } else {
|
|
+ hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
|
|
+ (DESC_B_SHIFT - HF_CS32_SHIFT);
|
|
+ hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
|
|
+ (DESC_B_SHIFT - HF_SS32_SHIFT);
|
|
+ if (!(env->cr[0] & CR0_PE_MASK) ||
|
|
+ (env->eflags & VM_MASK) ||
|
|
+ !(hflags & HF_CS32_MASK)) {
|
|
+ hflags |= HF_ADDSEG_MASK;
|
|
+ } else {
|
|
+ hflags |= ((env->segs[R_DS].base |
|
|
+ env->segs[R_ES].base |
|
|
+ env->segs[R_SS].base) != 0) <<
|
|
+ HF_ADDSEG_SHIFT;
|
|
+ }
|
|
+ }
|
|
+ env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
|
|
+ CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
|
|
+ DF = 1 - (2 * ((env->eflags >> 10) & 1));
|
|
+ CC_OP = CC_OP_EFLAGS;
|
|
+ env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
|
|
+
|
|
+ tlb_flush(env, 1);
|
|
+
|
|
+ /* msrs */
|
|
+ n = 0;
|
|
+ msrs[n++].index = MSR_IA32_SYSENTER_CS;
|
|
+ msrs[n++].index = MSR_IA32_SYSENTER_ESP;
|
|
+ msrs[n++].index = MSR_IA32_SYSENTER_EIP;
|
|
+ if (kvm_has_msr_star)
|
|
+ msrs[n++].index = MSR_STAR;
|
|
+ msrs[n++].index = MSR_IA32_TSC;
|
|
+#ifdef TARGET_X86_64
|
|
+ msrs[n++].index = MSR_CSTAR;
|
|
+ msrs[n++].index = MSR_KERNELGSBASE;
|
|
+ msrs[n++].index = MSR_FMASK;
|
|
+ msrs[n++].index = MSR_LSTAR;
|
|
+#endif
|
|
+ rc = kvm_get_msrs(kvm_context, 0, msrs, n);
|
|
+ if (rc == -1) {
|
|
+ perror("kvm_get_msrs FAILED");
|
|
+ }
|
|
+ else {
|
|
+ n = rc; /* actual number of MSRs */
|
|
+ for (i=0 ; i<n; i++) {
|
|
+ if (get_msr_entry(&msrs[i], env))
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+#include <signal.h>
|
|
+
|
|
+
|
|
+static int try_push_interrupts(void *opaque)
|
|
+{
|
|
+ CPUState **envs = opaque, *env;
|
|
+ env = envs[0];
|
|
+
|
|
+ if (env->ready_for_interrupt_injection &&
|
|
+ (env->interrupt_request & CPU_INTERRUPT_HARD) &&
|
|
+ (env->eflags & IF_MASK)) {
|
|
+ env->interrupt_request &= ~CPU_INTERRUPT_HARD;
|
|
+ // for now using cpu 0
|
|
+ kvm_inject_irq(kvm_context, 0, cpu_get_pic_interrupt(env));
|
|
+ }
|
|
+
|
|
+ return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
|
|
+}
|
|
+
|
|
+static void post_kvm_run(void *opaque, struct kvm_run *kvm_run)
|
|
+{
|
|
+ CPUState **envs = opaque, *env;
|
|
+ env = envs[0];
|
|
+
|
|
+ env->eflags = (kvm_run->if_flag) ? env->eflags | IF_MASK:env->eflags & ~IF_MASK;
|
|
+ env->ready_for_interrupt_injection = kvm_run->ready_for_interrupt_injection;
|
|
+ cpu_set_apic_tpr(env, kvm_run->cr8);
|
|
+ cpu_set_apic_base(env, kvm_run->apic_base);
|
|
+}
|
|
+
|
|
+static void pre_kvm_run(void *opaque, struct kvm_run *kvm_run)
|
|
+{
|
|
+ CPUState **envs = opaque, *env;
|
|
+ env = envs[0];
|
|
+
|
|
+ kvm_run->cr8 = cpu_get_apic_tpr(env);
|
|
+}
|
|
+
|
|
+void kvm_load_registers(CPUState *env)
|
|
+{
|
|
+ load_regs(env);
|
|
+}
|
|
+
|
|
+void kvm_save_registers(CPUState *env)
|
|
+{
|
|
+ save_regs(env);
|
|
+}
|
|
+
|
|
+int kvm_cpu_exec(CPUState *env)
|
|
+{
|
|
+ int r;
|
|
+ int pending = (!env->ready_for_interrupt_injection ||
|
|
+ ((env->interrupt_request & CPU_INTERRUPT_HARD) &&
|
|
+ (env->eflags & IF_MASK)));
|
|
+
|
|
+ if (!pending && (env->interrupt_request & CPU_INTERRUPT_EXIT)) {
|
|
+ env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
|
|
+ env->exception_index = EXCP_INTERRUPT;
|
|
+ cpu_loop_exit();
|
|
+ }
|
|
+
|
|
+
|
|
+ if (!saved_env[0])
|
|
+ saved_env[0] = env;
|
|
+
|
|
+ r = kvm_run(kvm_context, 0);
|
|
+ if (r < 0) {
|
|
+ printf("kvm_run returned %d\n", r);
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static int kvm_cpuid(void *opaque, uint64_t *rax, uint64_t *rbx,
|
|
+ uint64_t *rcx, uint64_t *rdx)
|
|
+{
|
|
+ CPUState **envs = opaque;
|
|
+ CPUState *saved_env;
|
|
+ uint32_t eax = *rax;
|
|
+
|
|
+ saved_env = env;
|
|
+ env = envs[0];
|
|
+
|
|
+ env->regs[R_EAX] = *rax;
|
|
+ env->regs[R_EBX] = *rbx;
|
|
+ env->regs[R_ECX] = *rcx;
|
|
+ env->regs[R_EDX] = *rdx;
|
|
+ helper_cpuid();
|
|
+ *rdx = env->regs[R_EDX];
|
|
+ *rcx = env->regs[R_ECX];
|
|
+ *rbx = env->regs[R_EBX];
|
|
+ *rax = env->regs[R_EAX];
|
|
+ // don't report long mode/syscall/nx if no native support
|
|
+ if (eax == 0x80000001) {
|
|
+ unsigned long h_eax = eax, h_edx;
|
|
+
|
|
+
|
|
+ // push/pop hack to workaround gcc 3 register pressure trouble
|
|
+ asm (
|
|
+#ifdef __x86_64__
|
|
+ "push %%rbx; push %%rcx; cpuid; pop %%rcx; pop %%rbx"
|
|
+#else
|
|
+ "push %%ebx; push %%ecx; cpuid; pop %%ecx; pop %%ebx"
|
|
+#endif
|
|
+ : "+a"(h_eax), "=d"(h_edx));
|
|
+
|
|
+ // long mode
|
|
+ if ((h_edx & 0x20000000) == 0)
|
|
+ *rdx &= ~0x20000000ull;
|
|
+ // syscall
|
|
+ if ((h_edx & 0x00000800) == 0)
|
|
+ *rdx &= ~0x00000800ull;
|
|
+ // nx
|
|
+ if ((h_edx & 0x00100000) == 0)
|
|
+ *rdx &= ~0x00100000ull;
|
|
+ }
|
|
+ env = saved_env;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_debug(void *opaque, int vcpu)
|
|
+{
|
|
+ CPUState **envs = opaque;
|
|
+
|
|
+ env = envs[0];
|
|
+ env->exception_index = EXCP_DEBUG;
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
|
|
+{
|
|
+ *data = cpu_inb(0, addr);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
|
|
+{
|
|
+ *data = cpu_inw(0, addr);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
|
|
+{
|
|
+ *data = cpu_inl(0, addr);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
|
|
+{
|
|
+ if (addr == 0xb2 && data == 0) {
|
|
+ struct kvm_regs regs;
|
|
+
|
|
+ kvm_get_regs(kvm_context, 0, ®s);
|
|
+
|
|
+ /* hack around smm entry: kvm doesn't emulate smm at this time */
|
|
+ if (regs.rip == 0x409f4)
|
|
+ regs.rip += 0x4b;
|
|
+ kvm_set_regs(kvm_context, 0, ®s);
|
|
+
|
|
+ return 0;
|
|
+ }
|
|
+ cpu_outb(0, addr, data);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
|
|
+{
|
|
+ cpu_outw(0, addr, data);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
|
|
+{
|
|
+ cpu_outl(0, addr, data);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
|
|
+{
|
|
+ *data = ldub_phys(addr);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
|
|
+{
|
|
+ *data = lduw_phys(addr);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
|
|
+{
|
|
+ *data = ldl_phys(addr);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
|
|
+{
|
|
+ *data = ldq_phys(addr);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
|
|
+{
|
|
+ stb_phys(addr, data);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
|
|
+{
|
|
+ stw_phys(addr, data);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
|
|
+{
|
|
+ stl_phys(addr, data);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
|
|
+{
|
|
+ stq_phys(addr, data);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int kvm_io_window(void *opaque)
|
|
+{
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+
|
|
+static int kvm_halt(void *opaque, int vcpu)
|
|
+{
|
|
+ CPUState **envs = opaque, *env;
|
|
+
|
|
+ env = envs[0];
|
|
+ if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
|
|
+ (env->eflags & IF_MASK))) {
|
|
+ env->hflags |= HF_HALTED_MASK;
|
|
+ env->exception_index = EXCP_HLT;
|
|
+ }
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static int kvm_shutdown(void *opaque, int vcpu)
|
|
+{
|
|
+ qemu_system_reset_request();
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static struct kvm_callbacks qemu_kvm_ops = {
|
|
+ .cpuid = kvm_cpuid,
|
|
+ .debug = kvm_debug,
|
|
+ .inb = kvm_inb,
|
|
+ .inw = kvm_inw,
|
|
+ .inl = kvm_inl,
|
|
+ .outb = kvm_outb,
|
|
+ .outw = kvm_outw,
|
|
+ .outl = kvm_outl,
|
|
+ .readb = kvm_readb,
|
|
+ .readw = kvm_readw,
|
|
+ .readl = kvm_readl,
|
|
+ .readq = kvm_readq,
|
|
+ .writeb = kvm_writeb,
|
|
+ .writew = kvm_writew,
|
|
+ .writel = kvm_writel,
|
|
+ .writeq = kvm_writeq,
|
|
+ .halt = kvm_halt,
|
|
+ .shutdown = kvm_shutdown,
|
|
+ .io_window = kvm_io_window,
|
|
+ .try_push_interrupts = try_push_interrupts,
|
|
+ .post_kvm_run = post_kvm_run,
|
|
+ .pre_kvm_run = pre_kvm_run,
|
|
+};
|
|
+
|
|
+int kvm_qemu_init()
|
|
+{
|
|
+ /* Try to initialize kvm */
|
|
+ kvm_context = kvm_init(&qemu_kvm_ops, saved_env);
|
|
+ if (!kvm_context) {
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int kvm_qemu_create_context(void)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
|
|
+ kvm_qemu_destroy();
|
|
+ return -1;
|
|
+ }
|
|
+ kvm_msr_list = kvm_get_msr_list(kvm_context);
|
|
+ if (!kvm_msr_list) {
|
|
+ kvm_qemu_destroy();
|
|
+ return -1;
|
|
+ }
|
|
+ for (i = 0; i < kvm_msr_list->nmsrs; ++i)
|
|
+ if (kvm_msr_list->indices[i] == MSR_STAR)
|
|
+ kvm_has_msr_star = 1;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void kvm_qemu_destroy(void)
|
|
+{
|
|
+ kvm_finalize(kvm_context);
|
|
+}
|
|
+
|
|
+int kvm_update_debugger(CPUState *env)
|
|
+{
|
|
+ struct kvm_debug_guest dbg;
|
|
+ int i;
|
|
+
|
|
+ dbg.enabled = 0;
|
|
+ if (env->nb_breakpoints || env->singlestep_enabled) {
|
|
+ dbg.enabled = 1;
|
|
+ for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
|
|
+ dbg.breakpoints[i].enabled = 1;
|
|
+ dbg.breakpoints[i].address = env->breakpoints[i];
|
|
+ }
|
|
+ dbg.singlestep = env->singlestep_enabled;
|
|
+ }
|
|
+ return kvm_guest_debug(kvm_context, 0, &dbg);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * dirty pages logging
|
|
+ */
|
|
+/* FIXME: use unsigned long pointer instead of unsigned char */
|
|
+unsigned char *kvm_dirty_bitmap = NULL;
|
|
+int kvm_physical_memory_set_dirty_tracking(int enable)
|
|
+{
|
|
+ int r = 0;
|
|
+
|
|
+ if (!kvm_allowed)
|
|
+ return 0;
|
|
+
|
|
+ if (enable) {
|
|
+ if (!kvm_dirty_bitmap) {
|
|
+ unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
|
|
+ kvm_dirty_bitmap = qemu_malloc(bitmap_size);
|
|
+ if (kvm_dirty_bitmap == NULL) {
|
|
+ perror("Failed to allocate dirty pages bitmap");
|
|
+ r=-1;
|
|
+ }
|
|
+ else {
|
|
+ r = kvm_dirty_pages_log_enable_all(kvm_context);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ else {
|
|
+ if (kvm_dirty_bitmap) {
|
|
+ r = kvm_dirty_pages_log_reset(kvm_context);
|
|
+ qemu_free(kvm_dirty_bitmap);
|
|
+ kvm_dirty_bitmap = NULL;
|
|
+ }
|
|
+ }
|
|
+ return r;
|
|
+}
|
|
+
|
|
+/* get kvm's dirty pages bitmap and update qemu's */
|
|
+int kvm_get_dirty_pages_log_slot(int slot,
|
|
+ unsigned char *bitmap,
|
|
+ unsigned int offset,
|
|
+ unsigned int len)
|
|
+{
|
|
+ int r;
|
|
+ unsigned int i, j, n=0;
|
|
+ unsigned char c;
|
|
+ unsigned page_number, addr, addr1;
|
|
+
|
|
+ memset(bitmap, 0, len);
|
|
+ r = kvm_get_dirty_pages(kvm_context, slot, bitmap);
|
|
+ if (r)
|
|
+ return r;
|
|
+
|
|
+ /*
|
|
+ * bitmap-traveling is faster than memory-traveling (for addr...)
|
|
+ * especially when most of the memory is not dirty.
|
|
+ */
|
|
+ for (i=0; i<len; i++) {
|
|
+ c = bitmap[i];
|
|
+ while (c>0) {
|
|
+ j = ffsl(c) - 1;
|
|
+ c &= ~(1u<<j);
|
|
+ page_number = i * 8 + j;
|
|
+ addr1 = page_number * TARGET_PAGE_SIZE;
|
|
+ addr = offset + addr1;
|
|
+ cpu_physical_memory_set_dirty(addr);
|
|
+ n++;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * get kvm's dirty pages bitmap and update qemu's
|
|
+ * we only care about physical ram, which resides in slots 0 and 3
|
|
+ */
|
|
+int kvm_update_dirty_pages_log(void)
|
|
+{
|
|
+ int r = 0, len;
|
|
+
|
|
+ len = BITMAP_SIZE(0xa0000);
|
|
+ r = kvm_get_dirty_pages_log_slot(3, kvm_dirty_bitmap, 0 , len);
|
|
+ len = BITMAP_SIZE(phys_ram_size - 0xc0000);
|
|
+ r = r || kvm_get_dirty_pages_log_slot(0, kvm_dirty_bitmap, 0xc0000, len);
|
|
+ return r;
|
|
+}
|
|
+#endif
|
|
--- qemu-0.9.0/qemu-kvm.h
|
|
+++ qemu-0.9.0/qemu-kvm.h
|
|
@@ -0,0 +1,19 @@
|
|
+#ifndef QEMU_KVM_H
|
|
+#define QEMU_KVM_H
|
|
+
|
|
+#include "kvmctl.h"
|
|
+
|
|
+int kvm_qemu_init(void);
|
|
+int kvm_qemu_create_context(void);
|
|
+void kvm_qemu_destroy(void);
|
|
+void kvm_load_registers(CPUState *env);
|
|
+void kvm_save_registers(CPUState *env);
|
|
+int kvm_cpu_exec(CPUState *env);
|
|
+int kvm_update_debugger(CPUState *env);
|
|
+
|
|
+int kvm_physical_memory_set_dirty_tracking(int enable);
|
|
+int kvm_update_dirty_pages_log(void);
|
|
+
|
|
+#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
|
|
+#define BITMAP_SIZE(m) (ALIGN(((m)>>TARGET_PAGE_BITS), HOST_LONG_BITS) / 8)
|
|
+#endif
|
|
--- qemu-0.9.0/sdl.c
|
|
+++ qemu-0.9.0/sdl.c
|
|
@@ -214,6 +214,11 @@
|
|
{
|
|
char buf[1024];
|
|
strcpy(buf, "QEMU");
|
|
+#if USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ strcat(buf, "/KVM");
|
|
+ }
|
|
+#endif
|
|
if (!vm_running) {
|
|
strcat(buf, " [Stopped]");
|
|
}
|
|
--- qemu-0.9.0/target-i386/cpu.h
|
|
+++ qemu-0.9.0/target-i386/cpu.h
|
|
@@ -161,12 +161,17 @@
|
|
#define HF_MP_MASK (1 << HF_MP_SHIFT)
|
|
#define HF_EM_MASK (1 << HF_EM_SHIFT)
|
|
#define HF_TS_MASK (1 << HF_TS_SHIFT)
|
|
+#define HF_IOPL_MASK (3 << HF_IOPL_SHIFT)
|
|
#define HF_LMA_MASK (1 << HF_LMA_SHIFT)
|
|
#define HF_CS64_MASK (1 << HF_CS64_SHIFT)
|
|
#define HF_OSFXSR_MASK (1 << HF_OSFXSR_SHIFT)
|
|
+#define HF_VM_MASK (1 << HF_VM_SHIFT)
|
|
#define HF_HALTED_MASK (1 << HF_HALTED_SHIFT)
|
|
#define HF_SMM_MASK (1 << HF_SMM_SHIFT)
|
|
|
|
+#define CR0_PE_SHIFT 0
|
|
+#define CR0_MP_SHIFT 1
|
|
+
|
|
#define CR0_PE_MASK (1 << 0)
|
|
#define CR0_MP_MASK (1 << 1)
|
|
#define CR0_EM_MASK (1 << 2)
|
|
@@ -185,7 +190,8 @@
|
|
#define CR4_PAE_MASK (1 << 5)
|
|
#define CR4_PGE_MASK (1 << 7)
|
|
#define CR4_PCE_MASK (1 << 8)
|
|
-#define CR4_OSFXSR_MASK (1 << 9)
|
|
+#define CR4_OSFXSR_SHIFT 9
|
|
+#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
|
|
#define CR4_OSXMMEXCPT_MASK (1 << 10)
|
|
|
|
#define PG_PRESENT_BIT 0
|
|
@@ -496,6 +502,10 @@
|
|
target_ulong kernelgsbase;
|
|
#endif
|
|
|
|
+#ifdef USE_KVM
|
|
+ uint64_t tsc; /* time stamp counter */
|
|
+ uint8_t ready_for_interrupt_injection;
|
|
+#endif
|
|
uint64_t pat;
|
|
|
|
/* temporary data for USE_CODE_COPY mode */
|
|
@@ -534,6 +544,13 @@
|
|
int kqemu_enabled;
|
|
int last_io_time;
|
|
#endif
|
|
+
|
|
+#ifdef USE_KVM
|
|
+#define BITS_PER_LONG (8 * sizeof (long))
|
|
+#define NR_IRQ_WORDS (256/ BITS_PER_LONG)
|
|
+ unsigned long kvm_interrupt_bitmap[NR_IRQ_WORDS];
|
|
+#endif
|
|
+
|
|
/* in order to simplify APIC support, we leave this pointer to the
|
|
user */
|
|
struct APICState *apic_state;
|
|
--- qemu-0.9.0/target-i386/helper.c
|
|
+++ qemu-0.9.0/target-i386/helper.c
|
|
@@ -18,7 +18,9 @@
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
#include "exec.h"
|
|
-
|
|
+#ifdef USE_KVM
|
|
+extern int kvm_allowed;
|
|
+#endif
|
|
//#define DEBUG_PCALL
|
|
|
|
#if 0
|
|
@@ -839,6 +841,13 @@
|
|
uint32_t e1, e2, e3, ss;
|
|
target_ulong old_eip, esp, offset;
|
|
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ printf("%s: unexpect\n", __FUNCTION__);
|
|
+ exit(-1);
|
|
+ }
|
|
+#endif
|
|
+
|
|
has_error_code = 0;
|
|
if (!is_int && !is_hw) {
|
|
switch(intno) {
|
|
@@ -1122,6 +1131,12 @@
|
|
int dpl, cpl;
|
|
uint32_t e2;
|
|
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ printf("%s: unexpect\n", __FUNCTION__);
|
|
+ exit(-1);
|
|
+ }
|
|
+#endif
|
|
dt = &env->idt;
|
|
ptr = dt->base + (intno * 8);
|
|
e2 = ldl_kernel(ptr + 4);
|
|
@@ -1147,6 +1162,12 @@
|
|
void do_interrupt(int intno, int is_int, int error_code,
|
|
target_ulong next_eip, int is_hw)
|
|
{
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ printf("%s: unexpect\n", __FUNCTION__);
|
|
+ exit(-1);
|
|
+ }
|
|
+#endif
|
|
if (loglevel & CPU_LOG_INT) {
|
|
if ((env->cr[0] & CR0_PE_MASK)) {
|
|
static int count;
|
|
@@ -1958,6 +1979,12 @@
|
|
cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
|
|
get_seg_base(e1, e2), limit, e2);
|
|
EIP = new_eip;
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && (e2 & DESC_L_MASK)) {
|
|
+ env->exception_index = -1;
|
|
+ cpu_loop_exit();
|
|
+ }
|
|
+#endif
|
|
} else {
|
|
/* jump to call or task gate */
|
|
dpl = (e2 >> DESC_DPL_SHIFT) & 3;
|
|
--- qemu-0.9.0/target-i386/helper2.c
|
|
+++ qemu-0.9.0/target-i386/helper2.c
|
|
@@ -143,6 +143,9 @@
|
|
#ifdef USE_KQEMU
|
|
kqemu_init(env);
|
|
#endif
|
|
+#ifdef USE_KVM
|
|
+ env->ready_for_interrupt_injection = 1;
|
|
+#endif
|
|
return env;
|
|
}
|
|
|
|
--- qemu-0.9.0/vl.c
|
|
+++ qemu-0.9.0/vl.c
|
|
@@ -88,6 +88,10 @@
|
|
|
|
#include "exec-all.h"
|
|
|
|
+#if USE_KVM
|
|
+#include "qemu-kvm.h"
|
|
+#endif
|
|
+
|
|
#define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
|
|
#ifdef __sun__
|
|
#define SMBD_COMMAND "/usr/sfw/sbin/smbd"
|
|
@@ -149,6 +153,9 @@
|
|
int graphic_depth = 15;
|
|
int full_screen = 0;
|
|
int no_quit = 0;
|
|
+#ifdef USE_KVM
|
|
+CharDriverState *vmchannel_hds[MAX_VMCHANNEL_DEVICES];
|
|
+#endif
|
|
CharDriverState *serial_hds[MAX_SERIAL_PORTS];
|
|
CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
|
|
#ifdef TARGET_I386
|
|
@@ -5407,6 +5414,15 @@
|
|
/* XXX: compute hflags from scratch, except for CPL and IIF */
|
|
env->hflags = hflags;
|
|
tlb_flush(env, 1);
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ for (i = 0; i < NR_IRQ_WORDS ; i++) {
|
|
+ qemu_get_betls(f, &env->kvm_interrupt_bitmap[i]);
|
|
+ }
|
|
+ qemu_get_be64s(f, &env->tsc);
|
|
+ kvm_load_registers(env);
|
|
+ }
|
|
+#endif
|
|
return 0;
|
|
}
|
|
|
|
@@ -5555,6 +5571,10 @@
|
|
if (qemu_get_be32(f) != phys_ram_size)
|
|
return -EINVAL;
|
|
for(i = 0; i < phys_ram_size; i+= TARGET_PAGE_SIZE) {
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */
|
|
+ continue;
|
|
+#endif
|
|
ret = ram_get_page(f, phys_ram_base + i, TARGET_PAGE_SIZE);
|
|
if (ret)
|
|
return ret;
|
|
@@ -5689,6 +5709,10 @@
|
|
target_ulong addr;
|
|
|
|
for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
|
|
+ continue;
|
|
+#endif
|
|
if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) {
|
|
qemu_put_be32(f, addr);
|
|
qemu_put_buffer(f, phys_ram_base + addr, TARGET_PAGE_SIZE);
|
|
@@ -6237,6 +6261,10 @@
|
|
if (reset_requested) {
|
|
reset_requested = 0;
|
|
qemu_system_reset();
|
|
+#ifdef USE_KVM
|
|
+ if (kvm_allowed)
|
|
+ kvm_load_registers(env);
|
|
+#endif
|
|
ret = EXCP_INTERRUPT;
|
|
}
|
|
if (powerdown_requested) {
|
|
@@ -6354,6 +6382,9 @@
|
|
"\n"
|
|
"Debug/Expert options:\n"
|
|
"-monitor dev redirect the monitor to char device 'dev'\n"
|
|
+#ifdef USE_KVM
|
|
+ "-vmchannel di:DI,dev redirect the hypercall device with device id DI, to char device 'dev'\n"
|
|
+#endif
|
|
"-serial dev redirect the serial port to char device 'dev'\n"
|
|
"-parallel dev redirect the parallel port to char device 'dev'\n"
|
|
"-pidfile file Write PID to 'file'\n"
|
|
@@ -6368,6 +6399,9 @@
|
|
"-kernel-kqemu enable KQEMU full virtualization (default is user mode only)\n"
|
|
"-no-kqemu disable KQEMU kernel module usage\n"
|
|
#endif
|
|
+#ifdef USE_KVM
|
|
+ "-no-kvm disable KVM hardware virtualization\n"
|
|
+#endif
|
|
#ifdef USE_CODE_COPY
|
|
"-no-code-copy disable code copy acceleration\n"
|
|
#endif
|
|
@@ -6448,6 +6482,9 @@
|
|
QEMU_OPTION_g,
|
|
QEMU_OPTION_std_vga,
|
|
QEMU_OPTION_monitor,
|
|
+#ifdef USE_KVM
|
|
+ QEMU_OPTION_vmchannel,
|
|
+#endif
|
|
QEMU_OPTION_serial,
|
|
QEMU_OPTION_parallel,
|
|
QEMU_OPTION_loadvm,
|
|
@@ -6462,6 +6499,7 @@
|
|
QEMU_OPTION_smp,
|
|
QEMU_OPTION_vnc,
|
|
QEMU_OPTION_no_acpi,
|
|
+ QEMU_OPTION_no_kvm,
|
|
QEMU_OPTION_no_reboot,
|
|
QEMU_OPTION_daemonize,
|
|
QEMU_OPTION_option_rom,
|
|
@@ -6524,12 +6562,18 @@
|
|
{ "no-kqemu", 0, QEMU_OPTION_no_kqemu },
|
|
{ "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu },
|
|
#endif
|
|
+#ifdef USE_KVM
|
|
+ { "no-kvm", 0, QEMU_OPTION_no_kvm },
|
|
+#endif
|
|
#if defined(TARGET_PPC) || defined(TARGET_SPARC)
|
|
{ "g", 1, QEMU_OPTION_g },
|
|
#endif
|
|
{ "localtime", 0, QEMU_OPTION_localtime },
|
|
{ "std-vga", 0, QEMU_OPTION_std_vga },
|
|
{ "monitor", 1, QEMU_OPTION_monitor },
|
|
+#ifdef USE_KVM
|
|
+ { "vmchannel", 1, QEMU_OPTION_vmchannel },
|
|
+#endif
|
|
{ "serial", 1, QEMU_OPTION_serial },
|
|
{ "parallel", 1, QEMU_OPTION_parallel },
|
|
{ "loadvm", HAS_ARG, QEMU_OPTION_loadvm },
|
|
@@ -6787,6 +6831,10 @@
|
|
const char *r, *optarg;
|
|
CharDriverState *monitor_hd;
|
|
char monitor_device[128];
|
|
+#ifdef USE_KVM
|
|
+ char vmchannel_devices[MAX_VMCHANNEL_DEVICES][128];
|
|
+ int vmchannel_device_index;
|
|
+#endif
|
|
char serial_devices[MAX_SERIAL_PORTS][128];
|
|
int serial_device_index;
|
|
char parallel_devices[MAX_PARALLEL_PORTS][128];
|
|
@@ -6858,6 +6906,12 @@
|
|
translation = BIOS_ATA_TRANSLATION_AUTO;
|
|
pstrcpy(monitor_device, sizeof(monitor_device), "vc");
|
|
|
|
+#ifdef USE_KVM
|
|
+ for(i = 0; i < MAX_VMCHANNEL_DEVICES; i++)
|
|
+ vmchannel_devices[i][0] = '\0';
|
|
+ vmchannel_device_index = 0;
|
|
+#endif
|
|
+
|
|
pstrcpy(serial_devices[0], sizeof(serial_devices[0]), "vc");
|
|
for(i = 1; i < MAX_SERIAL_PORTS; i++)
|
|
serial_devices[i][0] = '\0';
|
|
@@ -7145,6 +7199,17 @@
|
|
case QEMU_OPTION_monitor:
|
|
pstrcpy(monitor_device, sizeof(monitor_device), optarg);
|
|
break;
|
|
+#ifdef USE_KVM
|
|
+ case QEMU_OPTION_vmchannel:
|
|
+ if (vmchannel_device_index >= MAX_VMCHANNEL_DEVICES) {
|
|
+ fprintf(stderr, "qemu: too many vmchannel devices\n");
|
|
+ exit(1);
|
|
+ }
|
|
+ pstrcpy(vmchannel_devices[vmchannel_device_index],
|
|
+ sizeof(vmchannel_devices[0]), optarg);
|
|
+ vmchannel_device_index++;
|
|
+ break;
|
|
+#endif
|
|
case QEMU_OPTION_serial:
|
|
if (serial_device_index >= MAX_SERIAL_PORTS) {
|
|
fprintf(stderr, "qemu: too many serial ports\n");
|
|
@@ -7193,6 +7258,11 @@
|
|
kqemu_allowed = 2;
|
|
break;
|
|
#endif
|
|
+#ifdef USE_KVM
|
|
+ case QEMU_OPTION_no_kvm:
|
|
+ kvm_allowed = 0;
|
|
+ break;
|
|
+#endif
|
|
case QEMU_OPTION_usb:
|
|
usb_enabled = 1;
|
|
break;
|
|
@@ -7283,6 +7353,15 @@
|
|
}
|
|
#endif
|
|
|
|
+#if USE_KVM
|
|
+ if (kvm_allowed) {
|
|
+ if (kvm_qemu_init() < 0) {
|
|
+ fprintf(stderr, "Could not initialize KVM, will disable KVM support\n");
|
|
+ kvm_allowed = 0;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
#ifdef USE_KQEMU
|
|
if (smp_cpus > 1)
|
|
kqemu_allowed = 0;
|
|
@@ -7362,11 +7441,28 @@
|
|
phys_ram_size += ret;
|
|
}
|
|
|
|
+#if USE_KVM
|
|
+ /* Initialize kvm */
|
|
+ if (kvm_allowed) {
|
|
+ phys_ram_size += KVM_EXTRA_PAGES * 4096;
|
|
+ if (kvm_qemu_create_context() < 0) {
|
|
+ fprintf(stderr, "Could not create KVM context\n");
|
|
+ exit(1);
|
|
+ }
|
|
+ } else {
|
|
+ phys_ram_base = qemu_vmalloc(phys_ram_size);
|
|
+ if (!phys_ram_base) {
|
|
+ fprintf(stderr, "Could not allocate physical memory\n");
|
|
+ exit(1);
|
|
+ }
|
|
+ }
|
|
+#else
|
|
phys_ram_base = qemu_vmalloc(phys_ram_size);
|
|
if (!phys_ram_base) {
|
|
fprintf(stderr, "Could not allocate physical memory\n");
|
|
exit(1);
|
|
}
|
|
+#endif
|
|
|
|
/* we always create the cdrom drive, even if no disk is there */
|
|
bdrv_init();
|
|
@@ -7445,6 +7541,33 @@
|
|
}
|
|
monitor_init(monitor_hd, !nographic);
|
|
|
|
+#ifdef USE_KVM
|
|
+ for(i = 0; i < MAX_VMCHANNEL_DEVICES; i++) {
|
|
+ const char *devname = vmchannel_devices[i];
|
|
+ if (devname[0] != '\0' && strcmp(devname, "none")) {
|
|
+ int devid;
|
|
+ char *termn;
|
|
+
|
|
+ if (strstart(devname, "di:", &devname)) {
|
|
+ devid = strtol(devname, &termn, 16);
|
|
+ devname = termn + 1;
|
|
+ }
|
|
+ else {
|
|
+ fprintf(stderr, "qemu: could not find vmchannel device id '%s'\n",
|
|
+ devname);
|
|
+ exit(1);
|
|
+ }
|
|
+ vmchannel_hds[i] = qemu_chr_open(devname);
|
|
+ if (!vmchannel_hds[i]) {
|
|
+ fprintf(stderr, "qemu: could not open vmchannel device '%s'\n",
|
|
+ devname);
|
|
+ exit(1);
|
|
+ }
|
|
+ vmchannel_init(vmchannel_hds[i], devid, i);
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
for(i = 0; i < MAX_SERIAL_PORTS; i++) {
|
|
const char *devname = serial_devices[i];
|
|
if (devname[0] != '\0' && strcmp(devname, "none")) {
|
|
--- qemu-0.9.0/vl.h
|
|
+++ qemu-0.9.0/vl.h
|
|
@@ -157,6 +157,7 @@
|
|
extern int graphic_depth;
|
|
extern const char *keyboard_layout;
|
|
extern int kqemu_allowed;
|
|
+extern int kvm_allowed;
|
|
extern int win2k_install_hack;
|
|
extern int usb_enabled;
|
|
extern int smp_cpus;
|
|
@@ -177,6 +178,10 @@
|
|
#define BIOS_SIZE ((256 + 64) * 1024)
|
|
#endif
|
|
|
|
+#if USE_KVM
|
|
+#define KVM_EXTRA_PAGES 3
|
|
+#endif
|
|
+
|
|
/* keyboard/mouse support */
|
|
|
|
#define MOUSE_EVENT_LBUTTON 0x01
|
|
@@ -342,6 +347,10 @@
|
|
CharDriverState *text_console_init(DisplayState *ds);
|
|
void console_select(unsigned int index);
|
|
|
|
+/* vmchannel devices */
|
|
+
|
|
+#define MAX_VMCHANNEL_DEVICES 4
|
|
+
|
|
/* serial ports */
|
|
|
|
#define MAX_SERIAL_PORTS 4
|
|
@@ -1220,6 +1229,11 @@
|
|
|
|
typedef struct ADBDevice ADBDevice;
|
|
|
|
+/* hypercall.c */
|
|
+
|
|
+void pci_hypercall_init(PCIBus *bus);
|
|
+void vmchannel_init(CharDriverState *hd, uint32_t deviceid, uint32_t index);
|
|
+
|
|
/* buf = NULL means polling */
|
|
typedef int ADBDeviceRequest(ADBDevice *d, uint8_t *buf_out,
|
|
const uint8_t *buf, int len);
|