Fixes needed to build vboxdrv with kernel 5.8. The changes handle the following kernel API changes:

1. In struct mm_struct, member mmap_sem was renamed to mmap_lock.
2. The information in cpu_tlbstate is no longer exported.
3. The routines __get_vm_area() and map_vm_area() no longer exist, and their
   replacements are not exported.

Two fixes have been attempted:

a. The missing routines were not available until kernel 2.6.23, thus the code
   was changed to revert back to the "old" method. Unfortunately, this did not
   work; it will likely require Oracle to make the changes.
b. The replacements for __get_vm_area() and map_vm_area() are implemented. The
   resulting code builds, but the module fails to load because the replacement
   routines are not exported by the kernel. For testing, the kernel was
   modified to export them. Such a kernel modification cannot be a permanent
   solution, but it is acceptable as a temporary workaround.

Index: VirtualBox-6.1.12/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c
===================================================================
--- VirtualBox-6.1.12.orig/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c
+++ VirtualBox-6.1.12/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c
@@ -1,4 +1,4 @@
-/* $Id: memobj-r0drv-linux.c $ */
+/* $Id: memobj-r0drv-linux.c 85516 2020-07-29 10:47:38Z vboxsync $ */
 /** @file
  * IPRT - Ring-0 Memory Objects, Linux.
  */
@@ -52,6 +52,14 @@
 # define PAGE_READONLY_EXEC PAGE_READONLY
 #endif
 
+/** @def IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+ * Whether we use alloc_vm_area (3.2+) for executable memory.
+ * This is a must for 5.8+, but we enable it all the way back to 3.2.x for
+ * better W^R compliance (fExecutable flag). */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0) || defined(DOXYGEN_RUNNING)
+# define IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+#endif
+
 /*
  * 2.6.29+ kernels don't work with remap_pfn_range() anymore because
  * track_pfn_vma_new() is apparently not defined for non-RAM pages.
@@ -72,12 +80,27 @@
 # define gfp_t unsigned
 #endif
 
+/*
+ * Wrappers around mmap_lock/mmap_sem difference.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0)
+# define LNX_MM_DOWN_READ(a_pMm)    down_read(&(a_pMm)->mmap_lock)
+# define LNX_MM_UP_READ(a_pMm)      up_read(&(a_pMm)->mmap_lock)
+# define LNX_MM_DOWN_WRITE(a_pMm)   down_write(&(a_pMm)->mmap_lock)
+# define LNX_MM_UP_WRITE(a_pMm)     up_write(&(a_pMm)->mmap_lock)
+#else
+# define LNX_MM_DOWN_READ(a_pMm)    down_read(&(a_pMm)->mmap_sem)
+# define LNX_MM_UP_READ(a_pMm)      up_read(&(a_pMm)->mmap_sem)
+# define LNX_MM_DOWN_WRITE(a_pMm)   down_write(&(a_pMm)->mmap_sem)
+# define LNX_MM_UP_WRITE(a_pMm)     up_write(&(a_pMm)->mmap_sem)
+#endif
+
 
 /*********************************************************************************************************************************
 *   Structures and Typedefs                                                                                                      *
 *********************************************************************************************************************************/
 /**
- * The Darwin version of the memory object structure.
+ * The Linux version of the memory object structure.
  */
 typedef struct RTR0MEMOBJLNX
 {
@@ -90,11 +113,20 @@ typedef struct RTR0MEMOBJLNX
     bool fExecutable;
     /** Set if we've vmap'ed the memory into ring-0. */
     bool fMappedToRing0;
+#ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+    /** Return from alloc_vm_area() that we now need to use for executable
+     *  memory. */
+    struct vm_struct   *pArea;
+    /** PTE array that goes along with pArea (must be freed). */
+    pte_t             **papPtesForArea;
+#endif
     /** The pages in the apPages array. */
     size_t cPages;
     /** Array of struct page pointers. (variable size) */
     struct page *apPages[1];
-} RTR0MEMOBJLNX, *PRTR0MEMOBJLNX;
+} RTR0MEMOBJLNX;
+/** Pointer to the linux memory object. */
+typedef RTR0MEMOBJLNX *PRTR0MEMOBJLNX;
 
 
 static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx);
@@ -182,7 +214,7 @@ static pgprot_t rtR0MemObjLinuxConvertPr
  * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativerMapUser that creates
  * an empty user space mapping.
  *
- * We acquire the mmap_sem of the task!
+ * We acquire the mmap_sem/mmap_lock of the task!
  *
  * @returns Pointer to the mapping.
  *          (void *)-1 on failure.
@@ -222,9 +254,9 @@ static void *rtR0MemObjLinuxDoMmap(RTR3P
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
         ulAddr = vm_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
 #else
-        down_write(&pTask->mm->mmap_sem);
+        LNX_MM_DOWN_WRITE(pTask->mm);
         ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
-        up_write(&pTask->mm->mmap_sem);
+        LNX_MM_UP_WRITE(pTask->mm);
 #endif
     }
     else
@@ -232,9 +264,9 @@ static void *rtR0MemObjLinuxDoMmap(RTR3P
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
         ulAddr = vm_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
 #else
-        down_write(&pTask->mm->mmap_sem);
+        LNX_MM_DOWN_WRITE(pTask->mm);
         ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
-        up_write(&pTask->mm->mmap_sem);
+        LNX_MM_UP_WRITE(pTask->mm);
 #endif
         if (    !(ulAddr & ~PAGE_MASK)
             &&  (ulAddr & (uAlignment - 1)))
@@ -257,7 +289,7 @@ static void *rtR0MemObjLinuxDoMmap(RTR3P
  * Worker that destroys a user space mapping.
  * Undoes what rtR0MemObjLinuxDoMmap did.
  *
- * We acquire the mmap_sem of the task!
+ * We acquire the mmap_sem/mmap_lock of the task!
 *
 * @param   pv          The ring-3 mapping.
 * @param   cb          The size of the mapping.
@@ -269,13 +301,13 @@ static void rtR0MemObjLinuxDoMunmap(void
    Assert(pTask == current); RT_NOREF_PV(pTask);
    vm_munmap((unsigned long)pv, cb);
 #elif defined(USE_RHEL4_MUNMAP)
-    down_write(&pTask->mm->mmap_sem);
+    LNX_MM_DOWN_WRITE(pTask->mm);
     do_munmap(pTask->mm, (unsigned long)pv, cb, 0); /* should it be 1 or 0? */
-    up_write(&pTask->mm->mmap_sem);
+    LNX_MM_UP_WRITE(pTask->mm);
 #else
-    down_write(&pTask->mm->mmap_sem);
+    LNX_MM_DOWN_WRITE(pTask->mm);
     do_munmap(pTask->mm, (unsigned long)pv, cb);
-    up_write(&pTask->mm->mmap_sem);
+    LNX_MM_UP_WRITE(pTask->mm);
 #endif
 }
 
@@ -520,15 +552,49 @@ static int rtR0MemObjLinuxVMap(PRTR0MEMO
         pgprot_val(fPg) |= _PAGE_NX;
 # endif
 
+# ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+    if (fExecutable)
+    {
+        pte_t **papPtes = (pte_t **)kmalloc_array(pMemLnx->cPages, sizeof(papPtes[0]), GFP_KERNEL);
+        if (papPtes)
+        {
+            pMemLnx->pArea = alloc_vm_area(pMemLnx->Core.cb, papPtes); /* Note! pArea->nr_pages is not set. */
+            if (pMemLnx->pArea)
+            {
+                size_t i;
+                Assert(pMemLnx->pArea->size >= pMemLnx->Core.cb);   /* Note! includes guard page. */
+                Assert(pMemLnx->pArea->addr);
+# ifdef _PAGE_NX
+                pgprot_val(fPg) |= _PAGE_NX; /* Uses RTR0MemObjProtect to clear NX when memory ready, W^X fashion. */
+# endif
+                pMemLnx->papPtesForArea = papPtes;
+                for (i = 0; i < pMemLnx->cPages; i++)
+                    *papPtes[i] = mk_pte(pMemLnx->apPages[i], fPg);
+                pMemLnx->Core.pv = pMemLnx->pArea->addr;
+                pMemLnx->fMappedToRing0 = true;
+            }
+            else
+            {
+                kfree(papPtes);
+                rc = VERR_MAP_FAILED;
+            }
+        }
+        else
+            rc = VERR_MAP_FAILED;
+    }
+    else
+# endif
+    {
 # ifdef VM_MAP
-    pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
+        pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
 # else
-    pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
+        pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
 # endif
-    if (pMemLnx->Core.pv)
-        pMemLnx->fMappedToRing0 = true;
-    else
-        rc = VERR_MAP_FAILED;
+        if (pMemLnx->Core.pv)
+            pMemLnx->fMappedToRing0 = true;
+        else
+            rc = VERR_MAP_FAILED;
+    }
 #else   /* < 2.4.22 */
     rc = VERR_NOT_SUPPORTED;
 #endif
@@ -554,6 +620,22 @@ static int rtR0MemObjLinuxVMap(PRTR0MEMO
 static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
 {
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
+# ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+    if (pMemLnx->pArea)
+    {
+# if 0
+        pte_t **papPtes = pMemLnx->papPtesForArea;
+        size_t  i;
+        for (i = 0; i < pMemLnx->cPages; i++)
+            *papPtes[i] = 0;
+# endif
+        free_vm_area(pMemLnx->pArea);
+        kfree(pMemLnx->papPtesForArea);
+        pMemLnx->pArea = NULL;
+        pMemLnx->papPtesForArea = NULL;
+    }
+    else
+# endif
    if (pMemLnx->fMappedToRing0)
    {
        Assert(pMemLnx->Core.pv);
@@ -593,7 +675,7 @@ DECLHIDDEN(int) rtR0MemObjNativeFree(RTR
             size_t iPage;
             Assert(pTask);
             if (pTask && pTask->mm)
-                down_read(&pTask->mm->mmap_sem);
+                LNX_MM_DOWN_READ(pTask->mm);
 
             iPage = pMemLnx->cPages;
             while (iPage-- > 0)
@@ -608,7 +690,7 @@ DECLHIDDEN(int) rtR0MemObjNativeFree(RTR
             }
 
             if (pTask && pTask->mm)
-                up_read(&pTask->mm->mmap_sem);
+                LNX_MM_UP_READ(pTask->mm);
         }
         /* else: kernel memory - nothing to do here. */
         break;
@@ -1076,7 +1158,7 @@ DECLHIDDEN(int) rtR0MemObjNativeLockUser
         papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
         if (papVMAs)
         {
-            down_read(&pTask->mm->mmap_sem);
+            LNX_MM_DOWN_READ(pTask->mm);
 
             /*
              * Get user pages.
@@ -1162,7 +1244,7 @@ DECLHIDDEN(int) rtR0MemObjNativeLockUser
                     papVMAs[rc]->vm_flags |= VM_DONTCOPY | VM_LOCKED;
                 }
 
-            up_read(&pTask->mm->mmap_sem);
+            LNX_MM_UP_READ(pTask->mm);
 
             RTMemFree(papVMAs);
 
@@ -1189,7 +1271,7 @@ DECLHIDDEN(int) rtR0MemObjNativeLockUser
 #endif
         }
 
-        up_read(&pTask->mm->mmap_sem);
+        LNX_MM_UP_READ(pTask->mm);
 
         RTMemFree(papVMAs);
         rc = VERR_LOCK_FAILED;
@@ -1422,6 +1504,7 @@ DECLHIDDEN(int) rtR0MemObjNativeMapKerne
          * Use vmap - 2.4.22 and later.
          */
         pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
+        /** @todo We don't really care too much for EXEC here... 5.8 always adds NX. */
         Assert(((offSub + cbSub) >> PAGE_SHIFT) <= pMemLnxToMap->cPages);
 # ifdef VM_MAP
         pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[offSub >> PAGE_SHIFT], cbSub >> PAGE_SHIFT, VM_MAP, fPg);
@@ -1469,7 +1552,7 @@ DECLHIDDEN(int) rtR0MemObjNativeMapKerne
             pMemLnx->Core.pv = pMemLnxToMap->Core.u.Phys.uCachePolicy == RTMEM_CACHE_POLICY_MMIO
                              ? ioremap(pMemLnxToMap->Core.u.Phys.PhysBase + offSub, cbSub)
                              : ioremap_cache(pMemLnxToMap->Core.u.Phys.PhysBase + offSub, cbSub);
-#else
+#else /* KERNEL_VERSION < 2.6.25 */
             pMemLnx->Core.pv = pMemLnxToMap->Core.u.Phys.uCachePolicy == RTMEM_CACHE_POLICY_MMIO
                              ? ioremap_nocache(pMemLnxToMap->Core.u.Phys.PhysBase + offSub, cbSub)
                              : ioremap(pMemLnxToMap->Core.u.Phys.PhysBase + offSub, cbSub);
@@ -1604,7 +1687,7 @@ DECLHIDDEN(int) rtR0MemObjNativeMapUser(
             const size_t cPages = (offSub + cbSub) >> PAGE_SHIFT;
             size_t       iPage;
 
-            down_write(&pTask->mm->mmap_sem);
+            LNX_MM_DOWN_WRITE(pTask->mm);
             rc = VINF_SUCCESS;
             if (pMemLnxToMap->cPages)
             {
@@ -1721,7 +1804,7 @@ DECLHIDDEN(int) rtR0MemObjNativeMapUser(
                 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-            up_write(&pTask->mm->mmap_sem);
+            LNX_MM_UP_WRITE(pTask->mm);
 
             if (RT_SUCCESS(rc))
             {
@@ -1753,6 +1836,29 @@ DECLHIDDEN(int) rtR0MemObjNativeMapUser(
 
 DECLHIDDEN(int) rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
 {
+# ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+    /*
+     * Currently only supported when we've got addresses PTEs from the kernel.
+     */
+    PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
+    if (pMemLnx->pArea && pMemLnx->papPtesForArea)
+    {
+        pgprot_t const  fPg     = rtR0MemObjLinuxConvertProt(fProt, true /*fKernel*/);
+        size_t const    cPages  = (offSub + cbSub) >> PAGE_SHIFT;
+        pte_t         **papPtes = pMemLnx->papPtesForArea;
+        size_t          i;
+
+        for (i = offSub >> PAGE_SHIFT; i < cPages; i++)
+        {
+            set_pte(papPtes[i], mk_pte(pMemLnx->apPages[i], fPg));
+        }
+        preempt_disable();
+        __flush_tlb_all();
+        preempt_enable();
+        return VINF_SUCCESS;
+    }
+# endif
+
     NOREF(pMem);
     NOREF(offSub);
     NOREF(cbSub);
Index: VirtualBox-6.1.12/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c
===================================================================
--- VirtualBox-6.1.12.orig/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c
+++ VirtualBox-6.1.12/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c
@@ -763,12 +763,19 @@ EXPORT_SYMBOL(SUPDrvLinuxIDC);
 RTCCUINTREG VBOXCALL supdrvOSChangeCR4(RTCCUINTREG fOrMask, RTCCUINTREG fAndMask)
 {
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 20, 0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
     RTCCUINTREG uOld = this_cpu_read(cpu_tlbstate.cr4);
+#else
+    RTCCUINTREG uOld = __read_cr4();
+#endif
     RTCCUINTREG uNew = (uOld & fAndMask) | fOrMask;
     if (uNew != uOld)
     {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
         this_cpu_write(cpu_tlbstate.cr4, uNew);
         __write_cr4(uNew);
+#endif
+        ASMSetCR4(uNew);
     }
 #else
     RTCCUINTREG uOld = ASMGetCR4();
Index: VirtualBox-6.1.12/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c
===================================================================
--- VirtualBox-6.1.12.orig/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c
+++ VirtualBox-6.1.12/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c
@@ -38,7 +38,7 @@
 #include 
 
 #if (defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)) && !defined(RTMEMALLOC_EXEC_HEAP)
-# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) && LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
 /**
  * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
  * memory in the moduel range. This is preferrable to the exec heap below.
Index: VirtualBox-6.1.12/src/VBox/Additions/linux/sharedfolders/vfsmod.c
===================================================================
--- VirtualBox-6.1.12.orig/src/VBox/Additions/linux/sharedfolders/vfsmod.c
+++ VirtualBox-6.1.12/src/VBox/Additions/linux/sharedfolders/vfsmod.c
@@ -52,7 +52,7 @@
 #endif
 #include 
 #include 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 62)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 62) && LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
 # include 
 #endif
 #include 
Index: VirtualBox-6.1.12/src/VBox/Runtime/r0drv/linux/the-linux-kernel.h
===================================================================
--- VirtualBox-6.1.12.orig/src/VBox/Runtime/r0drv/linux/the-linux-kernel.h
+++ VirtualBox-6.1.12/src/VBox/Runtime/r0drv/linux/the-linux-kernel.h
@@ -1,4 +1,4 @@
-/* $Id: the-linux-kernel.h $ */
+/* $Id: the-linux-kernel.h 85518 2020-07-29 11:01:45Z vboxsync $ */
 /** @file
  * IPRT - Include all necessary headers for the Linux kernel.
  */
@@ -176,6 +176,11 @@
 # include 
 #endif
 
+/* for __flush_tlb_all() */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
+# include <asm/tlbflush.h>
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
 # include 
 #else
Index: VirtualBox-6.1.12/src/VBox/Runtime/r0drv/linux/time-r0drv-linux.c
===================================================================
--- VirtualBox-6.1.12.orig/src/VBox/Runtime/r0drv/linux/time-r0drv-linux.c
+++ VirtualBox-6.1.12/src/VBox/Runtime/r0drv/linux/time-r0drv-linux.c
@@ -1,4 +1,4 @@
-/* $Id: time-r0drv-linux.c $ */
+/* $Id: time-r0drv-linux.c 85208 2020-07-10 23:58:39Z vboxsync $ */
 /** @file
  * IPRT - Time, Ring-0 Driver, Linux.
  */
@@ -31,6 +31,12 @@
 #define LOG_GROUP RTLOGGROUP_TIME
 #include "the-linux-kernel.h"
 #include "internal/iprt.h"
+/* Make sure we have the setting functions we need for RTTimeNow: */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 16)
+# define RTTIME_INCL_TIMEVAL
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+# define RTTIME_INCL_TIMESPEC
+#endif
 #include 
 #include 
 
@@ -45,6 +51,7 @@ DECLINLINE(uint64_t) rtTimeGetSystemNano
      */
     uint64_t u64;
     struct timespec64 Ts = { 0, 0 };
+    ktime_get_ts64(&Ts);
     u64 = Ts.tv_sec * RT_NS_1SEC_64 + Ts.tv_nsec;
     return u64;
 
@@ -181,22 +188,19 @@ RT_EXPORT_SYMBOL(RTTimeSystemMilliTS);
 RTDECL(PRTTIMESPEC) RTTimeNow(PRTTIMESPEC pTime)
 {
     IPRT_LINUX_SAVE_EFL_AC();
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 16)
-/* On Linux 4.20, time.h includes time64.h and we have to use 64-bit times. */
-# ifdef _LINUX_TIME64_H
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
     struct timespec64 Ts;
-    ktime_get_real_ts64(&Ts);
-# else
-    struct timespec Ts;
-    ktime_get_real_ts(&Ts);
-# endif
+    ktime_get_real_ts64(&Ts);   /* ktime_get_real_ts64 was added as a macro in 3.17, function since 4.18. */
     IPRT_LINUX_RESTORE_EFL_AC();
-# ifdef _LINUX_TIME64_H
     return RTTimeSpecSetTimespec64(pTime, &Ts);
-# else
+
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 16)
+    struct timespec Ts;
+    ktime_get_real_ts(&Ts);     /* ktime_get_real_ts was removed in Linux 4.20. */
+    IPRT_LINUX_RESTORE_EFL_AC();
     return RTTimeSpecSetTimespec(pTime, &Ts);
-# endif
-#else /* < 2.6.16 */
+
+#else  /* < 2.6.16 */
     struct timeval Tv;
     do_gettimeofday(&Tv);
     IPRT_LINUX_RESTORE_EFL_AC();