diff --git a/0001-ARM-Don-t-extend-bit-LG_VADDR-to-compute-high-addres.patch b/0001-ARM-Don-t-extend-bit-LG_VADDR-to-compute-high-addres.patch new file mode 100644 index 0000000..02178b7 --- /dev/null +++ b/0001-ARM-Don-t-extend-bit-LG_VADDR-to-compute-high-addres.patch @@ -0,0 +1,42 @@ +From 7a8bc7172b17e219b3603e99c8da44efb283e652 Mon Sep 17 00:00:00 2001 +From: David Goldblatt +Date: Fri, 29 Sep 2017 13:54:08 -0700 +Subject: [PATCH] ARM: Don't extend bit LG_VADDR to compute high address bits. + +In userspace ARM on Linux, zero-ing the high bits is the correct way to do this. +This doesn't fix the fact that we currently set LG_VADDR to 48 on ARM, when in +fact larger virtual address sizes are coming soon. We'll cross that bridge when +we come to it. +--- + include/jemalloc/internal/rtree.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h +index b5d4db39..4563db23 100644 +--- a/include/jemalloc/internal/rtree.h ++++ b/include/jemalloc/internal/rtree.h +@@ -178,9 +178,21 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + + JEMALLOC_ALWAYS_INLINE extent_t * + rtree_leaf_elm_bits_extent_get(uintptr_t bits) { ++# ifdef __aarch64__ ++ /* ++ * aarch64 doesn't sign extend the highest virtual address bit to set ++ * the higher ones. Instead, the high bits gets zeroed. ++ */ ++ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; ++ /* Mask off the slab bit. */ ++ uintptr_t low_bit_mask = ~(uintptr_t)1; ++ uintptr_t mask = high_bit_mask & low_bit_mask; ++ return (extent_t *)(bits & mask); ++# else + /* Restore sign-extended high bits, mask slab bit. 
*/ + return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> + RTREE_NHIB) & ~((uintptr_t)0x1)); ++# endif + } + + JEMALLOC_ALWAYS_INLINE szind_t +-- +2.14.2 + diff --git a/0001-remove-CPU_SPINWAIT.patch b/0001-remove-CPU_SPINWAIT.patch new file mode 100644 index 0000000..bed29f7 --- /dev/null +++ b/0001-remove-CPU_SPINWAIT.patch @@ -0,0 +1,16 @@ +--- a/configure 2017-09-12 12:42:21.626542578 +0200 ++++ b/configure 2017-09-12 12:42:24.046589256 +0200 +@@ -6864,12 +6852,11 @@ + fi + fi + ;; +- powerpc*) ++ powerpc) + cat >>confdefs.h <<_ACEOF + #define HAVE_ALTIVEC + _ACEOF + +- CPU_SPINWAIT='__asm__ volatile("or 31,31,31")' + ;; + *) + ;; diff --git a/jemalloc-4.5.0.tar.bz2 b/jemalloc-4.5.0.tar.bz2 deleted file mode 100644 index 0e1b69f..0000000 --- a/jemalloc-4.5.0.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9409d85664b4f135b77518b0b118c549009dc10f6cba14557d170476611f6780 -size 449992 diff --git a/jemalloc-5.0.1.tar.bz2 b/jemalloc-5.0.1.tar.bz2 new file mode 100644 index 0000000..791bed0 --- /dev/null +++ b/jemalloc-5.0.1.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4814781d395b0ef093b21a08e8e6e0bd3dab8762f9935bbfb71679b0dea7c3e9 +size 499300 diff --git a/jemalloc.changes b/jemalloc.changes index c3500da..b4cd8bf 100644 --- a/jemalloc.changes +++ b/jemalloc.changes @@ -1,3 +1,237 @@ +------------------------------------------------------------------- +Wed Oct 4 09:19:52 UTC 2017 - mliska@suse.cz + +- Add 0001-ARM-Don-t-extend-bit-LG_VADDR-to-compute-high-addres.patch: + fixes #979. + +------------------------------------------------------------------- +Tue Sep 12 10:28:43 UTC 2017 - mliska@suse.cz + +- Revert 701daa5298b3befe2aff05ce590533165abb9ba4 in order to fix + #761. 
+ +------------------------------------------------------------------- +Tue Sep 12 08:12:04 UTC 2017 - mliska@suse.cz + +- Update to version 5.0.1 + Bug fixes: + * Update decay->nunpurged before purging, in order to avoid potential + update races and subsequent incorrect purging volume. + ([37]@interwq) + * Only abort on dlsym(3) error if the failure impacts an enabled + feature (lazy locking and/or background threads). This mitigates an + initialization failure bug for which we still do not have a clear + reproduction test case. ([38]@interwq) + * Modify tsd management so that it neither crashes nor leaks if a + thread's only allocation activity is to call free() after TLS + destructors have been executed. This behavior was observed when + operating with GNU libc, and is unlikely to be an issue with other + libc implementations. ([39]@interwq) + * Mask signals during background thread creation. This prevents + signals from being inadvertently delivered to background threads. + ([40]@jasone, [41]@davidtgoldblatt, [42]@interwq) + * Avoid inactivity checks within background threads, in order to + prevent recursive mutex acquisition. ([43]@interwq) + * Fix extent_grow_retained() to use the specified hooks when the + arena.<i>.extent_hooks mallctl is used to override the default + hooks. ([44]@interwq) + * Add missing reentrancy support for custom extent hooks which + allocate. ([45]@interwq) + * Post-fork(2), re-initialize the list of tcaches associated with + each arena to contain no tcaches except the forking thread's. + ([46]@interwq) + * Add missing post-fork(2) mutex reinitialization for + extent_grow_mtx. This fixes potential deadlocks after fork(2). + ([47]@interwq) + * Enforce minimum autoconf version (currently 2.68), since 2.63 is + known to generate corrupt configure scripts. ([48]@jasone) + * Ensure that the configured page size (--with-lg-page) is no larger + than the configured huge page size (--with-lg-hugepage). 
+ ([49]@jasone) + + New features: + * Implement optional per-CPU arena support; threads choose which + arena to use based on current CPU rather than on fixed + thread-->arena associations. ([59]@interwq) + * Implement two-phase decay of unused dirty pages. Pages transition + from dirty-->muzzy-->clean, where the first phase transition relies + on madvise(... MADV_FREE) semantics, and the second phase + transition discards pages such that they are replaced with + demand-zeroed pages on next access. ([60]@jasone) + * Increase decay time resolution from seconds to milliseconds. + ([61]@jasone) + * Implement opt-in per CPU background threads, and use them for + asynchronous decay-driven unused dirty page purging. ([62]@interwq) + * Add mutex profiling, which collects a variety of statistics useful + for diagnosing overhead/contention issues. ([63]@interwq) + * Add C++ new/delete operator bindings. ([64]@djwatson) + * Support manually created arena destruction, such that all data and + metadata are discarded. Add MALLCTL_ARENAS_DESTROYED for accessing + merged stats associated with destroyed arenas. ([65]@jasone) + * Add MALLCTL_ARENAS_ALL as a fixed index for use in accessing + merged/destroyed arena statistics via mallctl. ([66]@jasone) + * Add opt.abort_conf to optionally abort if invalid configuration + options are detected during initialization. ([67]@interwq) + * Add opt.stats_print_opts, so that e.g. JSON output can be selected + for the stats dumped during exit if opt.stats_print is true. + ([68]@jasone) + * Add --with-version=VERSION for use when embedding jemalloc into + another project's git repository. ([69]@jasone) + * Add --disable-thp to support cross compiling. ([70]@jasone) + * Add --with-lg-hugepage to support cross compiling. 
([71]@jasone) + * Add mallctl interfaces (various authors): + + background_thread + + opt.abort_conf + + opt.retain + + opt.percpu_arena + + opt.background_thread + + opt.{dirty,muzzy}_decay_ms + + opt.stats_print_opts + + arena.<i>.initialized + + arena.<i>.destroy + + arena.<i>.{dirty,muzzy}_decay_ms + + arena.<i>.extent_hooks + + arenas.{dirty,muzzy}_decay_ms + + arenas.bin.<i>.slab_size + + arenas.nlextents + + arenas.lextent.<i>.size + + arenas.create + + stats.background_thread.{num_threads,num_runs,run_interval} + + stats.mutexes.{ctl,background_thread,prof,reset}.{num_ops, + num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds, + num_owner_switch} + + stats.arenas.<i>.{dirty,muzzy}_decay_ms + + stats.arenas.<i>.uptime + + stats.arenas.<i>.{pmuzzy,base,internal,resident} + + stats.arenas.<i>.{dirty,muzzy}_{npurge,nmadvise,purged} + + stats.arenas.<i>.bins.<j>.{nslabs,reslabs,curslabs} + + stats.arenas.<i>.bins.<j>.mutex.{num_ops,num_spin_acq,num_wait, + max_wait_time,total_wait_time,max_num_thds,num_owner_switch} + + stats.arenas.<i>.lextents.<j>.{nmalloc,ndalloc,nrequests, + curlextents} + + stats.arenas.<i>.mutexes.{large,extent_avail,extents_dirty, + extents_muzzy,extents_retained,decay_dirty,decay_muzzy,base, + tcache_list}.{num_ops,num_spin_acq,num_wait,max_wait_time, + total_wait_time,max_num_thds,num_owner_switch} + + Portability improvements: + * Improve reentrant allocation support, such that deadlock is less + likely if e.g. a system library call in turn allocates memory. + ([72]@davidtgoldblatt, [73]@interwq) + * Support static linking of jemalloc with glibc. ([74]@djwatson) + + Optimizations and refactors: + * Organize virtual memory as "extents" of virtual memory pages, + rather than as naturally aligned "chunks", and store all metadata + in arbitrarily distant locations. This reduces virtual memory + external fragmentation, and will interact better with huge pages + (not yet explicitly supported). 
([75]@jasone) + * Fold large and huge size classes together; only small and large + size classes remain. ([76]@jasone) + * Unify the allocation paths, and merge most fast-path branching + decisions. ([77]@davidtgoldblatt, [78]@interwq) + * Embed per thread automatic tcache into thread-specific data, which + reduces conditional branches and dereferences. Also reorganize + tcache to increase fast-path data locality. ([79]@interwq) + * Rewrite atomics to closely model the C11 API, convert various + synchronization from mutex-based to atomic, and use the explicit + memory ordering control to resolve various hypothetical races + without increasing synchronization overhead. ([80]@davidtgoldblatt) + * Extensively optimize rtree via various methods: + + Add multiple layers of rtree lookup caching, since rtree + lookups are now part of fast-path deallocation. ([81]@interwq) + + Determine rtree layout at compile time. ([82]@jasone) + + Make the tree shallower for common configurations. + ([83]@jasone) + + Embed the root node in the top-level rtree data structure, + thus avoiding one level of indirection. ([84]@jasone) + + Further specialize leaf elements as compared to internal node + elements, and directly embed extent metadata needed for + fast-path deallocation. ([85]@jasone) + + Ignore leading always-zero address bits + (architecture-specific). ([86]@jasone) + * Reorganize headers (ongoing work) to make them hermetic, and + disentangle various module dependencies. ([87]@davidtgoldblatt) + * Convert various internal data structures such as size class + metadata from boot-time-initialized to compile-time-initialized. + Propagate resulting data structure simplifications, such as making + arena metadata fixed-size. ([88]@jasone) + * Simplify size class lookups when constrained to size classes that + are multiples of the page size. This speeds lookups, but the + primary benefit is complexity reduction in code that was the source + of numerous regressions. 
([89]@jasone) + * Lock individual extents when possible for localized extent + operations, rather than relying on a top-level arena lock. + ([90]@davidtgoldblatt, [91]@jasone) + * Use first fit layout policy instead of best fit, in order to + improve packing. ([92]@jasone) + * If munmap(2) is not in use, use an exponential series to grow each + arena's virtual memory, so that the number of disjoint virtual + memory mappings remains low. ([93]@jasone) + * Implement per arena base allocators, so that arenas never share any + virtual memory pages. ([94]@jasone) + * Automatically generate private symbol name mangling macros. + ([95]@jasone) + + Incompatible changes: + * Replace chunk hooks with an expanded/normalized set of extent + hooks. ([96]@jasone) + * Remove ratio-based purging. ([97]@jasone) + * Remove --disable-tcache. ([98]@jasone) + * Remove --disable-tls. ([99]@jasone) + * Remove --enable-ivsalloc. ([100]@jasone) + * Remove --with-lg-size-class-group. ([101]@jasone) + * Remove --with-lg-tiny-min. ([102]@jasone) + * Remove --disable-cc-silence. ([103]@jasone) + * Remove --enable-code-coverage. ([104]@jasone) + * Remove --disable-munmap (replaced by opt.retain). ([105]@jasone) + * Remove Valgrind support. ([106]@jasone) + * Remove quarantine support. ([107]@jasone) + * Remove redzone support. 
([108]@jasone) + * Remove mallctl interfaces (various authors): + + config.munmap + + config.tcache + + config.tls + + config.valgrind + + opt.lg_chunk + + opt.purge + + opt.lg_dirty_mult + + opt.decay_time + + opt.quarantine + + opt.redzone + + opt.thp + + arena.<i>.lg_dirty_mult + + arena.<i>.decay_time + + arena.<i>.chunk_hooks + + arenas.initialized + + arenas.lg_dirty_mult + + arenas.decay_time + + arenas.bin.<i>.run_size + + arenas.nlruns + + arenas.lrun.<i>.size + + arenas.nhchunks + + arenas.hchunk.<i>.size + + arenas.extend + + stats.cactive + + stats.arenas.<i>.lg_dirty_mult + + stats.arenas.<i>.decay_time + + stats.arenas.<i>.metadata.{mapped,allocated} + + stats.arenas.<i>.{npurge,nmadvise,purged} + + stats.arenas.<i>.huge.{allocated,nmalloc,ndalloc,nrequests} + + stats.arenas.<i>.bins.<j>.{nruns,reruns,curruns} + + stats.arenas.<i>.lruns.<j>.{nmalloc,ndalloc,nrequests,curruns} + + stats.arenas.<i>.hchunks.<j>.{nmalloc,ndalloc,nrequests, + curhchunks} + + Bug fixes: + * Improve interval-based profile dump triggering to dump only one + profile when a single allocation's size exceeds the interval. + ([109]@jasone) + * Use prefixed function names (as controlled by + --with-jemalloc-prefix) when pruning backtrace frames in jeprof. 
+ ([110]@jasone) + + ------------------------------------------------------------------- Fri Mar 10 19:50:27 UTC 2017 - idonmez@suse.com diff --git a/jemalloc.spec b/jemalloc.spec index 5029e22..ae63bad 100644 --- a/jemalloc.spec +++ b/jemalloc.spec @@ -18,13 +18,15 @@ %define lname libjemalloc2 Name: jemalloc -Version: 4.5.0 +Version: 5.0.1 Release: 0 Summary: General-purpose scalable concurrent malloc implementation License: BSD-2-Clause Group: Development/Libraries/C and C++ Url: http://canonware.com/jemalloc/ Source: https://github.com/jemalloc/jemalloc/releases/download/%{version}/jemalloc-%{version}.tar.bz2 +Patch0: 0001-remove-CPU_SPINWAIT.patch +Patch1: 0001-ARM-Don-t-extend-bit-LG_VADDR-to-compute-high-addres.patch BuildRequires: docbook-xsl-stylesheets BuildRequires: libxslt BuildRequires: pkg-config @@ -66,6 +68,8 @@ malloc(3) implementation. %prep %setup -q +%patch0 -p1 +%patch1 -p1 %build export EXTRA_CFLAGS="%optflags -std=gnu99" @@ -80,8 +84,7 @@ if [ -f "%_bindir/gcc-4.8" ]; then export CC=gcc-4.8 fi %endif -%configure --enable-cc-silence \ - --enable-prof +%configure --enable-prof make %{?_smp_mflags} %install