Diffstat (limited to 'memory/jemalloc/src/include')
56 files changed, 12568 insertions, 0 deletions
diff --git a/memory/jemalloc/src/include/jemalloc/internal/arena.h b/memory/jemalloc/src/include/jemalloc/internal/arena.h new file mode 100644 index 000000000..f39ce54b5 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/arena.h @@ -0,0 +1,1516 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) + +/* Maximum number of regions in one run. */ +#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) + +/* + * Minimum redzone size. Redzones may be larger than this if necessary to + * preserve region alignment. + */ +#define REDZONE_MINSIZE 16 + +/* + * The minimum ratio of active:dirty pages per arena is computed as: + * + * (nactive >> lg_dirty_mult) >= ndirty + * + * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as + * many active pages as dirty pages. + */ +#define LG_DIRTY_MULT_DEFAULT 3 + +typedef enum { + purge_mode_ratio = 0, + purge_mode_decay = 1, + + purge_mode_limit = 2 +} purge_mode_t; +#define PURGE_DEFAULT purge_mode_ratio +/* Default decay time in seconds. */ +#define DECAY_TIME_DEFAULT 10 +/* Number of event ticks between time checks. */ +#define DECAY_NTICKS_PER_UPDATE 1000 + +typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; +typedef struct arena_avail_links_s arena_avail_links_t; +typedef struct arena_run_s arena_run_t; +typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; +typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; +typedef struct arena_chunk_s arena_chunk_t; +typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; +typedef struct arena_bin_s arena_bin_t; +typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#ifdef JEMALLOC_ARENA_STRUCTS_A +struct arena_run_s { + /* Index of bin this run is associated with. */ + szind_t binind; + + /* Number of free regions in run. */ + unsigned nfree; + + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +/* Each element of the chunk map corresponds to one page within the chunk. */ +struct arena_chunk_map_bits_s { + /* + * Run address (or size) and various flags are stored together. The bit + * layout looks like (assuming 32-bit system): + * + * ???????? ???????? ???nnnnn nnndumla + * + * ? : Unallocated: Run address for first/last pages, unset for internal + * pages. + * Small: Run page offset. + * Large: Run page count for first page, unset for trailing pages. + * n : binind for small size class, BININD_INVALID for large size class. + * d : dirty? + * u : unzeroed? + * m : decommitted? + * l : large? + * a : allocated? + * + * Following are example bit patterns for the three types of runs. 
+ * + * p : run page offset + * s : run size + * n : binind for size class; large objects set these to BININD_INVALID + * x : don't care + * - : 0 + * + : 1 + * [DUMLA] : bit set + * [dumla] : bit unset + * + * Unallocated (clean): + * ssssssss ssssssss sss+++++ +++dum-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx + * ssssssss ssssssss sss+++++ +++dUm-a + * + * Unallocated (dirty): + * ssssssss ssssssss sss+++++ +++D-m-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * ssssssss ssssssss sss+++++ +++D-m-a + * + * Small: + * pppppppp pppppppp pppnnnnn nnnd---A + * pppppppp pppppppp pppnnnnn nnn----A + * pppppppp pppppppp pppnnnnn nnnd---A + * + * Large: + * ssssssss ssssssss sss+++++ +++D--LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ---+++++ +++D--LA + * + * Large (sampled, size <= LARGE_MINCLASS): + * ssssssss ssssssss sssnnnnn nnnD--LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ---+++++ +++D--LA + * + * Large (not sampled, size == LARGE_MINCLASS): + * ssssssss ssssssss sss+++++ +++D--LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ---+++++ +++D--LA + */ + size_t bits; +#define CHUNK_MAP_ALLOCATED ((size_t)0x01U) +#define CHUNK_MAP_LARGE ((size_t)0x02U) +#define CHUNK_MAP_STATE_MASK ((size_t)0x3U) + +#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U) +#define CHUNK_MAP_UNZEROED ((size_t)0x08U) +#define CHUNK_MAP_DIRTY ((size_t)0x10U) +#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU) + +#define CHUNK_MAP_BININD_SHIFT 5 +#define BININD_INVALID ((size_t)0xffU) +#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) +#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK + +#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8) +#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE) +#define CHUNK_MAP_SIZE_MASK \ + (~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK)) +}; + +struct arena_runs_dirty_link_s { + qr(arena_runs_dirty_link_t) rd_link; +}; + +/* + * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just + * like arena_chunk_map_bits_t. Two separate arrays are stored within each + * chunk header in order to improve cache locality. + */ +struct arena_chunk_map_misc_s { + /* + * Linkage for run heaps. There are two disjoint uses: + * + * 1) arena_t's runs_avail heaps. + * 2) arena_run_t conceptually uses this linkage for in-use non-full + * runs, rather than directly embedding linkage. + */ + phn(arena_chunk_map_misc_t) ph_link; + + union { + /* Linkage for list of dirty runs. */ + arena_runs_dirty_link_t rd; + + /* Profile counters, used for large object runs. */ + union { + void *prof_tctx_pun; + prof_tctx_t *prof_tctx; + }; + + /* Small region run metadata. */ + arena_run_t run; + }; +}; +typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; +#endif /* JEMALLOC_ARENA_STRUCTS_A */ + +#ifdef JEMALLOC_ARENA_STRUCTS_B +/* Arena chunk header. */ +struct arena_chunk_s { + /* + * A pointer to the arena that owns the chunk is stored within the node. + * This field as a whole is used by chunks_rtree to support both + * ivsalloc() and core-based debugging. + */ + extent_node_t node; + + /* + * Map of pages within chunk that keeps track of free/large/small. The + * first map_bias entries are omitted, since the chunk header does not + * need to be tracked in the map. This omission saves a header page + * for common chunk sizes (e.g. 4 MiB). + */ + arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. 
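+ * The array is indexed by page offset past the chunk header; a
+ * hypothetical lookup, mirroring arena_bitselm_get_mutable() below,
+ * would be:
+ *
+ *   assert(pageind >= map_bias && pageind < chunk_npages);
+ *   arena_chunk_map_bits_t *elm = &chunk->map_bits[pageind - map_bias];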
*/ +}; + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each run has the following layout: + * + * /--------------------\ + * | pad? | + * |--------------------| + * | redzone | + * reg0_offset | region 0 | + * | redzone | + * |--------------------| \ + * | redzone | | + * | region 1 | > reg_interval + * | redzone | / + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | redzone | + * | region nregs-1 | + * | redzone | + * |--------------------| + * | alignment pad? | + * \--------------------/ + * + * reg_interval has at least the same minimum alignment as reg_size; this + * preserves the alignment constraint that sa2u() depends on. Alignment pad is + * either 0 or redzone_size; it is present only if needed to align reg0_offset. + */ +struct arena_bin_info_s { + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Redzone size. */ + size_t redzone_size; + + /* Interval between regions (reg_size + (redzone_size << 1)). */ + size_t reg_interval; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + + /* + * Metadata used to manipulate bitmaps for runs associated with this + * bin. + */ + bitmap_info_t bitmap_info; + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; +}; + +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. + */ + size_t ndirty; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; +}; + +struct arena_bin_s { + /* + * All operations on runcur, runs, and stats require that lock be + * locked. Run allocation/deallocation are protected by the arena lock, + * which may be acquired while holding one or more bin locks, but not + * vise versa. + */ + malloc_mutex_t lock; + + /* + * Current run being used to service allocations of this bin's size + * class. 
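+ * When runcur is NULL or has no free regions, a replacement is taken
+ * from the runs heap below. A hypothetical fast-path sketch (where
+ * run_reg_alloc() is an illustrative helper, not part of this header):
+ *
+ *   if (bin->runcur != NULL && bin->runcur->nfree > 0)
+ *           return (run_reg_alloc(bin->runcur, bin_info));
+ *   ... otherwise pop the lowest-addressed non-full run from bin->runs.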
+ */ + arena_run_t *runcur; + + /* + * Heap of non-full runs. This heap is used when looking for an + * existing run when runcur is no longer usable. We choose the + * non-full run that is lowest in memory; this policy tends to keep + * objects packed well, and it can also help reduce the number of + * almost-empty chunks. + */ + arena_run_heap_t runs; + + /* Bin statistics. */ + malloc_bin_stats_t stats; +}; + +struct arena_s { + /* This arena's index within the arenas array. */ + unsigned ind; + + /* + * Number of threads currently assigned to this arena, synchronized via + * atomic operations. Each thread has two distinct assignments, one for + * application-serving allocation, and the other for internal metadata + * allocation. Internal metadata must not be allocated from arenas + * created via the arenas.extend mallctl, because the arena.<i>.reset + * mallctl indiscriminately discards all allocations for the affected + * arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. + */ + unsigned nthreads[2]; + + /* + * There are three classes of arena operations from a locking + * perspective: + * 1) Thread assignment (modifies nthreads) is synchronized via atomics. + * 2) Bin-related operations are protected by bin locks. + * 3) Chunk- and run-related operations are protected by this mutex. + */ + malloc_mutex_t lock; + + arena_stats_t stats; + /* + * List of tcaches for extant threads associated with this arena. + * Stats from these are merged incrementally, and at exit if + * opt_stats_print is enabled. + */ + ql_head(tcache_t) tcache_ql; + + uint64_t prof_accumbytes; + + /* + * PRNG state for cache index randomization of large allocation base + * pointers. + */ + size_t offset_state; + + dss_prec_t dss_prec; + + + /* Extant arena chunks. */ + ql_head(extent_node_t) achunks; + + /* + * In order to avoid rapid chunk allocation/deallocation when an arena + * oscillates right on the cusp of needing a new chunk, cache the most + * recently freed chunk. The spare is left in the arena's chunk trees + * until it is deleted. + * + * There is one spare chunk per arena, rather than one spare total, in + * order to avoid interactions between multiple threads that could make + * a single spare inadequate. + */ + arena_chunk_t *spare; + + /* Minimum ratio (log base 2) of nactive:ndirty. */ + ssize_t lg_dirty_mult; + + /* True if a thread is currently executing arena_purge_to_limit(). */ + bool purging; + + /* Number of pages in active runs and huge regions. */ + size_t nactive; + + /* + * Current count of pages within unused runs that are potentially + * dirty, and for which madvise(... MADV_DONTNEED) has not been called. + * By tracking this, we can institute a limit on how much dirty unused + * memory is mapped for each arena. + */ + size_t ndirty; + + /* + * Unused dirty memory this arena manages. Dirty memory is conceptually + * tracked as an arbitrarily interleaved LRU of dirty runs and cached + * chunks, but the list linkage is actually semi-duplicated in order to + * avoid extra arena_chunk_map_misc_t space overhead. + * + * LRU-----------------------------------------------------------MRU + * + * /-- arena ---\ + * | | + * | | + * |------------| /- chunk -\ + * ...->|chunks_cache|<--------------------------->| /----\ |<--... + * |------------| | |node| | + * | | | | | | + * | | /- run -\ /- run -\ | | | | + * | | | | | | | | | | + * | | | | | | | | | | + * |------------| |-------| |-------| | |----| | + * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... 
+ * |------------| |-------| |-------| | |----| | + * | | | | | | | | | | + * | | | | | | | \----/ | + * | | \-------/ \-------/ | | + * | | | | + * | | | | + * \------------/ \---------/ + */ + arena_runs_dirty_link_t runs_dirty; + extent_node_t chunks_cache; + + /* Decay-based purging state. */ + arena_decay_t decay; + + /* Extant huge allocations. */ + ql_head(extent_node_t) huge; + /* Synchronizes all huge allocation/update/deallocation. */ + malloc_mutex_t huge_mtx; + + /* + * Trees of chunks that were previously allocated (trees differ only in + * node ordering). These are used when allocating chunks, in an attempt + * to re-use address space. Depending on function, different tree + * orderings are needed, which is why there are two trees with the same + * contents. + */ + extent_tree_t chunks_szad_cached; + extent_tree_t chunks_ad_cached; + extent_tree_t chunks_szad_retained; + extent_tree_t chunks_ad_retained; + + malloc_mutex_t chunks_mtx; + /* Cache of nodes that were allocated via base_alloc(). */ + ql_head(extent_node_t) node_cache; + malloc_mutex_t node_cache_mtx; + + /* User-configurable chunk hook functions. */ + chunk_hooks_t chunk_hooks; + + /* bins is used to store trees of free regions. */ + arena_bin_t bins[NBINS]; + + /* + * Size-segregated address-ordered heaps of this arena's available runs, + * used for first-best-fit run allocation. Runs are quantized, i.e. + * they reside in the last heap which corresponds to a size class less + * than or equal to the run size. + */ + arena_run_heap_t runs_avail[NPSIZES]; +}; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + ticker_t decay_ticker; +}; +#endif /* JEMALLOC_ARENA_STRUCTS_B */ + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +static const size_t large_pad = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + PAGE +#else + 0 +#endif + ; + +extern purge_mode_t opt_purge; +extern const char *purge_mode_names[]; +extern ssize_t opt_lg_dirty_mult; +extern ssize_t opt_decay_time; + +extern arena_bin_info_t arena_bin_info[NBINS]; + +extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t map_misc_offset; +extern size_t arena_maxrun; /* Max run size for arenas. */ +extern size_t large_maxclass; /* Max large size class. */ +extern unsigned nlclasses; /* Number of large size classes. */ +extern unsigned nhclasses; /* Number of huge size classes. 
*/ + +#ifdef JEMALLOC_JET +typedef size_t (run_quantize_t)(size_t); +extern run_quantize_t *run_quantize_floor; +extern run_quantize_t *run_quantize_ceil; +#endif +void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, + bool cache); +void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, + bool cache); +extent_node_t *arena_node_alloc(tsdn_t *tsdn, arena_t *arena); +void arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool *zero); +void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, + size_t usize); +void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize); +void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize); +bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize, bool *zero); +ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); +bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, + ssize_t lg_dirty_mult); +ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); +bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); +void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); +void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, + tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, + bool zero); +#ifdef JEMALLOC_JET +typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t, + uint8_t); +extern arena_redzone_corruption_t *arena_redzone_corruption; +typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *); +extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; +#else +void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); +#endif +void arena_quarantine_junk_small(void *ptr, size_t usize); +void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, + bool zero); +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); +void arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); +void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); +void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind); +#ifdef JEMALLOC_JET +typedef void (arena_dalloc_junk_large_t)(void *, size_t); +extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; +#else +void arena_dalloc_junk_large(void *ptr, size_t usize); +#endif +void arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr); +void arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr); +#ifdef JEMALLOC_JET +typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); +extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; +#endif +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t size, size_t extra, bool zero); +void 
*arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache); +dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); +bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); +ssize_t arena_lg_dirty_mult_default_get(void); +bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); +ssize_t arena_decay_time_default_get(void); +bool arena_decay_time_default_set(ssize_t decay_time); +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, + ssize_t *decay_time, size_t *nactive, size_t *ndirty); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, + malloc_huge_stats_t *hstats); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind); +void arena_boot(void); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +arena_chunk_map_bits_t *arena_bitselm_get_mutable(arena_chunk_t *chunk, + size_t pageind); +const arena_chunk_map_bits_t *arena_bitselm_get_const( + const arena_chunk_t *chunk, size_t pageind); +arena_chunk_map_misc_t *arena_miscelm_get_mutable(arena_chunk_t *chunk, + size_t pageind); +const arena_chunk_map_misc_t *arena_miscelm_get_const( + const arena_chunk_t *chunk, size_t pageind); +size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm); +arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); +arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); +size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); +const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbitsp_read(const size_t *mapbitsp); +size_t arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_size_decode(size_t mapbits); +size_t arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_small_runind_get(const arena_chunk_t *chunk, + size_t pageind); +szind_t arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_decommitted_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind); +void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); +size_t 
arena_mapbits_size_encode(size_t size); +void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size); +void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, + size_t flags); +void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + szind_t binind); +void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, + size_t runind, szind_t binind, size_t flags); +void arena_metadata_allocated_add(arena_t *arena, size_t size); +void arena_metadata_allocated_sub(arena_t *arena, size_t size); +size_t arena_metadata_allocated_get(arena_t *arena); +bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); +szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, + const void *ptr); +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx); +void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); +void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path); +arena_t *arena_aalloc(const void *ptr); +size_t arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote); +void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +# ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * +arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (&chunk->map_bits[pageind-map_bias]); +} + +JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t * +arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind)); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return ((arena_chunk_map_misc_t *)((uintptr_t)chunk + + (uintptr_t)map_misc_offset) + pageind-map_bias); +} + +JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * +arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind)); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + + map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (pageind); +} + +JEMALLOC_ALWAYS_INLINE void * 
+arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); + + return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t + *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_run_to_miscelm(arena_run_t *run) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t + *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); +} + +JEMALLOC_ALWAYS_INLINE size_t * +arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) +{ + + return (&arena_bitselm_get_mutable(chunk, pageind)->bits); +} + +JEMALLOC_ALWAYS_INLINE const size_t * +arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind)); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbitsp_read(const size_t *mapbitsp) +{ + + return (*mapbitsp); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind))); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_decode(size_t mapbits) +{ + size_t size; + +#if CHUNK_MAP_SIZE_SHIFT > 0 + size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + size = mapbits & CHUNK_MAP_SIZE_MASK; +#else + size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; +#endif + + return (size); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + return (arena_mapbits_size_decode(mapbits)); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); + return (arena_mapbits_size_decode(mapbits)); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + CHUNK_MAP_ALLOCATED); + return (mapbits >> CHUNK_MAP_RUNIND_SHIFT); +} + +JEMALLOC_ALWAYS_INLINE szind_t +arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + szind_t binind; + + mapbits = arena_mapbits_get(chunk, pageind); + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + assert(binind < NBINS || binind == BININD_INVALID); + return (binind); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + 
(CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + return (mapbits & CHUNK_MAP_DIRTY); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + return (mapbits & CHUNK_MAP_UNZEROED); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + return (mapbits & CHUNK_MAP_DECOMMITTED); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_LARGE); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) +{ + + *mapbitsp = mapbits; +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_encode(size_t size) +{ + size_t mapbits; + +#if CHUNK_MAP_SIZE_SHIFT > 0 + mapbits = size << CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + mapbits = size; +#else + mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; +#endif + + assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); + return (mapbits); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + + assert((size & PAGE_MASK) == 0); + assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); + assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + CHUNK_MAP_BININD_INVALID | flags); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size) +{ + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); + + assert((size & PAGE_MASK) == 0); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + (mapbits & ~CHUNK_MAP_SIZE_MASK)); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + + assert((flags & CHUNK_MAP_UNZEROED) == flags); + arena_mapbitsp_write(mapbitsp, flags); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + + assert((size & PAGE_MASK) == 0); + assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); + assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + szind_t binind) +{ + size_t *mapbitsp = 
arena_mapbitsp_get_mutable(chunk, pageind); + size_t mapbits = arena_mapbitsp_read(mapbitsp); + + assert(binind <= BININD_INVALID); + assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS + + large_pad); + arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | + (binind << CHUNK_MAP_BININD_SHIFT)); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, + szind_t binind, size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); + + assert(binind < BININD_INVALID); + assert(pageind - runind >= map_bias); + assert((flags & CHUNK_MAP_UNZEROED) == flags); + arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) | + (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_INLINE void +arena_metadata_allocated_add(arena_t *arena, size_t size) +{ + + atomic_add_z(&arena->stats.metadata_allocated, size); +} + +JEMALLOC_INLINE void +arena_metadata_allocated_sub(arena_t *arena, size_t size) +{ + + atomic_sub_z(&arena->stats.metadata_allocated, size); +} + +JEMALLOC_INLINE size_t +arena_metadata_allocated_get(arena_t *arena) +{ + + return (atomic_read_z(&arena->stats.metadata_allocated)); +} + +JEMALLOC_INLINE bool +arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + assert(prof_interval != 0); + + arena->prof_accumbytes += accumbytes; + if (arena->prof_accumbytes >= prof_interval) { + arena->prof_accumbytes -= prof_interval; + return (true); + } + return (false); +} + +JEMALLOC_INLINE bool +arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (likely(prof_interval == 0)) + return (false); + return (arena_prof_accum_impl(arena, accumbytes)); +} + +JEMALLOC_INLINE bool +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (likely(prof_interval == 0)) + return (false); + + { + bool ret; + + malloc_mutex_lock(tsdn, &arena->lock); + ret = arena_prof_accum_impl(arena, accumbytes); + malloc_mutex_unlock(tsdn, &arena->lock); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +arena_ptr_small_binind_get(const void *ptr, size_t mapbits) +{ + szind_t binind; + + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + + if (config_debug) { + arena_chunk_t *chunk; + arena_t *arena; + size_t pageind; + size_t actual_mapbits; + size_t rpages_ind; + const arena_run_t *run; + arena_bin_t *bin; + szind_t run_binind, actual_binind; + arena_bin_info_t *bin_info; + const arena_chunk_map_misc_t *miscelm; + const void *rpages; + + assert(binind != BININD_INVALID); + assert(binind < NBINS); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = extent_node_arena_get(&chunk->node); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + actual_mapbits = arena_mapbits_get(chunk, pageind); + assert(mapbits == actual_mapbits); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, + pageind); + miscelm = arena_miscelm_get_const(chunk, rpages_ind); + run = &miscelm->run; + run_binind = run->binind; + bin = &arena->bins[run_binind]; + actual_binind = (szind_t)(bin - arena->bins); + assert(run_binind == actual_binind); + bin_info = &arena_bin_info[actual_binind]; + rpages = arena_miscelm_to_rpages(miscelm); + assert(((uintptr_t)ptr - ((uintptr_t)rpages + + (uintptr_t)bin_info->reg0_offset)) % 
bin_info->reg_interval + == 0); + } + + return (binind); +} +# endif /* JEMALLOC_ARENA_INLINE_A */ + +# ifdef JEMALLOC_ARENA_INLINE_B +JEMALLOC_INLINE szind_t +arena_bin_index(arena_t *arena, arena_bin_t *bin) +{ + szind_t binind = (szind_t)(bin - arena->bins); + assert(binind < NBINS); + return (binind); +} + +JEMALLOC_INLINE size_t +arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) +{ + size_t diff, interval, shift, regind; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + void *rpages = arena_miscelm_to_rpages(miscelm); + + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. + */ + assert((uintptr_t)ptr >= (uintptr_t)rpages + + (uintptr_t)bin_info->reg0_offset); + + /* + * Avoid doing division with a variable divisor if possible. Using + * actual division here can reduce allocator throughput by over 20%! + */ + diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages - + bin_info->reg0_offset); + + /* Rescale (factor powers of 2 out of the numerator and denominator). */ + interval = bin_info->reg_interval; + shift = ffs_zu(interval) - 1; + diff >>= shift; + interval >>= shift; + + if (interval == 1) { + /* The divisor was a power of 2. */ + regind = diff; + } else { + /* + * To divide by a number D that is not a power of two we + * multiply by (2^21 / D) and then right shift by 21 positions. + * + * X / D + * + * becomes + * + * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT + * + * We can omit the first three elements, because we never + * divide by 0, and 1 and 2 are both powers of two, which are + * handled above. + */ +#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) + static const size_t interval_invs[] = { + SIZE_INV(3), + SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), + SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), + SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), + SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), + SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), + SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), + SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) + }; + + if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) + + 2))) { + regind = (diff * interval_invs[interval - 3]) >> + SIZE_INV_SHIFT; + } else + regind = diff / interval; +#undef SIZE_INV +#undef SIZE_INV_SHIFT + } + assert(diff == regind * interval); + assert(regind < bin_info->nregs); + + return (regind); +} + +JEMALLOC_INLINE prof_tctx_t * +arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) +{ + prof_tctx_t *ret; + arena_chunk_t *chunk; + + cassert(config_prof); + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) + ret = (prof_tctx_t *)(uintptr_t)1U; + else { + arena_chunk_map_misc_t *elm = + arena_miscelm_get_mutable(chunk, pageind); + ret = atomic_read_p(&elm->prof_tctx_pun); + } + } else + ret = huge_prof_tctx_get(tsdn, ptr); + + return (ret); +} + +JEMALLOC_INLINE void +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx) +{ + arena_chunk_t *chunk; + + cassert(config_prof); + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + size_t pageind = ((uintptr_t)ptr 
- (uintptr_t)chunk) >> LG_PAGE; + + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + + if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx > + (uintptr_t)1U)) { + arena_chunk_map_misc_t *elm; + + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get_mutable(chunk, pageind); + atomic_write_p(&elm->prof_tctx_pun, tctx); + } else { + /* + * tctx must always be initialized for large runs. + * Assert that the surrounding conditional logic is + * equivalent to checking whether ptr refers to a large + * run. + */ + assert(arena_mapbits_large_get(chunk, pageind) == 0); + } + } else + huge_prof_tctx_set(tsdn, ptr, tctx); +} + +JEMALLOC_INLINE void +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && + (uintptr_t)old_tctx > (uintptr_t)1U))) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + size_t pageind; + arena_chunk_map_misc_t *elm; + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get_mutable(chunk, pageind); + atomic_write_p(&elm->prof_tctx_pun, + (prof_tctx_t *)(uintptr_t)1U); + } else + huge_prof_tctx_reset(tsdn, ptr); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) +{ + tsd_t *tsd; + ticker_t *decay_ticker; + + if (unlikely(tsdn_null(tsdn))) + return; + tsd = tsdn_tsd(tsdn); + decay_ticker = decay_ticker_get(tsd, arena->ind); + if (unlikely(decay_ticker == NULL)) + return; + if (unlikely(ticker_ticks(decay_ticker, nticks))) + arena_purge(tsdn, arena, false); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) +{ + + arena_decay_ticks(tsdn, arena, 1); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) +{ + + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(size != 0); + + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { + return (tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); + } + if (likely(size <= tcache_maxclass)) { + return (tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); + } + /* (size > tcache_maxclass) case falls through. */ + assert(size > tcache_maxclass); + } + + return (arena_malloc_hard(tsdn, arena, size, ind, zero)); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(const void *ptr) +{ + arena_chunk_t *chunk; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) + return (extent_node_arena_get(&chunk->node)); + else + return (huge_aalloc(ptr)); +} + +/* Return the size of the allocation pointed to by ptr. 
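+ * Under config_prof, demote selects how a sampled small allocation that
+ * was promoted to a large run is reported: true yields the original
+ * small size class, false the promoted large size. A hypothetical
+ * caller:
+ *
+ *   size_t app_usize = arena_salloc(tsdn, ptr, true);
+ *   size_t raw_usize = arena_salloc(tsdn, ptr, false);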
*/ +JEMALLOC_ALWAYS_INLINE size_t +arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) +{ + size_t ret; + arena_chunk_t *chunk; + size_t pageind; + szind_t binind; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + binind = arena_mapbits_binind_get(chunk, pageind); + if (unlikely(binind == BININD_INVALID || (config_prof && !demote + && arena_mapbits_large_get(chunk, pageind) != 0))) { + /* + * Large allocation. In the common case (demote), and + * as this is an inline function, most callers will only + * end up looking at binind to determine that ptr is a + * small allocation. + */ + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + ret = arena_mapbits_large_size_get(chunk, pageind) - + large_pad; + assert(ret != 0); + assert(pageind + ((ret+large_pad)>>LG_PAGE) <= + chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, + pageind+((ret+large_pad)>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large + * object). + */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, + arena_mapbits_get(chunk, pageind)) == binind); + ret = index2size(binind); + } + } else + ret = huge_salloc(tsdn, ptr); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) +{ + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { + /* Small allocation. */ + if (likely(tcache != NULL)) { + szind_t binind = arena_ptr_small_binind_get(ptr, + mapbits); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + binind, slow_path); + } else { + arena_dalloc_small(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr, pageind); + } + } else { + size_t size = arena_mapbits_large_size_get(chunk, + pageind); + + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + + if (likely(tcache != NULL) && size - large_pad <= + tcache_maxclass) { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size - large_pad, slow_path); + } else { + arena_dalloc_large(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr); + } + } + } else + huge_dalloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) +{ + arena_chunk_t *chunk; + + assert(!tsdn_null(tsdn) || tcache == NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + if (config_prof && opt_prof) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); + if (arena_mapbits_large_get(chunk, pageind) != 0) { + /* + * Make sure to use promoted size, not request + * size. + */ + size = arena_mapbits_large_size_get(chunk, + pageind) - large_pad; + } + } + assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false))); + + if (likely(size <= SMALL_MAXCLASS)) { + /* Small allocation. 
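+ * Because the size is supplied, the bin index comes straight from
+ * size2index(size), with no chunk-map read; compare arena_dalloc(),
+ * which must recover binind from the mapbits.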
*/ + if (likely(tcache != NULL)) { + szind_t binind = size2index(size); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + binind, slow_path); + } else { + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> LG_PAGE; + arena_dalloc_small(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr, pageind); + } + } else { + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + + if (likely(tcache != NULL) && size <= tcache_maxclass) { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size, slow_path); + } else { + arena_dalloc_large(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr); + } + } + } else + huge_dalloc(tsdn, ptr); +} +# endif /* JEMALLOC_ARENA_INLINE_B */ +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/assert.h b/memory/jemalloc/src/include/jemalloc/internal/assert.h new file mode 100644 index 000000000..6f8f7eb93 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/assert.h @@ -0,0 +1,45 @@ +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +#define assert(e) do { \ + if (unlikely(config_debug && !(e))) { \ + malloc_printf( \ + "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + "<jemalloc>: %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + unreachable(); \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef assert_not_implemented +#define assert_not_implemented(e) do { \ + if (unlikely(config_debug && !(e))) \ + not_implemented(); \ +} while (0) +#endif + + diff --git a/memory/jemalloc/src/include/jemalloc/internal/atomic.h b/memory/jemalloc/src/include/jemalloc/internal/atomic.h new file mode 100644 index 000000000..3f15ea149 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/atomic.h @@ -0,0 +1,651 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#define atomic_read_uint64(p) atomic_add_uint64(p, 0) +#define atomic_read_uint32(p) atomic_add_uint32(p, 0) +#define atomic_read_p(p) atomic_add_p(p, NULL) +#define atomic_read_z(p) atomic_add_z(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +/* + * All arithmetic functions return the arithmetic result of the atomic + * operation. Some atomic operation APIs return the value prior to mutation, in + * which case the following functions must redundantly compute the result so + * that it can be returned. These functions are normally inlined, so the extra + * operations can be optimized away if the return values aren't used by the + * callers. 
+ * + * <t> atomic_read_<t>(<t> *p) { return (*p); } + * <t> atomic_add_<t>(<t> *p, <t> x) { return (*p += x); } + * <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p -= x); } + * bool atomic_cas_<t>(<t> *p, <t> c, <t> s) + * { + * if (*p != c) + * return (true); + * *p = s; + * return (false); + * } + * void atomic_write_<t>(<t> *p, <t> x) { *p = x; } + */ + +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); +uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s); +void atomic_write_uint64(uint64_t *p, uint64_t x); +uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); +uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s); +void atomic_write_uint32(uint32_t *p, uint32_t x); +void *atomic_add_p(void **p, void *x); +void *atomic_sub_p(void **p, void *x); +bool atomic_cas_p(void **p, void *c, void *s); +void atomic_write_p(void **p, const void *x); +size_t atomic_add_z(size_t *p, size_t x); +size_t atomic_sub_z(size_t *p, size_t x); +bool atomic_cas_z(size_t *p, size_t c, size_t s); +void atomic_write_z(size_t *p, size_t x); +unsigned atomic_add_u(unsigned *p, unsigned x); +unsigned atomic_sub_u(unsigned *p, unsigned x); +bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); +void atomic_write_u(unsigned *p, unsigned x); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) +/******************************************************************************/ +/* 64-bit operations. */ +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# if (defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + uint64_t t = x; + + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + uint64_t t; + + x = (uint64_t)(-(int64_t)x); + t = x; + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgq %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. */ + : "memory" /* Clobbers. */ + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "xchgq %1, %0;" /* Lock is implied by xchgq. */ + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. 
*/ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_add(a, x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (!atomic_compare_exchange_strong(a, &c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + atomic_store(a, x); +} +# elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. + */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + atomic_store_rel_long(p, x); +} +# elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + uint64_t o; + + /*The documented OSAtomic*() API does not expose an atomic exchange. 
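+ * Writes are therefore emulated with a read/CAS loop: read the current
+ * value o, then retry atomic_cas_uint64(p, o, x) until it succeeds
+ * (the CAS wrappers return false on success, per the semantics above).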
*/ + do { + o = atomic_read_uint64(p); + } while (atomic_cas_uint64(p, o, x)); +} +# elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint64_t o; + + o = InterlockedCompareExchange64(p, s, c); + return (o != c); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + InterlockedExchange64(p, x); +} +# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + __sync_lock_test_and_set(p, x); +} +# else +# error "Missing implementation for 64-bit atomic operations" +# endif +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + uint32_t t = x; + + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + uint32_t t; + + x = (uint32_t)(-(int32_t)x); + t = x; + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgl %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. */ + : "memory" + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "xchgl %1, %0;" /* Lock is implied by xchgl. */ + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. 
*/ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_add(a, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (!atomic_compare_exchange_strong(a, &c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + atomic_store(a, x); +} +#elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!atomic_cmpset_32(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + atomic_store_rel_32(p, x); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + uint32_t o; + + /* The documented OSAtomic*() API does not expose an atomic exchange. */ + do { + o = atomic_read_uint32(p); + } while (atomic_cas_uint32(p, o, x)); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint32_t o; + + o = InterlockedCompareExchange(p, s, c); + return (o != c); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + InterlockedExchange(p, x); +} +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + __sync_lock_test_and_set(p, x); +} +#else +# error "Missing implementation for 32-bit atomic operations" +#endif + +/******************************************************************************/ +/* Pointer operations.
*/ +JEMALLOC_INLINE void * +atomic_add_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE void * +atomic_sub_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_p(void **p, void *c, void *s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_p(void **p, const void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +/******************************************************************************/ +/* size_t operations. */ +JEMALLOC_INLINE size_t +atomic_add_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE size_t +atomic_sub_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_z(size_t *p, size_t c, size_t s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +/******************************************************************************/ +/* unsigned operations. 
*/ +JEMALLOC_INLINE unsigned +atomic_add_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE unsigned +atomic_sub_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_u(unsigned *p, unsigned c, unsigned s) +{ + +#if (LG_SIZEOF_INT == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_INT == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_INT == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +/******************************************************************************/ +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/base.h b/memory/jemalloc/src/include/jemalloc/internal/base.h new file mode 100644 index 000000000..d6b81e162 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/base.h @@ -0,0 +1,25 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *base_alloc(tsdn_t *tsdn, size_t size); +void base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, + size_t *mapped); +bool base_boot(void); +void base_prefork(tsdn_t *tsdn); +void base_postfork_parent(tsdn_t *tsdn); +void base_postfork_child(tsdn_t *tsdn); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/bitmap.h b/memory/jemalloc/src/include/jemalloc/internal/bitmap.h new file mode 100644 index 000000000..36f38b59c --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/bitmap.h @@ -0,0 +1,274 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) + +typedef struct bitmap_level_s bitmap_level_t; +typedef struct bitmap_info_s bitmap_info_t; +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Number of bits per group. */ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* + * Do some analysis on how big the bitmap is before we use a tree. 
For a brute + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define USE_TREE +#endif + +/* Number of groups required to store a given number of bits. */ +#define BITMAP_BITS2GROUPS(nbits) \ + ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) + +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. + */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. + */ +#ifdef USE_TREE + +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + +/* Maximum number of levels possible. */ +#define BITMAP_MAX_LEVELS \ + (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + +#else /* USE_TREE */ + +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) + +#endif /* USE_TREE */ + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. */ + size_t group_offset; +}; + +struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + +#ifdef USE_TREE + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* USE_TREE */ + /* Number of groups necessary for nbits. 
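Worked example of the group math above, assuming 64-bit groups (LG_BITMAP_GROUP_NBITS == 6) and a 512-bit bitmap: the bottom level needs ceil(512/64) == 8 groups and the level above it 1 group, so a two-level tree uses 9 groups in total. Checked in a self-contained sketch:

#include <assert.h>

/* Same shape as BITMAP_BITS2GROUPS above, with LG_BITMAP_GROUP_NBITS
 * fixed at 6 (64-bit groups). */
#define BITS2GROUPS(nbits) (((nbits) + 63) >> 6)

int
main(void)
{
	assert(BITS2GROUPS(512) == 8);			/* L0 */
	assert(BITS2GROUPS(BITS2GROUPS(512)) == 1);	/* L1 */
	assert(BITS2GROUPS(512) +
	    BITS2GROUPS(BITS2GROUPS(512)) == 9);	/* 2-level total */
	return (0);
}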
*/ + size_t ngroups; +#endif /* USE_TREE */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); +size_t bitmap_size(const bitmap_info_t *binfo); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) +JEMALLOC_INLINE bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ +#ifdef USE_TREE + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. */ + return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) + return (false); + } + return (true); +#endif +} + +JEMALLOC_INLINE bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t g; + + assert(bit < binfo->nbits); + goff = bit >> LG_BITMAP_GROUP_NBITS; + g = bitmap[goff]; + return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); +} + +JEMALLOC_INLINE void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + + assert(bit < binfo->nbits); + assert(!bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE + /* Propagate group state transitions up the tree. */ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) + break; + } + } +#endif +} + +/* sfu: set first unset. 
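In this bitmap a physical 1 bit means the logical bit is *unset* (the slot is free): bitmap_get() negates the masked group, bitmap_set() clears a physical bit, and a zero root group therefore means the bitmap is full. That inversion is what lets bitmap_sfu() below find a free slot with a single find-first-set per level. A flat, single-level sketch of the same scheme, with hypothetical stand-ins for the real types:

#include <assert.h>
#include <limits.h>
#include <stddef.h>

#define NBITS		128
#define GROUP_NBITS	(sizeof(unsigned long) * CHAR_BIT)
#define NGROUPS		((NBITS + GROUP_NBITS - 1) / GROUP_NBITS)

static unsigned long groups[NGROUPS];

static void
bm_init(void)
{
	size_t i;

	for (i = 0; i < NGROUPS; i++)
		groups[i] = ~0UL;	/* Physical 1 == logically free. */
}

static size_t
bm_sfu(void)	/* Set first unset; returns its index. */
{
	size_t i;

	for (i = 0; i < NGROUPS; i++) {
		if (groups[i] != 0) {
			int bit = __builtin_ctzl(groups[i]);

			groups[i] &= ~(1UL << bit);	/* Mark allocated. */
			return (i * GROUP_NBITS + bit);
		}
	}
	assert(0);	/* Caller must check for fullness first. */
	return (0);
}

int
main(void)
{
	bm_init();
	assert(bm_sfu() == 0);
	assert(bm_sfu() == 1);
	return (0);
}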
*/ +JEMALLOC_INLINE size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t bit; + bitmap_t g; + unsigned i; + + assert(!bitmap_full(bitmap, binfo)); + +#ifdef USE_TREE + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffs_lu(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + } +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); +#endif + bitmap_set(bitmap, binfo, bit); + return (bit); +} + +JEMALLOC_INLINE void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + UNUSED bool propagate; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE + /* Propagate group state transitions up the tree. */ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (!propagate) + break; + } + } +#endif /* USE_TREE */ +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/chunk.h b/memory/jemalloc/src/include/jemalloc/internal/chunk.h new file mode 100644 index 000000000..38c9a012d --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/chunk.h @@ -0,0 +1,96 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Size and alignment of memory chunks that are allocated by the OS's virtual + * memory system. + */ +#define LG_CHUNK_DEFAULT 21 + +/* Return the chunk address for allocation address a. */ +#define CHUNK_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~chunksize_mask)) + +/* Return the chunk offset of address a. */ +#define CHUNK_ADDR2OFFSET(a) \ + ((size_t)((uintptr_t)(a) & chunksize_mask)) + +/* Return the smallest chunk multiple that is >= s. */ +#define CHUNK_CEILING(s) \ + (((s) + chunksize_mask) & ~chunksize_mask) + +#define CHUNK_HOOKS_INITIALIZER { \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL \ +} + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern size_t opt_lg_chunk; +extern const char *opt_dss; + +extern rtree_t chunks_rtree; + +extern size_t chunksize; +extern size_t chunksize_mask; /* (chunksize - 1). 
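Worked example of the chunk address macros above, assuming the default 2 MiB chunks (LG_CHUNK_DEFAULT == 21): because chunks are naturally aligned, masking an interior pointer with chunksize_mask splits it into its chunk base and offset.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

int
main(void)
{
	size_t chunksize = (size_t)1 << 21;	/* 2 MiB */
	size_t chunksize_mask = chunksize - 1;
	uintptr_t a = (uintptr_t)7 * chunksize + 0x1234;

	/* CHUNK_ADDR2BASE: strip the offset bits. */
	assert((a & ~(uintptr_t)chunksize_mask) == (uintptr_t)7 * chunksize);
	/* CHUNK_ADDR2OFFSET: keep only the offset bits. */
	assert((a & chunksize_mask) == 0x1234);
	/* CHUNK_CEILING: smallest chunk multiple >= s. */
	assert(((chunksize + 1 + chunksize_mask) & ~chunksize_mask) ==
	    2 * chunksize);
	return (0);
}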
*/ +extern size_t chunk_npages; + +extern const chunk_hooks_t chunk_hooks_default; + +chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); +chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, + const chunk_hooks_t *chunk_hooks); + +bool chunk_register(tsdn_t *tsdn, const void *chunk, + const extent_node_t *node); +void chunk_deregister(const void *chunk, const extent_node_t *node); +void *chunk_alloc_base(size_t size); +void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit, bool dalloc_node); +void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); +void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); +void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, + bool committed); +bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, + size_t length); +bool chunk_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +extent_node_t *chunk_lookup(const void *chunk, bool dependent); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) +JEMALLOC_INLINE extent_node_t * +chunk_lookup(const void *ptr, bool dependent) +{ + + return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent)); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + +#include "jemalloc/internal/chunk_dss.h" +#include "jemalloc/internal/chunk_mmap.h" diff --git a/memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h b/memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h new file mode 100644 index 000000000..da8511ba0 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/chunk_dss.h @@ -0,0 +1,37 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef enum { + dss_prec_disabled = 0, + dss_prec_primary = 1, + dss_prec_secondary = 2, + + dss_prec_limit = 3 +} dss_prec_t; +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +extern const char *dss_prec_names[]; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +dss_prec_t chunk_dss_prec_get(void); +bool chunk_dss_prec_set(dss_prec_t dss_prec); +void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool chunk_in_dss(void *chunk); +bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); +void chunk_dss_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/chunk_mmap.h b/memory/jemalloc/src/include/jemalloc/internal/chunk_mmap.h 
new file mode 100644 index 000000000..6f2d0ac2e --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/chunk_mmap.h @@ -0,0 +1,21 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); +bool chunk_dalloc_mmap(void *chunk, size_t size); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/ckh.h b/memory/jemalloc/src/include/jemalloc/internal/ckh.h new file mode 100644 index 000000000..f75ad90b7 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/ckh.h @@ -0,0 +1,86 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ckh_s ckh_t; +typedef struct ckhc_s ckhc_t; + +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, size_t[2]); +typedef bool ckh_keycomp_t (const void *, const void *); + +/* Maintain counters used to get an idea of performance. */ +/* #define CKH_COUNT */ +/* Print counter values in ckh_delete() (requires CKH_COUNT). */ +/* #define CKH_VERBOSE */ + +/* + * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit + * one bucket per L1 cache line. + */ +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Hash table cell. */ +struct ckhc_s { + const void *key; + const void *data; +}; + +struct ckh_s { +#ifdef CKH_COUNT + /* Counters used to get an idea of performance. */ + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; +#endif + + /* Used for pseudo-random number generation. */ + uint64_t prng_state; + + /* Total number of items. */ + size_t count; + + /* + * Minimum and current number of hash table buckets. There are + * 2^LG_CKH_BUCKET_CELLS cells per bucket. + */ + unsigned lg_minbuckets; + unsigned lg_curbuckets; + + /* Hash and comparison functions. */ + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; + + /* Hash table with 2^lg_curbuckets buckets. 
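The LG_CKH_BUCKET_CELLS definition above sizes buckets to a cache line: with a 64-byte line (LG_CACHELINE == 6) and 8-byte pointers (LG_SIZEOF_PTR == 3) it evaluates to 2, i.e. four cells per bucket, and each ckhc_t cell is two pointers (16 bytes), so a bucket occupies exactly one line. Checked in a sketch (platform values assumed, not probed):

#include <assert.h>

int
main(void)
{
	const unsigned lg_cacheline = 6;	/* 64-byte line (assumed). */
	const unsigned lg_sizeof_ptr = 3;	/* 8-byte pointers (assumed). */
	unsigned lg_cells = lg_cacheline - lg_sizeof_ptr - 1;
	unsigned cells = 1U << lg_cells;
	unsigned cell_bytes = 2U << lg_sizeof_ptr;	/* key + data. */

	assert(cells == 4);
	assert(cells * cell_bytes == 1U << lg_cacheline); /* One line. */
	return (0);
}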
*/ + ckhc_t *tab; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); +size_t ckh_count(ckh_t *ckh); +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, + void **data); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); +void ckh_string_hash(const void *key, size_t r_hash[2]); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); +bool ckh_pointer_keycomp(const void *k1, const void *k2); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/ctl.h b/memory/jemalloc/src/include/jemalloc/internal/ctl.h new file mode 100644 index 000000000..af0f6d7c5 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/ctl.h @@ -0,0 +1,118 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; +typedef struct ctl_arena_stats_s ctl_arena_stats_t; +typedef struct ctl_stats_s ctl_stats_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ctl_node_s { + bool named; +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + unsigned nchildren; + const ctl_node_t *children; + int (*ctl)(tsd_t *, const size_t *, size_t, void *, + size_t *, void *, size_t); +}; + +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, + size_t); +}; + +struct ctl_arena_stats_s { + bool initialized; + unsigned nthreads; + const char *dss; + ssize_t lg_dirty_mult; + ssize_t decay_time; + size_t pactive; + size_t pdirty; + + /* The remainder are only populated if config_stats is true. */ + + arena_stats_t astats; + + /* Aggregate stats for small size classes, based on bin stats. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t bstats[NBINS]; + malloc_large_stats_t *lstats; /* nlclasses elements. */ + malloc_huge_stats_t *hstats; /* nhclasses elements. */ +}; + +struct ctl_stats_s { + size_t allocated; + size_t active; + size_t metadata; + size_t resident; + size_t mapped; + size_t retained; + unsigned narenas; + ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
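The ctl_stats_s totals above are what the mallctl name tree serves out. A minimal usage sketch, assuming the library's je_mallctl() entry point (its prototype normally comes from jemalloc.h; stats are snapshots, so writing "epoch" refreshes them first):

#include <stdint.h>
#include <stdio.h>

/* Declared here only so the sketch is self-contained. */
int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen);

int
main(void)
{
	uint64_t epoch = 1;
	size_t allocated, sz = sizeof(allocated);

	/* Refresh the cached statistics snapshot. */
	je_mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch));
	if (je_mallctl("stats.allocated", &allocated, &sz, NULL, 0) == 0)
		printf("allocated: %zu\n", allocated);
	return (0);
}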
*/ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, + size_t *miblenp); + +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +bool ctl_boot(void); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_printf( \ + "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlnametomib(name, mibp, miblenp) do { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf("<jemalloc>: Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ + newlen) != 0) { \ + malloc_write( \ + "<jemalloc>: Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ +} while (0) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/memory/jemalloc/src/include/jemalloc/internal/extent.h b/memory/jemalloc/src/include/jemalloc/internal/extent.h new file mode 100644 index 000000000..49d76a57f --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/extent.h @@ -0,0 +1,239 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct extent_node_s extent_node_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Tree of extents. Use accessor functions for en_* fields. */ +struct extent_node_s { + /* Arena from which this extent came, if any. */ + arena_t *en_arena; + + /* Pointer to the extent that this tree node is responsible for. */ + void *en_addr; + + /* Total region size. */ + size_t en_size; + + /* + * The zeroed flag is used by chunk recycling code to track whether + * memory is zero-filled. + */ + bool en_zeroed; + + /* + * True if physical memory is committed to the extent, whether + * explicitly or implicitly as on a system that overcommits and + * satisfies physical memory needs on demand via soft page faults. + */ + bool en_committed; + + /* + * The achunk flag is used to validate that huge allocation lookups + * don't return arena chunks. + */ + bool en_achunk; + + /* Profile counters, used for huge objects. */ + prof_tctx_t *en_prof_tctx; + + /* Linkage for arena's runs_dirty and chunks_cache rings. */ + arena_runs_dirty_link_t rd; + qr(extent_node_t) cc_link; + + union { + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) szad_link; + + /* Linkage for arena's achunks, huge, and node_cache lists. */ + ql_elm(extent_node_t) ql_link; + }; + + /* Linkage for the address-ordered tree. 
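The two tree linkages here support recycling: the szad tree orders extents by size with address as a tie-break, so a best-fit lookup tends to return the lowest-addressed extent of the smallest sufficient size, while the ad tree orders purely by address for coalescing. A sketch of the comparator idea only (hypothetical standalone struct, not the rb-tree machinery itself):

#include <stddef.h>
#include <stdint.h>

struct ext {
	void	*addr;
	size_t	size;
};

/* Size-major, address-minor ordering, as used for best-fit. */
static int
ext_szad_comp(const struct ext *a, const struct ext *b)
{
	if (a->size != b->size)
		return ((a->size < b->size) ? -1 : 1);
	if (a->addr != b->addr)
		return (((uintptr_t)a->addr < (uintptr_t)b->addr) ? -1 : 1);
	return (0);
}

int
main(void)
{
	struct ext a = { (void *)0x1000, 4096 };
	struct ext b = { (void *)0x2000, 4096 };

	/* Equal sizes: the lower address sorts first. */
	return (ext_szad_comp(&a, &b) < 0 ? 0 : 1);
}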
*/ + rb_node(extent_node_t) ad_link; +}; +typedef rb_tree(extent_node_t) extent_tree_t; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) + +rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +arena_t *extent_node_arena_get(const extent_node_t *node); +void *extent_node_addr_get(const extent_node_t *node); +size_t extent_node_size_get(const extent_node_t *node); +bool extent_node_zeroed_get(const extent_node_t *node); +bool extent_node_committed_get(const extent_node_t *node); +bool extent_node_achunk_get(const extent_node_t *node); +prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); +void extent_node_arena_set(extent_node_t *node, arena_t *arena); +void extent_node_addr_set(extent_node_t *node, void *addr); +void extent_node_size_set(extent_node_t *node, size_t size); +void extent_node_zeroed_set(extent_node_t *node, bool zeroed); +void extent_node_committed_set(extent_node_t *node, bool committed); +void extent_node_achunk_set(extent_node_t *node, bool achunk); +void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); +void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, + size_t size, bool zeroed, bool committed); +void extent_node_dirty_linkage_init(extent_node_t *node); +void extent_node_dirty_insert(extent_node_t *node, + arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); +void extent_node_dirty_remove(extent_node_t *node); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) +JEMALLOC_INLINE arena_t * +extent_node_arena_get(const extent_node_t *node) +{ + + return (node->en_arena); +} + +JEMALLOC_INLINE void * +extent_node_addr_get(const extent_node_t *node) +{ + + return (node->en_addr); +} + +JEMALLOC_INLINE size_t +extent_node_size_get(const extent_node_t *node) +{ + + return (node->en_size); +} + +JEMALLOC_INLINE bool +extent_node_zeroed_get(const extent_node_t *node) +{ + + return (node->en_zeroed); +} + +JEMALLOC_INLINE bool +extent_node_committed_get(const extent_node_t *node) +{ + + assert(!node->en_achunk); + return (node->en_committed); +} + +JEMALLOC_INLINE bool +extent_node_achunk_get(const extent_node_t *node) +{ + + return (node->en_achunk); +} + +JEMALLOC_INLINE prof_tctx_t * +extent_node_prof_tctx_get(const extent_node_t *node) +{ + + return (node->en_prof_tctx); +} + +JEMALLOC_INLINE void +extent_node_arena_set(extent_node_t *node, arena_t *arena) +{ + + node->en_arena = arena; +} + +JEMALLOC_INLINE void +extent_node_addr_set(extent_node_t *node, void *addr) +{ + + node->en_addr = addr; +} + +JEMALLOC_INLINE void +extent_node_size_set(extent_node_t *node, size_t size) +{ + + node->en_size = size; +} + +JEMALLOC_INLINE void +extent_node_zeroed_set(extent_node_t *node, bool zeroed) +{ + + node->en_zeroed = zeroed; +} + +JEMALLOC_INLINE void +extent_node_committed_set(extent_node_t *node, bool committed) +{ + + node->en_committed = committed; +} + +JEMALLOC_INLINE void +extent_node_achunk_set(extent_node_t *node, bool achunk) +{ + + node->en_achunk = achunk; +} + +JEMALLOC_INLINE void +extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) +{ + + node->en_prof_tctx = tctx; +} + +JEMALLOC_INLINE void +extent_node_init(extent_node_t 
*node, arena_t *arena, void *addr, size_t size, + bool zeroed, bool committed) +{ + + extent_node_arena_set(node, arena); + extent_node_addr_set(node, addr); + extent_node_size_set(node, size); + extent_node_zeroed_set(node, zeroed); + extent_node_committed_set(node, committed); + extent_node_achunk_set(node, false); + if (config_prof) + extent_node_prof_tctx_set(node, NULL); +} + +JEMALLOC_INLINE void +extent_node_dirty_linkage_init(extent_node_t *node) +{ + + qr_new(&node->rd, rd_link); + qr_new(node, cc_link); +} + +JEMALLOC_INLINE void +extent_node_dirty_insert(extent_node_t *node, + arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty) +{ + + qr_meld(runs_dirty, &node->rd, rd_link); + qr_meld(chunks_dirty, node, cc_link); +} + +JEMALLOC_INLINE void +extent_node_dirty_remove(extent_node_t *node) +{ + + qr_remove(&node->rd, rd_link); + qr_remove(node, cc_link); +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/memory/jemalloc/src/include/jemalloc/internal/hash.h b/memory/jemalloc/src/include/jemalloc/internal/hash.h new file mode 100644 index 000000000..1ff2d9a05 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/hash.h @@ -0,0 +1,357 @@ +/* + * The following hash function is based on MurmurHash3, placed into the public + * domain by Austin Appleby. See https://github.com/aappleby/smhasher for + * details. + */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint32_t hash_x86_32(const void *key, int len, uint32_t seed); +void hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]); +void hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]); +void hash(const void *key, size_t len, const uint32_t seed, + size_t r_hash[2]); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) +/******************************************************************************/ +/* Internal implementation. */ +JEMALLOC_INLINE uint32_t +hash_rotl_32(uint32_t x, int8_t r) +{ + + return ((x << r) | (x >> (32 - r))); +} + +JEMALLOC_INLINE uint64_t +hash_rotl_64(uint64_t x, int8_t r) +{ + + return ((x << r) | (x >> (64 - r))); +} + +JEMALLOC_INLINE uint32_t +hash_get_block_32(const uint32_t *p, int i) +{ + + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + uint32_t ret; + + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); + return (ret); + } + + return (p[i]); +} + +JEMALLOC_INLINE uint64_t +hash_get_block_64(const uint64_t *p, int i) +{ + + /* Handle unaligned read. 
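The memcpy() path above is the portable idiom for unaligned loads: dereferencing a misaligned pointer is undefined behavior on strict-alignment targets, whereas copying the bytes is always defined and modern compilers lower it to a single load where the hardware allows. A standalone sketch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t
load64(const void *p)
{
	uint64_t v;

	memcpy(&v, p, sizeof(v));	/* Well defined for any alignment. */
	return (v);
}

int
main(void)
{
	uint8_t buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9};

	/* Misaligned source address is fine here. */
	printf("%016llx\n", (unsigned long long)load64(&buf[1]));
	return (0);
}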
*/ + if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + uint64_t ret; + + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); + return (ret); + } + + return (p[i]); +} + +JEMALLOC_INLINE uint32_t +hash_fmix_32(uint32_t h) +{ + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return (h); +} + +JEMALLOC_INLINE uint64_t +hash_fmix_64(uint64_t k) +{ + + k ^= k >> 33; + k *= KQU(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= KQU(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return (k); +} + +JEMALLOC_INLINE uint32_t +hash_x86_32(const void *key, int len, uint32_t seed) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i); + + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = hash_rotl_32(h1, 13); + h1 = h1*5 + 0xe6546b64; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t *) (data + nblocks*4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); + k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; + + h1 = hash_fmix_32(h1); + + return (h1); +} + +UNUSED JEMALLOC_INLINE void +hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t * data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*16); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i*4 + 0); + uint32_t k2 = hash_get_block_32(blocks, i*4 + 1); + uint32_t k3 = hash_get_block_32(blocks, i*4 + 2); + uint32_t k4 = hash_get_block_32(blocks, i*4 + 3); + + k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + + h1 = hash_rotl_32(h1, 19); h1 += h2; + h1 = h1*5 + 0x561ccd1b; + + k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + + h2 = hash_rotl_32(h2, 17); h2 += h3; + h2 = h2*5 + 0x0bcaa747; + + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + + h3 = hash_rotl_32(h3, 15); h3 += h4; + h3 = h3*5 + 0x96cd1c35; + + k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + + h4 = hash_rotl_32(h4, 13); h4 += h1; + h4 = h4*5 + 0x32ac3b17; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t *) (data + nblocks*16); + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch (len & 15) { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 
1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = hash_fmix_32(h1); + h2 = hash_fmix_32(h2); + h3 = hash_fmix_32(h3); + h4 = hash_fmix_32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + r_out[0] = (((uint64_t) h2) << 32) | h1; + r_out[1] = (((uint64_t) h4) << 32) | h3; +} + +UNUSED JEMALLOC_INLINE void +hash_x64_128(const void *key, const int len, const uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = KQU(0x87c37b91114253d5); + const uint64_t c2 = KQU(0x4cf5ad432745937f); + + /* body */ + { + const uint64_t *blocks = (const uint64_t *) (data); + int i; + + for (i = 0; i < nblocks; i++) { + uint64_t k1 = hash_get_block_64(blocks, i*2 + 0); + uint64_t k2 = hash_get_block_64(blocks, i*2 + 1); + + k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + + h1 = hash_rotl_64(h1, 27); h1 += h2; + h1 = h1*5 + 0x52dce729; + + k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + + h2 = hash_rotl_64(h2, 31); h2 += h1; + h2 = h2*5 + 0x38495ab5; + } + } + + /* tail */ + { + const uint8_t *tail = (const uint8_t*)(data + nblocks*16); + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch (len & 15) { + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; + k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; + k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + } + } + + /* finalization */ + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = hash_fmix_64(h1); + h2 = hash_fmix_64(h2); + + h1 += h2; + h2 += h1; + + r_out[0] = h1; + r_out[1] = h2; +} + +/******************************************************************************/ +/* API. */ +JEMALLOC_INLINE void +hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) +{ + + assert(len <= INT_MAX); /* Unfortunate implementation limitation. 
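The 64-bit finalizer defined above (hash_fmix_64) is a bijective avalanche mixer and is useful on its own as a cheap standalone integer hash. A self-contained copy with the same constants, for illustration:

#include <stdint.h>
#include <stdio.h>

static uint64_t
fmix64(uint64_t k)
{
	k ^= k >> 33;
	k *= 0xff51afd7ed558ccdULL;
	k ^= k >> 33;
	k *= 0xc4ceb9fe1a85ec53ULL;
	k ^= k >> 33;
	return (k);
}

int
main(void)
{
	/* Nearby inputs map to wildly different outputs. */
	printf("%016llx\n%016llx\n",
	    (unsigned long long)fmix64(1),
	    (unsigned long long)fmix64(2));
	return (0);
}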
*/ + +#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) + hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); +#else + { + uint64_t hashes[2]; + hash_x86_128(key, (int)len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; + } +#endif +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/huge.h b/memory/jemalloc/src/include/jemalloc/internal/huge.h new file mode 100644 index 000000000..22184d9bb --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/huge.h @@ -0,0 +1,35 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero); +bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero); +void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache); +#ifdef JEMALLOC_JET +typedef void (huge_dalloc_junk_t)(void *, size_t); +extern huge_dalloc_junk_t *huge_dalloc_junk; +#endif +void huge_dalloc(tsdn_t *tsdn, void *ptr); +arena_t *huge_aalloc(const void *ptr); +size_t huge_salloc(tsdn_t *tsdn, const void *ptr); +prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in new file mode 100644 index 000000000..fdc8fef9d --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal.h.in @@ -0,0 +1,1288 @@ +#ifndef JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H + +#include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" + +#ifdef JEMALLOC_UTRACE +#include <sys/ktrace.h> +#endif + +#define JEMALLOC_NO_DEMANGLE +#ifdef JEMALLOC_JET +# define JEMALLOC_N(n) jet_##n +# include "jemalloc/internal/public_namespace.h" +# define JEMALLOC_NO_RENAME +# include "../jemalloc@install_suffix@.h" +# undef JEMALLOC_NO_RENAME +#else +# define JEMALLOC_N(n) @private_namespace@##n +# include "../jemalloc@install_suffix@.h" +#endif +#include "jemalloc/internal/private_namespace.h" + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool have_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const char * const config_malloc_conf = 
JEMALLOC_CONFIG_MALLOC_CONF; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool maps_coalesce = +#ifdef JEMALLOC_MAPS_COALESCE + true +#else + false +#endif + ; +static const bool config_munmap = +#ifdef JEMALLOC_MUNMAP + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; +static const bool config_valgrind = +#ifdef JEMALLOC_VALGRIND + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_ivsalloc = +#ifdef JEMALLOC_IVSALLOC + true +#else + false +#endif + ; +static const bool config_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; + +#ifdef JEMALLOC_C11ATOMICS +#include <stdatomic.h> +#endif + +#ifdef JEMALLOC_ATOMIC9 +#include <machine/atomic.h> +#endif + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include <libkern/OSAtomic.h> +#endif + +#ifdef JEMALLOC_ZONE +#include <mach/mach_error.h> +#include <mach/mach_init.h> +#include <mach/vm_map.h> +#include <malloc/malloc.h> +#endif + +#include "jemalloc/internal/ph.h" +#ifndef __PGI +#define RB_COMPACT +#endif +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/qr.h" +#include "jemalloc/internal/ql.h" + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. In order to reduce the effect on + * visual code flow, read the header files in multiple passes, with one of the + * following cpp variables defined during each pass: + * + * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. + */ +/******************************************************************************/ +#define JEMALLOC_H_TYPES + +#include "jemalloc/internal/jemalloc_internal_macros.h" + +/* Page size index type. */ +typedef unsigned pszind_t; + +/* Size class index type. */ +typedef unsigned szind_t; + +/* + * Flags bits: + * + * a: arena + * t: tcache + * 0: unused + * z: zero + * n: alignment + * + * aaaaaaaa aaaatttt tttttttt 0znnnnnn + */ +#define MALLOCX_ARENA_MASK ((int)~0xfffff) +#define MALLOCX_ARENA_MAX 0xffe +#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU) +#define MALLOCX_TCACHE_MAX 0xffd +#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) +/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags.
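Worked example of the flags layout just described: the low 6 bits carry lg(alignment), bits 8..19 carry the tcache id biased by 2, and bits 20..31 the arena id biased by 1, matching the GET macros that follow. The constants are restated locally so the sketch stands alone:

#include <assert.h>
#include <stddef.h>

#define LG_ALIGN_MASK	0x3f
#define TCACHE_SHIFT	8
#define ARENA_SHIFT	20

int
main(void)
{
	unsigned arena = 3, tcache = 7, lg_align = 4;
	int flags = (int)(((arena + 1) << ARENA_SHIFT) |
	    ((tcache + 2) << TCACHE_SHIFT) | lg_align);

	/* Decode, mirroring MALLOCX_*_GET() above. */
	assert(((size_t)1 << (flags & LG_ALIGN_MASK)) == 16);
	assert(((((unsigned)flags) >> TCACHE_SHIFT) & 0xfff) - 2 == tcache);
	assert((((unsigned)flags) >> ARENA_SHIFT) - 1 == arena);
	return (0);
}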
*/ +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) +#define MALLOCX_ZERO_GET(flags) \ + ((bool)(flags & MALLOCX_ZERO)) + +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(((unsigned)flags) >> 20)) - 1) + +/* Smallest size class to support. */ +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __aarch64__ +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __riscv__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# ifdef __SH4__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" +# endif +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) + +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. + */ +#define LG_CACHELINE 6 +#define CACHELINE 64 +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. */ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* Page size. LG_PAGE is determined by the configure script. */ +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif +#define PAGE ((size_t)(1U << LG_PAGE)) +#define PAGE_MASK ((size_t)(PAGE - 1)) + +/* Return the page base address for the page containing address a. */ +#define PAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~PAGE_MASK)) + +/* Return the smallest pagesize multiple that is >= s. */ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) + +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & (-(alignment)))) + +/* Return the offset between a and the nearest aligned address at or below a. 
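These macros, like QUANTUM_CEILING, CACHELINE_CEILING, and PAGE_CEILING above, all rely on the same power-of-two rounding trick: masking off the low bits rounds down, adding alignment-1 first rounds up, and the low bits alone are the offset. A worked example (note ~(align - 1) equals -align for power-of-two alignments, the form the macros use):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uintptr_t a = 0x12345;
	uintptr_t align = 0x1000;	/* 4 KiB; must be a power of two. */

	assert((a & ~(align - 1)) == 0x12000);		/* Round down. */
	assert((a & (align - 1)) == 0x345);		/* Offset. */
	assert(((a + align - 1) & ~(align - 1)) == 0x13000); /* Round up. */
	return (0);
}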
*/ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & (-(alignment))) + +/* Declare a variable-length array. */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include <malloc.h> +# define alloca _alloca +# else +# ifdef JEMALLOC_HAS_ALLOCA_H +# include <alloca.h> +# else +# include <stdlib.h> +# endif +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * (count)) +#else +# define VARIABLE_ARRAY(type, name, count) type name[(count)] +#endif + +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/valgrind.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_TYPES +/******************************************************************************/ +#define JEMALLOC_H_STRUCTS + +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/valgrind.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" +#define JEMALLOC_ARENA_STRUCTS_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_STRUCTS_A +#include "jemalloc/internal/extent.h" +#define JEMALLOC_ARENA_STRUCTS_B +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_STRUCTS_B +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" + +#include "jemalloc/internal/tsd.h" + +#undef JEMALLOC_H_STRUCTS +/******************************************************************************/ +#define JEMALLOC_H_EXTERNS + +extern bool opt_abort; +extern const char *opt_junk; +extern bool opt_junk_alloc; +extern bool opt_junk_free; +extern size_t opt_quarantine; +extern bool opt_redzone; +extern bool opt_utrace; +extern bool opt_xmalloc; +extern bool opt_zero; +extern unsigned opt_narenas; + +extern 
bool in_valgrind; + +/* Number of CPUs. */ +extern unsigned ncpus; + +/* Number of arenas used for automatic multiplexing of threads and arenas. */ +extern unsigned narenas_auto; + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +extern arena_t **arenas; + +/* + * pind2sz_tab encodes the same information as could be computed by + * pind2sz_compute(). + */ +extern size_t const pind2sz_tab[NPSIZES]; +/* + * index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by index2size_compute(). + */ +extern size_t const index2size_tab[NSIZES]; +/* + * size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via size2index(). + */ +extern uint8_t const size2index_tab[]; + +arena_t *a0get(void); +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); +unsigned narenas_total_get(void); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +void thread_allocated_cleanup(tsd_t *tsd); +void thread_deallocated_cleanup(tsd_t *tsd); +void iarena_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void narenas_tdata_cleanup(tsd_t *tsd); +void arenas_tdata_bypass_cleanup(tsd_t *tsd); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); + +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/valgrind.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tsd.h" + +#undef JEMALLOC_H_EXTERNS +/******************************************************************************/ +#define JEMALLOC_H_INLINES + +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/valgrind.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/tsd.h" 
+#include "jemalloc/internal/witness.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" + +#ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); +szind_t size2index_compute(size_t size); +szind_t size2index_lookup(size_t size); +szind_t size2index(size_t size); +size_t index2size_compute(szind_t index); +size_t index2size_lookup(szind_t index); +size_t index2size(szind_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment); +arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); +arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, + bool refresh_if_missing); +arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); +ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE pszind_t +psz2ind(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (NPSIZES); + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return (ind); + } +} + +JEMALLOC_INLINE size_t +pind2sz_compute(pszind_t pind) +{ + + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return (sz); + } +} + +JEMALLOC_INLINE size_t +pind2sz_lookup(pszind_t pind) +{ + size_t ret = (size_t)pind2sz_tab[pind]; + assert(ret == pind2sz_compute(pind)); + return (ret); +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + assert(pind < NPSIZES); + return (pind2sz_lookup(pind)); +} + +JEMALLOC_INLINE size_t +psz2u(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (0); + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_INLINE szind_t +size2index_compute(size_t size) +{ + + if (unlikely(size > HUGE_MAXCLASS)) + return (NSIZES); +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? 
0 : lg_ceil - lg_tmin); + } +#endif + { + szind_t x = lg_floor((size<<1)-1); + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + szind_t grp = shift << LG_SIZE_CLASS_GROUP; + + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + szind_t index = NTBINS + grp + mod; + return (index); + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +size2index_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); + assert(ret == size2index_compute(size)); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +size2index(size_t size) +{ + + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (size2index_lookup(size)); + return (size2index_compute(size)); +} + +JEMALLOC_INLINE size_t +index2size_compute(szind_t index) +{ + +#if (NTBINS > 0) + if (index < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size_lookup(szind_t index) +{ + size_t ret = (size_t)index2size_tab[index]; + assert(ret == index2size_compute(index)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size(szind_t index) +{ + + assert(index < NSIZES); + return (index2size_lookup(index)); +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_compute(size_t size) +{ + + if (unlikely(size > HUGE_MAXCLASS)) + return (0); +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_lookup(size_t size) +{ + size_t ret = index2size_lookup(size2index_lookup(size)); + + assert(ret == s2u_compute(size)); + return (ret); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_ALWAYS_INLINE size_t +s2u(size_t size) +{ + + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (s2u_lookup(size)); + return (s2u_compute(size)); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_ALWAYS_INLINE size_t +sa2u(size_t size, size_t alignment) +{ + size_t usize; + + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. 
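+	 * For example, with size 100 and alignment 64 (values illustrative
+	 * only):
+	 *
+	 *   ALIGNMENT_CEILING(100, 64) == (100 + 63) & -64 == 128
+	 *
+	 * so the size passed to s2u() below already encodes the alignment
+	 * constraint.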
+ * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) + return (usize); + } + + /* Try for a large size class. */ + if (likely(size <= large_maxclass) && likely(alignment < chunksize)) { + /* + * We can't achieve subpage alignment, so round up alignment + * to the minimum that can actually be supported. + */ + alignment = PAGE_CEILING(alignment); + + /* Make sure result is a large size class. */ + usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); + + /* + * Calculate the size of the over-size run that arena_palloc() + * would need to allocate in order to guarantee the alignment. + */ + if (usize + large_pad + alignment - PAGE <= arena_maxrun) + return (usize); + } + + /* Huge size class. Beware of overflow. */ + + if (unlikely(alignment > HUGE_MAXCLASS)) + return (0); + + /* + * We can't achieve subchunk alignment, so round up alignment to the + * minimum that can actually be supported. + */ + alignment = CHUNK_CEILING(alignment); + + /* Make sure result is a huge size class. */ + if (size <= chunksize) + usize = chunksize; + else { + usize = s2u(size); + if (usize < size) { + /* size_t overflow. */ + return (0); + } + } + + /* + * Calculate the multi-chunk mapping that huge_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + alignment - PAGE < usize) { + /* size_t overflow. */ + return (0); + } + return (usize); +} + +/* Choose an arena based on a per-thread value. */ +JEMALLOC_INLINE arena_t * +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) +{ + arena_t *ret; + + if (arena != NULL) + return (arena); + + ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) + ret = arena_choose_hard(tsd, internal); + + return (ret); +} + +JEMALLOC_INLINE arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) +{ + + return (arena_choose_impl(tsd, arena, false)); +} + +JEMALLOC_INLINE arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) +{ + + return (arena_choose_impl(tsd, arena, true)); +} + +JEMALLOC_INLINE arena_tdata_t * +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) +{ + arena_tdata_t *tdata; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + + if (unlikely(arenas_tdata == NULL)) { + /* arenas_tdata hasn't been initialized yet. */ + return (arena_tdata_get_hard(tsd, ind)); + } + if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or tdata to be + * initialized. + */ + return (refresh_if_missing ? 
arena_tdata_get_hard(tsd, ind) : + NULL); + } + + tdata = &arenas_tdata[ind]; + if (likely(tdata != NULL) || !refresh_if_missing) + return (tdata); + return (arena_tdata_get_hard(tsd, ind)); +} + +JEMALLOC_INLINE arena_t * +arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) +{ + arena_t *ret; + + assert(ind <= MALLOCX_ARENA_MAX); + + ret = arenas[ind]; + if (unlikely(ret == NULL)) { + ret = atomic_read_p((void *)&arenas[ind]); + if (init_if_missing && unlikely(ret == NULL)) + ret = arena_init(tsdn, ind); + } + return (ret); +} + +JEMALLOC_INLINE ticker_t * +decay_ticker_get(tsd_t *tsd, unsigned ind) +{ + arena_tdata_t *tdata; + + tdata = arena_tdata_get(tsd, ind, true); + if (unlikely(tdata == NULL)) + return (NULL); + return (&tdata->decay_ticker); +} +#endif + +#include "jemalloc/internal/bitmap.h" +/* + * Include portions of arena.h interleaved with tcache.h in order to resolve + * circular dependencies. + */ +#define JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" +#define JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" + +#ifndef JEMALLOC_ENABLE_INLINE +arena_t *iaalloc(const void *ptr); +size_t isalloc(tsdn_t *tsdn, const void *ptr, bool demote); +void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); +void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, + bool slow_path); +void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena); +void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena); +void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); +size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); +size_t u2rz(size_t usize); +size_t p2rz(tsdn_t *tsdn, const void *ptr); +void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path); +void idalloc(tsd_t *tsd, void *ptr); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); +void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, + arena_t *arena); +void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); +void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero); +bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_ALWAYS_INLINE arena_t * +iaalloc(const void *ptr) +{ + + assert(ptr != NULL); + + return (arena_aalloc(ptr)); +} + +/* + * Typical usage: + * tsdn_t *tsdn = [...] + * void *ptr = [...] + * size_t sz = isalloc(tsdn, ptr, config_prof); + */ +JEMALLOC_ALWAYS_INLINE size_t +isalloc(tsdn_t *tsdn, const void *ptr, bool demote) +{ + + assert(ptr != NULL); + /* Demotion only makes sense if config_prof is true. 
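+	 * With profiling enabled, sampled small requests are promoted to
+	 * LARGE_MINCLASS runs; demote=true reports such an object's original
+	 * small size class rather than the promoted size. A sketch of the
+	 * distinction, for a hypothetical sampled small allocation p:
+	 *
+	 *   isalloc(tsdn, p, true)  -> the original small usize
+	 *   isalloc(tsdn, p, false) -> LARGE_MINCLASS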
*/ + assert(config_prof || !demote); + + return (arena_salloc(tsdn, ptr, demote)); +} + +JEMALLOC_ALWAYS_INLINE void * +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_metadata, arena_t *arena, bool slow_path) +{ + void *ret; + + assert(size != 0); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); + + ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), + isalloc(tsdn, ret, config_prof)); + } + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) +{ + + return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), + false, NULL, slow_path)); +} + +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena) +{ + void *ret; + + assert(usize != 0); + assert(usize == sa2u(usize, alignment)); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); + + ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret, + config_prof)); + } + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) +{ + + return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) +{ + + return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, + tcache_get(tsd, true), false, NULL)); +} + +JEMALLOC_ALWAYS_INLINE size_t +ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) +{ + extent_node_t *node; + + /* Return 0 if ptr is not within a chunk managed by jemalloc. */ + node = chunk_lookup(ptr, false); + if (node == NULL) + return (0); + /* Only arena chunks should be looked up via interior pointers. 
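+	 * A successful lookup must therefore have matched either ptr's exact
+	 * base address (huge chunks are registered by their base) or a node
+	 * flagged as an arena chunk, e.g. (hypothetical pointers):
+	 *
+	 *   ivsalloc(tsdn, chunk_base + PAGE, demote) -> ok for arena chunks
+	 *   ivsalloc(tsdn, huge_base + PAGE, demote)  -> would trip the assert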
*/ + assert(extent_node_addr_get(node) == ptr || + extent_node_achunk_get(node)); + + return (isalloc(tsdn, ptr, demote)); +} + +JEMALLOC_INLINE size_t +u2rz(size_t usize) +{ + size_t ret; + + if (usize <= SMALL_MAXCLASS) { + szind_t binind = size2index(usize); + ret = arena_bin_info[binind].redzone_size; + } else + ret = 0; + + return (ret); +} + +JEMALLOC_INLINE size_t +p2rz(tsdn_t *tsdn, const void *ptr) +{ + size_t usize = isalloc(tsdn, ptr, false); + + return (u2rz(usize)); +} + +JEMALLOC_ALWAYS_INLINE void +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path) +{ + + assert(ptr != NULL); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); + if (config_stats && is_metadata) { + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr, + config_prof)); + } + + arena_dalloc(tsdn, ptr, tcache, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void +idalloc(tsd_t *tsd, void *ptr) +{ + + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); +} + +JEMALLOC_ALWAYS_INLINE void +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) +{ + + if (slow_path && config_fill && unlikely(opt_quarantine)) + quarantine(tsd, ptr); + else + idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void +isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) +{ + + arena_sdalloc(tsdn, ptr, size, tcache, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) +{ + + if (slow_path && config_fill && unlikely(opt_quarantine)) + quarantine(tsd, ptr); + else + isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) +{ + void *p; + size_t usize, copysize; + + usize = sa2u(size + extra, alignment); + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + return (NULL); + p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena); + if (p == NULL) { + if (extra == 0) + return (NULL); + /* Try again, without extra this time. */ + usize = sa2u(size, alignment); + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + return (NULL); + p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, + arena); + if (p == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + memcpy(p, ptr, copysize); + isqalloc(tsd, ptr, oldsize, tcache, true); + return (p); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero, tcache_t *tcache, arena_t *arena) +{ + + assert(ptr != NULL); + assert(size != 0); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* + * Existing object alignment is inadequate; allocate new space + * and copy. 
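+	 * (The misalignment test above simply masks ptr's low bits: e.g. a
+	 * pointer ending in 0x28 fails a 16-byte alignment check, since
+	 * 0x28 & 0xf == 0x8 != 0.)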
+ */ + return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, + zero, tcache, arena)); + } + + return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero, + tcache)); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero) +{ + + return (iralloct(tsd, ptr, oldsize, size, alignment, zero, + tcache_get(tsd, true), NULL)); +} + +JEMALLOC_ALWAYS_INLINE bool +ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) +{ + + assert(ptr != NULL); + assert(size != 0); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* Existing object alignment is inadequate. */ + return (true); + } + + return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero)); +} +#endif + +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_INLINES +/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h new file mode 100644 index 000000000..c907d9109 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_decls.h @@ -0,0 +1,75 @@ +#ifndef JEMALLOC_INTERNAL_DECLS_H +#define JEMALLOC_INTERNAL_DECLS_H + +#include <math.h> +#ifdef _WIN32 +# include <windows.h> +# include "msvc_compat/windows_extra.h" + +#else +# include <sys/param.h> +# include <sys/mman.h> +# if !defined(__pnacl__) && !defined(__native_client__) +# include <sys/syscall.h> +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include <sys/uio.h> +# endif +# include <pthread.h> +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include <os/lock.h> +# endif +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include <sched.h> +# endif +# include <errno.h> +# include <sys/time.h> +# include <time.h> +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include <mach/mach_time.h> +# endif +#endif +#include <sys/types.h> + +#include <limits.h> +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stddef.h> +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif +#include <string.h> +#include <strings.h> +#include <ctype.h> +#ifdef _MSC_VER +# include <io.h> +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +# ifdef JEMALLOC_HAS_RESTRICT +# define restrict __restrict +# endif +/* Disable warnings about deprecated system functions. */ +# pragma warning(disable: 4996) +#if _MSC_VER < 1800 +static int +isblank(int c) +{ + + return (c == '\t' || c == ' '); +} +#endif +#else +# include <unistd.h> +#endif +#include <fcntl.h> + +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 000000000..9b3dca504 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,307 @@ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. 
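 * For example, configuring with --with-jemalloc-prefix=je_ exposes
 * je_malloc() and je_free() rather than malloc() and free().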
This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +#undef JEMALLOC_PREFIX +#undef JEMALLOC_CPREFIX + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#undef JEMALLOC_PRIVATE_NAMESPACE + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#undef CPU_SPINWAIT + +/* Defined if C11 atomics are available. */ +#undef JEMALLOC_C11ATOMICS + +/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ +#undef JEMALLOC_ATOMIC9 + +/* + * Defined if OSAtomic*() functions are available, as provided by Darwin, and + * documented in the atomic(3) manual page. + */ +#undef JEMALLOC_OSATOMIC + +/* + * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and + * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the + * functions are defined in libgcc instead of being inlines). + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 + +/* + * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and + * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the + * functions are defined in libgcc instead of being inlines). + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#undef JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if madvise(2) is available. + */ +#undef JEMALLOC_HAVE_MADVISE + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +#undef JEMALLOC_OS_UNFAIR_LOCK + +/* + * Defined if OSSpin*() functions are available, as provided by Darwin, and + * documented in the spinlock(3) manual page. + */ +#undef JEMALLOC_OSSPIN + +/* Defined if syscall(2) is available. */ +#undef JEMALLOC_HAVE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +#undef JEMALLOC_HAVE_SECURE_GETENV + +/* + * Defined if issetugid(2) is available. + */ +#undef JEMALLOC_HAVE_ISSETUGID + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#undef JEMALLOC_MALLOC_THREAD_CLEANUP + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#undef JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. 
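 * On FreeBSD the callback-taking initializer has roughly this shape (a
 * sketch; see libthr for the authoritative declaration):
 *
 *   int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
 *       void *(calloc_cb)(size_t number, size_t size));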
+ */ +#undef JEMALLOC_MUTEX_INIT_CB + +/* Non-empty if the tls_model attribute is supported. */ +#undef JEMALLOC_TLS_MODEL + +/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ +#undef JEMALLOC_CC_SILENCE + +/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ +#undef JEMALLOC_CODE_COVERAGE + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +#undef JEMALLOC_DEBUG + +/* JEMALLOC_STATS enables statistics calculation. */ +#undef JEMALLOC_STATS + +/* JEMALLOC_PROF enables allocation profiling. */ +#undef JEMALLOC_PROF + +/* Use libunwind for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBUNWIND + +/* Use libgcc for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBGCC + +/* Use gcc intrinsics for profile backtracing if defined. */ +#undef JEMALLOC_PROF_GCC + +/* + * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. + * This makes it possible to allocate/deallocate objects without any locking + * when the cache is in the steady state. + */ +#undef JEMALLOC_TCACHE + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * segment (DSS). + */ +#undef JEMALLOC_DSS + +/* Support memory filling (junk/zero/quarantine/redzone). */ +#undef JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +#undef JEMALLOC_UTRACE + +/* Support Valgrind. */ +#undef JEMALLOC_VALGRIND + +/* Support optional abort() on OOM. */ +#undef JEMALLOC_XMALLOC + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#undef JEMALLOC_LAZY_LOCK + +/* Minimum size class to support is 2^LG_TINY_MIN bytes. */ +#undef LG_TINY_MIN + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#undef LG_QUANTUM + +/* One page is 2^LG_PAGE bytes. */ +#undef LG_PAGE + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#undef JEMALLOC_MAPS_COALESCE + +/* + * If defined, use munmap() to unmap freed chunks, rather than storing them for + * later reuse. This is disabled by default on Linux because common sequences + * of mmap()/munmap() calls will cause virtual memory map holes. + */ +#undef JEMALLOC_MUNMAP + +/* TLS is used to map arenas and magazine caches to threads. */ +#undef JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#undef JEMALLOC_INTERNAL_UNREACHABLE + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#undef JEMALLOC_INTERNAL_FFSLL +#undef JEMALLOC_INTERNAL_FFSL +#undef JEMALLOC_INTERNAL_FFS + +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +#undef JEMALLOC_IVSALLOC + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#undef JEMALLOC_CACHE_OBLIVIOUS + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. 
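 * (jemalloc registers its own malloc_zone_t and installs it as the
 * default zone, so interposition works even though Mach-O lacks
 * ELF-style symbol preemption; see zone.c.)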
+ */
+#undef JEMALLOC_ZONE
+#undef JEMALLOC_ZONE_VERSION
+
+/*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ * /proc/sys/vm/overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
+ * such that new pages will be demand-zeroed if
+ * the address region is later touched.
+ * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being
+ * unused, such that they will be discarded rather
+ * than swapped out.
+ */
+#undef JEMALLOC_PURGE_MADVISE_DONTNEED
+#undef JEMALLOC_PURGE_MADVISE_FREE
+
+/* Define if operating system has alloca.h header. */
+#undef JEMALLOC_HAS_ALLOCA_H
+
+/* C99 restrict keyword supported. */
+#undef JEMALLOC_HAS_RESTRICT
+
+/* For use by hash code. */
+#undef JEMALLOC_BIG_ENDIAN
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#undef LG_SIZEOF_INT
+
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#undef LG_SIZEOF_LONG
+
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#undef LG_SIZEOF_LONG_LONG
+
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#undef LG_SIZEOF_INTMAX_T
+
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+#undef JEMALLOC_GLIBC_MALLOC_HOOK
+
+/* glibc memalign hook. */
+#undef JEMALLOC_GLIBC_MEMALIGN_HOOK
+
+/* Adaptive mutex support in pthreads. */
+#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+#undef JEMALLOC_EXPORT
+
+/* config.malloc_conf options string. */
+#undef JEMALLOC_CONFIG_MALLOC_CONF
+
+#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_macros.h b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_macros.h
new file mode 100644
index 000000000..a08ba772e
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -0,0 +1,57 @@
+/*
+ * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for
+ * functions that are static inline functions if inlining is enabled, and
+ * single-definition library-private functions if inlining is disabled.
+ *
+ * JEMALLOC_ALWAYS_INLINE_C and JEMALLOC_INLINE_C are for use in .c files, in
+ * which case the denoted functions are always static, regardless of whether
+ * inlining is enabled.
+ */
+#if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE)
+ /* Disable inlining to make debugging/profiling easier.
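+ * With the macros defined to nothing, each header's inline function
+ * bodies are compiled exactly once, in the translation unit that
+ * defines the matching JEMALLOC_*_C_ guard (e.g. JEMALLOC_MUTEX_C_
+ * for mutex.h), so breakpoints and profiles see real symbols.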
*/ +# define JEMALLOC_ALWAYS_INLINE +# define JEMALLOC_ALWAYS_INLINE_C static +# define JEMALLOC_INLINE +# define JEMALLOC_INLINE_C static +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ALWAYS_INLINE \ + static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# define JEMALLOC_ALWAYS_INLINE_C \ + static inline JEMALLOC_ATTR(always_inline) +# else +# define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE_C static inline +# endif +# define JEMALLOC_INLINE static inline +# define JEMALLOC_INLINE_C static inline +# ifdef _MSC_VER +# define inline _inline +# endif +#endif + +#ifdef JEMALLOC_CC_SILENCE +# define UNUSED JEMALLOC_ATTR(unused) +#else +# define UNUSED +#endif + +#define ZU(z) ((size_t)z) +#define ZI(z) ((ssize_t)z) +#define QU(q) ((uint64_t)q) +#define QI(q) ((int64_t)q) + +#define KZU(z) ZU(z##ULL) +#define KZI(z) ZI(z##LL) +#define KQU(q) QU(q##ULL) +#define KQI(q) QI(q##LL) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif + +#ifndef JEMALLOC_HAS_RESTRICT +# define restrict +#endif diff --git a/memory/jemalloc/src/include/jemalloc/internal/mb.h b/memory/jemalloc/src/include/jemalloc/internal/mb.h new file mode 100644 index 000000000..5384728fd --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/mb.h @@ -0,0 +1,115 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void mb_write(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) +#ifdef __i386__ +/* + * According to the Intel Architecture Software Developer's Manual, current + * processors execute instructions in order from the perspective of other + * processors in a multiprocessor system, but 1) Intel reserves the right to + * change that, and 2) the compiler's optimizer could re-order instructions if + * there weren't some form of barrier. Therefore, even if running on an + * architecture that does not need memory barriers (everything through at least + * i686), an "optimizer barrier" is necessary. + */ +JEMALLOC_INLINE void +mb_write(void) +{ + +# if 0 + /* This is a true memory barrier. */ + asm volatile ("pusha;" + "xor %%eax,%%eax;" + "cpuid;" + "popa;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +# else + /* + * This is hopefully enough to keep the compiler from reordering + * instructions around this one. + */ + asm volatile ("nop;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +# endif +} +#elif (defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("sfence" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__powerpc__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("eieio" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__sparc64__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("membar #StoreStore" + : /* Outputs. 
*/ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__tile__) +JEMALLOC_INLINE void +mb_write(void) +{ + + __sync_synchronize(); +} +#else +/* + * This is much slower than a simple memory barrier, but the semantics of mutex + * unlock make this work. + */ +JEMALLOC_INLINE void +mb_write(void) +{ + malloc_mutex_t mtx; + + malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); + malloc_mutex_lock(TSDN_NULL, &mtx); + malloc_mutex_unlock(TSDN_NULL, &mtx); +} +#endif +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/mutex.h b/memory/jemalloc/src/include/jemalloc/internal/mutex.h new file mode 100644 index 000000000..b442d2d4e --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/mutex.h @@ -0,0 +1,147 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct malloc_mutex_s malloc_mutex_t; + +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +#else +# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ + defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ + WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +# else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +# endif +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct malloc_mutex_s { +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else + CRITICAL_SECTION lock; +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif + witness_t witness; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_LAZY_LOCK +extern bool isthreaded; +#else +# undef isthreaded /* Undo private_namespace.h definition. 
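 * Defining isthreaded to the constant true lets the compiler evaluate
 * the "if (isthreaded)" guards in the inline functions below at
 * compile time, so the no-locking fast path is compiled out entirely.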
*/ +# define isthreaded true +#endif + +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) { + witness_assert_not_owner(tsdn, &mutex->witness); +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + AcquireSRWLockExclusive(&mutex->lock); +# else + EnterCriticalSection(&mutex->lock); +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockLock(&mutex->lock); +#else + pthread_mutex_lock(&mutex->lock); +#endif + witness_lock(tsdn, &mutex->witness); + } +} + +JEMALLOC_INLINE void +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) { + witness_unlock(tsdn, &mutex->witness); +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + ReleaseSRWLockExclusive(&mutex->lock); +# else + LeaveCriticalSection(&mutex->lock); +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockUnlock(&mutex->lock); +#else + pthread_mutex_unlock(&mutex->lock); +#endif + } +} + +JEMALLOC_INLINE void +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) + witness_assert_owner(tsdn, &mutex->witness); +} + +JEMALLOC_INLINE void +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) + witness_assert_not_owner(tsdn, &mutex->witness); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/nstime.h b/memory/jemalloc/src/include/jemalloc/internal/nstime.h new file mode 100644 index 000000000..93b27dc80 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/nstime.h @@ -0,0 +1,48 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct nstime_s nstime_t; + +/* Maximum supported number of seconds (~584 years). 
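 * (A uint64_t counts at most 2^64 - 1 nanoseconds; dividing by 10^9
 * gives the 18446744072-second bound defined below, and
 * 18446744072 / (365.25 * 24 * 3600) is roughly 584.5 years.)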
*/ +#define NSTIME_SEC_MAX KQU(18446744072) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct nstime_s { + uint64_t ns; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +#ifdef JEMALLOC_JET +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *nstime_monotonic; +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *nstime_update; +#else +bool nstime_monotonic(void); +bool nstime_update(nstime_t *time); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/pages.h b/memory/jemalloc/src/include/jemalloc/internal/pages.h new file mode 100644 index 000000000..e21effd14 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/pages.h @@ -0,0 +1,27 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *pages_map(void *addr, size_t size, bool *commit); +void pages_unmap(void *addr, size_t size); +void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, + size_t size, bool *commit); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge(void *addr, size_t size); +void pages_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/memory/jemalloc/src/include/jemalloc/internal/ph.h b/memory/jemalloc/src/include/jemalloc/internal/ph.h new file mode 100644 index 000000000..4f91c333f --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/ph.h @@ -0,0 +1,345 @@ +/* + * A Pairing Heap implementation. + * + * "The Pairing Heap: A New Form of Self-Adjusting Heap" + * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf + * + * With auxiliary twopass list, described in a follow on paper. 
+ * + * "Pairing Heaps: Experiments and Analysis" + * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf + * + ******************************************************************************* + */ + +#ifndef PH_H_ +#define PH_H_ + +/* Node structure. */ +#define phn(a_type) \ +struct { \ + a_type *phn_prev; \ + a_type *phn_next; \ + a_type *phn_lchild; \ +} + +/* Root structure. */ +#define ph(a_type) \ +struct { \ + a_type *ph_root; \ +} + +/* Internal utility macros. */ +#define phn_lchild_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_lchild) +#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ + a_phn->a_field.phn_lchild = a_lchild; \ +} while (0) + +#define phn_next_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_next) +#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ + a_phn->a_field.phn_prev = a_prev; \ +} while (0) + +#define phn_prev_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_prev) +#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ + a_phn->a_field.phn_next = a_next; \ +} while (0) + +#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ + a_type *phn0child; \ + \ + assert(a_phn0 != NULL); \ + assert(a_phn1 != NULL); \ + assert(a_cmp(a_phn0, a_phn1) <= 0); \ + \ + phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ + phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ + phn_next_set(a_type, a_field, a_phn1, phn0child); \ + if (phn0child != NULL) \ + phn_prev_set(a_type, a_field, phn0child, a_phn1); \ + phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ +} while (0) + +#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ + if (a_phn0 == NULL) \ + r_phn = a_phn1; \ + else if (a_phn1 == NULL) \ + r_phn = a_phn0; \ + else if (a_cmp(a_phn0, a_phn1) < 0) { \ + phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ + a_cmp); \ + r_phn = a_phn0; \ + } else { \ + phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \ + a_cmp); \ + r_phn = a_phn1; \ + } \ +} while (0) + +#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *head = NULL; \ + a_type *tail = NULL; \ + a_type *phn0 = a_phn; \ + a_type *phn1 = phn_next_get(a_type, a_field, phn0); \ + \ + /* \ + * Multipass merge, wherein the first two elements of a FIFO \ + * are repeatedly merged, and each result is appended to the \ + * singly linked FIFO, until the FIFO contains only a single \ + * element. We start with a sibling list but no reference to \ + * its tail, so we do a single pass over the sibling list to \ + * populate the FIFO. 
\ + */ \ + if (phn1 != NULL) { \ + a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ + if (phnrest != NULL) \ + phn_prev_set(a_type, a_field, phnrest, NULL); \ + phn_prev_set(a_type, a_field, phn0, NULL); \ + phn_next_set(a_type, a_field, phn0, NULL); \ + phn_prev_set(a_type, a_field, phn1, NULL); \ + phn_next_set(a_type, a_field, phn1, NULL); \ + phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \ + head = tail = phn0; \ + phn0 = phnrest; \ + while (phn0 != NULL) { \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + phnrest = phn_next_get(a_type, a_field, \ + phn1); \ + if (phnrest != NULL) { \ + phn_prev_set(a_type, a_field, \ + phnrest, NULL); \ + } \ + phn_prev_set(a_type, a_field, phn0, \ + NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + phn_prev_set(a_type, a_field, phn1, \ + NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = phnrest; \ + } else { \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = NULL; \ + } \ + } \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + while (true) { \ + head = phn_next_get(a_type, a_field, \ + phn1); \ + assert(phn_prev_get(a_type, a_field, \ + phn0) == NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + assert(phn_prev_get(a_type, a_field, \ + phn1) == NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + if (head == NULL) \ + break; \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, \ + phn0); \ + } \ + } \ + } \ + r_phn = phn0; \ +} while (0) + +#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ + a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ + if (phn != NULL) { \ + phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_prev_set(a_type, a_field, phn, NULL); \ + ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \ + assert(phn_next_get(a_type, a_field, phn) == NULL); \ + phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \ + a_ph->ph_root); \ + } \ +} while (0) + +#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ + if (lchild == NULL) \ + r_phn = NULL; \ + else { \ + ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ + r_phn); \ + } \ +} while (0) + +/* + * The ph_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to ph_gen(). + */ +#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ +a_attr void a_prefix##new(a_ph_type *ph); \ +a_attr bool a_prefix##empty(a_ph_type *ph); \ +a_attr a_type *a_prefix##first(a_ph_type *ph); \ +a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ +a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ +a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); + +/* + * The ph_gen() macro generates a type-specific pairing heap implementation, + * based on the above cpp macros. 
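 * A hypothetical instantiation (all names illustrative only):
 *
 *   typedef struct node_s node_t;
 *   struct node_s { phn(node_t) link; uint64_t key; };
 *   typedef ph(node_t) node_heap_t;
 *   ph_proto(static, nheap_, node_heap_t, node_t)
 *   ph_gen(static, nheap_, node_heap_t, node_t, link, node_cmp)
 *
 * where node_cmp(a, b) orders nodes strcmp()-style; nheap_new(),
 * nheap_insert(), nheap_remove_first(), etc. then operate on
 * node_heap_t instances.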
+ */ +#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_ph_type *ph) \ +{ \ + \ + memset(ph, 0, sizeof(ph(a_type))); \ +} \ +a_attr bool \ +a_prefix##empty(a_ph_type *ph) \ +{ \ + \ + return (ph->ph_root == NULL); \ +} \ +a_attr a_type * \ +a_prefix##first(a_ph_type *ph) \ +{ \ + \ + if (ph->ph_root == NULL) \ + return (NULL); \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + return (ph->ph_root); \ +} \ +a_attr void \ +a_prefix##insert(a_ph_type *ph, a_type *phn) \ +{ \ + \ + memset(&phn->a_field, 0, sizeof(phn(a_type))); \ + \ + /* \ + * Treat the root as an aux list during insertion, and lazily \ + * merge during a_prefix##remove_first(). For elements that \ + * are inserted, then removed via a_prefix##remove() before the \ + * aux list is ever processed, this makes insert/remove \ + * constant-time, whereas eager merging would make insert \ + * O(log n). \ + */ \ + if (ph->ph_root == NULL) \ + ph->ph_root = phn; \ + else { \ + phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ + a_field, ph->ph_root)); \ + if (phn_next_get(a_type, a_field, ph->ph_root) != \ + NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, ph->ph_root), \ + phn); \ + } \ + phn_prev_set(a_type, a_field, phn, ph->ph_root); \ + phn_next_set(a_type, a_field, ph->ph_root, phn); \ + } \ +} \ +a_attr a_type * \ +a_prefix##remove_first(a_ph_type *ph) \ +{ \ + a_type *ret; \ + \ + if (ph->ph_root == NULL) \ + return (NULL); \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + \ + ret = ph->ph_root; \ + \ + ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ + ph->ph_root); \ + \ + return (ret); \ +} \ +a_attr void \ +a_prefix##remove(a_ph_type *ph, a_type *phn) \ +{ \ + a_type *replace, *parent; \ + \ + /* \ + * We can delete from aux list without merging it, but we need \ + * to merge if we are dealing with the root node. \ + */ \ + if (ph->ph_root == phn) { \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + if (ph->ph_root == phn) { \ + ph_merge_children(a_type, a_field, ph->ph_root, \ + a_cmp, ph->ph_root); \ + return; \ + } \ + } \ + \ + /* Get parent (if phn is leftmost child) before mutating. */ \ + if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ + if (phn_lchild_get(a_type, a_field, parent) != phn) \ + parent = NULL; \ + } \ + /* Find a possible replacement node, and link to parent. */ \ + ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ + /* Set next/prev for sibling linked list. 
*/ \ + if (replace != NULL) { \ + if (parent != NULL) { \ + phn_prev_set(a_type, a_field, replace, parent); \ + phn_lchild_set(a_type, a_field, parent, \ + replace); \ + } else { \ + phn_prev_set(a_type, a_field, replace, \ + phn_prev_get(a_type, a_field, phn)); \ + if (phn_prev_get(a_type, a_field, phn) != \ + NULL) { \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + replace); \ + } \ + } \ + phn_next_set(a_type, a_field, replace, \ + phn_next_get(a_type, a_field, phn)); \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + replace); \ + } \ + } else { \ + if (parent != NULL) { \ + a_type *next = phn_next_get(a_type, a_field, \ + phn); \ + phn_lchild_set(a_type, a_field, parent, next); \ + if (next != NULL) { \ + phn_prev_set(a_type, a_field, next, \ + parent); \ + } \ + } else { \ + assert(phn_prev_get(a_type, a_field, phn) != \ + NULL); \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + phn_next_get(a_type, a_field, phn)); \ + } \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + phn_prev_get(a_type, a_field, phn)); \ + } \ + } \ +} + +#endif /* PH_H_ */ diff --git a/memory/jemalloc/src/include/jemalloc/internal/private_namespace.sh b/memory/jemalloc/src/include/jemalloc/internal/private_namespace.sh new file mode 100755 index 000000000..cd25eb306 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/private_namespace.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for symbol in `cat $1` ; do + echo "#define ${symbol} JEMALLOC_N(${symbol})" +done diff --git a/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt b/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt new file mode 100644 index 000000000..87c8c9b71 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/private_symbols.txt @@ -0,0 +1,626 @@ +a0dalloc +a0get +a0malloc +arena_aalloc +arena_alloc_junk_small +arena_basic_stats_merge +arena_bin_index +arena_bin_info +arena_bitselm_get_const +arena_bitselm_get_mutable +arena_boot +arena_choose +arena_choose_hard +arena_choose_impl +arena_chunk_alloc_huge +arena_chunk_cache_maybe_insert +arena_chunk_cache_maybe_remove +arena_chunk_dalloc_huge +arena_chunk_ralloc_huge_expand +arena_chunk_ralloc_huge_shrink +arena_chunk_ralloc_huge_similar +arena_cleanup +arena_dalloc +arena_dalloc_bin +arena_dalloc_bin_junked_locked +arena_dalloc_junk_large +arena_dalloc_junk_small +arena_dalloc_large +arena_dalloc_large_junked_locked +arena_dalloc_small +arena_decay_tick +arena_decay_ticks +arena_decay_time_default_get +arena_decay_time_default_set +arena_decay_time_get +arena_decay_time_set +arena_dss_prec_get +arena_dss_prec_set +arena_get +arena_ichoose +arena_init +arena_lg_dirty_mult_default_get +arena_lg_dirty_mult_default_set +arena_lg_dirty_mult_get +arena_lg_dirty_mult_set +arena_malloc +arena_malloc_hard +arena_malloc_large +arena_mapbits_allocated_get +arena_mapbits_binind_get +arena_mapbits_decommitted_get +arena_mapbits_dirty_get +arena_mapbits_get +arena_mapbits_internal_set +arena_mapbits_large_binind_set +arena_mapbits_large_get +arena_mapbits_large_set +arena_mapbits_large_size_get +arena_mapbits_size_decode +arena_mapbits_size_encode +arena_mapbits_small_runind_get +arena_mapbits_small_set +arena_mapbits_unallocated_set +arena_mapbits_unallocated_size_get +arena_mapbits_unallocated_size_set +arena_mapbits_unzeroed_get 
+arena_mapbitsp_get_const +arena_mapbitsp_get_mutable +arena_mapbitsp_read +arena_mapbitsp_write +arena_maxrun +arena_maybe_purge +arena_metadata_allocated_add +arena_metadata_allocated_get +arena_metadata_allocated_sub +arena_migrate +arena_miscelm_get_const +arena_miscelm_get_mutable +arena_miscelm_to_pageind +arena_miscelm_to_rpages +arena_new +arena_node_alloc +arena_node_dalloc +arena_nthreads_dec +arena_nthreads_get +arena_nthreads_inc +arena_palloc +arena_postfork_child +arena_postfork_parent +arena_prefork0 +arena_prefork1 +arena_prefork2 +arena_prefork3 +arena_prof_accum +arena_prof_accum_impl +arena_prof_accum_locked +arena_prof_promoted +arena_prof_tctx_get +arena_prof_tctx_reset +arena_prof_tctx_set +arena_ptr_small_binind_get +arena_purge +arena_quarantine_junk_small +arena_ralloc +arena_ralloc_junk_large +arena_ralloc_no_move +arena_rd_to_miscelm +arena_redzone_corruption +arena_reset +arena_run_regind +arena_run_to_miscelm +arena_salloc +arena_sdalloc +arena_stats_merge +arena_tcache_fill_small +arena_tdata_get +arena_tdata_get_hard +arenas +arenas_tdata_bypass_cleanup +arenas_tdata_cleanup +atomic_add_p +atomic_add_u +atomic_add_uint32 +atomic_add_uint64 +atomic_add_z +atomic_cas_p +atomic_cas_u +atomic_cas_uint32 +atomic_cas_uint64 +atomic_cas_z +atomic_sub_p +atomic_sub_u +atomic_sub_uint32 +atomic_sub_uint64 +atomic_sub_z +atomic_write_p +atomic_write_u +atomic_write_uint32 +atomic_write_uint64 +atomic_write_z +base_alloc +base_boot +base_postfork_child +base_postfork_parent +base_prefork +base_stats_get +bitmap_full +bitmap_get +bitmap_info_init +bitmap_init +bitmap_set +bitmap_sfu +bitmap_size +bitmap_unset +bootstrap_calloc +bootstrap_free +bootstrap_malloc +bt_init +buferror +chunk_alloc_base +chunk_alloc_cache +chunk_alloc_dss +chunk_alloc_mmap +chunk_alloc_wrapper +chunk_boot +chunk_dalloc_cache +chunk_dalloc_mmap +chunk_dalloc_wrapper +chunk_deregister +chunk_dss_boot +chunk_dss_mergeable +chunk_dss_prec_get +chunk_dss_prec_set +chunk_hooks_default +chunk_hooks_get +chunk_hooks_set +chunk_in_dss +chunk_lookup +chunk_npages +chunk_purge_wrapper +chunk_register +chunks_rtree +chunksize +chunksize_mask +ckh_count +ckh_delete +ckh_insert +ckh_iter +ckh_new +ckh_pointer_hash +ckh_pointer_keycomp +ckh_remove +ckh_search +ckh_string_hash +ckh_string_keycomp +ctl_boot +ctl_bymib +ctl_byname +ctl_nametomib +ctl_postfork_child +ctl_postfork_parent +ctl_prefork +decay_ticker_get +dss_prec_names +extent_node_achunk_get +extent_node_achunk_set +extent_node_addr_get +extent_node_addr_set +extent_node_arena_get +extent_node_arena_set +extent_node_committed_get +extent_node_committed_set +extent_node_dirty_insert +extent_node_dirty_linkage_init +extent_node_dirty_remove +extent_node_init +extent_node_prof_tctx_get +extent_node_prof_tctx_set +extent_node_size_get +extent_node_size_set +extent_node_zeroed_get +extent_node_zeroed_set +extent_tree_ad_destroy +extent_tree_ad_destroy_recurse +extent_tree_ad_empty +extent_tree_ad_first +extent_tree_ad_insert +extent_tree_ad_iter +extent_tree_ad_iter_recurse +extent_tree_ad_iter_start +extent_tree_ad_last +extent_tree_ad_new +extent_tree_ad_next +extent_tree_ad_nsearch +extent_tree_ad_prev +extent_tree_ad_psearch +extent_tree_ad_remove +extent_tree_ad_reverse_iter +extent_tree_ad_reverse_iter_recurse +extent_tree_ad_reverse_iter_start +extent_tree_ad_search +extent_tree_szad_destroy +extent_tree_szad_destroy_recurse +extent_tree_szad_empty +extent_tree_szad_first +extent_tree_szad_insert +extent_tree_szad_iter 
+extent_tree_szad_iter_recurse +extent_tree_szad_iter_start +extent_tree_szad_last +extent_tree_szad_new +extent_tree_szad_next +extent_tree_szad_nsearch +extent_tree_szad_prev +extent_tree_szad_psearch +extent_tree_szad_remove +extent_tree_szad_reverse_iter +extent_tree_szad_reverse_iter_recurse +extent_tree_szad_reverse_iter_start +extent_tree_szad_search +ffs_llu +ffs_lu +ffs_u +ffs_u32 +ffs_u64 +ffs_zu +get_errno +hash +hash_fmix_32 +hash_fmix_64 +hash_get_block_32 +hash_get_block_64 +hash_rotl_32 +hash_rotl_64 +hash_x64_128 +hash_x86_128 +hash_x86_32 +huge_aalloc +huge_dalloc +huge_dalloc_junk +huge_malloc +huge_palloc +huge_prof_tctx_get +huge_prof_tctx_reset +huge_prof_tctx_set +huge_ralloc +huge_ralloc_no_move +huge_salloc +iaalloc +ialloc +iallocztm +iarena_cleanup +idalloc +idalloctm +in_valgrind +index2size +index2size_compute +index2size_lookup +index2size_tab +ipalloc +ipalloct +ipallocztm +iqalloc +iralloc +iralloct +iralloct_realign +isalloc +isdalloct +isqalloc +isthreaded +ivsalloc +ixalloc +jemalloc_postfork_child +jemalloc_postfork_parent +jemalloc_prefork +large_maxclass +lg_floor +lg_prof_sample +malloc_cprintf +malloc_mutex_assert_not_owner +malloc_mutex_assert_owner +malloc_mutex_boot +malloc_mutex_init +malloc_mutex_lock +malloc_mutex_postfork_child +malloc_mutex_postfork_parent +malloc_mutex_prefork +malloc_mutex_unlock +malloc_printf +malloc_snprintf +malloc_strtoumax +malloc_tsd_boot0 +malloc_tsd_boot1 +malloc_tsd_cleanup_register +malloc_tsd_dalloc +malloc_tsd_malloc +malloc_tsd_no_cleanup +malloc_vcprintf +malloc_vsnprintf +malloc_write +map_bias +map_misc_offset +mb_write +narenas_auto +narenas_tdata_cleanup +narenas_total_get +ncpus +nhbins +nhclasses +nlclasses +nstime_add +nstime_compare +nstime_copy +nstime_divide +nstime_idivide +nstime_imultiply +nstime_init +nstime_init2 +nstime_monotonic +nstime_ns +nstime_nsec +nstime_sec +nstime_subtract +nstime_update +opt_abort +opt_decay_time +opt_dss +opt_junk +opt_junk_alloc +opt_junk_free +opt_lg_chunk +opt_lg_dirty_mult +opt_lg_prof_interval +opt_lg_prof_sample +opt_lg_tcache_max +opt_narenas +opt_prof +opt_prof_accum +opt_prof_active +opt_prof_final +opt_prof_gdump +opt_prof_leak +opt_prof_prefix +opt_prof_thread_active_init +opt_purge +opt_quarantine +opt_redzone +opt_stats_print +opt_tcache +opt_utrace +opt_xmalloc +opt_zero +p2rz +pages_boot +pages_commit +pages_decommit +pages_map +pages_purge +pages_trim +pages_unmap +pind2sz +pind2sz_compute +pind2sz_lookup +pind2sz_tab +pow2_ceil_u32 +pow2_ceil_u64 +pow2_ceil_zu +prng_lg_range_u32 +prng_lg_range_u64 +prng_lg_range_zu +prng_range_u32 +prng_range_u64 +prng_range_zu +prng_state_next_u32 +prng_state_next_u64 +prng_state_next_zu +prof_active +prof_active_get +prof_active_get_unlocked +prof_active_set +prof_alloc_prep +prof_alloc_rollback +prof_backtrace +prof_boot0 +prof_boot1 +prof_boot2 +prof_bt_count +prof_dump_header +prof_dump_open +prof_free +prof_free_sampled_object +prof_gdump +prof_gdump_get +prof_gdump_get_unlocked +prof_gdump_set +prof_gdump_val +prof_idump +prof_interval +prof_lookup +prof_malloc +prof_malloc_sample_object +prof_mdump +prof_postfork_child +prof_postfork_parent +prof_prefork0 +prof_prefork1 +prof_realloc +prof_reset +prof_sample_accum_update +prof_sample_threshold_update +prof_tctx_get +prof_tctx_reset +prof_tctx_set +prof_tdata_cleanup +prof_tdata_count +prof_tdata_get +prof_tdata_init +prof_tdata_reinit +prof_thread_active_get +prof_thread_active_init_get +prof_thread_active_init_set +prof_thread_active_set 
+prof_thread_name_get +prof_thread_name_set +psz2ind +psz2u +purge_mode_names +quarantine +quarantine_alloc_hook +quarantine_alloc_hook_work +quarantine_cleanup +rtree_child_read +rtree_child_read_hard +rtree_child_tryread +rtree_delete +rtree_get +rtree_new +rtree_node_valid +rtree_set +rtree_start_level +rtree_subkey +rtree_subtree_read +rtree_subtree_read_hard +rtree_subtree_tryread +rtree_val_read +rtree_val_write +run_quantize_ceil +run_quantize_floor +s2u +s2u_compute +s2u_lookup +sa2u +set_errno +size2index +size2index_compute +size2index_lookup +size2index_tab +spin_adaptive +spin_init +stats_cactive +stats_cactive_add +stats_cactive_get +stats_cactive_sub +stats_print +tcache_alloc_easy +tcache_alloc_large +tcache_alloc_small +tcache_alloc_small_hard +tcache_arena_reassociate +tcache_bin_flush_large +tcache_bin_flush_small +tcache_bin_info +tcache_boot +tcache_cleanup +tcache_create +tcache_dalloc_large +tcache_dalloc_small +tcache_enabled_cleanup +tcache_enabled_get +tcache_enabled_set +tcache_event +tcache_event_hard +tcache_flush +tcache_get +tcache_get_hard +tcache_maxclass +tcache_salloc +tcache_stats_merge +tcaches +tcaches_create +tcaches_destroy +tcaches_flush +tcaches_get +thread_allocated_cleanup +thread_deallocated_cleanup +ticker_copy +ticker_init +ticker_read +ticker_tick +ticker_ticks +tsd_arena_get +tsd_arena_set +tsd_arenap_get +tsd_arenas_tdata_bypass_get +tsd_arenas_tdata_bypass_set +tsd_arenas_tdata_bypassp_get +tsd_arenas_tdata_get +tsd_arenas_tdata_set +tsd_arenas_tdatap_get +tsd_boot +tsd_boot0 +tsd_boot1 +tsd_booted +tsd_booted_get +tsd_cleanup +tsd_cleanup_wrapper +tsd_fetch +tsd_fetch_impl +tsd_get +tsd_get_allocates +tsd_iarena_get +tsd_iarena_set +tsd_iarenap_get +tsd_initialized +tsd_init_check_recursion +tsd_init_finish +tsd_init_head +tsd_narenas_tdata_get +tsd_narenas_tdata_set +tsd_narenas_tdatap_get +tsd_wrapper_get +tsd_wrapper_set +tsd_nominal +tsd_prof_tdata_get +tsd_prof_tdata_set +tsd_prof_tdatap_get +tsd_quarantine_get +tsd_quarantine_set +tsd_quarantinep_get +tsd_set +tsd_tcache_enabled_get +tsd_tcache_enabled_set +tsd_tcache_enabledp_get +tsd_tcache_get +tsd_tcache_set +tsd_tcachep_get +tsd_thread_allocated_get +tsd_thread_allocated_set +tsd_thread_allocatedp_get +tsd_thread_deallocated_get +tsd_thread_deallocated_set +tsd_thread_deallocatedp_get +tsd_tls +tsd_tsd +tsd_tsdn +tsd_witness_fork_get +tsd_witness_fork_set +tsd_witness_forkp_get +tsd_witnesses_get +tsd_witnesses_set +tsd_witnessesp_get +tsdn_fetch +tsdn_null +tsdn_tsd +u2rz +valgrind_freelike_block +valgrind_make_mem_defined +valgrind_make_mem_noaccess +valgrind_make_mem_undefined +witness_assert_lockless +witness_assert_not_owner +witness_assert_owner +witness_fork_cleanup +witness_init +witness_lock +witness_lock_error +witness_lockless_error +witness_not_owner_error +witness_owner +witness_owner_error +witness_postfork_child +witness_postfork_parent +witness_prefork +witness_unlock +witnesses_cleanup +zone_register diff --git a/memory/jemalloc/src/include/jemalloc/internal/private_unnamespace.sh b/memory/jemalloc/src/include/jemalloc/internal/private_unnamespace.sh new file mode 100755 index 000000000..23fed8e80 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/private_unnamespace.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +for symbol in `cat $1` ; do + echo "#undef ${symbol}" +done diff --git a/memory/jemalloc/src/include/jemalloc/internal/prng.h b/memory/jemalloc/src/include/jemalloc/internal/prng.h new file mode 100644 index 000000000..c2bda19c6 --- /dev/null +++ 
b/memory/jemalloc/src/include/jemalloc/internal/prng.h @@ -0,0 +1,207 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Simple linear congruential pseudo-random number generator: + * + * prng(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example, the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. + */ + +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint32_t prng_state_next_u32(uint32_t state); +uint64_t prng_state_next_u64(uint64_t state); +size_t prng_state_next_zu(size_t state); + +uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, + bool atomic); +uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); +size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); + +uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); +uint64_t prng_range_u64(uint64_t *state, uint64_t range); +size_t prng_range_zu(size_t *state, size_t range, bool atomic); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) +JEMALLOC_ALWAYS_INLINE uint32_t +prng_state_next_u32(uint32_t state) +{ + + return ((state * PRNG_A_32) + PRNG_C_32); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_state_next_u64(uint64_t state) +{ + + return ((state * PRNG_A_64) + PRNG_C_64); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_state_next_zu(size_t state) +{ + +#if LG_SIZEOF_PTR == 2 + return ((state * PRNG_A_32) + PRNG_C_32); +#elif LG_SIZEOF_PTR == 3 + return ((state * PRNG_A_64) + PRNG_C_64); +#else +#error Unsupported pointer size +#endif +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) +{ + uint32_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 32); + + if (atomic) { + uint32_t state0; + + do { + state0 = atomic_read_uint32(state); + state1 = prng_state_next_u32(state0); + } while (atomic_cas_uint32(state, state0, state1)); + } else { + state1 = prng_state_next_u32(*state); + *state = state1; + } + ret = state1 >> (32 - lg_range); + + return (ret); +} + +/* 64-bit atomic operations cannot be supported on all relevant platforms. 
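+ * Consequently this variant, unlike its u32/zu counterparts, takes no
+ * atomic argument; callers sharing 64-bit state across threads must
+ * synchronize externally.  A minimal usage sketch (the seed value is
+ * arbitrary):
+ *
+ *   uint64_t state = 42;
+ *   uint64_t r = prng_lg_range_u64(&state, 8);   (r is in [0, 2^8))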
*/ +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range_u64(uint64_t *state, unsigned lg_range) +{ + uint64_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 64); + + state1 = prng_state_next_u64(*state); + *state = state1; + ret = state1 >> (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) +{ + size_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); + + if (atomic) { + size_t state0; + + do { + state0 = atomic_read_z(state); + state1 = prng_state_next_zu(state0); + } while (atomic_cas_z(state, state0, state1)); + } else { + state1 = prng_state_next_zu(*state); + *state = state1; + } + ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_range_u32(uint32_t *state, uint32_t range, bool atomic) +{ + uint32_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u32(state, lg_range, atomic); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_range_u64(uint64_t *state, uint64_t range) +{ + uint64_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u64(state, lg_range); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_range_zu(size_t *state, size_t range, bool atomic) +{ + size_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_zu(state, lg_range, atomic); + } while (ret >= range); + + return (ret); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/prof.h b/memory/jemalloc/src/include/jemalloc/internal/prof.h new file mode 100644 index 000000000..8293b71ed --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/prof.h @@ -0,0 +1,547 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#define PROF_BT_MAX 128 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all gctx's. 
No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; + +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; + + /* + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. + */ + uint64_t thr_uid; + uint64_t thr_discrim; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; + + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this gctx to be in a state of + * limbo due to one of: + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * gctx. + */ + unsigned nlimbo; + + /* + * Tree of profile counters, one for each thread that has allocated in + * this context. 
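+	 * Multiple tctx's owned by a single thread can coexist in this
+	 * tree; see the tctx_uid comment in prof_tctx_s above.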
+ */ + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; +}; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; + + bool attached; + bool expired; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; + + /* Sampling state. */ + uint64_t prng_state; + uint64_t bytes_until_sample; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; +}; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. 
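+ * The mean number of allocated bytes between samples is
+ * 2^lg_prof_sample (see opt_lg_prof_sample above).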
+ */ +extern size_t lg_prof_sample; + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); +#ifdef JEMALLOC_JET +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +const prof_cnt_t *prof_cnt_all(void); +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *prof_dump_open; +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +extern prof_dump_header_t *prof_dump_header; +#endif +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); +void prof_sample_threshold_update(prof_tdata_t *tdata); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +bool prof_active_get_unlocked(void); +bool prof_gdump_get_unlocked(void); +prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *tctx); +bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, + prof_tdata_t **tdata_out); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, + bool update); +void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, + size_t old_usize, prof_tctx_t *old_tctx); +void prof_free(tsd_t *tsd, const void *ptr, size_t usize); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) +{ + + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. 
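+	 * The race is benign: a stale read merely causes a few
+	 * allocations around the transition to be sampled (or not).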
+ */ + return (prof_active); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) +{ + + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. + */ + return (prof_gdump_val); +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) +{ + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = tsd_prof_tdata_get(tsd); + if (create) { + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } + + return (tdata); +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_tctx_get(tsdn_t *tsdn, const void *ptr) +{ + + cassert(config_prof); + assert(ptr != NULL); + + return (arena_prof_tctx_get(tsdn, ptr)); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_set(tsdn, ptr, usize, tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) +{ + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, true); + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) + tdata = NULL; + + if (tdata_out != NULL) + *tdata_out = tdata; + + if (unlikely(tdata == NULL)) + return (true); + + if (likely(tdata->bytes_until_sample >= usize)) { + if (update) + tdata->bytes_until_sample -= usize; + return (true); + } else { + /* Compute new sample threshold. */ + if (update) + prof_sample_threshold_update(tdata); + return (!tdata->active); + } +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) +{ + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; + + assert(usize == s2u(usize)); + + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) + ret = (prof_tctx_t *)(uintptr_t)1U; + else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(tsd, &bt); + } + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + assert(usize == isalloc(tsdn, ptr, true)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_malloc_sample_object(tsdn, ptr, usize, tctx); + else + prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); +} + +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, + bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx) +{ + bool sampled, old_sampled; + + cassert(config_prof); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); + + if (prof_active && !updated && ptr != NULL) { + assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); + if (prof_sample_accum_update(tsd, usize, true, NULL)) { + /* + * Don't sample. 
The usize passed to prof_alloc_prep() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. + */ + prof_alloc_rollback(tsd, tctx, true); + tctx = (prof_tctx_t *)(uintptr_t)1U; + } + } + + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + + if (unlikely(sampled)) + prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); + else + prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx); + + if (unlikely(old_sampled)) + prof_free_sampled_object(tsd, old_usize, old_tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const void *ptr, size_t usize) +{ + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); + + cassert(config_prof); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_free_sampled_object(tsd, usize, tctx); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/public_namespace.sh b/memory/jemalloc/src/include/jemalloc/internal/public_namespace.sh new file mode 100755 index 000000000..362109f71 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/public_namespace.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +for nm in `cat $1` ; do + n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` + echo "#define je_${n} JEMALLOC_N(${n})" +done diff --git a/memory/jemalloc/src/include/jemalloc/internal/public_unnamespace.sh b/memory/jemalloc/src/include/jemalloc/internal/public_unnamespace.sh new file mode 100755 index 000000000..4239d1775 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/public_unnamespace.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +for nm in `cat $1` ; do + n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` + echo "#undef je_${n}" +done diff --git a/memory/jemalloc/src/include/jemalloc/internal/ql.h b/memory/jemalloc/src/include/jemalloc/internal/ql.h new file mode 100644 index 000000000..1834bb855 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/ql.h @@ -0,0 +1,81 @@ +/* List definitions. */ +#define ql_head(a_type) \ +struct { \ + a_type *qlh_first; \ +} + +#define ql_head_initializer(a_head) {NULL} + +#define ql_elm(a_type) qr(a_type) + +/* List functions. */ +#define ql_new(a_head) do { \ + (a_head)->qlh_first = NULL; \ +} while (0) + +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) + +#define ql_first(a_head) ((a_head)->qlh_first) + +#define ql_last(a_head, a_field) \ + ((ql_first(a_head) != NULL) \ + ? qr_prev(ql_first(a_head), a_field) : NULL) + +#define ql_next(a_head, a_elm, a_field) \ + ((ql_last(a_head, a_field) != (a_elm)) \ + ? qr_next((a_elm), a_field) : NULL) + +#define ql_prev(a_head, a_elm, a_field) \ + ((ql_first(a_head) != (a_elm)) ? 
qr_prev((a_elm), a_field) \ + : NULL) + +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ + qr_before_insert((a_qlelm), (a_elm), a_field); \ + if (ql_first(a_head) == (a_qlelm)) { \ + ql_first(a_head) = (a_elm); \ + } \ +} while (0) + +#define ql_after_insert(a_qlelm, a_elm, a_field) \ + qr_after_insert((a_qlelm), (a_elm), a_field) + +#define ql_head_insert(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) != NULL) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = (a_elm); \ +} while (0) + +#define ql_tail_insert(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) != NULL) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = qr_next((a_elm), a_field); \ +} while (0) + +#define ql_remove(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) == (a_elm)) { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ + } \ + if (ql_first(a_head) != (a_elm)) { \ + qr_remove((a_elm), a_field); \ + } else { \ + ql_first(a_head) = NULL; \ + } \ +} while (0) + +#define ql_head_remove(a_head, a_type, a_field) do { \ + a_type *t = ql_first(a_head); \ + ql_remove((a_head), t, a_field); \ +} while (0) + +#define ql_tail_remove(a_head, a_type, a_field) do { \ + a_type *t = ql_last(a_head, a_field); \ + ql_remove((a_head), t, a_field); \ +} while (0) + +#define ql_foreach(a_var, a_head, a_field) \ + qr_foreach((a_var), ql_first(a_head), a_field) + +#define ql_reverse_foreach(a_var, a_head, a_field) \ + qr_reverse_foreach((a_var), ql_first(a_head), a_field) diff --git a/memory/jemalloc/src/include/jemalloc/internal/qr.h b/memory/jemalloc/src/include/jemalloc/internal/qr.h new file mode 100644 index 000000000..0fbaec25e --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/qr.h @@ -0,0 +1,69 @@ +/* Ring definitions. */ +#define qr(a_type) \ +struct { \ + a_type *qre_next; \ + a_type *qre_prev; \ +} + +/* Ring functions. */ +#define qr_new(a_qr, a_field) do { \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) + +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) + +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qrelm); \ + (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ + (a_qrelm)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_after_insert(a_qrelm, a_qr, a_field) \ + do \ + { \ + (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ + (a_qr)->a_field.qre_prev = (a_qrelm); \ + (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ + (a_qrelm)->a_field.qre_next = (a_qr); \ + } while (0) + +#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ + void *t; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + t = (a_qr_a)->a_field.qre_prev; \ + (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ + (a_qr_b)->a_field.qre_prev = t; \ +} while (0) + +/* + * qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. 
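+ * Exchanging the prev pointers (and the corresponding next pointers)
+ * at two elements is an involution: applied to elements of two
+ * distinct rings it melds them into one ring, and applied to two
+ * elements of the same ring it splits that ring between them.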
+ */ +#define qr_split(a_qr_a, a_qr_b, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_field) + +#define qr_remove(a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev->a_field.qre_next \ + = (a_qr)->a_field.qre_next; \ + (a_qr)->a_field.qre_next->a_field.qre_prev \ + = (a_qr)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_foreach(var, a_qr, a_field) \ + for ((var) = (a_qr); \ + (var) != NULL; \ + (var) = (((var)->a_field.qre_next != (a_qr)) \ + ? (var)->a_field.qre_next : NULL)) + +#define qr_reverse_foreach(var, a_qr, a_field) \ + for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ + (var) != NULL; \ + (var) = (((var) != (a_qr)) \ + ? (var)->a_field.qre_prev : NULL)) diff --git a/memory/jemalloc/src/include/jemalloc/internal/quarantine.h b/memory/jemalloc/src/include/jemalloc/internal/quarantine.h new file mode 100644 index 000000000..ae607399f --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/quarantine.h @@ -0,0 +1,60 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct quarantine_obj_s quarantine_obj_t; +typedef struct quarantine_s quarantine_t; + +/* Default per thread quarantine size if valgrind is enabled. */ +#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct quarantine_obj_s { + void *ptr; + size_t usize; +}; + +struct quarantine_s { + size_t curbytes; + size_t curobjs; + size_t first; +#define LG_MAXOBJS_INIT 10 + size_t lg_maxobjs; + quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void quarantine_alloc_hook_work(tsd_t *tsd); +void quarantine(tsd_t *tsd, void *ptr); +void quarantine_cleanup(tsd_t *tsd); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void quarantine_alloc_hook(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) +JEMALLOC_ALWAYS_INLINE void +quarantine_alloc_hook(void) +{ + tsd_t *tsd; + + assert(config_fill && opt_quarantine); + + tsd = tsd_fetch(); + if (tsd_quarantine_get(tsd) == NULL) + quarantine_alloc_hook_work(tsd); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/memory/jemalloc/src/include/jemalloc/internal/rb.h b/memory/jemalloc/src/include/jemalloc/internal/rb.h new file mode 100644 index 000000000..3770342f8 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/rb.h @@ -0,0 +1,1003 @@ +/*- + ******************************************************************************* + * + * cpp macro implementation of left-leaning 2-3 red-black trees. Parent + * pointers are not used, and color bits are stored in the least significant + * bit of right-child pointers (if RB_COMPACT is defined), thus making node + * linkage as compact as is possible for red-black trees. + * + * Usage: + * + * #include <stdint.h> + * #include <stdbool.h> + * #define NDEBUG // (Optional, see assert(3).) + * #include <assert.h> + * #define RB_COMPACT // (Optional, embed color bits in right-child pointers.) 
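+ *                      // (With RB_COMPACT, nodes must be at least
+ *                      // 2-byte aligned so that the low bit of the
+ *                      // right-child pointer is free for the color
+ *                      // bit; rbt_node_new() asserts this.)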
+ * #include <rb.h> + * ... + * + ******************************************************************************* + */ + +#ifndef RB_H_ +#define RB_H_ + +#ifdef RB_COMPACT +/* Node structure. */ +#define rb_node(a_type) \ +struct { \ + a_type *rbn_left; \ + a_type *rbn_right_red; \ +} +#else +#define rb_node(a_type) \ +struct { \ + a_type *rbn_left; \ + a_type *rbn_right; \ + bool rbn_red; \ +} +#endif + +/* Root structure. */ +#define rb_tree(a_type) \ +struct { \ + a_type *rbt_root; \ +} + +/* Left accessors. */ +#define rbtn_left_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_left) +#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ + (a_node)->a_field.rbn_left = a_left; \ +} while (0) + +#ifdef RB_COMPACT +/* Right accessors. */ +#define rbtn_right_get(a_type, a_field, a_node) \ + ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ + & ((ssize_t)-2))) +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ + | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ +} while (0) + +/* Color accessors. */ +#define rbtn_red_get(a_type, a_field, a_node) \ + ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ + & ((size_t)1))) +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ + (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ + | ((ssize_t)a_red)); \ +} while (0) +#define rbtn_red_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ + (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ +} while (0) +#define rbtn_black_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ + (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ +} while (0) + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + /* Bookkeeping bit cannot be used by node pointer. */ \ + assert(((uintptr_t)(a_node) & 0x1) == 0); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) +#else +/* Right accessors. */ +#define rbtn_right_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_right) +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ + (a_node)->a_field.rbn_right = a_right; \ +} while (0) + +/* Color accessors. */ +#define rbtn_red_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_red) +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ + (a_node)->a_field.rbn_red = (a_red); \ +} while (0) +#define rbtn_red_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_red = true; \ +} while (0) +#define rbtn_black_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_red = false; \ +} while (0) + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) +#endif + +/* Tree initializer. */ +#define rb_new(a_type, a_field, a_rbt) do { \ + (a_rbt)->rbt_root = NULL; \ +} while (0) + +/* Internal utility macros. 
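+ * rbtn_first()/rbtn_last() descend to the minimum/maximum node of a
+ * subtree.  The rotate macros perform single rotations and leave the
+ * new subtree root in r_node; relinking it into the parent is the
+ * caller's responsibility.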
*/ +#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ + (r_node) = (a_root); \ + if ((r_node) != NULL) { \ + for (; \ + rbtn_left_get(a_type, a_field, (r_node)) != NULL; \ + (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ + } \ + } \ +} while (0) + +#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ + (r_node) = (a_root); \ + if ((r_node) != NULL) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ + (r_node) = rbtn_right_get(a_type, a_field, (r_node))) { \ + } \ + } \ +} while (0) + +#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ + (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ + rbtn_right_set(a_type, a_field, (a_node), \ + rbtn_left_get(a_type, a_field, (r_node))); \ + rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ +} while (0) + +#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ + (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ + rbtn_left_set(a_type, a_field, (a_node), \ + rbtn_right_get(a_type, a_field, (r_node))); \ + rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ +} while (0) + +/* + * The rb_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to rb_gen(). + */ + +#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ +a_attr void \ +a_prefix##new(a_rbt_type *rbtree); \ +a_attr bool \ +a_prefix##empty(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##first(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##last(a_rbt_type *rbtree); \ +a_attr a_type * \ +a_prefix##next(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key); \ +a_attr a_type * \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key); \ +a_attr a_type * \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key); \ +a_attr void \ +a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ +a_attr void \ +a_prefix##remove(a_rbt_type *rbtree, a_type *node); \ +a_attr a_type * \ +a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ + a_rbt_type *, a_type *, void *), void *arg); \ +a_attr a_type * \ +a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg); + +/* + * The rb_gen() macro generates a type-specific red-black tree implementation, + * based on the above cpp macros. + * + * Arguments: + * + * a_attr : Function attribute for generated functions (ex: static). + * a_prefix : Prefix for generated functions (ex: ex_). + * a_rb_type : Type for red-black tree data structure (ex: ex_t). + * a_type : Type for red-black tree node data structure (ex: ex_node_t). + * a_field : Name of red-black tree node linkage (ex: ex_link). + * a_cmp : Node comparison function name, with the following prototype: + * int (a_cmp *)(a_type *a_node, a_type *a_other); + * ^^^^^^ + * or a_key + * Interpretation of comparison function return values: + * -1 : a_node < a_other + * 0 : a_node == a_other + * 1 : a_node > a_other + * In all cases, the a_node or a_key macro argument is the first + * argument to the comparison function, which makes it possible + * to write comparison functions that treat the first argument + * specially. 
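+ *
+ * A minimal comparator sketch (the "key" field is illustrative, not
+ * part of the template):
+ *
+ *   static int
+ *   ex_cmp(ex_node_t *a, ex_node_t *b)
+ *   {
+ *       return ((a->key > b->key) - (a->key < b->key));
+ *   }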
+ * + * Assuming the following setup: + * + * typedef struct ex_node_s ex_node_t; + * struct ex_node_s { + * rb_node(ex_node_t) ex_link; + * }; + * typedef rb_tree(ex_node_t) ex_t; + * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp) + * + * The following API is generated: + * + * static void + * ex_new(ex_t *tree); + * Description: Initialize a red-black tree structure. + * Args: + * tree: Pointer to an uninitialized red-black tree object. + * + * static bool + * ex_empty(ex_t *tree); + * Description: Determine whether tree is empty. + * Args: + * tree: Pointer to an initialized red-black tree object. + * Ret: True if tree is empty, false otherwise. + * + * static ex_node_t * + * ex_first(ex_t *tree); + * static ex_node_t * + * ex_last(ex_t *tree); + * Description: Get the first/last node in tree. + * Args: + * tree: Pointer to an initialized red-black tree object. + * Ret: First/last node in tree, or NULL if tree is empty. + * + * static ex_node_t * + * ex_next(ex_t *tree, ex_node_t *node); + * static ex_node_t * + * ex_prev(ex_t *tree, ex_node_t *node); + * Description: Get node's successor/predecessor. + * Args: + * tree: Pointer to an initialized red-black tree object. + * node: A node in tree. + * Ret: node's successor/predecessor in tree, or NULL if node is + * last/first. + * + * static ex_node_t * + * ex_search(ex_t *tree, const ex_node_t *key); + * Description: Search for node that matches key. + * Args: + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or NULL if no match. + * + * static ex_node_t * + * ex_nsearch(ex_t *tree, const ex_node_t *key); + * static ex_node_t * + * ex_psearch(ex_t *tree, const ex_node_t *key); + * Description: Search for node that matches key. If no match is found, + * return what would be key's successor/predecessor, were + * key in tree. + * Args: + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or if no match, hypothetical node's + * successor/predecessor (NULL if no successor/predecessor). + * + * static void + * ex_insert(ex_t *tree, ex_node_t *node); + * Description: Insert node into tree. + * Args: + * tree: Pointer to an initialized red-black tree object. + * node: Node to be inserted into tree. + * + * static void + * ex_remove(ex_t *tree, ex_node_t *node); + * Description: Remove node from tree. + * Args: + * tree: Pointer to an initialized red-black tree object. + * node: Node in tree to be removed. + * + * static ex_node_t * + * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, + * ex_node_t *, void *), void *arg); + * static ex_node_t * + * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node *(*cb)(ex_t *, + * ex_node_t *, void *), void *arg); + * Description: Iterate forward/backward over tree, starting at node. If + * tree is modified, iteration must be immediately + * terminated by the callback function that causes the + * modification. + * Args: + * tree : Pointer to an initialized red-black tree object. + * start: Node at which to start iteration, or NULL to start at + * first/last node. + * cb : Callback function, which is called for each node during + * iteration. Under normal circumstances the callback function + * should return NULL, which causes iteration to continue. If a + * callback function returns non-NULL, iteration is immediately + * terminated and the non-NULL return value is returned by the + * iterator. 
This is useful for re-starting iteration after + * modifying tree. + * arg : Opaque pointer passed to cb(). + * Ret: NULL if iteration completed, or the non-NULL callback return value + * that caused termination of the iteration. + * + * static void + * ex_destroy(ex_t *tree, void (*cb)(ex_node_t *, void *), void *arg); + * Description: Iterate over the tree with post-order traversal, remove + * each node, and run the callback if non-null. This is + * used for destroying a tree without paying the cost to + * rebalance it. The tree must not be otherwise altered + * during traversal. + * Args: + * tree: Pointer to an initialized red-black tree object. + * cb : Callback function, which, if non-null, is called for each node + * during iteration. There is no way to stop iteration once it + * has begun. + * arg : Opaque pointer passed to cb(). + */ +#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_rbt_type *rbtree) { \ + rb_new(a_type, a_field, rbtree); \ +} \ +a_attr bool \ +a_prefix##empty(a_rbt_type *rbtree) { \ + return (rbtree->rbt_root == NULL); \ +} \ +a_attr a_type * \ +a_prefix##first(a_rbt_type *rbtree) { \ + a_type *ret; \ + rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##last(a_rbt_type *rbtree) { \ + a_type *ret; \ + rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ + a_type *ret; \ + if (rbtn_right_get(a_type, a_field, node) != NULL) { \ + rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ + a_field, node), ret); \ + } else { \ + a_type *tnode = rbtree->rbt_root; \ + assert(tnode != NULL); \ + ret = NULL; \ + while (true) { \ + int cmp = (a_cmp)(node, tnode); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + break; \ + } \ + assert(tnode != NULL); \ + } \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ + a_type *ret; \ + if (rbtn_left_get(a_type, a_field, node) != NULL) { \ + rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ + a_field, node), ret); \ + } else { \ + a_type *tnode = rbtree->rbt_root; \ + assert(tnode != NULL); \ + ret = NULL; \ + while (true) { \ + int cmp = (a_cmp)(node, tnode); \ + if (cmp < 0) { \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + ret = tnode; \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + break; \ + } \ + assert(tnode != NULL); \ + } \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ + a_type *ret; \ + int cmp; \ + ret = rbtree->rbt_root; \ + while (ret != NULL \ + && (cmp = (a_cmp)(key, ret)) != 0) { \ + if (cmp < 0) { \ + ret = rbtn_left_get(a_type, a_field, ret); \ + } else { \ + ret = rbtn_right_get(a_type, a_field, ret); \ + } \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ + a_type *ret; \ + a_type *tnode = rbtree->rbt_root; \ + ret = NULL; \ + while (tnode != NULL) { \ + int cmp = (a_cmp)(key, tnode); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + ret = tnode; \ + break; \ + } \ + } \ + return (ret); \ +} \ 
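+/* psearch() mirrors nsearch(): ret tracks the most recent node from */\
+/* which the search descended right, i.e. key's predecessor when no */\
+/* exact match exists. */ \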
+a_attr a_type * \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ + a_type *ret; \ + a_type *tnode = rbtree->rbt_root; \ + ret = NULL; \ + while (tnode != NULL) { \ + int cmp = (a_cmp)(key, tnode); \ + if (cmp < 0) { \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + ret = tnode; \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + ret = tnode; \ + break; \ + } \ + } \ + return (ret); \ +} \ +a_attr void \ +a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ + struct { \ + a_type *node; \ + int cmp; \ + } path[sizeof(void *) << 4], *pathp; \ + rbt_node_new(a_type, a_field, rbtree, node); \ + /* Wind. */ \ + path->node = rbtree->rbt_root; \ + for (pathp = path; pathp->node != NULL; pathp++) { \ + int cmp = pathp->cmp = a_cmp(node, pathp->node); \ + assert(cmp != 0); \ + if (cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + } \ + } \ + pathp->node = node; \ + /* Unwind. */ \ + for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ + a_type *cnode = pathp->node; \ + if (pathp->cmp < 0) { \ + a_type *left = pathp[1].node; \ + rbtn_left_set(a_type, a_field, cnode, left); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ + /* Fix up 4-node. */ \ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, cnode, tnode); \ + cnode = tnode; \ + } \ + } else { \ + return; \ + } \ + } else { \ + a_type *right = pathp[1].node; \ + rbtn_right_set(a_type, a_field, cnode, right); \ + if (rbtn_red_get(a_type, a_field, right)) { \ + a_type *left = rbtn_left_get(a_type, a_field, cnode); \ + if (left != NULL && rbtn_red_get(a_type, a_field, \ + left)) { \ + /* Split 4-node. */ \ + rbtn_black_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, right); \ + rbtn_red_set(a_type, a_field, cnode); \ + } else { \ + /* Lean left. */ \ + a_type *tnode; \ + bool tred = rbtn_red_get(a_type, a_field, cnode); \ + rbtn_rotate_left(a_type, a_field, cnode, tnode); \ + rbtn_color_set(a_type, a_field, tnode, tred); \ + rbtn_red_set(a_type, a_field, cnode); \ + cnode = tnode; \ + } \ + } else { \ + return; \ + } \ + } \ + pathp->node = cnode; \ + } \ + /* Set root, and make it black. */ \ + rbtree->rbt_root = path->node; \ + rbtn_black_set(a_type, a_field, rbtree->rbt_root); \ +} \ +a_attr void \ +a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ + struct { \ + a_type *node; \ + int cmp; \ + } *pathp, *nodep, path[sizeof(void *) << 4]; \ + /* Wind. */ \ + nodep = NULL; /* Silence compiler warning. */ \ + path->node = rbtree->rbt_root; \ + for (pathp = path; pathp->node != NULL; pathp++) { \ + int cmp = pathp->cmp = a_cmp(node, pathp->node); \ + if (cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + if (cmp == 0) { \ + /* Find node's successor, in preparation for swap. */ \ + pathp->cmp = 1; \ + nodep = pathp; \ + for (pathp++; pathp->node != NULL; \ + pathp++) { \ + pathp->cmp = -1; \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } \ + break; \ + } \ + } \ + } \ + assert(nodep->node == node); \ + pathp--; \ + if (pathp->node != node) { \ + /* Swap node with its successor. 
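+	       (The two nodes exchange tree linkage and color; node \
+	       contents are never copied, as nodes are embedded in \
+	       client structures.) \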
*/ \ + bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ + rbtn_color_set(a_type, a_field, pathp->node, \ + rbtn_red_get(a_type, a_field, node)); \ + rbtn_left_set(a_type, a_field, pathp->node, \ + rbtn_left_get(a_type, a_field, node)); \ + /* If node's successor is its right child, the following code */\ + /* will do the wrong thing for the right child pointer. */\ + /* However, it doesn't matter, because the pointer will be */\ + /* properly set when the successor is pruned. */\ + rbtn_right_set(a_type, a_field, pathp->node, \ + rbtn_right_get(a_type, a_field, node)); \ + rbtn_color_set(a_type, a_field, node, tred); \ + /* The pruned leaf node's child pointers are never accessed */\ + /* again, so don't bother setting them to nil. */\ + nodep->node = pathp->node; \ + pathp->node = node; \ + if (nodep == path) { \ + rbtree->rbt_root = nodep->node; \ + } else { \ + if (nodep[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, nodep[-1].node, \ + nodep->node); \ + } else { \ + rbtn_right_set(a_type, a_field, nodep[-1].node, \ + nodep->node); \ + } \ + } \ + } else { \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + if (left != NULL) { \ + /* node has no successor, but it has a left child. */\ + /* Splice node out, without losing the left child. */\ + assert(!rbtn_red_get(a_type, a_field, node)); \ + assert(rbtn_red_get(a_type, a_field, left)); \ + rbtn_black_set(a_type, a_field, left); \ + if (pathp == path) { \ + rbtree->rbt_root = left; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + left); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + left); \ + } \ + } \ + return; \ + } else if (pathp == path) { \ + /* The tree only contained one node. */ \ + rbtree->rbt_root = NULL; \ + return; \ + } \ + } \ + if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + /* Prune red node, which requires no fixup. */ \ + assert(pathp[-1].cmp < 0); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ + return; \ + } \ + /* The node to be pruned is black, so unwind until balance is */\ + /* restored. */\ + pathp->node = NULL; \ + for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ + assert(pathp->cmp != 0); \ + if (pathp->cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp->node, \ + pathp[1].node); \ + if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + a_type *right = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + a_type *rightleft = rbtn_left_get(a_type, a_field, \ + right); \ + a_type *tnode; \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ + /* In the following diagrams, ||, //, and \\ */\ + /* indicate the path to the removed node. */\ + /* */\ + /* || */\ + /* pathp(r) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + /* */\ + rbtn_black_set(a_type, a_field, pathp->node); \ + rbtn_rotate_right(a_type, a_field, right, tnode); \ + rbtn_right_set(a_type, a_field, pathp->node, tnode);\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + } else { \ + /* || */\ + /* pathp(r) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + /* */\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + } \ + /* Balance restored, but rotation modified subtree */\ + /* root. 
*/\ + assert((uintptr_t)pathp > (uintptr_t)path); \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + return; \ + } else { \ + a_type *right = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + a_type *rightleft = rbtn_left_get(a_type, a_field, \ + right); \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ + /* || */\ + /* pathp(b) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, rightleft); \ + rbtn_rotate_right(a_type, a_field, right, tnode); \ + rbtn_right_set(a_type, a_field, pathp->node, tnode);\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root, which may actually be the tree */\ + /* root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + a_type *tnode; \ + rbtn_red_set(a_type, a_field, pathp->node); \ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + pathp->node = tnode; \ + } \ + } \ + } else { \ + a_type *left; \ + rbtn_right_set(a_type, a_field, pathp->node, \ + pathp[1].node); \ + left = rbtn_left_get(a_type, a_field, pathp->node); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + a_type *tnode; \ + a_type *leftright = rbtn_right_get(a_type, a_field, \ + left); \ + a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ + leftright); \ + if (leftrightleft != NULL && rbtn_red_get(a_type, \ + a_field, leftrightleft)) { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (r) (b) */\ + /* \ */\ + /* (b) */\ + /* / */\ + /* (r) */\ + a_type *unode; \ + rbtn_black_set(a_type, a_field, leftrightleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + unode); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + rbtn_right_set(a_type, a_field, unode, tnode); \ + rbtn_rotate_left(a_type, a_field, unode, tnode); \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (r) (b) */\ + /* \ */\ + /* (b) */\ + /* / */\ + /* (b) */\ + assert(leftright != NULL); \ + rbtn_red_set(a_type, a_field, leftright); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + rbtn_black_set(a_type, a_field, tnode); \ + } \ + /* Balance restored, but rotation modified subtree */\ + /* root, which may actually be the tree root. */\ + if (pathp == path) { \ + /* Set root. 
*/ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + } \ + return; \ + } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ + /* || */\ + /* pathp(r) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, pathp->node); \ + rbtn_red_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root. */\ + assert((uintptr_t)pathp > (uintptr_t)path); \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(r) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + rbtn_red_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, pathp->node); \ + /* Balance restored. */ \ + return; \ + } \ + } else { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root, which may actually be the tree */\ + /* root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + rbtn_red_set(a_type, a_field, left); \ + } \ + } \ + } \ + } \ + /* Set root. 
*/ \ + rbtree->rbt_root = path->node; \ + assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root)); \ +} \ +a_attr a_type * \ +a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + if (node == NULL) { \ + return (NULL); \ + } else { \ + a_type *ret; \ + if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ + a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ + arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + int cmp = a_cmp(start, node); \ + if (cmp < 0) { \ + a_type *ret; \ + if ((ret = a_prefix##iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } else if (cmp > 0) { \ + return (a_prefix##iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg)); \ + } else { \ + a_type *ret; \ + if ((ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ + a_rbt_type *, a_type *, void *), void *arg) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \ + cb, arg); \ + } else { \ + ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + if (node == NULL) { \ + return (NULL); \ + } else { \ + a_type *ret; \ + if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ + a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg) { \ + int cmp = a_cmp(start, node); \ + if (cmp > 0) { \ + a_type *ret; \ + if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } else if (cmp < 0) { \ + return (a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } else { \ + a_type *ret; \ + if ((ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##reverse_iter_start(rbtree, start, \ + rbtree->rbt_root, cb, arg); \ + } else { \ + ret = 
a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ + cb, arg); \ + } \ + return (ret); \ +} \ +a_attr void \ +a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ + a_type *, void *), void *arg) { \ + if (node == NULL) { \ + return; \ + } \ + a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_left_set(a_type, a_field, (node), NULL); \ + a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_right_set(a_type, a_field, (node), NULL); \ + if (cb) { \ + cb(node, arg); \ + } \ +} \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg) { \ + a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ + rbtree->rbt_root = NULL; \ +} + +#endif /* RB_H_ */ diff --git a/memory/jemalloc/src/include/jemalloc/internal/rtree.h b/memory/jemalloc/src/include/jemalloc/internal/rtree.h new file mode 100644 index 000000000..8d0c584da --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/rtree.h @@ -0,0 +1,366 @@ +/* + * This radix tree implementation is tailored to the singular purpose of + * associating metadata with chunks that are currently owned by jemalloc. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct rtree_node_elm_s rtree_node_elm_t; +typedef struct rtree_level_s rtree_level_t; +typedef struct rtree_s rtree_t; + +/* + * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the + * machine address width. + */ +#define LG_RTREE_BITS_PER_LEVEL 4 +#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) +/* Maximum rtree height. */ +#define RTREE_HEIGHT_MAX \ + ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) + +/* Used for two-stage lock-free node initialization. */ +#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) + +/* + * The node allocation callback function's argument is the number of contiguous + * rtree_node_elm_t structures to allocate, and the resulting memory must be + * zeroed. + */ +typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t); +typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *); + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct rtree_node_elm_s { + union { + void *pun; + rtree_node_elm_t *child; + extent_node_t *val; + }; +}; + +struct rtree_level_s { + /* + * A non-NULL subtree points to a subtree rooted along the hypothetical + * path to the leaf node corresponding to key 0. Depending on what keys + * have been used to store to the tree, an arbitrary combination of + * subtree pointers may remain NULL. + * + * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. + * This results in a 3-level tree, and the leftmost leaf can be directly + * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding + * 0x00000000) can be accessed via subtrees[1], and the remainder of the + * tree can be accessed via subtrees[0]. + * + * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...] + * + * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ] + * + * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...] + * + * This has practical implications on x64, which currently uses only the + * lower 47 bits of virtual address space in userland, thus leaving + * subtrees[0] unused and avoiding a level of tree traversal. 
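
The level and subkey arithmetic described in the comment above can be made concrete with a small standalone sketch. It mirrors rtree_subkey() under assumed parameters (64-bit pointers, 16 bits per level, a hypothetical three-level configuration); the key value is illustrative:

#include <stdint.h>
#include <stdio.h>

/* Mirrors rtree_subkey(): cumbits counts the key bits consumed so far,
 * starting from the most significant end of the 64-bit key. */
static uintptr_t
subkey(uintptr_t key, unsigned bits, unsigned cumbits)
{
	return ((key >> (64 - cumbits)) & (((uintptr_t)1 << bits) - 1));
}

int
main(void)
{
	uintptr_t key = (uintptr_t)0x00007f2a4c800000ULL; /* hypothetical chunk address */

	printf("level 0: 0x%04lx\n", (unsigned long)subkey(key, 16, 16));
	printf("level 1: 0x%04lx\n", (unsigned long)subkey(key, 16, 32));
	printf("level 2: 0x%04lx\n", (unsigned long)subkey(key, 16, 48));
	return (0);
}

This prints 0x0000, 0x7f2a, and 0x4c80: for a typical x86-64 user-space address the level-0 subkey is always zero, which is why the precomputed start_level table lets lookups skip the root level entirely.
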
+	 */
+	union {
+		void			*subtree_pun;
+		rtree_node_elm_t	*subtree;
+	};
+	/* Number of key bits distinguished by this level. */
+	unsigned		bits;
+	/*
+	 * Cumulative number of key bits distinguished by traversing to
+	 * corresponding tree level.
+	 */
+	unsigned		cumbits;
+};
+
+struct rtree_s {
+	rtree_node_alloc_t	*alloc;
+	rtree_node_dalloc_t	*dalloc;
+	unsigned		height;
+	/*
+	 * Precomputed table used to convert from the number of leading 0 key
+	 * bits to which subtree level to start at.
+	 */
+	unsigned		start_level[RTREE_HEIGHT_MAX];
+	rtree_level_t		levels[RTREE_HEIGHT_MAX];
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+bool	rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
+    rtree_node_dalloc_t *dalloc);
+void	rtree_delete(rtree_t *rtree);
+rtree_node_elm_t	*rtree_subtree_read_hard(rtree_t *rtree,
+    unsigned level);
+rtree_node_elm_t	*rtree_child_read_hard(rtree_t *rtree,
+    rtree_node_elm_t *elm, unsigned level);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+unsigned	rtree_start_level(rtree_t *rtree, uintptr_t key);
+uintptr_t	rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
+
+bool	rtree_node_valid(rtree_node_elm_t *node);
+rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm,
+    bool dependent);
+rtree_node_elm_t	*rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
+    unsigned level, bool dependent);
+extent_node_t	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
+    bool dependent);
+void	rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
+    const extent_node_t *val);
+rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level,
+    bool dependent);
+rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level,
+    bool dependent);
+
+extent_node_t	*rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
+bool	rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
+JEMALLOC_ALWAYS_INLINE unsigned
+rtree_start_level(rtree_t *rtree, uintptr_t key)
+{
+	unsigned start_level;
+
+	if (unlikely(key == 0))
+		return (rtree->height - 1);
+
+	start_level = rtree->start_level[lg_floor(key) >>
+	    LG_RTREE_BITS_PER_LEVEL];
+	assert(start_level < rtree->height);
+	return (start_level);
+}
+
+JEMALLOC_ALWAYS_INLINE uintptr_t
+rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
+{
+
+	return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
+	    rtree->levels[level].cumbits)) & ((ZU(1) <<
+	    rtree->levels[level].bits) - 1));
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+rtree_node_valid(rtree_node_elm_t *node)
+{
+
+	return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
+{
+	rtree_node_elm_t *child;
+
+	/* Double-checked read (first read may be stale). */
+	child = elm->child;
+	if (!dependent && !rtree_node_valid(child))
+		child = atomic_read_p(&elm->pun);
+	assert(!dependent || child != NULL);
+	return (child);
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
+    bool dependent)
+{
+	rtree_node_elm_t *child;
+
+	child = rtree_child_tryread(elm, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(child)))
+		child = rtree_child_read_hard(rtree, elm, level);
+	assert(!dependent || child != NULL);
+	return (child);
+}
+
+JEMALLOC_ALWAYS_INLINE extent_node_t *
+rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
+{
+
+	if (dependent) {
+		/*
+		 * Reading a val on behalf of a pointer to a valid allocation is
+		 * guaranteed to be a clean read even without synchronization,
+		 * because the rtree update became visible in memory before the
+		 * pointer came into existence.
+		 */
+		return (elm->val);
+	} else {
+		/*
+		 * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
+		 * dependent on a previous rtree write, which means a stale read
+		 * could result if synchronization were omitted here.
+		 */
+		return (atomic_read_p(&elm->pun));
+	}
+}
+
+JEMALLOC_INLINE void
+rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
+{
+
+	atomic_write_p(&elm->pun, val);
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
+{
+	rtree_node_elm_t *subtree;
+
+	/* Double-checked read (first read may be stale). */
+	subtree = rtree->levels[level].subtree;
+	if (!dependent && unlikely(!rtree_node_valid(subtree)))
+		subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
+	assert(!dependent || subtree != NULL);
+	return (subtree);
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_node_elm_t *
+rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
+{
+	rtree_node_elm_t *subtree;
+
+	subtree = rtree_subtree_tryread(rtree, level, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(subtree)))
+		subtree = rtree_subtree_read_hard(rtree, level);
+	assert(!dependent || subtree != NULL);
+	return (subtree);
+}
+
+JEMALLOC_ALWAYS_INLINE extent_node_t *
+rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
+{
+	uintptr_t subkey;
+	unsigned start_level;
+	rtree_node_elm_t *node;
+
+	start_level = rtree_start_level(rtree, key);
+
+	node = rtree_subtree_tryread(rtree, start_level, dependent);
+#define RTREE_GET_BIAS	(RTREE_HEIGHT_MAX - rtree->height)
+	switch (start_level + RTREE_GET_BIAS) {
+#define RTREE_GET_SUBTREE(level)					\
+	case level:							\
+		assert(level < (RTREE_HEIGHT_MAX-1));			\
+		if (!dependent && unlikely(!rtree_node_valid(node)))	\
+			return (NULL);					\
+		subkey = rtree_subkey(rtree, key, level -		\
+		    RTREE_GET_BIAS);					\
+		node = rtree_child_tryread(&node[subkey], dependent);	\
+		/* Fall through. */
+#define RTREE_GET_LEAF(level)						\
+	case level:							\
+		assert(level == (RTREE_HEIGHT_MAX-1));			\
+		if (!dependent && unlikely(!rtree_node_valid(node)))	\
+			return (NULL);					\
+		subkey = rtree_subkey(rtree, key, level -		\
+		    RTREE_GET_BIAS);					\
+		/*							\
+		 * node is a leaf, so it contains values rather than	\
+		 * child pointers.
\ + */ \ + return (rtree_val_read(rtree, &node[subkey], \ + dependent)); +#if RTREE_HEIGHT_MAX > 1 + RTREE_GET_SUBTREE(0) +#endif +#if RTREE_HEIGHT_MAX > 2 + RTREE_GET_SUBTREE(1) +#endif +#if RTREE_HEIGHT_MAX > 3 + RTREE_GET_SUBTREE(2) +#endif +#if RTREE_HEIGHT_MAX > 4 + RTREE_GET_SUBTREE(3) +#endif +#if RTREE_HEIGHT_MAX > 5 + RTREE_GET_SUBTREE(4) +#endif +#if RTREE_HEIGHT_MAX > 6 + RTREE_GET_SUBTREE(5) +#endif +#if RTREE_HEIGHT_MAX > 7 + RTREE_GET_SUBTREE(6) +#endif +#if RTREE_HEIGHT_MAX > 8 + RTREE_GET_SUBTREE(7) +#endif +#if RTREE_HEIGHT_MAX > 9 + RTREE_GET_SUBTREE(8) +#endif +#if RTREE_HEIGHT_MAX > 10 + RTREE_GET_SUBTREE(9) +#endif +#if RTREE_HEIGHT_MAX > 11 + RTREE_GET_SUBTREE(10) +#endif +#if RTREE_HEIGHT_MAX > 12 + RTREE_GET_SUBTREE(11) +#endif +#if RTREE_HEIGHT_MAX > 13 + RTREE_GET_SUBTREE(12) +#endif +#if RTREE_HEIGHT_MAX > 14 + RTREE_GET_SUBTREE(13) +#endif +#if RTREE_HEIGHT_MAX > 15 + RTREE_GET_SUBTREE(14) +#endif +#if RTREE_HEIGHT_MAX > 16 +# error Unsupported RTREE_HEIGHT_MAX +#endif + RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) +#undef RTREE_GET_SUBTREE +#undef RTREE_GET_LEAF + default: not_reached(); + } +#undef RTREE_GET_BIAS + not_reached(); +} + +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) +{ + uintptr_t subkey; + unsigned i, start_level; + rtree_node_elm_t *node, *child; + + start_level = rtree_start_level(rtree, key); + + node = rtree_subtree_read(rtree, start_level, false); + if (node == NULL) + return (true); + for (i = start_level; /**/; i++, node = child) { + subkey = rtree_subkey(rtree, key, i); + if (i == rtree->height - 1) { + /* + * node is a leaf, so it contains values rather than + * child pointers. + */ + rtree_val_write(rtree, &node[subkey], val); + return (false); + } + assert(i + 1 < rtree->height); + child = rtree_child_read(rtree, &node[subkey], i, false); + if (child == NULL) + return (true); + } + not_reached(); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh b/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh new file mode 100755 index 000000000..f6fbce4ef --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/size_classes.sh @@ -0,0 +1,318 @@ +#!/bin/sh +# +# Usage: size_classes.sh <lg_qarr> <lg_tmin> <lg_parr> <lg_g> + +# The following limits are chosen such that they cover all supported platforms. + +# Pointer sizes. +lg_zarr="2 3" + +# Quanta. +lg_qarr=$1 + +# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. +lg_tmin=$2 + +# Maximum lookup size. +lg_kmax=12 + +# Page sizes. +lg_parr=`echo $3 | tr ',' ' '` + +# Size class group size (number of size classes for each size doubling). 
+lg_g=$4 + +pow2() { + e=$1 + pow2_result=1 + while [ ${e} -gt 0 ] ; do + pow2_result=$((${pow2_result} + ${pow2_result})) + e=$((${e} - 1)) + done +} + +lg() { + x=$1 + lg_result=0 + while [ ${x} -gt 1 ] ; do + lg_result=$((${lg_result} + 1)) + x=$((${x} / 2)) + done +} + +size_class() { + index=$1 + lg_grp=$2 + lg_delta=$3 + ndelta=$4 + lg_p=$5 + lg_kmax=$6 + + if [ ${lg_delta} -ge ${lg_p} ] ; then + psz="yes" + else + pow2 ${lg_p}; p=${pow2_result} + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + sz=$((${grp} + ${delta} * ${ndelta})) + npgs=$((${sz} / ${p})) + if [ ${sz} -eq $((${npgs} * ${p})) ] ; then + psz="yes" + else + psz="no" + fi + fi + + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} + if [ ${pow2_result} -lt ${ndelta} ] ; then + rem="yes" + else + rem="no" + fi + + lg_size=${lg_grp} + if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then + lg_size=$((${lg_grp} + 1)) + else + lg_size=${lg_grp} + rem="yes" + fi + + if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then + bin="yes" + else + bin="no" + fi + if [ ${lg_size} -lt ${lg_kmax} \ + -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then + lg_delta_lookup=${lg_delta} + else + lg_delta_lookup="no" + fi + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup} + # Defined upon return: + # - psz ("yes" or "no") + # - bin ("yes" or "no") + # - lg_delta_lookup (${lg_delta} or "no") +} + +sep_line() { + echo " \\" +} + +size_classes() { + lg_z=$1 + lg_q=$2 + lg_t=$3 + lg_p=$4 + lg_g=$5 + + pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result} + pow2 ${lg_g}; g=${pow2_result} + + echo "#define SIZE_CLASSES \\" + echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\" + + ntbins=0 + nlbins=0 + lg_tiny_maxclass='"NA"' + nbins=0 + npsizes=0 + + # Tiny size classes. + ndelta=0 + index=0 + lg_grp=${lg_t} + lg_delta=${lg_grp} + while [ ${lg_grp} -lt ${lg_q} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + if [ ${lg_delta_lookup} != "no" ] ; then + nlbins=$((${index} + 1)) + fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi + if [ ${bin} != "no" ] ; then + nbins=$((${index} + 1)) + fi + ntbins=$((${ntbins} + 1)) + lg_tiny_maxclass=${lg_grp} # Final written value is correct. + index=$((${index} + 1)) + lg_delta=${lg_grp} + lg_grp=$((${lg_grp} + 1)) + done + + # First non-tiny group. + if [ ${ntbins} -gt 0 ] ; then + sep_line + # The first size class has an unusual encoding, because the size has to be + # split between grp and delta*ndelta. + lg_grp=$((${lg_grp} - 1)) + ndelta=1 + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + index=$((${index} + 1)) + lg_grp=$((${lg_grp} + 1)) + lg_delta=$((${lg_delta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi + fi + while [ ${ndelta} -lt ${g} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + index=$((${index} + 1)) + ndelta=$((${ndelta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi + done + + # All remaining groups. 
+ lg_grp=$((${lg_grp} + ${lg_g})) + while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do + sep_line + ndelta=1 + if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then + ndelta_limit=$((${g} - 1)) + else + ndelta_limit=${g} + fi + while [ ${ndelta} -le ${ndelta_limit} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + if [ ${lg_delta_lookup} != "no" ] ; then + nlbins=$((${index} + 1)) + # Final written value is correct: + lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi + if [ ${bin} != "no" ] ; then + nbins=$((${index} + 1)) + # Final written value is correct: + small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + if [ ${lg_g} -gt 0 ] ; then + lg_large_minclass=$((${lg_grp} + 1)) + else + lg_large_minclass=$((${lg_grp} + 2)) + fi + fi + # Final written value is correct: + huge_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + index=$((${index} + 1)) + ndelta=$((${ndelta} + 1)) + done + lg_grp=$((${lg_grp} + 1)) + lg_delta=$((${lg_delta} + 1)) + done + echo + nsizes=${index} + + # Defined upon completion: + # - ntbins + # - nlbins + # - nbins + # - nsizes + # - npsizes + # - lg_tiny_maxclass + # - lookup_maxclass + # - small_maxclass + # - lg_large_minclass + # - huge_maxclass +} + +cat <<EOF +/* This file was automatically generated by size_classes.sh. */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to + * be defined prior to inclusion, and it in turn defines: + * + * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling. + * SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz, + * bin, lg_delta_lookup) tuples. + * index: Size class index. + * lg_grp: Lg group base size (no deltas added). + * lg_delta: Lg delta to previous size class. + * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta + * psz: 'yes' if a multiple of the page size, 'no' otherwise. + * bin: 'yes' if a small bin size class, 'no' otherwise. + * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no' + * otherwise. + * NTBINS: Number of tiny bins. + * NLBINS: Number of bins supported by the lookup table. + * NBINS: Number of small size class bins. + * NSIZES: Number of size classes. + * NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE). + * LG_TINY_MAXCLASS: Lg of maximum tiny size class. + * LOOKUP_MAXCLASS: Maximum size class included in lookup table. + * SMALL_MAXCLASS: Maximum small size class. + * LG_LARGE_MINCLASS: Lg of minimum large size class. + * HUGE_MAXCLASS: Maximum (huge) size class. + */ + +#define LG_SIZE_CLASS_GROUP ${lg_g} + +EOF + +for lg_z in ${lg_zarr} ; do + for lg_q in ${lg_qarr} ; do + lg_t=${lg_tmin} + while [ ${lg_t} -le ${lg_q} ] ; do + # Iterate through page sizes and compute how many bins there are. 
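
The size encoding documented in the generated header text above, size == (1 << lg_grp) + (ndelta << lg_delta), is easy to check by hand. A standalone sketch using a hypothetical tuple (not a row from any generated table):

#include <assert.h>

int
main(void)
{
	/* A hypothetical SC(_, 13, 11, 3, ...): 8192 + 3 * 2048 == 14336. */
	assert((1 << 13) + (3 << 11) == 14336);
	/* 14336 is 3.5 pages of 4 KiB, so such a class would get psz == "no". */
	return (0);
}
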
+ for lg_p in ${lg_parr} ; do + echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})" + size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g} + echo "#define SIZE_CLASSES_DEFINED" + echo "#define NTBINS ${ntbins}" + echo "#define NLBINS ${nlbins}" + echo "#define NBINS ${nbins}" + echo "#define NSIZES ${nsizes}" + echo "#define NPSIZES ${npsizes}" + echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}" + echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}" + echo "#define SMALL_MAXCLASS ${small_maxclass}" + echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}" + echo "#define HUGE_MAXCLASS ${huge_maxclass}" + echo "#endif" + echo + done + lg_t=$((${lg_t} + 1)) + done + done +done + +cat <<EOF +#ifndef SIZE_CLASSES_DEFINED +# error "No size class definitions match configuration" +#endif +#undef SIZE_CLASSES_DEFINED +/* + * The size2index_tab lookup table uses uint8_t to encode each bin index, so we + * cannot support more than 256 small size classes. Further constrain NBINS to + * 255 since all small size classes, plus a "not small" size class must be + * stored in 8 bits of arena_chunk_map_bits_t's bits field. + */ +#if (NBINS > 255) +# error "Too many small size classes" +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +EOF diff --git a/memory/jemalloc/src/include/jemalloc/internal/smoothstep.h b/memory/jemalloc/src/include/jemalloc/internal/smoothstep.h new file mode 100644 index 000000000..c5333ccad --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/smoothstep.h @@ -0,0 +1,246 @@ +/* + * This file was generated by the following command: + * sh smoothstep.sh smoother 200 24 3 15 + */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * This header defines a precomputed table based on the smoothstep family of + * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0 + * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so + * that floating point math can be avoided. 
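
Each h value in the table that follows is simply y scaled to fixed point, h == floor(y * 2^SMOOTHSTEP_BFP). The midpoint entry makes a convenient spot check; a standalone sketch (not part of this header):

#include <inttypes.h>
#include <stdio.h>

/* smootherstep(x) = 6x^5 - 15x^4 + 10x^3, written in Horner form. */
static double
smootherstep(double x)
{
	return (x * x * x * (x * (x * 6.0 - 15.0) + 10.0));
}

int
main(void)
{
	uint64_t h = (uint64_t)(smootherstep(0.5) * (1 << 24));

	/* Prints 800000: smootherstep(0.5) == 0.5, and 0.5 * 2^24 == 0x800000,
	 * matching STEP( 100, ...) in the table. */
	printf("%" PRIx64 "\n", h);
	return (0);
}
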
+ * + * 3 2 + * smoothstep(x) = -2x + 3x + * + * 5 4 3 + * smootherstep(x) = 6x - 15x + 10x + * + * 7 6 5 4 + * smootheststep(x) = -20x + 70x - 84x + 35x + */ + +#define SMOOTHSTEP_VARIANT "smoother" +#define SMOOTHSTEP_NSTEPS 200 +#define SMOOTHSTEP_BFP 24 +#define SMOOTHSTEP \ + /* STEP(step, h, x, y) */ \ + STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ + STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ + STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ + STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ + STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ + STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ + STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ + STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ + STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ + STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ + STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ + STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ + STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ + STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ + STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ + STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ + STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ + STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ + STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ + STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ + STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ + STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ + STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ + STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ + STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ + STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ + STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ + STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ + STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ + STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ + STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ + STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ + STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ + STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ + STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ + STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ + STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ + STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ + STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ + STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ + STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ + STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ + STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ + STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ + STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ + STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ + STEP( 47, 
UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ + STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ + STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ + STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ + STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ + STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ + STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ + STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ + STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ + STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ + STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ + STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ + STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ + STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ + STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ + STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ + STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ + STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ + STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ + STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ + STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ + STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ + STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ + STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ + STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ + STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ + STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ + STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ + STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ + STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ + STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ + STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ + STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ + STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ + STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ + STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ + STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ + STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ + STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ + STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ + STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ + STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ + STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ + STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ + STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ + STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ + STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ + STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ + STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ + STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ + STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 
0.471891870443750) \ + STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ + STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ + STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ + STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ + STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ + STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ + STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ + STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ + STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ + STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ + STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ + STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ + STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ + STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ + STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ + STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ + STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ + STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ + STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ + STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ + STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ + STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ + STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ + STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ + STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ + STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ + STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ + STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ + STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ + STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ + STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ + STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ + STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ + STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ + STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ + STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ + STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ + STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ + STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ + STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ + STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ + STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ + STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ + STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ + STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ + STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ + STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ + STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ + STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ + STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 
0.880035843881250) \ + STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ + STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ + STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ + STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ + STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ + STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ + STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ + STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ + STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ + STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ + STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ + STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ + STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ + STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ + STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ + STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ + STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ + STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ + STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ + STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ + STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ + STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ + STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ + STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ + STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ + STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ + STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ + STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ + STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ + STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ + STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ + STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ + STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ + STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ + STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ + STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ + STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ + STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ + STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ + STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ + STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ + STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ + STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ + STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ + STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ + STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ + STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ + STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ + STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ + STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 
0.999967004818750) \ + STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ + STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ + STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/smoothstep.sh b/memory/jemalloc/src/include/jemalloc/internal/smoothstep.sh new file mode 100755 index 000000000..8124693f7 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/smoothstep.sh @@ -0,0 +1,115 @@ +#!/bin/sh +# +# Generate a discrete lookup table for a sigmoid function in the smoothstep +# family (https://en.wikipedia.org/wiki/Smoothstep), where the lookup table +# entries correspond to x in [1/nsteps, 2/nsteps, ..., nsteps/nsteps]. Encode +# the entries using a binary fixed point representation. +# +# Usage: smoothstep.sh <variant> <nsteps> <bfp> <xprec> <yprec> +# +# <variant> is in {smooth, smoother, smoothest}. +# <nsteps> must be greater than zero. +# <bfp> must be in [0..62]; reasonable values are roughly [10..30]. +# <xprec> is x decimal precision. +# <yprec> is y decimal precision. + +#set -x + +cmd="sh smoothstep.sh $*" +variant=$1 +nsteps=$2 +bfp=$3 +xprec=$4 +yprec=$5 + +case "${variant}" in + smooth) + ;; + smoother) + ;; + smoothest) + ;; + *) + echo "Unsupported variant" + exit 1 + ;; +esac + +smooth() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoother() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoothest() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +cat <<EOF +/* + * This file was generated by the following command: + * $cmd + */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * This header defines a precomputed table based on the smoothstep family of + * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0 + * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so + * that floating point math can be avoided. 
+ * + * 3 2 + * smoothstep(x) = -2x + 3x + * + * 5 4 3 + * smootherstep(x) = 6x - 15x + 10x + * + * 7 6 5 4 + * smootheststep(x) = -20x + 70x - 84x + 35x + */ + +#define SMOOTHSTEP_VARIANT "${variant}" +#define SMOOTHSTEP_NSTEPS ${nsteps} +#define SMOOTHSTEP_BFP ${bfp} +#define SMOOTHSTEP \\ + /* STEP(step, h, x, y) */ \\ +EOF + +s=1 +while [ $s -le $nsteps ] ; do + $variant ${s} + x=`echo ${xprec} k ${s} ${nsteps} / p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + printf ' STEP(%4d, UINT64_C(0x%016x), %s, %s) \\\n' ${s} ${h} ${x} ${y} + + s=$((s+1)) +done +echo + +cat <<EOF +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +EOF diff --git a/memory/jemalloc/src/include/jemalloc/internal/spin.h b/memory/jemalloc/src/include/jemalloc/internal/spin.h new file mode 100644 index 000000000..9ef5ceb92 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/spin.h @@ -0,0 +1,51 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct spin_s spin_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct spin_s { + unsigned iteration; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void spin_init(spin_t *spin); +void spin_adaptive(spin_t *spin); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) +JEMALLOC_INLINE void +spin_init(spin_t *spin) +{ + + spin->iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/memory/jemalloc/src/include/jemalloc/internal/stats.h b/memory/jemalloc/src/include/jemalloc/internal/stats.h new file mode 100644 index 000000000..b62181783 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/stats.h @@ -0,0 +1,201 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct tcache_bin_stats_s tcache_bin_stats_t; +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct malloc_huge_stats_s malloc_huge_stats_t; +typedef struct arena_stats_s arena_stats_t; +typedef struct chunk_stats_s chunk_stats_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct tcache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. 
+ */ + uint64_t nrequests; +}; + +struct malloc_bin_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of runs created for this bin's size class. */ + uint64_t nruns; + + /* + * Total number of runs reused by extracting them from the runs tree for + * this bin's size class. + */ + uint64_t reruns; + + /* Current number of runs in this bin. */ + size_t curruns; +}; + +struct malloc_large_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* + * Current number of runs of this size class, including runs currently + * cached by tcache. + */ + size_t curruns; +}; + +struct malloc_huge_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* Current number of (multi-)chunk allocations of this size class. */ + size_t curhchunks; +}; + +struct arena_stats_s { + /* Number of bytes currently mapped. */ + size_t mapped; + + /* + * Number of bytes currently retained as a side effect of munmap() being + * disabled/bypassed. Retained bytes are technically mapped (though + * always decommitted or purged), but they are excluded from the mapped + * statistic (above). + */ + size_t retained; + + /* + * Total number of purge sweeps, total number of madvise calls made, + * and total pages purged in order to keep dirty unused memory under + * control. + */ + uint64_t npurge; + uint64_t nmadvise; + uint64_t purged; + + /* + * Number of bytes currently mapped purely for metadata purposes, and + * number of bytes currently allocated for internal metadata. + */ + size_t metadata_mapped; + size_t metadata_allocated; /* Protected via atomic_*_z(). */ + + /* Per-size-category statistics. */ + size_t allocated_large; + uint64_t nmalloc_large; + uint64_t ndalloc_large; + uint64_t nrequests_large; + + size_t allocated_huge; + uint64_t nmalloc_huge; + uint64_t ndalloc_huge; + + /* One element for each large size class. */ + malloc_large_stats_t *lstats; + + /* One element for each huge size class. 
 */
+ malloc_huge_stats_t *hstats;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern bool opt_stats_print;
+
+extern size_t stats_cactive;
+
+void stats_print(void (*write)(void *, const char *), void *cbopaque,
+ const char *opts);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+size_t stats_cactive_get(void);
+void stats_cactive_add(size_t size);
+void stats_cactive_sub(size_t size);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_))
+JEMALLOC_INLINE size_t
+stats_cactive_get(void)
+{
+
+ return (atomic_read_z(&stats_cactive));
+}
+
+JEMALLOC_INLINE void
+stats_cactive_add(size_t size)
+{
+ UNUSED size_t cactive;
+
+ assert(size > 0);
+ assert((size & chunksize_mask) == 0);
+
+ cactive = atomic_add_z(&stats_cactive, size);
+ assert(cactive - size < cactive);
+}
+
+JEMALLOC_INLINE void
+stats_cactive_sub(size_t size)
+{
+ UNUSED size_t cactive;
+
+ assert(size > 0);
+ assert((size & chunksize_mask) == 0);
+
+ cactive = atomic_sub_z(&stats_cactive, size);
+ assert(cactive + size > cactive);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/memory/jemalloc/src/include/jemalloc/internal/tcache.h b/memory/jemalloc/src/include/jemalloc/internal/tcache.h
new file mode 100644
index 000000000..01ba062de
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/tcache.h
@@ -0,0 +1,469 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct tcache_bin_info_s tcache_bin_info_t;
+typedef struct tcache_bin_s tcache_bin_t;
+typedef struct tcache_s tcache_t;
+typedef struct tcaches_s tcaches_t;
+
+/*
+ * tcache pointers close to NULL are used to encode state information that is
+ * used for two purposes: preventing thread caching on a per-thread basis and
+ * cleaning up during thread shutdown.
+ */
+#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1)
+#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2)
+#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3)
+#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY
+
+/*
+ * Absolute minimum number of cache slots for each small bin.
+ */
+#define TCACHE_NSLOTS_SMALL_MIN 20
+
+/*
+ * Absolute maximum number of cache slots for each small bin in the thread
+ * cache. This is an additional constraint beyond the implicit limit of twice
+ * the number of regions per run for each size class.
+ *
+ * This constant must be an even number.
+ */
+#define TCACHE_NSLOTS_SMALL_MAX 200
+
+/* Number of cache slots for large size classes. */
+#define TCACHE_NSLOTS_LARGE 20
+
+/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
+#define LG_TCACHE_MAXCLASS_DEFAULT 15
+
+/*
+ * TCACHE_GC_SWEEP is the approximate number of allocation events between
+ * full GC sweeps. Integer rounding may cause the actual number to be
+ * slightly higher, since GC is performed incrementally.
+ */
+#define TCACHE_GC_SWEEP 8192
+
+/* Number of tcache allocation/deallocation events between incremental GCs. */
+#define TCACHE_GC_INCR \
+ ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 
    0 : 1))
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+typedef enum {
+ tcache_enabled_false = 0, /* Enable cast to/from bool. */
+ tcache_enabled_true = 1,
+ tcache_enabled_default = 2
+} tcache_enabled_t;
+
+/*
+ * Read-only information associated with each element of tcache_t's tbins array
+ * is stored separately, mainly to reduce memory usage.
+ */
+struct tcache_bin_info_s {
+ unsigned ncached_max; /* Upper limit on ncached. */
+};
+
+struct tcache_bin_s {
+ tcache_bin_stats_t tstats;
+ int low_water; /* Min # cached since last GC. */
+ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
+ unsigned ncached; /* # of cached objects. */
+ /*
+ * To make use of adjacent cacheline prefetch, the items in the avail
+ * stack go to higher addresses for newer allocations. avail points
+ * just above the available space, which means that
+ * avail[-ncached, ... -1] are available items and the lowest item will
+ * be allocated first.
+ */
+ void **avail; /* Stack of available objects. */
+};
+
+struct tcache_s {
+ ql_elm(tcache_t) link; /* Used for aggregating stats. */
+ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */
+ ticker_t gc_ticker; /* Drives incremental GC. */
+ szind_t next_gc_bin; /* Next bin to GC. */
+ tcache_bin_t tbins[1]; /* Dynamically sized. */
+ /*
+ * The pointer stacks associated with tbins follow as a contiguous
+ * array. During tcache initialization, the avail pointer in each
+ * element of tbins is initialized to point to the proper offset within
+ * this array.
+ */
+};
+
+/* Linkage for list of available (previously used) explicit tcache IDs. */
+struct tcaches_s {
+ union {
+ tcache_t *tcache;
+ tcaches_t *next;
+ };
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern bool opt_tcache;
+extern ssize_t opt_lg_tcache_max;
+
+extern tcache_bin_info_t *tcache_bin_info;
+
+/*
+ * Number of tcache bins. There are NBINS small-object bins, plus 0 or more
+ * large-object bins.
+ */
+extern unsigned nhbins;
+
+/* Maximum cached size class. */
+extern size_t tcache_maxclass;
+
+/*
+ * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
+ * usable via the MALLOCX_TCACHE() flag. The automatic per-thread tcaches are
+ * completely disjoint from this data structure. tcaches starts off as a sparse
+ * array, so it has no physical memory footprint until individual pages are
+ * touched. This allows the entire array to be allocated the first time an
+ * explicit tcache is created without a disproportionate impact on memory usage. 
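+ *
+ * As an illustrative sketch (not itself part of this header), an application
+ * would typically drive these through the public API, using the tcache.create
+ * mallctl and the MALLOCX_TCACHE() flag mentioned above:
+ *
+ *	unsigned ind;
+ *	size_t sz = sizeof(ind);
+ *
+ *	if (mallctl("tcache.create", &ind, &sz, NULL, 0) == 0) {
+ *		void *p = mallocx(4096, MALLOCX_TCACHE(ind));
+ *		dallocx(p, MALLOCX_TCACHE(ind));
+ *		mallctl("tcache.destroy", NULL, NULL, &ind, sizeof(ind));
+ *	}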
+ */ +extern tcaches_t *tcaches; + +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, + arena_t *oldarena, arena_t *newarena); +tcache_t *tcache_get_hard(tsd_t *tsd); +tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); +void tcache_cleanup(tsd_t *tsd); +void tcache_enabled_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void tcache_event(tsd_t *tsd, tcache_t *tcache); +void tcache_flush(void); +bool tcache_enabled_get(void); +tcache_t *tcache_get(tsd_t *tsd, bool create); +void tcache_enabled_set(bool enabled); +void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); +void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, szind_t ind, bool zero, bool slow_path); +void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, szind_t ind, bool zero, bool slow_path); +void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, + szind_t binind, bool slow_path); +void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, + size_t size, bool slow_path); +tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +JEMALLOC_INLINE void +tcache_flush(void) +{ + tsd_t *tsd; + + cassert(config_tcache); + + tsd = tsd_fetch(); + tcache_cleanup(tsd); +} + +JEMALLOC_INLINE bool +tcache_enabled_get(void) +{ + tsd_t *tsd; + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tsd = tsd_fetch(); + tcache_enabled = tsd_tcache_enabled_get(tsd); + if (tcache_enabled == tcache_enabled_default) { + tcache_enabled = (tcache_enabled_t)opt_tcache; + tsd_tcache_enabled_set(tsd, tcache_enabled); + } + + return ((bool)tcache_enabled); +} + +JEMALLOC_INLINE void +tcache_enabled_set(bool enabled) +{ + tsd_t *tsd; + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tsd = tsd_fetch(); + + tcache_enabled = (tcache_enabled_t)enabled; + tsd_tcache_enabled_set(tsd, tcache_enabled); + + if (!enabled) + tcache_cleanup(tsd); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get(tsd_t *tsd, bool create) +{ + tcache_t *tcache; + + if (!config_tcache) + return (NULL); + + tcache = tsd_tcache_get(tsd); + if (!create) + return (tcache); + if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { + tcache = tcache_get_hard(tsd); + tsd_tcache_set(tsd, tcache); + } + + return (tcache); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_event(tsd_t *tsd, tcache_t *tcache) +{ + + if (TCACHE_GC_INCR == 0) + return; + + if (unlikely(ticker_tick(&tcache->gc_ticker))) + tcache_event_hard(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) +{ + void 
*ret; + + if (unlikely(tbin->ncached == 0)) { + tbin->low_water = -1; + *tcache_success = false; + return (NULL); + } + /* + * tcache_success (instead of ret) should be checked upon the return of + * this function. We avoid checking (ret == NULL) because there is + * never a null stored on the avail stack (which is unknown to the + * compiler), and eagerly checking ret would cause pipeline stall + * (waiting for the cacheline). + */ + *tcache_success = true; + ret = *(tbin->avail - tbin->ncached); + tbin->ncached--; + + if (unlikely((int)tbin->ncached < tbin->low_water)) + tbin->low_water = tbin->ncached; + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) +{ + void *ret; + tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + assert(binind < NBINS); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + bool tcache_hard_success; + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, + tbin, binind, &tcache_hard_success); + if (tcache_hard_success == false) + return (NULL); + } + + assert(ret); + /* + * Only compute usize if required. The checks in the following if + * statement are all static. + */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = index2size(binind); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); + } + + if (likely(!zero)) { + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (unlikely(opt_zero)) + memset(ret, 0, usize); + } + } else { + if (slow_path && config_fill && unlikely(opt_junk_alloc)) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + memset(ret, 0, usize); + } + + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += usize; + tcache_event(tsd, tcache); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) +{ + void *ret; + tcache_bin_t *tbin; + bool tcache_success; + + assert(binind < nhbins); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + /* + * Only allocate one large object at a time, because it's quite + * expensive to create one and not use it. 
+ */ + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero); + if (ret == NULL) + return (NULL); + } else { + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + /* Only compute usize on demand */ + if (config_prof || (slow_path && config_fill) || + unlikely(zero)) { + usize = index2size(binind); + assert(usize <= tcache_maxclass); + } + + if (config_prof && usize == LARGE_MINCLASS) { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + LG_PAGE); + arena_mapbits_large_binind_set(chunk, pageind, + BININD_INVALID); + } + if (likely(!zero)) { + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + memset(ret, JEMALLOC_ALLOC_JUNK, + usize); + } else if (unlikely(opt_zero)) + memset(ret, 0, usize); + } + } else + memset(ret, 0, usize); + + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += usize; + } + + tcache_event(tsd, tcache); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) +{ + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); + + if (slow_path && config_fill && unlikely(opt_junk_free)) + arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; + + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, + bool slow_path) +{ + szind_t binind; + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert((size & PAGE_MASK) == 0); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); + + binind = size2index(size); + + if (slow_path && config_fill && unlikely(opt_junk_free)) + arena_dalloc_junk_large(ptr, size); + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_large(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; + + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcaches_get(tsd_t *tsd, unsigned ind) +{ + tcaches_t *elm = &tcaches[ind]; + if (unlikely(elm->tcache == NULL)) { + elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, + NULL)); + } + return (elm->tcache); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/ticker.h b/memory/jemalloc/src/include/jemalloc/internal/ticker.h new file mode 100644 index 000000000..4696e56d2 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/ticker.h @@ -0,0 +1,75 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ticker_s ticker_t; + +#endif /* JEMALLOC_H_TYPES */ 
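+
+/*
+ * Usage sketch (illustrative, not part of this header): a ticker is
+ * initialized with a budget of nticks events, and ticker_tick() returns true
+ * roughly once per nticks calls, at which point the budget resets. The
+ * handle_event() and do_periodic_work() names below are hypothetical;
+ * tcache_event() in tcache.h applies this pattern to its gc_ticker.
+ *
+ *	ticker_t t;
+ *
+ *	ticker_init(&t, 1000);
+ *	while (handle_event()) {
+ *		if (ticker_tick(&t))
+ *			do_periodic_work();
+ *	}
+ */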
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct ticker_s {
+ int32_t tick;
+ int32_t nticks;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void ticker_init(ticker_t *ticker, int32_t nticks);
+void ticker_copy(ticker_t *ticker, const ticker_t *other);
+int32_t ticker_read(const ticker_t *ticker);
+bool ticker_ticks(ticker_t *ticker, int32_t nticks);
+bool ticker_tick(ticker_t *ticker);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_))
+JEMALLOC_INLINE void
+ticker_init(ticker_t *ticker, int32_t nticks)
+{
+
+ ticker->tick = nticks;
+ ticker->nticks = nticks;
+}
+
+JEMALLOC_INLINE void
+ticker_copy(ticker_t *ticker, const ticker_t *other)
+{
+
+ *ticker = *other;
+}
+
+JEMALLOC_INLINE int32_t
+ticker_read(const ticker_t *ticker)
+{
+
+ return (ticker->tick);
+}
+
+JEMALLOC_INLINE bool
+ticker_ticks(ticker_t *ticker, int32_t nticks)
+{
+
+ if (unlikely(ticker->tick < nticks)) {
+ ticker->tick = ticker->nticks;
+ return (true);
+ }
+ ticker->tick -= nticks;
+ return (false);
+}
+
+JEMALLOC_INLINE bool
+ticker_tick(ticker_t *ticker)
+{
+
+ return (ticker_ticks(ticker, 1));
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/memory/jemalloc/src/include/jemalloc/internal/tsd.h b/memory/jemalloc/src/include/jemalloc/internal/tsd.h
new file mode 100644
index 000000000..9055acafd
--- /dev/null
+++ b/memory/jemalloc/src/include/jemalloc/internal/tsd.h
@@ -0,0 +1,787 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/* Maximum number of malloc_tsd users with cleanup functions. */
+#define MALLOC_TSD_CLEANUPS_MAX 2
+
+typedef bool (*malloc_tsd_cleanup_t)(void);
+
+#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
+ !defined(_WIN32))
+typedef struct tsd_init_block_s tsd_init_block_t;
+typedef struct tsd_init_head_s tsd_init_head_t;
+#endif
+
+typedef struct tsd_s tsd_t;
+typedef struct tsdn_s tsdn_t;
+
+#define TSDN_NULL ((tsdn_t *)0)
+
+typedef enum {
+ tsd_state_uninitialized,
+ tsd_state_nominal,
+ tsd_state_purgatory,
+ tsd_state_reincarnated
+} tsd_state_t;
+
+/*
+ * TLS/TSD-agnostic macro-based implementation of thread-specific data. There
+ * are five macros that support (at least) three use cases: file-private,
+ * library-private, and library-private inlined. 
Following is an example + * library-private tsd variable: + * + * In example.h: + * typedef struct { + * int x; + * int y; + * } example_t; + * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) + * malloc_tsd_types(example_, example_t) + * malloc_tsd_protos(, example_, example_t) + * malloc_tsd_externs(example_, example_t) + * In example.c: + * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) + * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, + * example_tsd_cleanup) + * + * The result is a set of generated functions, e.g.: + * + * bool example_tsd_boot(void) {...} + * bool example_tsd_booted_get(void) {...} + * example_t *example_tsd_get(bool init) {...} + * void example_tsd_set(example_t *val) {...} + * + * Note that all of the functions deal in terms of (a_type *) rather than + * (a_type) so that it is possible to support non-pointer types (unlike + * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is + * cast to (void *). This means that the cleanup function needs to cast the + * function argument to (a_type *), then dereference the resulting pointer to + * access fields, e.g. + * + * void + * example_tsd_cleanup(void *arg) + * { + * example_t *example = (example_t *)arg; + * + * example->x = 42; + * [...] + * if ([want the cleanup function to be called again]) + * example_tsd_set(example); + * } + * + * If example_tsd_set() is called within example_tsd_cleanup(), it will be + * called again. This is similar to how pthreads TSD destruction works, except + * that pthreads only calls the cleanup function again if the value was set to + * non-NULL. + */ + +/* malloc_tsd_types(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_types(a_name, a_type) +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_types(a_name, a_type) +#elif (defined(_WIN32)) +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#else +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#endif + +/* malloc_tsd_protos(). */ +#define malloc_tsd_protos(a_attr, a_name, a_type) \ +a_attr bool \ +a_name##tsd_boot0(void); \ +a_attr void \ +a_name##tsd_boot1(void); \ +a_attr bool \ +a_name##tsd_boot(void); \ +a_attr bool \ +a_name##tsd_booted_get(void); \ +a_attr a_type * \ +a_name##tsd_get(bool init); \ +a_attr void \ +a_name##tsd_set(a_type *val); + +/* malloc_tsd_externs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##tsd_tls; \ +extern __thread bool a_name##tsd_initialized; \ +extern bool a_name##tsd_booted; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##tsd_tls; \ +extern pthread_key_t a_name##tsd_tsd; \ +extern bool a_name##tsd_booted; +#elif (defined(_WIN32)) +#define malloc_tsd_externs(a_name, a_type) \ +extern DWORD a_name##tsd_tsd; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ +extern bool a_name##tsd_booted; +#else +#define malloc_tsd_externs(a_name, a_type) \ +extern pthread_key_t a_name##tsd_tsd; \ +extern tsd_init_head_t a_name##tsd_init_head; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ +extern bool a_name##tsd_booted; +#endif + +/* malloc_tsd_data(). 
*/ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##tsd_tls = a_initializer; \ +a_attr __thread bool JEMALLOC_TLS_MODEL \ + a_name##tsd_initialized = false; \ +a_attr bool a_name##tsd_booted = false; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##tsd_tls = a_initializer; \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr bool a_name##tsd_booted = false; +#elif (defined(_WIN32)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr DWORD a_name##tsd_tsd; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ +a_attr bool a_name##tsd_booted = false; +#else +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr tsd_init_head_t a_name##tsd_init_head = { \ + ql_head_initializer(blocks), \ + MALLOC_MUTEX_INITIALIZER \ +}; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ +a_attr bool a_name##tsd_booted = false; +#endif + +/* malloc_tsd_funcs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##tsd_cleanup_wrapper(void) \ +{ \ + \ + if (a_name##tsd_initialized) { \ + a_name##tsd_initialized = false; \ + a_cleanup(&a_name##tsd_tls); \ + } \ + return (a_name##tsd_initialized); \ +} \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##tsd_cleanup_wrapper); \ + } \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##tsd_get(bool init) \ +{ \ + \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ +} \ +a_attr void \ +a_name##tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##tsd_booted); \ + a_name##tsd_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + a_name##tsd_initialized = true; \ +} +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ + 0) \ + return (true); \ + } \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ +/* Get/set. 
*/ \ +a_attr a_type * \ +a_name##tsd_get(bool init) \ +{ \ + \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ +} \ +a_attr void \ +a_name##tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##tsd_booted); \ + a_name##tsd_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)(&a_name##tsd_tls))) { \ + malloc_write("<jemalloc>: Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ +} +#elif (defined(_WIN32)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##tsd_cleanup_wrapper(void) \ +{ \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ + \ + if (wrapper == NULL) \ + return (false); \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + return (true); \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ +{ \ + \ + if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ +} \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_wrapper_get(bool init) \ +{ \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ + \ + if (init && unlikely(wrapper == NULL)) { \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + wrapper->initialized = false; \ + wrapper->val = a_initializer; \ + } \ + a_name##tsd_wrapper_set(wrapper); \ + } \ + return (wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + a_name##tsd_tsd = TlsAlloc(); \ + if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##tsd_cleanup_wrapper); \ + } \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ +/* Get/set. 
*/ \ +a_attr a_type * \ +a_name##tsd_get(bool init) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##tsd_set(a_type *val) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(true); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#else +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##tsd_cleanup_wrapper(void *arg) \ +{ \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ + \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + return; \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ +} \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ +{ \ + \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ +} \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_wrapper_get(bool init) \ +{ \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + pthread_getspecific(a_name##tsd_tsd); \ + \ + if (init && unlikely(wrapper == NULL)) { \ + tsd_init_block_t block; \ + wrapper = tsd_init_check_recursion( \ + &a_name##tsd_init_head, &block); \ + if (wrapper) \ + return (wrapper); \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + block.data = wrapper; \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + wrapper->initialized = false; \ + wrapper->val = a_initializer; \ + } \ + a_name##tsd_wrapper_set(wrapper); \ + tsd_init_finish(&a_name##tsd_init_head, &block); \ + } \ + return (wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (pthread_key_create(&a_name##tsd_tsd, \ + a_name##tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ +/* Get/set. 
*/ \ +a_attr a_type * \ +a_name##tsd_get(bool init) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##tsd_set(a_type *val) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(true); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; +#endif + +#define MALLOC_TSD \ +/* O(name, type) */ \ + O(tcache, tcache_t *) \ + O(thread_allocated, uint64_t) \ + O(thread_deallocated, uint64_t) \ + O(prof_tdata, prof_tdata_t *) \ + O(iarena, arena_t *) \ + O(arena, arena_t *) \ + O(arenas_tdata, arena_tdata_t *) \ + O(narenas_tdata, unsigned) \ + O(arenas_tdata_bypass, bool) \ + O(tcache_enabled, tcache_enabled_t) \ + O(quarantine, quarantine_t *) \ + O(witnesses, witness_list_t) \ + O(witness_fork, bool) \ + +#define TSD_INITIALIZER { \ + tsd_state_uninitialized, \ + NULL, \ + 0, \ + 0, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + 0, \ + false, \ + tcache_enabled_default, \ + NULL, \ + ql_head_initializer(witnesses), \ + false \ +} + +struct tsd_s { + tsd_state_t state; +#define O(n, t) \ + t n; +MALLOC_TSD +#undef O +}; + +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. 
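+ *
+ * For example (an illustrative sketch; tsdn_null() and tsdn_tsd() are defined
+ * in the inlines section below), an internal function that may run before TSD
+ * is initialized typically bails out, or takes a slow path, on a NULL tsdn,
+ * and otherwise converts:
+ *
+ *	if (tsdn_null(tsdn))
+ *		return;
+ *	tsd = tsdn_tsd(tsdn);
+ *
+ * witness_assert_owner() in witness.h follows exactly this pattern.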
+ */ +struct tsdn_s { + tsd_t tsd; +}; + +static const tsd_t tsd_initializer = TSD_INITIALIZER; + +malloc_tsd_types(, tsd_t) + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_no_cleanup(void *arg); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +#endif +void tsd_cleanup(void *arg); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) + +tsd_t *tsd_fetch_impl(bool init); +tsd_t *tsd_fetch(void); +tsdn_t *tsd_tsdn(tsd_t *tsd); +bool tsd_nominal(tsd_t *tsd); +#define O(n, t) \ +t *tsd_##n##p_get(tsd_t *tsd); \ +t tsd_##n##_get(tsd_t *tsd); \ +void tsd_##n##_set(tsd_t *tsd, t n); +MALLOC_TSD +#undef O +tsdn_t *tsdn_fetch(void); +bool tsdn_null(const tsdn_t *tsdn); +tsd_t *tsdn_tsd(tsdn_t *tsdn); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) +malloc_tsd_externs(, tsd_t) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch_impl(bool init) +{ + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); + + if (unlikely(tsd->state != tsd_state_nominal)) { + if (tsd->state == tsd_state_uninitialized) { + tsd->state = tsd_state_nominal; + /* Trigger cleanup handler registration. 
*/ + tsd_set(tsd); + } else if (tsd->state == tsd_state_purgatory) { + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + } else + assert(tsd->state == tsd_state_reincarnated); + } + + return (tsd); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) +{ + + return ((tsdn_t *)tsd); +} + +JEMALLOC_INLINE bool +tsd_nominal(tsd_t *tsd) +{ + + return (tsd->state == tsd_state_nominal); +} + +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) \ +{ \ + \ + return (&tsd->n); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) \ +{ \ + \ + return (*tsd_##n##p_get(tsd)); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t n) \ +{ \ + \ + assert(tsd->state == tsd_state_nominal); \ + tsd->n = n; \ +} +MALLOC_TSD +#undef O + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) +{ + + if (!tsd_booted_get()) + return (NULL); + + return (tsd_tsdn(tsd_fetch_impl(false))); +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) +{ + + return (tsdn == NULL); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) +{ + + assert(!tsdn_null(tsdn)); + + return (&tsdn->tsd); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/util.h b/memory/jemalloc/src/include/jemalloc/internal/util.h new file mode 100644 index 000000000..aee00d6d9 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/util.h @@ -0,0 +1,338 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#ifdef _WIN32 +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" +#else +# include <inttypes.h> +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR +#endif + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + +/* Junk fill patterns. */ +#define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) + +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ + +/* + * Silence compiler warnings due to uninitialized values. This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. 
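+ *
+ * For example, tcache_alloc_small() in tcache.h declares:
+ *
+ *	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+ *
+ * which expands to "= 0" only when JEMALLOC_CC_SILENCE is defined (see
+ * below), and to nothing otherwise.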
+ */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +#ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#else +# define likely(x) !!(x) +# define unlikely(x) !!(x) +#endif + +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + +#include "jemalloc/internal/assert.h" + +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#define cassert(c) do { \ + if (unlikely(!(c))) \ + not_reached(); \ +} while (0) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *restrict nptr, + char **restrict endptr, int base); +void malloc_write(const char *s); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +size_t malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +unsigned ffs_llu(unsigned long long bitmap); +unsigned ffs_lu(unsigned long bitmap); +unsigned ffs_u(unsigned bitmap); +unsigned ffs_zu(size_t bitmap); +unsigned ffs_u64(uint64_t bitmap); +unsigned ffs_u32(uint32_t bitmap); +uint64_t pow2_ceil_u64(uint64_t x); +uint32_t pow2_ceil_u32(uint32_t x); +size_t pow2_ceil_zu(size_t x); +unsigned lg_floor(size_t x); +void set_errno(int errnum); +int get_errno(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) + +/* Sanity check. 
*/ +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ + || !defined(JEMALLOC_INTERNAL_FFS) +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure +#endif + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_llu(unsigned long long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSLL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_lu(unsigned long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u(unsigned bitmap) +{ + + return (JEMALLOC_INTERNAL_FFS(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_zu(size_t bitmap) +{ + +#if LG_SIZEOF_PTR == LG_SIZEOF_INT + return (ffs_u(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return (ffs_llu(bitmap)); +#else +#error No implementation for size_t ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u64(uint64_t bitmap) +{ + +#if LG_SIZEOF_LONG == 3 + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_LONG_LONG == 3 + return (ffs_llu(bitmap)); +#else +#error No implementation for 64-bit ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u32(uint32_t bitmap) +{ + +#if LG_SIZEOF_INT == 2 + return (ffs_u(bitmap)); +#else +#error No implementation for 32-bit ffs() +#endif + return (ffs_u(bitmap)); +} + +JEMALLOC_INLINE uint64_t +pow2_ceil_u64(uint64_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x |= x >> 32; + x++; + return (x); +} + +JEMALLOC_INLINE uint32_t +pow2_ceil_u32(uint32_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return (x); +} + +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil_zu(size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return (pow2_ceil_u64(x)); +#else + return (pow2_ceil_u32(x)); +#endif +} + +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE unsigned +lg_floor(size_t x) +{ + size_t ret; + + assert(x != 0); + + asm ("bsr %1, %0" + : "=r"(ret) // Outputs. + : "r"(x) // Inputs. + ); + assert(ret < UINT_MAX); + return ((unsigned)ret); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE unsigned +lg_floor(size_t x) +{ + unsigned long ret; + + assert(x != 0); + +#if (LG_SIZEOF_PTR == 3) + _BitScanReverse64(&ret, x); +#elif (LG_SIZEOF_PTR == 2) + _BitScanReverse(&ret, x); +#else +# error "Unsupported type size for lg_floor()" +#endif + assert(ret < UINT_MAX); + return ((unsigned)ret); +} +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) +JEMALLOC_INLINE unsigned +lg_floor(size_t x) +{ + + assert(x != 0); + +#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); +#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); +#else +# error "Unsupported type size for lg_floor()" +#endif +} +#else +JEMALLOC_INLINE unsigned +lg_floor(size_t x) +{ + + assert(x != 0); + + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); +#if (LG_SIZEOF_PTR == 3) + x |= (x >> 32); +#endif + if (x == SIZE_T_MAX) + return ((8 << LG_SIZEOF_PTR) - 1); + x++; + return (ffs_zu(x) - 2); +} +#endif + +/* Set error code. */ +JEMALLOC_INLINE void +set_errno(int errnum) +{ + +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code. 
*/ +JEMALLOC_INLINE int +get_errno(void) +{ + +#ifdef _WIN32 + return (GetLastError()); +#else + return (errno); +#endif +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/internal/valgrind.h b/memory/jemalloc/src/include/jemalloc/internal/valgrind.h new file mode 100644 index 000000000..1a8680828 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/valgrind.h @@ -0,0 +1,114 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#ifdef JEMALLOC_VALGRIND +#include <valgrind/valgrind.h> + +/* + * The size that is reported to Valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. + */ +#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_noaccess(ptr, usize); \ +} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_undefined(ptr, usize); \ +} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_defined(ptr, usize); \ +} while (0) +/* + * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro + * calls must be embedded in macros rather than in functions so that when + * Valgrind reports errors, there are no extra stack frames in the backtraces. + */ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do { \ + if (unlikely(in_valgrind && cond)) { \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr), \ + zero); \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do { \ + if (unlikely(in_valgrind)) { \ + size_t rzsize = p2rz(tsdn, ptr); \ + \ + if (!maybe_moved || ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + valgrind_make_mem_defined( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (!old_ptr_maybe_null || old_ptr != NULL) { \ + valgrind_freelike_block(old_ptr, \ + old_rzsize); \ + } \ + if (!ptr_maybe_null || ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? 
old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + valgrind_make_mem_defined(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + valgrind_make_mem_defined( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_freelike_block(ptr, rzsize); \ +} while (0) +#else +#define RUNNING_ON_VALGRIND ((unsigned)0) +#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do {} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_VALGRIND +void valgrind_make_mem_noaccess(void *ptr, size_t usize); +void valgrind_make_mem_undefined(void *ptr, size_t usize); +void valgrind_make_mem_defined(void *ptr, size_t usize); +void valgrind_freelike_block(void *ptr, size_t usize); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/memory/jemalloc/src/include/jemalloc/internal/witness.h b/memory/jemalloc/src/include/jemalloc/internal/witness.h new file mode 100644 index 000000000..cdf15d797 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/internal/witness.h @@ -0,0 +1,266 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, const witness_t *); + +/* + * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by + * the witness machinery. 
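+ *
+ * Ranks are enforced at acquisition time: witness_lock() (defined below)
+ * reports a lock order reversal in debug builds if the most recently acquired
+ * witness held by the current thread outranks the one being acquired. For
+ * example, taking locks in the order
+ *
+ *	WITNESS_RANK_ARENA (8) -> WITNESS_RANK_BASE (11)
+ *
+ * is legal, while the reverse order triggers witness_lock_error().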
+ */
+#define WITNESS_RANK_OMIT 0U
+
+#define WITNESS_RANK_INIT 1U
+#define WITNESS_RANK_CTL 1U
+#define WITNESS_RANK_ARENAS 2U
+
+#define WITNESS_RANK_PROF_DUMP 3U
+#define WITNESS_RANK_PROF_BT2GCTX 4U
+#define WITNESS_RANK_PROF_TDATAS 5U
+#define WITNESS_RANK_PROF_TDATA 6U
+#define WITNESS_RANK_PROF_GCTX 7U
+
+#define WITNESS_RANK_ARENA 8U
+#define WITNESS_RANK_ARENA_CHUNKS 9U
+#define WITNESS_RANK_ARENA_NODE_CACHE 10U
+
+#define WITNESS_RANK_BASE 11U
+
+#define WITNESS_RANK_LEAF 0xffffffffU
+#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF
+#define WITNESS_RANK_ARENA_HUGE WITNESS_RANK_LEAF
+#define WITNESS_RANK_DSS WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF
+
+#define WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}}
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct witness_s {
+ /* Name, used for printing lock order reversal messages. */
+ const char *name;
+
+ /*
+ * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses
+ * must be acquired in order of increasing rank.
+ */
+ witness_rank_t rank;
+
+ /*
+ * If two witnesses are of equal rank and they have the same comp
+ * function pointer, it is called as a last attempt to differentiate
+ * between witnesses of equal rank.
+ */
+ witness_comp_t *comp;
+
+ /* Linkage for thread's currently owned locks. */
+ ql_elm(witness_t) link;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+ witness_comp_t *comp);
+#ifdef JEMALLOC_JET
+typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *);
+extern witness_lock_error_t *witness_lock_error;
+#else
+void witness_lock_error(const witness_list_t *witnesses,
+ const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_owner_error_t)(const witness_t *);
+extern witness_owner_error_t *witness_owner_error;
+#else
+void witness_owner_error(const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_not_owner_error_t)(const witness_t *);
+extern witness_not_owner_error_t *witness_not_owner_error;
+#else
+void witness_not_owner_error(const witness_t *witness);
+#endif
+#ifdef JEMALLOC_JET
+typedef void (witness_lockless_error_t)(const witness_list_t *);
+extern witness_lockless_error_t *witness_lockless_error;
+#else
+void witness_lockless_error(const witness_list_t *witnesses);
+#endif
+
+void witnesses_cleanup(tsd_t *tsd);
+void witness_fork_cleanup(tsd_t *tsd);
+void witness_prefork(tsd_t *tsd);
+void witness_postfork_parent(tsd_t *tsd);
+void witness_postfork_child(tsd_t *tsd);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool witness_owner(tsd_t *tsd, const witness_t *witness);
+void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness);
+void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness);
+void witness_assert_lockless(tsdn_t *tsdn);
+void witness_lock(tsdn_t *tsdn, witness_t *witness);
+void witness_unlock(tsdn_t *tsdn, 
witness_t *witness); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + +JEMALLOC_INLINE void +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + if (witness_owner(tsd, witness)) + return; + witness_owner_error(witness); +} + +JEMALLOC_INLINE void +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + witness_not_owner_error(witness); + } +} + +JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL) + witness_lockless_error(witnesses); +} + +JEMALLOC_INLINE void +witness_lock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_not_owner(tsdn, witness); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. */ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ + witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, witness) > 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +JEMALLOC_INLINE void +witness_unlock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. 
+ */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/memory/jemalloc/src/include/jemalloc/jemalloc.sh b/memory/jemalloc/src/include/jemalloc/jemalloc.sh new file mode 100755 index 000000000..c085814f2 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/jemalloc.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +objroot=$1 + +cat <<EOF +#ifndef JEMALLOC_H_ +#define JEMALLOC_H_ +#ifdef __cplusplus +extern "C" { +#endif + +EOF + +for hdr in jemalloc_defs.h jemalloc_rename.h jemalloc_macros.h \ + jemalloc_protos.h jemalloc_typedefs.h jemalloc_mangle.h ; do + cat "${objroot}include/jemalloc/${hdr}" \ + | grep -v 'Generated from .* by configure\.' \ + | sed -e 's/^#define /#define /g' \ + | sed -e 's/ $//g' + echo +done + +cat <<EOF +#ifdef __cplusplus +} +#endif +#endif /* JEMALLOC_H_ */ +EOF diff --git a/memory/jemalloc/src/include/jemalloc/jemalloc_defs.h.in b/memory/jemalloc/src/include/jemalloc/jemalloc_defs.h.in new file mode 100644 index 000000000..6d89435c2 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/jemalloc_defs.h.in @@ -0,0 +1,45 @@ +/* Defined if __attribute__((...)) syntax is supported. */ +#undef JEMALLOC_HAVE_ATTR + +/* Defined if alloc_size attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE + +/* Defined if format(gnu_printf, ...) attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF + +/* Defined if format(printf, ...) attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#undef JEMALLOC_OVERRIDE_MEMALIGN +#undef JEMALLOC_OVERRIDE_VALLOC + +/* + * At least Linux omits the "const" in: + * + * size_t malloc_usable_size(const void *ptr); + * + * Match the operating system's prototype. + */ +#undef JEMALLOC_USABLE_SIZE_CONST + +/* + * If defined, specify throw() for the public function prototypes when compiling + * with C++. The only justification for this is to match the prototypes that + * glibc defines. + */ +#undef JEMALLOC_USE_CXX_THROW + +#ifdef _MSC_VER +# ifdef _WIN64 +# define LG_SIZEOF_PTR_WIN 3 +# else +# define LG_SIZEOF_PTR_WIN 2 +# endif +#endif + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#undef LG_SIZEOF_PTR diff --git a/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in b/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in new file mode 100644 index 000000000..129240ed9 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/jemalloc_macros.h.in @@ -0,0 +1,103 @@ +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> +#include <limits.h> +#include <strings.h> + +#define JEMALLOC_VERSION "@jemalloc_version@" +#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ +#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ +#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ +#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ +#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" + +# define MALLOCX_LG_ALIGN(la) ((int)(la)) +# if LG_SIZEOF_PTR == 2 +# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) +# else +# define MALLOCX_ALIGN(a) \ + ((int)(((size_t)(a) < (size_t)INT_MAX) ? 
ffs((int)(a))-1 : \ + ffs((int)(((size_t)(a))>>32))+31)) +# endif +# define MALLOCX_ZERO ((int)0x40) +/* + * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 + * encodes MALLOCX_TCACHE_NONE. + */ +# define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +# define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +/* + * Bias arena index bits so that 0 encodes "use an automatically chosen arena". + */ +# define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) + +#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) +# define JEMALLOC_CXX_THROW throw() +#else +# define JEMALLOC_CXX_THROW +#endif + +#if _MSC_VER +# define JEMALLOC_ATTR(s) +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# ifndef JEMALLOC_EXPORT +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# endif +# define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_NOINLINE __declspec(noinline) +# ifdef __cplusplus +# define JEMALLOC_NOTHROW __declspec(nothrow) +# else +# define JEMALLOC_NOTHROW +# endif +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_RESTRICT_RETURN __declspec(restrict) +# if _MSC_VER >= 1900 && !defined(__EDG__) +# define JEMALLOC_ALLOCATOR __declspec(allocator) +# else +# define JEMALLOC_ALLOCATOR +# endif +#elif defined(JEMALLOC_HAVE_ATTR) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +# else +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR +#else +# define JEMALLOC_ATTR(s) +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# define JEMALLOC_EXPORT +# define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_NOINLINE +# define JEMALLOC_NOTHROW +# define JEMALLOC_SECTION(s) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR +#endif diff --git a/memory/jemalloc/src/include/jemalloc/jemalloc_mangle.sh b/memory/jemalloc/src/include/jemalloc/jemalloc_mangle.sh new file mode 100755 index 000000000..df328b78d --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/jemalloc_mangle.sh @@ -0,0 +1,45 @@ +#!/bin/sh + +public_symbols_txt=$1 +symbol_prefix=$2 + +cat <<EOF +/* + * By default application code must explicitly refer to mangled symbol names, + * so that it is possible to use jemalloc in conjunction with another allocator + * in the same application. Define JEMALLOC_MANGLE in order to cause automatic + * name mangling that matches the API prefixing that happened as a result of + * --with-mangling and/or --with-jemalloc-prefix configuration settings. 
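+ *
+ * (Editor's note: with a je_ symbol prefix, the block generated below
+ * comes out roughly as follows; the exact symbol list is read from
+ * public_symbols_txt, so these names are only illustrative:
+ *
+ *   #ifdef JEMALLOC_MANGLE
+ *   # ifndef JEMALLOC_NO_DEMANGLE
+ *   # define JEMALLOC_NO_DEMANGLE
+ *   # endif
+ *   # define malloc je_malloc
+ *   # define calloc je_calloc
+ *   # define free je_free
+ *   #endif
+ *
+ * An application built with -DJEMALLOC_MANGLE can then keep calling
+ * plain malloc()/free() while resolving to this allocator.)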
+ */ +#ifdef JEMALLOC_MANGLE +# ifndef JEMALLOC_NO_DEMANGLE +# define JEMALLOC_NO_DEMANGLE +# endif +EOF + +for nm in `cat ${public_symbols_txt}` ; do + n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` + echo "# define ${n} ${symbol_prefix}${n}" +done + +cat <<EOF +#endif + +/* + * The ${symbol_prefix}* macros can be used as stable alternative names for the + * public jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily + * meant for use in jemalloc itself, but it can be used by application code to + * provide isolation from the name mangling specified via --with-mangling + * and/or --with-jemalloc-prefix. + */ +#ifndef JEMALLOC_NO_DEMANGLE +EOF + +for nm in `cat ${public_symbols_txt}` ; do + n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` + echo "# undef ${symbol_prefix}${n}" +done + +cat <<EOF +#endif +EOF diff --git a/memory/jemalloc/src/include/jemalloc/jemalloc_protos.h.in b/memory/jemalloc/src/include/jemalloc/jemalloc_protos.h.in new file mode 100644 index 000000000..a78414b19 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/jemalloc_protos.h.in @@ -0,0 +1,66 @@ +/* + * The @je_@ prefix on the following public symbol declarations is an artifact + * of namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle@install_suffix@.h). + */ +extern JEMALLOC_EXPORT const char *@je_@malloc_conf; +extern JEMALLOC_EXPORT void (*@je_@malloc_message)(void *cbopaque, + const char *s); + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *@je_@malloc(size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *@je_@calloc(size_t num, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@posix_memalign(void **memptr, + size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *@je_@aligned_alloc(size_t alignment, + size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) + JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *@je_@realloc(void *ptr, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free(void *ptr) + JEMALLOC_CXX_THROW; + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *@je_@mallocx(size_t size, int flags) + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *@je_@rallocx(void *ptr, size_t size, + int flags) JEMALLOC_ALLOC_SIZE(2); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@xallocx(void *ptr, size_t size, + size_t extra, int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@sallocx(const void *ptr, + int flags) JEMALLOC_ATTR(pure); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@dallocx(void *ptr, int flags); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@sdallocx(void *ptr, size_t size, + int flags); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@nallocx(size_t size, int flags) + JEMALLOC_ATTR(pure); + +JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@mallctl(const char *name, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@mallctlnametomib(const char *name, + size_t *mibp, size_t *miblenp); +JEMALLOC_EXPORT int 
JEMALLOC_NOTHROW @je_@mallctlbymib(const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@malloc_stats_print( + void (*write_cb)(void *, const char *), void *@je_@cbopaque, + const char *opts); +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_usable_size( + JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *@je_@memalign(size_t alignment, size_t size) + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); +#endif diff --git a/memory/jemalloc/src/include/jemalloc/jemalloc_rename.sh b/memory/jemalloc/src/include/jemalloc/jemalloc_rename.sh new file mode 100755 index 000000000..f94389120 --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/jemalloc_rename.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +public_symbols_txt=$1 + +cat <<EOF +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. + */ +#ifndef JEMALLOC_NO_RENAME +EOF + +for nm in `cat ${public_symbols_txt}` ; do + n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` + m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'` + echo "# define je_${n} ${m}" +done + +cat <<EOF +#endif +EOF diff --git a/memory/jemalloc/src/include/jemalloc/jemalloc_typedefs.h.in b/memory/jemalloc/src/include/jemalloc/jemalloc_typedefs.h.in new file mode 100644 index 000000000..fa7b350ad --- /dev/null +++ b/memory/jemalloc/src/include/jemalloc/jemalloc_typedefs.h.in @@ -0,0 +1,57 @@ +/* + * void * + * chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, + * bool *commit, unsigned arena_ind); + */ +typedef void *(chunk_alloc_t)(void *, size_t, size_t, bool *, bool *, unsigned); + +/* + * bool + * chunk_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind); + */ +typedef bool (chunk_dalloc_t)(void *, size_t, bool, unsigned); + +/* + * bool + * chunk_commit(void *chunk, size_t size, size_t offset, size_t length, + * unsigned arena_ind); + */ +typedef bool (chunk_commit_t)(void *, size_t, size_t, size_t, unsigned); + +/* + * bool + * chunk_decommit(void *chunk, size_t size, size_t offset, size_t length, + * unsigned arena_ind); + */ +typedef bool (chunk_decommit_t)(void *, size_t, size_t, size_t, unsigned); + +/* + * bool + * chunk_purge(void *chunk, size_t size, size_t offset, size_t length, + * unsigned arena_ind); + */ +typedef bool (chunk_purge_t)(void *, size_t, size_t, size_t, unsigned); + +/* + * bool + * chunk_split(void *chunk, size_t size, size_t size_a, size_t size_b, + * bool committed, unsigned arena_ind); + */ +typedef bool (chunk_split_t)(void *, size_t, size_t, size_t, bool, unsigned); + +/* + * bool + * chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, + * bool committed, unsigned arena_ind); + */ +typedef bool (chunk_merge_t)(void *, size_t, void *, size_t, bool, unsigned); + +typedef struct { + chunk_alloc_t *alloc; + chunk_dalloc_t *dalloc; + chunk_commit_t *commit; + chunk_decommit_t *decommit; + chunk_purge_t *purge; + chunk_split_t *split; + chunk_merge_t *merge; +} chunk_hooks_t; diff --git a/memory/jemalloc/src/include/msvc_compat/C99/stdbool.h 
b/memory/jemalloc/src/include/msvc_compat/C99/stdbool.h new file mode 100644 index 000000000..d92160ebc --- /dev/null +++ b/memory/jemalloc/src/include/msvc_compat/C99/stdbool.h @@ -0,0 +1,20 @@ +#ifndef stdbool_h +#define stdbool_h + +#include <wtypes.h> + +/* MSVC doesn't define _Bool or bool in C, but does have BOOL */ +/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */ +/* Clang-cl uses MSVC headers, so needs msvc_compat, but has _Bool as + * a built-in type. */ +#ifndef __clang__ +typedef BOOL _Bool; +#endif + +#define bool _Bool +#define true 1 +#define false 0 + +#define __bool_true_false_are_defined 1 + +#endif /* stdbool_h */ diff --git a/memory/jemalloc/src/include/msvc_compat/C99/stdint.h b/memory/jemalloc/src/include/msvc_compat/C99/stdint.h new file mode 100644 index 000000000..d02608a59 --- /dev/null +++ b/memory/jemalloc/src/include/msvc_compat/C99/stdint.h @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. 
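+// (Editor's note: a sketch of the diagnostic _W64 enables when this header
+// is compiled with MSVC's /Wp64 switch; variable names are illustrative:
+//   void *p = 0;
+//   intptr_t ip = (intptr_t)p;  // intptr_t is _W64-marked below on Win32
+//   int n = (int)ip;            // flagged: would truncate on Win64
+// The marked types are diagnosed as if 64 bits wide even in 32-bit builds.)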
+#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. +#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# 
define INTPTR_MAX INT64_MAX
+# define UINTPTR_MAX UINT64_MAX
+#else // _WIN64 ][
+# define INTPTR_MIN INT32_MIN
+# define INTPTR_MAX INT32_MAX
+# define UINTPTR_MAX UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN INT64_MIN
+#define INTMAX_MAX INT64_MAX
+#define UINTMAX_MAX UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+# define PTRDIFF_MIN _I64_MIN
+# define PTRDIFF_MAX _I64_MAX
+#else // _WIN64 ][
+# define PTRDIFF_MIN _I32_MIN
+# define PTRDIFF_MAX _I32_MAX
+#endif // _WIN64 ]
+
+#define SIG_ATOMIC_MIN INT_MIN
+#define SIG_ATOMIC_MAX INT_MAX
+
+#ifndef SIZE_MAX // [
+# ifdef _WIN64 // [
+# define SIZE_MAX _UI64_MAX
+# else // _WIN64 ][
+# define SIZE_MAX _UI32_MAX
+# endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+# define WCHAR_MIN 0
+#endif // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+# define WCHAR_MAX _UI16_MAX
+#endif // WCHAR_MAX ]
+
+#define WINT_MIN 0
+#define WINT_MAX _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val) val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val) val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C INT64_C
+#define UINTMAX_C UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/memory/jemalloc/src/include/msvc_compat/strings.h b/memory/jemalloc/src/include/msvc_compat/strings.h
new file mode 100644
index 000000000..a3ee25063
--- /dev/null
+++ b/memory/jemalloc/src/include/msvc_compat/strings.h
@@ -0,0 +1,59 @@
+#ifndef strings_h
+#define strings_h
+
+/* MSVC doesn't define ffs/ffsl. This dummy strings.h header is provided
+ * for both */
+#ifdef _MSC_VER
+# include <intrin.h>
+# pragma intrinsic(_BitScanForward)
+static __forceinline int ffsl(long x)
+{
+ unsigned long i;
+
+ if (_BitScanForward(&i, x))
+ return (i + 1);
+ return (0);
+}
+
+static __forceinline int ffs(int x)
+{
+
+ return (ffsl(x));
+}
+
+# ifdef _M_X64
+# pragma intrinsic(_BitScanForward64)
+# endif
+
+static __forceinline int ffsll(unsigned __int64 x)
+{
+ unsigned long i;
+#ifdef _M_X64
+ if (_BitScanForward64(&i, x))
+ return (i + 1);
+ return (0);
+#else
+// Fallback for 32-bit builds where the 64-bit intrinsic is not available;
+// assumes little-endian layout.
+ union {
+ unsigned __int64 ll;
+ unsigned long l[2];
+ } s;
+
+ s.ll = x;
+
+ if (_BitScanForward(&i, s.l[0]))
+ return (i + 1);
+ else if (_BitScanForward(&i, s.l[1]))
+ return (i + 33);
+ return (0);
+#endif
+}
+
+#else
+# define ffsll(x) __builtin_ffsll(x)
+# define ffsl(x) __builtin_ffsl(x)
+# define ffs(x) __builtin_ffs(x)
+#endif
+
+#endif /* strings_h */
diff --git a/memory/jemalloc/src/include/msvc_compat/windows_extra.h b/memory/jemalloc/src/include/msvc_compat/windows_extra.h
new file mode 100644
index 000000000..3008faa37
--- /dev/null
+++ b/memory/jemalloc/src/include/msvc_compat/windows_extra.h
@@ -0,0 +1,6 @@
+#ifndef MSVC_COMPAT_WINDOWS_EXTRA_H
+#define MSVC_COMPAT_WINDOWS_EXTRA_H
+
+#include <errno.h>
+
+#endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */
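
Editor's note: the strings.h shim above is only a faithful replacement if it
preserves the POSIX ffs() contract: bit positions are 1-based and an all-zero
argument yields 0. A minimal self-contained check (a sketch; on MSVC it
assumes the msvc_compat directory is on the include path so that the shim
above is the strings.h that gets found):

#include <assert.h>
#include <strings.h>

int main(void)
{
	assert(ffs(0) == 0);             /* no bit set -> 0 */
	assert(ffs(1) == 1);             /* lowest bit is position 1, not 0 */
	assert(ffs(0x8) == 4);           /* 0x8 = 0b1000 -> position 4 */
	assert(ffsl(0x10L) == 5);
	assert(ffsll(1ULL << 32) == 33); /* exercises the 32-bit fallback path */
	return (0);
}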