From 0ecb606cb6cf65de1d9fc8a919bceb4be476c602 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 12 Jul 2007 18:26:36 +0000 Subject: 2.5-18.1 --- malloc/Makefile | 16 ++- malloc/arena.c | 179 +++++++++++++++-------- malloc/hooks.c | 49 +++++-- malloc/malloc.c | 374 +++++++++++++++++++++++++++++++++++++++--------- malloc/malloc.h | 142 +++++------------- malloc/mcheck.c | 30 +++- malloc/memusage.c | 325 +++++++++++++++++++++++++---------------- malloc/memusage.sh | 28 ++-- malloc/memusagestat.c | 51 +++---- malloc/morecore.c | 54 +++++++ malloc/mtrace.c | 20 ++- malloc/mtrace.pl | 4 +- malloc/obstack.c | 71 +++++---- malloc/obstack.h | 114 ++++++++------- malloc/tst-malloc.c | 12 +- malloc/tst-mallocfork.c | 51 +++++++ malloc/tst-mcheck.c | 91 ++++++++++++ malloc/tst-mtrace.sh | 5 +- 18 files changed, 1109 insertions(+), 507 deletions(-) create mode 100644 malloc/morecore.c create mode 100644 malloc/tst-mallocfork.c create mode 100644 malloc/tst-mcheck.c (limited to 'malloc') diff --git a/malloc/Makefile b/malloc/Makefile index 0512c49839..c39eae5474 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -1,4 +1,5 @@ -# Copyright (C) 1991-1999,2000,2001,2002,2003 Free Software Foundation, Inc. +# Copyright (C) 1991-1999, 2000, 2001, 2002, 2003, 2005, 2006 +# Free Software Foundation, Inc. # This file is part of the GNU C Library. # The GNU C Library is free software; you can redistribute it and/or @@ -26,7 +27,7 @@ all: dist-headers := malloc.h headers := $(dist-headers) obstack.h mcheck.h tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \ - tst-mallocstate + tst-mallocstate tst-mcheck tst-mallocfork test-srcs = tst-mtrace distribute = thread-m.h mtrace.pl mcheck-init.c stackinfo.h memusage.h \ @@ -78,6 +79,7 @@ endif ifneq ($(cross-compiling),yes) # If the gd library is available we build the `memusagestat' program. 
ifneq ($(LIBGD),no) +others: $(objpfx)memusage install-bin = memusagestat install-bin-script += memusage generated += memusagestat memusage @@ -101,6 +103,8 @@ $(objpfx)memusagestat: $(memusagestat-modules:%=$(objpfx)%.o) include ../Rules +CFLAGS-mcheck-init.c = $(PIC-ccflag) + $(objpfx)libmcheck.a: $(objpfx)mcheck-init.o -rm -f $@ $(patsubst %/,cd % &&,$(objpfx)) \ @@ -118,9 +122,13 @@ endif endif endif +tst-mcheck-ENV = MALLOC_CHECK_=3 + # Uncomment this for test releases. For public releases it is too expensive. #CPPFLAGS-malloc.o += -DMALLOC_DEBUG=1 +sLIBdir := $(shell echo $(slibdir) | sed 's,lib\(\|64\)$$,\\\\$$LIB,') + $(objpfx)mtrace: mtrace.pl rm -f $@.new sed -e 's|@PERL@|$(PERL)|' -e 's|@XXX@|$(address-width)|' \ @@ -130,12 +138,12 @@ $(objpfx)mtrace: mtrace.pl $(objpfx)memusage: memusage.sh rm -f $@.new sed -e 's|@BASH@|$(BASH)|' -e 's|@VERSION@|$(version)|' \ - -e 's|@SLIBDIR@|$(slibdir)|' -e 's|@BINDIR@|$(bindir)|' $^ > $@.new \ + -e 's|@SLIBDIR@|$(sLIBdir)|' -e 's|@BINDIR@|$(bindir)|' $^ > $@.new \ && rm -f $@ && mv $@.new $@ && chmod +x $@ # The implementation uses `dlsym' -$(objpfx)libmemusage.so: $(common-objpfx)dlfcn/libdl.so +$(objpfx)libmemusage.so: $(common-objpfx)dlfcn/libdl.so $(elfobjdir)/ld.so # Extra dependencies $(foreach o,$(all-object-suffixes),$(objpfx)malloc$(o)): arena.c hooks.c diff --git a/malloc/arena.c b/malloc/arena.c index 026f2c7822..0dcb7cb9f8 100644 --- a/malloc/arena.c +++ b/malloc/arena.c @@ -1,5 +1,6 @@ /* Malloc implementation for multiple threads without lock contention. - Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc. + Copyright (C) 2001,2002,2003,2004,2005,2006,2007 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Wolfram Gloger , 2001. @@ -18,13 +19,17 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id$ */ +#include /* Compile-time constants. 
*/ #define HEAP_MIN_SIZE (32*1024) #ifndef HEAP_MAX_SIZE -#define HEAP_MAX_SIZE (1024*1024) /* must be a power of two */ +# ifdef DEFAULT_MMAP_THRESHOLD_MAX +# define HEAP_MAX_SIZE (2 * DEFAULT_MMAP_THRESHOLD_MAX) +# else +# define HEAP_MAX_SIZE (1024*1024) /* must be a power of two */ +# endif #endif /* HEAP_MIN_SIZE and HEAP_MAX_SIZE limit the size of mmap()ed heaps @@ -55,9 +60,20 @@ typedef struct _heap_info { mstate ar_ptr; /* Arena for this heap. */ struct _heap_info *prev; /* Previous heap. */ size_t size; /* Current size in bytes. */ - size_t pad; /* Make sure the following data is properly aligned. */ + size_t mprotect_size; /* Size in bytes that has been mprotected + PROT_READ|PROT_WRITE. */ + /* Make sure the following data is properly aligned, particularly + that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of + MALLOC_ALIGNMENT. */ + char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK]; } heap_info; +/* Get a compile-time error if the heap_info padding is not correct + to make alignment work as expected in sYSMALLOc. */ +extern int sanity_check_heap_info_alignment[(sizeof (heap_info) + + 2 * SIZE_SZ) % MALLOC_ALIGNMENT + ? -1 : 1]; + /* Thread specific data */ static tsd_key_t arena_key; @@ -208,6 +224,10 @@ free_atfork(Void_t* mem, const Void_t *caller) (void)mutex_unlock(&ar_ptr->mutex); } + +/* Counter for number of times the list is locked by the same thread. */ +static unsigned int atfork_recursive_cntr; + /* The following two functions are registered via thread_atfork() to make sure that the mutexes remain in a consistent state in the fork()ed version of a thread. Also adapt the malloc and free hooks @@ -221,7 +241,18 @@ ptmalloc_lock_all (void) if(__malloc_initialized < 1) return; - (void)mutex_lock(&list_lock); + if (mutex_trylock(&list_lock)) + { + Void_t *my_arena; + tsd_getspecific(arena_key, my_arena); + if (my_arena == ATFORK_ARENA_PTR) + /* This is the same thread which already locks the global list. + Just bump the counter. 
*/ + goto out; + + /* This thread has to wait its turn. */ + (void)mutex_lock(&list_lock); + } for(ar_ptr = &main_arena;;) { (void)mutex_lock(&ar_ptr->mutex); ar_ptr = ar_ptr->next; @@ -234,6 +265,8 @@ ptmalloc_lock_all (void) /* Only the current thread may perform malloc/free calls now. */ tsd_getspecific(arena_key, save_arena); tsd_setspecific(arena_key, ATFORK_ARENA_PTR); + out: + ++atfork_recursive_cntr; } static void @@ -243,6 +276,8 @@ ptmalloc_unlock_all (void) if(__malloc_initialized < 1) return; + if (--atfork_recursive_cntr != 0) + return; tsd_setspecific(arena_key, save_arena); __malloc_hook = save_malloc_hook; __free_hook = save_free_hook; @@ -256,7 +291,7 @@ ptmalloc_unlock_all (void) #ifdef __linux__ -/* In LinuxThreads, unlocking a mutex in the child process after a +/* In NPTL, unlocking a mutex in the child process after a fork() is currently unsafe, whereas re-initializing it is safe and does not leak resources. Therefore, a special atfork handler is installed for the child. */ @@ -279,6 +314,7 @@ ptmalloc_unlock_all2 (void) if(ar_ptr == &main_arena) break; } mutex_init(&list_lock); + atfork_recursive_cntr = 0; } #else @@ -353,8 +389,6 @@ libc_hidden_proto (_dl_open_hook); # endif # if defined SHARED && defined USE_TLS && !USE___THREAD -# include - /* This is called by __pthread_initialize_minimal when it needs to use malloc to set up the TLS state. We cannot do the full work of ptmalloc_init (below) until __pthread_initialize_minimal has finished, @@ -482,8 +516,13 @@ ptmalloc_init (void) s = &envline[7]; break; case 8: - if (! secure && memcmp (envline, "TOP_PAD_", 8) == 0) - mALLOPt(M_TOP_PAD, atoi(&envline[9])); + if (! secure) + { + if (memcmp (envline, "TOP_PAD_", 8) == 0) + mALLOPt(M_TOP_PAD, atoi(&envline[9])); + else if (memcmp (envline, "PERTURB_", 8) == 0) + mALLOPt(M_PERTURB, atoi(&envline[9])); + } break; case 9: if (! 
secure && memcmp (envline, "MMAP_MAX_", 9) == 0) @@ -510,6 +549,8 @@ ptmalloc_init (void) mALLOPt(M_TRIM_THRESHOLD, atoi(s)); if((s = getenv("MALLOC_TOP_PAD_"))) mALLOPt(M_TOP_PAD, atoi(s)); + if((s = getenv("MALLOC_PERTURB_"))) + mALLOPt(M_PERTURB, atoi(s)); if((s = getenv("MALLOC_MMAP_THRESHOLD_"))) mALLOPt(M_MMAP_THRESHOLD, atoi(s)); if((s = getenv("MALLOC_MMAP_MAX_"))) @@ -517,8 +558,8 @@ ptmalloc_init (void) } s = getenv("MALLOC_CHECK_"); #endif - if(s) { - if(s[0]) mALLOPt(M_CHECK_ACTION, (int)(s[0] - '0')); + if(s && s[0]) { + mALLOPt(M_CHECK_ACTION, (int)(s[0] - '0')); if (check_action != 0) __malloc_check_init(); } @@ -654,6 +695,7 @@ new_heap(size, top_pad) size_t size, top_pad; } h = (heap_info *)p2; h->size = size; + h->mprotect_size = size; THREAD_STAT(stat_n_heaps++); return h; } @@ -674,19 +716,36 @@ grow_heap(h, diff) heap_info *h; long diff; if(diff >= 0) { diff = (diff + page_mask) & ~page_mask; new_size = (long)h->size + diff; - if(new_size > HEAP_MAX_SIZE) + if((unsigned long) new_size > (unsigned long) HEAP_MAX_SIZE) return -1; - if(mprotect((char *)h + h->size, diff, PROT_READ|PROT_WRITE) != 0) - return -2; + if((unsigned long) new_size > h->mprotect_size) { + if (mprotect((char *)h + h->mprotect_size, + (unsigned long) new_size - h->mprotect_size, + PROT_READ|PROT_WRITE) != 0) + return -2; + h->mprotect_size = new_size; + } } else { new_size = (long)h->size + diff; if(new_size < (long)sizeof(*h)) return -1; /* Try to re-map the extra heap space freshly to save memory, and make it inaccessible. 
*/ - if((char *)MMAP((char *)h + new_size, -diff, PROT_NONE, - MAP_PRIVATE|MAP_FIXED) == (char *) MAP_FAILED) - return -2; +#ifdef _LIBC + if (__builtin_expect (__libc_enable_secure, 0)) +#else + if (1) +#endif + { + if((char *)MMAP((char *)h + new_size, -diff, PROT_NONE, + MAP_PRIVATE|MAP_FIXED) == (char *) MAP_FAILED) + return -2; + h->mprotect_size = new_size; + } +#ifdef _LIBC + else + madvise ((char *)h + new_size, -diff, MADV_DONTNEED); +#endif /*fprintf(stderr, "shrink %p %08lx\n", h, new_size);*/ } h->size = new_size; @@ -759,6 +818,48 @@ heap_trim(heap, pad) heap_info *heap; size_t pad; return 1; } +/* Create a new arena with initial size "size". */ + +static mstate +_int_new_arena(size_t size) +{ + mstate a; + heap_info *h; + char *ptr; + unsigned long misalign; + + h = new_heap(size + (sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT), + mp_.top_pad); + if(!h) { + /* Maybe size is too large to fit in a single heap. So, just try + to create a minimally-sized arena and let _int_malloc() attempt + to deal with the large request via mmap_chunk(). */ + h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT, mp_.top_pad); + if(!h) + return 0; + } + a = h->ar_ptr = (mstate)(h+1); + malloc_init_state(a); + /*a->next = NULL;*/ + a->system_mem = a->max_system_mem = h->size; + arena_mem += h->size; +#ifdef NO_THREADS + if((unsigned long)(mp_.mmapped_mem + arena_mem + main_arena.system_mem) > + mp_.max_total_mem) + mp_.max_total_mem = mp_.mmapped_mem + arena_mem + main_arena.system_mem; +#endif + + /* Set up the top chunk, with proper alignment. 
*/ + ptr = (char *)(a + 1); + misalign = (unsigned long)chunk2mem(ptr) & MALLOC_ALIGN_MASK; + if (misalign > 0) + ptr += MALLOC_ALIGNMENT - misalign; + top(a) = (mchunkptr)ptr; + set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE); + + return a; +} + static mstate internal_function #if __STD_C @@ -829,48 +930,6 @@ arena_get2(a_tsd, size) mstate a_tsd; size_t size; return a; } -/* Create a new arena with initial size "size". */ - -mstate -_int_new_arena(size_t size) -{ - mstate a; - heap_info *h; - char *ptr; - unsigned long misalign; - - h = new_heap(size + (sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT), - mp_.top_pad); - if(!h) { - /* Maybe size is too large to fit in a single heap. So, just try - to create a minimally-sized arena and let _int_malloc() attempt - to deal with the large request via mmap_chunk(). */ - h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT, mp_.top_pad); - if(!h) - return 0; - } - a = h->ar_ptr = (mstate)(h+1); - malloc_init_state(a); - /*a->next = NULL;*/ - a->system_mem = a->max_system_mem = h->size; - arena_mem += h->size; -#ifdef NO_THREADS - if((unsigned long)(mp_.mmapped_mem + arena_mem + main_arena.system_mem) > - mp_.max_total_mem) - mp_.max_total_mem = mp_.mmapped_mem + arena_mem + main_arena.system_mem; -#endif - - /* Set up the top chunk, with proper alignment. */ - ptr = (char *)(a + 1); - misalign = (unsigned long)chunk2mem(ptr) & MALLOC_ALIGN_MASK; - if (misalign > 0) - ptr += MALLOC_ALIGNMENT - misalign; - top(a) = (mchunkptr)ptr; - set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE); - - return a; -} - #endif /* USE_ARENAS */ /* diff --git a/malloc/hooks.c b/malloc/hooks.c index a5c97f3133..708f0faf83 100644 --- a/malloc/hooks.c +++ b/malloc/hooks.c @@ -1,5 +1,5 @@ /* Malloc implementation for multiple threads without lock contention. - Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc. + Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 
This file is part of the GNU C Library. Contributed by Wolfram Gloger , 2001. @@ -18,8 +18,6 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id$ */ - /* What to do if the standard debugging hooks are in place and a corrupt pointer is detected: do nothing (0), print an error message (1), or call abort() (2). */ @@ -146,9 +144,9 @@ mem2mem_check(ptr, sz) Void_t *ptr; size_t sz; static mchunkptr internal_function #if __STD_C -mem2chunk_check(Void_t* mem) +mem2chunk_check(Void_t* mem, unsigned char **magic_p) #else -mem2chunk_check(mem) Void_t* mem; +mem2chunk_check(mem, magic_p) Void_t* mem; unsigned char **magic_p; #endif { mchunkptr p; @@ -173,7 +171,6 @@ mem2chunk_check(mem) Void_t* mem; for(sz += SIZE_SZ-1; (c = ((unsigned char*)p)[sz]) != magic; sz -= c) { if(c<=0 || sz<(c+2*SIZE_SZ)) return NULL; } - ((unsigned char*)p)[sz] ^= 0xFF; } else { unsigned long offset, page_mask = malloc_getpagesize-1; @@ -193,8 +190,10 @@ mem2chunk_check(mem) Void_t* mem; for(sz -= 1; (c = ((unsigned char*)p)[sz]) != magic; sz -= c) { if(c<=0 || sz<(c+2*SIZE_SZ)) return NULL; } - ((unsigned char*)p)[sz] ^= 0xFF; } + ((unsigned char*)p)[sz] ^= 0xFF; + if (magic_p) + *magic_p = (unsigned char *)p + sz; return p; } @@ -232,7 +231,11 @@ top_check() sbrk_size = front_misalign + mp_.top_pad + MINSIZE; sbrk_size += pagesz - ((unsigned long)(brk + sbrk_size) & (pagesz - 1)); new_brk = (char*)(MORECORE (sbrk_size)); - if (new_brk == (char*)(MORECORE_FAILURE)) return -1; + if (new_brk == (char*)(MORECORE_FAILURE)) + { + MALLOC_FAILURE_ACTION; + return -1; + } /* Call the `morecore' hook if necessary. */ if (__after_morecore_hook) (*__after_morecore_hook) (); @@ -253,6 +256,11 @@ malloc_check(sz, caller) size_t sz; const Void_t *caller; { Void_t *victim; + if (sz+1 == 0) { + MALLOC_FAILURE_ACTION; + return NULL; + } + (void)mutex_lock(&main_arena.mutex); victim = (top_check() >= 0) ? 
_int_malloc(&main_arena, sz+1) : NULL; (void)mutex_unlock(&main_arena.mutex); @@ -270,7 +278,7 @@ free_check(mem, caller) Void_t* mem; const Void_t *caller; if(!mem) return; (void)mutex_lock(&main_arena.mutex); - p = mem2chunk_check(mem); + p = mem2chunk_check(mem, NULL); if(!p) { (void)mutex_unlock(&main_arena.mutex); @@ -302,10 +310,19 @@ realloc_check(oldmem, bytes, caller) mchunkptr oldp; INTERNAL_SIZE_T nb, oldsize; Void_t* newmem = 0; + unsigned char *magic_p; + if (bytes+1 == 0) { + MALLOC_FAILURE_ACTION; + return NULL; + } if (oldmem == 0) return malloc_check(bytes, NULL); + if (bytes == 0) { + free_check (oldmem, NULL); + return NULL; + } (void)mutex_lock(&main_arena.mutex); - oldp = mem2chunk_check(oldmem); + oldp = mem2chunk_check(oldmem, &magic_p); (void)mutex_unlock(&main_arena.mutex); if(!oldp) { malloc_printerr(check_action, "realloc(): invalid pointer", oldmem); @@ -357,6 +374,12 @@ realloc_check(oldmem, bytes, caller) #if HAVE_MMAP } #endif + + /* mem2chunk_check changed the magic byte in the old chunk. + If newmem is NULL, then the old chunk will still be used though, + so we need to invert that change here. */ + if (newmem == NULL) *magic_p ^= 0xFF; + (void)mutex_unlock(&main_arena.mutex); return mem2mem_check(newmem, bytes); @@ -376,6 +399,10 @@ memalign_check(alignment, bytes, caller) if (alignment <= MALLOC_ALIGNMENT) return malloc_check(bytes, NULL); if (alignment < MINSIZE) alignment = MINSIZE; + if (bytes+1 == 0) { + MALLOC_FAILURE_ACTION; + return NULL; + } checked_request2size(bytes+1, nb); (void)mutex_lock(&main_arena.mutex); mem = (top_check() >= 0) ? _int_memalign(&main_arena, alignment, bytes+1) : @@ -555,7 +582,7 @@ public_sET_STATe(Void_t* msptr) (void)mutex_lock(&main_arena.mutex); /* There are no fastchunks. */ clear_fastchunks(&main_arena); - set_max_fast(&main_arena, DEFAULT_MXFAST); + set_max_fast(DEFAULT_MXFAST); for (i=0; i and Doug Lea , 2001. 
@@ -24,7 +24,6 @@ Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger. * Version ptmalloc2-20011215 - $Id$ based on: VERSION 2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) @@ -189,7 +188,8 @@ Changing default word sizes: INTERNAL_SIZE_T size_t - MALLOC_ALIGNMENT 2 * sizeof(INTERNAL_SIZE_T) + MALLOC_ALIGNMENT MAX (2 * sizeof(INTERNAL_SIZE_T), + __alignof__ (long double)) Configuration and functionality options: @@ -259,6 +259,7 @@ #ifdef _LIBC #include +#include #endif #ifdef __cplusplus @@ -381,6 +382,15 @@ extern "C" { #ifndef MALLOC_ALIGNMENT +/* XXX This is the correct definition. It differs from 2*SIZE_SZ only on + powerpc32. For the time being, changing this is causing more + compatibility problems due to malloc_get_state/malloc_set_state than + will returning blocks not adequately aligned for long double objects + under -mlong-double-128. + +#define MALLOC_ALIGNMENT (2 * SIZE_SZ < __alignof__ (long double) \ + ? __alignof__ (long double) : 2 * SIZE_SZ) +*/ #define MALLOC_ALIGNMENT (2 * SIZE_SZ) #endif @@ -1006,6 +1016,7 @@ struct mallinfo public_mALLINFo(void); struct mallinfo public_mALLINFo(); #endif +#ifndef _LIBC /* independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]); @@ -1129,6 +1140,8 @@ Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**); Void_t** public_iCOMALLOc(); #endif +#endif /* _LIBC */ + /* pvalloc(size_t n); @@ -1402,6 +1415,27 @@ int __posix_memalign(void **, size_t, size_t); #define DEFAULT_TOP_PAD (0) #endif +/* + MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically + adjusted MMAP_THRESHOLD. +*/ + +#ifndef DEFAULT_MMAP_THRESHOLD_MIN +#define DEFAULT_MMAP_THRESHOLD_MIN (128 * 1024) +#endif + +#ifndef DEFAULT_MMAP_THRESHOLD_MAX + /* For 32-bit platforms we cannot increase the maximum mmap + threshold much because it is also the minimum value for the + maximum heap size and its alignment. Going above 512k (i.e., 1M + for new heaps) wastes too much address space. 
*/ +# if __WORDSIZE == 32 +# define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024) +# else +# define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long)) +# endif +#endif + /* M_MMAP_THRESHOLD is the request size threshold for using mmap() to service a request. Requests of at least this size that cannot @@ -1441,12 +1475,63 @@ int __posix_memalign(void **, size_t, size_t); "large" chunks, but the value of "large" varies across systems. The default is an empirically derived value that works well in most systems. + + + Update in 2006: + The above was written in 2001. Since then the world has changed a lot. + Memory got bigger. Applications got bigger. The virtual address space + layout in 32 bit linux changed. + + In the new situation, brk() and mmap space is shared and there are no + artificial limits on brk size imposed by the kernel. What is more, + applications have started using transient allocations larger than the + 128Kb as was imagined in 2001. + + The price for mmap is also high now; each time glibc mmaps from the + kernel, the kernel is forced to zero out the memory it gives to the + application. Zeroing memory is expensive and eats a lot of cache and + memory bandwidth. This has nothing to do with the efficiency of the + virtual memory system, by doing mmap the kernel just has no choice but + to zero. + + In 2001, the kernel had a maximum size for brk() which was about 800 + megabytes on 32 bit x86, at that point brk() would hit the first + mmaped shared libaries and couldn't expand anymore. With current 2.6 + kernels, the VA space layout is different and brk() and mmap + both can span the entire heap at will. + + Rather than using a static threshold for the brk/mmap tradeoff, + we are now using a simple dynamic one. The goal is still to avoid + fragmentation. 
The old goals we kept are + 1) try to get the long lived large allocations to use mmap() + 2) really large allocations should always use mmap() + and we're adding now: + 3) transient allocations should use brk() to avoid forcing the kernel + having to zero memory over and over again + + The implementation works with a sliding threshold, which is by default + limited to go between 128Kb and 32Mb (64Mb for 64 bitmachines) and starts + out at 128Kb as per the 2001 default. + + This allows us to satisfy requirement 1) under the assumption that long + lived allocations are made early in the process' lifespan, before it has + started doing dynamic allocations of the same size (which will + increase the threshold). + + The upperbound on the threshold satisfies requirement 2) + + The threshold goes up in value when the application frees memory that was + allocated with the mmap allocator. The idea is that once the application + starts freeing memory of a certain size, it's highly probable that this is + a size the application uses for transient allocations. This estimator + is there to satisfy the new third requirement. + */ #define M_MMAP_THRESHOLD -3 #ifndef DEFAULT_MMAP_THRESHOLD -#define DEFAULT_MMAP_THRESHOLD (128 * 1024) +#define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN #endif /* @@ -1507,8 +1592,10 @@ Void_t* _int_memalign(mstate, size_t, size_t); Void_t* _int_valloc(mstate, size_t); static Void_t* _int_pvalloc(mstate, size_t); /*static Void_t* cALLOc(size_t, size_t);*/ +#ifndef _LIBC static Void_t** _int_icalloc(mstate, size_t, size_t, Void_t**); static Void_t** _int_icomalloc(mstate, size_t, size_t*, Void_t**); +#endif static int mTRIm(size_t); static size_t mUSABLe(Void_t*); static void mSTATs(void); @@ -1721,7 +1808,7 @@ struct malloc_chunk { mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | User data starts here... . . . - . (malloc_usable_space() bytes) . + . (malloc_usable_size() bytes) . . 
| nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Size of chunk | @@ -1803,7 +1890,11 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ /* Check if m has acceptable alignment */ -#define aligned_OK(m) (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0) +#define aligned_OK(m) (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0) + +#define misaligned_chunk(p) \ + ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \ + & MALLOC_ALIGN_MASK) /* @@ -1970,7 +2061,9 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ typedef struct malloc_chunk* mbinptr; /* addressing -- note that bin_at(0) does not exist */ -#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1))) +#define bin_at(m, i) \ + (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2])) \ + - offsetof (struct malloc_chunk, fd)) /* analog of ++bin */ #define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1))) @@ -2152,9 +2245,9 @@ typedef struct malloc_chunk* mfastbinptr; #define FASTCHUNKS_BIT (1U) -#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT) == 0) -#define clear_fastchunks(M) ((M)->max_fast |= FASTCHUNKS_BIT) -#define set_fastchunks(M) ((M)->max_fast &= ~FASTCHUNKS_BIT) +#define have_fastchunks(M) (((M)->flags & FASTCHUNKS_BIT) == 0) +#define clear_fastchunks(M) ((M)->flags |= FASTCHUNKS_BIT) +#define set_fastchunks(M) ((M)->flags &= ~FASTCHUNKS_BIT) /* NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous @@ -2167,10 +2260,10 @@ typedef struct malloc_chunk* mfastbinptr; #define NONCONTIGUOUS_BIT (2U) -#define contiguous(M) (((M)->max_fast & NONCONTIGUOUS_BIT) == 0) -#define noncontiguous(M) (((M)->max_fast & NONCONTIGUOUS_BIT) != 0) -#define set_noncontiguous(M) ((M)->max_fast |= NONCONTIGUOUS_BIT) -#define set_contiguous(M) ((M)->max_fast &= ~NONCONTIGUOUS_BIT) +#define contiguous(M) (((M)->flags & NONCONTIGUOUS_BIT) == 0) +#define noncontiguous(M) 
(((M)->flags & NONCONTIGUOUS_BIT) != 0) +#define set_noncontiguous(M) ((M)->flags |= NONCONTIGUOUS_BIT) +#define set_contiguous(M) ((M)->flags &= ~NONCONTIGUOUS_BIT) /* Set value of max_fast. @@ -2179,10 +2272,9 @@ typedef struct malloc_chunk* mfastbinptr; Setting the value clears fastchunk bit but preserves noncontiguous bit. */ -#define set_max_fast(M, s) \ - (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \ - FASTCHUNKS_BIT | \ - ((M)->max_fast & NONCONTIGUOUS_BIT) +#define set_max_fast(s) \ + global_max_fast = ((s) == 0)? SMALLBIN_WIDTH: request2size(s) +#define get_max_fast() global_max_fast /* @@ -2192,16 +2284,15 @@ typedef struct malloc_chunk* mfastbinptr; struct malloc_state { /* Serialize access. */ mutex_t mutex; - // Should we have padding to move the mutex to its own cache line? + + /* Flags (formerly in max_fast). */ + int flags; #if THREAD_STATS /* Statistics for locking. Only used if THREAD_STATS is defined. */ long stat_lock_direct, stat_lock_loop, stat_lock_wait; #endif - /* The maximum chunk size to be eligible for fastbin */ - INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */ - /* Fastbins */ mfastbinptr fastbins[NFASTBINS]; @@ -2212,7 +2303,7 @@ struct malloc_state { mchunkptr last_remainder; /* Normal bins packed as described above */ - mchunkptr bins[NBINS * 2]; + mchunkptr bins[NBINS * 2 - 2]; /* Bitmap of bins */ unsigned int binmap[BINMAPSIZE]; @@ -2235,6 +2326,10 @@ struct malloc_par { int n_mmaps; int n_mmaps_max; int max_n_mmaps; + /* the mmap_threshold is dynamic, until the user sets + it manually, at which point we need to disable any + dynamic behavior. */ + int no_dyn_threshold; /* Cache malloc_getpagesize */ unsigned int pagesize; @@ -2262,6 +2357,10 @@ static struct malloc_state main_arena; static struct malloc_par mp_; + +/* Maximum size of memory handled in fastbins. */ +static INTERNAL_SIZE_T global_max_fast; + /* Initialize a malloc_state struct. 
@@ -2291,8 +2390,9 @@ static void malloc_init_state(av) mstate av; if (av != &main_arena) #endif set_noncontiguous(av); - - set_max_fast(av, DEFAULT_MXFAST); + if (av == &main_arena) + set_max_fast(DEFAULT_MXFAST); + av->flags |= FASTCHUNKS_BIT; av->top = initial_top(av); } @@ -2305,7 +2405,9 @@ static void malloc_init_state(av) mstate av; static Void_t* sYSMALLOc(INTERNAL_SIZE_T, mstate); static int sYSTRIm(size_t, mstate); static void malloc_consolidate(mstate); +#ifndef _LIBC static Void_t** iALLOc(mstate, size_t, size_t*, int, Void_t**); +#endif #else static Void_t* sYSMALLOc(); static int sYSTRIm(); @@ -2359,6 +2461,14 @@ void weak_variable (*__after_morecore_hook) (void) = NULL; static int check_action = DEFAULT_CHECK_ACTION; +/* ------------------ Testing support ----------------------------------*/ + +static int perturb_byte; + +#define alloc_perturb(p, n) memset (p, (perturb_byte ^ 0xff) & 0xff, n) +#define free_perturb(p, n) memset (p, perturb_byte & 0xff, n) + + /* ------------------- Support for multiple arenas -------------------- */ #include "arena.c" @@ -2624,9 +2734,9 @@ static void do_check_malloc_state(mstate av) /* properties of fastbins */ /* max_fast is in allowed range */ - assert((av->max_fast & ~1) <= request2size(MAX_FAST_SIZE)); + assert((get_max_fast () & ~1) <= request2size(MAX_FAST_SIZE)); - max_fast_bin = fastbin_index(av->max_fast); + max_fast_bin = fastbin_index(get_max_fast ()); for (i = 0; i < NFASTBINS; ++i) { p = av->fastbins[i]; @@ -2752,6 +2862,7 @@ static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; unsigned long sum; /* for updating stats */ size_t pagemask = mp_.pagesize - 1; + bool tried_mmap = false; #if HAVE_MMAP @@ -2768,12 +2879,14 @@ static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; char* mm; /* return value from mmap call*/ + try_mmap: /* Round up size to nearest page. 
For mmapped chunks, the overhead is one SIZE_SZ unit larger than for normal chunks, because there is no following chunk whose prev_size field could be used. */ size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask; + tried_mmap = true; /* Don't try if size wraps around 0 */ if ((unsigned long)(size) > (unsigned long)(nb)) { @@ -2857,7 +2970,8 @@ static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; /* First try to extend the current heap. */ old_heap = heap_for_ptr(old_top); old_heap_size = old_heap->size; - if (grow_heap(old_heap, MINSIZE + nb - old_size) == 0) { + if ((long) (MINSIZE + nb - old_size) > 0 + && grow_heap(old_heap, MINSIZE + nb - old_size) == 0) { av->system_mem += old_heap->size - old_heap_size; arena_mem += old_heap->size - old_heap_size; #if 0 @@ -2897,6 +3011,9 @@ static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av; set_foot(old_top, (old_size + 2*SIZE_SZ)); } } + else if (!tried_mmap) + /* We can at least try to use to mmap memory. */ + goto try_mmap; } else { /* av == main_arena */ @@ -3250,19 +3367,31 @@ munmap_chunk(p) mchunkptr p; #endif { INTERNAL_SIZE_T size = chunksize(p); - int ret; assert (chunk_is_mmapped(p)); #if 0 assert(! ((char*)p >= mp_.sbrk_base && (char*)p < mp_.sbrk_base + mp_.sbrked_mem)); assert((mp_.n_mmaps > 0)); #endif - assert(((p->prev_size + size) & (mp_.pagesize-1)) == 0); + + uintptr_t block = (uintptr_t) p - p->prev_size; + size_t total_size = p->prev_size + size; + /* Unfortunately we have to do the compilers job by hand here. Normally + we would test BLOCK and TOTAL-SIZE separately for compliance with the + page size. But gcc does not recognize the optimization possibility + (in the moment at least) so we combine the two values into one before + the bit test. 
*/ + if (__builtin_expect (((block | total_size) & (mp_.pagesize - 1)) != 0, 0)) + { + malloc_printerr (check_action, "munmap_chunk(): invalid pointer", + chunk2mem (p)); + return; + } mp_.n_mmaps--; - mp_.mmapped_mem -= (size + p->prev_size); + mp_.mmapped_mem -= total_size; - ret = munmap((char *)p - p->prev_size, size + p->prev_size); + int ret __attribute__ ((unused)) = munmap((char *)block, total_size); /* munmap returns non-zero on failure */ assert(ret == 0); @@ -3385,6 +3514,14 @@ public_fREe(Void_t* mem) #if HAVE_MMAP if (chunk_is_mmapped(p)) /* release mmapped memory. */ { + /* see if the dynamic brk/mmap threshold needs adjusting */ + if (!mp_.no_dyn_threshold + && p->size > mp_.mmap_threshold + && p->size <= DEFAULT_MMAP_THRESHOLD_MAX) + { + mp_.mmap_threshold = chunksize (p); + mp_.trim_threshold = 2 * mp_.mmap_threshold; + } munmap_chunk(p); return; } @@ -3439,7 +3576,7 @@ public_rEALLOc(Void_t* oldmem, size_t bytes) Therefore we can exclude some size values which might appear here by accident or by "design" from some intruder. */ if (__builtin_expect ((uintptr_t) oldp > (uintptr_t) -oldsize, 0) - || __builtin_expect ((uintptr_t) oldp & MALLOC_ALIGN_MASK, 0)) + || __builtin_expect (misaligned_chunk (oldp), 0)) { malloc_printerr (check_action, "realloc(): invalid pointer", oldmem); return NULL; @@ -3489,6 +3626,29 @@ public_rEALLOc(Void_t* oldmem, size_t bytes) (void)mutex_unlock(&ar_ptr->mutex); assert(!newp || chunk_is_mmapped(mem2chunk(newp)) || ar_ptr == arena_for_chunk(mem2chunk(newp))); + + if (newp == NULL) + { + /* Try harder to allocate memory in other arenas. 
*/ + newp = public_mALLOc(bytes); + if (newp != NULL) + { + MALLOC_COPY (newp, oldmem, oldsize - 2 * SIZE_SZ); +#if THREAD_STATS + if(!mutex_trylock(&ar_ptr->mutex)) + ++(ar_ptr->stat_lock_direct); + else { + (void)mutex_lock(&ar_ptr->mutex); + ++(ar_ptr->stat_lock_wait); + } +#else + (void)mutex_lock(&ar_ptr->mutex); +#endif + _int_free(ar_ptr, oldmem); + (void)mutex_unlock(&ar_ptr->mutex); + } + } + return newp; } #ifdef libc_hidden_def @@ -3676,14 +3836,18 @@ public_cALLOc(size_t n, size_t elem_size) /* Two optional cases in which clearing not necessary */ #if HAVE_MMAP - if (chunk_is_mmapped(p)) - return mem; + if (chunk_is_mmapped (p)) + { + if (__builtin_expect (perturb_byte, 0)) + MALLOC_ZERO (mem, sz); + return mem; + } #endif csz = chunksize(p); #if MORECORE_CLEARS - if (p == oldtop && csz > oldtopsize) { + if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize)) { /* clear only the bytes from non-freshly-sbrked memory */ csz = oldtopsize; } @@ -3721,6 +3885,8 @@ public_cALLOc(size_t n, size_t elem_size) return mem; } +#ifndef _LIBC + Void_t** public_iCALLOc(size_t n, size_t elem_size, Void_t** chunks) { @@ -3751,8 +3917,6 @@ public_iCOMALLOc(size_t n, size_t sizes[], Void_t** chunks) return m; } -#ifndef _LIBC - void public_cFREe(Void_t* m) { @@ -3766,6 +3930,8 @@ public_mTRIm(size_t s) { int result; + if(__malloc_initialized < 0) + ptmalloc_init (); (void)mutex_lock(&main_arena.mutex); result = mTRIm(s); (void)mutex_unlock(&main_arena.mutex); @@ -3850,7 +4016,7 @@ _int_malloc(mstate av, size_t bytes) can try it without checking, which saves some time on this fast path. 
*/ - if ((unsigned long)(nb) <= (unsigned long)(av->max_fast)) { + if ((unsigned long)(nb) <= (unsigned long)(get_max_fast ())) { long int idx = fastbin_index(nb); fb = &(av->fastbins[idx]); if ( (victim = *fb) != 0) { @@ -3859,7 +4025,10 @@ _int_malloc(mstate av, size_t bytes) chunk2mem (victim)); *fb = victim->fd; check_remalloced_chunk(av, victim, nb); - return chunk2mem(victim); + void *p = chunk2mem(victim); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; } } @@ -3887,7 +4056,10 @@ _int_malloc(mstate av, size_t bytes) if (av != &main_arena) victim->size |= NON_MAIN_ARENA; check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); + void *p = chunk2mem(victim); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; } } } @@ -3924,6 +4096,8 @@ _int_malloc(mstate av, size_t bytes) for(;;) { + int iters = 0; + bool any_larger = false; while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) { bck = victim->bk; if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0) @@ -3958,7 +4132,10 @@ _int_malloc(mstate av, size_t bytes) set_foot(remainder, remainder_size); check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); + void *p = chunk2mem(victim); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; } /* remove from unsorted list */ @@ -3972,7 +4149,10 @@ _int_malloc(mstate av, size_t bytes) if (av != &main_arena) victim->size |= NON_MAIN_ARENA; check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); + void *p = chunk2mem(victim); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; } /* place chunk in bin */ @@ -4013,6 +4193,12 @@ _int_malloc(mstate av, size_t bytes) victim->fd = fwd; fwd->bk = victim; bck->fd = victim; + + if (size >= nb + MINSIZE) + any_larger = true; +#define MAX_ITERS 10000 + if (++iters >= MAX_ITERS) + break; } /* @@ -4041,21 +4227,28 @@ _int_malloc(mstate av, size_t bytes) 
set_inuse_bit_at_offset(victim, size); if (av != &main_arena) victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); } /* Split */ else { remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); + /* We cannot assume the unsorted list is empty and therefore + have to perform a complete insert here. */ + bck = unsorted_chunks(av); + fwd = bck->fd; + remainder->bk = bck; + remainder->fd = fwd; + bck->fd = remainder; + fwd->bk = remainder; set_head(victim, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0)); set_head(remainder, remainder_size | PREV_INUSE); set_foot(remainder, remainder_size); - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); } + check_malloced_chunk(av, victim, nb); + void *p = chunk2mem(victim); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; } } @@ -4124,16 +4317,21 @@ _int_malloc(mstate av, size_t bytes) set_inuse_bit_at_offset(victim, size); if (av != &main_arena) victim->size |= NON_MAIN_ARENA; - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); } /* Split */ else { remainder = chunk_at_offset(victim, nb); - unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; - remainder->bk = remainder->fd = unsorted_chunks(av); + /* We cannot assume the unsorted list is empty and therefore + have to perform a complete insert here. */ + bck = unsorted_chunks(av); + fwd = bck->fd; + remainder->bk = bck; + remainder->fd = fwd; + bck->fd = remainder; + fwd->bk = remainder; + /* advertise as last remainder */ if (in_smallbin_range(nb)) av->last_remainder = remainder; @@ -4142,9 +4340,12 @@ _int_malloc(mstate av, size_t bytes) (av != &main_arena ? 
NON_MAIN_ARENA : 0)); set_head(remainder, remainder_size | PREV_INUSE); set_foot(remainder, remainder_size); - check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); } + check_malloced_chunk(av, victim, nb); + void *p = chunk2mem(victim); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; } } @@ -4176,7 +4377,10 @@ _int_malloc(mstate av, size_t bytes) set_head(remainder, remainder_size | PREV_INUSE); check_malloced_chunk(av, victim, nb); - return chunk2mem(victim); + void *p = chunk2mem(victim); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; } /* @@ -4194,8 +4398,12 @@ _int_malloc(mstate av, size_t bytes) /* Otherwise, relay to handle system-dependent cases */ - else - return sYSMALLOc(nb, av); + else { + void *p = sYSMALLOc(nb, av); + if (__builtin_expect (perturb_byte, 0)) + alloc_perturb (p, bytes); + return p; + } } } @@ -4226,13 +4434,19 @@ _int_free(mstate av, Void_t* mem) Therefore we can exclude some size values which might appear here by accident or by "design" from some intruder. */ if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0) - || __builtin_expect ((uintptr_t) p & MALLOC_ALIGN_MASK, 0)) + || __builtin_expect (misaligned_chunk (p), 0)) { errstr = "free(): invalid pointer"; errout: malloc_printerr (check_action, errstr, mem); return; } + /* We know that each chunk is at least MINSIZE bytes in size. */ + if (__builtin_expect (size < MINSIZE, 0)) + { + errstr = "free(): invalid size"; + goto errout; + } check_inuse_chunk(av, p); @@ -4241,7 +4455,7 @@ _int_free(mstate av, Void_t* mem) and used quickly in malloc. 
*/ - if ((unsigned long)(size) <= (unsigned long)(av->max_fast) + if ((unsigned long)(size) <= (unsigned long)(get_max_fast ()) #if TRIM_FASTBINS /* @@ -4269,6 +4483,10 @@ _int_free(mstate av, Void_t* mem) errstr = "double free or corruption (fasttop)"; goto errout; } + + if (__builtin_expect (perturb_byte, 0)) + free_perturb (mem, size - SIZE_SZ); + p->fd = *fb; *fb = p; } @@ -4310,6 +4528,9 @@ _int_free(mstate av, Void_t* mem) goto errout; } + if (__builtin_expect (perturb_byte, 0)) + free_perturb (mem, size - SIZE_SZ); + /* consolidate backward */ if (!prev_inuse(p)) { prevsize = p->prev_size; @@ -4450,7 +4671,7 @@ static void malloc_consolidate(av) mstate av; yet been initialized, in which case do so below */ - if (av->max_fast != 0) { + if (get_max_fast () != 0) { clear_fastchunks(av); unsorted_bin = unsorted_chunks(av); @@ -4463,7 +4684,7 @@ static void malloc_consolidate(av) mstate av; reused anyway. */ - maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); + maxfb = &(av->fastbins[fastbin_index(get_max_fast ())]); fb = &(av->fastbins[0]); do { if ( (p = *fb) != 0) { @@ -4559,7 +4780,7 @@ _int_realloc(mstate av, Void_t* oldmem, size_t bytes) oldsize = chunksize(oldp); /* Simple tests for old block integrity. 
*/ - if (__builtin_expect ((uintptr_t) oldp & MALLOC_ALIGN_MASK, 0)) + if (__builtin_expect (misaligned_chunk (oldp), 0)) { errstr = "realloc(): invalid pointer"; errout: @@ -4569,7 +4790,7 @@ _int_realloc(mstate av, Void_t* oldmem, size_t bytes) if (__builtin_expect (oldp->size <= 2 * SIZE_SZ, 0) || __builtin_expect (oldsize >= av->system_mem, 0)) { - errstr = "realloc(): invalid size"; + errstr = "realloc(): invalid old size"; goto errout; } @@ -4926,6 +5147,7 @@ Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size; } #endif /* 0 */ +#ifndef _LIBC /* ------------------------- independent_calloc ------------------------- */ @@ -5089,6 +5311,7 @@ mstate av; size_t n_elements; size_t* sizes; int opts; Void_t* chunks[]; return marray; } +#endif /* _LIBC */ /* @@ -5325,7 +5548,7 @@ int mALLOPt(param_number, value) int param_number; int value; switch(param_number) { case M_MXFAST: if (value >= 0 && value <= MAX_FAST_SIZE) { - set_max_fast(av, value); + set_max_fast(value); } else res = 0; @@ -5333,10 +5556,12 @@ int mALLOPt(param_number, value) int param_number; int value; case M_TRIM_THRESHOLD: mp_.trim_threshold = value; + mp_.no_dyn_threshold = 1; break; case M_TOP_PAD: mp_.top_pad = value; + mp_.no_dyn_threshold = 1; break; case M_MMAP_THRESHOLD: @@ -5347,6 +5572,7 @@ int mALLOPt(param_number, value) int param_number; int value; else #endif mp_.mmap_threshold = value; + mp_.no_dyn_threshold = 1; break; case M_MMAP_MAX: @@ -5356,11 +5582,16 @@ int mALLOPt(param_number, value) int param_number; int value; else #endif mp_.n_mmaps_max = value; + mp_.no_dyn_threshold = 1; break; case M_CHECK_ACTION: check_action = value; break; + + case M_PERTURB: + perturb_byte = value; + break; } (void)mutex_unlock(&av->mutex); return res; @@ -5508,10 +5739,14 @@ int mALLOPt(param_number, value) int param_number; int value; /* Helper code. 
*/ +extern char **__libc_argv attribute_hidden; + static void malloc_printerr(int action, const char *str, void *ptr) { - if (action & 1) + if ((action & 5) == 5) + __libc_message (action & 2, "%s\n", str); + else if (action & 1) { char buf[2 * sizeof (uintptr_t) + 1]; @@ -5521,9 +5756,8 @@ malloc_printerr(int action, const char *str, void *ptr) *--cp = '0'; __libc_message (action & 2, - action & 4 - ? "%s\n" : "*** glibc detected *** %s: 0x%s ***\n", - str, cp); + "*** glibc detected *** %s: %s: 0x%s ***\n", + __libc_argv[0] ?: "", str, cp); } else if (action & 2) abort (); diff --git a/malloc/malloc.h b/malloc/malloc.h index 753539e7b0..1340aa15bc 100644 --- a/malloc/malloc.h +++ b/malloc/malloc.h @@ -1,5 +1,5 @@ /* Prototypes and definition for malloc implementation. - Copyright (C) 1996,97,99,2000,2002,2003,2004 Free Software Foundation, Inc. + Copyright (C) 1996,97,99,2000,2002-2004,2005 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,59 +20,16 @@ #ifndef _MALLOC_H #define _MALLOC_H 1 -#ifdef _LIBC #include -#endif - -/* - $Id$ - `ptmalloc2', a malloc implementation for multiple threads without - lock contention, by Wolfram Gloger . - - VERSION 2.7.0 - - This work is mainly derived from malloc-2.7.0 by Doug Lea - , which is available from: - - ftp://gee.cs.oswego.edu/pub/misc/malloc.c - - This trimmed-down header file only provides function prototypes and - the exported data structures. For more detailed function - descriptions and compile-time options, see the source file - `malloc.c'. -*/ - -#if defined(__STDC__) || defined (__cplusplus) -# include +#include # define __malloc_ptr_t void * -#else -# undef size_t -# define size_t unsigned int -# undef ptrdiff_t -# define ptrdiff_t int -# define __malloc_ptr_t char * -#endif -#ifdef _LIBC /* Used by GNU libc internals. 
*/ -# define __malloc_size_t size_t -# define __malloc_ptrdiff_t ptrdiff_t -#elif !defined __attribute_malloc__ -# define __attribute_malloc__ -#endif +#define __malloc_size_t size_t +#define __malloc_ptrdiff_t ptrdiff_t #ifdef __GNUC__ -/* GCC can always grok prototypes. For C++ programs we add throw() - to help it optimize the function calls. But this works only with - gcc 2.8.x and egcs. */ -# ifndef __THROW -# if defined __cplusplus && (__GNUC__ >= 3 || __GNUC_MINOR__ >= 8) -# define __THROW throw () -# else -# define __THROW -# endif -# endif # define __MALLOC_P(args) args __THROW /* This macro will be used for functions which might take C++ callback functions. */ @@ -80,78 +37,51 @@ #else /* Not GCC. */ -# define __THROW - -# if (defined __STDC__ && __STDC__) || defined __cplusplus - -# define __MALLOC_P(args) args -# define __MALLOC_PMT(args) args - -# ifndef __const -# define __const const -# endif - -# else /* Not ANSI C or C++. */ - -# define __MALLOC_P(args) () /* No prototypes. */ -# define __MALLOC_PMT(args) () - -# ifndef __const -# define __const -# endif - -# endif /* ANSI C or C++. */ +# define __MALLOC_P(args) args +# define __MALLOC_PMT(args) args #endif /* GCC. */ -#ifndef NULL -# ifdef __cplusplus -# define NULL 0 -# else -# define NULL ((__malloc_ptr_t) 0) -# endif -#endif -#ifdef __cplusplus -extern "C" { -#endif +__BEGIN_DECLS /* Allocate SIZE bytes of memory. */ -extern __malloc_ptr_t malloc __MALLOC_P ((size_t __size)) __attribute_malloc__; +extern void *malloc __MALLOC_P ((size_t __size)) __attribute_malloc__ __wur; /* Allocate NMEMB elements of SIZE bytes each, all initialized to 0. */ -extern __malloc_ptr_t calloc __MALLOC_P ((size_t __nmemb, size_t __size)) - __attribute_malloc__; +extern void *calloc __MALLOC_P ((size_t __nmemb, size_t __size)) + __attribute_malloc__ __wur; /* Re-allocate the previously allocated block in __ptr, making the new block SIZE bytes long. 
*/ -extern __malloc_ptr_t realloc __MALLOC_P ((__malloc_ptr_t __ptr, - size_t __size)) - __attribute_malloc__; +extern void *realloc __MALLOC_P ((void *__ptr, size_t __size)) + __attribute_malloc__ __attribute_warn_unused_result__; /* Free a block allocated by `malloc', `realloc' or `calloc'. */ -extern void free __MALLOC_P ((__malloc_ptr_t __ptr)); +extern void free __MALLOC_P ((void *__ptr)); /* Free a block allocated by `calloc'. */ -extern void cfree __MALLOC_P ((__malloc_ptr_t __ptr)); +extern void cfree __MALLOC_P ((void *__ptr)); /* Allocate SIZE bytes allocated to ALIGNMENT bytes. */ -extern __malloc_ptr_t memalign __MALLOC_P ((size_t __alignment, size_t __size)); +extern void *memalign __MALLOC_P ((size_t __alignment, size_t __size)) + __attribute_malloc__ __wur; /* Allocate SIZE bytes on a page boundary. */ -extern __malloc_ptr_t valloc __MALLOC_P ((size_t __size)) __attribute_malloc__; +extern void *valloc __MALLOC_P ((size_t __size)) + __attribute_malloc__ __wur; /* Equivalent to valloc(minimum-page-that-holds(n)), that is, round up __size to nearest pagesize. */ -extern __malloc_ptr_t pvalloc __MALLOC_P ((size_t __size)) - __attribute_malloc__; +extern void * pvalloc __MALLOC_P ((size_t __size)) + __attribute_malloc__ __wur; /* Underlying allocation function; successive calls should return contiguous pieces of memory. */ -extern __malloc_ptr_t (*__morecore) __MALLOC_PMT ((ptrdiff_t __size)); +extern void *(*__morecore) __MALLOC_PMT ((ptrdiff_t __size)); /* Default value of `__morecore'. */ -extern __malloc_ptr_t __default_morecore __MALLOC_P ((ptrdiff_t __size)) +extern void *__default_morecore __MALLOC_P ((ptrdiff_t __size)) __attribute_malloc__; /* SVID2/XPG mallinfo structure */ @@ -192,6 +122,7 @@ extern struct mallinfo mallinfo __MALLOC_P ((void)); #define M_MMAP_THRESHOLD -3 #define M_MMAP_MAX -4 #define M_CHECK_ACTION -5 +#define M_PERTURB -6 /* General SVID/XPG interface to tunable parameters. 
*/ extern int mallopt __MALLOC_P ((int __param, int __val)); @@ -202,41 +133,38 @@ extern int malloc_trim __MALLOC_P ((size_t __pad)); /* Report the number of usable allocated bytes associated with allocated chunk __ptr. */ -extern size_t malloc_usable_size __MALLOC_P ((__malloc_ptr_t __ptr)); +extern size_t malloc_usable_size __MALLOC_P ((void *__ptr)); /* Prints brief summary statistics on stderr. */ extern void malloc_stats __MALLOC_P ((void)); /* Record the state of all malloc variables in an opaque data structure. */ -extern __malloc_ptr_t malloc_get_state __MALLOC_P ((void)); +extern void *malloc_get_state __MALLOC_P ((void)); /* Restore the state of all malloc variables from data obtained with malloc_get_state(). */ -extern int malloc_set_state __MALLOC_P ((__malloc_ptr_t __ptr)); +extern int malloc_set_state __MALLOC_P ((void *__ptr)); /* Called once when malloc is initialized; redefining this variable in the application provides the preferred way to set up the hook pointers. */ extern void (*__malloc_initialize_hook) __MALLOC_PMT ((void)); /* Hooks for debugging and user-defined versions. 
*/ -extern void (*__free_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr, +extern void (*__free_hook) __MALLOC_PMT ((void *__ptr, __const __malloc_ptr_t)); -extern __malloc_ptr_t (*__malloc_hook) __MALLOC_PMT ((size_t __size, - __const __malloc_ptr_t)); -extern __malloc_ptr_t (*__realloc_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr, - size_t __size, - __const __malloc_ptr_t)); -extern __malloc_ptr_t (*__memalign_hook) __MALLOC_PMT ((size_t __alignment, - size_t __size, - __const __malloc_ptr_t)); +extern void *(*__malloc_hook) __MALLOC_PMT ((size_t __size, + __const __malloc_ptr_t)); +extern void *(*__realloc_hook) __MALLOC_PMT ((void *__ptr, size_t __size, + __const __malloc_ptr_t)); +extern void *(*__memalign_hook) __MALLOC_PMT ((size_t __alignment, + size_t __size, + __const __malloc_ptr_t)); extern void (*__after_morecore_hook) __MALLOC_PMT ((void)); /* Activate a standard set of debugging hooks. */ extern void __malloc_check_init __MALLOC_P ((void)); -#ifdef __cplusplus -} /* end of extern "C" */ -#endif +__END_DECLS #endif /* malloc.h */ diff --git a/malloc/mcheck.c b/malloc/mcheck.c index 02379d219d..9f88843445 100644 --- a/malloc/mcheck.c +++ b/malloc/mcheck.c @@ -24,9 +24,25 @@ # include # include # include +# include # include #endif +#ifdef _LIBC +extern __typeof (malloc) __libc_malloc; +extern __typeof (free) __libc_free; +extern __typeof (realloc) __libc_realloc; +libc_hidden_proto (__libc_malloc) +libc_hidden_proto (__libc_realloc) +libc_hidden_proto (__libc_free) +libc_hidden_proto (__libc_memalign) +#else +# define __libc_malloc(sz) malloc (sz) +# define __libc_free(ptr) free (ptr) +# define __libc_realloc(ptr, sz) realloc (ptr, sz) +# define __libc_memalign(al, sz) memalign (al, sz) +#endif + /* Old hook values. 
*/ static void (*old_free_hook) (__ptr_t ptr, __const __ptr_t); static __ptr_t (*old_malloc_hook) (__malloc_size_t size, const __ptr_t); @@ -197,7 +213,7 @@ freehook (__ptr_t ptr, const __ptr_t caller) if (old_free_hook != NULL) (*old_free_hook) (ptr, caller); else - free (ptr); + __libc_free (ptr); __free_hook = freehook; } @@ -214,7 +230,7 @@ mallochook (__malloc_size_t size, const __ptr_t caller) hdr = (struct hdr *) (*old_malloc_hook) (sizeof (struct hdr) + size + 1, caller); else - hdr = (struct hdr *) malloc (sizeof (struct hdr) + size + 1); + hdr = (struct hdr *) __libc_malloc (sizeof (struct hdr) + size + 1); __malloc_hook = mallochook; if (hdr == NULL) return NULL; @@ -245,7 +261,7 @@ memalignhook (__malloc_size_t alignment, __malloc_size_t size, if (old_memalign_hook != NULL) block = (*old_memalign_hook) (alignment, slop + size + 1, caller); else - block = memalign (alignment, slop + size + 1); + block = __libc_memalign (alignment, slop + size + 1); __memalign_hook = memalignhook; if (block == NULL) return NULL; @@ -294,8 +310,8 @@ reallochook (__ptr_t ptr, __malloc_size_t size, const __ptr_t caller) sizeof (struct hdr) + size + 1, caller); else - hdr = (struct hdr *) realloc ((__ptr_t) hdr, - sizeof (struct hdr) + size + 1); + hdr = (struct hdr *) __libc_realloc ((__ptr_t) hdr, + sizeof (struct hdr) + size + 1); __free_hook = freehook; __malloc_hook = mallochook; __memalign_hook = memalignhook; @@ -355,8 +371,8 @@ mcheck (func) if (__malloc_initialized <= 0 && !mcheck_used) { /* We call malloc() once here to ensure it is initialized. */ - void *p = malloc (0); - free (p); + void *p = __libc_malloc (0); + __libc_free (p); old_free_hook = __free_hook; __free_hook = freehook; diff --git a/malloc/memusage.c b/malloc/memusage.c index b552ec37b0..16fa09060a 100644 --- a/malloc/memusage.c +++ b/malloc/memusage.c @@ -1,5 +1,5 @@ /* Profile heap and stack memory usage of running program. - Copyright (C) 1998-2002, 2004 Free Software Foundation, Inc. 
+ Copyright (C) 1998-2002, 2004, 2005, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1998. @@ -18,11 +18,13 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ +#include #include #include #include #include #include +#include #include #include #include @@ -43,7 +45,7 @@ static void (*freep) (void *); static void *(*mmapp) (void *, size_t, int, int, int, off_t); static void *(*mmap64p) (void *, size_t, int, int, int, off64_t); static int (*munmapp) (void *, size_t); -static void *(*mremapp) (void *, size_t, size_t, int); +static void *(*mremapp) (void *, size_t, size_t, int, void *); enum { @@ -69,24 +71,23 @@ struct header #define MAGIC 0xfeedbeaf -static unsigned long int calls[idx_last]; -static unsigned long int failed[idx_last]; -static unsigned long long int total[idx_last]; -static unsigned long long int grand_total; -static unsigned long int histogram[65536 / 16]; -static unsigned long int large; -static unsigned long int calls_total; -static unsigned long int inplace; -static unsigned long int decreasing; -static unsigned long int inplace_mremap; -static unsigned long int decreasing_mremap; -static long int current_use[2]; -static long int peak_use[3]; -static uintptr_t start_sp; +static memusage_cntr_t calls[idx_last]; +static memusage_cntr_t failed[idx_last]; +static memusage_size_t total[idx_last]; +static memusage_size_t grand_total; +static memusage_cntr_t histogram[65536 / 16]; +static memusage_cntr_t large; +static memusage_cntr_t calls_total; +static memusage_cntr_t inplace; +static memusage_cntr_t decreasing; +static memusage_cntr_t realloc_free; +static memusage_cntr_t inplace_mremap; +static memusage_cntr_t decreasing_mremap; +static memusage_size_t current_heap; +static memusage_size_t peak_use[3]; +static __thread uintptr_t start_sp; /* A few macros to make the source more readable. 
*/ -#define current_heap current_use[0] -#define current_stack current_use[1] #define peak_heap peak_use[0] #define peak_stack peak_use[1] #define peak_total peak_use[2] @@ -103,14 +104,14 @@ extern const char *__progname; struct entry { - size_t heap; - size_t stack; + uint64_t heap; + uint64_t stack; uint32_t time_low; uint32_t time_high; }; -static struct entry buffer[DEFAULT_BUFFER_SIZE]; -static size_t buffer_cnt; +static struct entry buffer[2 * DEFAULT_BUFFER_SIZE]; +static uatomic32_t buffer_cnt; static struct entry first; @@ -118,8 +119,6 @@ static struct entry first; static void update_data (struct header *result, size_t len, size_t old_len) { - long int total_use; - if (result != NULL) { /* Record the information we need and mark the block using a @@ -129,38 +128,60 @@ update_data (struct header *result, size_t len, size_t old_len) } /* Compute current heap usage and compare it with the maximum value. */ - current_heap += len - old_len; - if (current_heap > peak_heap) - peak_heap = current_heap; - - /* Compute current stack usage and compare it with the maximum value. */ + memusage_size_t heap + = atomic_exchange_and_add (&current_heap, len - old_len) + len - old_len; + atomic_max (&peak_heap, heap); + + /* Compute current stack usage and compare it with the maximum + value. The base stack pointer might not be set if this is not + the main thread and it is the first call to any of these + functions. */ + if (__builtin_expect (!start_sp, 0)) + start_sp = GETSP (); + + uintptr_t sp = GETSP (); #ifdef STACK_GROWS_UPWARD - current_stack = GETSP () - start_sp; + /* This can happen in threads where we didn't catch the thread's + stack early enough. */ + if (__builtin_expect (sp < start_sp, 0)) + start_sp = sp; + size_t current_stack = sp - start_sp; #else - current_stack = start_sp - GETSP (); + /* This can happen in threads where we didn't catch the thread's + stack early enough. 
*/ + if (__builtin_expect (sp > start_sp, 0)) + start_sp = sp; + size_t current_stack = start_sp - sp; #endif - if (current_stack > peak_stack) - peak_stack = current_stack; + atomic_max (&peak_stack, current_stack); /* Add up heap and stack usage and compare it with the maximum value. */ - total_use = current_heap + current_stack; - if (total_use > peak_total) - peak_total = total_use; + atomic_max (&peak_total, heap + current_stack); /* Store the value only if we are writing to a file. */ if (fd != -1) { - buffer[buffer_cnt].heap = current_heap; - buffer[buffer_cnt].stack = current_stack; - GETTIME (buffer[buffer_cnt].time_low, buffer[buffer_cnt].time_high); - ++buffer_cnt; - - /* Write out buffer if it is full. */ - if (buffer_cnt == buffer_size) + uatomic32_t idx = atomic_exchange_and_add (&buffer_cnt, 1); + if (idx >= 2 * buffer_size) { - write (fd, buffer, buffer_cnt * sizeof (struct entry)); - buffer_cnt = 0; + /* We try to reset the counter to the correct range. If + this fails because of another thread increasing the + counter it does not matter since that thread will take + care of the correction. */ + unsigned int reset = idx - 2 * buffer_size; + atomic_compare_and_exchange_val_acq (&buffer_size, reset, idx); + idx = reset; } + + buffer[idx].heap = current_heap; + buffer[idx].stack = current_stack; + GETTIME (buffer[idx].time_low, buffer[idx].time_high); + + /* Write out buffer if it is full. 
*/ + if (idx + 1 == buffer_size) + write (fd, buffer, buffer_size * sizeof (struct entry)); + else if (idx + 1 == 2 * buffer_size) + write (fd, &buffer[buffer_size], buffer_size * sizeof (struct entry)); } } @@ -207,8 +228,8 @@ me (void) mmap64p = (void *(*) (void *, size_t, int, int, int, off64_t)) dlsym (RTLD_NEXT, "mmap64"); - mremapp = (void *(*) (void *, size_t, size_t, int)) dlsym (RTLD_NEXT, - "mremap"); + mremapp = (void *(*) (void *, size_t, size_t, int, void *)) dlsym (RTLD_NEXT, + "mremap"); munmapp = (int (*) (void *, size_t)) dlsym (RTLD_NEXT, "munmap"); initialized = 1; @@ -247,6 +268,7 @@ me (void) GETTIME (first.time_low, first.time_high); /* Write it two times since we need the starting and end time. */ write (fd, &first, sizeof (first)); + write (fd, &first, sizeof (first)); /* Determine the buffer size. We use the default if the environment variable is not present. */ @@ -317,24 +339,24 @@ malloc (size_t len) return (*mallocp) (len); /* Keep track of number of calls. */ - ++calls[idx_malloc]; + atomic_increment (&calls[idx_malloc]); /* Keep track of total memory consumption for `malloc'. */ - total[idx_malloc] += len; + atomic_add (&total[idx_malloc], len); /* Keep track of total memory requirement. */ - grand_total += len; + atomic_add (&grand_total,