On Fri, May 25, 2007 at 09:38:24PM +0400, Alexey Mazurov wrote:
> Package: libc6
> Version: 2.5-8
> Severity: important
> Tags: patch fixed-upstream
FWIW we plan to push glibc 2.6 "soon".
>
> The current malloc implementation in glibc-2.5 has a serious problem with
> a large number of free chunks; this bug was reported upstream at
> http://sources.redhat.com/bugzilla/show_bug.cgi?id=4349
> and is fixed in current CVS and in the glibc-2.6 release.
> This bug does not happen in Debian Sarge glibc-2.3.2.
>
> Attached is a patch which fixes this bug, extracted from current glibc
> CVS, along with a small test program (it takes the number of seconds to
> run the test as an argument).
>
> To demonstrate how bad it is, look at the results of the test program
> on a Xeon 5160 with 32 GB of RAM:
> stock libc:
> 72 mallocs per second
> patched libc:
> 2251354 mallocs per second
> --- glibc-2.5/malloc/malloc.c 2006-09-07 20:06:02.000000000 +0400
> +++ glibc-2.6/malloc/malloc.c 2007-05-22 21:21:24.000000000 +0400
> @@ -1,5 +1,5 @@
> /* Malloc implementation for multiple threads without lock contention.
> - Copyright (C) 1996-2002,2003,2004,2005,2006 Free Software Foundation, Inc.
> + Copyright (C) 1996-2006, 2007 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
> Contributed by Wolfram Gloger <wg@malloc.de>
> and Doug Lea <dl@cs.oswego.edu>, 2001.
> @@ -27,10 +27,6 @@
> based on:
> VERSION 2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
>
> - Note: There may be an updated version of this malloc obtainable at
> - http://www.malloc.de/malloc/ptmalloc2.tar.gz
> - Check before installing!
> -
> * Quickstart
>
> In order to compile this implementation, a Makefile is provided with
> @@ -1781,6 +1777,10 @@
>
> struct malloc_chunk* fd; /* double links -- used only if free. */
> struct malloc_chunk* bk;
> +
> + /* Only used for large blocks: pointer to next larger size. */
> + struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
> + struct malloc_chunk* bk_nextsize;
> };
>
>
> @@ -1881,7 +1881,7 @@
> #define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
>
> /* The smallest possible chunk */
> -#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk))
> +#define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize))
>
> /* The smallest size we can malloc is an aligned minimal chunk */
>
> @@ -2081,6 +2081,24 @@
> else { \
> FD->bk = BK; \
> BK->fd = FD; \
> + if (!in_smallbin_range (P->size) \
> + && __builtin_expect (P->fd_nextsize != NULL, 0)) { \
> + assert (P->fd_nextsize->bk_nextsize == P); \
> + assert (P->bk_nextsize->fd_nextsize == P); \
> + if (FD->fd_nextsize == NULL) { \
> + if (P->fd_nextsize == P) \
> + FD->fd_nextsize = FD->bk_nextsize = FD; \
> + else { \
> + FD->fd_nextsize = P->fd_nextsize; \
> + FD->bk_nextsize = P->bk_nextsize; \
> + P->fd_nextsize->bk_nextsize = FD; \
> + P->bk_nextsize->fd_nextsize = FD; \
> + } \
> + } else { \
> + P->fd_nextsize->bk_nextsize = P->bk_nextsize; \
> + P->bk_nextsize->fd_nextsize = P->fd_nextsize; \
> + } \
> + } \
> } \
> }
>
> @@ -2107,22 +2125,37 @@
>
> #define NBINS 128
> #define NSMALLBINS 64
> -#define SMALLBIN_WIDTH 8
> -#define MIN_LARGE_SIZE 512
> +#define SMALLBIN_WIDTH MALLOC_ALIGNMENT
> +#define MIN_LARGE_SIZE (NSMALLBINS * SMALLBIN_WIDTH)
>
> #define in_smallbin_range(sz) \
> ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)
>
> -#define smallbin_index(sz) (((unsigned)(sz)) >> 3)
> +#define smallbin_index(sz) \
> + (SMALLBIN_WIDTH == 16 ? (((unsigned)(sz)) >> 4) : (((unsigned)(sz)) >> 3))
>
> -#define largebin_index(sz) \
> -(((((unsigned long)(sz)) >> 6) <= 32)? 56 + (((unsigned long)(sz)) >> 6): \
> +#define largebin_index_32(sz) \
> +(((((unsigned long)(sz)) >> 6) <= 38)? 56 + (((unsigned long)(sz)) >> 6): \
> ((((unsigned long)(sz)) >> 9) <= 20)? 91 + (((unsigned long)(sz)) >> 9): \
> ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \
> ((((unsigned long)(sz)) >> 15) <= 4)? 119 + (((unsigned long)(sz)) >> 15): \
> ((((unsigned long)(sz)) >> 18) <= 2)? 124 + (((unsigned long)(sz)) >> 18): \
> 126)
>
> +// XXX It remains to be seen whether it is good to keep the widths of
> +// XXX the buckets the same or whether it should be scaled by a factor
> +// XXX of two as well.
> +#define largebin_index_64(sz) \
> +(((((unsigned long)(sz)) >> 6) <= 48)? 48 + (((unsigned long)(sz)) >> 6): \
> + ((((unsigned long)(sz)) >> 9) <= 20)? 91 + (((unsigned long)(sz)) >> 9): \
> + ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \
> + ((((unsigned long)(sz)) >> 15) <= 4)? 119 + (((unsigned long)(sz)) >> 15): \
> + ((((unsigned long)(sz)) >> 18) <= 2)? 124 + (((unsigned long)(sz)) >> 18): \
> + 126)
> +
> +#define largebin_index(sz) \
> + (SIZE_SZ == 8 ? largebin_index_64 (sz) : largebin_index_32 (sz))
> +
> #define bin_index(sz) \
> ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
>
> @@ -2536,7 +2569,7 @@
> #if HAVE_MMAP
> /* address is outside main heap */
> if (contiguous(av) && av->top != initial_top(av)) {
> - assert(((char*)p) < min_address || ((char*)p) > max_address);
> + assert(((char*)p) < min_address || ((char*)p) >= max_address);
> }
> /* chunk is page-aligned */
> assert(((p->prev_size + sz) & (mp_.pagesize-1)) == 0);
> @@ -2741,8 +2774,19 @@
> for (i = 0; i < NFASTBINS; ++i) {
> p = av->fastbins[i];
>
> + /* The following test can only be performed for the main arena.
> + While mallopt calls malloc_consolidate to get rid of all fast
> + bins (especially those larger than the new maximum) this does
> + only happen for the main arena. Trying to do this for any
> + other arena would mean those arenas have to be locked and
> + malloc_consolidate be called for them. This is excessive. And
> + even if this is acceptable to somebody it still cannot solve
> + the problem completely since if the arena is locked a
> + concurrent malloc call might create a new arena which then
> + could use the newly invalid fast bins. */
> +
> /* all bins past max_fast are empty */
> - if (i > max_fast_bin)
> + if (av == &main_arena && i > max_fast_bin)
> assert(p == 0);
>
> while (p != 0) {
> @@ -2786,7 +2830,31 @@
> /* lists are sorted */
> assert(p->bk == b ||
> (unsigned long)chunksize(p->bk) >= (unsigned long)chunksize(p));
> - }
> +
> + if (!in_smallbin_range(size))
> + {
> + if (p->fd_nextsize != NULL)
> + {
> + if (p->fd_nextsize == p)
> + assert (p->bk_nextsize == p);
> + else
> + {
> + if (p->fd_nextsize == first (b))
> + assert (chunksize (p) < chunksize (p->fd_nextsize));
> + else
> + assert (chunksize (p) > chunksize (p->fd_nextsize));
> +
> + if (p == first (b))
> + assert (chunksize (p) > chunksize (p->bk_nextsize));
> + else
> + assert (chunksize (p) < chunksize (p->bk_nextsize));
> + }
> + }
> + else
> + assert (p->bk_nextsize == NULL);
> + }
> + } else if (!in_smallbin_range(size))
> + assert (p->fd_nextsize == NULL && p->bk_nextsize == NULL);
> /* chunk is followed by a legal chain of inuse chunks */
> for (q = next_chunk(p);
> (q != av->top && inuse(q) &&
> @@ -2805,7 +2873,6 @@
> assert(total <= (unsigned long)(mp_.max_total_mem));
> assert(mp_.n_mmaps >= 0);
> #endif
> - assert(mp_.n_mmaps <= mp_.n_mmaps_max);
> assert(mp_.n_mmaps <= mp_.max_n_mmaps);
>
> assert((unsigned long)(av->system_mem) <=
> @@ -2885,7 +2952,13 @@
> is one SIZE_SZ unit larger than for normal chunks, because there
> is no following chunk whose prev_size field could be used.
> */
> +#if 1
> + /* See the front_misalign handling below, for glibc there is no
> + need for further alignments. */
> + size = (nb + SIZE_SZ + pagemask) & ~pagemask;
> +#else
> size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
> +#endif
> tried_mmap = true;
>
> /* Don't try if size wraps around 0 */
> @@ -2903,6 +2976,12 @@
> address argument for later munmap in free() and realloc().
> */
>
> +#if 1
> + /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
> + MALLOC_ALIGN_MASK is 2*SIZE_SZ-1. Each mmap'ed area is page
> + aligned and therefore definitely MALLOC_ALIGN_MASK-aligned. */
> + assert (((INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK) == 0);
> +#else
> front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
> if (front_misalign > 0) {
> correction = MALLOC_ALIGNMENT - front_misalign;
> @@ -2910,10 +2989,12 @@
> p->prev_size = correction;
> set_head(p, (size - correction) |IS_MMAPPED);
> }
> - else {
> - p = (mchunkptr)mm;
> - set_head(p, size|IS_MMAPPED);
> - }
> + else
> +#endif
> + {
> + p = (mchunkptr)mm;
> + set_head(p, size|IS_MMAPPED);
> + }
>
> /* update statistics */
>
> @@ -4097,7 +4178,6 @@
> for(;;) {
>
> int iters = 0;
> - bool any_larger = false;
> while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
> bck = victim->bk;
> if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
> @@ -4125,6 +4205,11 @@
> unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
> av->last_remainder = remainder;
> remainder->bk = remainder->fd = unsorted_chunks(av);
> + if (!in_smallbin_range(remainder_size))
> + {
> + remainder->fd_nextsize = NULL;
> + remainder->bk_nextsize = NULL;
> + }
>
> set_head(victim, nb | PREV_INUSE |
> (av != &main_arena ? NON_MAIN_ARENA : 0));
> @@ -4173,19 +4258,36 @@
> size |= PREV_INUSE;
> /* if smaller than smallest, bypass loop below */
> assert((bck->bk->size & NON_MAIN_ARENA) == 0);
> - if ((unsigned long)(size) <= (unsigned long)(bck->bk->size)) {
> + if ((unsigned long)(size) < (unsigned long)(bck->bk->size)) {
> fwd = bck;
> bck = bck->bk;
> +
> + victim->fd_nextsize = fwd->fd;
> + victim->bk_nextsize = fwd->fd->bk_nextsize;
> + fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
> }
> else {
> assert((fwd->size & NON_MAIN_ARENA) == 0);
> - while ((unsigned long)(size) < (unsigned long)(fwd->size)) {
> - fwd = fwd->fd;
> - assert((fwd->size & NON_MAIN_ARENA) == 0);
> - }
> - bck = fwd->bk;
> + while ((unsigned long) size < fwd->size)
> + {
> + fwd = fwd->fd_nextsize;
> + assert((fwd->size & NON_MAIN_ARENA) == 0);
> + }
> +
> + if ((unsigned long) size == (unsigned long) fwd->size)
> + /* Always insert in the second position. */
> + fwd = fwd->fd;
> + else
> + {
> + victim->fd_nextsize = fwd;
> + victim->bk_nextsize = fwd->bk_nextsize;
> + fwd->bk_nextsize = victim;
> + victim->bk_nextsize->fd_nextsize = victim;
> + }
> + bck = fwd->bk;
> }
> - }
> + } else
> + victim->fd_nextsize = victim->bk_nextsize = victim;
> }
>
> mark_bin(av, victim_index);
> @@ -4194,8 +4296,6 @@
> fwd->bk = victim;
> bck->fd = victim;
>
> - if (size >= nb + MINSIZE)
> - any_larger = true;
> #define MAX_ITERS 10000
> if (++iters >= MAX_ITERS)
> break;
> @@ -4203,21 +4303,25 @@
>
> /*
> If a large request, scan through the chunks of current bin in
> - sorted order to find smallest that fits. This is the only step
> - where an unbounded number of chunks might be scanned without doing
> - anything useful with them. However the lists tend to be short.
> + sorted order to find smallest that fits. Use the skip list for this.
> */
>
> if (!in_smallbin_range(nb)) {
> bin = bin_at(av, idx);
>
> /* skip scan if empty or largest chunk is too small */
> - if ((victim = last(bin)) != bin &&
> - (unsigned long)(first(bin)->size) >= (unsigned long)(nb)) {
> + if ((victim = first(bin)) != bin &&
> + (unsigned long)(victim->size) >= (unsigned long)(nb)) {
>
> + victim = victim->bk_nextsize;
> while (((unsigned long)(size = chunksize(victim)) <
> (unsigned long)(nb)))
> - victim = victim->bk;
> + victim = victim->bk_nextsize;
> +
> + /* Avoid removing the first entry for a size so that the skip
> + list does not have to be rerouted. */
> + if (victim != last(bin) && victim->size == victim->fd->size)
> + victim = victim->fd;
>
> remainder_size = size - nb;
> unlink(victim, bck, fwd);
> @@ -4239,6 +4343,11 @@
> remainder->fd = fwd;
> bck->fd = remainder;
> fwd->bk = remainder;
> + if (!in_smallbin_range(remainder_size))
> + {
> + remainder->fd_nextsize = NULL;
> + remainder->bk_nextsize = NULL;
> + }
> set_head(victim, nb | PREV_INUSE |
> (av != &main_arena ? NON_MAIN_ARENA : 0));
> set_head(remainder, remainder_size | PREV_INUSE);
> @@ -4308,9 +4417,7 @@
> remainder_size = size - nb;
>
> /* unlink */
> - bck = victim->bk;
> - bin->bk = bck;
> - bck->fd = bin;
> + unlink(victim, bck, fwd);
>
> /* Exhaust */
> if (remainder_size < MINSIZE) {
> @@ -4335,7 +4442,11 @@
> /* advertise as last remainder */
> if (in_smallbin_range(nb))
> av->last_remainder = remainder;
> -
> + if (!in_smallbin_range(remainder_size))
> + {
> + remainder->fd_nextsize = NULL;
> + remainder->bk_nextsize = NULL;
> + }
> set_head(victim, nb | PREV_INUSE |
> (av != &main_arena ? NON_MAIN_ARENA : 0));
> set_head(remainder, remainder_size | PREV_INUSE);
> @@ -4558,8 +4669,13 @@
>
> bck = unsorted_chunks(av);
> fwd = bck->fd;
> - p->bk = bck;
> p->fd = fwd;
> + p->bk = bck;
> + if (!in_smallbin_range(size))
> + {
> + p->fd_nextsize = NULL;
> + p->bk_nextsize = NULL;
> + }
> bck->fd = p;
> fwd->bk = p;
>
> @@ -4684,7 +4800,15 @@
> reused anyway.
> */
>
> +#if 0
> + /* It is wrong to limit the fast bins to search using get_max_fast
> + because, except for the main arena, all the others might have
> + blocks in the high fast bins. It's not worth it anyway, just
> + search all bins all the time. */
> maxfb = &(av->fastbins[fastbin_index(get_max_fast ())]);
> +#else
> + maxfb = &(av->fastbins[NFASTBINS - 1]);
> +#endif
> fb = &(av->fastbins[0]);
> do {
> if ( (p = *fb) != 0) {
> @@ -4719,6 +4843,11 @@
> unsorted_bin->fd = p;
> first_unsorted->bk = p;
>
> + if (!in_smallbin_range (size)) {
> + p->fd_nextsize = NULL;
> + p->bk_nextsize = NULL;
> + }
> +
> set_head(p, size | PREV_INUSE);
> p->bk = unsorted_bin;
> p->fd = first_unsorted;
> #include <stdlib.h>
> #include <sys/time.h>
> #include <stdio.h>
> #include <signal.h>
>
>
> #define NUM_CH (16 * 1024 * 1024)
> #define SIZE_MAX (1024)
>
> void *p[NUM_CH];
>
> struct drand48_data data;
>
> unsigned int rndm(unsigned int max) {
> double res;
> drand48_r(&data, &res);
> return res * max;
> }
>
> int work = 1;
>
> void alarm_handler(int i __attribute__((unused))) {
> work = 0;
> }
>
> int main(int argc, char *argv[]) {
> unsigned long i = 0;
> struct timeval t0, t1, tdiff;
> struct itimerval itimer;
> int t;
>
> if(argc < 2 || (t = atoi(argv[1])) <= 0) {
> fprintf(stderr, "Usage: %s <time>\n", argv[0]);
> return 1;
> }
>
> fputs("Preparing: ", stdout); fflush(stdout);
>
> gettimeofday(&t0, NULL);
>
> for(i = 0; i < NUM_CH; ++i) {
> p[i] = malloc(rndm(SIZE_MAX));
> }
>
> for(i = 0; i < NUM_CH; i += 2) {
> free(p[i]); p[i] = NULL;
> }
>
> gettimeofday(&t1, NULL);
> timersub(&t1, &t0, &tdiff);
>
> fprintf(stdout, "%ld.%03lu\n", tdiff.tv_sec, tdiff.tv_usec / 1000);
>
> fprintf(stdout, "Testing:"); fflush(stdout);
>
> itimer.it_interval.tv_sec = 0;
> itimer.it_interval.tv_usec = 0;
>
> itimer.it_value.tv_sec = t;
> itimer.it_value.tv_usec = 0;
>
> signal(SIGALRM, &alarm_handler);
> setitimer(ITIMER_REAL, &itimer, NULL);
>
> gettimeofday(&t0, NULL);
>
> for(i = 0; i < NUM_CH && work; i += 2) {
> p[i] = malloc(rndm(SIZE_MAX));
> }
>
> gettimeofday(&t1, NULL);
> timersub(&t1, &t0, &tdiff);
>
> fprintf(stdout, " %u mallocs per second\n", i/2/tdiff.tv_sec);
> return 0;
> }
>
> #if 0
> void **ptr = p + rndm(NUM_CH);
> if(*ptr) { free(*ptr); *ptr = NULL; }
> else { *ptr = malloc(rndm(SIZE_MAX)); }
> #endif
--
·O· Pierre Habouzit
··O madcoder@debian.org
OOO http://www.madism.org
Attachment:
pgpRitlbkIweX.pgp
Description: PGP signature