int main(void) { double std=0, ned=0; #if 0 { usCount start, end; start=GetUsCount(); THREADSLEEP(5000); end=GetUsCount(); printf("Wait was %lf\n", (end-start)/1000000000000.0); } #endif #ifdef WIN32 { /* Force load of user32.dll so we can debug */ BOOL v; SystemParametersInfo(SPI_GETBEEP, 0, &v, 0); } #endif if(0) { printf("\nTesting standard allocator with %d threads ...\n", THREADS); std=runtest(); } if(1) { printf("\nTesting nedmalloc with %d threads ...\n", THREADS); whichmalloc=1; ned=runtest(); } #ifdef WIN32 if(0) { ULONG data=2; win32heap=HeapCreate(0, 0, 0); HeapSetInformation(win32heap, HeapCompatibilityInformation, &data, sizeof(data)); HeapQueryInformation(win32heap, HeapCompatibilityInformation, &data, sizeof(data), NULL); if(2!=data) { printf("The win32 low frag allocator won't work under a debugger!\n"); } else { printf("Testing win32 low frag allocator with %d threads ...\n\n", THREADS); whichmalloc=2; runtest(); } HeapDestroy(win32heap); } #endif if(std && ned) { // ned should have more ops/sec printf("\n\nnedmalloc allocator is %lf times faster than standard\n", ned/std); } printf("\nPress a key to trim\n"); getchar(); nedmalloc_trim(0); #ifdef _MSC_VER printf("\nPress a key to end\n"); getchar(); #endif return 0; }
static double runtest() { unsigned int seed=1; int n, i; double opspersec=0; THREADVAR threads[THREADS]; for(n=0; n<THREADS; n++) { unsigned int *toallocptr; int m; threadstuff[n].ops=0; times[n]=0; threadstuff[n].toalloc=toallocptr=calloc(RECORDS, sizeof(unsigned int)); threadstuff[n].allocs=calloc(RECORDS, sizeof(void *)); for(m=0; m<RECORDS; m++) { unsigned int size=myrandom(&seed); if(size<(1<<30)) { /* Make it two power multiple of less than 512 bytes to model frequent C++ new's */ size=4<<(size & 7); } else { size&=0x3FFF; /* < 16Kb */ /*size&=0x1FFF;*/ /* < 8Kb */ /*size=(1<<6)<<(size & 7);*/ /* < 8Kb */ } *toallocptr++=size; } } #ifdef TORTURETEST for(n=0; n<THREADS; n++) { THREADINIT(&threads[n], n); } for(i=0; i<32; i++) { int found=-1; do { for(n=0; n<THREADS; n++) { THREADSLEEP(100); if(threadstuff[n].done) { found=n; break; } } } while(found<0); THREADWAIT(threads[found]); threads[found]=0; THREADINIT(&threads[found], found); printf("Relaunched thread %d\n", found); } for(n=THREADS-1; n>=0; n--) { THREADWAIT(threads[n]); threads[n]=0; } #else #if 1 for(n=0; n<THREADS; n++) { THREADINIT(&threads[n], n); } for(n=THREADS-1; n>=0; n--) { THREADWAIT(threads[n]); threads[n]=0; } #else /* Quick realloc() test */ doRealloc=1; for(n=0; n<THREADS; n++) { THREADINIT(&threads[n], n); } for(n=THREADS-1; n>=0; n--) { THREADWAIT(threads[n]); threads[n]=0; } #endif #endif { usCount totaltime=0; int totalops=0; for(n=0; n<THREADS; n++) { totaltime+=times[n]; totalops+=threadstuff[n].ops; } opspersec=1000000000000.0*totalops/totaltime*THREADS; printf("This allocator achieves %lfops/sec under %d threads\n", opspersec, THREADS); } for(n=THREADS-1; n>=0; n--) { free(threadstuff[n].allocs); threadstuff[n].allocs=0; free(threadstuff[n].toalloc); threadstuff[n].toalloc=0; } return opspersec; }
static void threadcode(int threadidx) { int n; unsigned int *toallocptr=threadstuff[threadidx].toalloc; void **allocptr=threadstuff[threadidx].allocs; unsigned int seed=threadidx; usCount start; threadstuff[threadidx].done=0; /*neddisablethreadcache(0);*/ THREADSLEEP(100); start=GetUsCount(); #ifdef TORTURETEST /* A randomised malloc/realloc/free test (torture test) */ for(n=0; n<RECORDS*100; n++) { unsigned int r=myrandom(&seed), i; i=(int)(r % RECORDS); if(!allocptr[i]) { allocptr[i]=mallocs[whichmalloc](r & 0x1FFF); threadstuff[threadidx].ops++; } else if(r & (1<<31)) { allocptr[i]=reallocs[whichmalloc](allocptr[i], r & 0x1FFF); threadstuff[threadidx].ops++; } else { frees[whichmalloc](allocptr[i]); allocptr[i]=0; } } for(n=0; n<RECORDS; n++) { if(allocptr[n]) { frees[whichmalloc](allocptr[n]); allocptr[n]=0; } } #else /* A simple stack which allocates and deallocates off the top (speed test) */ for(n=0; n<RECORDS;) { #if 1 r=myrandom(&seed); if(allocptr>threadstuff[threadidx].allocs && (r & 65535)<32760) /*<32760)*/ { /* free */ --toallocptr; --allocptr; --n; frees[whichmalloc](*allocptr); *allocptr=0; } else #endif { if(doRealloc && allocptr>threadstuff[threadidx].allocs && (r & 1)) { allocptr[-1]=reallocs[whichmalloc](allocptr[-1], *toallocptr); } else { allocptr[0]=mallocs[whichmalloc](*toallocptr); allocptr++; } n++; toallocptr++; threadstuff[threadidx].ops++; } } while(allocptr>threadstuff[threadidx].allocs) { frees[whichmalloc](*--allocptr); } #endif times[threadidx]+=GetUsCount()-start; neddisablethreadcache(0); threadstuff[threadidx].done=1; }
int main(void) { double std=0, ned=0; #if defined(WIN32) && defined(USE_NEDMALLOC_DLL) /*PatchInNedmallocDLL();*/ #endif #if 0 { usCount start, end; start=GetUsCount(); THREADSLEEP(5000); end=GetUsCount(); printf("Wait was %lf\n", (end-start)/1000000000000.0); } #endif #ifdef WIN32 #pragma comment(lib, "user32.lib") { /* Force load of user32.dll so we can debug */ BOOL v; SystemParametersInfo(SPI_GETBEEP, 0, &v, 0); } #endif #if 2==TESTTYPE printf("Running torture test\n" "-=-=-=-=-=-=-=-=-=-=\n"); #elif 1==TESTTYPE printf("Running speed test\n" "-=-=-=-=-=-=-=-=-=\n"); #endif printf("Block size <= %u, C++ test mode is %s\n", BLOCKSIZE, TESTCPLUSPLUS ? "on" : "off"); if(0) { printf("\nTesting standard allocator with %d threads ...\n", THREADS); std=runtest(); } if(1) { printf("\nTesting nedmalloc with %d threads ...\n", THREADS); whichmalloc=1; ned=runtest(); } #ifdef WIN32 if(0) { ULONG data=2; win32heap=HeapCreate(0, 0, 0); HeapSetInformation(win32heap, HeapCompatibilityInformation, &data, sizeof(data)); HeapQueryInformation(win32heap, HeapCompatibilityInformation, &data, sizeof(data), NULL); if(2!=data) { printf("The win32 low frag allocator won't work under a debugger!\n"); } else { printf("Testing win32 low frag allocator with %d threads ...\n\n", THREADS); whichmalloc=2; runtest(); } HeapDestroy(win32heap); } #endif if(std && ned) { // ned should have more ops/sec printf("\n\nnedmalloc allocator is %lf times faster than standard\n", ned/std); } printf("\nPress a key to trim\n"); getchar(); nedmalloc_trim(0); #ifdef _MSC_VER printf("\nPress a key to end\n"); getchar(); #endif return 0; }
static double runtest() { unsigned int seed=1; int n, i; double opspersec=0; THREADVAR threads[THREADS]; for(n=0; n<THREADS; n++) { unsigned int *toallocptr; int m; memset(&threadstuff[n].ops, 0, sizeof(threadstuff[n].ops)); times[n]=0; threadstuff[n].toalloc=toallocptr=calloc(RECORDS, sizeof(unsigned int)); threadstuff[n].allocs=calloc(RECORDS, sizeof(void *)); for(m=0; m<RECORDS; m++) { unsigned int size=myrandom(&seed); #if TESTCPLUSPLUS if(size&(1<<31)) { /* Make it two power multiple of less than 512 bytes to model frequent C++ new's */ size=4<<(size & 7); } else #endif { size&=BLOCKSIZE-1; } *toallocptr++=size; } } #if 2==TESTTYPE for(n=0; n<THREADS; n++) { THREADINIT(&threads[n], n); } for(i=0; i<8; i++) { int found=-1; do { for(n=0; n<THREADS; n++) { THREADSLEEP(100); if(threadstuff[n].done) { found=n; break; } } } while(found<0); THREADWAIT(threads[found]); threads[found]=0; #if DEBUG { usCount totaltime=0; int totalops=0, totalmallocs=0, totalreallocs=0; for(n=0; n<THREADS; n++) { totaltime+=times[n]; totalmallocs+=threadstuff[n].ops.mallocs; totalreallocs+=threadstuff[n].ops.reallocs; totalops+=threadstuff[n].ops.mallocs+threadstuff[n].ops.reallocs; } opspersec=1000000000000.0*totalops/totaltime*THREADS; printf("This test spent %f%% of its time doing reallocs\n", 100.0*totalreallocs/totalops); printf("This allocator achieves %lfops/sec under %d threads\n\n", opspersec, THREADS); } #endif THREADINIT(&threads[found], found); printf("Relaunched thread %d\n", found); } for(n=THREADS-1; n>=0; n--) { THREADWAIT(threads[n]); threads[n]=0; } #else #if 1 for(n=0; n<THREADS; n++) { THREADINIT(&threads[n], n); } for(n=THREADS-1; n>=0; n--) { THREADWAIT(threads[n]); threads[n]=0; } #else /* Quick realloc() test */ doRealloc=1; for(n=0; n<THREADS; n++) { THREADINIT(&threads[n], n); } for(n=THREADS-1; n>=0; n--) { THREADWAIT(threads[n]); threads[n]=0; } #endif #endif { usCount totaltime=0; int totalops=0, totalmallocs=0, totalreallocs=0; for(n=0; n<THREADS; n++) { totaltime+=times[n]; totalmallocs+=threadstuff[n].ops.mallocs; totalreallocs+=threadstuff[n].ops.reallocs; totalops+=threadstuff[n].ops.mallocs+threadstuff[n].ops.reallocs; } opspersec=1000000000000.0*totalops/totaltime*THREADS; printf("This test spent %f%% of its time doing reallocs\n", 100.0*totalreallocs/totalops); printf("This allocator achieves %lfops/sec under %d threads\n", opspersec, THREADS); } for(n=THREADS-1; n>=0; n--) { free(threadstuff[n].allocs); threadstuff[n].allocs=0; free(threadstuff[n].toalloc); threadstuff[n].toalloc=0; } return opspersec; }
static void threadcode(int threadidx) { int n; unsigned int *toallocptr=threadstuff[threadidx].toalloc; void **allocptr=threadstuff[threadidx].allocs; unsigned int r, seed=threadidx; usCount start; size_t allocated=0, size; threadstuff[threadidx].done=0; /*neddisablethreadcache(0);*/ THREADSLEEP(100); start=GetUsCount(); #if 2==TESTTYPE /* A randomised malloc/realloc/free test (torture test) */ for(n=0; n<RECORDS*100; n++) { static int reallocflip; unsigned int i, dorealloc=(reallocflip=!reallocflip); r=myrandom(&seed); i=(int)(r % RECORDS); #if TESTCPLUSPLUS dorealloc=!(r&(15<<28)); if(r&(1<<31)) { /* Make it two power multiple of less than 512 bytes to model frequent C++ new's */ size=4<<(r & 7); dorealloc=0; } else #endif size=(size_t)(r & (BLOCKSIZE-1)); if(allocated<MAXMEMORY2 && !allocptr[i]) { if(!(allocptr[i]=mallocs[whichmalloc](size))) abort(); #if TOUCH { volatile char *mem=(volatile char *)allocptr[i]; volatile char *end=mem+size; for(; mem<end; mem+=4096) *mem; } #endif allocated+=memsizes[whichmalloc](allocptr[i]); threadstuff[threadidx].ops.mallocs++; } else if(allocated<MAXMEMORY2 && dorealloc) /* If not TESTCPLUSPLUS, then how often realloc() gets called depends on how small RECORDS is. */ { allocated-=memsizes[whichmalloc](allocptr[i]); if(!(allocptr[i]=reallocs[whichmalloc](allocptr[i], size))) abort(); #if TOUCH { volatile char *mem=(volatile char *)allocptr[i]; volatile char *end=mem+size; for(; mem<end; mem+=4096) *mem; } #endif allocated+=memsizes[whichmalloc](allocptr[i]); threadstuff[threadidx].ops.reallocs++; } else if(allocptr[i]) { allocated-=memsizes[whichmalloc](allocptr[i]); frees[whichmalloc](allocptr[i]); allocptr[i]=0; threadstuff[threadidx].ops.frees++; } } for(n=0; n<RECORDS; n++) { if(allocptr[n]) { allocated-=memsizes[whichmalloc](allocptr[n]); frees[whichmalloc](allocptr[n]); allocptr[n]=0; threadstuff[threadidx].ops.frees++; } } assert(!allocated); #elif 1==TESTTYPE /* A simple stack which allocates and deallocates off the top (speed test) */ for(n=0; n<RECORDS;) { #if 1 r=myrandom(&seed); if(allocptr>threadstuff[threadidx].allocs && (r & 65535)<32760) /*<32760)*/ { /* free */ --toallocptr; --allocptr; --n; frees[whichmalloc](*allocptr); *allocptr=0; threadstuff[threadidx].ops.frees++; } else #endif { if(doRealloc && allocptr>threadstuff[threadidx].allocs && (r & 1)) { if(!(allocptr[-1]=reallocs[whichmalloc](allocptr[-1], *toallocptr))) abort(); #if TOUCH { volatile char *mem=(volatile char *)allocptr[-1]; volatile char *end=mem+*toallocptr; for(; mem<end; mem+=4096) *mem; } #endif threadstuff[threadidx].ops.reallocs++; } else { if(!(allocptr[0]=mallocs[whichmalloc](*toallocptr))) abort(); #if TOUCH { volatile char *mem=(volatile char *)allocptr[0]; volatile char *end=mem+*toallocptr; for(; mem<end; mem+=4096) *mem; } #endif threadstuff[threadidx].ops.mallocs++; allocptr++; } n++; toallocptr++; /*if(!(threadstuff[threadidx].ops & 0xff)) nedtrimthreadcache(0,0);*/ } } while(allocptr>threadstuff[threadidx].allocs) { frees[whichmalloc](*--allocptr); threadstuff[threadidx].ops.frees++; } #endif times[threadidx]+=GetUsCount()-start; neddisablethreadcache(0); threadstuff[threadidx].done=1; }