各种内存分配器的对比测试

最近两天测试了下tcmalloc,性能的确牛B.

所以修改了下固定对象分配器,模仿tcmalloc利用tls做thread cache.

下面是在我机器上对自己写的各个内存分配器与tcmalloc的对比测试,

fix_obj_pool finish:326

fix_obj_pool finish:165

fix_obj_pool finish:168

fix_obj_pool finish:164

fix_obj_pool finish:174

fix_obj_pool finish:164

fix_obj_pool finish:174

fix_obj_pool finish:185

fix_obj_pool finish:173

fix_obj_pool finish:168

gen_allocator finish:567

gen_allocator finish:264

gen_allocator finish:261

gen_allocator finish:260

gen_allocator finish:260

gen_allocator finish:261

gen_allocator finish:260

gen_allocator finish:261

gen_allocator finish:260

gen_allocator finish:263

block_obj_allocator finish:342

block_obj_allocator finish:257

block_obj_allocator finish:258

block_obj_allocator finish:257

block_obj_allocator finish:258

block_obj_allocator finish:257

block_obj_allocator finish:258

block_obj_allocator finish:259

block_obj_allocator finish:263

block_obj_allocator finish:262

tcmalloc finish:279

tcmalloc finish:266

tcmalloc finish:265

tcmalloc finish:267

tcmalloc finish:266

tcmalloc finish:266

tcmalloc finish:265

tcmalloc finish:264

tcmalloc finish:266

tcmalloc finish:267

test1 finish————

fix_obj_pool finish:606

fix_obj_pool finish:471

fix_obj_pool finish:469

fix_obj_pool finish:473

fix_obj_pool finish:468

fix_obj_pool finish:468

fix_obj_pool finish:470

fix_obj_pool finish:474

fix_obj_pool finish:475

fix_obj_pool finish:467

gen_allocator finish:928

gen_allocator finish:647

gen_allocator finish:677

gen_allocator finish:643

gen_allocator finish:645

gen_allocator finish:644

gen_allocator finish:643

gen_allocator finish:644

gen_allocator finish:643

gen_allocator finish:644

block_obj_allocator finish:586

block_obj_allocator finish:500

block_obj_allocator finish:502

block_obj_allocator finish:500

block_obj_allocator finish:502

block_obj_allocator finish:501

block_obj_allocator finish:501

block_obj_allocator finish:501

block_obj_allocator finish:501

block_obj_allocator finish:501

tcmalloc finish:551

tcmalloc finish:549

tcmalloc finish:549

tcmalloc finish:549

tcmalloc finish:551

tcmalloc finish:549

tcmalloc finish:548

tcmalloc finish:551

tcmalloc finish:549

tcmalloc finish:550

test2 finish————

fix_obj_pool finish:464

fix_obj_pool finish:466

fix_obj_pool finish:464

fix_obj_pool finish:465

fix_obj_pool finish:465

fix_obj_pool finish:466

fix_obj_pool finish:465

fix_obj_pool finish:464

fix_obj_pool finish:467

fix_obj_pool finish:465

gen_allocator finish:674

gen_allocator finish:661

gen_allocator finish:667

gen_allocator finish:656

gen_allocator finish:657

gen_allocator finish:658

gen_allocator finish:658

gen_allocator finish:660

gen_allocator finish:657

gen_allocator finish:660

block_obj_allocator finish:479

block_obj_allocator finish:479

block_obj_allocator finish:477

block_obj_allocator finish:477

block_obj_allocator finish:478

block_obj_allocator finish:480

block_obj_allocator finish:478

block_obj_allocator finish:481

block_obj_allocator finish:477

block_obj_allocator finish:478

tcmalloc finish:562

tcmalloc finish:565

tcmalloc finish:563

tcmalloc finish:562

tcmalloc finish:562

tcmalloc finish:563

tcmalloc finish:566

tcmalloc finish:565

tcmalloc finish:562

tcmalloc finish:562

test3 finish————

三个测试分别是

1)分配 1000万个16字节的对象

2)分配1000万,再释放1000万

3)分配10万,释放10万,执行1000万/10万次

从输出可以看出fix_obj_pool 的第一个测试是最快的,因为它的分配处理最简单,但如果把释放也计入统计,优势就几乎没有了。

还有一个手段可以优化fix_obj_pool,就是释放时不将对象放回到可用列表中,只是增加一个计数,当整个内存块中的对象都被释放时

才将内存放回到可用列表中去。而gen_allocator效果是最差的,可以直接丢弃了。

从测试结果可以看出,tcmalloc已经可以满足大多数的需求,基本没有必要自己写通用内存分配器。当然对象池还是可以考虑的。

代码如下:

block_obj_allocator.h

#ifndef _BLOCK_OBJ_ALLOCATOR
#define _BLOCK_OBJ_ALLOCATOR

/*
 * Public interface of the block object allocator.
 *
 * NOTE(review): the guard macro starts with an underscore followed by an
 * uppercase letter, which is a reserved identifier in C. It is kept as-is
 * here in case other files test it; consider renaming project-wide.
 */

/* Opaque handle; the structure is defined in block_obj_allocator.c. */
typedef struct block_obj_allocator *block_obj_allocator_t;

/* Create a new allocator instance. */
block_obj_allocator_t create_block_obj_allocator(void);

/*
 * Print diagnostic information about the calling thread's cache for the
 * size class that serves requests of `size` bytes.
 */
void print_info(block_obj_allocator_t ba, int size);

#endif

block_obj_allocator.c

#include #include <pthread.h>#include #include <stdint.h>#include <assert.h>#include #include <stdlib.h>#include free_list{list_node next;uint32_t size;uint32_t init_size;list_node *head;list_node *tail;void *mem;};struct thread_allocator{list_node next;block_obj_allocator_t central_allocator;struct link_list *_free_list;struct link_list *_recover;uint32_t free_size;uint16_t array_idx;uint32_t collect_factor;};struct thread_cache{list_node next;struct thread_allocator _allocator[17];};struct block_obj_allocator{IMPLEMEMT(allocator);pthread_key_t t_key;spinlock_t _free_list_mtx[17];struct link_list *_free_list[17];spinlock_t mtx;struct link_list *thread_caches;};static void *free_list_get(struct free_list *f){void *ptr = (void*)f->head;f->head = f->head->next;if(!f->head)f->tail = NULL;–f->size;return ptr;}static void free_list_put(struct free_list *f,void *ptr){list_node *l = (list_node*)ptr;l->next = NULL;if(f->tail){f->tail->next = l;f->tail = l;}elsef->head = f->tail = l;++f->size;}#define DEFAULT_BLOCK_SIZE 1024*1024static struct free_list *creat_new_freelist(uint32_t size){uint32_t init_size = DEFAULT_BLOCK_SIZE/size;struct free_list *f = (struct free_list*)calloc(1,sizeof(*f));assert(f);f->mem = calloc(1,DEFAULT_BLOCK_SIZE);assert(f->mem);f->init_size = f->size = init_size;int32_t i = 0;for( ; i < init_size; ++i){list_node *l = (list_node*)(((uint8_t*)f->mem)+(i*size));free_list_put(f,l);}f->size = init_size;return f;}static struct free_list *central_get_freelist(block_obj_allocator_t central,uint16_t array_idx){free_list *f;spin_lock(central->_free_list_mtx[array_idx],4000);f = (struct free_list*)link_list_pop(central->_free_list[array_idx]);spin_unlock(central->_free_list_mtx[array_idx]);if(!f){//printf(“creat_new_freelist\n”);f = creat_new_freelist(1<<array_idx);}return f;}static void give_back_to_central(block_obj_allocator_t central,uint16_t array_idx,struct free_list 
*f){//printf(“give_back_to_central\n”);spin_lock(central->_free_list_mtx[array_idx],4000);LINK_LIST_PUSH_BACK(central->_free_list[array_idx],f);spin_unlock(central->_free_list_mtx[array_idx]);}void *thread_allocator_alloc(struct thread_allocator *a){void *ptr;struct free_list *f;if(!a->free_size){//thread cache不够内存了,从central获取f = central_get_freelist(a->central_allocator,a->array_idx);assert(f);LINK_LIST_PUSH_BACK(a->_free_list,f);a->free_size += f->size;}else{f = (struct free_list*)link_list_head(a->_free_list);if(!f){f = (struct free_list*)link_list_pop(a->_recover);LINK_LIST_PUSH_BACK(a->_free_list,f);}}ptr = free_list_get(f);assert(ptr);–a->free_size;if(!f->size){link_list_pop(a->_free_list);link_list_push_back(a->_recover,(list_node*)f);}return ptr;}void thread_allocator_dealloc(struct thread_allocator *a,void *ptr){struct free_list *f = (struct free_list*)link_list_head(a->_recover);if(f){free_list_put(f,ptr);++a->free_size;if(f->size == f->init_size){link_list_pop(a->_recover);//printf(“==init_size\n”);(a->free_size >= a->collect_factor){//将f归还给central_allocator; give_back_to_central(a->central_allocator,a->array_idx,f);a->free_size -= f->size;}elselink_list_push_back(a->_free_list,(list_node*)f);}}else{f = (struct free_list*)link_list_head(a->_free_list);assert(f);free_list_put(f,ptr);++a->free_size;}}void thread_allocator_info(struct thread_allocator *a){printf(,a->free_size);{struct free_list *f = (struct free_list*)link_list_head(a->_free_list);while(f){printf(,f->size);f = (struct free_list*)((list_node*)f)->next;}}{struct free_list *f = (struct free_list*)link_list_head(a->_recover);while(f){printf(,f->size);f = (struct free_list*)((list_node*)f)->next;}}}extern uint8_t GetK(uint32_t size);static struct thread_cache* thread_cache_create(block_obj_allocator_t ba){struct thread_cache *tc = calloc(1,sizeof(*tc));int32_t i = 0;for( ; i < 17; ++i){tc->_allocator[i].central_allocator = ba;tc->_allocator[i]._free_list = 
LINK_LIST_CREATE();tc->_allocator[i]._recover = LINK_LIST_CREATE();tc->_allocator[i].array_idx = i;tc->_allocator[i].collect_factor = ((DEFAULT_BLOCK_SIZE)*2)/(1<<i);}spin_lock(ba->mtx,4000);LINK_LIST_PUSH_BACK(ba->thread_caches,tc);spin_unlock(ba->mtx);return tc; }static void release_freelist(struct link_list *flist){list_node *l = link_list_head(flist);while(l){struct free_list *f = (struct free_list*)l;l = l->next;free(f->mem);free(f);//printf(“destroy_freelist\n”); }}static void destroy_thread_cache(struct thread_cache *tc){int32_t i = 0;for(; i < 17; ++i){release_freelist(tc->_allocator[i]._free_list);release_freelist(tc->_allocator[i]._recover);LINK_LIST_DESTROY(&(tc->_allocator[i]._free_list));LINK_LIST_DESTROY(&(tc->_allocator[i]._recover));}free(tc);}static void* thread_cache_alloc(struct thread_cache *tc,uint32_t size){size += sizeof(int32_t);uint8_t k = GetK(size);size = 1 << k;int32_t *ptr = (int32_t*)thread_allocator_alloc(&(tc->_allocator[k]));*ptr = k;ptr++;return (void*)ptr;}static void thread_cache_dealloc(struct thread_cache *tc,void *ptr){int32_t *_ptr = ((int32_t*)ptr)-1;uint8_t k = *_ptr;thread_allocator_dealloc(&(tc->_allocator[k]),_ptr);}static void thread_cache_info(struct thread_cache *tc,uint32_t size){size += sizeof(int32_t);uint8_t k = GetK(size);thread_allocator_info(&(tc->_allocator[k]));}static void* block_obj_al_alloc(struct allocator *a, int32_t size){block_obj_allocator_t ba = (block_obj_allocator_t)a;struct thread_cache *tc = (struct thread_cache*)pthread_getspecific(ba->t_key);if(!tc){tc = thread_cache_create(ba);pthread_setspecific(ba->t_key,(void*)tc);}return thread_cache_alloc(tc,size);}static void block_obj_al_dealloc(struct allocator*a, void *ptr){block_obj_allocator_t ba = (block_obj_allocator_t)a;struct thread_cache *tc = (struct thread_cache*)pthread_getspecific(ba->t_key);assert(tc);thread_cache_dealloc(tc,ptr);}static void destroy_block_obj_al(struct allocator **a){block_obj_allocator_t ba = 
(block_obj_allocator_t)*a;//销毁所有的thread_cache {list_node *l = link_list_head(ba->thread_caches);while(l){struct thread_cache *tc = (struct thread_cache *)l;l = l->next;destroy_thread_cache(tc);}LINK_LIST_DESTROY(&ba->thread_caches);}//销毁所有free_list {int32_t i = 0;for( ; i < 17; ++i){release_freelist(ba->_free_list[i]);LINK_LIST_DESTROY(&ba->_free_list[i]);}}{int32_t i = 0;for( ; i < 17; ++i){spin_destroy(&(ba->_free_list_mtx[i]));}}spin_destroy(&(ba->mtx));pthread_key_delete(ba->t_key);free(ba);*a = NULL;}block_obj_allocator_t create_block_obj_allocator(){block_obj_allocator_t ba = (block_obj_allocator_t)calloc(1,sizeof(*ba));ba->mtx = spin_create();ba->thread_caches = LINK_LIST_CREATE();int32_t i = 0;for( ; i < 17; ++i){ba->_free_list[i] = LINK_LIST_CREATE();ba->_free_list_mtx[i] = spin_create();}pthread_key_create(&ba->t_key,0);ba->super_class.Alloc = block_obj_al_alloc;ba->super_class.DeAlloc = block_obj_al_dealloc;ba->super_class.Destroy = destroy_block_obj_al;return ba;}void print_info(block_obj_allocator_t ba,int size){struct thread_cache *tc = (struct thread_cache*)pthread_getspecific(ba->t_key);thread_cache_info(tc,size);}

test.c

用最少的浪费面对现在

各种内存分配器的对比测试

相关文章:

你感兴趣的文章:

标签云: