2、多线程内存池
上一节很简略的说了下单线程内存池,单线程内存池如果要放在多线程环境下使用是不安全的,我们需要进行保护,如何保护,最简单的方法就是加临界区,云风的实现里面是用原子操作模拟一个临界区,我实测跟临界区性能非常接近,甚至很多时候不如临界区,所以我就不追求用原子操作模拟临界区了,还是直接用临界区最简单。
class CMemPool
{
public:
struct memory_list
{
memory_list *_next;
};
struct alloc_node
{
size_t _size;
size_t _number;
size_t _bksize;
long _guard;
memory_list *_free_list;
};
struct chunk_list
{
chunk_list *_next;
memory_list *_data;
size_t _size;
size_t _idx;
};
~CMemPool();
static CMemPool &instance()
{
if(!_instance)
{
create_instance();
}
return *_instance;
}
static int chunk_index(size_t size);
void *allocate(size_t size, size_t *psize=NULL);
void deallocate(void *p, size_t size);
//以下为几个检测和设置预分配参数的函数,2007.06.08增
size_t getallocnumber(size_t size, size_t *psize=NULL);
size_t setallocnumber(size_t size, size_t number);
static void dumpallocnode();
private:
static alloc_node _vnode[92];
chunk_list *_chunk_list;
long _chunk_guard;
static CMemPool *_instance;
static long _singleton_guard;
static bool _singleton_destroyed;
static void create_instance();
static void build_chunknode();
CMemPool();
memory_list *alloc_chunk(size_t idx);
};
CMemPool *CMemPool::_instance = 0;
long CMemPool::_singleton_guard = 0;
bool CMemPool::_singleton_destroyed = false;
CMemPool::alloc_node CMemPool::_vnode[92];
struct chunk_desc
{
int s; // 区间起始字节
int e; // 区间终止字节
int align; // 区间内分段对齐值
int number; // 区间内分段个数,计算属性
}_cd[] =
{
{0, 1024, 16, 0},
{1024, 8192, 256, 0}
};
void CMemPool::create_instance()
{
thread_guard guard(&_singleton_guard);
if(_instance)
return;
assert(!_singleton_destroyed);
static CMemPool obj;
_instance = &obj;
}
void CMemPool::build_chunknode()
{
const int cdnum = sizeof(_cd)/sizeof(_cd[0]);
int index=0, number;
for(int j=0; j<cdnum; j++)
{
_cd[j].number = (_cd[j].e-_cd[j].s)/_cd[j].align;
for(int i=0; i<_cd[j].number; i++)
{
int unitsize = (i+1)*_cd[j].align+_cd[j].s;
_vnode[index]._size = unitsize;
if(unitsize < 512)
number = 4096/unitsize;
else if(unitsize < 1024)
number = 16384/unitsize;
else
number = 65536/unitsize;
_vnode[index]._number = number;
_vnode[index]._bksize = unitsize * number;
_vnode[index]._guard = 0;
_vnode[index]._free_list = NULL;
++index;
}
}
}
size_t CMemPool::getallocnumber(size_t size, size_t *psize/*=NULL*/)
{
int idx = chunk_index(size);
if(idx >= 0)
return _vnode[idx]._number;
return 0;
}
size_t CMemPool::setallocnumber(size_t size, size_t number)
{
int idx = chunk_index(size);
if(idx >= 0)
{
size_t on = _vnode[idx]._number;
_vnode[idx]._number = number;
return on;
}
return 0;
}
void CMemPool::dumpallocnode()
{
int size = sizeof(_vnode)/sizeof(_vnode[0]);
printf("_vnode.size = %d\r\n", size);
for(int i=0; i<size; ++i)
{
printf("vnode.size %d, vnode.number %d\r\n", _vnode[i]._size, _vnode[i]._number);
}
}
int CMemPool::chunk_index(size_t bytes)
{
#if(0)
int idx = 0;
const int cdnum = sizeof(_cd)/sizeof(_cd[0]);
if(bytes > _cd[cdnum-1].e)
idx = -1;
else
{
for(int i=0; i<cdnum; i++)
{
if((bytes > _cd[i].s) && (bytes <= _cd[i].e))
{
idx += (bytes-_cd[i].s+_cd[i].align-1)/_cd[i].align-1;
break;
}
idx += _cd[i].number;
}
}
// printf("bytes %d idx = %d\r\n", bytes, idx);
return idx;
#else
//下面的代码是根据静态数据和上面的代码做了优化的,
//如果修改了基础数据需要对应的修改下面的代码
if(bytes > 8192)
{
return -1;
}
else if(bytes > 1024)
// idx = _cd[0].number+(bytes-_cd[1].s+_cd[1].align-1)/_cd[1].align-1;
return 64+(bytes-1024+255)/256-1;
// return _cd[0].number+(bytes-1024+255)/256-1;
else
// idx = (bytes-_cd[0].s+_cd[0].align-1)/_cd[0].align-1;
return (bytes+15)/16-1;
#endif
}
CMemPool::CMemPool()
{
_chunk_list = NULL;
_chunk_guard = 0;
build_chunknode();
}
CMemPool::~CMemPool()
{
int s = 0;
chunk_list *temp = _chunk_list;
while(temp)
{
++s;
temp = temp->_next;
}
void **chunk = reinterpret_cast<void **>(malloc(s * sizeof(void *)));
temp = _chunk_list;
int i=0;
while(temp)
{
chunk[i] = temp->_data;
++i;
temp = temp->_next;
}
for(i=0; i<s; i++)
{
free(chunk[i]);
}
free(chunk);
_singleton_destroyed = true;
_instance = 0;
}
CMemPool::memory_list *CMemPool::alloc_chunk(size_t idx)
{
thread_guard guard(&_chunk_guard);
memory_list *¤t_list = _vnode[idx]._free_list;
if(current_list)
return current_list;
const size_t node_size = _vnode[idx]._size;
const size_t number = _vnode[idx]._number;
const size_t chunk_size = node_size * number;
memory_list *ret = current_list = reinterpret_cast<memory_list *>(malloc(chunk_size));
memory_list *iter = ret;
//for(size_t i=0; i<=chunk_size-node_size*2; i+=node_size)
for(size_t i=0; i<number-1; ++i)
{
iter = iter->_next = iter+node_size/sizeof(*iter);
}
iter->_next = 0;
return ret;
}
void *CMemPool::allocate(size_t size, size_t *psize/*=NULL*/)
{
int idx = chunk_index(size);
if(idx < 0)
{
if(psize) *psize = size;
return malloc(size);
}
if(psize)
*psize = _vnode[idx]._size;
thread_guard guard(&_vnode[idx]._guard);
memory_list *&temp = _vnode[idx]._free_list;
if(!temp)
{
memory_list *new_chunk = alloc_chunk(idx);
chunk_list *chunk_node;
if(chunk_index(sizeof(chunk_list))==idx)
{
chunk_node = reinterpret_cast<chunk_list *>(temp);
temp = temp->_next;
}
else
{
chunk_node = reinterpret_cast<chunk_list *>(allocate(sizeof(chunk_list)));
}
thread_guard guard(&_chunk_guard);
chunk_node->_next = _chunk_list;
chunk_node->_data = new_chunk;
chunk_node->_size = _vnode[idx]._bksize;
chunk_node->_idx = idx;
_chunk_list = chunk_node;
}
void *ret = temp;
temp = temp->_next;
return ret;
}
void CMemPool::deallocate(void *p, size_t size)
{
int idx = chunk_index(size);
if(idx < 0)
{
free(p);
}
else
{
memory_list *free_block = reinterpret_cast<memory_list *>(p);
thread_guard guard(&_vnode[idx]._guard);
memory_list *&temp = _vnode[idx]._free_list; //_free_list[idx];
free_block->_next = temp;
temp = free_block;
}
}
以上基本是云风内存池的一个简单修改版,这种模式的内存池由于每次分配释放都要lock unlock,所以效率很低,大概只相当于malloc/free 2-4倍的速度,提速也不是很明显,大概相当于nedmalloc速度的一半左右。
发表于 @ 2010年02月04日 16:54:00 | | 编辑| 举报| 收藏