再参考了《Modern C++ Design》的FixedAllocator的设计,并且优化了一下算法,虽然最坏时间复杂度还是O(N)的,但是在通常情况下,new/delete的使用已经获得了比较好的性能了。
Chunk.h和version1.0的差不多,只是去掉了析构函数,让Chunk直接被FixedAlloctor操作
Chunk.h
#ifndef CHUNK_H
#define CHUNK_H
#include <cassert>
struct Chunk {
//初始化一个Chunk
void init(size_t blockSize, unsigned char blocks);
//分配一个block
void* allocate(size_t blockSize);
//回收一个block
void deallocate(void* p, size_t blockSize);
//Chunk的起始地址
unsigned char* pData_;
//第一个可用的block
unsigned char firstAvailableBlock_;
//block的块数
unsigned char blocksAvailable_;
};
void Chunk::init(size_t blockSize, unsigned char blocks) {
//从操作系统申请一个Chunk的内存
pData_ = new unsigned char[blockSize * blocks];
firstAvailableBlock_ = 0;
blocksAvailable_ = blocks;
unsigned char *p = pData_;
//每个可用的block存放它下一个可用的block的编号
for (unsigned char i = 1; i < blocks; i++, p += blockSize) {
*p = i;
}
}
void* Chunk::allocate(size_t blockSize) {
if (blocksAvailable_ == 0) return 0;
unsigned char* pRet = pData_ + (firstAvailableBlock_ * blockSize);
//更新第一个可用的block
firstAvailableBlock_ = *pRet;
blocksAvailable_--;
return pRet;
}
void Chunk::deallocate(void* p, size_t blockSize) {
//判断回收的地址是否合法
assert(p >= pData_);
unsigned char* toRel = static_cast<unsigned char*>(p);
//判断是否合法
assert((toRel - pData_) % blockSize == 0);
*toRel = firstAvailableBlock_;
firstAvailableBlock_ = static_cast<unsigned char>((toRel - pData_) / blockSize);
//判断是否产生精度误差
assert(firstAvailableBlock_ == ((toRel - pData_) / blockSize));
blocksAvailable_++;
}
#endif
FixedAllocator.h
毕竟工程还是工程,很多极端数据都是很难用到的,这个与用户使用习惯有关,对于常用的使用new的习惯(连续new,连续delete,连续new/delete)该FixedAllocator都有比较好的性能。
#ifndef FIXEDALLOCATOR_H
#define FIXEDALLOCATOR_H
#include "Chunk.h"
#include <vector>
using namespace std;
class FixedAllocator {
public :
FixedAllocator(size_t blockSize);
~FixedAllocator();
//分配内存
void* allocate();
//回收内存
void deallocate(void* p);
private :
//每个Chunk所含的Block数
static const int BLOCKS;
//block大小
size_t blockSize_;
//该FixedAllocator所含的Chunks
vector<Chunk> chunks_;
//最后一个被用于分配空间的Chunk
Chunk* lastAllocChunk_;
//最后一个被用于释放空间的Chunk
Chunk* lastDeallocChunk_;
//被使用过的Chunks的数
int numOfUsedChunk_;
//判断p是否属于某个chunk
bool isPtrInChunk(void* p, Chunk* chunk);
};
const int FixedAllocator::BLOCKS = 255;
FixedAllocator::FixedAllocator(size_t blockSize)
: blockSize_(blockSize), lastAllocChunk_(0), lastDeallocChunk_(0), numOfUsedChunk_(0) {}
FixedAllocator::~FixedAllocator() {
vector<Chunk>::iterator it = chunks_.begin();
for (; it != chunks_.end(); it++) {
delete[] it->pData_;
}
}
void* FixedAllocator::allocate() {
if (!lastAllocChunk_ || !lastAllocChunk_->blocksAvailable_) {
//该Chunk不可用,需要搜索新的Chunk,deallocate保证只有vector中的最后一个块为全空
bool noBlock = true;
if (numOfUsedChunk_ < chunks_.size()) {
vector<Chunk>::reverse_iterator it = chunks_.rbegin();
for (; it != chunks_.rend(); it++) {
if (it->blocksAvailable_ > 0) {
lastAllocChunk_ = &*it;
noBlock = false;
break;
}
}
}
if (noBlock) {
//没有可用Chunk,必须新增一个块
numOfUsedChunk_++;
if (chunks_.size()+1 > chunks_.capacity()) {
chunks_.reserve(chunks_.capacity() + 1000);
}
Chunk newChunk;
newChunk.init(blockSize_, BLOCKS);
chunks_.push_back(newChunk);
lastAllocChunk_ = &chunks_.back();
lastDeallocChunk_ = &chunks_.back();
}
}
assert(lastAllocChunk_ != 0);
assert(lastAllocChunk_->blocksAvailable_ > 0);
return lastAllocChunk_->allocate(blockSize_);
}
inline bool FixedAllocator::isPtrInChunk(void* p, Chunk* chunk) {
return (p >= chunk->pData_) && (p < chunk->pData_ + (blockSize_ * BLOCKS));
}
void FixedAllocator::deallocate(void* p) {
vector<Chunk>::iterator pChunkToRelease;
if (!isPtrInChunk(p, lastDeallocChunk_)) {
//要释放的空间不在lastDeallocChunk中
//从lastDeallocChunk开始up和down方向查找
vector<Chunk>::iterator up = lastDeallocChunk_+1;
vector<Chunk>::iterator down = lastDeallocChunk_;
vector<Chunk>::iterator begVec = chunks_.begin();
vector<Chunk>::iterator endVec = chunks_.end();
int t = 0;
while (down != begVec || up != endVec) {
t ^= 1;
if (up == endVec) t = 0;
if (down == begVec) t = 1;
if (t) {
//up方向
if (up != endVec) {
if (isPtrInChunk(p, up)) {
pChunkToRelease = up;
break;
}
up++;
}
} else {
//down方向
if (down != begVec) {
down--;
if (isPtrInChunk(p, down)) {
pChunkToRelease = down;
break;
}
}
}
}
} else {
pChunkToRelease = lastDeallocChunk_;
}
assert(&*pChunkToRelease != 0);
pChunkToRelease->deallocate(p, blockSize_);
lastDeallocChunk_ = pChunkToRelease;
if (pChunkToRelease->blocksAvailable_ == BLOCKS) {
//该块已经空
numOfUsedChunk_--;
Chunk* it = &chunks_.back();
if (it->blocksAvailable_ == BLOCKS) {
//若vector末尾的chunk已是空块,直接把pChunkToRelease删除
if (it != pChunkToRelease) {
delete[] pChunkToRelease->pData_;
chunks_.erase(pChunkToRelease);
}
} else {
//若vector末尾的chunk非空,把pChunkToRelease移到vector末尾
Chunk tmp(*pChunkToRelease);
chunks_.erase(pChunkToRelease);
chunks_.push_back(tmp);
}
lastDeallocChunk_ = &chunks_.front();
}
}
#endif
MemPool.h
比起1.0少了很多代码,这个是当然的,因为很多细节都被封装在FixedAllocator里面,而且还用了STL的vector,代码又减少了许多,但是测试时候发现vector貌似比自己写的list慢了点,估计原因是那个erase在list里面是O(1)但是vector是O(n)的。
#ifndef MEMPOOL_H
#define MEMPOOL_H
#include "FixedAllocator.h"
class MemPool {
public :
MemPool(size_t blockSize) : blockSize_(blockSize), allocator_(new FixedAllocator(blockSize)) {}
~MemPool() {
delete allocator_;
}
void* alloc(size_t size) {
if (size != blockSize_) {
return ::operator new(size);
}
return allocator_->allocate();
}
void free(void* p, size_t size) {
if (!p) return ;
if (size != blockSize_) {
::operator delete(p);
}
allocator_->deallocate(p);
}
private :
FixedAllocator* allocator_;
size_t blockSize_;
};
#endif
test.cpp
#include <iostream>
#include "FixedAllocator.h"
#include "MemPool.h"
#include "time.h"
using namespace std;
class TestClassA {
public :
int a;
static void* operator new(size_t size);
static void operator delete(void *p, size_t size);
static MemPool memPool;
};
inline void* TestClassA::operator new(size_t size) {
return memPool.alloc(size);
}
inline void TestClassA::operator delete(void* p, size_t size) {
memPool.free(p, size);
}
MemPool TestClassA::memPool(sizeof(TestClassA));
class TestClassB {
public :
int b;
};
const int CTIMES = 1000000;
TestClassA* pa[CTIMES];
TestClassB* pb[CTIMES];
int main() {
//测试新建1000000个SmallObjet所需要的时间
int i;
clock_t begA, begB, endA, endB;
begB = clock();
for (i=0; i<CTIMES; i++) {
pb[i] = new TestClassB;
}
endB = clock();
printf("Not Used MemPool Time For New = %d ms\n", endB - begB);
begA = clock();
for (i=0; i<CTIMES; i++) {
pa[i] = new TestClassA;
}
endA = clock();
printf("Used MemPool Time For New = %d ms\n", endA - begA);
begB = clock();
for (i=CTIMES-1; i>=0; i--) {
delete pb[i];
}
endB = clock();
printf("Not Used MemPool Time For Delete = %d ms\n", endB - begB);
begA = clock();
for (i=0; i<CTIMES; i++) {
delete pa[i];
}
endA = clock();
printf("Used MemPool Time For Delete = %d ms\n", endA - begA);
begB = clock();
for (i=0; i<CTIMES; i++) {
pb[i] = new TestClassB;
delete pb[i];
}
endB = clock();
printf("Not Used MemPool Time For New/Delete = %d ms\n", endB - begB);
begA = clock();
for (i=0; i<CTIMES; i++) {
pa[i] = new TestClassA;
delete pa[i];
}
endA = clock();
printf("Used MemPool Time For New/Delete = %d ms\n", endA - begA);
return 0;
}
测试结果如下:
Not Used MemPool Time For New = 360 ms
Used MemPool Time For New = 156 ms
Not Used MemPool Time For Delete = 531 ms
Used MemPool Time For Delete = 266 ms
Not Used MemPool Time For New/Delete = 906 ms
Used MemPool Time For New/Delete = 344 ms
Press any key to continue
明显比version1.0好很多,接着准备写versiong1.2,希望获得更好的封装,同时优化再优化FixedAllocator的算法。还有推荐大家看《Modern C++ Design》这本书,真的很好。
posted on 2008-04-21 16:15
豪 阅读(3271)
评论(3) 编辑 收藏 引用 所属分类:
C++之梦