使用共享内存的多级哈希表的一种实现

在一个服务程序运行的时候，它往往要把数据写入共享内存，以便在进程需要重新启动的时候可以直接从共享内存中读取数据；另一方面，在服务进程因某种原因挂掉的时候，共享内存中的数据仍然存在，这样就可以减少带来的损失。关于共享内存的内容请自行 Google。在这里，实现了一种在共享内存中存取数据的 hash 表，它采用了多级存储、求模取余的方法，具体内容请看以下代码：
http://lmlf001.blog.sohu.com/

//hash_shm.h
#ifndef _STORMLI_HASH_SHM_H_
#define _STORMLI_HASH_SHM_H_

#include<cmath>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include<sys/shm.h>
using namespace std;

// hash_shm: a multi-level ("multi-row") hash table whose slots live in a
// raw memory region, normally a System V shared-memory segment.
//
// Template parameters:
//   valueType - type stored next to each key
//   maxLine   - upper bound used to pick the per-row prime moduli
//   lines     - number of rows (levels) in the table
//
// Keys are unsigned long; the value 0 is reserved to mark an empty slot.
template<typename valueType,unsigned long maxLine,int lines>
class hash_shm
{
public:
    // ---- construction / destruction ----

    // Use memory the caller already owns: startShm is its first byte and
    // shmSize its length in bytes. The program exits if the region is
    // too small for the table.
    hash_shm(void *startShm,unsigned long shmSize=sizeof(hash_node)*maxLine*lines);

    // Obtain the memory from the shared-memory segment identified by
    // shm_key; the program exits if the segment cannot be obtained.
    hash_shm(key_t shm_key);

    // Nothing is released here.
    ~hash_shm(){}

public:
    // ---- table operations ----

    // Returns 0 and records the slot in lastFound when _key is present,
    // -1 otherwise.
    int find(unsigned long _key);

    // Returns -1 when _key is absent; otherwise frees the slot by setting
    // its key to 0 and returns 0.
    int remove(unsigned long _key);

    // Returns 1 when _key is already stored, 0 after a successful insert,
    // -1 when no slot could be used.
    int insert(unsigned long _key,const valueType &_value);

    // Drops every entry.
    void clear();

public:
    // ---- statistics ----

    // Fraction of the slots currently in use.
    double getFullRate()const;

private:
    // One slot of the table: a key/value pair.
    struct hash_node{
        unsigned long key;    // 0 means "slot is empty"
        valueType value;      // payload stored for the key
    };

private:
    void *mem;                      // first byte of the slot area; the bytes at
                                    // mem+memSize hold the runtime counter
    unsigned long memSize;          // size in bytes of the memory area
    unsigned long modTable[lines];  // per-row modulus (largest primes found)
    unsigned long maxSize;          // total number of slots in the table
    unsigned long *currentSize;     // number of used slots, stored at mem+memSize
    void *lastFound;                // slot located by the most recent successful find()

private:
    // Helpers used by the constructors and the table operations.
    bool getShm(key_t shm_key);     // obtain/attach the shared memory
    void getMode();                 // fill modTable with the largest primes below maxLine
    void *getPos(unsigned int _row,unsigned long _col);    // address of slot (row,col)
};

// Constructor over caller-supplied memory.
//
//   startShm - first byte of the region that will hold the table; must not
//              be NULL (the program exits otherwise)
//   shmSize  - size of that region in bytes; must be able to hold maxSize
//              slots plus one extra slot's worth of space (the program
//              exits otherwise)
//
// Layout inside the region: maxSize hash_node slots, immediately followed
// by the unsigned long element counter.
//
// The region is NOT cleared, so a table left behind by an earlier run can
// be picked up again; call clear() when handing in fresh memory.
template<typename vT,unsigned long maxLine,int lines>
hash_shm<vT,maxLine,lines>::hash_shm(void *startShm,unsigned long shmSize)
{
    if(startShm==NULL){    // reject a missing region (the test used to be inverted)
        cerr<<"Argument error\n Please check the shm address\n";
        exit(-1);
    }
    mem=startShm;          // previously never stored, leaving mem indeterminate
    lastFound=NULL;

    getMode();
    maxSize=0;
    for(int i=0;i<lines;i++)    // total slot count = sum of the row moduli
        maxSize+=modTable[i];

    if(shmSize<sizeof(hash_node)*(maxSize+1)){    // table + room for the counter
        cerr<<"Not enough share memory space\n";
        exit(-1);
    }

    // memSize is the slot area only. The size check above guarantees one more
    // hash_node of space behind it, and a hash_node starts with an unsigned
    // long, so the counter placed at mem+memSize stays inside the caller's
    // region (it used to be placed at mem+shmSize, one past the end).
    memSize=sizeof(hash_node)*maxSize;
    currentSize=reinterpret_cast<unsigned long *>(static_cast<char *>(mem)+memSize);
    // The old "*currentSize<0" reset could never fire for an unsigned value,
    // so the counter has always been kept as found; that is preserved here.
}

// Constructor that obtains its memory through getShm(shm_key).
//
// The row moduli and the total slot count are computed first, then the
// memory is obtained; the program exits if getShm fails.
//
// The memory is deliberately not zeroed here (see the disabled memset), so
// whatever the segment already contains is kept. POSIX specifies that a
// newly created System V segment is zero-filled, which makes the counter
// start at 0 for a brand-new table.
//
// NOTE(review): the counter is addressed at mem+memSize, i.e. directly
// behind the slot area - confirm that the attached segment is at least
// memSize+sizeof(unsigned long) bytes long.
template<typename vT,unsigned long maxLine,int lines>
hash_shm<vT,maxLine,lines>::hash_shm(key_t shm_key)
{
    lastFound=NULL;    // nothing has been looked up yet

    getMode();
    maxSize=0;
    for(int i=0;i<lines;i++)    // total slot count = sum of the row moduli
        maxSize+=modTable[i];
    memSize=sizeof(hash_node)*maxSize;

    if(!getShm(shm_key)){
        exit(-1);
    }
//    memset(mem,0,memSize);

    // The former "if(*currentSize<0) *currentSize=0;" compared an unsigned
    // value against 0 and therefore never reset anything; it is dropped.
    currentSize=reinterpret_cast<unsigned long *>(static_cast<char *>(mem)+memSize);
}

// Look _key up, probing one candidate slot per row.
//
// Returns 0 and stores the matching slot in lastFound, or -1 when no row
// holds the key.
//
// Note: empty slots carry key 0, so a lookup of key 0 reports a hit on the
// first empty candidate slot. The pointer returned by getPos is used
// without a NULL check.
template<typename vT,unsigned long maxLine,int lines>
int hash_shm<vT,maxLine,lines>::find(unsigned long _key)
{
    int level=0;
    while(level<lines)
    {
        // column of the candidate slot within this row
        const unsigned long column=(_key+maxLine)%modTable[level];
        hash_node *slot=static_cast<hash_node *>(getPos(level,column));
        if(slot->key==_key){
            lastFound=slot;
            return 0;
        }
        ++level;
    }
    return -1;
}

// Remove _key from the table.
//
// Returns 0 when the entry was found and released, -1 when it is not
// stored. Key 0 is the empty-slot marker and can never be stored, so it is
// reported as "not found"; previously remove(0) matched an empty slot and
// decremented the element counter.
//
// Only the key is reset to 0; the value bytes are left as they are.
template<typename vT,unsigned long maxLine,int lines>
int hash_shm<vT,maxLine,lines>::remove(unsigned long _key)
{
    if(_key==0)return -1;           // reserved marker, never a real entry
    if(find(_key)==-1)return -1;    // not found
    hash_node *pH=(hash_node *)lastFound;    // slot recorded by find()
    pH->key=0;        // mark the slot as empty
    if(*currentSize>0)    // never let the unsigned counter wrap below zero
        (*currentSize)--;
    return 0;
}

// Store (_key,_value) in the first empty candidate slot, trying the rows
// in order.
//
// Returns
//    1  when _key is already stored (the stored value is left untouched),
//    0  when the pair was inserted,
//   -1  when the insert failed: every candidate slot of the key is taken,
//       or _key is 0. Key 0 marks an empty slot and cannot be stored;
//       it used to be reported as "already exists" or "table full"
//       depending on the slot contents.
template<typename vT,unsigned long maxLine,int lines>
int hash_shm<vT,maxLine,lines>::insert(unsigned long _key,const vT &_value)
{
    if(_key==0)return -1;         // reserved empty-slot marker
    if(find(_key)==0)return 1;    // the key exists
    unsigned long hash;
    hash_node *pH=NULL;
    for(int i=0;i<lines;i++){
        hash=(_key+maxLine)%modTable[i];    // candidate column in row i
        pH=(hash_node *)getPos(i,hash);
        if(pH->key==0){        // free slot: store the pair here
            pH->key=_key;
            pH->value=_value;
            (*currentSize)++;
            return 0;
        }
    }
    return -1;    // every candidate slot is occupied
}

// Empty the table: reset the element counter and zero the first memSize
// bytes starting at mem, which turns every slot key into 0 (= empty).
template<typename vT,unsigned long maxLine,int lines>
void hash_shm<vT,maxLine,lines>::clear()
{
    // The counter is stored at mem+memSize, outside the range zeroed below,
    // so the two steps do not depend on each other.
    *currentSize=0;
    memset(mem,0,memSize);
}

// Obtain the shared-memory segment identified by shm_key and attach it.
//
// An existing segment is used when there is one; otherwise a new segment
// is created with mode 0666. On success mem points at the attached segment
// and true is returned; on failure a message is written to cerr and false
// is returned.
//
// The segment is requested with room for the slot area (memSize bytes)
// plus the unsigned long element counter that the class keeps at
// mem+memSize. The counter space used not to be requested, so the counter
// was addressed past the end of the segment. A segment created earlier
// with the smaller size is rejected by shmget (EINVAL) and has to be
// removed before it can be recreated.
template<typename vT,unsigned long maxLine,int lines>
bool hash_shm<vT,maxLine,lines>::getShm(key_t shm_key)
{
    const unsigned long segmentSize=memSize+sizeof(unsigned long);

    int shm_id=shmget(shm_key,segmentSize,0666);
    if(shm_id==-1)    // no usable existing segment
    {
        shm_id=shmget(shm_key,segmentSize,0666|IPC_CREAT);    // create it
        if(shm_id==-1){
            cerr<<"Share memory get failed\n";
            return false;
        }
    }

    mem=shmat(shm_id,NULL,0);    // attach the segment
    // shmat reports failure as (void *)-1. Compare pointers directly:
    // the former int(mem) cast truncates the address on 64-bit targets.
    if(mem==reinterpret_cast<void *>(-1)){
        cerr<<"shmat system call failed\n";
        return false;
    }
    return true;
}

// Fill modTable with `lines` primes, searching downwards from maxLine.
//
// Every prime above 3 has the form 6t-1 or 6t+1, so only those two
// candidates are examined for each t, starting at t=maxLine/6. For each t
// the candidate 6t+1 is stored before 6t-1, which yields a descending
// table.
//
// The program exits with status -1 when maxLine is below 5 or when fewer
// than `lines` such primes exist. The old loop condition "t>=0,z<lines"
// used the comma operator on an unsigned t, so running out of primes let t
// reach 0 (storing 1 and a wrapped-around 6*0-1) and then wrap around
// instead of stopping.
template<typename vT,unsigned long maxLine,int lines>
void hash_shm<vT,maxLine,lines>::getMode()
{
    if(maxLine<5){exit(-1);}

    int filled=0;    // number of primes stored so far
    for(unsigned long t=maxLine/6;t>=1&&filled<lines;t--)
    {
        const unsigned long base=6*t;
        const unsigned long candidates[2]={base+1,base-1};
        for(int c=0;c<2&&filled<lines;c++)
        {
            const unsigned long n=candidates[c];
            // n is neither even nor a multiple of 3, so trial division by
            // the numbers 6p-1 (d) and 6p+1 (d+2) up to sqrt(n) is enough.
            // "d<=n/d" is the overflow-free form of "d*d<=n".
            bool prime=true;
            for(unsigned long d=5;prime&&d<=n/d;d+=6)
            {
                if(n%d==0||n%(d+2)==0)prime=false;
            }
            if(prime)modTable[filled++]=n;
        }
    }

    if(filled<lines){    // not enough primes below maxLine for this many rows
        cerr<<"Not enough primes below maxLine for the requested number of lines\n";
        exit(-1);
    }
}

// Translate (row, column) into the address of the corresponding slot.
//
// The rows are laid out one after another, so the slot index is the sum of
// the sizes of all preceding rows plus the column. Returns NULL when that
// index is not below maxSize.
template<typename vT,unsigned long maxLine,int lines>
void *hash_shm<vT,maxLine,lines>::getPos(unsigned int _row,unsigned long _col)
{
    unsigned long slotIndex=_col;
    for(unsigned int level=0;level<_row;++level)    // skip the rows in front
        slotIndex+=modTable[level];

    if(slotIndex<maxSize)
        return static_cast<char *>(mem)+slotIndex*sizeof(hash_node);
    return NULL;
}

// Fill ratio of the table: stored element count divided by the total
// number of slots.
template<typename vT,unsigned long maxLine,int lines>
double hash_shm<vT,maxLine,lines>::getFullRate()const
{
    const double used=static_cast<double>(*currentSize);
    return used/maxSize;
}

#endif

//test.cpp

#include"hash_shm.h"
#include<cstdlib>
using namespace std;
// Fill-rate experiment: 100 times, insert random keys until an insert
// fails, print the fill rate reached, then empty the table again.
// Finally print the average fill rate over all rounds.
int main()
{
    const int kRounds=100;
    hash_shm<int,1000,100> ht(key_t(999));
    double rateSum=0.0;
//    ht.clear();
    for(int round=0;round<kRounds;++round){
        srand(time(NULL)+round);    // different seed for every round
        while(ht.insert(rand(),0)!=-1){
            // keep inserting until insert reports failure (-1)
        }
        const double fill=ht.getFullRate();
        cout<<fill<<endl;
        rateSum+=fill;
        ht.clear();
    }
    cout<<"\n\n\n";
    cout<<rateSum/kRounds<<endl;
}

对这段代码作测试的时候发现了一些问题：用 gprof 查看函数耗时的时候发现，getPos 函数占用了大部分的执行时间，是主要的性能瓶颈。后来又新设立了一个数组，用来记录每行开始时的位置，性能提高了很多，改动部分的代码如下：

// Excerpt of the revised class: only the members relevant to the change
// are shown, the rest is elided with "...".
template<typename valueType,unsigned long maxLine,int lines>
class hash_shm
{
private:
    void *mem;        //the start position of the share memory  // the mem+memSize  space used to storage the runtime data:currentSize
    unsigned long memSize;    //the size of the share memory
    unsigned long modTable[lines];    //modtable,the largest primes
    unsigned long modTotal[lines];    //modTotal[i] is the sum of modTable[x] for x<i, i.e. the index of the first slot of row i
                    //used by getPos to improve the performance
    ...
};

// Excerpt of the revised constructor: besides summing the row sizes into
// maxSize, the loop now also fills modTotal with the running prefix sums.
// Unchanged parts are elided with "...".
template<typename vT,unsigned long maxLine,int lines>
hash_shm<vT,maxLine,lines>::hash_shm(void *startShm,unsigned long shmSize)
{
    ...

    int i;
    for(i=0;i<lines;i++){    //count the maxSize
        maxSize+=modTable[i];
        if(i!=0)modTotal[i]=modTotal[i-1]+modTable[i-1];    //slots in front of row i
        else modTotal[i]=0;    //row 0 starts at slot 0
    }
    ...
}

// Excerpt of the revised key-based constructor: same prefix-sum
// bookkeeping for modTotal as in the other constructor. Unchanged parts
// are elided with "...".
template<typename vT,unsigned long maxLine,int lines>
hash_shm<vT,maxLine,lines>::hash_shm(key_t shm_key)
{    //constructor with get share memory
    getMode();
    maxSize=0;
    for(int i=0;i<lines;i++){
        maxSize+=modTable[i];    //total number of slots
        if(i!=0)modTotal[i]=modTotal[i-1]+modTable[i-1];    //slots in front of row i
        else modTotal[i]=0;    //row 0 starts at slot 0
    }
    ...
}

// Translate (row, column) into the address of the corresponding slot,
// using the precomputed row start modTotal[_row] instead of summing the
// preceding rows on every call. Returns NULL when the resulting slot index
// is not below maxSize.
template<typename vT,unsigned long maxLine,int lines>
void *hash_shm<vT,maxLine,lines>::getPos(unsigned int _row,unsigned long _col)
{
    const unsigned long slotIndex=modTotal[_row]+_col;
    if(slotIndex>=maxSize)
        return NULL;
    return static_cast<char *>(mem)+slotIndex*sizeof(hash_node);
}

新增了一个用于遍历的函数foreach

// Visit every occupied slot in storage order and call fn(key, value) for
// it. Slots whose key is 0 are empty and skipped. The value is passed by
// reference to the slot's own value member.
template<typename vT,unsigned long maxLine,int lines>
void hash_shm<vT,maxLine,lines>::foreach(void (*fn)(unsigned long _key,vT &_value))
{
    // number of slots = first slot index of the last row + size of that row
    const unsigned long slotCount=modTable[lines-1]+modTotal[lines-1];
    hash_node *slot=static_cast<hash_node *>(mem);
    hash_node *const stop=slot+slotCount;
    while(slot<stop)
    {
        if(slot->key!=0)
            fn(slot->key,slot->value);
        ++slot;
    }
}

为了利于使用新增一个用于查找的函数find,该函数同find(_key)类似，如果找到_key节点，把它赋给_value以返回

// Lookup variant described by the article: behaves like find(_key) and,
// when the node for _key is found, assigns it to _value so the caller
// receives it. NOTE(review): the definition is not shown here; presumably
// the return codes match find(_key) - confirm.
int find(unsigned long _key,vT &_value);

posted on 2007-09-08 21:17 芥之舟阅读(7673) 评论(3) 编辑收藏引用所属分类: 数据结构和算法

# re: 使用共享内存的多级哈希表的一种实现 2012-06-16 16:44 ydsec

不错，能发进一步改进，value如果是一个结构体，如何实现呢？内存用格式表示数据回复更多评论

# re: 使用共享内存的多级哈希表的一种实现 2014-08-28 13:14 null

if(*(currentSize=(unsigned long *)((long)mem+memSize))<0)
这一句不是很明白，回复更多评论

# re: 使用共享内存的多级哈希表的一种实现 2014-10-13 10:51 abc

p*6<=sqrt(m+1)+2 这个是什么原理？回复更多评论

刷新评论列表

只有注册用户登录后才能发表评论。
【推荐】100%开源！大型工业跨平台软件C++源码提供，建模，组态！

相关文章: 使用共享内存的多级哈希表的一种实现哈希表的一个实现素数算法

网站导航: 博客园 IT新闻 BlogJava 博问 Chat2DB 管理

# re: 使用共享内存的多级哈希表的一种实现 2012-06-16 16:44 ydsec

# re: 使用共享内存的多级哈希表的一种实现 2014-08-28 13:14 null

# re: 使用共享内存的多级哈希表的一种实现 2014-10-13 10:51 abc

洗尘斋

导航

公告

留言簿(6)

随笔分类(41)

随笔档案(40)

阅读排行榜

评论排行榜

常用链接

统计

最新评论

使用共享内存的多级哈希表的一种实现

评论