redis memory

这一节介绍redis中内存分配相关的api，下一节介绍redis内存使用过程中的一些细节。

redis中所有的内存分配都由自己接管。主要由zmalloc.c和zmalloc.h中的zmalloc、zremalloc、zfree实现。

static void zmalloc_oom(size_t size) {
#ifdef _WIN32
    fprintf(stderr, "zmalloc: Out of memory trying to allocate %llu bytes\n",
        (unsigned long long)size);
#else
    fprintf(stderr, "zmalloc: Out of memory trying to allocate %zu bytes\n",
        size);
#endif
    fflush(stderr);
    abort();
}

void *zmalloc(size_t size) {
    void *ptr = malloc(size+PREFIX_SIZE);

    if (!ptr) zmalloc_oom(size);
#ifdef HAVE_MALLOC_SIZE
    update_zmalloc_stat_alloc(zmalloc_size(ptr),size);
    return ptr;
#else
    *((size_t*)ptr) = size;
    update_zmalloc_stat_alloc(size+PREFIX_SIZE,size);
    return (char*)ptr+PREFIX_SIZE;
#endif
}

void *zcalloc(size_t size) {
    void *ptr = calloc(1, size+PREFIX_SIZE);

    if (!ptr) zmalloc_oom(size);
#ifdef HAVE_MALLOC_SIZE
    update_zmalloc_stat_alloc(zmalloc_size(ptr),size);
    return ptr;
#else
    *((size_t*)ptr) = size;
    update_zmalloc_stat_alloc(size+PREFIX_SIZE,size);
    return (char*)ptr+PREFIX_SIZE;
#endif
}

void *zrealloc(void *ptr, size_t size) {
#ifndef HAVE_MALLOC_SIZE
    void *realptr;
#endif
    size_t oldsize;
    void *newptr;

    if (ptr == NULL) return zmalloc(size);
#ifdef HAVE_MALLOC_SIZE
    oldsize = zmalloc_size(ptr);
    newptr = realloc(ptr,size);
    if (!newptr) zmalloc_oom(size);

    update_zmalloc_stat_free(oldsize);
    update_zmalloc_stat_alloc(zmalloc_size(newptr),size);
    return newptr;
#else
    realptr = (char*)ptr-PREFIX_SIZE;
    oldsize = *((size_t*)realptr);
    newptr = realloc(realptr,size+PREFIX_SIZE);
    if (!newptr) zmalloc_oom(size);

    *((size_t*)newptr) = size;
    update_zmalloc_stat_free(oldsize);
    update_zmalloc_stat_alloc(size,size);
    return (char*)newptr+PREFIX_SIZE;
#endif
}
void zfree(void *ptr) {
#ifndef HAVE_MALLOC_SIZE
    void *realptr;
    size_t oldsize;
#endif

    if (ptr == NULL) return;
#ifdef HAVE_MALLOC_SIZE
    update_zmalloc_stat_free(zmalloc_size(ptr));
    free(ptr);
#else
    realptr = (char*)ptr-PREFIX_SIZE;
    oldsize = *((size_t*)realptr);
    update_zmalloc_stat_free(oldsize+PREFIX_SIZE);
    free(realptr);
#endif
}

可以看到，系统中除了分配请求大小的内存外，还在该内存块头部保存了该内存块的大小，这样，释放的时候可以通过该大小找到该内存块的起始位置.
另外对于 apple系统，可以用malloc_size（redis_malloc_size是对它的封装）取得指针所指向的内存块大小，因此就不需要手动保存大小了。
介绍zmalloc/zfree函数时，我们看到redis会调用increment_used_memory/decrement_used_memory，这两个宏其实就是对static变量used_memory进行指定大小的增加/减少，该变量保存了redis所使用的内存大小
分配内存的线程安全性是由底层系统的malloc系列函数来保证的，而used_memory变量的线程安全性取决于线程锁zmalloc_thread_safe的值。由于redis处理客户端连接是单进程单线程事件多路循环的，那么就有一个疑问，redis在什么情况下，需要多线程保护了？在后续的介绍中，我们会看到，redis的虚拟内存（VM）可能启用多线程。
我们来看看zmalloc_thread_safe 的相关处理。zmalloc_thread_safe 初始值为0，仅在zmalloc_enable_thread_safeness函数中改为1，而zmalloc_enable_thread_safeness仅在全局变量server.vm_max_threads!=0（也即vm启用多线程时），才在vmInit中调用。
内存相关的函数，除了zmalloc、zremalloc、zfree等直接操作内存外，常用的就是使用zmalloc_used_memory返回已分配的内存大小了，redis对内存的监控仅限于此。
redis在某些位置输出log时需要调用该函数输出已分配的内存大小外，比如serverCron中的调用： int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {

    int j, loops = server.cronloops;
    REDIS_NOTUSED(eventLoop);
    REDIS_NOTUSED(id);
    REDIS_NOTUSED(clientData);

    /* We take a cached value of the unix time in the global state because
     * with virtual memory and aging there is to store the current time
     * in objects at every object access, and accuracy is not needed.
     * To access a global var is faster than calling time(NULL) */
    server.unixtime = time(NULL);

    /* We have just 22 bits per object for LRU information.
     * So we use an (eventually wrapping) LRU clock with 10 seconds resolution.
     * 2^22 bits with 10 seconds resoluton is more or less 1.5 years.
     *
     * Note that even if this will wrap after 1.5 years it's not a problem,
     * everything will still work but just some object will appear younger
     * to Redis. But for this to happen a given object should never be touched
     * for 1.5 years.
     *
     * Note that you can change the resolution altering the
     * REDIS_LRU_CLOCK_RESOLUTION define.
     */
    updateLRUClock();

    /* Record the max memory used since the server was started. */
    if (zmalloc_used_memory() > server.stat_peak_memory)
        server.stat_peak_memory = zmalloc_used_memory();

    /* We received a SIGTERM, shutting down here in a safe way, as it is
     * not ok doing so inside the signal handler. */
    if (server.shutdown_asap) {
        if (prepareForShutdown() == REDIS_OK) exit(0);
        redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
    }

    /* Show some info about non-empty databases */
    for (j = 0; j < server.dbnum; j++) {
        long long size, used, vkeys;

        size = dictSlots(server.db[j].dict);
        used = dictSize(server.db[j].dict);
        vkeys = dictSize(server.db[j].expires);
        if (!(loops % 50) && (used || vkeys)) {
            redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
            /* dictPrintStats(server.dict); */
        }
    }

    /* We don't want to resize the hash tables while a bacground saving
     * is in progress: the saving child is created using fork() that is
     * implemented with a copy-on-write semantic in most modern systems, so
     * if we resize the HT while there is the saving child at work actually
     * a lot of memory movements in the parent will cause a lot of pages
     * copied. */
    if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1) {
        if (!(loops % 10)) tryResizeHashTables();
        if (server.activerehashing) incrementallyRehash();
    }

    /* Show information about connected clients */
    if (!(loops % 50)) {
#ifdef _WIN32
        redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %llu bytes in use",
            listLength(server.clients)-listLength(server.slaves),
            listLength(server.slaves),
            (unsigned long long)zmalloc_used_memory());
#else
        redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
            listLength(server.clients)-listLength(server.slaves),
            listLength(server.slaves),
            zmalloc_used_memory());
#endif
    }

    /* Close connections of timedout clients */
    if ((server.maxidletime && !(loops % 100)) || server.bpop_blocked_clients)
        closeTimedoutClients();

    /* Start a scheduled AOF rewrite if this was requested by the user while
     * a BGSAVE was in progress. */
    if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1 &&
        server.aofrewrite_scheduled)
    {
        rewriteAppendOnlyFileBackground();
    }

    /* Check if a background saving or AOF rewrite in progress terminated */
    if (server.bgsavechildpid != -1 || server.bgrewritechildpid != -1) {
        int statloc;
        pid_t pid;

        if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
            if (pid == server.bgsavechildpid) {
                backgroundSaveDoneHandler(statloc);
            } else {
                backgroundRewriteDoneHandler(statloc);
            }
            updateDictResizePolicy();
        }
    } else {
         time_t now = time(NULL);

        /* If there is not a background saving in progress check if
         * we have to save now */
         for (j = 0; j < server.saveparamslen; j++) {
            struct saveparam *sp = server.saveparams+j;

            if (server.dirty >= sp->changes &&
                now-server.lastsave > sp->seconds) {
                redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving

",
                    sp->changes, sp->seconds);
                rdbSaveBackground(server.dbfilename);
#ifdef _WIN32
                /* On windows this will save in foreground and block */
                /* Here we are allready saved, and we should return */
                return 100;
#else
                break;
#endif
            }
         }

         /* Trigger an AOF rewrite if needed */
         if (server.bgsavechildpid == -1 &&
             server.bgrewritechildpid == -1 &&
             server.auto_aofrewrite_perc &&
             server.appendonly_current_size > server.auto_aofrewrite_min_size)
         {
            long long base = server.auto_aofrewrite_base_size ?
                            server.auto_aofrewrite_base_size : 1;
            long long growth = (server.appendonly_current_size*100/base) - 100;
            if (growth >= server.auto_aofrewrite_perc) {
                redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
                rewriteAppendOnlyFileBackground();
            }
        }
    }

    /* If we postponed an AOF buffer flush, let's try to do it every time the
     * cron function is called. */
    if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);

    /* Expire a few keys per cycle, only if this is a master.
     * On slaves we wait for DEL operations synthesized by the master
     * in order to guarantee a strict consistency. */
    if (server.masterhost == NULL) activeExpireCycle();

    /* Replication cron function -- used to reconnect to master and
     * to detect transfer failures. */
    if (!(loops % 10)) replicationCron();

    server.cronloops++;
    return 100;
}

used_memory变量保存了redis当前所使用的内存。其值常用来跟server.vm_max_memory、server.maxmemory进行比较。vm_max_memory表示redis vm启动swap的内存阈值，在超过该值后应启动vm的swap操作；maxmemory表示redis允许分配的最大内存，在超过该值后应进行内存的释放。这些比较主要在rdbLoad、loadAppendOnlyFile、serverCron、processCommand、vmThreadedIOCompletedJob等函数中。值得注意的是，尽管redis会尽量将内存使用量降低到server.maxmemory（甚至server.vm_max_memory）之下，但并不对此保证。

接下来的一节我们分析下rdbLoad、loadAppendOnlyFile、serverCron、processCommand、vmThreadedIOCompletedJob的内存检查策略。

在rdbLoad、loadAppendOnlyFile（分别代表以快照、aof方式进行数据的持久化后db的加载）中会检查vm_max_memory。超过vm_max_memory后，会先调用vmSwapOneObjectBlocking swap值到vm中，若一直到vmSwapOneObjectBlocking返回出错时，内存使用量还是超过vm_max_memory，则置swap_all_values为1，这样后面加载的数据都直接使用vmSwapObjectBlocking被swap到vm中，至于vmSwapOneObjectBlocking、vmSwapObjectBlocking怎么实现的（二者都是阻塞方式），我们在vm章节中再做详细分析。当然，在这两个函数中，对vm_max_memory的比较有所放宽，也就是只有比vm_max_memory多32M字节时，才进行swap的操作。从这里也可以看出，加载db时并不和server.maxmemory进行比较。此时，若超过最大内存限制，redis此时不管，也许加载时直接down掉（超过可用内存），或者等到加载完后运行到后面介绍的释放过程再进行释放。当然，检查vm_max_memory，并调用vmSwapOneObjectBlocking等函数是否起作用，还要看是否开启vm机制

int rdbLoad(char *filename) {
    FILE *fp;
    uint32_t dbid;
    int type, rdbver;
    redisDb *db = server.db+0;
    char buf[1024];
    time_t expiretime, now = time(NULL);
    long loops = 0;

#ifdef _WIN32
    fp = fopen(filename,"rb");
#else
    fp = fopen(filename,"r");
#endif
    if (!fp) {
        errno = ENOENT;
        return REDIS_ERR;
    }
    if (fread(buf,9,1,fp) == 0) goto eoferr;
    buf[9] = '\0';
    if (memcmp(buf,"REDIS",5) != 0) {
        fclose(fp);
        redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
        errno = EINVAL;
        return REDIS_ERR;
    }
    rdbver = atoi(buf+5);
    if (rdbver < 1 || rdbver > 2) {
        fclose(fp);
        redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
        errno = EINVAL;
        return REDIS_ERR;
    }

    startLoading(fp);
    while(1) {
        robj *key, *val;

        expiretime = -1;

        /* Serve the clients from time to time */
        if (!(loops++ % 1000)) {
            loadingProgress((off_t)ftello(fp));
            aeProcessEvents(server.el, AE_FILE_EVENTS|AE_DONT_WAIT);
        }

        /* Read type. */
        if ((type = rdbLoadType(fp)) == -1) goto eoferr;
        if (type == REDIS_EXPIRETIME) {
            if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
            /* We read the time so we need to read the object type again */
            if ((type = rdbLoadType(fp)) == -1) goto eoferr;
        }
        if (type == REDIS_EOF) break;
        /* Handle SELECT DB opcode as a special case */
        if (type == REDIS_SELECTDB) {
            if ((dbid = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR)
                goto eoferr;
            if (dbid >= (unsigned)server.dbnum) {
                redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
                exit(1);
            }
            db = server.db+dbid;
            continue;
        }
        /* Read key */
        if ((key = rdbLoadStringObject(fp)) == NULL) goto eoferr;
        /* Read value */
        if ((val = rdbLoadObject(type,fp)) == NULL) goto eoferr;
        /* Check if the key already expired. This function is used when loading
         * an RDB file from disk, either at startup, or when an RDB was
         * received from the master. In the latter case, the master is
         * responsible for key expiry. If we would expire keys here, the
         * snapshot taken by the master may not be reflected on the slave. */
        if (server.masterhost == NULL && expiretime != -1 && expiretime < now) {
            decrRefCount(key);
            decrRefCount(val);
            continue;
        }
        /* Add the new object in the hash table */
        dbAdd(db,key,val);

        /* Set the expire time if needed */
        if (expiretime != -1) setExpire(db,key,expiretime);

        decrRefCount(key);
    }
    fclose(fp);
    stopLoading();
    return REDIS_OK;

eoferr: /* unexpected end of file is handled here with a fatal exit */
    redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
    exit(1);
    return REDIS_ERR; /* Just to avoid warning */
}

serverCron是redis中的定时循环函数（100ms循环一次），serverCron是先使用tryFreeOneObjectFromFreelist释放些内存，只有在释放内存还是不够时才启用vm的swap操作，并根据是否启用多线程swap调用vmSwapOneObjectBlocking(阻塞方式) 或者vmSwapOneObjectThreaded（多线程）。另外，如果是多线程方式，则只swap一个object（并不是立即swap），因为在此方式下，会将swap的操作作为一个job，插入到工作线程中，而当该工作线程完成后，会自动调用vmThreadedIOCompletedJob，而在vmThreadedIOCompletedJob中，也有内存大小检查的操作（内存大小超过阈值时，也是调用vmSwapOneObjectThreaded）。可以看到，如果serverCron中的这段代码中的retval == REDIS_ERR的话，则一段时间内无法保证使用的内存在指定的范围之内。（swap的策略在后面VM章节中介绍。）

而在处理客户端命令的核心函数processCommand中，在超过内存阈值maxmemory时，会先调用freeMemoryIfNeeded释放一些内存；在释放内存后若还是超过了设置的内存大小，则在客户端命令设置了REDIS_CMD_DENYOOM参数时返回内存出错信息，否则还是会正常处理。

int processCommand(redisClient *c) {
    /* The QUIT command is handled separately. Normal command procs will
     * go through checking for replication and QUIT will cause trouble
     * when FORCE_REPLICATION is enabled and would be implemented in
     * a regular command proc. */
    if (!strcasecmp(c->argv[0]->ptr,"quit")) {
        addReply(c,shared.ok);
        c->flags |= REDIS_CLOSE_AFTER_REPLY;
        return REDIS_ERR;
    }

    /* Now lookup the command and check ASAP about trivial error conditions
     * such as wrong arity, bad command name and so forth. */
    c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
    if (!c->cmd) {
        addReplyErrorFormat(c,"unknown command '%s'",
            (char*)c->argv[0]->ptr);
        return REDIS_OK;
    } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
               (c->argc < -c->cmd->arity)) {
        addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
            c->cmd->name);
        return REDIS_OK;
    }

    /* Check if the user is authenticated */
    if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
    {
        addReplyError(c,"operation not permitted");
        return REDIS_OK;
    }

    /* Handle the maxmemory directive.
     *
     * First we try to free some memory if possible (if there are volatile
     * keys in the dataset). If there are not the only thing we can do
     * is returning an error. */
    if (server.maxmemory) {
        int retval = freeMemoryIfNeeded();
        if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
            addReplyError(c,
                "command not allowed when used memory > 'maxmemory'");
            return REDIS_OK;
        }
    }

    /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
    if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
        &&
        c->cmd->proc != subscribeCommand &&
        c->cmd->proc != unsubscribeCommand &&
        c->cmd->proc != psubscribeCommand &&
        c->cmd->proc != punsubscribeCommand) {
        addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context");
        return REDIS_OK;
    }

    /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
     * we are a slave with a broken link with master. */
    if (server.masterhost && server.replstate != REDIS_REPL_CONNECTED &&
        server.repl_serve_stale_data == 0 &&
        c->cmd->proc != infoCommand && c->cmd->proc != slaveofCommand)
    {
        addReplyError(c,
            "link with MASTER is down and slave-serve-stale-data is set to no");
        return REDIS_OK;
    }

    /* Loading DB? Return an error if the command is not INFO */
    if (server.loading && c->cmd->proc != infoCommand) {
        addReply(c, shared.loadingerr);
        return REDIS_OK;
    }

    /* Exec the command */
    if (c->flags & REDIS_MULTI &&
        c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
        c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
    {
        queueMultiCommand(c);
        addReply(c,shared.queued);
    } else {
        call(c);
    }
    return REDIS_OK;
}

再来看看 freeMemoryIfNeeded是怎么释放内存的。释放时先调用tryFreeOneObjectFromFreelist释放内存，在内存仍不够时，会试图释放带 expire标记的key。对于每个db中的expire dict，每次会随机选择3个key，并删除会最先expire的key（此时就很可能丢失带expire标记的数据了）。

int freeMemoryIfNeeded(void) {
    size_t mem_used, mem_tofree, mem_freed;
    int slaves = listLength(server.slaves);

    /* Remove the size of slaves output buffers and AOF buffer from the
     * count of used memory. */
    mem_used = zmalloc_used_memory();
    if (slaves) {
        listIter li;
        listNode *ln;

        listRewind(server.slaves,&li);
        while((ln = listNext(&li))) {
            redisClient *slave = listNodeValue(ln);
            unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
            if (obuf_bytes > mem_used)
                mem_used = 0;
            else
                mem_used -= obuf_bytes;
        }
    }
    if (server.appendonly) {
        mem_used -= sdslen(server.aofbuf);
        mem_used -= sdslen(server.bgrewritebuf);
    }

    /* Check if we are over the memory limit. */
    if (mem_used <= server.maxmemory) return REDIS_OK;

    if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
        return REDIS_ERR; /* We need to free memory, but policy forbids. */

    /* Compute how much memory we need to free. */
    mem_tofree = (size_t)(mem_used - server.maxmemory);
    mem_freed = 0;
    while (mem_freed < mem_tofree) {
        int j, k, keys_freed = 0;

        for (j = 0; j < server.dbnum; j++) {
            long bestval = 0; /* just to prevent warning */
            sds bestkey = NULL;
            struct dictEntry *de;
            redisDb *db = server.db+j;
            dict *dict;

            if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
                server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM)
            {
                dict = server.db[j].dict;
            } else {
                dict = server.db[j].expires;
            }
            if (dictSize(dict) == 0) continue;

            /* volatile-random and allkeys-random policy */
            if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM ||
                server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM)
            {
                de = dictGetRandomKey(dict);
                bestkey = dictGetEntryKey(de);
            }

            /* volatile-lru and allkeys-lru policy */
            else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
                server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
            {
                for (k = 0; k < server.maxmemory_samples; k++) {
                    sds thiskey;
                    long thisval;
                    robj *o;

                    de = dictGetRandomKey(dict);
                    thiskey = dictGetEntryKey(de);
                    /* When policy is volatile-lru we need an additonal lookup
                     * to locate the real key, as dict is set to db->expires. */
                    if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
                        de = dictFind(db->dict, thiskey);
                    o = dictGetEntryVal(de);
                    thisval = estimateObjectIdleTime(o);

                    /* Higher idle time is better candidate for deletion */
                    if (bestkey == NULL || thisval > bestval) {
                        bestkey = thiskey;
                        bestval = thisval;
                    }
                }
            }

            /* volatile-ttl */
            else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) {
                for (k = 0; k < server.maxmemory_samples; k++) {
                    sds thiskey;
                    long thisval;

                    de = dictGetRandomKey(dict);
                    thiskey = dictGetEntryKey(de);
                    thisval = (long) dictGetEntryVal(de);

                    /* Expire sooner (minor expire unix timestamp) is better
                     * candidate for deletion */
                    if (bestkey == NULL || thisval < bestval) {
                        bestkey = thiskey;
                        bestval = thisval;
                    }
                }
            }

            /* Finally remove the selected key. */
            if (bestkey) {
                long long delta;

                robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
                propagateExpire(db,keyobj);
                /* We compute the amount of memory freed by dbDelete() alone.
                 * It is possible that actually the memory needed to propagate
                 * the DEL in AOF and replication link is greater than the one
                 * we are freeing removing the key, but we can't account for
                 * that otherwise we would never exit the loop.
                 *
                 * AOF and Output buffer memory will be freed eventually so
                 * we only care about memory used by the key space. */
                delta = (long long) zmalloc_used_memory();
                dbDelete(db,keyobj);
                delta -= (long long) zmalloc_used_memory();
                mem_freed += (size_t)delta;
                server.stat_evictedkeys++;
                decrRefCount(keyobj);
                keys_freed++;

                /* When the memory to free starts to be big enough, we may
                 * start spending so much time here that is impossible to
                 * deliver data to the slaves fast enough, so we force the
                 * transmission here inside the loop. */
                if (slaves) flushSlavesOutputBuffers();
            }
        }
        if (!keys_freed) return REDIS_ERR; /* nothing to free

*/
}
return REDIS_OK;
}

最后我们来看看tryFreeOneObjectFromFreelist函数。redis会将系统中的无效list node（即该node已解除对其内部value的引用）放到server.objfreelist链表中，平时如果需要list node，可直接从该list中获得一个，但此刻因为内存不够，该释放它们了。

static int tryFreeOneObjectFromFreelist(void) {
    robj *o;

    if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
    if (listLength(server.objfreelist)) {
        listNode *head = listFirst(server.objfreelist);
        o = listNodeValue(head);
        listDelNode(server.objfreelist,head);
        if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
        zfree(o);
        return REDIS_OK;
    } else {
        if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
        return REDIS_ERR;
    }
}

前面的过程搞清后，就可以回答一个问题了。
redis不开启VM时，内存超过maxmemory设置后，是怎么处理的？

不开启VM，redis并不保证内存使用一定低于maxmemory，只是会尽可能释放。

先看client，对于有些会增加内存使用的命令，比如set，此时会返回出错信息。

释放策略是：因为redis会保存先前已不再使用的object，也就是一个object链表，平时这个链表的作用使得redis可以直接从上面取得一个object，不需要使用zmalloc分配。

当内存超过阈值时，这个链表就会首先被释放了。

若还是超过内存阈值，redis对于每个db，会随机选择3个带expire标记的key，并释放最先expire的key及其val。

但如果此后还是超过内存阈值（把所有带expire标记的都释放后），我想redis是没办法了。

尽管如此，redis使用的内存>设置的maxmemory，只会出现在一开始加载的数据就超过maxmemroy。这样的话，client调用set等命令会一直返回出错信息。

posted on 2012-08-02 17:36 探路者阅读(3999) 评论(0) 编辑收藏引用所属分类: 学习笔记

只有注册用户登录后才能发表评论。
【推荐】100%开源！大型工业跨平台软件C++源码提供，建模，组态！

相关文章: 境界的提升 shell 脚本 Oracle 学习进阶 github clone boost 库成长、团队、信任设计模式六大原则 Component Object Framework bjam 参数删除.svn文件夹 LuaJIT之callback要注意的地方

网站导航: 博客园 IT新闻 BlogJava 博问 Chat2DB 管理

快乐的天空

redis memory

导航

统计

常用链接

留言簿

随笔分类

随笔档案

文章分类

文章档案

新闻档案

Android

Compiler Course

VIM

编译技术集合

测试

高性能计算

个人博客

框架/组件/库

搜索

最新评论

阅读排行榜

评论排行榜