/* Bring the object referenced by 'vp' back from the swap file into memory
 * and hand the newly read object to the caller.
 *
 * When 'preview' is non-zero the caller only gets the unserialized value:
 * the swap pages are not marked as free and 'vp' itself is not released.
 * Otherwise the pages are marked as free, 'vp' is freed and the counter of
 * swapped objects is decremented.
 *
 * The swap-ins counter is incremented in both cases. */
robj *vmGenericLoadObject(vmpointer *vp, int preview) {
    robj *loaded;

    redisAssert(vp->type == REDIS_VMPOINTER &&
        (vp->storage == REDIS_VM_SWAPPED || vp->storage == REDIS_VM_LOADING));

    loaded = vmReadObjectFromSwap(vp->page,vp->vtype);
    if (preview) {
        redisLog(REDIS_DEBUG, "VM: object %p previewed from disk", (void*)vp);
    } else {
        redisLog(REDIS_DEBUG, "VM: object %p loaded from disk", (void*)vp);
        vmMarkPagesFree(vp->page,vp->usedpages);
        zfree(vp);
        server.vm_stats_swapped_objects--;
    }
    server.vm_stats_swapins++;
    return loaded;
}
void decrRefCount(void *obj) { robj *o = obj; /* Object is a swapped out value, or in the process of being loaded. */ if (server.vm_enabled && (o->storage == REDIS_VM_SWAPPED || o->storage == REDIS_VM_LOADING)) { vmpointer *vp = obj; if (o->storage == REDIS_VM_LOADING) vmCancelThreadedIOJob(o); vmMarkPagesFree(vp->page,vp->usedpages); server.vm_stats_swapped_objects--; zfree(vp); return; } if (o->refcount <= 0) redisPanic("decrRefCount against refcount <= 0"); /* Object is in memory, or in the process of being swapped out. * * If the object is being swapped out, abort the operation on * decrRefCount even if the refcount does not drop to 0: the object * is referenced at least two times, as value of the key AND as * job->val in the iojob. So if we don't invalidate the iojob, when it is * done but the relevant key was removed in the meantime, the * complete jobs handler will not find the key about the job and the * assert will fail. */ if (server.vm_enabled && o->storage == REDIS_VM_SWAPPING) vmCancelThreadedIOJob(o); if (--(o->refcount) == 0) { switch(o->type) { case REDIS_STRING: freeStringObject(o); break; case REDIS_LIST: freeListObject(o); break; case REDIS_SET: freeSetObject(o); break; case REDIS_ZSET: freeZsetObject(o); break; case REDIS_HASH: freeHashObject(o); break; default: redisPanic("Unknown object type"); break; } o->ptr = NULL; /* defensive programming. We'll see NULL in traces. */ if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex); if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX || !listAddNodeHead(server.objfreelist,o)) zfree(o); if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex); } }
static void decrRefCount(void *obj) { robj *o = obj; /* Object is a key of a swapped out value, or in the process of being * loaded. */ if (server.vm_enabled && (o->storage == REDIS_VM_SWAPPED || o->storage == REDIS_VM_LOADING)) { if (o->storage == REDIS_VM_SWAPPED || o->storage == REDIS_VM_LOADING) { redisAssert(o->refcount == 1); } if (o->storage == REDIS_VM_LOADING) vmCancelThreadedIOJob(obj); redisAssert(o->type == REDIS_STRING); freeStringObject(o); vmMarkPagesFree(o->vm.page,o->vm.usedpages); pthread_mutex_lock(&server.obj_freelist_mutex); if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX || !listAddNodeHead(server.objfreelist,o)) zfree(o); pthread_mutex_unlock(&server.obj_freelist_mutex); server.vm_stats_swapped_objects--; return; } /* Object is in memory, or in the process of being swapped out. */ if (--(o->refcount) == 0) { if (server.vm_enabled && o->storage == REDIS_VM_SWAPPING) vmCancelThreadedIOJob(obj); switch(o->type) { case REDIS_STRING: freeStringObject(o); break; case REDIS_LIST: freeListObject(o); break; case REDIS_SET: freeSetObject(o); break; case REDIS_ZSET: freeZsetObject(o); break; case REDIS_HASH: freeHashObject(o); break; default: redisAssert(0); break; } if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex); if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX || !listAddNodeHead(server.objfreelist,o)) zfree(o); if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex); } }
/* Cancel the threaded I/O job associated with the object 'o'.
 *
 * The three job queues are scanned in order (new jobs, jobs being
 * processed, processed jobs) looking for a not yet canceled job whose id
 * is 'o':
 *
 * - io_newjobs:    the job is freed and removed from the queue.
 * - io_processing: the I/O thread is working on the job, so the lock is
 *                  released, we sleep for a moment and the scan restarts.
 * - io_processed:  the job is flagged as canceled so that it is ignored
 *                  when completed jobs are processed.
 *
 * After the cancellation the storage of the object is rolled back
 * (LOADING becomes SWAPPED, SWAPPING becomes MEMORY). Finding no job at all
 * for 'o' should never happen and triggers an assertion. */
void vmCancelThreadedIOJob(robj *o) {
    list *queues[3] = {
        server.io_newjobs,      /* 0 */
        server.io_processing,   /* 1 */
        server.io_processed     /* 2 */
    };
    int q;

    redisAssert(o->storage == REDIS_VM_LOADING ||
                o->storage == REDIS_VM_SWAPPING);

    for (;;) {
        int busy = 0; /* Set when the job is found in io_processing. */

        lockThreadedIO();
        /* Search for a matching job in one of the queues. */
        for (q = 0; q < 3 && !busy; q++) {
            listIter iter;
            listNode *node;

            listRewind(queues[q],&iter);
            while ((node = listNext(&iter)) != NULL) {
                iojob *job = node->value;

                /* Skip jobs already canceled and jobs about other objects. */
                if (job->canceled || job->id != o) continue;

                redisLog(REDIS_DEBUG,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
                    (void*)job, (char*)job->key->ptr, job->type, q);

                /* Mark the pages as free since the swap didn't happen,
                 * or happened but is now discarded. */
                if (q != 1 && job->type == REDIS_IOJOB_DO_SWAP)
                    vmMarkPagesFree(job->page,job->pages);

                if (q == 1) {
                    /* The I/O thread is using this job right now: it may
                     * be accessing the object (PREPARE_SWAP / DO_SWAP) or
                     * reading from disk (LOAD), and canceling under its
                     * feet could corrupt data. Wait for the job to move
                     * into the processed queue and try again. */
                    busy = 1;
                    break;
                }

                if (q == 0) {
                    /* Not processed yet: just drop it from the queue. */
                    freeIOJob(job);
                    listDelNode(queues[q],node);
                } else {
                    /* Already processed: flag it so that it is ignored
                     * when completed jobs are handled. */
                    job->canceled = 1;
                }

                /* Finally adjust the storage type of the object in order
                 * to "UNDO" the operation. */
                if (o->storage == REDIS_VM_LOADING)
                    o->storage = REDIS_VM_SWAPPED;
                else if (o->storage == REDIS_VM_SWAPPING)
                    o->storage = REDIS_VM_MEMORY;
                unlockThreadedIO();
                redisLog(REDIS_DEBUG,"*** DONE");
                return;
            }
        }
        unlockThreadedIO();
        if (!busy) break;

        /* Give the I/O thread some time to finish with the job. This
         * condition should be very rare. */
        usleep(1);
    }

    printf("Not found: %p\n", (void*)o);
    redisAssert(1 != 1); /* We should never reach this */
}
/* Every time a thread finished a Job, it writes a byte into the write side * of an unix pipe in order to "awake" the main thread, and this function * is called. * * Note that this is called both by the event loop, when a I/O thread * sends a byte in the notification pipe, and is also directly called from * waitEmptyIOJobsQueue(). * * In the latter case we don't want to swap more, so we use the * "privdata" argument setting it to a not NULL value to signal this * condition. */ void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask) { char buf[1]; int retval, processed = 0, toprocess = -1, trytoswap = 1; REDIS_NOTUSED(el); REDIS_NOTUSED(mask); REDIS_NOTUSED(privdata); if (privdata != NULL) trytoswap = 0; /* check the comments above... */ /* For every byte we read in the read side of the pipe, there is one * I/O job completed to process. */ #ifndef _WIN32 while((retval = read(fd,buf,1)) == 1) { #else DWORD pipe_is_on = 0; while (1) { retval = 0; /*Windows fix: We need to peek pipe, since read would block. 
*/ if (!PeekNamedPipe((HANDLE) _get_osfhandle(fd), NULL, 0, NULL, &pipe_is_on, NULL)) { redisLog(REDIS_DEBUG,"PeekReadPipe failed %s", strerror(GetLastError())); break; } /* No data on pipe */ if (!pipe_is_on) break; if ((retval = read(fd,buf,1)) != 1) break; #endif iojob *j; listNode *ln; struct dictEntry *de; /* Get the processed element (the oldest one) */ lockThreadedIO(); redisLog(REDIS_DEBUG,"Processing I/O completed job"); redisAssert(listLength(server.io_processed) != 0); if (toprocess == -1) { toprocess = (listLength(server.io_processed)*REDIS_MAX_COMPLETED_JOBS_PROCESSED)/100; if (toprocess <= 0) toprocess = 1; } ln = listFirst(server.io_processed); j = ln->value; listDelNode(server.io_processed,ln); unlockThreadedIO(); /* If this job is marked as canceled, just ignore it */ if (j->canceled) { freeIOJob(j); continue; } /* Post process it in the main thread, as there are things we * can do just here to avoid race conditions and/or invasive locks */ redisLog(REDIS_DEBUG,"COMPLETED Job type: %d, ID %p, key: %s", j->type, (void*)j->id, (unsigned char*)j->key->ptr); de = dictFind(j->db->dict,j->key->ptr); redisAssert(de != NULL); if (j->type == REDIS_IOJOB_LOAD) { redisDb *db; vmpointer *vp = dictGetEntryVal(de); /* Key loaded, bring it at home */ vmMarkPagesFree(vp->page,vp->usedpages); redisLog(REDIS_DEBUG, "VM: object %s loaded from disk (threaded)", (unsigned char*) j->key->ptr); server.vm_stats_swapped_objects--; server.vm_stats_swapins++; dictGetEntryVal(de) = j->val; incrRefCount(j->val); db = j->db; /* Handle clients waiting for this key to be loaded. */ handleClientsBlockedOnSwappedKey(db,j->key); freeIOJob(j); zfree(vp); } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) { /* Now we know the amount of pages required to swap this object. * Let's find some space for it, and queue this task again * rebranded as REDIS_IOJOB_DO_SWAP. */ if (!vmCanSwapOut() || vmFindContiguousPages(&j->page,j->pages) == REDIS_ERR) { /* Ooops... 
no space or we can't swap as there is * a fork()ed Redis trying to save stuff on disk. */ j->val->storage = REDIS_VM_MEMORY; /* undo operation */ freeIOJob(j); } else { /* Note that we need to mark this pages as used now, * if the job will be canceled, we'll mark them as freed * again. */ vmMarkPagesUsed(j->page,j->pages); j->type = REDIS_IOJOB_DO_SWAP; lockThreadedIO(); queueIOJob(j); unlockThreadedIO(); } } else if (j->type == REDIS_IOJOB_DO_SWAP) { vmpointer *vp; /* Key swapped. We can finally free some memory. */ if (j->val->storage != REDIS_VM_SWAPPING) { vmpointer *vp = (vmpointer*) j->id; printf("storage: %d\n",vp->storage); printf("key->name: %s\n",(char*)j->key->ptr); printf("val: %p\n",(void*)j->val); printf("val->type: %d\n",j->val->type); printf("val->ptr: %s\n",(char*)j->val->ptr); } redisAssert(j->val->storage == REDIS_VM_SWAPPING); vp = createVmPointer(j->val); vp->page = j->page; vp->usedpages = j->pages; dictGetEntryVal(de) = vp; /* Fix the storage otherwise decrRefCount will attempt to * remove the associated I/O job */ j->val->storage = REDIS_VM_MEMORY; decrRefCount(j->val); redisLog(REDIS_DEBUG, "VM: object %s swapped out at %lld (%lld pages) (threaded)", (unsigned char*) j->key->ptr, (unsigned long long) j->page, (unsigned long long) j->pages); server.vm_stats_swapped_objects++; server.vm_stats_swapouts++; freeIOJob(j); /* Put a few more swap requests in queue if we are still * out of memory */ if (trytoswap && vmCanSwapOut() && zmalloc_used_memory() > server.vm_max_memory) { int more = 1; while(more) { lockThreadedIO(); more = listLength(server.io_newjobs) < (unsigned) server.vm_max_threads; unlockThreadedIO(); /* Don't waste CPU time if swappable objects are rare. 
*/ if (vmSwapOneObjectThreaded() == REDIS_ERR) { trytoswap = 0; break; } } } } processed++; if (processed == toprocess) return; } if (retval < 0 && errno != EAGAIN) { redisLog(REDIS_WARNING, "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", strerror(errno)); } } void lockThreadedIO(void) { pthread_mutex_lock(&server.io_mutex); }