/*
** Report the approximate sizes, in KB (rounded up), of the in-memory trees
** described by the first tree-header copy in shared memory (ShmHeader.hdr1).
**
**   *pnNewKB  is set to the size of the current ("new") tree.
**   *pnOldKB  is set to the size of the "old" tree, or to 0 if there is no
**             old tree (TreeHeader.iOldShmid is zero) or if the old tree's
**             log offset matches the log offset stored in the snapshot at
**             ShmHeader.aSnap1.
**
** Always returns LSM_OK.
*/
static int infoTreeSize(lsm_db *db, int *pnOldKB, int *pnNewKB){
  ShmHeader *pShm = db->pShmhdr;
  TreeHeader *p = &pShm->hdr1;

  /* The following code suffers from two race conditions, as it accesses and
  ** trusts the contents of shared memory without verifying checksums:
  **
  **   * The two values read - TreeHeader.root.nByte and oldroot.nByte - are
  **     32-bit fields. It is assumed that reading from one of these
  **     is atomic - that it is not possible to read a partially written
  **     garbage value. However the two values may be mutually inconsistent.
  **
  **   * The log offset held in the tree-header is a 64-bit value. And
  **     lsmCheckpointLogOffset() reads a 64-bit value from a snapshot
  **     stored in shared memory. It is assumed that in each case it is
  **     possible to read a partially written garbage value. If this
  **     occurs, then the value returned for the size of the "old" tree
  **     may reflect the size of an "old" tree that was recently flushed
  **     to disk.
  **
  ** Given the context in which this function is called (as a result of an
  ** lsm_info(LSM_INFO_TREE_SIZE) request), neither of these are considered
  ** to be problems.
  **
  ** The round-up to KB is done in a wider unsigned type. Casting the 32-bit
  ** byte count to int first and then adding 1023 overflows a signed int
  ** (undefined behaviour) for very large values. Each nByte field is still
  ** read exactly once.
  */
  *pnNewKB = (int)(((unsigned long long)p->root.nByte + 1023) / 1024);

  if( p->iOldShmid && p->iOldLog!=lsmCheckpointLogOffset(pShm->aSnap1) ){
    *pnOldKB = (int)(((unsigned long long)p->oldroot.nByte + 1023) / 1024);
  }else{
    /* Either there is no old tree, or its log offset equals the one
    ** recorded in the snapshot. Report it as empty in both cases. */
    *pnOldKB = 0;
  }

  return LSM_OK;
}
/*
** Write a new database checkpoint, if one is required.
**
** The caller must not hold the worker mutex (pDb->pWorker must be 0).
** This function may indirectly call fsync(), and the worker mutex should
** not be held for that long - a client flushing an in-memory tree to disk
** may need it.
**
** Returns LSM_OK if no checkpoint was needed or if the checkpoint was
** written successfully. Otherwise returns the error code reported by
** lsmFsSyncDb(), lsmFsMetaPageGet() or lsmFsMetaPageRelease().
*/
int lsmCheckpointWrite(lsm_db *pDb){
  Database *pDatabase = pDb->pDatabase;
  Snapshot *pCkpt = 0;            /* Snapshot to checkpoint (0 -> none) */
  FileSystem *pFS;                /* File system object */
  MetaPage *pMeta = 0;            /* Meta-page the checkpoint is written to */
  int iMetaPg = 1;                /* TODO */
  int bSync;                      /* True to sync the db */
  int rc = LSM_OK;                /* Return Code */

  assert( pDb->pWorker==0 );

  /* Under the client mutex, try to claim the checkpointer role and check
  ** whether a checkpoint is actually required. If the role is free and the
  ** client snapshot is newer than the last one checkpointed, take a
  ** reference to that snapshot and leave pCkpt pointing at it. If some
  ** other thread already holds the role (it is running a checkpoint, or at
  ** least checking if one is required), pCkpt stays 0. */
  lsmMutexEnter(pDb->pEnv, pDatabase->pClientMutex);
  if( pDatabase->bCheckpointer==0
   && pDatabase->pClient->iId>pDatabase->iCheckpointId
  ){
    pCkpt = pDatabase->pClient;
    pDatabase->bCheckpointer = 1;
    pCkpt->nRef++;
  }
  lsmMutexLeave(pDb->pEnv, pDatabase->pClientMutex);

  /* Nothing to do - this call is a no-op. */
  if( pCkpt==0 ){
    return LSM_OK;
  }

  pFS = pDb->pFS;

  /* If the safety mode is "off", omit calls to xSync(). */
  bSync = (pDb->eSafety!=LSM_SAFETY_OFF);

  /* Sync the db, so that all runs referred to by the checkpoint are safely
  ** on disk. If this were skipped and a power failure occurred just after
  ** the checkpoint was written into the db header, the database could be
  ** corrupted following recovery. */
  if( bSync ){
    rc = lsmFsSyncDb(pFS);
  }

  /* Fetch a reference to the meta-page to write the checkpoint to. */
  if( rc==LSM_OK ){
    rc = lsmFsMetaPageGet(pFS, 1, iMetaPg, &pMeta);
  }

  if( rc==LSM_OK ){
    /* Copy the checkpoint blob into the meta-page, then release the
    ** reference to it (which will flush the checkpoint into the file). */
    int nPage;                    /* Size of buffer aPage[] */
    u8 *aPage;                    /* Page buffer */

    aPage = lsmFsMetaPageData(pMeta, &nPage);
    assert( pCkpt->nExport<=nPage );
    memcpy(aPage, pCkpt->pExport, pCkpt->nExport);
    rc = lsmFsMetaPageRelease(pMeta);
    pMeta = 0;
  }else{
    /* An error has occurred. Just drop the page reference. */
    lsmFsMetaPageRelease(pMeta);
  }

  /* Sync the db file again, so that the checkpoint just written is on
  ** the disk. */
  if( rc==LSM_OK && bSync ){
    rc = lsmFsSyncDb(pFS);
  }

  /* This is where space on disk is reclaimed. Now that the checkpoint
  ** has been written to the database and synced, part of the database
  ** log (the part containing the data just synced to disk) is no longer
  ** required, so the space it was taking up on disk can be reused.
  **
  ** It is also possible that database file blocks may be made available
  ** for reuse here. A database file block is free if it is not used by
  ** the most recently checkpointed snapshot, or by a snapshot that is
  ** in use by any existing database client. And "the most recently
  ** checkpointed snapshot" has just changed.
  **
  ** Whether or not the checkpoint succeeded, give up the checkpointer
  ** role and the snapshot reference taken above. */
  lsmMutexEnter(pDb->pEnv, pDatabase->pClientMutex);
  if( rc==LSM_OK ){
    lsmLogCheckpoint(
        pDb, &pDatabase->log, lsmCheckpointLogOffset(pCkpt->pExport)
    );
    pDatabase->iCheckpointId = pCkpt->iId;
  }
  pDatabase->bCheckpointer = 0;
  snapshotDecrRefcnt(pDb->pEnv, pCkpt);
  lsmMutexLeave(pDb->pEnv, pDatabase->pClientMutex);

  return rc;
}