/**
 * Refreshes the catalog cache entry for 'dbName' and installs the refreshed database version
 * on the database's DatabaseShardingState, unless the version already cached there has the
 * same UUID and a lastMod that is at least as recent.
 *
 * Must be called without any locks held and not from within a direct client (both are
 * invariants). Throws (via uassertStatusOK) if the catalog cache refresh fails. If the database
 * is not found in the DatabaseHolder, the refreshed version is not cached and the function
 * simply returns.
 */
void forceDatabaseRefresh(OperationContext* opCtx, const StringData dbName) {
    invariant(!opCtx->lockState()->isLocked());
    invariant(!opCtx->getClient()->isInDirectClient());

    auto const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->canAcceptShardedCommands());

    // Intentionally non-const: the version is handed over to setDbVersion() with std::move at
    // the end of this function, and std::move on a const object would silently copy instead.
    auto refreshedDbVersion =
        uassertStatusOK(Grid::get(opCtx)->catalogCache()->getDatabaseWithRefresh(opCtx, dbName))
            .databaseVersion();

    // First, check under a shared lock if another thread already updated the cached version.
    // This is a best-effort optimization to make as few threads as possible to convoy on the
    // exclusive lock below.
    auto databaseHolder = DatabaseHolder::get(opCtx);
    {
        // Take the DBLock directly rather than using AutoGetDb, to prevent a recursive call
        // into checkDbVersion().
        Lock::DBLock dbLock(opCtx, dbName, MODE_IS);
        auto db = databaseHolder->getDb(opCtx, dbName);
        if (!db) {
            log() << "Database " << dbName
                  << " has been dropped; not caching the refreshed databaseVersion";
            return;
        }

        auto& dss = DatabaseShardingState::get(db);
        auto dssLock = DatabaseShardingState::DSSLock::lock(opCtx, &dss);

        const auto cachedDbVersion = dss.getDbVersion(opCtx, dssLock);
        if (cachedDbVersion && cachedDbVersion->getUuid() == refreshedDbVersion.getUuid() &&
            cachedDbVersion->getLastMod() >= refreshedDbVersion.getLastMod()) {
            LOG(2) << "Skipping setting cached databaseVersion for " << dbName
                   << " to refreshed version " << refreshedDbVersion.toBSON()
                   << " because current cached databaseVersion is already "
                   << cachedDbVersion->toBSON();
            return;
        }
    }

    // The cached version is older than the refreshed version; update the cached version.
    // The shared lock above was released, so the database has to be looked up again under the
    // exclusive lock.
    Lock::DBLock dbLock(opCtx, dbName, MODE_X);
    auto db = databaseHolder->getDb(opCtx, dbName);
    if (!db) {
        log() << "Database " << dbName
              << " has been dropped; not caching the refreshed databaseVersion";
        return;
    }

    auto& dss = DatabaseShardingState::get(db);
    auto dssLock = DatabaseShardingState::DSSLock::lockExclusive(opCtx, &dss);

    // Last use of 'refreshedDbVersion'; it must not be read after this call.
    dss.setDbVersion(opCtx, std::move(refreshedDbVersion), dssLock);
}
    void dropCollection() {
        Lock::DBLock dbLock(&_opCtx, nss.db(), MODE_X);
        Database* database = DatabaseHolder::getDatabaseHolder().get(&_opCtx, nss.db());
        if (!database) {
            return;
        }

        WriteUnitOfWork wuow(&_opCtx);
        database->dropCollection(&_opCtx, nss.ns()).transitional_ignore();
        wuow.commit();
    }
Пример #3
0
    void dropCollection() {
        ScopedTransaction transaction(&_txn, MODE_X);
        Lock::DBLock dbLock(_txn.lockState(), nss.db(), MODE_X);
        Database* database = dbHolder().get(&_txn, nss.db());
        if (!database) {
            return;
        }

        WriteUnitOfWork wuow(&_txn);
        database->dropCollection(&_txn, nss.ns());
        wuow.commit();
    }
Пример #4
0
/*
 * Loads every name dictionary listed in dict[] and the value dictionary
 * valdict from the res/ directory, feeding each line to dbAddNameDict() /
 * dbAddValDict(), and then prints the results of a few sample lookups.
 *
 * A dictionary file that cannot be opened is reported on stderr and skipped
 * instead of being read through a NULL stream.
 *
 * Always returns 0.
 */
int main()
{
	FILE *fp;
	char dict[][50] = {"ChineseFamilyNames","CountryNames","FemaleFirstNames",
			"FuncWord","MaleFirstNames","Months","PlaceNames","PublisherNames","CityNames"};
	/* Derived from the array so that adding a dictionary needs no second edit. */
	const size_t dictCount = sizeof(dict) / sizeof(dict[0]);
	char valdict[50] = "LastNames";
	char fileName[1024];
	char data[1024];
	char name[1024];
	double val;
	size_t i;

	dbConnect();

	dbLock();
	dbCreateTable();
	for(i=0;i<dictCount;i++)
	{
		snprintf(fileName,sizeof(fileName),"res/%s",dict[i]);
		fp = fopen(fileName,"r");
		if(fp == NULL)
		{
			/* Missing or unreadable dictionary: report it and move on. */
			perror(fileName);
			continue;
		}
		while(fgets(data,sizeof(data),fp))
		{
			dbAddNameDict(dict[i],removeCR(data));
			//printf("[%s]\n",removeCR(data));
		}
		fclose(fp);
	}

	snprintf(fileName,sizeof(fileName),"res/%s",valdict);
	fp = fopen(fileName,"r");
	if(fp == NULL)
	{
		perror(fileName);
	}
	else
	{
		while(fgets(data,sizeof(data),fp))
		{
			spilitVal(name,&val,data);
			dbAddValDict(valdict,name,val);
			//printf("[%s]|[%f]\n",name,val);
		}
		fclose(fp);
	}
	/* Released on every path, including when the value dictionary is missing. */
	dbUnlock();

	//////////////////////////////////////////////////////////////
	// boolean
	// The looked-up key now matches the printed label (it used to query "abigail").
	printf("CityNames,abidjan:%d\n",dbGetNameLikeDict("CityNames","abidjan"));
	printf("FemaleFirstNames,abigail:%d\n",dbGetNameDict("FemaleFirstNames","abigail"));
	printf("FemaleFirstNames,Abigail:%d\n",dbGetNameLikeDict("FemaleFirstNames","Abigail"));
	// double
	printf("LastNames,wilson:%f\n",dbGetValDict("LastNames","wilson"));
	printf("LastNames,Wilson:%f\n",dbGetValLikeDict("LastNames","Wilson"));
	//////////////////////////////////////////////////////////////

	dbFree();

	return 0;
}
Пример #5
0
/**
 * Test helper that (re)creates the collection 'nss' with the given 'options'.
 *
 * Under a MODE_X database lock and inside a single write unit of work, opens the database,
 * drops any collection with that namespace (ignoring the resulting status) and creates the
 * collection again. Returns the newly created collection.
 */
Collection* RollbackTest::_createCollection(OperationContext* opCtx,
                                            const NamespaceString& nss,
                                            const CollectionOptions& options) {
    Lock::DBLock exclusiveDbLock(opCtx, nss.db(), MODE_X);
    mongo::WriteUnitOfWork unitOfWork(opCtx);

    auto database = DatabaseHolder::get(opCtx)->openDb(opCtx, nss.db());
    ASSERT_TRUE(database);

    // Start from a clean slate; the drop status is intentionally not checked.
    database->dropCollection(opCtx, nss.ns()).transitional_ignore();

    auto createdCollection = database->createCollection(opCtx, nss.ns(), options);
    ASSERT_TRUE(createdCollection);

    unitOfWork.commit();
    return createdCollection;
}
Пример #6
0
void Transaction::save()
{
	if ((mStatus == INVALID) || (mStatus == REMOVED))
		return;

	char status;
	switch (mStatus)
	{
		case NEW:			status = TXN_SQL_NEW;		break;
		case INCLUDED:		status = TXN_SQL_INCLUDED;	break;
		case CONFLICTED:	status = TXN_SQL_CONFLICT;	break;
		case COMMITTED:		status = TXN_SQL_VALIDATED;	break;
		case HELD:			status = TXN_SQL_HELD;		break;
		default:			status = TXN_SQL_UNKNOWN;
	}

	Database *db = theApp->getTxnDB()->getDB();
	ScopedLock dbLock(theApp->getTxnDB()->getDBLock());
	db->executeSQL(mTransaction->getSQLInsertReplaceHeader() + mTransaction->getSQL(getLedger(), status) + ";");
}
Пример #7
0
    /**
     * Executes the command: validates the 'indexes' array in 'cmdObj', creates the target
     * collection if it does not exist yet, and builds the requested indexes on it.
     *
     * @param txn     operation context used for locking, storage access and replication checks
     * @param dbname  name of the database the command was run against
     * @param cmdObj  the command document; its "indexes" field must be an array of objects
     * @param options not referenced by this function
     * @param errmsg  set to a description of the problem on validation failures
     * @param result  receives "createdCollectionAutomatically", "numIndexesBefore",
     *                "numIndexesAfter" and optional "note" fields, or the failing status
     * @return true on success; false (or the value of appendCommandStatus) on failure.
     *         May also throw via uassert/uassertStatusOK.
     */
    virtual bool run(OperationContext* txn,
                     const string& dbname,
                     BSONObj& cmdObj,
                     int options,
                     string& errmsg,
                     BSONObjBuilder& result) {
        const NamespaceString ns(parseNs(dbname, cmdObj));

        Status status = userAllowedWriteNS(ns);
        if (!status.isOK())
            return appendCommandStatus(result, status);

        if (cmdObj["indexes"].type() != Array) {
            errmsg = "indexes has to be an array";
            result.append("cmdObj", cmdObj);
            return false;
        }

        // Collect the individual index specs, rejecting any array element that is not an
        // object.
        std::vector<BSONObj> specs;
        {
            BSONObjIterator i(cmdObj["indexes"].Obj());
            while (i.more()) {
                BSONElement e = i.next();
                if (e.type() != Object) {
                    errmsg = "everything in indexes has to be an Object";
                    result.append("cmdObj", cmdObj);
                    return false;
                }
                specs.push_back(e.Obj());
            }
        }

        if (specs.size() == 0) {
            errmsg = "no indexes to add";
            return false;
        }

        // check specs
        // Every spec must end up with a non-empty string "ns" field equal to the command's
        // namespace; a missing "ns" is filled in from the command namespace.
        for (size_t i = 0; i < specs.size(); i++) {
            BSONObj spec = specs[i];
            if (spec["ns"].eoo()) {
                spec = _addNsToSpec(ns, spec);
                specs[i] = spec;
            }

            if (spec["ns"].type() != String) {
                errmsg = "ns field must be a string";
                result.append("spec", spec);
                return false;
            }

            std::string nsFromUser = spec["ns"].String();
            if (nsFromUser.empty()) {
                errmsg = "ns field cannot be an empty string";
                result.append("spec", spec);
                return false;
            }

            if (ns != nsFromUser) {
                errmsg = str::stream() << "value of ns field '" << nsFromUser
                                       << "' doesn't match namespace " << ns.ns();
                result.append("spec", spec);
                return false;
            }
        }

        // now we know we have to create index(es)
        // Note: createIndexes command does not currently respect shard versioning.
        ScopedTransaction transaction(txn, MODE_IX);
        Lock::DBLock dbLock(txn->lockState(), ns.db(), MODE_X);
        if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
            return appendCommandStatus(
                result,
                Status(ErrorCodes::NotMaster,
                       str::stream() << "Not primary while creating indexes in " << ns.ns()));
        }

        // Open the database if the holder does not have it yet.
        Database* db = dbHolder().get(txn, ns.db());
        if (!db) {
            db = dbHolder().openDb(txn, ns.db());
        }

        // Create the collection with default options when it does not exist, and report in
        // the reply whether that happened.
        Collection* collection = db->getCollection(ns.ns());
        if (collection) {
            result.appendBool("createdCollectionAutomatically", false);
        } else {
            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                WriteUnitOfWork wunit(txn);
                collection = db->createCollection(txn, ns.ns(), CollectionOptions());
                invariant(collection);
                wunit.commit();
            }
            MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());
            result.appendBool("createdCollectionAutomatically", true);
        }

        const int numIndexesBefore = collection->getIndexCatalog()->numIndexesTotal(txn);
        result.append("numIndexesBefore", numIndexesBefore);

        // Guard that invokes setLastOpToSystemLastOpTime(txn) on this client's ReplClientInfo;
        // it is dismissed only on the fully successful path at the end of this function.
        // NOTE(review): presumably this fires on every other exit from this scope, including
        // the "all indexes already exist" early return - confirm against ScopeGuard.
        auto client = txn->getClient();
        ScopeGuard lastOpSetterGuard =
            MakeObjGuard(repl::ReplClientInfo::forClient(client),
                         &repl::ReplClientInfo::setLastOpToSystemLastOpTime,
                         txn);

        MultiIndexBlock indexer(txn, collection);
        indexer.allowBackgroundBuilding();
        indexer.allowInterruption();

        // Drop the specs whose index already exists; the size difference tells us whether any
        // were removed.
        const size_t origSpecsSize = specs.size();
        indexer.removeExistingIndexes(&specs);

        if (specs.size() == 0) {
            result.append("numIndexesAfter", numIndexesBefore);
            result.append("note", "all indexes already exist");
            return true;
        }

        if (specs.size() != origSpecsSize) {
            result.append("note", "index already exists");
        }

        // Per-spec checks on the indexes that still need to be built: unique index
        // constraints, and rejection of an explicit v:0.
        for (size_t i = 0; i < specs.size(); i++) {
            const BSONObj& spec = specs[i];
            if (spec["unique"].trueValue()) {
                status = checkUniqueIndexConstraints(txn, ns.ns(), spec["key"].Obj());

                if (!status.isOK()) {
                    return appendCommandStatus(result, status);
                }
            }
            if (spec["v"].isNumber() && spec["v"].numberInt() == 0) {
                return appendCommandStatus(
                    result,
                    Status(ErrorCodes::CannotCreateIndex,
                           str::stream() << "illegal index specification: " << spec << ". "
                                         << "The option v:0 cannot be passed explicitly"));
            }
        }

        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            uassertStatusOK(indexer.init(specs));
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());

        // If we're a background index, replace exclusive db lock with an intent lock, so that
        // other readers and writers can proceed during this phase.
        if (indexer.getBuildInBackground()) {
            txn->recoveryUnit()->abandonSnapshot();
            dbLock.relockWithMode(MODE_IX);
            // Re-check that writes are still accepted after the lock was re-acquired.
            if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
                return appendCommandStatus(
                    result,
                    Status(ErrorCodes::NotMaster,
                           str::stream() << "Not primary while creating background indexes in "
                                         << ns.ns()));
            }
        }

        try {
            Lock::CollectionLock colLock(txn->lockState(), ns.ns(), MODE_IX);
            uassertStatusOK(indexer.insertAllDocumentsInCollection());
        } catch (const DBException& e) {
            invariant(e.getCode() != ErrorCodes::WriteConflict);
            // Must have exclusive DB lock before we clean up the index build via the
            // destructor of 'indexer'.
            if (indexer.getBuildInBackground()) {
                try {
                    // This function cannot throw today, but we will preemptively prepare for
                    // that day, to avoid data corruption due to lack of index cleanup.
                    txn->recoveryUnit()->abandonSnapshot();
                    dbLock.relockWithMode(MODE_X);
                    if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns)) {
                        return appendCommandStatus(
                            result,
                            Status(ErrorCodes::NotMaster,
                                   str::stream()
                                       << "Not primary while creating background indexes in "
                                       << ns.ns() << ": cleaning up index build failure due to "
                                       << e.toString()));
                    }
                } catch (...) {
                    std::terminate();
                }
            }
            // Propagate the original failure to the caller.
            throw;
        }
        // Need to return db lock back to exclusive, to complete the index build.
        if (indexer.getBuildInBackground()) {
            txn->recoveryUnit()->abandonSnapshot();
            dbLock.relockWithMode(MODE_X);
            uassert(ErrorCodes::NotMaster,
                    str::stream() << "Not primary while completing index build in " << dbname,
                    repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(ns));

            // The exclusive lock was not held during the build, so verify that the database
            // and the collection are still there. This 'db' shadows the outer variable.
            Database* db = dbHolder().get(txn, ns.db());
            uassert(28551, "database dropped during index build", db);
            uassert(28552, "collection dropped during index build", db->getCollection(ns.ns()));
        }

        // Commit the index build and notify the op observer once per created index, all in
        // one write unit of work.
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            WriteUnitOfWork wunit(txn);

            indexer.commit();

            for (size_t i = 0; i < specs.size(); i++) {
                std::string systemIndexes = ns.getSystemIndexesCollection();
                getGlobalServiceContext()->getOpObserver()->onCreateIndex(
                    txn, systemIndexes, specs[i]);
            }

            wunit.commit();
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());

        result.append("numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal(txn));

        lastOpSetterGuard.Dismiss();

        return true;
    }
Пример #8
0
// static
// Applies the single oplog entry 'op'.
//
// Returns Status::OK() without applying anything when the server is shutting down or when
// the entry's "ns" is empty or starts with '.'. Commands are applied through
// 'applyCommandInLock' under a global write lock; no-ops and inserts into "system.indexes"
// are applied through 'applyOperationInLock' under a MODE_X lock on the entry's database.
// 'convertUpdateToUpsert' and 'incrementOpsAppliedStats' are forwarded to
// 'applyOperationInLock'; 'incrementOpsAppliedStats' is also called directly after a command
// has been applied.
Status SyncTail::syncApply(OperationContext* txn,
                           const BSONObj& op,
                           bool convertUpdateToUpsert,
                           ApplyOperationInLockFn applyOperationInLock,
                           ApplyCommandInLockFn applyCommandInLock,
                           IncrementOpsAppliedStatsFn incrementOpsAppliedStats) {
    if (inShutdown()) {
        return Status::OK();
    }

    // Count each log op application as a separate operation, for reporting purposes
    CurOp individualOp(txn);

    const char* ns = op.getStringField("ns");
    verify(ns);

    const char* opType = op["op"].valuestrsafe();

    // Only the first character of the "op" field is used to classify the entry.
    bool isCommand(opType[0] == 'c');
    bool isNoOp(opType[0] == 'n');

    if ((*ns == '\0') || (*ns == '.')) {
        // this is ugly
        // this is often a no-op
        // but can't be 100% sure
        if (!isNoOp) {
            error() << "skipping bad op in oplog: " << op.toString();
        }
        return Status::OK();
    }

    if (isCommand) {
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            // a command may need a global write lock. so we will conservatively go
            // ahead and grab one here. suboptimal. :-(
            Lock::GlobalWrite globalWriteLock(txn->lockState());

            // special case apply for commands to avoid implicit database creation
            Status status = applyCommandInLock(txn, op);
            incrementOpsAppliedStats();
            return status;
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "syncApply_command", ns);
    }

    // Shared helper for the non-command paths: applies 'op' against 'db' and turns a
    // WriteConflict status into a WriteConflictException.
    // NOTE(review): presumably the exception is what lets the surrounding retry-loop macros
    // retry the operation - confirm against the macro definition.
    auto applyOp = [&](Database* db) {
        // For non-initial-sync, we convert updates to upserts
        // to suppress errors when replaying oplog entries.
        txn->setReplicatedWrites(false);
        DisableDocumentValidation validationDisabler(txn);

        Status status =
            applyOperationInLock(txn, db, op, convertUpdateToUpsert, incrementOpsAppliedStats);
        if (!status.isOK() && status.code() == ErrorCodes::WriteConflict) {
            throw WriteConflictException();
        }
        return status;
    };

    // No-ops and inserts into system.indexes take the database lock in MODE_X.
    if (isNoOp || (opType[0] == 'i' && nsToCollectionSubstring(ns) == "system.indexes")) {
        auto opStr = isNoOp ? "syncApply_noop" : "syncApply_indexBuild";
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            Lock::DBLock dbLock(txn->lockState(), nsToDatabaseSubstring(ns), MODE_X);
            OldClientContext ctx(txn, ns);
            return applyOp(ctx.db());
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, opStr, ns);
    }
Пример #9
0
        /**
         * Executes the command: validates the "indexes" array in 'cmdObj' and makes sure the
         * target collection exists, creating it when necessary.
         *
         * @param txn      operation context used for locking and storage access
         * @param dbname   name of the database the command was run against
         * @param cmdObj   the command document; the collection name is read from the field
         *                 named by 'name', the specs from the "indexes" array
         * @param errmsg   set to a description of the problem on validation failures
         * @param result   receives "createdCollectionAutomatically" or the failing status
         * @param fromRepl when true, the primary check and the onCreateCollection op observer
         *                 notification are skipped
         */
        virtual bool run(OperationContext* txn,  const string& dbname, BSONObj& cmdObj, int options,
                          string& errmsg, BSONObjBuilder& result,
                          bool fromRepl = false ) {

            // ---  parse

            NamespaceString ns( dbname, cmdObj[name].String() );
            Status status = userAllowedWriteNS( ns );
            if ( !status.isOK() )
                return appendCommandStatus( result, status );

            if ( cmdObj["indexes"].type() != Array ) {
                errmsg = "indexes has to be an array";
                result.append( "cmdObj", cmdObj );
                return false;
            }

            // Collect the individual index specs, rejecting any array element that is not
            // an object.
            std::vector<BSONObj> specs;
            {
                BSONObjIterator i( cmdObj["indexes"].Obj() );
                while ( i.more() ) {
                    BSONElement e = i.next();
                    if ( e.type() != Object ) {
                        errmsg = "everything in indexes has to be an Object";
                        result.append( "cmdObj", cmdObj );
                        return false;
                    }
                    specs.push_back( e.Obj() );
                }
            }

            if ( specs.size() == 0 ) {
                errmsg = "no indexes to add";
                return false;
            }

            // check specs
            // A missing "ns" is filled in from the command namespace; afterwards every spec
            // must carry a string "ns" equal to that namespace.
            for ( size_t i = 0; i < specs.size(); i++ ) {
                BSONObj spec = specs[i];
                if ( spec["ns"].eoo() ) {
                    spec = _addNsToSpec( ns, spec );
                    specs[i] = spec;
                }

                if ( spec["ns"].type() != String ) {
                    errmsg = "spec has no ns";
                    result.append( "spec", spec );
                    return false;
                }
                if ( ns != spec["ns"].String() ) {
                    errmsg = "namespace mismatch";
                    result.append( "spec", spec );
                    return false;
                }
            }

            // now we know we have to create index(es)
            // Note: createIndexes command does not currently respect shard versioning.
            ScopedTransaction transaction(txn, MODE_IX);
            Lock::DBLock dbLock(txn->lockState(), ns.db(), MODE_X);
            if (!fromRepl &&
                !repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(dbname)) {
                return appendCommandStatus(result, Status(ErrorCodes::NotMaster, str::stream()
                    << "Not primary while creating indexes in " << ns.ns()));
            }

            // Open the database if the holder does not have it yet.
            Database* db = dbHolder().get(txn, ns.db());
            if (!db) {
                db = dbHolder().openDb(txn, ns.db());
            }

            // Report whether the collection has to be created, then create it inside a
            // write unit of work.
            Collection* collection = db->getCollection( ns.ns() );
            result.appendBool( "createdCollectionAutomatically", collection == NULL );
            if ( !collection ) {
                MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                    WriteUnitOfWork wunit(txn);
                    collection = db->createCollection( txn, ns.ns() );
                    invariant( collection );
                    if (!fromRepl) {
                        getGlobalEnvironment()->getOpObserver()->onCreateCollection(
                                txn,
                                ns,
                                CollectionOptions());
                    }
                    wunit.commit();
                } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createIndexes", ns.ns());
            }
Пример #10
0
/****************************************************************************
Desc:	Turn the "keep RFL files" setting of the database on or off.
		Fails if a transaction is active on this handle or a backup is
		running.  When the setting actually changes, a checkpoint is forced
		and the current RFL file is finished so that a new one is started.
****************************************************************************/
RCODE XFLAPI F_Db::setRflKeepFilesFlag(
	FLMBOOL	bKeepFiles)
{
	RCODE		rc = NE_XFLM_OK;
	FLMBOOL	bMustUnlock = FALSE;
	FLMBOOL	bBackupRunning = FALSE;

	// Bail out if the database is being forced to close.

	rc = checkState( __FILE__, __LINE__);
	if (RC_BAD( rc))
	{
		goto Exit;
	}

	// No transaction may be active on this handle.

	if (m_eTransType != XFLM_NO_TRANS)
	{
		rc = RC_SET( NE_XFLM_TRANS_ACTIVE);
		goto Exit;
	}

	// Read the backup flag while holding the database mutex; a running
	// backup blocks the change.

	m_pDatabase->lockMutex();
	bBackupRunning = m_pDatabase->m_bBackupActive ? TRUE : FALSE;
	m_pDatabase->unlockMutex();

	if (bBackupRunning)
	{
		rc = RC_SET( NE_XFLM_BACKUP_ACTIVE);
		goto Exit;
	}

	// Lock the database (without starting a transaction) unless this
	// handle already holds a file lock.

	if ((m_uiFlags & (FDB_HAS_FILE_LOCK | FDB_FILE_LOCK_SHARED)) == 0)
	{
		rc = dbLock( FLM_LOCK_EXCLUSIVE, 0, FLM_NO_TIMEOUT);
		if (RC_BAD( rc))
		{
			goto Exit;
		}
		bMustUnlock = TRUE;
	}

	// Nothing to do when the requested setting equals the committed one;
	// rc is still NE_XFLM_OK at this point.

	if ((!bKeepFiles) == (!m_pDatabase->m_lastCommittedDbHdr.ui8RflKeepFiles))
	{
		goto Exit;
	}

	// Force a checkpoint and roll to the next RFL file numbers.
	// When changing from keep to no-keep or vice versa, we need to
	// go to a new RFL file so that the new RFL file gets new
	// serial numbers and a new keep or no-keep flag.

	rc = doCheckpoint( FLM_NO_TIMEOUT);
	if (RC_BAD( rc))
	{
		goto Exit;
	}

	// Start from the last committed header and record the new setting
	// in the uncommitted copy.

	f_memcpy( &m_pDatabase->m_uncommittedDbHdr,
				 &m_pDatabase->m_lastCommittedDbHdr,
				 sizeof( XFLM_DB_HDR));
	if (bKeepFiles)
	{
		m_pDatabase->m_uncommittedDbHdr.ui8RflKeepFiles = (FLMUINT8)1;
	}
	else
	{
		m_pDatabase->m_uncommittedDbHdr.ui8RflKeepFiles = (FLMUINT8)0;
	}

	// Force a new RFL file - this will also write out the entire
	// log header - including the changes we made above.

	rc = m_pDatabase->m_pRfl->finishCurrFile( this, TRUE);

Exit:

	// Release the lock only if this call acquired it.

	if (bMustUnlock)
	{
		dbUnlock();
	}

	return( rc);
}
Пример #11
0
/****************************************************************************
Desc:	Change the directory in which the database keeps its RFL files.
		Fails if a transaction is active on this handle, a backup is
		running, or a non-empty pszNewRflDir is not a directory.  A
		checkpoint is forced and the current RFL file is finished before
		the new directory is set.
****************************************************************************/
RCODE XFLAPI F_Db::setRflDir(
	const char *	pszNewRflDir)
{
	RCODE		rc = NE_XFLM_OK;
	FLMBOOL	bMustUnlock = FALSE;
	FLMBOOL	bBackupRunning = FALSE;

	// Bail out if the database is being forced to close.

	rc = checkState( __FILE__, __LINE__);
	if (RC_BAD( rc))
	{
		goto Exit;
	}

	// No transaction may be active on this handle.

	if (m_eTransType != XFLM_NO_TRANS)
	{
		rc = RC_SET( NE_XFLM_TRANS_ACTIVE);
		goto Exit;
	}

	// Read the backup flag while holding the database mutex; a running
	// backup blocks the change.

	m_pDatabase->lockMutex();
	bBackupRunning = m_pDatabase->m_bBackupActive ? TRUE : FALSE;
	m_pDatabase->unlockMutex();

	if (bBackupRunning)
	{
		rc = RC_SET( NE_XFLM_BACKUP_ACTIVE);
		goto Exit;
	}

	// A non-empty path has to name an existing directory, not a file.
	// A null or empty path is passed through without this check.

	if (pszNewRflDir && *pszNewRflDir &&
		 !gv_XFlmSysData.pFileSystem->isDir( pszNewRflDir))
	{
		rc = RC_SET( NE_FLM_IO_INVALID_FILENAME);
		goto Exit;
	}

	// Need to lock the database because we can't change the RFL
	// directory until after the checkpoint has completed.  The
	// checkpoint code will unlock the transaction, but not the
	// file if we have an explicit lock.  We need to do this to
	// prevent another transaction from beginning before we have
	// changed the RFL directory.

	if ((m_uiFlags & (FDB_HAS_FILE_LOCK | FDB_FILE_LOCK_SHARED)) == 0)
	{
		rc = dbLock( FLM_LOCK_EXCLUSIVE, 0, FLM_NO_TIMEOUT);
		if (RC_BAD( rc))
		{
			goto Exit;
		}
		bMustUnlock = TRUE;
	}

	// Force a checkpoint and roll to the next RFL file numbers.  Both
	// of these steps are necessary to ensure that we won't have to do
	// any recovery using the current RFL file - because we do not
	// move the current RFL file to the new directory.  Forcing the
	// checkpoint ensures that we have no transactions that will need
	// to be recovered if we were to crash.  Rolling the RFL file number
	// ensures that no more transactions will be logged to the current
	// RFL file.

	rc = doCheckpoint( FLM_NO_TIMEOUT);
	if (RC_BAD( rc))
	{
		goto Exit;
	}

	// Finish the current RFL file so that a new one is started.

	rc = m_pDatabase->m_pRfl->finishCurrFile( this, FALSE);
	if (RC_BAD( rc))
	{
		goto Exit;
	}

	// With the checkpoint done and the RFL file rolled, switch to the
	// new directory while holding the database mutex.

	m_pDatabase->lockMutex();
	rc = m_pDatabase->m_pRfl->setRflDir( pszNewRflDir);
	m_pDatabase->unlockMutex();

Exit:

	// Release the lock only if this call acquired it.

	if (bMustUnlock)
	{
		dbUnlock();
	}

	return( rc);
}
Пример #12
0
    virtual bool run(OperationContext* txn,
                     const string& db,
                     BSONObj& cmdObj,
                     int options,
                     string& errmsg,
                     BSONObjBuilder& result) {
        const std::string ns = parseNs(db, cmdObj);
        if (nsToCollectionSubstring(ns).empty()) {
            errmsg = "missing collection name";
            return false;
        }
        NamespaceString nss(ns);

        // Parse the options for this request.
        auto request = AggregationRequest::parseFromBSON(nss, cmdObj);
        if (!request.isOK()) {
            return appendCommandStatus(result, request.getStatus());
        }

        // Set up the ExpressionContext.
        intrusive_ptr<ExpressionContext> expCtx = new ExpressionContext(txn, request.getValue());
        expCtx->tempDir = storageGlobalParams.dbpath + "/_tmp";

        // Parse the pipeline.
        auto statusWithPipeline = Pipeline::parse(request.getValue().getPipeline(), expCtx);
        if (!statusWithPipeline.isOK()) {
            return appendCommandStatus(result, statusWithPipeline.getStatus());
        }
        auto pipeline = std::move(statusWithPipeline.getValue());

        auto resolvedNamespaces = resolveInvolvedNamespaces(txn, pipeline, expCtx);
        if (!resolvedNamespaces.isOK()) {
            return appendCommandStatus(result, resolvedNamespaces.getStatus());
        }
        expCtx->resolvedNamespaces = std::move(resolvedNamespaces.getValue());

        unique_ptr<ClientCursorPin> pin;  // either this OR the exec will be non-null
        unique_ptr<PlanExecutor> exec;
        auto curOp = CurOp::get(txn);
        {
            // This will throw if the sharding version for this connection is out of date. If the
            // namespace is a view, the lock will be released before re-running the aggregation.
            // Otherwise, the lock must be held continuously from now until we have we created both
            // the output ClientCursor and the input executor. This ensures that both are using the
            // same sharding version that we synchronize on here. This is also why we always need to
            // create a ClientCursor even when we aren't outputting to a cursor. See the comment on
            // ShardFilterStage for more details.
            AutoGetCollectionOrViewForRead ctx(txn, nss);
            Collection* collection = ctx.getCollection();

            // If running $collStats on a view, we do not resolve the view since we want stats
            // on this view namespace.
            auto startsWithCollStats = [&pipeline]() {
                const Pipeline::SourceContainer& sources = pipeline->getSources();
                return !sources.empty() &&
                    dynamic_cast<DocumentSourceCollStats*>(sources.front().get());
            };

            // If this is a view, resolve it by finding the underlying collection and stitching view
            // pipelines and this request's pipeline together. We then release our locks before
            // recursively calling run, which will re-acquire locks on the underlying collection.
            // (The lock must be released because recursively acquiring locks on the database will
            // prohibit yielding.)
            auto view = ctx.getView();
            if (view && !startsWithCollStats()) {
                auto viewDefinition =
                    ViewShardingCheck::getResolvedViewIfSharded(txn, ctx.getDb(), view);
                if (!viewDefinition.isOK()) {
                    return appendCommandStatus(result, viewDefinition.getStatus());
                }

                if (!viewDefinition.getValue().isEmpty()) {
                    ViewShardingCheck::appendShardedViewStatus(viewDefinition.getValue(), &result);
                    return false;
                }

                auto resolvedView = ctx.getDb()->getViewCatalog()->resolveView(txn, nss);
                if (!resolvedView.isOK()) {
                    return appendCommandStatus(result, resolvedView.getStatus());
                }

                // With the view resolved, we can relinquish locks.
                ctx.releaseLocksForView();

                // Parse the resolved view into a new aggregation request.
                auto viewCmd =
                    resolvedView.getValue().asExpandedViewAggregation(request.getValue());
                if (!viewCmd.isOK()) {
                    return appendCommandStatus(result, viewCmd.getStatus());
                }

                bool status = this->run(txn, db, viewCmd.getValue(), options, errmsg, result);
                {
                    // Set the namespace of the curop back to the view namespace so ctx records
                    // stats on this view namespace on destruction.
                    stdx::lock_guard<Client>(*txn->getClient());
                    curOp->setNS_inlock(nss.ns());
                }
                return status;
            }

            // If the pipeline does not have a user-specified collation, set it from the collection
            // default.
            if (request.getValue().getCollation().isEmpty() && collection &&
                collection->getDefaultCollator()) {
                invariant(!expCtx->getCollator());
                expCtx->setCollator(collection->getDefaultCollator()->clone());
            }

            // Propagate the ExpressionContext throughout all of the pipeline's stages and
            // expressions.
            pipeline->injectExpressionContext(expCtx);

            // The pipeline must be optimized after the correct collator has been set on it (by
            // injecting the ExpressionContext containing the collator). This is necessary because
            // optimization may make string comparisons, e.g. optimizing {$eq: [<str1>, <str2>]} to
            // a constant.
            pipeline->optimizePipeline();

            if (kDebugBuild && !expCtx->isExplain && !expCtx->inShard) {
                // Make sure all operations round-trip through Pipeline::serialize() correctly by
                // re-parsing every command in debug builds. This is important because sharded
                // aggregations rely on this ability.  Skipping when inShard because this has
                // already been through the transformation (and this un-sets expCtx->inShard).
                pipeline = reparsePipeline(pipeline, request.getValue(), expCtx);
            }

            // This does mongod-specific stuff like creating the input PlanExecutor and adding
            // it to the front of the pipeline if needed.
            PipelineD::prepareCursorSource(collection, pipeline);

            // Create the PlanExecutor which returns results from the pipeline. The WorkingSet
            // ('ws') and the PipelineProxyStage ('proxy') will be owned by the created
            // PlanExecutor.
            auto ws = make_unique<WorkingSet>();
            auto proxy = make_unique<PipelineProxyStage>(txn, pipeline, ws.get());

            auto statusWithPlanExecutor = (NULL == collection)
                ? PlanExecutor::make(
                      txn, std::move(ws), std::move(proxy), nss.ns(), PlanExecutor::YIELD_MANUAL)
                : PlanExecutor::make(
                      txn, std::move(ws), std::move(proxy), collection, PlanExecutor::YIELD_MANUAL);
            invariant(statusWithPlanExecutor.isOK());
            exec = std::move(statusWithPlanExecutor.getValue());

            {
                auto planSummary = Explain::getPlanSummary(exec.get());
                stdx::lock_guard<Client>(*txn->getClient());
                curOp->setPlanSummary_inlock(std::move(planSummary));
            }

            if (collection) {
                PlanSummaryStats stats;
                Explain::getSummaryStats(*exec, &stats);
                collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed);
            }

            if (collection) {
                const bool isAggCursor = true;  // enable special locking behavior
                ClientCursor* cursor =
                    new ClientCursor(collection->getCursorManager(),
                                     exec.release(),
                                     nss.ns(),
                                     txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                                     0,
                                     cmdObj.getOwned(),
                                     isAggCursor);
                pin.reset(new ClientCursorPin(collection->getCursorManager(), cursor->cursorid()));
                // Don't add any code between here and the start of the try block.
            }

            // At this point, it is safe to release the collection lock.
            // - In the case where we have a collection: we will need to reacquire the
            //   collection lock later when cleaning up our ClientCursorPin.
            // - In the case where we don't have a collection: our PlanExecutor won't be
            //   registered, so it will be safe to clean it up outside the lock.
            invariant(!exec || !collection);
        }

        try {
            // Unless set to true, the ClientCursor created above will be deleted on block exit.
            bool keepCursor = false;

            // Use of the aggregate command without specifying to use a cursor is deprecated.
            // Applications should migrate to using cursors. Cursors are strictly more useful than
            // outputting the results as a single document, since results that fit inside a single
            // BSONObj will also fit inside a single batch.
            //
            // We occasionally log a deprecation warning.
            if (!request.getValue().isCursorCommand()) {
                RARELY {
                    warning()
                        << "Use of the aggregate command without the 'cursor' "
                           "option is deprecated. See "
                           "http://dochub.mongodb.org/core/aggregate-without-cursor-deprecation.";
                }
            }

            // If both explain and cursor are specified, explain wins.
            if (expCtx->isExplain) {
                result << "stages" << Value(pipeline->writeExplainOps());
            } else if (request.getValue().isCursorCommand()) {
                keepCursor = handleCursorCommand(txn,
                                                 nss.ns(),
                                                 pin.get(),
                                                 pin ? pin->c()->getExecutor() : exec.get(),
                                                 request.getValue(),
                                                 result);
            } else {
                pipeline->run(result);
            }

            if (!expCtx->isExplain) {
                PlanSummaryStats stats;
                Explain::getSummaryStats(pin ? *pin->c()->getExecutor() : *exec.get(), &stats);
                curOp->debug().setPlanSummaryMetrics(stats);
                curOp->debug().nreturned = stats.nReturned;
            }

            // Clean up our ClientCursorPin, if needed.  We must reacquire the collection lock
            // in order to do so.
            if (pin) {
                // We acquire locks here with DBLock and CollectionLock instead of using
                // AutoGetCollectionForRead.  AutoGetCollectionForRead will throw if the
                // sharding version is out of date, and we don't care if the sharding version
                // has changed.
                Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS);
                Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS);
                if (keepCursor) {
                    pin->release();
                } else {
                    pin->deleteUnderlying();
                }
            }
        } catch (...) {
/**
 * Prepares every database known to the storage engine for use at startup and returns whether
 * there are non-local databases.
 *
 * The global write lock is taken for the duration of the call. When 'storageGlobalParams.repair'
 * is set, each database is repaired first (the "local" database before all others) and a missing
 * featureCompatibilityVersion document is restored. Afterwards every database is opened, the data
 * files are checked for compatibility with this binary, the featureCompatibilityVersion found in
 * the admin database is cached in 'serverGlobalParams', and temporary collections are cleared
 * where appropriate.
 *
 * If there was an error because the wrong mongod version was used for these datafiles, a
 * DBException with status ErrorCodes::MustDowngrade is thrown. The process exits if the storage
 * engine reports the data files as incompatible, and fasserts if no featureCompatibilityVersion
 * document exists while non-local databases are present.
 */
bool repairDatabasesAndCheckVersion(OperationContext* opCtx) {
    auto const storageEngine = opCtx->getServiceContext()->getStorageEngine();
    Lock::GlobalWrite lk(opCtx);

    std::vector<std::string> dbNames = storageEngine->listDatabases();

    // Rebuilding indexes must be done before a database can be opened, except when using repair,
    // which rebuilds all indexes when it is done.
    if (!storageGlobalParams.readOnly && !storageGlobalParams.repair) {
        // Determine whether this is a replica set node running in standalone mode. If we're in
        // repair mode, we cannot set the flag yet as it needs to open a database and look through a
        // collection. Rebuild the necessary indexes after setting the flag.
        setReplSetMemberInStandaloneMode(opCtx);
        rebuildIndexes(opCtx, storageEngine);
    }

    // Tracks whether ensureCollectionProperties() already ran inside the repair branch below, so
    // that it is not run a second time afterwards.
    bool ensuredCollectionProperties = false;

    // Repair all databases first, so that we do not try to open them if they are in bad shape
    auto databaseHolder = DatabaseHolder::get(opCtx);
    if (storageGlobalParams.repair) {
        invariant(!storageGlobalParams.readOnly);

        if (MONGO_FAIL_POINT(exitBeforeDataRepair)) {
            log() << "Exiting because 'exitBeforeDataRepair' fail point was set.";
            quickExit(EXIT_ABRUPT);
        }

        // Ensure that the local database is repaired first, if it exists, so that we can open it
        // before any other database to be able to determine if this is a replica set node running
        // in standalone mode before rebuilding any indexes.
        auto dbNamesIt = std::find(dbNames.begin(), dbNames.end(), NamespaceString::kLocalDb);
        if (dbNamesIt != dbNames.end()) {
            std::swap(dbNames.front(), *dbNamesIt);
            invariant(dbNames.front() == NamespaceString::kLocalDb);
        }

        // Callback handed to repairDatabase(); it sets the standalone-mode flag when invoked for
        // the local database.
        stdx::function<void(const std::string& dbName)> onRecordStoreRepair =
            [opCtx](const std::string& dbName) {
                if (dbName == NamespaceString::kLocalDb) {
                    setReplSetMemberInStandaloneMode(opCtx);
                }
            };

        for (const auto& dbName : dbNames) {
            LOG(1) << "    Repairing database: " << dbName;
            fassertNoTrace(18506,
                           repairDatabase(opCtx, storageEngine, dbName, onRecordStoreRepair));
        }

        // All collections must have UUIDs before restoring the FCV document to a version that
        // requires UUIDs.
        uassertStatusOK(ensureCollectionProperties(opCtx, dbNames));
        ensuredCollectionProperties = true;

        // Attempt to restore the featureCompatibilityVersion document if it is missing.
        NamespaceString fcvNSS(NamespaceString::kServerConfigurationNamespace);

        // The document counts as missing if the database, the collection, or the document itself
        // cannot be found.
        auto db = databaseHolder->getDb(opCtx, fcvNSS.db());
        Collection* versionColl;
        BSONObj featureCompatibilityVersion;
        if (!db || !(versionColl = db->getCollection(opCtx, fcvNSS)) ||
            !Helpers::findOne(opCtx,
                              versionColl,
                              BSON("_id" << FeatureCompatibilityVersionParser::kParameterName),
                              featureCompatibilityVersion)) {
            uassertStatusOK(restoreMissingFeatureCompatibilityVersionDocument(opCtx, dbNames));
        }
    }

    if (!ensuredCollectionProperties) {
        uassertStatusOK(ensureCollectionProperties(opCtx, dbNames));
    }

    if (!storageGlobalParams.readOnly) {
        // We open the "local" database before calling hasReplSetConfigDoc() to ensure the in-memory
        // catalog entries for the 'kSystemReplSetNamespace' collection have been populated if the
        // collection exists. If the "local" database didn't exist at this point yet, then it will
        // be created. If the mongod is running in a read-only mode, then it is fine to not open the
        // "local" database and populate the catalog entries because we won't attempt to drop the
        // temporary collections anyway.
        Lock::DBLock dbLock(opCtx, NamespaceString::kSystemReplSetNamespace.db(), MODE_X);
        databaseHolder->openDb(opCtx, NamespaceString::kSystemReplSetNamespace.db());
    }

    if (storageGlobalParams.repair) {
        if (MONGO_FAIL_POINT(exitBeforeRepairInvalidatesConfig)) {
            log() << "Exiting because 'exitBeforeRepairInvalidatesConfig' fail point was set.";
            quickExit(EXIT_ABRUPT);
        }
        // This must be done after opening the "local" database as it modifies the replica set
        // config.
        auto repairObserver = StorageRepairObserver::get(opCtx->getServiceContext());
        repairObserver->onRepairDone(opCtx);
        if (repairObserver->isDataModified()) {
            // Report every modification recorded by the repair observer.
            warning() << "Modifications made by repair:";
            const auto& mods = repairObserver->getModifications();
            for (const auto& mod : mods) {
                warning() << "  " << mod;
            }
            if (hasReplSetConfigDoc(opCtx)) {
                warning() << "WARNING: Repair may have modified replicated data. This node will no "
                             "longer be able to join a replica set without a full re-sync";
            }
        }
    }

    const repl::ReplSettings& replSettings =
        repl::ReplicationCoordinator::get(opCtx)->getSettings();

    // On replica set members we only clear temp collections on DBs other than "local" during
    // promotion to primary. On pure slaves, they are only cleared when the oplog tells them
    // to. The local DB is special because it is not replicated.  See SERVER-10927 for more
    // details.
    const bool shouldClearNonLocalTmpCollections =
        !(hasReplSetConfigDoc(opCtx) || replSettings.usingReplSets());

    // Set to true once a parseable featureCompatibilityVersion document has been found in the
    // admin database.
    bool fcvDocumentExists = false;

    // Set to true once any database other than "local" has been seen; this is the return value.
    bool nonLocalDatabases = false;

    // Refresh list of database names to include newly-created admin, if it exists.
    dbNames = storageEngine->listDatabases();
    for (const auto& dbName : dbNames) {
        if (dbName != "local") {
            nonLocalDatabases = true;
        }
        LOG(1) << "    Recovering database: " << dbName;

        auto db = databaseHolder->openDb(opCtx, dbName);
        invariant(db);

        // First thing after opening the database is to check for file compatibility,
        // otherwise we might crash if this is a deprecated format.
        auto status = storageEngine->currentFilesCompatible(opCtx);
        if (!status.isOK()) {
            if (status.code() == ErrorCodes::CanRepairToDowngrade) {
                // Convert CanRepairToDowngrade statuses to MustUpgrade statuses to avoid logging a
                // potentially confusing and inaccurate message.
                //
                // TODO SERVER-24097: Log a message informing the user that they can start the
                // current version of mongod with --repair and then proceed with normal startup.
                status = {ErrorCodes::MustUpgrade, status.reason()};
            }
            severe() << "Unable to start mongod due to an incompatibility with the data files and"
                        " this version of mongod: "
                     << redact(status);
            severe() << "Please consult our documentation when trying to downgrade to a previous"
                        " major release";
            quickExit(EXIT_NEED_UPGRADE);
            MONGO_UNREACHABLE;
        }


        // If the server configuration collection already contains a valid
        // featureCompatibilityVersion document, cache it in-memory as a server parameter.
        if (dbName == "admin") {
            if (Collection* versionColl =
                    db->getCollection(opCtx, NamespaceString::kServerConfigurationNamespace)) {
                BSONObj featureCompatibilityVersion;
                if (Helpers::findOne(
                        opCtx,
                        versionColl,
                        BSON("_id" << FeatureCompatibilityVersionParser::kParameterName),
                        featureCompatibilityVersion)) {
                    auto swVersion =
                        FeatureCompatibilityVersionParser::parse(featureCompatibilityVersion);
                    // Note this error path captures all cases of an FCV document existing,
                    // but with any value other than "4.0" or "4.2". This includes unexpected
                    // cases with no path forward such as the FCV value not being a string.
                    uassert(ErrorCodes::MustDowngrade,
                            str::stream()
                                << "UPGRADE PROBLEM: Found an invalid "
                                   "featureCompatibilityVersion document (ERROR: "
                                << swVersion.getStatus()
                                << "). If the current featureCompatibilityVersion is below "
                                   "4.0, see the documentation on upgrading at "
                                << feature_compatibility_version_documentation::kUpgradeLink
                                << ".",
                            swVersion.isOK());

                    fcvDocumentExists = true;
                    auto version = swVersion.getValue();
                    serverGlobalParams.featureCompatibility.setVersion(version);
                    FeatureCompatibilityVersion::updateMinWireVersion();

                    // On startup, if the version is in an upgrading or downgrading state, print a
                    // warning.
                    if (version ==
                        ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo42) {
                        log() << "** WARNING: A featureCompatibilityVersion upgrade did not "
                              << "complete. " << startupWarningsLog;
                        log() << "**          The current featureCompatibilityVersion is "
                              << FeatureCompatibilityVersionParser::toString(version) << "."
                              << startupWarningsLog;
                        log() << "**          To fix this, use the setFeatureCompatibilityVersion "
                              << "command to resume upgrade to 4.2." << startupWarningsLog;
                    } else if (version == ServerGlobalParams::FeatureCompatibility::Version::
                                              kDowngradingTo40) {
                        log() << "** WARNING: A featureCompatibilityVersion downgrade did not "
                              << "complete. " << startupWarningsLog;
                        log() << "**          The current featureCompatibilityVersion is "
                              << FeatureCompatibilityVersionParser::toString(version) << "."
                              << startupWarningsLog;
                        log() << "**          To fix this, use the setFeatureCompatibilityVersion "
                              << "command to resume downgrade to 4.0." << startupWarningsLog;
                    }
                }
            }
        }

        if (replSettings.usingReplSets()) {
            // We only care about _id indexes and drop-pending collections if we are in a replset.
            db->checkForIdIndexesAndDropPendingCollections(opCtx);
            // Ensure oplog is capped (mongodb does not guarantee order of inserts on noncapped
            // collections)
            if (db->name() == "local") {
                checkForCappedOplog(opCtx, db);
            }
        }

        // Temporary collections are never cleared in read-only mode. Otherwise the "local"
        // database is always cleared, and other databases only when the flag computed above
        // allows it.
        if (!storageGlobalParams.readOnly &&
            (shouldClearNonLocalTmpCollections || dbName == "local")) {
            db->clearTmpCollections(opCtx);
        }
    }

    // Fail to start up if there is no featureCompatibilityVersion document and there are non-local
    // databases present.
    if (!fcvDocumentExists && nonLocalDatabases) {
        severe()
            << "Unable to start up mongod due to missing featureCompatibilityVersion document.";
        severe() << "Please run with --repair to restore the document.";
        fassertFailedNoTrace(40652);
    }

    LOG(1) << "done repairDatabases";
    return nonLocalDatabases;
}
Пример #14
0
    bool WriteBatchExecutor::applyWriteItem( const BatchItemRef& itemRef,
                                             WriteStats* stats,
                                             BSONObj* upsertedID,
                                             BatchedErrorDetail* error ) {
        const BatchedCommandRequest& request = *itemRef.getRequest();
        const string& ns = request.getNS();

        // Clear operation's LastError before starting.
        _le->reset( true );

        //uint64_t itemTimeMicros = 0;
        bool opSuccess = true;

        // Each write operation executes in its own PageFaultRetryableSection.  This means that
        // a single batch can throw multiple PageFaultException's, which is not the case for
        // other operations.
        PageFaultRetryableSection s;
        while ( true ) {
            try {
                // Execute the write item as a child operation of the current operation.
                CurOp childOp( _client, _client->curop() );

                HostAndPort remote =
                    _client->hasRemote() ? _client->getRemote() : HostAndPort( "0.0.0.0", 0 );

                // TODO Modify CurOp "wrapped" constructor to take an opcode, so calling .reset()
                // is unneeded
                childOp.reset( remote, getOpCode( request.getBatchType() ) );

                childOp.ensureStarted();
                OpDebug& opDebug = childOp.debug();
                opDebug.ns = ns;
                {
                    Lock::DBWrite dbLock( ns );
                    Client::Context ctx( ns,
                                         storageGlobalParams.dbpath, // TODO: better constructor?
                                         false /* don't check version here */);

                    opSuccess = doWrite( ns, itemRef, &childOp, stats, upsertedID, error );
                }
                childOp.done();
                //itemTimeMicros = childOp.totalTimeMicros();

                opDebug.executionTime = childOp.totalTimeMillis();
                opDebug.recordStats();

                // Log operation if running with at least "-v", or if exceeds slow threshold.
                if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))
                     || opDebug.executionTime >
                        serverGlobalParams.slowMS + childOp.getExpectedLatencyMs()) {

                    MONGO_TLOG(1) << opDebug.report( childOp ) << endl;
                }

                // TODO Log operation if logLevel >= 3 and assertion thrown (as assembleResponse()
                // does).

                // Save operation to system.profile if shouldDBProfile().
                if ( childOp.shouldDBProfile( opDebug.executionTime ) ) {
                    profile( *_client, getOpCode( request.getBatchType() ), childOp );
                }
                break;
            }
            catch ( PageFaultException& e ) {
                e.touch();
            }
        }

        return opSuccess;
    }
Пример #15
0
/****************************************************************************
Desc:	This routine starts a transaction for the specified database.  The
		transaction may be part of an overall larger transaction.

		pDb           - Database handle whose transaction state is set up.
		uiTransType   - FLM_READ_TRANS takes the read path (no lock); any
		                other value takes the path that locks the database.
		uiMaxLockWait - Passed to dbLock() for non-read transactions.
		uiFlags       - FLM_DONT_KILL_TRANS (read transactions only) and
		                FLM_DONT_POISON_CACHE are mirrored into pDb->uiFlags.
		pucLogHdr     - Optional.  If non-NULL, receives LOG_HEADER_SIZE
		                bytes: the last committed log header for a read
		                transaction, the uncommitted log header otherwise.

		Returns FERR_OK on success.  On any error the FDB is unlinked from
		the transaction via flmUnlinkDbFromTrans() before returning.
****************************************************************************/
RCODE flmBeginDbTrans(
	FDB *			pDb,
	FLMUINT		uiTransType,
	FLMUINT		uiMaxLockWait,
	FLMUINT		uiFlags,
	FLMBYTE *	pucLogHdr)
{
	RCODE			rc = FERR_OK;
	FFILE *		pFile = pDb->pFile;
	FLMBOOL		bMutexLocked = FALSE;
	FLMBYTE *	pucLastCommittedLogHdr;
	DB_STATS *	pDbStats = pDb->pDbStats;

	if( RC_BAD( rc = flmCheckDatabaseState( pDb)))
	{
		goto Exit;
	}

	// Initialize a few things - as few as is necessary to avoid
	// unnecessary overhead.

	pDb->eAbortFuncId = FLM_UNKNOWN_FUNC;
	pDb->AbortRc = FERR_OK;
	pucLastCommittedLogHdr = &pFile->ucLastCommittedLogHdr [0];
	pDb->KrefCntrl.bKrefSetup = FALSE;
	pDb->uiTransType = uiTransType;
	pDb->uiThreadId = (FLMUINT)f_threadId();
	pDb->uiTransCount++;

	// Link the FDB to the file's most current FDICT structure,
	// if there is one.
	//
	// Also, if it is a read transaction, link the FDB
	// into the list of read transactions off of
	// the FFILE structure.

	f_mutexLock( gv_FlmSysData.hShareMutex);
	bMutexLocked = TRUE;
	if (pFile->pDictList)
	{

		// Link the FDB to the FDICT.

		flmLinkFdbToDict( pDb, pFile->pDictList);
	}

	// If it is a read transaction, link into the list of
	// read transactions off of the FFILE structure.  The log header
	// information is first taken from the last committed log header,
	// and the transaction is then appended to the end of the list.

	if (uiTransType == FLM_READ_TRANS)
	{
		flmGetLogHdrInfo( pucLastCommittedLogHdr, &pDb->LogHdr);

		// Link in at the end of the transaction list.

		pDb->pNextReadTrans = NULL;
		if ((pDb->pPrevReadTrans = pFile->pLastReadTrans) != NULL)
		{

			// Make sure transaction IDs are always in ascending order.  They
			// should be at this point.

			flmAssert( pFile->pLastReadTrans->LogHdr.uiCurrTransID <=
							pDb->LogHdr.uiCurrTransID);
			pFile->pLastReadTrans->pNextReadTrans = pDb;
		}
		else
		{
			// The list was empty, so this transaction is also the first one.

			pFile->pFirstReadTrans = pDb;
		}
		pFile->pLastReadTrans = pDb;
		pDb->uiInactiveTime = 0;

		if( uiFlags & FLM_DONT_KILL_TRANS)
		{
			pDb->uiFlags |= FDB_DONT_KILL_TRANS;
		}
		else
		{
			pDb->uiFlags &= ~FDB_DONT_KILL_TRANS;
		}
		
		// Hand the caller a copy of the last committed log header, if requested.

		if (pucLogHdr)
		{
			f_memcpy( pucLogHdr, &pDb->pFile->ucLastCommittedLogHdr[0],
						LOG_HEADER_SIZE);
		}
	}

	f_mutexUnlock( gv_FlmSysData.hShareMutex);
	bMutexLocked = FALSE;

	if( uiFlags & FLM_DONT_POISON_CACHE)
	{
		pDb->uiFlags |= FDB_DONT_POISON_CACHE;
	}
	else
	{
		pDb->uiFlags &= ~FDB_DONT_POISON_CACHE;
	}

	// Put an exclusive lock on the database if we are not in a read
	// transaction.  Read transactions require no lock.

	if (uiTransType != FLM_READ_TRANS)
	{
		flmAssert( pDb->pIxStats == NULL);

		// Set the bHadUpdOper to TRUE for all transactions to begin with.
		// Many calls to flmBeginDbTrans are internal, and we WANT the
		// normal behavior at the end of the transaction when it is
		// committed or aborted.  The only time this flag will be set
		// to FALSE is when the application starts the transaction as
		// opposed to an internal starting of the transaction.

		pDb->bHadUpdOper = TRUE;

		// Initialize the count of blocks changed to be 0

		pDb->uiBlkChangeCnt = 0;

		if (RC_BAD( rc = dbLock( pDb, uiMaxLockWait)))
		{
			goto Exit;
		}

		// If there was a problem with the RFL volume, we must wait
		// for a checkpoint to be completed before continuing.
		// The checkpoint thread looks at this same flag and forces
		// a checkpoint.  If it completes one successfully, it will
		// reset this flag.
		//
		// Also, if the last forced checkpoint had a problem
		// (pFile->CheckpointRc != FERR_OK), we don't want to
		// start up a new update transaction until it is resolved.

		if (!pFile->pRfl->seeIfRflVolumeOk() ||
			 RC_BAD( pFile->CheckpointRc))
		{
			rc = RC_SET( FERR_MUST_WAIT_CHECKPOINT);
			goto Exit;
		}

		// Set the first log block address to zero.

		pFile->uiFirstLogBlkAddress = 0;

		// Header must be read before opening roll forward log file to make
		// sure we have the most current log file and log options.

		f_memcpy( pFile->ucUncommittedLogHdr, pucLastCommittedLogHdr,
			LOG_HEADER_SIZE);
		flmGetLogHdrInfo( pucLastCommittedLogHdr, &pDb->LogHdr);

		// Need to increment the current checkpoint for update transactions
		// so that it will be correct when we go to mark cache blocks.

		if (pDb->uiFlags & FDB_REPLAYING_RFL)
		{
			// During recovery we need to set the transaction ID to the
			// transaction ID that was logged.

			pDb->LogHdr.uiCurrTransID = pFile->pRfl->getCurrTransID();
		}
		else
		{
			pDb->LogHdr.uiCurrTransID++;
		}
		f_mutexLock( gv_FlmSysData.hShareMutex);

		// Link FDB to the most current local dictionary, if there
		// is one.

		if (pFile->pDictList != pDb->pDict && pFile->pDictList)
		{
			flmLinkFdbToDict( pDb, pFile->pDictList);
		}
		pFile->uiUpdateTransID = pDb->LogHdr.uiCurrTransID;
		f_mutexUnlock( gv_FlmSysData.hShareMutex);

		// Set the transaction EOF to the current file EOF

		pDb->uiTransEOF = pDb->LogHdr.uiLogicalEOF;

		// Put the transaction ID into the uncommitted log header.

		UD2FBA( (FLMUINT32)pDb->LogHdr.uiCurrTransID,
					&pFile->ucUncommittedLogHdr [LOG_CURR_TRANS_ID]);

		// Hand the caller a copy of the uncommitted log header, if requested.

		if (pucLogHdr)
		{
			f_memcpy( pucLogHdr, &pDb->pFile->ucUncommittedLogHdr [0],
							LOG_HEADER_SIZE);
		}
	}

	// Record the transaction start time only when statistics are attached
	// to this FDB.

	if (pDbStats)
	{
		f_timeGetTimeStamp( &pDb->TransStartTime);
	}

	// If we do not have a dictionary, read it in from disk.
	// NOTE: This should only happen when we are first opening
	// the database.

	if (!pDb->pDict)
	{
		flmAssert( pDb->pFile->uiFlags & DBF_BEING_OPENED);
	
		if (RC_BAD( rc = fdictRebuild( pDb)))
		{
			// Free whatever dictionary fdictRebuild() left attached to the
			// FDB before failing.

			if (pDb->pDict)
			{
				flmFreeDict( pDb->pDict);
				pDb->pDict = NULL;
			}
			
			goto Exit;
		}
	
		f_mutexLock( gv_FlmSysData.hShareMutex);
	
		// At this point, we will not yet have opened the database for
		// general use, so there is no way that any other thread can have
		// created a dictionary yet.
	
		flmAssert( pDb->pFile->pDictList == NULL);
	
		// Link the new local dictionary to its file structure.
	
		flmLinkDictToFile( pDb->pFile, pDb->pDict);
		f_mutexUnlock( gv_FlmSysData.hShareMutex);
	}

Exit:

	// Everything below runs on both the success path and every
	// "goto Exit" error path.
	// NOTE(review): this includes the case where flmCheckDatabaseState()
	// failed before anything was linked - confirm that
	// flmUnlinkDbFromTrans() tolerates being called in that state.

	if( bMutexLocked)
	{
		f_mutexUnlock( gv_FlmSysData.hShareMutex);
	}

	if (uiTransType != FLM_READ_TRANS)
	{
		// Only log the begin-transaction through pRfl if everything above
		// succeeded; a failure here becomes the return code.

		if (RC_OK( rc))
		{
			rc = pFile->pRfl->logBeginTransaction( pDb);
		}
#ifdef FLM_DBG_LOG
		flmDbgLogUpdate( pFile->uiFFileId, pDb->LogHdr.uiCurrTransID,
				0, 0, rc, "TBeg");
#endif
	}

	// Notify registered update-event callbacks about the begin of an update
	// transaction.  A transaction ID of zero is reported on failure.

	if( uiTransType == FLM_UPDATE_TRANS &&
		 gv_FlmSysData.UpdateEvents.pEventCBList)
	{
		flmTransEventCallback( F_EVENT_BEGIN_TRANS, (HFDB)pDb, rc,
					(FLMUINT)(RC_OK( rc)
								 ? pDb->LogHdr.uiCurrTransID
								 : (FLMUINT)0));
	}

	if (RC_BAD( rc))
	{
		// If there was an error, unlink the database from the transaction
		// structure as well as from the FDICT structure.

		flmUnlinkDbFromTrans( pDb, FALSE);

		if (pDb->pStats)
		{
			(void)flmStatUpdate( &gv_FlmSysData.Stats, &pDb->Stats);
		}
	}

	return( rc);
}