Example #1
    // Dispatches a single client request to the appropriate handler and fills in dbresponse.
    void assembleResponse( Message &m, DbResponse &dbresponse, const HostAndPort& remote ) {

        // before we lock...
        int op = m.operation();
        bool isCommand = false;
        const char *ns = m.singleData()->_data + 4;

        if ( op == dbQuery ) {
            if( strstr(ns, ".$cmd") ) {
                isCommand = true;
                opwrite(m);
                if( strstr(ns, ".$cmd.sys.") ) {
                    if( strstr(ns, "$cmd.sys.inprog") ) {
                        inProgCmd(m, dbresponse);
                        return;
                    }
                    if( strstr(ns, "$cmd.sys.killop") ) {
                        killOp(m, dbresponse);
                        return;
                    }
                    if( strstr(ns, "$cmd.sys.unlock") ) {
                        unlockFsync(ns, m, dbresponse);
                        return;
                    }
                }
            }
            else {
                opread(m);
            }
        }
        else if( op == dbGetMore ) {
            opread(m);
        }
        else {
            opwrite(m);
        }

        globalOpCounters.gotOp( op , isCommand );

        Client& c = cc();
        if ( c.getAuthenticationInfo() )
            c.getAuthenticationInfo()->startRequest();
        
        auto_ptr<CurOp> nestedOp;
        CurOp* currentOpP = c.curop();
        if ( currentOpP->active() ) {
            nestedOp.reset( new CurOp( &c , currentOpP ) );
            currentOpP = nestedOp.get();
        }
        else {
            c.newTopLevelRequest();
        }

        CurOp& currentOp = *currentOpP;
        currentOp.reset(remote,op);

        OpDebug& debug = currentOp.debug();
        debug.op = op;

        long long logThreshold = cmdLine.slowMS;
        bool shouldLog = logLevel >= 1;

        if ( op == dbQuery ) {
            if ( handlePossibleShardedMessage( m , &dbresponse ) )
                return;
            receivedQuery(c , dbresponse, m );
        }
        else if ( op == dbGetMore ) {
            if ( ! receivedGetMore(dbresponse, m, currentOp) )
                shouldLog = true;
        }
        else if ( op == dbMsg ) {
            // deprecated - replaced by commands
            char *p = m.singleData()->_data;
            int len = strlen(p);
            if ( len > 400 )
                out() << curTimeMillis64() % 10000 <<
                      " long msg received, len:" << len << endl;

            Message *resp = new Message();
            if ( strcmp( "end" , p ) == 0 )
                resp->setData( opReply , "dbMsg end no longer supported" );
            else
                resp->setData( opReply , "i am fine - dbMsg deprecated");

            dbresponse.response = resp;
            dbresponse.responseTo = m.header()->id;
        }
        else {
            try {
                const NamespaceString nsString( ns );

                // The following operations all require authorization.
                // dbInsert, dbUpdate and dbDelete can be easily pre-authorized
                // here, but dbKillCursors cannot.
                if ( op == dbKillCursors ) {
                    currentOp.ensureStarted();
                    logThreshold = 10;
                    receivedKillCursors(m);
                }
                else if ( !nsString.isValid() ) {
                    // Only killCursors doesn't care about namespaces
                    uassert( 16257, str::stream() << "Invalid ns [" << ns << "]", false );
                }
                else if ( ! c.getAuthenticationInfo()->isAuthorized(
                                  nsToDatabase( m.singleData()->_data + 4 ) ) ) {
                    setLastError(0, "unauthorized");
                }
                else if ( op == dbInsert ) {
                    receivedInsert(m, currentOp);
                }
                else if ( op == dbUpdate ) {
                    receivedUpdate(m, currentOp);
                }
                else if ( op == dbDelete ) {
                    receivedDelete(m, currentOp);
                }
                else {
                    mongo::log() << "    operation isn't supported: " << op << endl;
                    currentOp.done();
                    shouldLog = true;
                }
            }
            catch ( UserException& ue ) {
                tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing "
                        << ue.toString() << endl;
                debug.exceptionInfo = ue.getInfo();
            }
            catch ( AssertionException& e ) {
                tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing "
                        << e.toString() << endl;
                debug.exceptionInfo = e.getInfo();
                shouldLog = true;
            }
        }
        currentOp.ensureStarted();
        currentOp.done();
        debug.executionTime = currentOp.totalTimeMillis();

        logThreshold += currentOp.getExpectedLatencyMs();

        if ( shouldLog || debug.executionTime > logThreshold ) {
            mongo::tlog() << debug.report( currentOp ) << endl;
        }

        if ( currentOp.shouldDBProfile( debug.executionTime ) ) {
            // performance profiling is on
            if ( Lock::isReadLocked() ) {
                mongo::log(1) << "note: not profiling because recursive read lock" << endl;
            }
            else if ( lockedForWriting() ) {
                mongo::log(1) << "note: not profiling because doing fsync+lock" << endl;
            }
            else {
                Lock::DBWrite lk( currentOp.getNS() );
                if ( dbHolder()._isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ) {
                    Client::Context cx( currentOp.getNS(), dbpath, false );
                    profile(c , currentOp );
                }
                else {
                    mongo::log() << "note: not profiling because db went away - probably a close on: " << currentOp.getNS() << endl;
                }
            }
        }
        
        debug.reset();
    } /* assembleResponse() */
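
The dispatch above hinges on the namespace string embedded in the message body: the first four bytes are a reserved int32, and the bytes from offset 4 up to the first NUL are the full "db.collection" namespace. A minimal sketch of the parsing this relies on (the helper name is made up; only the offsets mirror the code above):

    #include <cstring>
    #include <string>

    // Hypothetical stand-in for the ns/nsToDatabase handling above: skip the
    // leading int32, then take everything before the first '.' as the database.
    std::string databaseFromMessageBody(const char* body) {
        const char* ns = body + 4;              // same offset as _data + 4 above
        const char* dot = std::strchr(ns, '.');
        return dot ? std::string(ns, dot - ns)  // "test.$cmd" -> "test"
                   : std::string(ns);           // no dot: whole string is the db
    }
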
Example #2
// Dispatches a single client request to the appropriate handler and fills in dbresponse.
void assembleResponse( Message &m, DbResponse &dbresponse, const HostAndPort& remote ) {

    // before we lock...
    int op = m.operation();
    bool isCommand = false;
    const char *ns = m.singleData()->_data + 4;
    if ( op == dbQuery ) {
        if( strstr(ns, ".$cmd") ) {
            isCommand = true;
            opwrite(m);
            if( strstr(ns, ".$cmd.sys.") ) {
                if( strstr(ns, "$cmd.sys.inprog") ) {
                    inProgCmd(m, dbresponse);
                    return;
                }
                if( strstr(ns, "$cmd.sys.killop") ) {
                    killOp(m, dbresponse);
                    return;
                }
                if( strstr(ns, "$cmd.sys.unlock") ) {
                    unlockFsync(ns, m, dbresponse);
                    return;
                }
            }
        }
        else {
            opread(m);
        }
    }
    else if( op == dbGetMore ) {
        opread(m);
    }
    else {
        opwrite(m);
    }

    globalOpCounters.gotOp( op , isCommand );

    Client& c = cc();

    auto_ptr<CurOp> nestedOp;
    CurOp* currentOpP = c.curop();
    if ( currentOpP->active() ) {
        nestedOp.reset( new CurOp( &c , currentOpP ) );
        currentOpP = nestedOp.get();
    }
    CurOp& currentOp = *currentOpP;
    currentOp.reset(remote,op);

    OpDebug& debug = currentOp.debug();
    StringBuilder& ss = debug.str;
    ss << opToString( op ) << " ";

    int logThreshold = cmdLine.slowMS;
    bool log = logLevel >= 1;

    if ( op == dbQuery ) {
        if ( handlePossibleShardedMessage( m , &dbresponse ) )
            return;
        receivedQuery(c , dbresponse, m );
    }
    else if ( op == dbGetMore ) {
        if ( ! receivedGetMore(dbresponse, m, currentOp) )
            log = true;
    }
    else if ( op == dbMsg ) {
        // deprecated - replaced by commands
        char *p = m.singleData()->_data;
        int len = strlen(p);
        if ( len > 400 )
            out() << curTimeMillis() % 10000 <<
                  " long msg received, len:" << len << endl;

        Message *resp = new Message();
        if ( strcmp( "end" , p ) == 0 )
            resp->setData( opReply , "dbMsg end no longer supported" );
        else
            resp->setData( opReply , "i am fine - dbMsg deprecated");

        dbresponse.response = resp;
        dbresponse.responseTo = m.header()->id;
    }
    else {
        const char *ns = m.singleData()->_data + 4;
        char cl[256];
        nsToDatabase(ns, cl);
        if( ! c.getAuthenticationInfo()->isAuthorized(cl) ) {
            uassert_nothrow("unauthorized");
        }
        else {
            try {
                if ( op == dbInsert ) {
                    receivedInsert(m, currentOp);
                }
                else if ( op == dbUpdate ) {
                    receivedUpdate(m, currentOp);
                }
                else if ( op == dbDelete ) {
                    receivedDelete(m, currentOp);
                }
                else if ( op == dbKillCursors ) {
                    currentOp.ensureStarted();
                    logThreshold = 10;
                    ss << "killcursors ";
                    receivedKillCursors(m);
                }
                else {
                    mongo::log() << "    operation isn't supported: " << op << endl;
                    currentOp.done();
                    log = true;
                }
            }
            catch ( UserException& ue ) {
                tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << ue.toString() << endl;
                ss << " exception " << ue.toString();
            }
            catch ( AssertionException& e ) {
                tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << e.toString() << endl;
                ss << " exception " << e.toString();
                log = true;
            }
        }
    }
    currentOp.ensureStarted();
    currentOp.done();
    int ms = currentOp.totalTimeMillis();

    //DEV log = true;
    if ( log || ms > logThreshold ) {
        if( logLevel < 3 && op == dbGetMore && strstr(ns, ".oplog.") && ms < 3000 && !log ) {
            /* it's normal for getMore on the oplog to be slow because of use of awaitdata flag. */
        }
        else {
            ss << ' ' << ms << "ms";
            mongo::tlog() << ss.str() << endl;
        }
    }

    if ( currentOp.shouldDBProfile( ms ) ) {
        // performance profiling is on
        if ( dbMutex.getState() < 0 ) {
            mongo::log(1) << "note: not profiling because recursive read lock" << endl;
        }
        else {
            writelock lk;
            if ( dbHolder.isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ) {
                Client::Context cx( currentOp.getNS() );
                profile(c , currentOp, ms);
            }
            else {
                mongo::log() << "note: not profiling because db went away - probably a close on: " << currentOp.getNS() << endl;
            }
        }
    }

} /* assembleResponse() */
Example #3
        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );
            uassert(15967,
                    "invalid collection name: " + target,
                    NamespaceString::validCollectionComponent(target.c_str()));
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            string sourceDB = nsToDatabase(source);
            string targetDB = nsToDatabase(target);
            string databaseName = sourceDB;
            databaseName += ".system.indexes";

            int longestIndexNameLength = 0;
            vector<BSONObj> oldIndSpec = Helpers::findAll(databaseName, BSON("ns" << source));
            for (size_t i = 0; i < oldIndSpec.size(); ++i) {
                int thisLength = oldIndSpec[i].getField("name").valuesize();
                if (thisLength > longestIndexNameLength) {
                     longestIndexNameLength = thisLength;
                }
            }
            unsigned int longestAllowed = maxNamespaceLen - longestIndexNameLength - 1;
            if (target.size() > longestAllowed) {
                StringBuilder sb;
                sb << "collection name length of " << target.size()
                << " exceeds maximum length of " << longestAllowed
                << ", allowing for index names";
                uasserted(16451, sb.str());
            }

            bool capped = false;
            long long size = 0;
            std::vector<BSONObj> indexesInProg;
            {
                Client::Context ctx( source );
                NamespaceDetails *nsd = nsdetails( source );
                uassert( 10026 ,  "source namespace does not exist", nsd );
                indexesInProg = stopIndexBuilds(dbname, cmdObj);
                capped = nsd->isCapped();
                if ( capped )
                    for( DiskLoc i = nsd->firstExtent(); !i.isNull(); i = i.ext()->xnext )
                        size += i.ext()->length;
            }

            Client::Context ctx( target );

            if ( nsdetails( target ) ) {
                uassert( 10027 ,  "target namespace exists", cmdObj["dropTarget"].trueValue() );

                Status s = cc().database()->dropCollection( target );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    return false;
                }

            }


            // if we are renaming in the same database, just
            // rename the namespace and we're done.
            {
                if ( sourceDB == targetDB ) {
                    Status s = ctx.db()->renameCollection( source, target, cmdObj["stayTemp"].trueValue() );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        return false;
                    }
                    return true;
                }
            }

            // renaming across databases, so we must copy all
            // the data and then remove the source collection.
            BSONObjBuilder spec;
            if ( capped ) {
                spec.appendBool( "capped", true );
                spec.append( "size", double( size ) );
            }
            if ( !userCreateNS( target.c_str(), spec.done(), errmsg, false ) )
                return false;

            auto_ptr< DBClientCursor > c;
            DBDirectClient bridge;

            {
                c = bridge.query( source, BSONObj(), 0, 0, 0, fromRepl ? QueryOption_SlaveOk : 0 );
            }
            while( 1 ) {
                {
                    if ( !c->more() )
                        break;
                }
                BSONObj o = c->next();
                theDataFileMgr.insertWithObjMod( target.c_str(), o );
            }

            string sourceIndexes = nsToDatabase( source ) + ".system.indexes";
            string targetIndexes = nsToDatabase( target ) + ".system.indexes";
            {
                c = bridge.query( sourceIndexes, QUERY( "ns" << source ), 0, 0, 0, fromRepl ? QueryOption_SlaveOk : 0 );
            }
            while( 1 ) {
                {
                    if ( !c->more() )
                        break;
                }
                BSONObj o = c->next();
                BSONObjBuilder b;
                BSONObjIterator i( o );
                while( i.moreWithEOO() ) {
                    BSONElement e = i.next();
                    if ( e.eoo() )
                        break;
                    if ( strcmp( e.fieldName(), "ns" ) == 0 ) {
                        b.append( "ns", target );
                    }
                    else {
                        b.append( e );
                    }
                }
                BSONObj n = b.done();
                theDataFileMgr.insertWithObjMod( targetIndexes.c_str(), n );
            }

            {
                Client::Context ctx( source );
                Status s = ctx.db()->dropCollection( source );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    return false;
                }
                IndexBuilder::restoreIndexes(targetIndexes, indexesInProg);
            }
            return true;
        }
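
The index-copy loop above rewrites only the ns field of each spec. Illustratively (collection names made up), a document read from the source's system.indexes such as

    { "ns" : "a.foo", "key" : { "x" : 1 }, "name" : "x_1" }

is inserted into the target's system.indexes as

    { "ns" : "b.bar", "key" : { "x" : 1 }, "name" : "x_1" }

with every other field passing through b.append(e) untouched.
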
    Status repairDatabase( string dbName,
                           bool preserveClonedFilesOnFailure,
                           bool backupOriginalFiles ) {
        scoped_ptr<RepairFileDeleter> repairFileDeleter;
        doingRepair dr;
        dbName = nsToDatabase( dbName );

        log() << "repairDatabase " << dbName << endl;

        invariant( cc().database()->name() == dbName );
        invariant( cc().database()->path() == storageGlobalParams.dbpath );

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        getDur().syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        killCurrentOp.checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath );
            if ( originalDatabase == NULL )
                return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" );

            Database* tempDatabase = NULL;
            {
                bool justCreated = false;
                tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated );
                invariant( justCreated );
            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx( ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(),
                                                                          false,
                                                                          CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext( ns, tempDatabase );
                    tempCollection = tempDatabase->createCollection( ns, options, true, false );
                }

                Client::Context readContext( ns, originalDatabase );
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // data

                MultiIndexBlock indexBlock( tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.init( indexes );
                    if ( !status.isOK() )
                        return status;

                }

                scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(),
                                                                                          false,
                                                                                          CollectionScanParams::FORWARD ) );
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext( ns, tempDatabase );
                    StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock );
                    if ( !result.isOK() )
                        return result.getStatus();

                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }

                {
                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.commit();
                    if ( !status.isOK() )
                        return status;
                }

            }

            getDur().syncDataAndTruncateJournal();
            MongoFile::flushAll(true); // need both in case journaling is disabled

            killCurrentOp.checkForInterrupt(false);

            Client::Context tempContext( dbName, reservedPathString );
            Database::closeDatabase( dbName, reservedPathString );
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        Client::Context ctx( dbName );
        Database::closeDatabase(dbName, storageGlobalParams.dbpath);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
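
Before any copying starts, repairDatabase refuses to run when the recovered copy could not fit on disk; a negative value from File::freeSpace() means the amount is unknown, in which case the repair proceeds. The guard distilled (a sketch, not the MongoDB function):

    #include <cstdint>

    // Hypothetical distillation of the free-space check in repairDatabase():
    // a negative freeBytes means the platform could not report free space.
    bool haveRoomToRepair(int64_t dbBytes, int64_t freeBytes) {
        if (freeBytes < 0)
            return true;              // unknown free space -- proceed optimistically
        return freeBytes >= dbBytes;  // need room for one full copy of the db
    }
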
Example #5
    void Command::execCommandClientBasic(Command * c ,
                                         ClientBasic& client,
                                         int queryOptions,
                                         const char *ns,
                                         BSONObj& cmdObj,
                                         BSONObjBuilder& result,
                                         bool fromRepl ) {
        verify(c);

        std::string dbname = nsToDatabase(ns);

        // Access control checks
        if (AuthorizationManager::isAuthEnabled()) {
            std::vector<Privilege> privileges;
            c->addRequiredPrivileges(dbname, cmdObj, &privileges);
            AuthorizationSession* authSession = client.getAuthorizationSession();
            if (!authSession->checkAuthForPrivileges(privileges).isOK()) {
                result.append("note", str::stream() << "not authorized for command: " <<
                                    c->name << " on database " << dbname);
                appendCommandStatus(result, false, "unauthorized");
                return;
            }
        }
        if (c->adminOnly() &&
                c->localHostOnlyIfNoAuth(cmdObj) &&
                !AuthorizationManager::isAuthEnabled() &&
                !client.getIsLocalHostConnection()) {
            log() << "command denied: " << cmdObj.toString() << endl;
            appendCommandStatus(result,
                               false,
                               "unauthorized: this command must run from localhost when running db "
                               "without auth");
            return;
        }
        if (c->adminOnly() && !startsWith(ns, "admin.")) {
            log() << "command denied: " << cmdObj.toString() << endl;
            appendCommandStatus(result, false, "access denied - use admin db");
            return;
        }
        // End of access control checks

        if (cmdObj.getBoolField("help")) {
            stringstream help;
            help << "help for: " << c->name << " ";
            c->help( help );
            result.append( "help" , help.str() );
            result.append( "lockType" , c->locktype() );
            appendCommandStatus(result, true, "");
            return;
        }
        std::string errmsg;
        bool ok;
        try {
            ok = c->run( dbname , cmdObj, queryOptions, errmsg, result, false );
        }
        catch (DBException& e) {
            ok = false;
            int code = e.getCode();
            if (code == RecvStaleConfigCode) { // code for StaleConfigException
                throw;
            }

            stringstream ss;
            ss << "exception: " << e.what();
            errmsg = ss.str();
            result.append( "code" , code );
        }

        appendCommandStatus(result, ok, errmsg);
    }
Status CatalogManagerReplicaSet::shardCollection(OperationContext* txn,
                                                 const string& ns,
                                                 const ShardKeyPattern& fieldsAndOrder,
                                                 bool unique,
                                                 const vector<BSONObj>& initPoints,
                                                 const set<ShardId>& initShardIds) {
    // Lock the collection globally so that no other mongos can try to shard or drop the collection
    // at the same time.
    auto scopedDistLock = getDistLockManager()->lock(ns, "shardCollection");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    auto status = getDatabase(txn, nsToDatabase(ns));
    if (!status.isOK()) {
        return status.getStatus();
    }

    ShardId dbPrimaryShardId = status.getValue().value.getPrimary();
    const auto primaryShard = grid.shardRegistry()->getShard(txn, dbPrimaryShardId);

    {
        // In 3.0 and prior we include this extra safety check that the collection is not getting
        // sharded concurrently by two different mongos instances. It is not 100%-proof, but it
        // reduces the chance that two invocations of shard collection will step on each other's
        // toes.  Now we take the distributed lock so going forward this check won't be necessary
        // but we leave it around for compatibility with other mongoses from 3.0.
        // TODO(spencer): Remove this after 3.2 ships.
        auto countStatus = _runCountCommandOnConfig(
            txn, NamespaceString(ChunkType::ConfigNS), BSON(ChunkType::ns(ns)));
        if (!countStatus.isOK()) {
            return countStatus.getStatus();
        }
        if (countStatus.getValue() > 0) {
            return Status(ErrorCodes::AlreadyInitialized,
                          str::stream() << "collection " << ns << " already sharded with "
                                        << countStatus.getValue() << " chunks.");
        }
    }

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", ns);
        collectionDetail.append("primary", primaryShard->toString());

        {
            BSONArrayBuilder initialShards(collectionDetail.subarrayStart("initShards"));
            for (const ShardId& shardId : initShardIds) {
                initialShards.append(shardId);
            }
        }

        collectionDetail.append("numChunks", static_cast<int>(initPoints.size() + 1));

        logChange(txn,
                  txn->getClient()->clientAddress(true),
                  "shardCollection.start",
                  ns,
                  collectionDetail.obj());
    }

    shared_ptr<ChunkManager> manager(new ChunkManager(ns, fieldsAndOrder, unique));
    manager->createFirstChunks(txn, dbPrimaryShardId, &initPoints, &initShardIds);
    manager->loadExistingRanges(txn, nullptr);

    CollectionInfo collInfo;
    collInfo.useChunkManager(manager);
    collInfo.save(txn, ns);
    manager->reload(txn, true);

    // Tell the primary mongod to refresh its data
    // TODO:  Think the real fix here is for mongos to just
    //        assume that all collections are sharded, when we get there
    SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
        grid.shardRegistry()->getConfigServerConnectionString(),
        dbPrimaryShardId,
        primaryShard->getConnString(),
        NamespaceString(ns),
        manager->getVersion(),
        true);

    auto ssvStatus = grid.shardRegistry()->runCommandWithNotMasterRetries(
        txn, dbPrimaryShardId, "admin", ssv.toBSON());
    if (!ssvStatus.isOK()) {
        warning() << "could not update initial version of " << ns << " on shard primary "
                  << dbPrimaryShardId << ssvStatus.getStatus();
    }

    logChange(txn,
              txn->getClient()->clientAddress(true),
              "shardCollection",
              ns,
              BSON("version" << manager->getVersion().toString()));

    return Status::OK();
}
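
One detail worth noting in the changelog entry above: numChunks is recorded as initPoints.size() + 1, since n split points partition the shard-key range into n + 1 intervals: (-inf, p1), [p1, p2), ..., [pn, +inf). Two split points, for example, yield three chunks.
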
Example #7
        void operator()(DBClientCursorBatchIterator &i) {
            const string to_dbname = nsToDatabase(to_collection);
            while (i.moreInCurrentBatch()) {
                if (n % 128 == 127) {
                    time_t now = time(0);
                    if (now - lastLog >= 60) { 
                        // report progress
                        if (lastLog) {
                            log() << "clone " << to_collection << ' ' << n << endl;
                        }
                        lastLog = now;
                    }
                    mayInterrupt(_mayBeInterrupted);
                }

                BSONObj js = i.nextSafe();
                ++n;

                if (isindex) {
                    verify(nsToCollectionSubstring(from_collection) == "system.indexes");
                    storedForLater->push_back(fixindex(js, to_dbname).getOwned());
                }
                else {
                    try {
                        LOCK_REASON(lockReason, "cloner: copying documents into local collection");
                        Client::ReadContext ctx(to_collection, lockReason);
                        if (_isCapped) {
                            Collection *cl = getCollection(to_collection);
                            verify(cl->isCapped());
                            BSONObj pk = js["$_"].Obj();
                            BSONObjBuilder rowBuilder;                        
                            BSONObjIterator it(js);
                            while (it.moreWithEOO()) {
                                BSONElement e = it.next();
                                if (e.eoo()) {
                                    break;
                                }
                                if (!mongoutils::str::equals(e.fieldName(), "$_")) {
                                    rowBuilder.append(e);
                                }
                            }
                            BSONObj row = rowBuilder.obj();
                            CappedCollection *cappedCl = cl->as<CappedCollection>();
                            bool indexBitChanged = false;
                            cappedCl->insertObjectWithPK(pk, row, Collection::NO_LOCKTREE, &indexBitChanged);
                            // Hack copied from Collection::insertObject. TODO: find a better way to do this                        
                            if (indexBitChanged) {
                                cl->noteMultiKeyChanged();
                            }
                        }
                        else {
                            insertObject(to_collection, js, 0, logForRepl);
                        }
                    }
                    catch (UserException& e) {
                        error() << "error: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n';
                        throw;
                    }

                    RARELY if ( time( 0 ) - saveLast > 60 ) {
                        log() << n << " objects cloned so far from collection " << from_collection << endl;
                        saveLast = time( 0 );
                    }
                }
            }
        }
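
The cloner's progress reporting above is throttled twice over: the clock is consulted only on every 128th document, and a line is emitted at most once a minute. The same pattern in isolation (a sketch; the function name is made up):

    #include <ctime>
    #include <iostream>

    // Hypothetical standalone version of the throttled progress report above.
    void maybeReportProgress(long long n, std::time_t& lastLog) {
        if (n % 128 != 127)
            return;                   // cheap modulo test guards the clock call
        std::time_t now = std::time(0);
        if (now - lastLog >= 60) {
            if (lastLog)              // suppress the very first, contentless report
                std::cout << "cloned " << n << " objects so far" << std::endl;
            lastLog = now;
        }
    }
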
Example #8
    /* Prepare to build an index.  Does not actually build it (except for a special _id case).
       - We validate that the params are good
       - We check that the index does not already exist
       - We create the source collection if it does not already exist

       example of 'io':
         { ns : 'test.foo', name : 'z', key : { z : 1 } }

       throws DBException

       @param sourceNS - source NS we are indexing
       @param sourceCollection - its details ptr
       @return true if ok to continue.  when false we stop/fail silently (index already exists)
    */
    bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection, BSONObj& fixedIndexObject ) {
        sourceCollection = 0;

        // logical name of the index.  todo: get rid of the name, we don't need it!
        const char *name = io.getStringField("name");
        uassert(12523, "no index name specified", *name);

        // the collection for which we are building an index
        sourceNS = io.getStringField("ns");
        uassert(10096, "invalid ns to index", sourceNS.find( '.' ) != string::npos);
        uassert(10097, "bad table to index name on add index attempt",
                cc().database()->name == nsToDatabase(sourceNS.c_str()));


        BSONObj key = io.getObjectField("key");
        uassert(12524, "index key pattern too large", key.objsize() <= 2048);
        if( !validKeyPattern(key) ) {
            string s = string("bad index key pattern ") + key.toString();
            uasserted(10098 , s.c_str());
        }

        if ( sourceNS.empty() || key.isEmpty() ) {
            log(2) << "bad add index attempt name:" << (name?name:"") << "\n  ns:" <<
                   sourceNS << "\n  idxobj:" << io.toString() << endl;
            string s = "bad add index attempt " + sourceNS + " key:" + key.toString();
            uasserted(12504, s);
        }

        sourceCollection = nsdetails(sourceNS.c_str());
        if( sourceCollection == 0 ) {
            // try to create it
            string err;
            if ( !userCreateNS(sourceNS.c_str(), BSONObj(), err, false) ) {
                problem() << "ERROR: failed to create collection while adding its index. " << sourceNS << endl;
                return false;
            }
            sourceCollection = nsdetails(sourceNS.c_str());
            tlog() << "info: creating collection " << sourceNS << " on add index" << endl;
            assert( sourceCollection );
        }

        if ( sourceCollection->findIndexByName(name) >= 0 ) {
            // index already exists.
            return false;
        }
        if( sourceCollection->findIndexByKeyPattern(key) >= 0 ) {
            log(2) << "index already exists with diff name " << name << ' ' << key.toString() << endl;
            return false;
        }

        if ( sourceCollection->nIndexes >= NamespaceDetails::NIndexesMax ) {
            stringstream ss;
            ss << "add index fails, too many indexes for " << sourceNS << " key:" << key.toString();
            string s = ss.str();
            log() << s << '\n';
            uasserted(12505,s);
        }

        /* we can't build a new index for the ns if a build is already in progress in the background -
           EVEN IF this is a foreground build.
           */
        uassert(12588, "cannot add index with a background operation in progress",
                !BackgroundOperation::inProgForNs(sourceNS.c_str()));

        /* this is because we want key patterns like { _id : 1 } and { _id : <someobjid> } to
           all be treated as the same pattern.
        */
        if ( IndexDetails::isIdIndexPattern(key) ) {
            if( !god ) {
                ensureHaveIdIndex( sourceNS.c_str() );
                return false;
            }
        }
        else {
            /* is buildIndexes:false set for this replica set member?
               if so we don't build any indexes except _id
            */
            if( theReplSet && !theReplSet->buildIndexes() )
                return false;
        }

        string pluginName = IndexPlugin::findPluginName( key );
        IndexPlugin * plugin = pluginName.size() ? IndexPlugin::get( pluginName ) : 0;

        if ( plugin ) {
            fixedIndexObject = plugin->adjustIndexSpec( io );
        }
        else if ( io["v"].eoo() ) {
            // add "v" if it doesn't exist
            // if it does - leave whatever value was there
            // this is for testing and replication
            BSONObjBuilder b( io.objsize() + 32 );
            b.appendElements( io );
            b.append( "v" , 0 );
            fixedIndexObject = b.obj();
        }

        return true;
    }
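
When no plugin claims the key pattern and the spec carries no "v" field, the builder above pins the version explicitly by appending v:0. Illustratively, the spec from the comment, { ns : 'test.foo', name : 'z', key : { z : 1 } }, comes back through fixedIndexObject as { ns : 'test.foo', name : 'z', key : { z : 1 }, v : 0 }.
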
Example #9
    /* Prepare to build an index.  Does not actually build it (except for a special _id case).
       - We validate that the params are good
       - We check that the index does not already exist
       - We create the source collection if it does not already exist

       example of 'io':
         { ns : 'test.foo', name : 'z', key : { z : 1 } }

       throws DBException

       @param sourceNS - source NS we are indexing
       @param sourceCollection - its details ptr
       @return true if ok to continue.  when false we stop/fail silently (index already exists)
    */
    bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection, BSONObj& fixedIndexObject ) {
        sourceCollection = 0;

        // logical name of the index.  todo: get rid of the name, we don't need it!
        const char *name = io.getStringField("name");
        uassert(12523, "no index name specified", *name);

        // the collection for which we are building an index
        sourceNS = io.getStringField("ns");
        uassert(10096, "invalid ns to index", sourceNS.find( '.' ) != string::npos);
        massert(10097,
                str::stream() << "bad table to index name on add index attempt"
                              << " current db: " << cc().database()->name
                              << "  source: " << sourceNS,
                cc().database()->name == nsToDatabase(sourceNS.c_str()));

        BSONObj key = io.getObjectField("key");
        uassert(12524, "index key pattern too large", key.objsize() <= 2048);
        if( !validKeyPattern(key) ) {
            string s = string("bad index key pattern ") + key.toString();
            uasserted(10098 , s.c_str());
        }

        if ( sourceNS.empty() || key.isEmpty() ) {
            log(2) << "bad add index attempt name:" << (name?name:"") << "\n  ns:" <<
                   sourceNS << "\n  idxobj:" << io.toString() << endl;
            string s = "bad add index attempt " + sourceNS + " key:" + key.toString();
            uasserted(12504, s);
        }

        sourceCollection = nsdetails(sourceNS.c_str());
        if( sourceCollection == 0 ) {
            // try to create it
            string err;
            if ( !userCreateNS(sourceNS.c_str(), BSONObj(), err, false) ) {
                problem() << "ERROR: failed to create collection while adding its index. " << sourceNS << endl;
                return false;
            }
            sourceCollection = nsdetails(sourceNS.c_str());
            tlog() << "info: creating collection " << sourceNS << " on add index" << endl;
            verify( sourceCollection );
        }

        if ( sourceCollection->findIndexByName(name) >= 0 ) {
            // index already exists.
            return false;
        }
        if( sourceCollection->findIndexByKeyPattern(key) >= 0 ) {
            log(2) << "index already exists with diff name " << name << ' ' << key.toString() << endl;
            return false;
        }

        if ( sourceCollection->nIndexes >= NamespaceDetails::NIndexesMax ) {
            stringstream ss;
            ss << "add index fails, too many indexes for " << sourceNS << " key:" << key.toString();
            string s = ss.str();
            log() << s << '\n';
            uasserted(12505,s);
        }

        /* we can't build a new index for the ns if a build is already in progress in the background -
           EVEN IF this is a foreground build.
           */
        uassert(12588, "cannot add index with a background operation in progress",
                !BackgroundOperation::inProgForNs(sourceNS.c_str()));

        /* this is because we want key patterns like { _id : 1 } and { _id : <someobjid> } to
           all be treated as the same pattern.
        */
        if ( IndexDetails::isIdIndexPattern(key) ) {
            if( !god ) {
                ensureHaveIdIndex( sourceNS.c_str() );
                return false;
            }
        }
        else {
            /* is buildIndexes:false set for this replica set member?
               if so we don't build any indexes except _id
            */
            if( theReplSet && !theReplSet->buildIndexes() )
                return false;
        }

        string pluginName = IndexPlugin::findPluginName( key );
        IndexPlugin * plugin = pluginName.size() ? IndexPlugin::get( pluginName ) : 0;


        { 
            BSONObj o = io;
            if ( plugin ) {
                o = plugin->adjustIndexSpec(o);
            }
            BSONObjBuilder b;
            int v = DefaultIndexVersionNumber;
            if( !o["v"].eoo() ) {
                double vv = o["v"].Number();
                // note (one day) we may be able to fresh build less versions than we can use
                // isASupportedIndexVersionNumber() is what we can use
                uassert(14803, str::stream() << "this version of mongod cannot build new indexes of version number " << vv, 
                    vv == 0 || vv == 1);
                v = (int) vv;
            }
            // idea is to put things we use a lot earlier
            b.append("v", v);
            b.append(o["key"]);
            if( o["unique"].trueValue() )
                b.appendBool("unique", true); // normalize to bool true in case was int 1 or something...
            b.append(o["ns"]);

            {
                // stripping _id
                BSONObjIterator i(o);
                while ( i.more() ) {
                    BSONElement e = i.next();
                    string s = e.fieldName();
                    if( s != "_id" && s != "v" && s != "ns" && s != "unique" && s != "key" )
                        b.append(e);
                }
            }
        
            fixedIndexObject = b.obj();
        }

        return true;
    }
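
This version also rebuilds the spec so frequently-read fields lead. With DefaultIndexVersionNumber taken as 1 for illustration, an input of { ns : 'test.foo', name : 'z', key : { z : 1 }, unique : 1 } would come out as { v : 1, key : { z : 1 }, unique : true, ns : 'test.foo', name : 'z' }: v, key, unique and ns are emitted first, unique is normalized to a real boolean, and the trailing loop copies every remaining field except _id, v, ns, unique and key.
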
Example #10
void BackgroundSync::_produce(OperationContext* txn, executor::TaskExecutor* taskExecutor) {
    // this oplog reader does not do a handshake because we don't want the server it's syncing
    // from to track how far it has synced
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        if (_lastOpTimeFetched.isNull()) {
            // then we're initial syncing and we're still waiting for this to be set
            lock.unlock();
            sleepsecs(1);
            // if there is no one to sync from
            return;
        }

        if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary() ||
            inShutdownStrict()) {
            return;
        }
    }

    while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
        sleepmillis(0);
    }


    // find a target to sync from the last optime fetched
    OpTime lastOpTimeFetched;
    {
        stdx::unique_lock<stdx::mutex> lock(_mutex);
        lastOpTimeFetched = _lastOpTimeFetched;
        _syncSourceHost = HostAndPort();
    }
    OplogReader syncSourceReader;
    syncSourceReader.connectToSyncSource(txn, lastOpTimeFetched, _replCoord);

    // no server found
    if (syncSourceReader.getHost().empty()) {
        sleepsecs(1);
        // if there is no one to sync from
        return;
    }

    long long lastHashFetched;
    {
        stdx::lock_guard<stdx::mutex> lock(_mutex);
        if (_pause) {
            return;
        }
        lastOpTimeFetched = _lastOpTimeFetched;
        lastHashFetched = _lastFetchedHash;
        _syncSourceHost = syncSourceReader.getHost();
        _replCoord->signalUpstreamUpdater();
    }

    const Milliseconds oplogSocketTimeout(OplogReader::kSocketTimeout);

    // Prefer host in oplog reader to _syncSourceHost because _syncSourceHost may be cleared
    // if sync source feedback fails.
    const HostAndPort source = syncSourceReader.getHost();
    syncSourceReader.resetConnection();
    // no more references to oplog reader from here on.

    // If this status is not OK after the fetcher returns from wait(),
    // proceed to execute rollback
    Status remoteOplogStartStatus = Status::OK();

    auto fetcherCallback = stdx::bind(&BackgroundSync::_fetcherCallback,
                                      this,
                                      stdx::placeholders::_1,
                                      stdx::placeholders::_3,
                                      stdx::cref(source),
                                      lastOpTimeFetched,
                                      lastHashFetched,
                                      &remoteOplogStartStatus);

    BSONObjBuilder cmdBob;
    cmdBob.append("find", nsToCollectionSubstring(rsOplogName));
    cmdBob.append("filter", BSON("ts" << BSON("$gte" << lastOpTimeFetched.getTimestamp())));
    cmdBob.append("tailable", true);
    cmdBob.append("oplogReplay", true);
    cmdBob.append("awaitData", true);
    cmdBob.append("maxTimeMS", durationCount<Milliseconds>(fetcherMaxTimeMS));

    BSONObjBuilder metadataBob;
    if (_replCoord->isV1ElectionProtocol()) {
        cmdBob.append("term", _replCoord->getTerm());
        metadataBob.append(rpc::kReplSetMetadataFieldName, 1);
    }

    auto cmdObj = cmdBob.obj();
    auto metadataObj = metadataBob.obj();
    Fetcher fetcher(
        taskExecutor, source, nsToDatabase(rsOplogName), cmdObj, fetcherCallback, metadataObj);
    auto scheduleStatus = fetcher.schedule();
    if (!scheduleStatus.isOK()) {
        warning() << "unable to schedule fetcher to read remote oplog on " << source << ": "
                  << scheduleStatus;
        return;
    }
    fetcher.wait();

    // If the background sync is paused after the fetcher is started, we need to
    // re-evaluate our sync source and oplog common point.
    if (isPaused()) {
        return;
    }

    // Execute rollback if necessary.
    // Rollback is a synchronous operation that uses the task executor and may not be
    // executed inside the fetcher callback.
    if (!remoteOplogStartStatus.isOK()) {
        const int messagingPortTags = 0;
        ConnectionPool connectionPool(messagingPortTags);
        std::unique_ptr<ConnectionPool::ConnectionPtr> connection;
        auto getConnection =
            [&connection, &connectionPool, oplogSocketTimeout, source]() -> DBClientBase* {
                if (!connection.get()) {
                    connection.reset(new ConnectionPool::ConnectionPtr(
                        &connectionPool, source, Date_t::now(), oplogSocketTimeout));
                }
                return connection->get();
            };

        log() << "starting rollback: " << remoteOplogStartStatus;
        _rollback(txn, source, getConnection);
        stop();
    }
}
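
For reference, the find command assembled by the builders above has this shape (angle-bracketed values are placeholders; the term field appears only under the V1 election protocol):

    {
        find: "oplog.rs",                              // nsToCollectionSubstring(rsOplogName)
        filter: { ts: { $gte: <lastOpTimeFetched> } },
        tailable: true,
        oplogReplay: true,
        awaitData: true,
        maxTimeMS: <fetcherMaxTimeMS>,
        term: <current term>
    }
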
Example #11
    DiskLoc DataFileMgr::insert(const char* ns,
                                const void* obuf,
                                int32_t len,
                                bool mayInterrupt,
                                bool god,
                                bool mayAddIndex,
                                bool* addedID) {

        Database* database = cc().database();

        bool wouldAddIndex = false;
        massert( 10093 , "cannot insert into reserved $ collection", god || NamespaceString::normal( ns ) );
        uassert( 10094 , str::stream() << "invalid ns: " << ns , isValidNS( ns ) );
        {
            const char *sys = strstr(ns, "system.");
            if ( sys ) {

                if ( !insert_checkSys(sys, ns, wouldAddIndex, obuf, god) )
                    return DiskLoc();

                if ( mayAddIndex && wouldAddIndex ) {
                    // TODO: this should be handled above this function
                    BSONObj spec( static_cast<const char*>( obuf ) );
                    string collectionToIndex = spec.getStringField( "ns" );
                    uassert(10096, "invalid ns to index", collectionToIndex.find( '.' ) != string::npos);
                    massert(10097,
                            str::stream() << "trying to create index on wrong db "
                            << " db: " << database->name() << " collection: " << collectionToIndex,
                            database->ownsNS( collectionToIndex ) );

                    Collection* collection = database->getCollection( collectionToIndex );
                    if ( !collection ) {
                        collection = database->createCollection( collectionToIndex, false, NULL, true );
                        verify( collection );
                        if ( !god )
                            ensureIdIndexForNewNs( collection );
                    }

                    Status status = collection->getIndexCatalog()->createIndex( spec, mayInterrupt );
                    if ( status.code() == ErrorCodes::IndexAlreadyExists )
                        return DiskLoc();
                    uassertStatusOK( status );
                    return DiskLoc();
                }
            }
        }

        Collection* collection = database->getCollection( ns );
        if ( collection == NULL ) {
            collection = database->createCollection( ns, false, NULL, false );

            int ies = Extent::initialSize(len);
            if( str::contains(ns, '$') &&
                len + Record::HeaderSize >= BtreeData_V1::BucketSize - 256 &&
                len + Record::HeaderSize <= BtreeData_V1::BucketSize + 256 ) {
                // probably an index.  so we pick a value here for the first extent instead of using
                // initialExtentSize() which is more for user collections.
                // TODO: we could look at the # of records in the parent collection to be smarter here.
                ies = (32+4) * 1024;
            }
            collection->increaseStorageSize( ies, false);
            if ( !god )
                ensureIdIndexForNewNs( collection );
        }

        NamespaceDetails* d = collection->details();

        IDToInsert idToInsert; // only initialized if needed

        if( !god ) {
            /* Check if we have an _id field. If we don't, we'll add it.
               Note that btree buckets which we insert aren't BSONObj's, but in that case god==true.
            */
            BSONObj io((const char *) obuf);
            BSONElement idField = io.getField( "_id" );
            uassert( 10099 ,  "_id cannot be an array", idField.type() != Array );
            // we don't add _id for capped collections in local as they don't have an _id index
            if( idField.eoo() &&
                !wouldAddIndex &&
                nsToDatabase( ns ) != "local" &&
                d->haveIdIndex() ) {

                if( addedID )
                    *addedID = true;

                idToInsert.init();
                len += idToInsert.size();
            }

            BSONElementManipulator::lookForTimestamps( io );
        }

        int lenWHdr = d->getRecordAllocationSize( len + Record::HeaderSize );
        fassert( 16440, lenWHdr >= ( len + Record::HeaderSize ) );

        // If the collection is capped, check if the new object will violate a unique index
        // constraint before allocating space.
        if ( d->isCapped() && !god) {
            BSONObj temp = BSONObj( reinterpret_cast<const char *>( obuf ) );
            Status ret = collection->getIndexCatalog()->checkNoIndexConflicts( temp );
            uassert(12582, "duplicate key insert for unique index of capped collection", ret.isOK() );
        }

        DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWHdr, god);

        if ( loc.isNull() ) {
            string errmsg = str::stream() << "insert: couldn't alloc space for object ns:" << ns
                                          << " capped:" << d->isCapped();
            log() << errmsg;
            uasserted( 17248, errmsg );
        }

        Record *r = loc.rec();
        {
            verify( r->lengthWithHeaders() >= lenWHdr );
            r = (Record*) getDur().writingPtr(r, lenWHdr);
            if( idToInsert.needed() ) {
                /* a little effort was made here to avoid a double copy when we add an ID */
                int originalSize = *((int*) obuf);
                ((int&)*r->data()) = originalSize + idToInsert.size();
                memcpy(r->data()+4, idToInsert.rawdata(), idToInsert.size());
                memcpy(r->data()+4+idToInsert.size(), ((char*)obuf)+4, originalSize-4);
            }
            else {
                if( obuf ) // obuf can be null from internal callers
                    memcpy(r->data(), obuf, len);
            }
        }

        addRecordToRecListInExtent(r, loc);

        d->incrementStats( r->netLength(), 1 );

        // we don't bother resetting query optimizer stats for the god tables - also god is true when adding a btree bucket
        if ( !god )
            collection->infoCache()->notifyOfWriteOp();

        /* add this record to our indexes */
        if ( d->getTotalIndexCount() > 0 ) {
            try {
                BSONObj obj(r->data());
                collection->getIndexCatalog()->indexRecord(obj, loc);
            }
            catch( AssertionException& e ) {
                // should be a dup key error on _id index
                if( d->isCapped() ) {
                    // inserts into a capped collection cannot be rolled back, so
                    // the collection and its index will no longer match; report
                    // the error, then abort via massert
                    string s = e.toString();
                    s += " : on addIndex/capped - collection and its index will not match";
                    setLastError(0, s.c_str());
                    error() << s << endl;
                    massert( 12583, "unexpected index insertion failure on capped collection", false );
                }
                else {
                    // normal case -- we can roll back
                    _deleteRecord(d, ns, r, loc);
                    throw;
                }
            }
        }

        d->paddingFits();

        return loc;
    }
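
The idToInsert branch above avoids a double copy by exploiting BSON framing: a document starts with a 4-byte little-endian total length, so an element can be prepended by writing an enlarged length prefix, then the new element's bytes, then the original body minus its own prefix. A minimal standalone sketch of that layout trick (not part of the source; assumes a little-endian host, which matches BSON's own byte order):

#include <cstring>
#include <vector>

// 'idElement' is assumed to hold the raw bytes of one complete BSON element
// (type byte, field name, value) -- e.g. a freshly generated _id.
std::vector<char> prependElement(const char* doc, const char* idElement, int idLen) {
    int originalSize;
    std::memcpy(&originalSize, doc, 4);                       // original total length
    std::vector<char> out(originalSize + idLen);
    const int newSize = originalSize + idLen;
    std::memcpy(&out[0], &newSize, 4);                        // rewritten length prefix
    std::memcpy(&out[4], idElement, idLen);                   // new element goes first
    std::memcpy(&out[4 + idLen], doc + 4, originalSize - 4);  // rest of the document
    return out;
}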
Example #12
    bool _userCreateNS(const char *ns, const BSONObj& options, string& err, bool *deferIdIndex) {
        LOG(1) << "create collection " << ns << ' ' << options << endl;

        Database* db = cc().database();

        Collection* collection = db->getCollection( ns );

        if ( collection ) {
            err = "collection already exists";
            return false;
        }

        long long size = Extent::initialSize(128);
        {
            BSONElement e = options.getField("size");
            if ( e.isNumber() ) {
                size = e.numberLong();
                uassert( 10083 , "create collection invalid size spec", size >= 0 );

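                // round the requested size up to the next 256-byte multiple:
                // add 255, then clear the low eight bits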
                size += 0xff;
                size &= 0xffffffffffffff00LL;
                if ( size < Extent::minSize() )
                    size = Extent::minSize();
            }
        }

        bool newCapped = false;
        long long mx = 0;
        if( options["capped"].trueValue() ) {
            newCapped = true;
            BSONElement e = options.getField("max");
            if ( e.isNumber() ) {
                mx = e.numberLong();
                uassert( 16495,
                         "max in a capped collection has to be < 2^31 or not set",
                         NamespaceDetails::validMaxCappedDocs(&mx) );
            }
        }


        collection = db->createCollection( ns,
                                           options["capped"].trueValue(),
                                           &options,
                                           false ); // we do it ourselves below
        verify( collection );

        // $nExtents just for debug/testing.
        BSONElement e = options.getField( "$nExtents" );

        if ( e.type() == Array ) {
            // We create one extent per array entry, with size specified
            // by the array value.
            BSONObjIterator i( e.embeddedObject() );
            while( i.more() ) {
                BSONElement e = i.next();
                int size = int( e.number() );
                verify( size <= 0x7fffffff );
                // $nExtents is just for testing - always allocate new extents
                // rather than reuse existing extents so we have some predictability
                // in the extent size used by our tests
                collection->increaseStorageSize( (int)size, false );
            }
        }
        else if ( int( e.number() ) > 0 ) {
            // We create '$nExtents' extents, each of size 'size'.
            int nExtents = int( e.number() );
            verify( size <= 0x7fffffff );
            for ( int i = 0; i < nExtents; ++i ) {
                verify( size <= 0x7fffffff );
                // $nExtents is just for testing - always allocate new extents
                // rather than reuse existing extents so we have some predictability
                // in the extent size used by our tests
                collection->increaseStorageSize( (int)size, false );
            }
        }
        else {
            // This is the non-test case, where we don't have a $nExtents spec.
            while ( size > 0 ) {
                const int max = Extent::maxSize();
                const int min = Extent::minSize();
                int desiredExtentSize = static_cast<int> (size > max ? max : size);
                desiredExtentSize = static_cast<int> (desiredExtentSize < min ? min : desiredExtentSize);

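                // keep the extent size 256-byte aligned by clearing the low bits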
                desiredExtentSize &= 0xffffff00;
                Extent* e = collection->increaseStorageSize( (int)desiredExtentSize, true );
                size -= e->length;
            }
        }

        NamespaceDetails *d = nsdetails(ns);
        verify(d);

        bool ensure = true;

        // Respect autoIndexId if set; otherwise create an _id index for all collections,
        // except capped ones in the local database without autoIndexId (the exception covers
        // the oplog and non-replicated capped collections).
        if( options.hasField( "autoIndexId" ) ||
            (newCapped && nsToDatabase( ns ) == "local" ) ) {
            ensure = options.getField( "autoIndexId" ).trueValue();
        }

        if( ensure ) {
            if( deferIdIndex )
                *deferIdIndex = true;
            else
                ensureIdIndexForNewNs( collection );
        }

        if ( mx > 0 )
            d->setMaxCappedDocs( mx );

        if ( options["flags"].numberInt() ) {
            d->replaceUserFlags( options["flags"].numberInt() );
        }

        return true;
    }
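
The size handling above rounds the requested collection size up to a 256-byte multiple before carving it into extents. A standalone illustration of that arithmetic (roundUpTo256 is a hypothetical helper, not in the source):

#include <cassert>
#include <cstdint>

int64_t roundUpTo256(int64_t size) {
    // equivalent to: size += 0xff; size &= 0xffffffffffffff00LL;
    return (size + 0xff) & ~static_cast<int64_t>(0xff);
}

int main() {
    assert(roundUpTo256(1000) == 1024);  // rounded up to the next multiple of 256
    assert(roundUpTo256(1024) == 1024);  // already aligned, unchanged
    return 0;
}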
Example #13
        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );

            if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
                errmsg = "invalid collection name: " + target;
                return false;
            }
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            if (!fromRepl) { // If it got through on the master, need to allow it here too
                Status sourceStatus = userAllowedWriteNS(source);
                if (!sourceStatus.isOK()) {
                    errmsg = "error with source namespace: " + sourceStatus.reason();
                    return false;
                }

                Status targetStatus = userAllowedWriteNS(target);
                if (!targetStatus.isOK()) {
                    errmsg = "error with target namespace: " + targetStatus.reason();
                    return false;
                }
            }

            string sourceDB = nsToDatabase(source);
            string targetDB = nsToDatabase(target);

            bool capped = false;
            long long size = 0;
            std::vector<BSONObj> indexesInProg;

            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );

                if ( !sourceColl ) {
                    errmsg = "source namespace does not exist";
                    return false;
                }

                // Ensure that collection name does not exceed maximum length.
                // Ensure that index names do not push the length over the max.
                // Iterator includes unfinished indexes.
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( true );
                int longestIndexNameLength = 0;
                while ( sourceIndIt.more() ) {
                    int thisLength = sourceIndIt.next()->indexName().length();
                    if ( thisLength > longestIndexNameLength )
                        longestIndexNameLength = thisLength;
                }

                unsigned int longestAllowed =
                    min(int(Namespace::MaxNsColletionLen),
                        int(Namespace::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength);
                if (target.size() > longestAllowed) {
                    StringBuilder sb;
                    sb << "collection name length of " << target.size()
                       << " exceeds maximum length of " << longestAllowed
                       << ", allowing for index names";
                    errmsg = sb.str();
                    return false;
                }

                {
                    const NamespaceDetails *nsd = nsdetails( source );
                    indexesInProg = stopIndexBuilds( dbname, cmdObj );
                    capped = nsd->isCapped();
                    if ( capped )
                        for( DiskLoc i = nsd->firstExtent(); !i.isNull(); i = i.ext()->xnext )
                            size += i.ext()->length;
                }
            }

            {
                Client::Context ctx( target );

                // Check if the target namespace exists and if dropTarget is true.
                // If target exists and dropTarget is not true, return false.
                if ( ctx.db()->getCollection( target ) ) {
                    if ( !cmdObj["dropTarget"].trueValue() ) {
                        errmsg = "target namespace exists";
                        return false;
                    }

                    Status s = cc().database()->dropCollection( target );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        restoreIndexBuildsOnSource( indexesInProg, source );
                        return false;
                    }
                }

                // If we are renaming in the same database, just
                // rename the namespace and we're done.
                if ( sourceDB == targetDB ) {
                    Status s = ctx.db()->renameCollection( source, target,
                                                           cmdObj["stayTemp"].trueValue() );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        restoreIndexBuildsOnSource( indexesInProg, source );
                        return false;
                    }
                    return true;
                }

                // Otherwise, we are renaming across databases, so we must copy all
                // the data and then remove the source collection.

                // Create the target collection.
                Collection* targetColl = NULL;
                if ( capped ) {
                    BSONObjBuilder spec;
                    spec.appendBool( "capped", true );
                    spec.append( "size", double( size ) );
                    spec.appendBool( "autoIndexId", false );
                    userCreateNS( target.c_str(), spec.obj(), errmsg, false );
                    targetColl = ctx.db()->getCollection( target );
                }
                else {
                    CollectionOptions options;
                    options.setNoIdIndex();
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    targetColl = ctx.db()->createCollection( target, options );
                }
                if ( !targetColl ) {
                    errmsg = "Failed to create target collection.";
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            // Copy over all the data from source collection to target collection.
            bool insertSuccessful = true;
            boost::scoped_ptr<CollectionIterator> sourceIt;

            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );
                sourceIt.reset( sourceColl->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) );
            }

            Collection* targetColl = NULL;
            while ( !sourceIt->isEOF() ) {
                BSONObj o;
                {
                    Client::Context srcCtx( source );
                    o = sourceIt->getNext().obj();
                }
                // Insert and check return status of insert.
                {
                    Client::Context ctx( target );
                    if ( !targetColl )
                        targetColl = ctx.db()->getCollection( target );
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    Status s = targetColl->insertDocument( o, true ).getStatus();
                    if ( !s.isOK() ) {
                        insertSuccessful = false;
                        errmsg = s.toString();
                        break;
                    }
                }
            }

            // If inserts were unsuccessful, drop the target collection and return false.
            if ( !insertSuccessful ) {
                Client::Context ctx( target );
                Status s = ctx.db()->dropCollection( target );
                if ( !s.isOK() )
                    errmsg = s.toString();
                restoreIndexBuildsOnSource( indexesInProg, source );
                return false;
            }

            // Copy over the indexes to temp storage and then to the target.
            vector<BSONObj> copiedIndexes;
            bool indexSuccessful = true;
            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( true );

                while ( sourceIndIt.more() ) {
                    BSONObj currIndex = sourceIndIt.next()->infoObj();

                    // Process the source index.
                    BSONObjBuilder b;
                    BSONObjIterator i( currIndex );
                    while( i.moreWithEOO() ) {
                        BSONElement e = i.next();
                        if ( e.eoo() )
                            break;
                        else if ( strcmp( e.fieldName(), "ns" ) == 0 )
                            b.append( "ns", target );
                        else
                            b.append( e );
                    }

                    BSONObj newIndex = b.obj();
                    copiedIndexes.push_back( newIndex );
                }
            }

            {
                Client::Context ctx( target );
                if ( !targetColl )
                    targetColl = ctx.db()->getCollection( target );

                for ( vector<BSONObj>::iterator it = copiedIndexes.begin();
                                                it != copiedIndexes.end(); ++it ) {
                    Status s = targetColl->getIndexCatalog()->createIndex( *it, true );
                    if ( !s.isOK() ) {
                        indexSuccessful = false;
                        errmsg = s.toString();
                        break;
                    }
                }

                // If indexes were unsuccessful, drop the target collection and return false.
                if ( !indexSuccessful ) {
                    Status s = ctx.db()->dropCollection( target );
                    if ( !s.isOK() )
                        errmsg = s.toString();
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            // Drop the source collection.
            {
                Client::Context srcCtx( source );
                Status s = srcCtx.db()->dropCollection( source );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            return true;
        }
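
When indexes are copied across databases, each source spec is kept intact except for its "ns" field, which must point at the target namespace. A compact sketch of that rewrite as a free function against the codebase's BSON builder API (rewriteIndexNs is an assumed helper for illustration; Example #15 below achieves the same with append("ns", target) followed by appendElementsUnique):

BSONObj rewriteIndexNs(const BSONObj& spec, const std::string& target) {
    BSONObjBuilder b;
    BSONObjIterator i(spec);
    while (i.more()) {
        BSONElement e = i.next();
        if (strcmp(e.fieldName(), "ns") == 0)
            b.append("ns", target);  // retarget the index at the new namespace
        else
            b.append(e);             // copy every other field verbatim
    }
    return b.obj();
}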
Example #14
Status AuthorizationSession::_checkAuthForPrivilegeHelper(const Privilege& privilege) {
    AuthorizationManager& authMan = getAuthorizationManager();
    Privilege modifiedPrivilege = _modifyPrivilegeForSpecialCases(privilege);

    // Need to check not just the privilege's full resource, but also the database
    // component on its own and the "*" (wildcard) resource.
    std::string resourceSearchList[3];
    resourceSearchList[0] = AuthorizationManager::WILDCARD_RESOURCE_NAME;
    resourceSearchList[1] = nsToDatabase(modifiedPrivilege.getResource());
    resourceSearchList[2] = modifiedPrivilege.getResource();


    ActionSet unmetRequirements = modifiedPrivilege.getActions();
    UserSet::iterator it = _authenticatedUsers.begin();
    while (it != _authenticatedUsers.end()) {
        User* user = *it;

        if (!user->isValid()) {
            // Make a good faith effort to acquire an up-to-date user object, since the one
            // we've cached is marked "out-of-date."
            UserName name = user->getName();
            User* updatedUser;

            Status status = authMan.acquireUser(name, &updatedUser);
            switch (status.code()) {
            case ErrorCodes::OK: {
                // Success! Replace the old User object with the updated one.
                fassert(17067, _authenticatedUsers.replaceAt(it, updatedUser) == user);
                authMan.releaseUser(user);
                user = updatedUser;
                LOG(1) << "Updated session cache of user information for " << name;
                break;
            }
            case ErrorCodes::UserNotFound: {
                // User does not exist anymore; remove it from _authenticatedUsers.
                fassert(17068, _authenticatedUsers.removeAt(it) == user);
                authMan.releaseUser(user);
                LOG(1) << "Removed deleted user " << name <<
                       " from session cache of user information.";
                continue;  // No need to advance "it" in this case.
            }
            default:
                // Unrecognized error; assume that it's transient, and continue working with the
                // out-of-date privilege data.
                warning() << "Could not fetch updated user privilege information for " <<
                          name << "; continuing to use old information.  Reason is " << status;
                break;
            }
        }

        for (int i = 0; i < static_cast<int>(boost::size(resourceSearchList)); ++i) {
            ActionSet userActions = user->getActionsForResource(resourceSearchList[i]);
            unmetRequirements.removeAllActionsFromSet(userActions);

            if (unmetRequirements.empty())
                return Status::OK();
        }
        ++it;
    }

    return Status(ErrorCodes::Unauthorized, "unauthorized");
}
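
The loop above implements subtractive authorization: the privilege's required actions are whittled down across the wildcard resource, the database component, and the exact resource, and the check succeeds as soon as nothing remains. A simplified standalone model (plain std::set stands in for ActionSet; hypothetical code, for illustration only):

#include <set>
#include <string>

bool isAuthorized(std::set<std::string> required,
                  const std::set<std::string> granted[3]) {  // "*", db, exact ns
    for (int i = 0; i < 3 && !required.empty(); ++i)
        for (const std::string& action : granted[i])
            required.erase(action);                          // requirement met
    return required.empty();                                 // all actions covered?
}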
Example #15
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {
            Lock::GlobalWrite globalWriteLock(txn->lockState());
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );

            // We stay in source context the whole time. This is mostly to set the CurOp namespace.
            Client::Context ctx(txn, source);

            if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
                errmsg = "invalid collection name: " + target;
                return false;
            }
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            if (!fromRepl) { // If it got through on the master, need to allow it here too
                Status sourceStatus = userAllowedWriteNS(source);
                if (!sourceStatus.isOK()) {
                    errmsg = "error with source namespace: " + sourceStatus.reason();
                    return false;
                }

                Status targetStatus = userAllowedWriteNS(target);
                if (!targetStatus.isOK()) {
                    errmsg = "error with target namespace: " + targetStatus.reason();
                    return false;
                }
            }

            Database* const sourceDB = dbHolder().get(txn, nsToDatabase(source));
            Collection* const sourceColl = sourceDB ? sourceDB->getCollection(txn, source)
                                                    : NULL;
            if (!sourceColl) {
                errmsg = "source namespace does not exist";
                return false;
            }

            {
                // Ensure that collection name does not exceed maximum length.
                // Ensure that index names do not push the length over the max.
                // Iterator includes unfinished indexes.
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( txn, true );
                int longestIndexNameLength = 0;
                while ( sourceIndIt.more() ) {
                    int thisLength = sourceIndIt.next()->indexName().length();
                    if ( thisLength > longestIndexNameLength )
                        longestIndexNameLength = thisLength;
                }

                unsigned int longestAllowed =
                    min(int(NamespaceString::MaxNsCollectionLen),
                        int(NamespaceString::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength);
                if (target.size() > longestAllowed) {
                    StringBuilder sb;
                    sb << "collection name length of " << target.size()
                       << " exceeds maximum length of " << longestAllowed
                       << ", allowing for index names";
                    errmsg = sb.str();
                    return false;
                }
            }

            const std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, sourceDB, cmdObj);
            // Dismissed on success
            ScopeGuard indexBuildRestorer = MakeGuard(IndexBuilder::restoreIndexes, indexesInProg);

            bool unused;
            Database* const targetDB = dbHolder().getOrCreate(txn, nsToDatabase(target), unused);

            {
                WriteUnitOfWork wunit(txn);

                // Check if the target namespace exists and if dropTarget is true.
                // If target exists and dropTarget is not true, return false.
                if (targetDB->getCollection(txn, target)) {
                    if (!cmdObj["dropTarget"].trueValue()) {
                        errmsg = "target namespace exists";
                        return false;
                    }

                    Status s = targetDB->dropCollection(txn, target);
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        return false;
                    }
                }

                // If we are renaming in the same database, just
                // rename the namespace and we're done.
                if (sourceDB == targetDB) {
                    Status s = targetDB->renameCollection(txn,
                                                          source,
                                                          target,
                                                          cmdObj["stayTemp"].trueValue() );
                    if (!s.isOK()) {
                        return appendCommandStatus(result, s);
                    }

                    if (!fromRepl) {
                        repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
                    }

                    wunit.commit();
                    indexBuildRestorer.Dismiss();
                    return true;
                }
            }

            // If we get here, we are renaming across databases, so we must copy all the data and
            // indexes, then remove the source collection.

            // Create the target collection. It will be removed if we fail to copy the collection.
            // TODO use a temp collection and unset the temp flag on success.
            Collection* targetColl = NULL;
            {
                CollectionOptions options;
                options.setNoIdIndex();

                if (sourceColl->isCapped()) {
                    // TODO stop assuming storageSize == cappedSize
                    options.capped = true;
                    options.cappedSize = sourceColl->getRecordStore()->storageSize(txn);
                }

                WriteUnitOfWork wunit(txn);

                // No logOp necessary because the entire renameCollection command is one logOp.
                targetColl = targetDB->createCollection(txn, target, options);
                if (!targetColl) {
                    errmsg = "Failed to create target collection.";
                    return false;
                }

                wunit.commit();
            }

            // Dismissed on success
            ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target);

            MultiIndexBlock indexer(txn, targetColl);
            indexer.allowInterruption();

            // Copy the index descriptions from the source collection, adjusting the ns field.
            {
                std::vector<BSONObj> indexesToCopy;
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( txn, true );
                while (sourceIndIt.more()) {
                    const BSONObj currIndex = sourceIndIt.next()->infoObj();

                    // Process the source index.
                    BSONObjBuilder newIndex;
                    newIndex.append("ns", target);
                    newIndex.appendElementsUnique(currIndex);
                    indexesToCopy.push_back(newIndex.obj());
                }
                indexer.init(indexesToCopy);
            }

            {
                // Copy over all the data from source collection to target collection.
                boost::scoped_ptr<RecordIterator> sourceIt(sourceColl->getIterator(txn));
                while (!sourceIt->isEOF()) {
                    txn->checkForInterrupt(false);

                    const BSONObj obj = sourceColl->docFor(txn, sourceIt->getNext());

                    WriteUnitOfWork wunit(txn);
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    Status status = targetColl->insertDocument(txn, obj, &indexer, true).getStatus();
                    if (!status.isOK())
                        return appendCommandStatus(result, status);
                    wunit.commit();
                }
            }

            Status status = indexer.doneInserting();
            if (!status.isOK())
                return appendCommandStatus(result, status);

            {
                // Getting here means we successfully built the target copy. We now remove the
                // source collection and finalize the rename.
                WriteUnitOfWork wunit(txn);

                Status status = sourceDB->dropCollection(txn, source);
                if (!status.isOK())
                    return appendCommandStatus(result, status);

                indexer.commit();

                if (!fromRepl) {
                    repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
                }

                wunit.commit();
            }

            indexBuildRestorer.Dismiss();
            targetCollectionDropper.Dismiss();
            return true;
        }
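
Both guards above (indexBuildRestorer and targetCollectionDropper) follow the same pattern: the cleanup runs automatically on any early return and is skipped only when Dismiss() is called after success. A minimal standalone sketch of such a guard (the real MakeGuard/ScopeGuard utility ships with the MongoDB source; this reduced version is illustrative only):

#include <functional>
#include <utility>

class ScopeGuardLite {
public:
    explicit ScopeGuardLite(std::function<void()> cleanup)
        : _cleanup(std::move(cleanup)) {}

    void Dismiss() { _dismissed = true; }      // call once the operation succeeded

    ~ScopeGuardLite() {
        if (!_dismissed)
            _cleanup();                        // roll back on any early exit
    }

private:
    std::function<void()> _cleanup;
    bool _dismissed = false;
};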