Exemple #1
0
    bool _handlePossibleShardedMessage( Message &m, DbResponse* dbresponse ) {
        DEV verify( shardingState.enabled() );

        int op = m.operation();
        if ( op < 2000
                || op >= 3000
                || op == dbGetMore  // cursors are weird
           )
            return false;

        DbMessage d(m);
        const char *ns = d.getns();
        string errmsg;
        // We don't care about the version here, since we're returning it later in the writeback
        ConfigVersion received, wanted;
        if ( shardVersionOk( ns , errmsg, received, wanted ) ) {
            return false;
        }

        LOG(1) << "connection meta data too old - will retry ns:(" << ns << ") op:(" << opToString(op) << ") " << errmsg << endl;

        if ( doesOpGetAResponse( op ) ) {
            verify( dbresponse );
            BufBuilder b( 32768 );
            b.skip( sizeof( QueryResult ) );
            {
                BSONObj obj = BSON( "$err" << errmsg << "ns" << ns );
                b.appendBuf( obj.objdata() , obj.objsize() );
            }

            QueryResult *qr = (QueryResult*)b.buf();
            qr->_resultFlags() = ResultFlag_ErrSet | ResultFlag_ShardConfigStale;
            qr->len = b.len();
            qr->setOperation( opReply );
            qr->cursorId = 0;
            qr->startingFrom = 0;
            qr->nReturned = 1;
            b.decouple();

            Message * resp = new Message();
            resp->setData( qr , true );

            dbresponse->response = resp;
            dbresponse->responseTo = m.header()->id;
            return true;
        }
        
        uassert( 9517 , "writeback" , ( d.reservedField() & DbMessage::Reserved_FromWriteback ) == 0 );

        OID writebackID;
        writebackID.init();
        lastError.getSafe()->writeback( writebackID );

        const OID& clientID = ShardedConnectionInfo::get(false)->getID();
        massert( 10422 ,  "write with bad shard config and no server id!" , clientID.isSet() );

        LOG(1) << "got write with an old config - writing back ns: " << ns << endl;
        LOG(1) << m.toString() << endl;

        BSONObjBuilder b;
        b.appendBool( "writeBack" , true );
        b.append( "ns" , ns );
        b.append( "id" , writebackID );
        b.append( "connectionId" , cc().getConnectionId() );
        b.append( "instanceIdent" , prettyHostName() );
        shardingState.getVersion( ns ).addToBSON( b );
        
        ShardedConnectionInfo* info = ShardedConnectionInfo::get( false );
        ( info ? info->getVersion(ns) : ConfigVersion( 0, OID() ) ).addToBSON( b, "yourVersion" );

        b.appendBinData( "msg" , m.header()->len , bdtCustom , (char*)(m.singleData()) );
        LOG(2) << "writing back msg with len: " << m.header()->len << " op: " << m.operation() << endl;
        writeBackManager.queueWriteBack( clientID.str() , b.obj() );

        return true;
    }
Exemple #2
0
        bool run(OperationContext* txn, const string& dbname,
                  BSONObj& cmdObj,
                  int,
                  string& errmsg,
                  BSONObjBuilder& result,
                  bool fromRepl ) {

            //
            // Correct behavior here is very finicky.
            //
            // 1.  The first step is to append the error that occurred on the previous operation.
            // This adds an "err" field to the command, which is *not* the command failing.
            //
            // 2.  Next we parse and validate write concern options.  If these options are invalid
            // the command fails no matter what, even if we actually had an error earlier.  The
            // reason for checking here is to match legacy behavior on these kind of failures -
            // we'll still get an "err" field for the write error.
            //
            // 3.  If we had an error on the previous operation, we then return immediately.
            //
            // 4.  Finally, we actually enforce the write concern.  All errors *except* timeout are
            // reported with ok : 0.0, to match legacy behavior.
            //
            // There is a special case when "wOpTime" and "wElectionId" are explicitly provided by 
            // the client (mongos) - in this case we *only* enforce the write concern if it is 
            // valid.
            //
            // We always need to either report "err" (if ok : 1) or "errmsg" (if ok : 0), even if
            // err is null.
            //

            LastError *le = lastError.disableForCommand();

            // Always append lastOp and connectionId
            Client& c = cc();
            c.appendLastOp( result );

            // for sharding; also useful in general for debugging
            result.appendNumber( "connectionId" , c.getConnectionId() );

            OpTime lastOpTime;
            BSONField<OpTime> wOpTimeField("wOpTime");
            FieldParser::FieldState extracted = FieldParser::extract(cmdObj, wOpTimeField, 
                                                                     &lastOpTime, &errmsg);
            if (!extracted) {
                result.append("badGLE", cmdObj);
                appendCommandStatus(result, false, errmsg);
                return false;
            }
            bool lastOpTimePresent = extracted != FieldParser::FIELD_NONE;
            if (!lastOpTimePresent) {
                // Use the client opTime if no wOpTime is specified
                lastOpTime = cc().getLastOp();
            }
            
            OID electionId;
            BSONField<OID> wElectionIdField("wElectionId");
            extracted = FieldParser::extract(cmdObj, wElectionIdField, 
                                             &electionId, &errmsg);
            if (!extracted) {
                result.append("badGLE", cmdObj);
                appendCommandStatus(result, false, errmsg);
                return false;
            }

            bool electionIdPresent = extracted != FieldParser::FIELD_NONE;
            bool errorOccurred = false;

            // Errors aren't reported when wOpTime is used
            if ( !lastOpTimePresent ) {
                if ( le->nPrev != 1 ) {
                    errorOccurred = LastError::noError.appendSelf( result, false );
                    le->appendSelfStatus( result );
                }
                else {
                    errorOccurred = le->appendSelf( result, false );
                }
            }

            BSONObj writeConcernDoc = cmdObj;
            // Use the default options if we have no gle options aside from wOpTime/wElectionId
            const int nFields = cmdObj.nFields();
            bool useDefaultGLEOptions = (nFields == 1) || 
                (nFields == 2 && lastOpTimePresent) ||
                (nFields == 3 && lastOpTimePresent && electionIdPresent);

            if (useDefaultGLEOptions) {
                BSONObj getLastErrorDefault =
                        repl::getGlobalReplicationCoordinator()->getGetLastErrorDefault();
                if (!getLastErrorDefault.isEmpty()) {
                    writeConcernDoc = getLastErrorDefault;
                }
            }

            //
            // Validate write concern no matter what, this matches 2.4 behavior
            //

            WriteConcernOptions writeConcern;
            Status status = writeConcern.parse( writeConcernDoc );

            if ( status.isOK() ) {
                // Ensure options are valid for this host
                status = validateWriteConcern( writeConcern );
            }

            if ( !status.isOK() ) {
                result.append( "badGLE", writeConcernDoc );
                return appendCommandStatus( result, status );
            }

            // Don't wait for replication if there was an error reported - this matches 2.4 behavior
            if ( errorOccurred ) {
                dassert( !lastOpTimePresent );
                return true;
            }

            // No error occurred, so we won't duplicate these fields with write concern errors
            dassert( result.asTempObj()["err"].eoo() );
            dassert( result.asTempObj()["code"].eoo() );

            // If we got an electionId, make sure it matches
            if (electionIdPresent) {
                if (repl::getGlobalReplicationCoordinator()->getReplicationMode() !=
                        repl::ReplicationCoordinator::modeReplSet) {
                    // Ignore electionIds of 0 from mongos.
                    if (electionId != OID()) {
                        errmsg = "wElectionId passed but no replication active";
                        result.append("code", ErrorCodes::BadValue);
                        return false;
                    }
                } 
                else {
                    if (electionId != repl::getGlobalReplicationCoordinator()->getElectionId()) {
                        LOG(3) << "oid passed in is " << electionId
                               << ", but our id is "
                               << repl::getGlobalReplicationCoordinator()->getElectionId();
                        errmsg = "election occurred after write";
                        result.append("code", ErrorCodes::WriteConcernFailed);
                        return false;
                    }
                }
            }

            txn->setMessage( "waiting for write concern" );

            WriteConcernResult wcResult;
            status = waitForWriteConcern( txn, writeConcern, lastOpTime, &wcResult );
            wcResult.appendTo( writeConcern, &result );

            // For backward compatibility with 2.4, wtimeout returns ok : 1.0
            if ( wcResult.wTimedOut ) {
                dassert( !wcResult.err.empty() ); // so we always report err
                dassert( !status.isOK() );
                result.append( "errmsg", "timed out waiting for slaves" );
                result.append( "code", status.code() );
                return true;
            }

            return appendCommandStatus( result, status );
        }
Exemple #3
0
    // Returns false when request includes 'end'
    void assembleResponse( Message &m, DbResponse &dbresponse, const HostAndPort& remote ) {

        // before we lock...
        int op = m.operation();
        bool isCommand = false;
        const char *ns = m.singleData()->_data + 4;

        if ( op == dbQuery ) {
            if( strstr(ns, ".$cmd") ) {
                isCommand = true;
                opwrite(m);
                if( strstr(ns, ".$cmd.sys.") ) {
                    if( strstr(ns, "$cmd.sys.inprog") ) {
                        inProgCmd(m, dbresponse);
                        return;
                    }
                    if( strstr(ns, "$cmd.sys.killop") ) {
                        killOp(m, dbresponse);
                        return;
                    }
                    if( strstr(ns, "$cmd.sys.unlock") ) {
                        unlockFsync(ns, m, dbresponse);
                        return;
                    }
                }
            }
            else {
                opread(m);
            }
        }
        else if( op == dbGetMore ) {
            opread(m);
        }
        else {
            opwrite(m);
        }

        globalOpCounters.gotOp( op , isCommand );

        Client& c = cc();
        if ( c.getAuthenticationInfo() )
            c.getAuthenticationInfo()->startRequest();
        
        auto_ptr<CurOp> nestedOp;
        CurOp* currentOpP = c.curop();
        if ( currentOpP->active() ) {
            nestedOp.reset( new CurOp( &c , currentOpP ) );
            currentOpP = nestedOp.get();
        }
        else {
            c.newTopLevelRequest();
        }

        CurOp& currentOp = *currentOpP;
        currentOp.reset(remote,op);

        OpDebug& debug = currentOp.debug();
        debug.op = op;

        long long logThreshold = cmdLine.slowMS;
        bool shouldLog = logLevel >= 1;

        if ( op == dbQuery ) {
            if ( handlePossibleShardedMessage( m , &dbresponse ) )
                return;
            receivedQuery(c , dbresponse, m );
        }
        else if ( op == dbGetMore ) {
            if ( ! receivedGetMore(dbresponse, m, currentOp) )
                shouldLog = true;
        }
        else if ( op == dbMsg ) {
            // deprecated - replaced by commands
            char *p = m.singleData()->_data;
            int len = strlen(p);
            if ( len > 400 )
                out() << curTimeMillis64() % 10000 <<
                      " long msg received, len:" << len << endl;

            Message *resp = new Message();
            if ( strcmp( "end" , p ) == 0 )
                resp->setData( opReply , "dbMsg end no longer supported" );
            else
                resp->setData( opReply , "i am fine - dbMsg deprecated");

            dbresponse.response = resp;
            dbresponse.responseTo = m.header()->id;
        }
        else {
            try {
                const NamespaceString nsString( ns );

                // The following operations all require authorization.
                // dbInsert, dbUpdate and dbDelete can be easily pre-authorized,
                // here, but dbKillCursors cannot.
                if ( op == dbKillCursors ) {
                    currentOp.ensureStarted();
                    logThreshold = 10;
                    receivedKillCursors(m);
                }
                else if ( !nsString.isValid() ) {
                    // Only killCursors doesn't care about namespaces
                    uassert( 16257, str::stream() << "Invalid ns [" << ns << "]", false );
                }
                else if ( ! c.getAuthenticationInfo()->isAuthorized(
                                  nsToDatabase( m.singleData()->_data + 4 ) ) ) {
                    setLastError(0, "unauthorized");
                }
                else if ( op == dbInsert ) {
                    receivedInsert(m, currentOp);
                }
                else if ( op == dbUpdate ) {
                    receivedUpdate(m, currentOp);
                }
                else if ( op == dbDelete ) {
                    receivedDelete(m, currentOp);
                }
                else {
                    mongo::log() << "    operation isn't supported: " << op << endl;
                    currentOp.done();
                    shouldLog = true;
                }
            }
            catch ( UserException& ue ) {
                tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing "
                        << ue.toString() << endl;
                debug.exceptionInfo = ue.getInfo();
            }
            catch ( AssertionException& e ) {
                tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing "
                        << e.toString() << endl;
                debug.exceptionInfo = e.getInfo();
                shouldLog = true;
            }
        }
        currentOp.ensureStarted();
        currentOp.done();
        debug.executionTime = currentOp.totalTimeMillis();

        logThreshold += currentOp.getExpectedLatencyMs();

        if ( shouldLog || debug.executionTime > logThreshold ) {
            mongo::tlog() << debug.report( currentOp ) << endl;
        }

        if ( currentOp.shouldDBProfile( debug.executionTime ) ) {
            // performance profiling is on
            if ( Lock::isReadLocked() ) {
                mongo::log(1) << "note: not profiling because recursive read lock" << endl;
            }
            else if ( lockedForWriting() ) {
                mongo::log(1) << "note: not profiling because doing fsync+lock" << endl;
            }
            else {
                Lock::DBWrite lk( currentOp.getNS() );
                if ( dbHolder()._isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ) {
                    Client::Context cx( currentOp.getNS(), dbpath, false );
                    profile(c , currentOp );
                }
                else {
                    mongo::log() << "note: not profiling because db went away - probably a close on: " << currentOp.getNS() << endl;
                }
            }
        }
        
        debug.reset();
    } /* assembleResponse() */
Exemple #4
0
        bool runNoDirectClient( const string& ns , 
                                const BSONObj& queryOriginal , const BSONObj& fields , const BSONObj& update , 
                                bool upsert , bool returnNew , bool remove ,
                                BSONObjBuilder& result ) {
            
            
            Lock::DBWrite lk( ns );
            Client::Context cx( ns );

            BSONObj doc;
            
            bool found = Helpers::findOne( ns.c_str() , queryOriginal , doc );

            BSONObj queryModified = queryOriginal;
            if ( found && doc["_id"].type() && ! isSimpleIdQuery( queryOriginal ) ) {
                // we're going to re-write the query to be more efficient
                // we have to be a little careful because of positional operators
                // maybe we can pass this all through eventually, but right now isn't an easy way
                BSONObjBuilder b( queryOriginal.objsize() + 10 );
                b.append( doc["_id"] );
                
                bool addedAtomic = false;

                BSONObjIterator i( queryOriginal );
                while ( i.more() ) {
                    const BSONElement& elem = i.next();

                    if ( str::equals( "_id" , elem.fieldName() ) ) {
                        // we already do _id
                        continue;
                    }
                    
                    if ( ! str::contains( elem.fieldName() , '.' ) ) {
                        // if there is a dotted field, accept we may need more query parts
                        continue;
                    }
                    
                    if ( ! addedAtomic ) {
                        b.appendBool( "$atomic" , true );
                        addedAtomic = true;
                    }

                    b.append( elem );
                }
                queryModified = b.obj();
            }

            if ( remove ) {
                _appendHelper( result , doc , found , fields );
                if ( found ) {
                    deleteObjects( ns.c_str() , queryModified , true , true );
                    BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) );
                    le.appendNumber( "n" , 1 );
                    le.done();
                }
            }
            else {
                // update
                if ( ! found && ! upsert ) {
                    // didn't have it, and am not upserting
                    _appendHelper( result , doc , found , fields );
                }
                else {
                    // we found it or we're updating
                    
                    if ( ! returnNew ) {
                        _appendHelper( result , doc , found , fields );
                    }
                    
                    UpdateResult res = updateObjects( ns.c_str() , update , queryModified , upsert , false , true , cc().curop()->debug() );
                    
                    if ( returnNew ) {
                        if ( ! res.existing && res.upserted.isSet() ) {
                            queryModified = BSON( "_id" << res.upserted );
                        }
                        log() << "queryModified: " << queryModified << endl;
                        verify( Helpers::findOne( ns.c_str() , queryModified , doc ) );
                        _appendHelper( result , doc , true , fields );
                    }
                    
                    BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) );
                    le.appendBool( "updatedExisting" , res.existing );
                    le.appendNumber( "n" , res.num );
                    if ( res.upserted.isSet() )
                        le.append( "upserted" , res.upserted );
                    le.done();
                    
                }
            }
            
            return true;
        }
Exemple #5
0
    // Returns false when request includes 'end'
    bool assembleResponse( Message &m, DbResponse &dbresponse, const SockAddr &client ) {

        // before we lock...
        int op = m.operation();
        bool isCommand = false;
        const char *ns = m.singleData()->_data + 4;
        if ( op == dbQuery ) {
            if( strstr(ns, ".$cmd") ) {
                isCommand = true;
                opwrite(m);
                if( strstr(ns, ".$cmd.sys.") ) { 
                    if( strstr(ns, "$cmd.sys.inprog") ) {
                        inProgCmd(m, dbresponse);
                        return true;
                    }
                    if( strstr(ns, "$cmd.sys.killop") ) { 
                        killOp(m, dbresponse);
                        return true;
                    }
                    if( strstr(ns, "$cmd.sys.unlock") ) { 
                        unlockFsync(ns, m, dbresponse);
                        return true;
                    }
                }
            }
            else {
                opread(m);
            }
        }
        else if( op == dbGetMore ) {
            opread(m);
        }
        else {
            opwrite(m);
        }
        
        globalOpCounters.gotOp( op , isCommand );
        
        Client& c = cc();
        
        auto_ptr<CurOp> nestedOp;
        CurOp* currentOpP = c.curop();
        if ( currentOpP->active() ){
            nestedOp.reset( new CurOp( &c , currentOpP ) );
            currentOpP = nestedOp.get();
        }
        CurOp& currentOp = *currentOpP;
        currentOp.reset(client,op);
        
        OpDebug& debug = currentOp.debug();
        StringBuilder& ss = debug.str;
        ss << opToString( op ) << " ";

        int logThreshold = cmdLine.slowMS;
        bool log = logLevel >= 1;
        
        if ( op == dbQuery ) {
            if ( handlePossibleShardedMessage( m , &dbresponse ) )
                return true;
            receivedQuery(c , dbresponse, m );
        }
        else if ( op == dbGetMore ) {
            if ( ! receivedGetMore(dbresponse, m, currentOp) )
                log = true;
        }
        else if ( op == dbMsg ) {
            // deprecated - replaced by commands
            char *p = m.singleData()->_data;
            int len = strlen(p);
            if ( len > 400 )
                out() << curTimeMillis() % 10000 <<
                    " long msg received, len:" << len << endl;

            Message *resp = new Message();
            if ( strcmp( "end" , p ) == 0 )
                resp->setData( opReply , "dbMsg end no longer supported" );
            else
                resp->setData( opReply , "i am fine - dbMsg deprecated");

            dbresponse.response = resp;
            dbresponse.responseTo = m.header()->id;
        }
        else {
            const char *ns = m.singleData()->_data + 4;
            char cl[256];
            nsToDatabase(ns, cl);
            if( ! c.getAuthenticationInfo()->isAuthorized(cl) ) { 
                uassert_nothrow("unauthorized");
            }
            else {
                try {
                    if ( op == dbInsert ) {
                        receivedInsert(m, currentOp);
                    }
                    else if ( op == dbUpdate ) {
                        receivedUpdate(m, currentOp);
                    }
                    else if ( op == dbDelete ) {
                        receivedDelete(m, currentOp);
                    }
                    else if ( op == dbKillCursors ) {
                        currentOp.ensureStarted();
                        logThreshold = 10;
                        ss << "killcursors ";
                        receivedKillCursors(m);
                    }
                    else {
                        mongo::log() << "    operation isn't supported: " << op << endl;
                        currentOp.done();
                        log = true;
                    }
                }
                catch ( AssertionException& e ) {
                    static int n;
                    tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing" << endl;
                    ss << " exception " + e.toString();
                    log = ++n < 10;
                }
            }
        }
        currentOp.ensureStarted();
        currentOp.done();
        int ms = currentOp.totalTimeMillis();
        
        log = log || (logLevel >= 2 && ++ctr % 512 == 0);
        //DEV log = true; 
        if ( log || ms > logThreshold ) {
            if( logLevel < 3 && op == dbGetMore && strstr(ns, ".oplog.") && ms < 3000 && !log ) {
                /* it's normal for getMore on the oplog to be slow because of use of awaitdata flag. */
            } else {
                ss << ' ' << ms << "ms";
                mongo::tlog() << ss.str() << endl;
            }
        }
        
        if ( currentOp.shouldDBProfile( ms ) ){
            // performance profiling is on
            if ( dbMutex.getState() < 0 ){
                mongo::log(1) << "note: not profiling because recursive read lock" << endl;
            }
            else {
                writelock lk;
                if ( dbHolder.isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ){
                    Client::Context c( currentOp.getNS() );
                    profile(ss.str().c_str(), ms);
                }
                else {
                    mongo::log() << "note: not profiling because db went away - probably a close on: " << currentOp.getNS() << endl;
                }
            }
        }

        return true;
    } /* assembleResponse() */
Exemple #6
0
    /* must call this on a delete so we clean up the cursors. */
    void ClientCursor::aboutToDelete(const DiskLoc& dl) {
        recursive_scoped_lock lock(ccmutex);

        Database *db = cc().database();
        verify(db);

        aboutToDeleteForSharding( db , dl );

        CCByLoc& bl = db->ccByLoc;
        CCByLoc::iterator j = bl.lower_bound(ByLocKey::min(dl));
        CCByLoc::iterator stop = bl.upper_bound(ByLocKey::max(dl));
        if ( j == stop )
            return;

        vector<ClientCursor*> toAdvance;

        while ( 1 ) {
            toAdvance.push_back(j->second);
            DEV verify( j->first.loc == dl );
            ++j;
            if ( j == stop )
                break;
        }

        if( toAdvance.size() >= 3000 ) {
            log() << "perf warning MPW101: " << toAdvance.size() << " cursors for one diskloc "
                  << dl.toString()
                  << ' ' << toAdvance[1000]->_ns
                  << ' ' << toAdvance[2000]->_ns
                  << ' ' << toAdvance[1000]->_pinValue
                  << ' ' << toAdvance[2000]->_pinValue
                  << ' ' << toAdvance[1000]->_pos
                  << ' ' << toAdvance[2000]->_pos
                  << ' ' << toAdvance[1000]->_idleAgeMillis
                  << ' ' << toAdvance[2000]->_idleAgeMillis
                  << ' ' << toAdvance[1000]->_doingDeletes
                  << ' ' << toAdvance[2000]->_doingDeletes
                  << endl;
            //wassert( toAdvance.size() < 5000 );
        }

        for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ) {
            ClientCursor* cc = *i;
            wassert(cc->_db == db);

            if ( cc->_doingDeletes ) continue;

            Cursor *c = cc->_c.get();
            if ( c->capped() ) {
                /* note we cannot advance here. if this condition occurs, writes to the oplog
                   have "caught" the reader.  skipping ahead, the reader would miss postentially
                   important data.
                   */
                delete cc;
                continue;
            }

            c->recoverFromYield();
            DiskLoc tmp1 = c->refLoc();
            if ( tmp1 != dl ) {
                // This might indicate a failure to call ClientCursor::prepareToYield() but it can
                // also happen during correct operation, see SERVER-2009.
                problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl;
            }
            else {
                c->advance();
            }
            while (!c->eof() && c->refLoc() == dl) {
                /* We don't delete at EOF because we want to return "no more results" rather than "no such cursor".
                 * The loop is to handle MultiKey indexes where the deleted record is pointed to by multiple adjacent keys.
                 * In that case we need to advance until we get to the next distinct record or EOF.
                 * SERVER-4154
                 */
                c->advance();
            }
            cc->updateLocation();
        }
    }
Exemple #7
0
 Exception(const Location &where, const T &detail):
   Preprocessor::Exception(where, cc("formatter error: ", detail))
 {
 }
Exemple #8
0
 bool AuthenticationInfo::_isAuthorizedSpecialChecks( const string& dbname ) const {
     if ( cc().isGod() ) 
         return true;
     return _isLocalHostAndLocalHostIsAuthorizedForAll;
 }
Exemple #9
0
 void Lock::ScopedLock::recordTime() {
     if (haveClient()) {
         cc().curop()->lockStat().recordLockTimeMicros(_type, _timer.micros());
     }
 }
bool NxuPhysicsExport::Write(NxJoint *j,const char *userProperties,const char *id)
{
	bool ret = false;

	NxSceneDesc *current = getCurrentScene();

	CustomCopy cc(mCollection,current);

	NxJointDesc *joint = 0;

	switch ( j->getType() )
	{
		case NX_JOINT_PRISMATIC:
			if ( 1 )
			{
				::NxPrismaticJointDesc d1;
				NxPrismaticJoint *sj = j->isPrismaticJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxPrismaticJointDesc *desc = new NxPrismaticJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_REVOLUTE:
			if ( 1 )
			{
				::NxRevoluteJointDesc d1;
				NxRevoluteJoint *sj = j->isRevoluteJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxRevoluteJointDesc *desc = new NxRevoluteJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_CYLINDRICAL:
			if ( 1 )
			{
				::NxCylindricalJointDesc d1;
				NxCylindricalJoint *sj = j->isCylindricalJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxCylindricalJointDesc *desc = new NxCylindricalJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_SPHERICAL:
			if ( 1 )
			{
				::NxSphericalJointDesc d1;
				NxSphericalJoint *sj = j->isSphericalJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxSphericalJointDesc *desc = new NxSphericalJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_POINT_ON_LINE:
			if ( 1 )
			{
				::NxPointOnLineJointDesc d1;
				NxPointOnLineJoint *sj = j->isPointOnLineJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxPointOnLineJointDesc *desc = new NxPointOnLineJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_POINT_IN_PLANE:
			if ( 1 )
			{
				::NxPointInPlaneJointDesc d1;
				NxPointInPlaneJoint *sj = j->isPointInPlaneJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxPointInPlaneJointDesc *desc = new NxPointInPlaneJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_DISTANCE:
			if ( 1 )
			{
				::NxDistanceJointDesc d1;
				NxDistanceJoint *sj = j->isDistanceJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxDistanceJointDesc *desc = new NxDistanceJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_PULLEY:
			if ( 1 )
			{
				::NxPulleyJointDesc d1;
				NxPulleyJoint *sj = j->isPulleyJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxPulleyJointDesc *desc = new NxPulleyJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_FIXED:
			if ( 1 )
			{
				::NxFixedJointDesc d1;
				NxFixedJoint *sj = j->isFixedJoint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxFixedJointDesc *desc = new NxFixedJointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		case NX_JOINT_D6:
			if ( 1 )
			{
				::NxD6JointDesc d1;
				NxD6Joint *sj = j->isD6Joint();
				sj->saveToDesc(d1);
				addActor( d1.actor[0] );
				addActor( d1.actor[1] );
				NxD6JointDesc *desc = new NxD6JointDesc;
				desc->copyFrom(d1,cc);
				joint = static_cast<NxJointDesc *>(desc);
			}
			break;
		default:
			break;

	}


	//Add	Limits
	// in	addition,	we also	have to	write	out	its	limit	planes!
	j->resetLimitPlaneIterator();
	if (j->hasMoreLimitPlanes())
	{
		// write limit point
		joint->mOnActor2 = j->getLimitPoint(joint->mPlaneLimitPoint);

		NxArray< NxPlaneInfoDesc *> plist;


		// write the plane normals
		while	(j->hasMoreLimitPlanes())
		{
			NxPlaneInfoDesc *pInfo	=	new	NxPlaneInfoDesc();
#if NX_SDK_VERSION_NUMBER >= 272
			j->getNextLimitPlane(pInfo->mPlaneNormal,	pInfo->mPlaneD, &pInfo->restitution);
#else
			j->getNextLimitPlane(pInfo->mPlaneNormal,	pInfo->mPlaneD);
#endif
			plist.push_back(pInfo);
		}

		if ( plist.size() )
		{
			for (int i=plist.size()-1; i>=0; i--)
			{
				NxPlaneInfoDesc *p = plist[i];
				joint->mPlaneInfo.pushBack(p);
			}
		}

	}


	if ( joint )
	{
		if ( id )
		{
			joint->mId = id;
		}
		else
		{
      char scratch[512];
      sprintf(scratch,"Joint_%d", current->mJoints.size());
      joint->mId = getGlobalString(scratch);
      joint->mUserProperties = getGlobalString(userProperties);
    }
		current->mJoints.push_back(joint);
		ret = true;
	}

  return ret;
}
Exemple #11
0
 bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
     AuthenticationInfo *ai = cc().getAuthenticationInfo();
     ai->logout(dbname);
     return true;
 }
Exemple #12
0
    /**
     * For a given query, get a runner.  The runner could be a SingleSolutionRunner, a
     * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc.
     */
    Status getRunner(CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions) {
        verify(rawCanonicalQuery);
        auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);

        // Try to look up a cached solution for the query.
        // TODO: Can the cache have negative data about a solution?
        PlanCache* localCache = PlanCache::get(canonicalQuery->ns());
        if (NULL != localCache) {
            CachedSolution* cs = localCache->get(*canonicalQuery);
            if (NULL != cs) {
                // We have a cached solution.  Hand the canonical query and cached solution off to
                // the cached plan runner, which takes ownership of both.
                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(*cs->solution, &root, &ws));
                *out = new CachedPlanRunner(canonicalQuery.release(), cs, root, ws);
                return Status::OK();
            }
        }

        // No entry in cache for the query.  We have to solve the query ourself.

        // Get the indices that we could possibly use.
        Database* db = cc().database();
        verify( db );
        Collection* collection = db->getCollection( canonicalQuery->ns() );

        // This can happen as we're called by internal clients as well.
        if (NULL == collection) {
            const string& ns = canonicalQuery->ns();
            *out = new EOFRunner(canonicalQuery.release(), ns);
            return Status::OK();
        }

        // If we have an _id index we can use the idhack runner.
        if (canUseIDHack(*canonicalQuery) && collection->getIndexCatalog()->findIdIndex()) {
            *out = new IDHackRunner(collection, canonicalQuery.release());
            return Status::OK();
        }

        // If it's not NULL, we may have indices.  Access the catalog and fill out IndexEntry(s)
        QueryPlannerParams plannerParams;
        for (int i = 0; i < collection->getIndexCatalog()->numIndexesReady(); ++i) {
            IndexDescriptor* desc = collection->getIndexCatalog()->getDescriptor( i );
            plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                       desc->isMultikey(),
                                                       desc->isSparse(),
                                                       desc->indexName()));
        }

        // Tailable: If the query requests tailable the collection must be capped.
        if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) {
            if (!collection->isCapped()) {
                return Status(ErrorCodes::BadValue,
                              "tailable cursor requested on non capped collection");
            }

            // If a sort is specified it must be equal to expectedSort.
            const BSONObj expectedSort = BSON("$natural" << 1);
            const BSONObj& actualSort = canonicalQuery->getParsed().getSort();
            if (!actualSort.isEmpty() && !(actualSort == expectedSort)) {
                return Status(ErrorCodes::BadValue,
                              "invalid sort specified for tailable cursor: "
                              + actualSort.toString());
            }
        }

        // Process the planning options.
        plannerParams.options = plannerOptions;
        if (storageGlobalParams.noTableScan) {
            const string& ns = canonicalQuery->ns();
            // There are certain cases where we ignore this restriction:
            bool ignore = canonicalQuery->getQueryObj().isEmpty()
                          || (string::npos != ns.find(".system."))
                          || (0 == ns.find("local."));
            if (!ignore) {
                plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN;
            }
        }

        if (!(plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) {
            plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN;
        }

        // If the caller wants a shard filter, make sure we're actually sharded.
        if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
            CollectionMetadataPtr collMetadata = shardingState.getCollectionMetadata(canonicalQuery->ns());
            if (collMetadata) {
                plannerParams.shardKey = collMetadata->getKeyPattern();
            }
            else {
                // If there's no metadata don't bother w/the shard filter since we won't know what
                // the key pattern is anyway...
                plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
            }
        }

        vector<QuerySolution*> solutions;
        QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions);

        /*
        for (size_t i = 0; i < solutions.size(); ++i) {
            QLOG() << "solution " << i << " is " << solutions[i]->toString() << endl;
        }
        */

        // We cannot figure out how to answer the query.  Should this ever happen?
        if (0 == solutions.size()) {
            return Status(ErrorCodes::BadValue, "No query solutions");
        }

        if (1 == solutions.size()) {
            // Only one possible plan.  Run it.  Build the stages from the solution.
            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*solutions[0], &root, &ws));

            // And, run the plan.
            *out = new SingleSolutionRunner(canonicalQuery.release(), solutions[0], root, ws);
            return Status::OK();
        }
        else {
            // Many solutions.  Let the MultiPlanRunner pick the best, update the cache, and so on.
            auto_ptr<MultiPlanRunner> mpr(new MultiPlanRunner(canonicalQuery.release()));
            for (size_t i = 0; i < solutions.size(); ++i) {
                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(*solutions[i], &root, &ws));
                // Takes ownership of all arguments.
                mpr->addPlan(solutions[i], root, ws);
            }
            *out = mpr.release();
            return Status::OK();
        }
    }
Exemple #13
0
    bool WriteCmd::run(const string& dbName,
                       BSONObj& cmdObj,
                       int options,
                       string& errMsg,
                       BSONObjBuilder& result,
                       bool fromRepl) {

        // Can't be run on secondaries (logTheOp() == false, slaveOk() == false).
        dassert( !fromRepl );
        BatchedCommandRequest request( _writeType );
        BatchedCommandResponse response;

        if ( !request.parseBSON( cmdObj, &errMsg ) || !request.isValid( &errMsg ) ) {

            // Batch parse failure
            response.setOk( false );
            response.setN( 0 );
            response.setErrCode( ErrorCodes::FailedToParse );
            response.setErrMessage( errMsg );

            dassert( response.isValid( &errMsg ) );
            result.appendElements( response.toBSON() );

            // TODO
            // There's a pending issue about how to report response here. If we use
            // the command infra-structure, we should reuse the 'errmsg' field. But
            // we have already filed that message inside the BatchCommandResponse.
            // return response.getOk();
            return true;
        }

        // Note that this is a runCommmand, and therefore, the database and the collection name
        // are in different parts of the grammar for the command. But it's more convenient to
        // work with a NamespaceString. We built it here and replace it in the parsed command.
        // Internally, everything work with the namespace string as opposed to just the
        // collection name.
        NamespaceString nss(dbName, request.getNS());
        request.setNS(nss.ns());

        Status status = userAllowedWriteNS( nss );
        if ( !status.isOK() )
            return appendCommandStatus( result, status );

        if ( cc().curop() )
            cc().curop()->setNS( nss.ns() );

        if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) {
            // check all docs
            BatchedInsertRequest* insertRequest = request.getInsertRequest();
            vector<BSONObj>& docsToInsert = insertRequest->getDocuments();
            for ( size_t i = 0; i < docsToInsert.size(); i++ ) {
                StatusWith<BSONObj> fixed = fixDocumentForInsert( docsToInsert[i] );
                if ( !fixed.isOK() ) {
                    // we don't return early since each doc can be handled independantly
                    continue;
                }
                if ( fixed.getValue().isEmpty() ) {
                    continue;
                }
                docsToInsert[i] = fixed.getValue();
            }
        }

        BSONObj defaultWriteConcern;
        // This is really bad - it's only safe because we leak the defaults by overriding them with
        // new defaults and because we never reset to an empty default.
        // TODO: fix this for sane behavior where we query repl set object
        if ( getLastErrorDefault ) defaultWriteConcern = *getLastErrorDefault;
        if ( defaultWriteConcern.isEmpty() ) {
            BSONObjBuilder b;
            b.append( "w", 1 );
            defaultWriteConcern = b.obj();
        }

        WriteBatchExecutor writeBatchExecutor(defaultWriteConcern,
                                              &cc(),
                                              &globalOpCounters,
                                              lastError.get());

        writeBatchExecutor.executeBatch( request, &response );

        result.appendElements( response.toBSON() );

        // TODO
        // There's a pending issue about how to report response here. If we use
        // the command infra-structure, we should reuse the 'errmsg' field. But
        // we have already filed that message inside the BatchCommandResponse.
        // return response.getOk();
        return true;
    }
Exemple #14
0
        bool runNoDirectClient( const string& ns , 
                                const BSONObj& queryOriginal , const BSONObj& fields , const BSONObj& update , 
                                bool upsert , bool returnNew , bool remove ,
                                BSONObjBuilder& result , string& errmsg ) {
            
            
            Lock::DBWrite lk( ns );
            Client::Context cx( ns );

            BSONObj doc;
            
            bool found = Helpers::findOne( ns.c_str() , queryOriginal , doc );

            BSONObj queryModified = queryOriginal;
            if ( found && doc["_id"].type() && ! isSimpleIdQuery( queryOriginal ) ) {
                // we're going to re-write the query to be more efficient
                // we have to be a little careful because of positional operators
                // maybe we can pass this all through eventually, but right now isn't an easy way
                
                bool hasPositionalUpdate = false;
                {
                    // if the update has a positional piece ($)
                    // then we need to pull all query parts in
                    // so here we check for $
                    // a little hacky
                    BSONObjIterator i( update );
                    while ( i.more() ) {
                        const BSONElement& elem = i.next();
                        
                        if ( elem.fieldName()[0] != '$' || elem.type() != Object )
                            continue;

                        BSONObjIterator j( elem.Obj() );
                        while ( j.more() ) {
                            if ( str::contains( j.next().fieldName(), ".$" ) ) {
                                hasPositionalUpdate = true;
                                break;
                            }
                        }
                    }
                }

                BSONObjBuilder b( queryOriginal.objsize() + 10 );
                b.append( doc["_id"] );
                
                bool addedAtomic = false;

                BSONObjIterator i( queryOriginal );
                while ( i.more() ) {
                    const BSONElement& elem = i.next();

                    if ( str::equals( "_id" , elem.fieldName() ) ) {
                        // we already do _id
                        continue;
                    }
                    
                    if ( ! hasPositionalUpdate ) {
                        // if there is a dotted field, accept we may need more query parts
                        continue;
                    }
                    
                    if ( ! addedAtomic ) {
                        b.appendBool( "$atomic" , true );
                        addedAtomic = true;
                    }

                    b.append( elem );
                }
                queryModified = b.obj();
            }

            if ( remove ) {
                _appendHelper( result , doc , found , fields );
                if ( found ) {
                    deleteObjects( ns.c_str() , queryModified , true , true );
                    BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) );
                    le.appendNumber( "n" , 1 );
                    le.done();
                }
            }
            else {
                // update
                if ( ! found && ! upsert ) {
                    // didn't have it, and am not upserting
                    _appendHelper( result , doc , found , fields );
                }
                else {
                    // we found it or we're updating
                    
                    if ( ! returnNew ) {
                        _appendHelper( result , doc , found , fields );
                    }
                    
                    const NamespaceString requestNs(ns);
                    UpdateRequest request(requestNs);

                    request.setQuery(queryModified);
                    request.setUpdates(update);
                    request.setUpsert(upsert);
                    request.setUpdateOpLog();

                    UpdateResult res = mongo::update(request, &cc().curop()->debug());

                    if ( returnNew ) {
                        if ( res.upserted.isSet() ) {
                            queryModified = BSON( "_id" << res.upserted );
                        }
                        else if ( queryModified["_id"].type() ) {
                            // we do this so that if the update changes the fields, it still matches
                            queryModified = queryModified["_id"].wrap();
                        }
                        if ( ! Helpers::findOne( ns.c_str() , queryModified , doc ) ) {
                            errmsg = str::stream() << "can't find object after modification  " 
                                                   << " ns: " << ns 
                                                   << " queryModified: " << queryModified 
                                                   << " queryOriginal: " << queryOriginal;
                            log() << errmsg << endl;
                            return false;
                        }
                        _appendHelper( result , doc , true , fields );
                    }
                    
                    BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) );
                    le.appendBool( "updatedExisting" , res.existing );
                    le.appendNumber( "n" , res.numMatched );
                    if ( res.upserted.isSet() )
                        le.append( "upserted" , res.upserted );
                    le.done();
                    
                }
            }
            
            return true;
        }
std::unique_ptr<OperationContext> ServiceContextMongoD::_newOpCtx(Client* client) {
    invariant(&cc() == client);
    return stdx::make_unique<OperationContextImpl>();
}
Exemple #16
0
 ~TrackLockAcquireTime() {
     if (haveClient()) {
         cc().curop()->lockStat().recordAcquireTimeMicros(_type, _timer.micros());
     }
 }
Exemple #17
0
        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            Timer t;
            string ns = dbname + '.' + cmdObj.firstElement().valuestr();

            string key = cmdObj["key"].valuestrsafe();
            BSONObj keyPattern = BSON( key << 1 );

            BSONObj query = getQuery( cmdObj );

            int bufSize = BSONObjMaxUserSize - 4096;
            BufBuilder bb( bufSize );
            char * start = bb.buf();

            BSONArrayBuilder arr( bb );
            BSONElementSet values;

            long long nscanned = 0; // locations looked at
            long long nscannedObjects = 0; // full objects looked at
            long long n = 0; // matches
            MatchDetails md;

            NamespaceDetails * d = nsdetails( ns );

            if ( ! d ) {
                result.appendArray( "values" , BSONObj() );
                result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) );
                return true;
            }

            shared_ptr<Cursor> cursor;
            if ( ! query.isEmpty() ) {
                cursor = getOptimizedCursor( ns.c_str(), query, BSONObj() );
            }
            else {

                // query is empty, so lets see if we can find an index
                // with the key so we don't have to hit the raw data
                NamespaceDetails::IndexIterator ii = d->ii();
                while ( ii.more() ) {
                    IndexDetails& idx = ii.next();

                    if ( d->isMultikey( ii.pos() - 1 ) )
                        continue;

                    if ( idx.inKeyPattern( key ) ) {
                        cursor = getBestGuessCursor( ns.c_str(), BSONObj(), idx.keyPattern() );
                        if( cursor.get() ) break;
                    }

                }

                if ( ! cursor.get() )
                    cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );

            }

            
            verify( cursor );
            string cursorName = cursor->toString();
            
            auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns));

            // map from indexed field to offset in key object
            map<string, int> indexedFields;  
            if (!cursor->modifiedKeys()) {
                // store index information so we can decide if we can
                // get something out of the index key rather than full object

                int x = 0;
                BSONObjIterator i( cursor->indexKeyPattern() );
                while ( i.more() ) {
                    BSONElement e = i.next();
                    if ( e.isNumber() ) {
                        // only want basic index fields, not "2d" etc
                        indexedFields[e.fieldName()] = x;
                    }
                    x++;
                }
            }

            while ( cursor->ok() ) {
                nscanned++;
                bool loadedRecord = false;

                if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currLoc() ) ) {
                    n++;

                    BSONObj holder;
                    BSONElementSet temp;
                    // Try to get the record from the key fields.
                    loadedRecord = !getFieldsDotted(indexedFields, cursor, key, temp, holder);

                    for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) {
                        BSONElement e = *i;
                        if ( values.count( e ) )
                            continue;

                        int now = bb.len();

                        uassert(10044,  "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize );

                        arr.append( e );
                        BSONElement x( start + now );

                        values.insert( x );
                    }
                }

                if ( loadedRecord || md.hasLoadedRecord() )
                    nscannedObjects++;

                cursor->advance();

                if (!cc->yieldSometimes( ClientCursor::MaybeCovered )) {
                    cc.release();
                    break;
                }

                RARELY killCurrentOp.checkForInterrupt();
            }

            verify( start == bb.buf() );

            result.appendArray( "values" , arr.done() );

            {
                BSONObjBuilder b;
                b.appendNumber( "n" , n );
                b.appendNumber( "nscanned" , nscanned );
                b.appendNumber( "nscannedObjects" , nscannedObjects );
                b.appendNumber( "timems" , t.millis() );
                b.append( "cursor" , cursorName );
                result.append( "stats" , b.obj() );
            }

            return true;
        }
Exemple #18
0
void Strategy::queryOp(OperationContext* txn, Request& r) {
    verify(!NamespaceString(r.getns()).isCommand());

    Timer queryTimer;

    globalOpCounters.gotQuery();

    QueryMessage q(r.d());

    NamespaceString ns(q.ns);
    ClientBasic* client = txn->getClient();
    AuthorizationSession* authSession = AuthorizationSession::get(client);
    Status status = authSession->checkAuthForQuery(ns, q.query);
    audit::logQueryAuthzCheck(client, ns, q.query, status.code());
    uassertStatusOK(status);

    LOG(3) << "query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn
           << " options: " << q.queryOptions;

    if (q.ntoreturn == 1 && strstr(q.ns, ".$cmd"))
        throw UserException(8010, "something is wrong, shouldn't see a command here");

    if (q.queryOptions & QueryOption_Exhaust) {
        uasserted(18526,
                  string("the 'exhaust' query option is invalid for mongos queries: ") + q.ns +
                      " " + q.query.toString());
    }

    // Spigot which controls whether OP_QUERY style find on mongos uses the new ClusterClientCursor
    // code path.
    // TODO: Delete the spigot and always use the new code.
    if (useClusterClientCursor) {
        auto txn = cc().makeOperationContext();

        ReadPreferenceSetting readPreference(ReadPreference::PrimaryOnly, TagSet::primaryOnly());

        BSONElement rpElem;
        auto readPrefExtractStatus = bsonExtractTypedField(
            q.query, LiteParsedQuery::kFindCommandReadPrefField, mongo::Object, &rpElem);

        if (readPrefExtractStatus.isOK()) {
            auto parsedRps = ReadPreferenceSetting::fromBSON(rpElem.Obj());
            uassertStatusOK(parsedRps.getStatus());
            readPreference = parsedRps.getValue();
        } else if (readPrefExtractStatus != ErrorCodes::NoSuchKey) {
            uassertStatusOK(readPrefExtractStatus);
        }

        auto canonicalQuery = CanonicalQuery::canonicalize(q, WhereCallbackNoop());
        uassertStatusOK(canonicalQuery.getStatus());

        // Do the work to generate the first batch of results. This blocks waiting to get responses
        // from the shard(s).
        std::vector<BSONObj> batch;

        // 0 means the cursor is exhausted and
        // otherwise we assume that a cursor with the returned id can be retrieved via the
        // ClusterCursorManager
        auto cursorId =
            ClusterFind::runQuery(txn.get(), *canonicalQuery.getValue(), readPreference, &batch);
        uassertStatusOK(cursorId.getStatus());

        // Build the response document.
        // TODO: this constant should be shared between mongos and mongod, and should
        // not be inside ShardedClientCursor.
        BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE);

        int numResults = 0;
        for (const auto& obj : batch) {
            buffer.appendBuf((void*)obj.objdata(), obj.objsize());
            numResults++;
        }

        replyToQuery(0,  // query result flags
                     r.p(),
                     r.m(),
                     buffer.buf(),
                     buffer.len(),
                     numResults,
                     0,  // startingFrom
                     cursorId.getValue());
        return;
    }

    QuerySpec qSpec((string)q.ns, q.query, q.fields, q.ntoskip, q.ntoreturn, q.queryOptions);

    // Parse "$maxTimeMS".
    StatusWith<int> maxTimeMS = LiteParsedQuery::parseMaxTimeMSQuery(q.query);
    uassert(17233, maxTimeMS.getStatus().reason(), maxTimeMS.isOK());

    if (_isSystemIndexes(q.ns) && doShardedIndexQuery(txn, r, qSpec)) {
        return;
    }

    ParallelSortClusteredCursor* cursor = new ParallelSortClusteredCursor(qSpec, CommandInfo());
    verify(cursor);

    // TODO:  Move out to Request itself, not strategy based
    try {
        cursor->init(txn);

        if (qSpec.isExplain()) {
            BSONObjBuilder explain_builder;
            cursor->explain(explain_builder);
            explain_builder.appendNumber("executionTimeMillis",
                                         static_cast<long long>(queryTimer.millis()));
            BSONObj b = explain_builder.obj();

            replyToQuery(0, r.p(), r.m(), b);
            delete (cursor);
            return;
        }
    } catch (...) {
        delete cursor;
        throw;
    }

    // TODO: Revisit all of this when we revisit the sharded cursor cache

    if (cursor->getNumQueryShards() != 1) {
        // More than one shard (or zero), manage with a ShardedClientCursor
        // NOTE: We may also have *zero* shards here when the returnPartial flag is set.
        // Currently the code in ShardedClientCursor handles this.

        ShardedClientCursorPtr cc(new ShardedClientCursor(q, cursor));

        BufBuilder buffer(ShardedClientCursor::INIT_REPLY_BUFFER_SIZE);
        int docCount = 0;
        const int startFrom = cc->getTotalSent();
        bool hasMore = cc->sendNextBatch(q.ntoreturn, buffer, docCount);

        if (hasMore) {
            LOG(5) << "storing cursor : " << cc->getId();

            int cursorLeftoverMillis = maxTimeMS.getValue() - queryTimer.millis();
            if (maxTimeMS.getValue() == 0) {  // 0 represents "no limit".
                cursorLeftoverMillis = kMaxTimeCursorNoTimeLimit;
            } else if (cursorLeftoverMillis <= 0) {
                cursorLeftoverMillis = kMaxTimeCursorTimeLimitExpired;
            }

            cursorCache.store(cc, cursorLeftoverMillis);
        }

        replyToQuery(0,
                     r.p(),
                     r.m(),
                     buffer.buf(),
                     buffer.len(),
                     docCount,
                     startFrom,
                     hasMore ? cc->getId() : 0);
    } else {
        // Only one shard is used

        // Remote cursors are stored remotely, we shouldn't need this around.
        unique_ptr<ParallelSortClusteredCursor> cursorDeleter(cursor);

        ShardPtr shard = cursor->getQueryShard();
        verify(shard.get());
        DBClientCursorPtr shardCursor = cursor->getShardCursor(shard->getId());

        // Implicitly stores the cursor in the cache
        r.reply(*(shardCursor->getMessage()), shardCursor->originalHost());

        // We don't want to kill the cursor remotely if there's still data left
        shardCursor->decouple();
    }
}
Exemple #19
0
Status Database::dropCollection(OperationContext* txn, StringData fullns) {
    invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));

    LOG(1) << "dropCollection: " << fullns << endl;
    massertNamespaceNotIndex(fullns, "dropCollection");

    Collection* collection = getCollection(fullns);
    if (!collection) {
        // collection doesn't exist
        return Status::OK();
    }

    NamespaceString nss(fullns);
    {
        verify(nss.db() == _name);

        if (nss.isSystem()) {
            if (nss.isSystemDotProfile()) {
                if (_profile != 0)
                    return Status(ErrorCodes::IllegalOperation,
                                  "turn off profiling before dropping system.profile collection");
            } else {
                return Status(ErrorCodes::IllegalOperation, "can't drop system ns");
            }
        }
    }

    BackgroundOperation::assertNoBgOpInProgForNs(fullns);

    audit::logDropCollection(&cc(), fullns);

    Status s = collection->getIndexCatalog()->dropAllIndexes(txn, true);
    if (!s.isOK()) {
        warning() << "could not drop collection, trying to drop indexes" << fullns << " because of "
                  << s.toString();
        return s;
    }

    verify(collection->_details->getTotalIndexCount(txn) == 0);
    LOG(1) << "\t dropIndexes done" << endl;

    Top::get(txn->getClient()->getServiceContext()).collectionDropped(fullns);

    s = _dbEntry->dropCollection(txn, fullns);

    // we want to do this always
    _clearCollectionCache(txn, fullns, "collection dropped");

    if (!s.isOK())
        return s;

    DEV {
        // check all index collection entries are gone
        string nstocheck = fullns.toString() + ".$";
        for (CollectionMap::const_iterator i = _collections.begin(); i != _collections.end(); ++i) {
            string temp = i->first;
            if (temp.find(nstocheck) != 0)
                continue;
            log() << "after drop, bad cache entries for: " << fullns << " have " << temp;
            verify(0);
        }
    }

    getGlobalServiceContext()->getOpObserver()->onDropCollection(txn, nss);
    return Status::OK();
}
Exemple #20
0
        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );
            uassert(15967,
                    "invalid collection name: " + target,
                    NamespaceString::validCollectionComponent(target.c_str()));
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            string sourceDB = nsToDatabase(source);
            string targetDB = nsToDatabase(target);
            string databaseName = sourceDB;
            databaseName += ".system.indexes";

            int longestIndexNameLength = 0;
            vector<BSONObj> oldIndSpec = Helpers::findAll(databaseName, BSON("ns" << source));
            for (size_t i = 0; i < oldIndSpec.size(); ++i) {
                int thisLength = oldIndSpec[i].getField("name").valuesize();
                if (thisLength > longestIndexNameLength) {
                     longestIndexNameLength = thisLength;
                }
            }
            unsigned int longestAllowed = maxNamespaceLen - longestIndexNameLength - 1;
            if (target.size() > longestAllowed) {
                StringBuilder sb;
                sb << "collection name length of " << target.size()
                << " exceeds maximum length of " << longestAllowed
                << ", allowing for index names";
                uasserted(16451, sb.str());
            }

            bool capped = false;
            long long size = 0;
            std::vector<BSONObj> indexesInProg;
            {
                Client::Context ctx( source );
                NamespaceDetails *nsd = nsdetails( source );
                uassert( 10026 ,  "source namespace does not exist", nsd );
                indexesInProg = stopIndexBuilds(dbname, cmdObj);
                capped = nsd->isCapped();
                if ( capped )
                    for( DiskLoc i = nsd->firstExtent(); !i.isNull(); i = i.ext()->xnext )
                        size += i.ext()->length;
            }

            Client::Context ctx( target );

            if ( nsdetails( target ) ) {
                uassert( 10027 ,  "target namespace exists", cmdObj["dropTarget"].trueValue() );

                Status s = cc().database()->dropCollection( target );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    return false;
                }

            }


            // if we are renaming in the same database, just
            // rename the namespace and we're done.
            {
                if ( sourceDB == targetDB ) {
                    Status s = ctx.db()->renameCollection( source, target, cmdObj["stayTemp"].trueValue() );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        return false;
                    }
                    return true;
                }
            }

            // renaming across databases, so we must copy all
            // the data and then remove the source collection.
            BSONObjBuilder spec;
            if ( capped ) {
                spec.appendBool( "capped", true );
                spec.append( "size", double( size ) );
            }
            if ( !userCreateNS( target.c_str(), spec.done(), errmsg, false ) )
                return false;

            auto_ptr< DBClientCursor > c;
            DBDirectClient bridge;

            {
                c = bridge.query( source, BSONObj(), 0, 0, 0, fromRepl ? QueryOption_SlaveOk : 0 );
            }
            while( 1 ) {
                {
                    if ( !c->more() )
                        break;
                }
                BSONObj o = c->next();
                theDataFileMgr.insertWithObjMod( target.c_str(), o );
            }

            string sourceIndexes = nsToDatabase( source ) + ".system.indexes";
            string targetIndexes = nsToDatabase( target ) + ".system.indexes";
            {
                c = bridge.query( sourceIndexes, QUERY( "ns" << source ), 0, 0, 0, fromRepl ? QueryOption_SlaveOk : 0 );
            }
            while( 1 ) {
                {
                    if ( !c->more() )
                        break;
                }
                BSONObj o = c->next();
                BSONObjBuilder b;
                BSONObjIterator i( o );
                while( i.moreWithEOO() ) {
                    BSONElement e = i.next();
                    if ( e.eoo() )
                        break;
                    if ( strcmp( e.fieldName(), "ns" ) == 0 ) {
                        b.append( "ns", target );
                    }
                    else {
                        b.append( e );
                    }
                }
                BSONObj n = b.done();
                theDataFileMgr.insertWithObjMod( targetIndexes.c_str(), n );
            }

            {
                Client::Context ctx( source );
                Status s = ctx.db()->dropCollection( source );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    return false;
                }
                IndexBuilder::restoreIndexes(targetIndexes, indexesInProg);
            }
            return true;
        }
Exemple #21
0
        virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            if( replSetBlind ) {
                if (theReplSet) {
                    errmsg = str::stream() << theReplSet->selfFullName() << " is blind";
                }
                return false;
            }

            MONGO_FAIL_POINT_BLOCK(rsDelayHeartbeatResponse, delay) {
                const BSONObj& data = delay.getData();
                sleepsecs(data["delay"].numberInt());
            }

            /* we don't call ReplSetCommand::check() here because heartbeat
               checks many things that are pre-initialization. */
            if( !replSet ) {
                errmsg = "not running with --replSet";
                return false;
            }

            /* we want to keep heartbeat connections open when relinquishing primary.  tag them here. */
            {
                AbstractMessagingPort *mp = cc().port();
                if( mp )
                    mp->tag |= ScopedConn::keepOpen;
            }

            if( cmdObj["pv"].Int() != 1 ) {
                errmsg = "incompatible replset protocol version";
                return false;
            }
            {
                string s = string(cmdObj.getStringField("replSetHeartbeat"));
                if( cmdLine.ourSetName() != s ) {
                    errmsg = "repl set names do not match";
                    log() << "replSet set names do not match, our cmdline: " << cmdLine._replSet << rsLog;
                    log() << "replSet s: " << s << rsLog;
                    result.append("mismatch", true);
                    return false;
                }
            }

            result.append("rs", true);
            if( cmdObj["checkEmpty"].trueValue() ) {
                result.append("hasData", replHasDatabases());
            }
            if( (theReplSet == 0) || (theReplSet->startupStatus == ReplSetImpl::LOADINGCONFIG) ) {
                string from( cmdObj.getStringField("from") );
                if( !from.empty() ) {
                    scoped_lock lck( replSettings.discoveredSeeds_mx );
                    replSettings.discoveredSeeds.insert(from);
                }
                result.append("hbmsg", "still initializing");
                return true;
            }

            if( theReplSet->name() != cmdObj.getStringField("replSetHeartbeat") ) {
                errmsg = "repl set names do not match (2)";
                result.append("mismatch", true);
                return false;
            }
            result.append("set", theReplSet->name());
            result.append("state", theReplSet->state().s);
            result.append("e", theReplSet->iAmElectable());
            result.append("hbmsg", theReplSet->hbmsg());
            result.append("time", (long long) time(0));
            result.appendDate("opTime", theReplSet->lastOpTimeWritten.asDate());
            const Member *syncTarget = replset::BackgroundSync::get()->getSyncTarget();
            if (syncTarget) {
                result.append("syncingTo", syncTarget->fullName());
            }

            int v = theReplSet->config().version;
            result.append("v", v);
            if( v > cmdObj["v"].Int() )
                result << "config" << theReplSet->config().asBson();

            Member *from = theReplSet->findByName(cmdObj.getStringField("from"));
            if (!from) {
                return true;
            }

            // if we thought that this node is down, let it know
            if (!from->hbinfo().up()) {
                result.append("stateDisagreement", true);
            }

            // note that we got a heartbeat from this node
            theReplSet->mgr->send(boost::bind(&ReplSet::msgUpdateHBRecv,
                                              theReplSet, from->hbinfo().id(), time(0)));


            return true;
        }
 /** \brief create execption with given message.
  *  \param where place where exception has been thrown.
  *  \param call  method that is not implementd, but should be.
  */
 ExceptionNoImplementation(const Location &where, const char *call):
   Exception(where, cc("method ", call, " is not implemented by Python script") )
 {
 }
Exemple #23
0
void runSyncThread() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

    // Set initial indexPrefetch setting
    const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
    if (!prefetch.empty()) {
        BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
        if (prefetch == "none")
            prefetchConfig = BackgroundSync::PREFETCH_NONE;
        else if (prefetch == "_id_only")
            prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
        else if (prefetch == "all")
            prefetchConfig = BackgroundSync::PREFETCH_ALL;
        else {
            warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
                      << "to \"all\"";
        }
        BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
    }

    while (!inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet, sleeping 5 seconds ";
            sleepsecs(5);
            continue;
        }

        const MemberState memberState = replCoord->getMemberState();

        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
                sleepsecs(1);
                continue;
            }

            bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
            // Check criteria for doing an initial sync:
            // 1. If the oplog is empty, do an initial sync
            // 2. If minValid has _initialSyncFlag set, do an initial sync
            // 3. If initialSyncRequested is true
            if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
                getInitialSyncFlag() || initialSyncRequested) {
                syncDoInitialSync();
                continue;  // start from top again in case sync failed.
            }
            if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
                continue;
            }

            /* we have some data.  continue tailing. */
            SyncTail tail(BackgroundSync::get(), multiSyncApply);
            tail.oplogApplication();
        } catch (const DBException& e) {
            log() << "Received exception while syncing: " << e.toString();
            sleepsecs(10);
        } catch (const std::exception& e) {
            log() << "Received exception while syncing: " << e.what();
            sleepsecs(10);
        }
    }
}
    Status repairDatabase( string dbName,
                           bool preserveClonedFilesOnFailure,
                           bool backupOriginalFiles ) {
        scoped_ptr<RepairFileDeleter> repairFileDeleter;
        doingRepair dr;
        dbName = nsToDatabase( dbName );

        log() << "repairDatabase " << dbName << endl;

        invariant( cc().database()->name() == dbName );
        invariant( cc().database()->path() == storageGlobalParams.dbpath );

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        getDur().syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        killCurrentOp.checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath );
            if ( originalDatabase == NULL )
                return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" );

            Database* tempDatabase = NULL;
            {
                bool justCreated = false;
                tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated );
                invariant( justCreated );
            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx( ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(),
                                                                          false,
                                                                          CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext( ns, tempDatabase );
                    tempCollection = tempDatabase->createCollection( ns, options, true, false );
                }

                Client::Context readContext( ns, originalDatabase );
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // data

                MultiIndexBlock indexBlock( tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.init( indexes );
                    if ( !status.isOK() )
                        return status;

                }

                scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(),
                                                                                          false,
                                                                                          CollectionScanParams::FORWARD ) );
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext( ns, tempDatabase );
                    StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock );
                    if ( !result.isOK() )
                        return result.getStatus();

                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }

                {
                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.commit();
                    if ( !status.isOK() )
                        return status;
                }

            }

            getDur().syncDataAndTruncateJournal();
            MongoFile::flushAll(true); // need both in case journaling is disabled

            killCurrentOp.checkForInterrupt(false);

            Client::Context tempContext( dbName, reservedPathString );
            Database::closeDatabase( dbName, reservedPathString );
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        Client::Context ctx( dbName );
        Database::closeDatabase(dbName, storageGlobalParams.dbpath);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
Exemple #25
0
        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {

            // Steps
            // 1. check basic config
            // 2. extract params from command
            // 3. fast check
            // 4. slow check (LOCKS)
            
            // step 1

            lastError.disableForCommand();
            ShardedConnectionInfo* info = ShardedConnectionInfo::get( true );

            bool authoritative = cmdObj.getBoolField( "authoritative" );
            
            // check config server is ok or enable sharding
            if ( ! checkConfigOrInit( cmdObj["configdb"].valuestrsafe() , authoritative , errmsg , result ) )
                return false;

            // check shard name/hosts are correct
            if ( cmdObj["shard"].type() == String ) {
                shardingState.gotShardName( cmdObj["shard"].String() );
                shardingState.gotShardHost( cmdObj["shardHost"].String() );
            }
            
            // make sure we have the mongos id for writebacks
            if ( ! checkMongosID( info , cmdObj["serverID"] , errmsg ) )
                return false;

            // step 2
            
            string ns = cmdObj["setShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to speciy namespace";
                return false;
            }

            const ConfigVersion version = extractVersion( cmdObj["version"] , errmsg );
            if ( errmsg.size() )
                return false;
            
            // step 3

            const ConfigVersion oldVersion = info->getVersion(ns);
            const ConfigVersion globalVersion = shardingState.getVersion(ns);

            result.appendTimestamp( "oldVersion" , oldVersion );
            
            if ( globalVersion > 0 && version > 0 ) {
                // this means there is no reset going on an either side
                // so its safe to make some assuptions

                if ( version == globalVersion ) {
                    // mongos and mongod agree!
                    if ( oldVersion != version ) {
                        assert( oldVersion < globalVersion );
                        info->setVersion( ns , version );
                    }
                    return true;
                }
                
            }

            // step 4
            
            // this is because of a weird segfault I saw and I can't see why this should ever be set
            massert( 13647 , str::stream() << "context should be empty here, is: " << cc().getContext()->ns() , cc().getContext() == 0 ); 
        
            dblock setShardVersionLock; // TODO: can we get rid of this??
            
            if ( oldVersion > 0 && globalVersion == 0 ) {
                // this had been reset
                info->setVersion( ns , 0 );
            }

            if ( version == 0 && globalVersion == 0 ) {
                // this connection is cleaning itself
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version == 0 && globalVersion > 0 ) {
                if ( ! authoritative ) {
                    result.appendBool( "need_authoritative" , true );
                    result.append( "ns" , ns );
                    result.appendTimestamp( "globalVersion" , globalVersion );
                    errmsg = "dropping needs to be authoritative";
                    return false;
                }
                log() << "wiping data for: " << ns << endl;
                result.appendTimestamp( "beforeDrop" , globalVersion );
                // only setting global version on purpose
                // need clients to re-find meta-data
                shardingState.resetVersion( ns );
                info->setVersion( ns , 0 );
                return true;
            }

            if ( version < oldVersion ) {
                errmsg = "this connection already had a newer version of collection '" + ns + "'";
                result.append( "ns" , ns );
                result.appendTimestamp( "newVersion" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                return false;
            }

            if ( version < globalVersion ) {
                while ( shardingState.inCriticalMigrateSection() ) {
                    dbtemprelease r;
                    sleepmillis(2);
                    OCCASIONALLY log() << "waiting till out of critical section" << endl;
                }
                errmsg = "shard global version for collection is higher than trying to set to '" + ns + "'";
                result.append( "ns" , ns );
                result.appendTimestamp( "version" , version );
                result.appendTimestamp( "globalVersion" , globalVersion );
                result.appendBool( "reloadConfig" , true );
                return false;
            }

            if ( globalVersion == 0 && ! authoritative ) {
                // need authoritative for first look
                result.append( "ns" , ns );
                result.appendBool( "need_authoritative" , true );
                errmsg = "first time for collection '" + ns + "'";
                return false;
            }

            Timer relockTime;
            {
                dbtemprelease unlock;

                ShardChunkVersion currVersion = version;
                if ( ! shardingState.trySetVersion( ns , currVersion ) ) {
                    errmsg = str::stream() << "client version differs from config's for colleciton '" << ns << "'";
                    result.append( "ns" , ns );
                    result.appendTimestamp( "version" , version );
                    result.appendTimestamp( "globalVersion" , currVersion );
                    return false;
                }
            }
            if ( relockTime.millis() >= ( cmdLine.slowMS - 10 ) ) {
                log() << "setShardVersion - relocking slow: " << relockTime.millis() << endl;
            }
            
            info->setVersion( ns , version );
            return true;
        }
Exemple #26
0
/* ns:      namespace, e.g. <database>.<collection>
   pattern: the "where" clause / criteria
   justOne: stop after 1 match
   god:     allow access to system namespaces, and don't yield
*/
long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) {
    if( !god ) {
        if ( strstr(ns, ".system.") ) {
            /* note a delete from system.indexes would corrupt the db
            if done here, as there are pointers into those objects in
            NamespaceDetails.
            */
            uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) );
        }
        if ( strchr( ns , '$' ) ) {
            log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
            uassert( 10100 ,  "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 );
        }
    }

    {
        NamespaceDetails *d = nsdetails( ns );
        if ( ! d )
            return 0;
        uassert( 10101 ,  "can't remove from a capped collection" , ! d->capped );
    }

    long long nDeleted = 0;

    shared_ptr< Cursor > creal = NamespaceDetailsTransient::getCursor( ns, pattern, BSONObj(), false, 0 );

    if( !creal->ok() )
        return nDeleted;

    shared_ptr< Cursor > cPtr = creal;
    auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) );
    cc->setDoingDeletes( true );

    CursorId id = cc->cursorid();

    bool justOne = justOneOrig;
    bool canYield = !god && !(creal->matcher() && creal->matcher()->docMatcher().atomic());

    do {
        // TODO: we can generalize this I believe
        //
        bool willNeedRecord = (creal->matcher() && creal->matcher()->needRecord()) || pattern.isEmpty() || isSimpleIdQuery( pattern );
        if ( ! willNeedRecord ) {
            // TODO: this is a total hack right now
            // check if the index full encompasses query

            if ( pattern.nFields() == 1 &&
                    str::equals( pattern.firstElement().fieldName() , creal->indexKeyPattern().firstElement().fieldName() ) )
                willNeedRecord = true;
        }

        if ( canYield && ! cc->yieldSometimes( willNeedRecord ? ClientCursor::WillNeed : ClientCursor::MaybeCovered ) ) {
            cc.release(); // has already been deleted elsewhere
            // TODO should we assert or something?
            break;
        }
        if ( !cc->ok() ) {
            break; // if we yielded, could have hit the end
        }

        // this way we can avoid calling updateLocation() every time (expensive)
        // as well as some other nuances handled
        cc->setDoingDeletes( true );

        DiskLoc rloc = cc->currLoc();
        BSONObj key = cc->currKey();

        bool match = creal->currentMatches();
        bool dup = cc->c()->getsetdup(rloc);

        if ( ! cc->advance() )
            justOne = true;

        if ( ! match )
            continue;

        assert( !dup ); // can't be a dup, we deleted it!

        if ( !justOne ) {
            /* NOTE: this is SLOW.  this is not good, noteLocation() was designed to be called across getMore
                blocks.  here we might call millions of times which would be bad.
                */
            cc->c()->prepareToTouchEarlierIterate();
        }

        if ( logop ) {
            BSONElement e;
            if( BSONObj( rloc.rec() ).getObjectID( e ) ) {
                BSONObjBuilder b;
                b.append( e );
                bool replJustOne = true;
                logOp( "d", ns, b.done(), 0, &replJustOne );
            }
            else {
                problem() << "deleted object without id, not logging" << endl;
            }
        }

        if ( rs )
            rs->goingToDelete( rloc.obj() /*cc->c->current()*/ );

        theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
        nDeleted++;
        if ( justOne ) {
            break;
        }
        cc->c()->recoverFromTouchingEarlierIterate();

        if( !god )
            getDur().commitIfNeeded();

        if( debug && god && nDeleted == 100 )
            log() << "warning high number of deletes with god=true which could use significant memory" << endl;
    }
    while ( cc->ok() );

    if ( cc.get() && ClientCursor::find( id , false ) == 0 ) {
        // TODO: remove this and the id declaration above if this doesn't trigger
        //       if it does, then i'm very confused (ERH 06/2011)
        error() << "this should be impossible" << endl;
        printStackTrace();
        cc.release();
    }

    return nDeleted;
}
bool AuthzSessionExternalStateMongod::shouldIgnoreAuthChecks() const {
    // TODO(spencer): get "isInDirectClient" from OperationContext
    return cc().isInDirectClient() ||
        AuthzSessionExternalStateServerCommon::shouldIgnoreAuthChecks();
}
    void SyncSourceFeedback::run() {
        Client::initThread("SyncSourceFeedbackThread");
        OperationContextImpl txn;

        bool positionChanged = false;
        bool handshakeNeeded = false;
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone.
            {
                boost::unique_lock<boost::mutex> lock(_mtx);
                while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) {
                    _cond.wait(lock);
                }

                if (_shutdownSignaled) {
                    break;
                }

                positionChanged = _positionChanged;
                handshakeNeeded = _handshakeNeeded;
                _positionChanged = false;
                _handshakeNeeded = false;
            }

            MemberState state = replCoord->getCurrentMemberState();
            if (state.primary() || state.fatal() || state.startup()) {
                _resetConnection();
                continue;
            }
            const Member* target = BackgroundSync::get()->getSyncTarget();
            if (_syncTarget != target) {
                _resetConnection();
                _syncTarget = target;
            }
            if (!hasConnection()) {
                // fix connection if need be
                if (!target) {
                    sleepmillis(500);
                    continue;
                }
                if (!_connect(&txn, target->h())) {
                    sleepmillis(500);
                    continue;
                }
                handshakeNeeded = true;
            }
            if (handshakeNeeded) {
                if (!replHandshake(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _handshakeNeeded = true;
                    continue;
                }
            }
            if (positionChanged) {
                if (!updateUpstream(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _positionChanged = true;
                }
            }
        }
        cc().shutdown();
    }
Exemple #29
0
    void TxnContext::commit(int flags) {
        verify(!_retired);
        bool gotGTID = false;
        GTID gtid;
        // do this in case we are writing the first entry
        // we put something in that can be distinguished from
        // an initialized GTID that has never been touched
        gtid.inc_primary(); 
        // handle work related to logging of transaction for replication
        // this piece must be done before the _txn.commit
        try {
            if (hasParent()) {
                // This does something
                // a bit dangerous in that it may spill parent's stuff
                // with this child transaction that is committing. If something
                // goes wrong and this child transaction aborts, we will miss
                // some ops
                //
                // This ought to be ok, because we are in this try/catch block
                // where if something goes wrong, we will crash the server.
                // NOTHING better go wrong here, unless under bad rare
                // circumstances
                _txnOps.finishChildCommit();
            }
            else if (!_txnOps.empty()) {
                uint64_t timestamp = 0;
                uint64_t hash = 0;
                if (!_initiatingRS) {
                    dassert(txnGTIDManager);
                    txnGTIDManager->getGTIDForPrimary(&gtid, &timestamp, &hash);
                }
                else {
                    dassert(!txnGTIDManager);
                    timestamp = curTimeMillis64();
                }
                gotGTID = true;
                // In this case, the transaction we are committing has
                // no parent, so we must write the transaction's 
                // logged operations to the opLog, as part of this transaction
                dassert(logTxnOpsForReplication());
                dassert(_logTxnToOplog);
                _txnOps.rootCommit(gtid, timestamp, hash);
            }
            // handle work related to logging of transaction for chunk migrations
            if (!_txnOpsForSharding.empty()) {
                if (hasParent()) {
                    transferOpsForShardingToParent();
                }
                else {
                    writeTxnOpsToMigrateLog();
                }
            }

            _clientCursorRollback.preComplete();
            _txn.commit(flags);

            // if the commit of this transaction got a GTID, then notify 
            // the GTIDManager that the commit is now done.
            if (gotGTID && !_initiatingRS) {
                dassert(txnGTIDManager);
                // save the GTID for the client so that
                // getLastError will know what GTID slaves
                // need to be caught up to.
                cc().setLastOp(gtid);
                txnGTIDManager->noteLiveGTIDDone(gtid);
            }
        }
        catch (std::exception &e) {
            log() << "exception during critical section of txn commit, aborting system: " << e.what() << endl;
            printStackTrace();
            logflush();
            ::abort();
        }

        // These rollback items must be processed after the ydb transaction completes.
        if (hasParent()) {
            _cappedRollback.transfer(_parent->_cappedRollback);
            _nsIndexRollback.transfer(_parent->_nsIndexRollback);
        } else {
            _cappedRollback.commit();
            _nsIndexRollback.commit();
        }
        _retired = true;
    }
    void DocumentSourceCursor::loadBatch() {
        if (!_cursorId) {
            dispose();
            return;
        }

        // We have already validated the sharding version when we constructed the cursor
        // so we shouldn't check it again.
        Lock::DBRead lk(_ns);
        Client::Context ctx(_ns, storageGlobalParams.dbpath, /*doVersion=*/false);

        ClientCursorPin pin(_cursorId);
        ClientCursor* cursor = pin.c();

        uassert(16950, "Cursor deleted. Was the collection or database dropped?",
                cursor);

        Runner* runner = cursor->getRunner();
        runner->restoreState();

        int memUsageBytes = 0;
        BSONObj obj;
        Runner::RunnerState state;
        while ((state = runner->getNext(&obj, NULL)) == Runner::RUNNER_ADVANCED) {
            if (_haveDeps && !_projectionInQuery) {
                _currentBatch.push_back(documentFromBsonWithDeps(obj, _dependencies));
            }
            else {
                _currentBatch.push_back(Document::fromBsonWithMetaData(obj));
            }

            if (_limit) {
                if (++_docsAddedToBatches == _limit->getLimit()) {
                    break;
                }
                verify(_docsAddedToBatches < _limit->getLimit());
            }

            memUsageBytes += _currentBatch.back().getApproximateSize();

            if (memUsageBytes > MaxBytesToReturnToClientAtOnce) {
                // End this batch and prepare cursor for yielding.
                runner->saveState();
                cc().curop()->yielded();
                return;
            }
        }

        // If we got here, there won't be any more documents, so destroy the cursor and runner.
        _cursorId = 0;
        pin.deleteUnderlying();

        uassert(16028, "collection or index disappeared when cursor yielded",
                state != Runner::RUNNER_DEAD);

        uassert(17285, "cursor encountered an error",
                state != Runner::RUNNER_ERROR);

        massert(17286, str::stream() << "Unexpected return from Runner::getNext: " << state,
                state == Runner::RUNNER_EOF || state == Runner::RUNNER_ADVANCED);
    }