/* must call this on a delete so we clean up the cursors. */ void ClientCursor::aboutToDelete(const DiskLoc& dl) { recursive_scoped_lock lock(ccmutex); Database *db = cc().database(); assert(db); aboutToDeleteForSharding( db , dl ); CCByLoc& bl = db->ccByLoc; CCByLoc::iterator j = bl.lower_bound(dl); CCByLoc::iterator stop = bl.upper_bound(dl); if ( j == stop ) return; vector<ClientCursor*> toAdvance; while ( 1 ) { toAdvance.push_back(j->second); DEV assert( j->first == dl ); ++j; if ( j == stop ) break; } wassert( toAdvance.size() < 5000 ); for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ){ ClientCursor* cc = *i; wassert(cc->_db == db); if ( cc->_doingDeletes ) continue; Cursor *c = cc->c.get(); if ( c->capped() ){ delete cc; continue; } c->checkLocation(); DiskLoc tmp1 = c->refLoc(); if ( tmp1 != dl ) { /* this might indicate a failure to call ClientCursor::updateLocation() */ problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl; } c->advance(); if ( c->eof() ) { // advanced to end // leave ClientCursor in place so next getMore doesn't fail // still need to mark new location though cc->updateLocation(); } else { wassert( c->refLoc() != dl ); cc->updateLocation(); } } }
/* must call this on a delete so we clean up the cursors. */ void ClientCursor::aboutToDelete(const DiskLoc& dl) { recursive_scoped_lock lock(ccmutex); CCByLoc::iterator j = byLoc.lower_bound(dl); CCByLoc::iterator stop = byLoc.upper_bound(dl); if ( j == stop ) return; vector<ClientCursor*> toAdvance; while ( 1 ) { toAdvance.push_back(j->second); DEV assert( j->first == dl ); ++j; if ( j == stop ) break; } wassert( toAdvance.size() < 5000 ); for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ){ ClientCursor* cc = *i; if ( cc->_doingDeletes ) continue; Cursor *c = cc->c.get(); if ( c->capped() ){ delete cc; continue; } c->checkLocation(); DiskLoc tmp1 = c->refLoc(); if ( tmp1 != dl ) { /* this might indicate a failure to call ClientCursor::updateLocation() */ problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl; } c->advance(); if ( c->eof() ) { // advanced to end -- delete cursor delete cc; } else { wassert( c->refLoc() != dl ); cc->updateLocation(); } } }
/* must call this on a delete so we clean up the cursors. */ void ClientCursor::aboutToDelete(const DiskLoc& dl) { NoPageFaultsAllowed npfa; recursive_scoped_lock lock(ccmutex); Database *db = cc().database(); verify(db); aboutToDeleteForSharding( db , dl ); CCByLoc& bl = db->ccByLoc; CCByLoc::iterator j = bl.lower_bound(ByLocKey::min(dl)); CCByLoc::iterator stop = bl.upper_bound(ByLocKey::max(dl)); if ( j == stop ) return; vector<ClientCursor*> toAdvance; while ( 1 ) { toAdvance.push_back(j->second); DEV verify( j->first.loc == dl ); ++j; if ( j == stop ) break; } if( toAdvance.size() >= 3000 ) { log() << "perf warning MPW101: " << toAdvance.size() << " cursors for one diskloc " << dl.toString() << ' ' << toAdvance[1000]->_ns << ' ' << toAdvance[2000]->_ns << ' ' << toAdvance[1000]->_pinValue << ' ' << toAdvance[2000]->_pinValue << ' ' << toAdvance[1000]->_pos << ' ' << toAdvance[2000]->_pos << ' ' << toAdvance[1000]->_idleAgeMillis << ' ' << toAdvance[2000]->_idleAgeMillis << ' ' << toAdvance[1000]->_doingDeletes << ' ' << toAdvance[2000]->_doingDeletes << endl; //wassert( toAdvance.size() < 5000 ); } for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ) { ClientCursor* cc = *i; wassert(cc->_db == db); if ( cc->_doingDeletes ) continue; Cursor *c = cc->_c.get(); if ( c->capped() ) { /* note we cannot advance here. if this condition occurs, writes to the oplog have "caught" the reader. skipping ahead, the reader would miss postentially important data. */ delete cc; continue; } c->recoverFromYield(); DiskLoc tmp1 = c->refLoc(); if ( tmp1 != dl ) { // This might indicate a failure to call ClientCursor::prepareToYield() but it can // also happen during correct operation, see SERVER-2009. problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl; } else { c->advance(); } while (!c->eof() && c->refLoc() == dl) { /* We don't delete at EOF because we want to return "no more results" rather than "no such cursor". * The loop is to handle MultiKey indexes where the deleted record is pointed to by multiple adjacent keys. * In that case we need to advance until we get to the next distinct record or EOF. * SERVER-4154 * SERVER-5198 * But see SERVER-5725. */ c->advance(); } cc->updateLocation(); } }
/* must call this on a delete so we clean up the cursors. */ void ClientCursor::aboutToDelete(const DiskLoc& dl) { recursive_scoped_lock lock(ccmutex); Database *db = cc().database(); assert(db); aboutToDeleteForSharding( db , dl ); CCByLoc& bl = db->ccByLoc; CCByLoc::iterator j = bl.lower_bound(ByLocKey::min(dl)); CCByLoc::iterator stop = bl.upper_bound(ByLocKey::max(dl)); if ( j == stop ) return; vector<ClientCursor*> toAdvance; while ( 1 ) { toAdvance.push_back(j->second); DEV assert( j->first.loc == dl ); ++j; if ( j == stop ) break; } if( toAdvance.size() >= 3000 ) { log() << "perf warning MPW101: " << toAdvance.size() << " cursors for one diskloc " << dl.toString() << ' ' << toAdvance[1000]->_ns << ' ' << toAdvance[2000]->_ns << ' ' << toAdvance[1000]->_pinValue << ' ' << toAdvance[2000]->_pinValue << ' ' << toAdvance[1000]->_pos << ' ' << toAdvance[2000]->_pos << ' ' << toAdvance[1000]->_idleAgeMillis << ' ' << toAdvance[2000]->_idleAgeMillis << ' ' << toAdvance[1000]->_doingDeletes << ' ' << toAdvance[2000]->_doingDeletes << endl; //wassert( toAdvance.size() < 5000 ); } for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ) { ClientCursor* cc = *i; wassert(cc->_db == db); if ( cc->_doingDeletes ) continue; Cursor *c = cc->_c.get(); if ( c->capped() ) { /* note we cannot advance here. if this condition occurs, writes to the oplog have "caught" the reader. skipping ahead, the reader would miss postentially important data. */ delete cc; continue; } c->checkLocation(); DiskLoc tmp1 = c->refLoc(); if ( tmp1 != dl ) { // This might indicate a failure to call ClientCursor::updateLocation() but it can // also happen during correct operation, see SERVER-2009. problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl; } else { c->advance(); } if ( c->eof() ) { // advanced to end // leave ClientCursor in place so next getMore doesn't fail // still need to mark new location though cc->updateLocation(); } else { wassert( c->refLoc() != dl ); cc->updateLocation(); } } }
/* must call this on a delete so we clean up the cursors. */ void ClientCursor::aboutToDelete(const StringData& ns, const NamespaceDetails* nsd, const DiskLoc& dl) { // Begin cursor-only NoPageFaultsAllowed npfa; // End cursor-only recursive_scoped_lock lock(ccmutex); Database *db = cc().database(); verify(db); aboutToDeleteForSharding( ns, db, nsd, dl ); // Check our non-cached active runner list. for (set<Runner*>::iterator it = nonCachedRunners.begin(); it != nonCachedRunners.end(); ++it) { Runner* runner = *it; if (0 == ns.compare(runner->ns())) { runner->invalidate(dl); } } // TODO: This requires optimization. We walk through *all* CCs and send the delete to every // CC open on the db we're deleting from. We could: // 1. Map from ns to open runners, // 2. Map from ns -> (a map of DiskLoc -> runners who care about that DL) // // We could also queue invalidations somehow and have them processed later in the runner's // read locks. for (CCById::const_iterator it = clientCursorsById.begin(); it != clientCursorsById.end(); ++it) { ClientCursor* cc = it->second; // We're only interested in cursors over one db. if (cc->_db != db) { continue; } if (NULL == cc->_runner.get()) { continue; } cc->_runner->invalidate(dl); } // Begin cursor-only. Only cursors that are in ccByLoc are processed here. CCByLoc& bl = db->ccByLoc(); CCByLoc::iterator j = bl.lower_bound(ByLocKey::min(dl)); CCByLoc::iterator stop = bl.upper_bound(ByLocKey::max(dl)); if ( j == stop ) return; vector<ClientCursor*> toAdvance; while ( 1 ) { toAdvance.push_back(j->second); DEV verify( j->first.loc == dl ); ++j; if ( j == stop ) break; } if( toAdvance.size() >= 3000 ) { log() << "perf warning MPW101: " << toAdvance.size() << " cursors for one diskloc " << dl.toString() << ' ' << toAdvance[1000]->_ns << ' ' << toAdvance[2000]->_ns << ' ' << toAdvance[1000]->_pinValue << ' ' << toAdvance[2000]->_pinValue << ' ' << toAdvance[1000]->_pos << ' ' << toAdvance[2000]->_pos << ' ' << toAdvance[1000]->_idleAgeMillis << ' ' << toAdvance[2000]->_idleAgeMillis << ' ' << toAdvance[1000]->_doingDeletes << ' ' << toAdvance[2000]->_doingDeletes << endl; //wassert( toAdvance.size() < 5000 ); } for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ) { ClientCursor* cc = *i; wassert(cc->_db == db); if ( cc->_doingDeletes ) continue; Cursor *c = cc->_c.get(); if ( c->capped() ) { /* note we cannot advance here. if this condition occurs, writes to the oplog have "caught" the reader. skipping ahead, the reader would miss postentially important data. */ delete cc; continue; } c->recoverFromYield(); DiskLoc tmp1 = c->refLoc(); if ( tmp1 != dl ) { // This might indicate a failure to call ClientCursor::prepareToYield() but it can // also happen during correct operation, see SERVER-2009. problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl; } else { c->advance(); } while (!c->eof() && c->refLoc() == dl) { /* We don't delete at EOF because we want to return "no more results" rather than "no such cursor". * The loop is to handle MultiKey indexes where the deleted record is pointed to by multiple adjacent keys. * In that case we need to advance until we get to the next distinct record or EOF. * SERVER-4154 * SERVER-5198 * But see SERVER-5725. */ c->advance(); } cc->updateLocation(); } // End cursor-only }
QueryResult* getMore(const char *ns, int ntoreturn, long long cursorid , stringstream& ss) { ClientCursor *cc = ClientCursor::find(cursorid); int bufSize = 512; if ( cc ){ bufSize += sizeof( QueryResult ); bufSize += ( ntoreturn ? 4 : 1 ) * 1024 * 1024; } BufBuilder b( bufSize ); b.skip(sizeof(QueryResult)); int resultFlags = 0; int start = 0; int n = 0; if ( !cc ) { log() << "getMore: cursorid not found " << ns << " " << cursorid << endl; cursorid = 0; resultFlags = QueryResult::ResultFlag_CursorNotFound; } else { ss << " query: " << cc->query << " "; start = cc->pos; Cursor *c = cc->c.get(); c->checkLocation(); while ( 1 ) { if ( !c->ok() ) { if ( c->tailable() ) { if ( c->advance() ) { continue; } break; } bool ok = ClientCursor::erase(cursorid); assert(ok); cursorid = 0; cc = 0; break; } if ( !cc->matcher->matches(c->currKey(), c->currLoc() ) ) { } else { //out() << "matches " << c->currLoc().toString() << '\n'; if( c->getsetdup(c->currLoc()) ) { //out() << " but it's a dup \n"; } else { BSONObj js = c->current(); fillQueryResultFromObj(b, cc->filter.get(), js); n++; if ( (ntoreturn>0 && (n >= ntoreturn || b.len() > MaxBytesToReturnToClientAtOnce)) || (ntoreturn==0 && b.len()>1*1024*1024) ) { c->advance(); cc->pos += n; //cc->updateLocation(); break; } } } c->advance(); } if ( cc ) { cc->updateLocation(); cc->mayUpgradeStorage(); } } QueryResult *qr = (QueryResult *) b.buf(); qr->len = b.len(); qr->setOperation(opReply); qr->resultFlags() = resultFlags; qr->cursorId = cursorid; qr->startingFrom = start; qr->nReturned = n; b.decouple(); return qr; }
QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) { exhaust = false; ClientCursor::Pointer p(cursorid); ClientCursor *cc = p.c(); int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce; BufBuilder b( bufSize ); b.skip(sizeof(QueryResult)); int resultFlags = ResultFlag_AwaitCapable; int start = 0; int n = 0; if ( unlikely(!cc) ) { LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl; cursorid = 0; resultFlags = ResultFlag_CursorNotFound; } else { // check for spoofing of the ns such that it does not match the one originally there for the cursor uassert(14833, "auth error", str::equals(ns, cc->ns().c_str())); if ( pass == 0 ) cc->updateSlaveLocation( curop ); int queryOptions = cc->queryOptions(); curop.debug().query = cc->query(); start = cc->pos(); Cursor *c = cc->c(); c->recoverFromYield(); DiskLoc last; scoped_ptr<Projection::KeyOnly> keyFieldsOnly; if ( cc->modifiedKeys() == false && cc->isMultiKey() == false && cc->fields ) keyFieldsOnly.reset( cc->fields->checkKey( cc->indexKeyPattern() ) ); // This manager may be stale, but it's the state of chunking when the cursor was created. ShardChunkManagerPtr manager = cc->getChunkManager(); while ( 1 ) { if ( !c->ok() ) { if ( c->tailable() ) { /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however advance() can still be retries as a reactivation attempt. when there is new data, it will return true. that's what we are doing here. */ if ( c->advance() ) continue; if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) { return 0; } break; } p.release(); bool ok = ClientCursor::erase(cursorid); assert(ok); cursorid = 0; cc = 0; break; } // in some cases (clone collection) there won't be a matcher if ( c->matcher() && !c->matcher()->matchesCurrent( c ) ) { } else if ( manager && ! manager->belongsToMe( cc ) ){ LOG(2) << "cursor skipping document in un-owned chunk: " << c->current() << endl; } else { if( c->getsetdup(c->currLoc()) ) { //out() << " but it's a dup \n"; } else { last = c->currLoc(); n++; if ( keyFieldsOnly ) { fillQueryResultFromObj(b, 0, keyFieldsOnly->hydrate( c->currKey() ) ); } else { BSONObj js = c->current(); // show disk loc should be part of the main query, not in an $or clause, so this should be ok fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0)); } if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) { c->advance(); cc->incPos( n ); break; } } } c->advance(); if ( ! cc->yieldSometimes( ClientCursor::MaybeCovered ) ) { ClientCursor::erase(cursorid); cursorid = 0; cc = 0; p.deleted(); break; } } if ( cc ) { if ( c->supportYields() ) { ClientCursor::YieldData data; assert( cc->prepareToYield( data ) ); } else { cc->updateLocation(); } cc->mayUpgradeStorage(); cc->storeOpForSlave( last ); exhaust = cc->queryOptions() & QueryOption_Exhaust; } } QueryResult *qr = (QueryResult *) b.buf(); qr->len = b.len(); qr->setOperation(opReply); qr->_resultFlags() = resultFlags; qr->cursorId = cursorid; qr->startingFrom = start; qr->nReturned = n; b.decouple(); return qr; }