void IndexChanges::dupCheck(IndexDetails& idx, DiskLoc curObjLoc) {
    if (added.empty() || !idx.unique() || ignoreUniqueIndex(idx)) {
        return;
    }
    const Ordering ordering = Ordering::make(idx.keyPattern());
    // "E11001 duplicate key on update"
    idx.idxInterface().uassertIfDups(idx, added, idx.head, curObjLoc, ordering);
}
/**
 * Add the provided (obj, dl) pair to the provided index.
 */
static void addKeysToIndex(const char *ns, NamespaceDetails *d, int idxNo,
                           const BSONObj& obj, const DiskLoc &recordLoc,
                           bool dupsAllowed) {
    // NB: the dupsAllowed parameter is never read here; the effective policy is
    // recomputed from the index itself below.
    IndexDetails& id = d->idx(idxNo);

    auto_ptr<IndexDescriptor> desc(CatalogHack::getDescriptor(d, idxNo));
    auto_ptr<IndexAccessMethod> iam(CatalogHack::getIndex(desc.get()));

    InsertDeleteOptions options;
    options.logIfError = false;
    options.dupsAllowed = (!KeyPattern::isIdKeyPattern(id.keyPattern()) && !id.unique())
                          || ignoreUniqueIndex(id);

    int64_t inserted;
    Status ret = iam->insert(obj, recordLoc, options, &inserted);
    if (Status::OK() != ret) {
        uasserted(ret.location(), ret.reason());
    }
}
/**
 * Add the provided (obj, dl) pair to the provided index.
 */
static void addKeysToIndex( Collection* collection, int idxNo,
                            const BSONObj& obj, const DiskLoc &recordLoc ) {

    IndexDetails& id = collection->details()->idx(idxNo);

    IndexDescriptor* desc = collection->getIndexCatalog()->getDescriptor( idxNo );
    verify( desc );

    IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex( desc );
    verify( iam );

    InsertDeleteOptions options;
    options.logIfError = false;
    options.dupsAllowed = (!KeyPattern::isIdKeyPattern(id.keyPattern()) && !id.unique())
                          || ignoreUniqueIndex(id);

    int64_t inserted;
    Status ret = iam->insert(obj, recordLoc, options, &inserted);
    uassertStatusOK( ret );
}
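/* A minimal sketch (not MongoDB source): both versions of addKeysToIndex above
 * derive options.dupsAllowed from the same predicate. Factored into a
 * standalone function with hypothetical names, the policy is: duplicates are
 * allowed iff the index is neither the _id index nor declared unique, unless
 * ignoreUniqueIndex(...) asks for unique-constraint enforcement to be skipped
 * entirely.
 */
static bool sketchDupsAllowed(bool isIdIndex, bool isUnique, bool ignoreUnique) {
    return (!isIdIndex && !isUnique) || ignoreUnique;
}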
/**
 * Add the provided (obj, loc) pair to all indices.
 */
void indexRecord(const char *ns, NamespaceDetails *d, const BSONObj &obj, const DiskLoc &loc) {
    int numIndices = d->getTotalIndexCount();

    for (int i = 0; i < numIndices; ++i) {
        IndexDetails &id = d->idx(i);

        try {
            addKeysToIndex(ns, d, i, obj, loc, !id.unique() || ignoreUniqueIndex(id));
        }
        catch (AssertionException&) {
            // TODO: the new index layer indexes either all or no keys, so j <= i can be j < i.
            for (int j = 0; j <= i; j++) {
                try {
                    _unindexRecord(d, j, obj, loc, false);
                }
                catch(...) {
                    LOG(3) << "unindex fails on rollback after unique "
                              "key constraint prevented insert" << std::endl;
                }
            }
            throw;
        }
    }
}
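/* A minimal sketch (not MongoDB source) of the rollback idiom indexRecord uses
 * above: if indexing fails at step i, best-effort undo every step applied so
 * far, then rethrow so the caller still sees the original error. applyStep and
 * undoStep are hypothetical stand-ins for addKeysToIndex and _unindexRecord.
 */
void applyStep(int i);   // hypothetical: may throw
void undoStep(int j);    // hypothetical: may throw; failures are swallowed below

void applyAllOrNothing(int numSteps) {
    for (int i = 0; i < numSteps; ++i) {
        try {
            applyStep(i);
        }
        catch (...) {
            // undo steps 0..i (step i may be partially applied, mirroring j <= i above)
            for (int j = 0; j <= i; ++j) {
                try { undoStep(j); } catch (...) { /* rollback is best-effort */ }
            }
            throw;
        }
    }
}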
uint64_t BtreeBasedBuilder::fastBuildIndex(const char* ns, NamespaceDetails* d,
                                           IndexDetails& idx, bool mayInterrupt,
                                           int idxNo) {
    CurOp* op = cc().curop();

    Timer t;

    tlog(1) << "fastBuildIndex " << ns << ' ' << idx.info.obj().toString() << endl;

    bool dupsAllowed = !idx.unique() || ignoreUniqueIndex(idx);
    bool dropDups = idx.dropDups() || inDBRepair;
    BSONObj order = idx.keyPattern();

    getDur().writingDiskLoc(idx.head).Null();

    if ( logLevel > 1 ) printMemInfo( "before index start" );

    /* get and sort all the keys ----- */
    ProgressMeterHolder pm(op->setMessage("index: (1/3) external sort",
                                          "Index: (1/3) External Sort Progress",
                                          d->stats.nrecords,
                                          10));
    SortPhaseOne phase1;
    addKeysToPhaseOne(d, ns, idx, order, &phase1, d->stats.nrecords, pm.get(),
                      mayInterrupt, idxNo );
    pm.finished();

    BSONObjExternalSorter& sorter = *(phase1.sorter);

    if( phase1.multi ) {
        d->setIndexIsMultikey(ns, idxNo);
    }

    if ( logLevel > 1 ) printMemInfo( "before final sort" );
    phase1.sorter->sort( mayInterrupt );
    if ( logLevel > 1 ) printMemInfo( "after final sort" );

    LOG(t.seconds() > 5 ? 0 : 1) << "\t external sort used : " << sorter.numFiles()
                                 << " files " << " in " << t.seconds() << " secs" << endl;

    set<DiskLoc> dupsToDrop;

    /* build index --- */
    if( idx.version() == 0 )
        buildBottomUpPhases2And3<V0>(dupsAllowed, idx, sorter, dropDups, dupsToDrop,
                                     op, &phase1, pm, t, mayInterrupt);
    else if( idx.version() == 1 )
        buildBottomUpPhases2And3<V1>(dupsAllowed, idx, sorter, dropDups, dupsToDrop,
                                     op, &phase1, pm, t, mayInterrupt);
    else
        verify(false);

    if( dropDups )
        log() << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl;

    BtreeBasedBuilder::doDropDups(ns, d, dupsToDrop, mayInterrupt);

    return phase1.n;
}
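/* Orientation note (a summary of the code above and below, not itself MongoDB
 * source): each bulk build follows the three phases named in its progress
 * messages:
 *   (1/3) scan the collection and externally sort all (key, DiskLoc) pairs;
 *   (2/3) stream the sorted keys into the btree bottom-up, diverting
 *         duplicates into dupsToDrop when dropDups is set;
 *   (3/3) commit, building out the interior ("middle") levels of the btree.
 */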
/** Note: if the object shrinks a lot, we don't free up space, we leave extra at end of the record.
 */
const DiskLoc DataFileMgr::updateRecord( const char *ns,
                                         Collection* collection,
                                         Record *toupdate, const DiskLoc& dl,
                                         const char *_buf, int _len, OpDebug& debug, bool god) {

    dassert( toupdate == dl.rec() );

    BSONObj objOld = BSONObj::make(toupdate);
    BSONObj objNew(_buf);
    DEV verify( objNew.objsize() == _len );
    DEV verify( objNew.objdata() == _buf );

    if( !objNew.hasElement("_id") && objOld.hasElement("_id") ) {
        /* add back the old _id value if the update removes it.  Note this implementation is slow
           (copies entire object multiple times), but this shouldn't happen often, so going for
           simple code, not speed.
        */
        BSONObjBuilder b;
        BSONElement e;
        verify( objOld.getObjectID(e) );
        b.append(e); // put _id first, for best performance
        b.appendElements(objNew);
        objNew = b.obj();
    }

    NamespaceString nsstring(ns);
    if (nsstring.coll() == "system.users") {
        V2UserDocumentParser parser;
        uassertStatusOK(parser.checkValidUserDocument(objNew));
    }

    uassert( 13596,
             str::stream() << "cannot change _id of a document old:" << objOld
                           << " new:" << objNew,
             objNew["_id"] == objOld["_id"]);

    /* duplicate key check. we descend the btree twice - once for this check, and once for the
       actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the
       other way without rollbacks...
    */
    OwnedPointerVector<UpdateTicket> updateTickets;
    updateTickets.mutableVector().resize(collection->details()->getTotalIndexCount());
    for (int i = 0; i < collection->details()->getTotalIndexCount(); ++i) {
        auto_ptr<IndexDescriptor> descriptor(CatalogHack::getDescriptor(collection->details(), i));
        auto_ptr<IndexAccessMethod> iam(CatalogHack::getIndex(descriptor.get()));
        InsertDeleteOptions options;
        options.logIfError = false;
        options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern())
                                || descriptor->unique())
                              || ignoreUniqueIndex(descriptor->getOnDisk());
        updateTickets.mutableVector()[i] = new UpdateTicket();
        Status ret = iam->validateUpdate(objOld, objNew, dl, options,
                                         updateTickets.mutableVector()[i]);
        if (Status::OK() != ret) {
            uasserted(ASSERT_ID_DUPKEY, "Update validation failed: " + ret.toString());
        }
    }

    if ( toupdate->netLength() < objNew.objsize() ) {
        // doesn't fit.  reallocate -----------------------------------------------------
        moveCounter.increment();
        uassert( 10003,
                 "failing update: objects in a capped ns cannot grow",
                 !(collection && collection->details()->isCapped()));
        collection->details()->paddingTooSmall();
        deleteRecord(ns, toupdate, dl);
        DiskLoc res = insert(ns, objNew.objdata(), objNew.objsize(), false, god);

        if (debug.nmoved == -1) // default of -1 rather than 0
            debug.nmoved = 1;
        else
            debug.nmoved += 1;

        return res;
    }

    collection->infoCache()->notifyOfWriteOp();
    collection->details()->paddingFits();

    debug.keyUpdates = 0;

    for (int i = 0; i < collection->details()->getTotalIndexCount(); ++i) {
        auto_ptr<IndexDescriptor> descriptor(CatalogHack::getDescriptor(collection->details(), i));
        auto_ptr<IndexAccessMethod> iam(CatalogHack::getIndex(descriptor.get()));
        int64_t updatedKeys;
        Status ret = iam->update(*updateTickets.vector()[i], &updatedKeys);
        if (Status::OK() != ret) {
            // This shouldn't happen unless something disastrous occurred.
            massert(16799, "update failed: " + ret.toString(), false);
        }
        debug.keyUpdates += updatedKeys;
    }

    // update in place
    int sz = objNew.objsize();
    memcpy(getDur().writingPtr(toupdate->data(), sz), objNew.objdata(), sz);
    return dl;
}
uint64_t BtreeBasedBuilder::fastBuildIndex( Collection* collection,
                                            IndexDescriptor* idx,
                                            bool mayInterrupt ) {
    CurOp* op = cc().curop();

    Timer t;

    MONGO_TLOG(1) << "fastBuildIndex " << collection->ns() << ' ' << idx->toString() << endl;

    bool dupsAllowed = !idx->unique() || ignoreUniqueIndex(idx->getOnDisk());
    bool dropDups = idx->dropDups() || inDBRepair;
    BSONObj order = idx->keyPattern();

    getDur().writingDiskLoc(idx->getOnDisk().head).Null();

    if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(2)) )
        printMemInfo( "before index start" );

    /* get and sort all the keys ----- */
    ProgressMeterHolder pm(op->setMessage("index: (1/3) external sort",
                                          "Index: (1/3) External Sort Progress",
                                          collection->numRecords(),
                                          10));
    SortPhaseOne phase1;
    addKeysToPhaseOne(collection, idx, order, &phase1, pm.get(), mayInterrupt );
    pm.finished();

    BSONObjExternalSorter& sorter = *(phase1.sorter);

    if( phase1.multi ) {
        collection->getIndexCatalog()->markMultikey( idx );
    }

    if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(2)) )
        printMemInfo( "before final sort" );
    phase1.sorter->sort( mayInterrupt );
    if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(2)) )
        printMemInfo( "after final sort" );

    LOG(t.seconds() > 5 ? 0 : 1) << "\t external sort used : " << sorter.numFiles()
                                 << " files " << " in " << t.seconds() << " secs" << endl;

    set<DiskLoc> dupsToDrop;

    /* build index --- */
    if( idx->version() == 0 )
        buildBottomUpPhases2And3<V0>(dupsAllowed, idx, sorter, dropDups, dupsToDrop,
                                     op, &phase1, pm, t, mayInterrupt);
    else if( idx->version() == 1 )
        buildBottomUpPhases2And3<V1>(dupsAllowed, idx, sorter, dropDups, dupsToDrop,
                                     op, &phase1, pm, t, mayInterrupt);
    else
        verify(false);

    if( dropDups )
        log() << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl;

    doDropDups(collection, dupsToDrop, mayInterrupt);

    return phase1.n;
}
Status BtreeBasedBulkAccessMethod::commit(set<DiskLoc>* dupsToDrop, CurOp* op, bool mayInterrupt) {
    DiskLoc oldHead = _real->_btreeState->head();

    // XXX: do we expect the tree to be empty but have a head set?  Looks like so from old code.
    invariant(!oldHead.isNull());
    _real->_btreeState->setHead(_txn, DiskLoc());
    _real->_btreeState->recordStore()->deleteRecord(_txn, oldHead);

    if (_isMultiKey) {
        _real->_btreeState->setMultikey( _txn );
    }

    _sorter->sort(false);

    Timer timer;
    IndexCatalogEntry* entry = _real->_btreeState;

    bool dupsAllowed = !entry->descriptor()->unique() || ignoreUniqueIndex(entry->descriptor());
    bool dropDups = entry->descriptor()->dropDups() || inDBRepair;

    scoped_ptr<BSONObjExternalSorter::Iterator> i(_sorter->iterator());

    // verifies that pm and op refer to the same ProgressMeter
    ProgressMeter& pm = op->setMessage("Index Bulk Build: (2/3) btree bottom up",
                                       "Index: (2/3) BTree Bottom Up Progress",
                                       _keysInserted,
                                       10);

    scoped_ptr<BtreeBuilderInterface> builder;
    builder.reset(_interface->getBulkBuilder(_txn, dupsAllowed));

    while (i->more()) {
        // Get the next datum and add it to the builder.
        ExternalSortDatum d = i->next();
        Status status = builder->addKey(d.first, d.second);

        if (!status.isOK()) {
            if (ErrorCodes::DuplicateKey != status.code()) {
                return status;
            }

            // If we're here it's a duplicate key.
            if (dropDups) {
                static const size_t kMaxDupsToStore = 1000000;
                dupsToDrop->insert(d.second);
                if (dupsToDrop->size() > kMaxDupsToStore) {
                    return Status(ErrorCodes::InternalError,
                                  "Too many dups on index build with dropDups = true");
                }
            }
            else if (!dupsAllowed) {
                return status;
            }
        }

        // If we're here either it's a dup and we're cool with it or the addKey went just fine.
        pm.hit();
    }

    pm.finished();

    op->setMessage("Index Bulk Build: (3/3) btree-middle",
                   "Index: (3/3) BTree Middle Progress");

    LOG(timer.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit";

    unsigned long long keysCommit = builder->commit(mayInterrupt);

    if (!dropDups && (keysCommit != _keysInserted)) {
        warning() << "not all entries were added to the index, probably some "
                  << "keys were too large" << endl;
    }
    return Status::OK();
}
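/* A minimal sketch (not MongoDB source) of the duplicate-key policy in the
 * commit loop above, written as a standalone decision function with
 * hypothetical names. Order matters: dropDups wins, then a unique index
 * without dropDups fails the build, otherwise the duplicate is ignored.
 */
enum DupAction { kRecordForDrop, kFailBuild, kIgnoreDup };

static DupAction onDuplicateKey(bool dropDups, bool dupsAllowed) {
    if (dropDups)
        return kRecordForDrop;   // remember the DiskLoc; delete it after the build
    if (!dupsAllowed)
        return kFailBuild;       // surface the DuplicateKey status to the caller
    return kIgnoreDup;           // duplicates are acceptable for this index
}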
StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation,
                                                const BSONObj& objNew,
                                                bool enforceQuota,
                                                OpDebug* debug ) {

    Record* oldRecord = getExtentManager()->recordFor( oldLocation );
    BSONObj objOld = BSONObj::make( oldRecord );

    if ( objOld.hasElement( "_id" ) ) {
        BSONElement oldId = objOld["_id"];
        BSONElement newId = objNew["_id"];
        if ( oldId != newId )
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "in Collection::updateDocument _id mismatch",
                                        13596 );
    }

    if ( ns().coll() == "system.users" ) {
        // XXX - andy and spencer think this should go away now
        V2UserDocumentParser parser;
        Status s = parser.checkValidUserDocument(objNew);
        if ( !s.isOK() )
            return StatusWith<DiskLoc>( s );
    }

    /* duplicate key check. we descend the btree twice - once for this check, and once for the
       actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the
       other way without rollbacks...
    */
    OwnedPointerVector<UpdateTicket> updateTickets;
    updateTickets.mutableVector().resize(_indexCatalog.numIndexesTotal());
    for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
        IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );
        InsertDeleteOptions options;
        options.logIfError = false;
        options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern())
                                || descriptor->unique())
                              || ignoreUniqueIndex(descriptor);
        updateTickets.mutableVector()[i] = new UpdateTicket();
        Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options,
                                         updateTickets.mutableVector()[i]);
        if ( !ret.isOK() ) {
            return StatusWith<DiskLoc>( ret );
        }
    }

    if ( oldRecord->netLength() < objNew.objsize() ) {
        // doesn't fit, have to move to new location

        if ( _details->isCapped() )
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "failing update: objects in a capped ns cannot grow",
                                        10003 );

        moveCounter.increment();
        _details->paddingTooSmall();

        // unindex old record, don't delete
        // this way, if inserting new doc fails, we can re-index this one
        ClientCursor::aboutToDelete(_ns.ns(), _details, oldLocation);
        _indexCatalog.unindexRecord( objOld, oldLocation, true );

        if ( debug ) {
            if (debug->nmoved == -1) // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        StatusWith<DiskLoc> loc = insertDocument( objNew, enforceQuota );

        if ( loc.isOK() ) {
            // insert successful, now lets deallocate the old location
            // remember its already unindexed
            _recordStore.deallocRecord( oldLocation, oldRecord );
        }
        else {
            // new doc insert failed, so lets re-index the old document and location
            _indexCatalog.indexRecord( objOld, oldLocation );
        }

        return loc;
    }

    _infoCache.notifyOfWriteOp();
    _details->paddingFits();

    if ( debug )
        debug->keyUpdates = 0;

    for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
        IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
        IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

        int64_t updatedKeys;
        Status ret = iam->update(*updateTickets.vector()[i], &updatedKeys);
        if ( !ret.isOK() )
            return StatusWith<DiskLoc>( ret );

        if ( debug )
            debug->keyUpdates += updatedKeys;
    }

    // update in place
    int sz = objNew.objsize();
    memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz);

    return StatusWith<DiskLoc>( oldLocation );
}
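/* A minimal sketch (not MongoDB source) of the two-phase update that both
 * updateRecord and Collection::updateDocument perform above: validate the
 * update against every index first, collecting one ticket per index, and only
 * apply the tickets once all validations pass. A duplicate-key failure then
 * surfaces before any index has been modified, at the cost of descending each
 * btree twice. Index and Ticket are hypothetical stand-ins for
 * IndexAccessMethod and UpdateTicket; Status is reused from the surrounding
 * code.
 */
struct Ticket { /* opaque per-index plan */ };

struct Index {
    virtual ~Index() {}
    virtual Status validateUpdate(Ticket* out) = 0;  // phase 1: plan, no writes
    virtual Status apply(const Ticket& t) = 0;       // phase 2: execute the plan
};

Status twoPhaseUpdate(const std::vector<Index*>& indexes) {
    std::vector<Ticket> tickets(indexes.size());
    for (size_t i = 0; i < indexes.size(); ++i) {
        Status s = indexes[i]->validateUpdate(&tickets[i]);
        if (!s.isOK())
            return s;   // nothing written yet, so bailing out is safe
    }
    for (size_t i = 0; i < indexes.size(); ++i) {
        Status s = indexes[i]->apply(tickets[i]);
        if (!s.isOK())
            return s;   // "shouldn't happen" once validation passed, per the massert above
    }
    return Status::OK();
}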
void commit( set<DiskLoc>* dupsToDrop, CurOp* op, bool mayInterrupt ) {
    Timer timer;

    IndexCatalogEntry* entry = _real->_btreeState;

    bool dupsAllowed = !entry->descriptor()->unique() ||
        ignoreUniqueIndex(entry->descriptor());
    bool dropDups = entry->descriptor()->dropDups() || inDBRepair;

    BtreeBuilder<V> btBuilder(dupsAllowed, entry);

    BSONObj keyLast;
    scoped_ptr<BSONObjExternalSorter::Iterator> i( _phase1.sorter->iterator() );

    // verifies that pm and op refer to the same ProgressMeter
    ProgressMeter& pm = op->setMessage("Index Bulk Build: (2/3) btree bottom up",
                                       "Index: (2/3) BTree Bottom Up Progress",
                                       _phase1.nkeys,
                                       10);

    while( i->more() ) {
        RARELY if ( mayInterrupt ) killCurrentOp.checkForInterrupt();
        ExternalSortDatum d = i->next();

        try {
            if ( !dupsAllowed && dropDups ) {
                // suppress lastError while adding the key; duplicate errors are
                // handled in the catch block below
                LastError::Disabled led( lastError.get() );
                btBuilder.addKey(d.first, d.second);
            }
            else {
                btBuilder.addKey(d.first, d.second);
            }
        }
        catch( AssertionException& e ) {
            if ( dupsAllowed ) {
                // unknown exception??
                throw;
            }

            if (ErrorCodes::isInterruption(DBException::convertExceptionCode(e.getCode()))) {
                killCurrentOp.checkForInterrupt();
            }

            if ( ! dropDups )
                throw;

            /* we could queue these on disk, but normally there are very few dups,
             * so instead we keep in ram and have a limit. */
            if ( dupsToDrop ) {
                dupsToDrop->insert(d.second);
                uassert( 10092,
                         "too many dups on index build with dropDups=true",
                         dupsToDrop->size() < 1000000 );
            }
        }
        pm.hit();
    }

    pm.finished();

    op->setMessage("Index Bulk Build: (3/3) btree-middle",
                   "Index: (3/3) BTree Middle Progress");

    LOG(timer.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit";

    btBuilder.commit( mayInterrupt );

    if ( btBuilder.getn() != _phase1.nkeys && ! dropDups ) {
        warning() << "not all entries were added to the index, probably some "
                  << "keys were too large" << endl;
    }
}