Usage examples of the ignoreUniqueIndex helper, collected from the MongoDB C++ source tree.

Example #1
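IndexChanges::dupCheck bails out early when no keys were added, when the index is not unique, or when its unique constraint is being ignored; otherwise it delegates to the index interface, which uasserts with "E11001 duplicate key on update" if the update would introduce a duplicate.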
    void IndexChanges::dupCheck(IndexDetails& idx, DiskLoc curObjLoc) {
        if (added.empty() || 
            !idx.unique() || 
            ignoreUniqueIndex(idx)) {
            return;
        }
        const Ordering ordering = Ordering::make(idx.keyPattern());

        // "E11001 duplicate key on update"
        idx.idxInterface().uassertIfDups(idx, added, idx.head, curObjLoc, ordering);
    }
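Every call site in this listing uses ignoreUniqueIndex the same way: as a boolean predicate over the index metadata that, when true, lets a write proceed as if the index were not unique. None of the snippets show its body, so the sketch below only illustrates the contract they rely on; the stub type and the policy flag are assumptions, not MongoDB's actual implementation.

    // Minimal sketch of the contract the call sites assume; illustrative only.
    // IndexDetailsStub stands in for mongo::IndexDetails, and the flag stands
    // in for whatever server-level policy the real helper consults.
    struct IndexDetailsStub {
        bool isUnique;
        bool unique() const { return isUnique; }
    };

    static bool gRelaxUniqueConstraints = false;  // assumed policy knob

    bool ignoreUniqueIndex(const IndexDetailsStub& idx) {
        // Only a unique index has a constraint that could be ignored.
        return idx.unique() && gRelaxUniqueConstraints;
    }

Most call sites short-circuit on idx.unique() before the predicate is reached, so the guard inside the sketch is usually redundant, but it keeps the function safe to call unconditionally.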
Example #2
    /**
     * Add the provided (obj, dl) pair to the provided index.
     */
    static void addKeysToIndex(const char *ns, NamespaceDetails *d, int idxNo, const BSONObj& obj,
                               const DiskLoc &recordLoc, bool dupsAllowed) {
        IndexDetails& id = d->idx(idxNo);
        auto_ptr<IndexDescriptor> desc(CatalogHack::getDescriptor(d, idxNo));
        auto_ptr<IndexAccessMethod> iam(CatalogHack::getIndex(desc.get()));
        InsertDeleteOptions options;
        options.logIfError = false;
        options.dupsAllowed = (!KeyPattern::isIdKeyPattern(id.keyPattern()) && !id.unique())
            || ignoreUniqueIndex(id);

        int64_t inserted;
        Status ret = iam->insert(obj, recordLoc, options, &inserted);
        if (Status::OK() != ret) {
            uasserted(ret.location(), ret.reason());
        }
    }
Example #3
    /**
     * Add the provided (obj, dl) pair to the provided index.
     */
    static void addKeysToIndex( Collection* collection, int idxNo,
                                const BSONObj& obj, const DiskLoc &recordLoc ) {

        IndexDetails& id = collection->details()->idx(idxNo);

        IndexDescriptor* desc = collection->getIndexCatalog()->getDescriptor( idxNo );
        verify( desc );

        IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex( desc );
        verify( iam );

        InsertDeleteOptions options;
        options.logIfError = false;
        options.dupsAllowed = (!KeyPattern::isIdKeyPattern(id.keyPattern()) && !id.unique())
            || ignoreUniqueIndex(id);

        int64_t inserted;
        Status ret = iam->insert(obj, recordLoc, options, &inserted);
        uassertStatusOK( ret );
    }
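Examples #2 and #3 spell the dupsAllowed predicate as (!isId && !unique) || ignore, while Examples #6 and #9 below spell it as !(isId || unique) || ignore. By De Morgan's law the two are equivalent; the self-contained check below (not from the MongoDB tree) verifies all eight input combinations.

    #include <cassert>

    int main() {
        // Enumerate all inputs and compare the two spellings of dupsAllowed.
        for (int isId = 0; isId <= 1; ++isId)
            for (int unique = 0; unique <= 1; ++unique)
                for (int ignore = 0; ignore <= 1; ++ignore) {
                    bool a = (!isId && !unique) || ignore;  // Examples #2 and #3
                    bool b = !(isId || unique) || ignore;   // Examples #6 and #9
                    assert(a == b);
                }
        return 0;
    }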
Example #4
    /**
     * Add the provided (obj, loc) pair to all indices.
     */
    void indexRecord(const char *ns, NamespaceDetails *d, const BSONObj &obj, const DiskLoc &loc) {
        int numIndices = d->getTotalIndexCount();

        for (int i = 0; i < numIndices; ++i) {
            IndexDetails &id = d->idx(i);

            try {
                addKeysToIndex(ns, d, i, obj, loc, !id.unique() || ignoreUniqueIndex(id));
            }
            catch (AssertionException&) {
                // TODO: the new index layer indexes either all or no keys, so j <= i can be j < i.
                for (int j = 0; j <= i; j++) {
                    try {
                        _unindexRecord(d, j, obj, loc, false);
                    }
                    catch(...) {
                        LOG(3) << "unindex fails on rollback after unique "
                                  "key constraint prevented insert" << std::endl;
                    }
                }
                throw;
            }
        }
    }
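indexRecord adds the (obj, loc) pair to every index in turn; if any index rejects the insert (typically a unique-key violation), it best-effort unindexes everything written so far, including the failing index, and rethrows. Detached from the MongoDB types (all names here are illustrative), the loop has this generic shape:

    #include <cstddef>
    #include <functional>
    #include <vector>

    // Apply each step in order; on failure, best-effort undo steps 0..i (the
    // failing step may have partially applied, so it is undone too), rethrow.
    void applyAllOrNothing(const std::vector<std::function<void()>>& apply,
                           const std::vector<std::function<void()>>& undo) {
        for (std::size_t i = 0; i < apply.size(); ++i) {
            try {
                apply[i]();
            } catch (...) {
                for (std::size_t j = 0; j <= i; ++j) {
                    try { undo[j](); } catch (...) { /* best effort */ }
                }
                throw;
            }
        }
    }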
Example #5
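fastBuildIndex is the offline bottom-up build: phase one extracts and externally sorts all keys, and phases two and three bulk-load the btree from the sorted stream. dupsAllowed is again derived from idx.unique() and ignoreUniqueIndex; duplicates are either collected into dupsToDrop (when dropDups or repair is in effect) or fail the build.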
    uint64_t BtreeBasedBuilder::fastBuildIndex(const char* ns, NamespaceDetails* d,
                                               IndexDetails& idx, bool mayInterrupt,
                                               int idxNo) {
        CurOp * op = cc().curop();

        Timer t;

        tlog(1) << "fastBuildIndex " << ns << ' ' << idx.info.obj().toString() << endl;

        bool dupsAllowed = !idx.unique() || ignoreUniqueIndex(idx);
        bool dropDups = idx.dropDups() || inDBRepair;
        BSONObj order = idx.keyPattern();

        getDur().writingDiskLoc(idx.head).Null();

        if ( logLevel > 1 ) printMemInfo( "before index start" );

        /* get and sort all the keys ----- */
        ProgressMeterHolder pm(op->setMessage("index: (1/3) external sort",
                                              "Index: (1/3) External Sort Progress",
                                              d->stats.nrecords,
                                              10));
        SortPhaseOne phase1;
        addKeysToPhaseOne(d, ns, idx, order, &phase1, d->stats.nrecords, pm.get(),
                          mayInterrupt, idxNo );
        pm.finished();

        BSONObjExternalSorter& sorter = *(phase1.sorter);

        if( phase1.multi ) {
            d->setIndexIsMultikey(ns, idxNo);
        }

        if ( logLevel > 1 ) printMemInfo( "before final sort" );
        phase1.sorter->sort( mayInterrupt );
        if ( logLevel > 1 ) printMemInfo( "after final sort" );

        LOG(t.seconds() > 5 ? 0 : 1) << "\t external sort used : " << sorter.numFiles()
                                     << " files in " << t.seconds() << " secs" << endl;

        set<DiskLoc> dupsToDrop;

        /* build index --- */
        if( idx.version() == 0 )
            buildBottomUpPhases2And3<V0>(dupsAllowed,
                                         idx,
                                         sorter,
                                         dropDups,
                                         dupsToDrop,
                                         op,
                                         &phase1,
                                         pm,
                                         t,
                                         mayInterrupt);
        else if( idx.version() == 1 ) 
            buildBottomUpPhases2And3<V1>(dupsAllowed,
                                         idx,
                                         sorter,
                                         dropDups,
                                         dupsToDrop,
                                         op,
                                         &phase1,
                                         pm,
                                         t,
                                         mayInterrupt);
        else
            verify(false);

        if( dropDups ) 
            log() << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl;

        BtreeBasedBuilder::doDropDups(ns, d, dupsToDrop, mayInterrupt);

        return phase1.n;
    }
Example #6
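DataFileMgr::updateRecord is a two-pass update: first every index validates the change via validateUpdate, with dupsAllowed computed from the key pattern, uniqueness, and ignoreUniqueIndex; only after all tickets validate are the index updates and the record write (in place, or moved if the document grew) applied.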
    /** Note: if the object shrinks a lot, we don't free up space; we leave the extra at the end of the record.
     */
    const DiskLoc DataFileMgr::updateRecord(
        const char *ns,
        Collection* collection,
        Record *toupdate, const DiskLoc& dl,
        const char *_buf, int _len, OpDebug& debug,  bool god) {

        dassert( toupdate == dl.rec() );

        BSONObj objOld = BSONObj::make(toupdate);
        BSONObj objNew(_buf);
        DEV verify( objNew.objsize() == _len );
        DEV verify( objNew.objdata() == _buf );

        if( !objNew.hasElement("_id") && objOld.hasElement("_id") ) {
            /* add back the old _id value if the update removes it.  Note this implementation is slow
               (copies entire object multiple times), but this shouldn't happen often, so going for simple
               code, not speed.
            */
            BSONObjBuilder b;
            BSONElement e;
            verify( objOld.getObjectID(e) );
            b.append(e); // put _id first, for best performance
            b.appendElements(objNew);
            objNew = b.obj();
        }

        NamespaceString nsstring(ns);
        if (nsstring.coll() == "system.users") {
            V2UserDocumentParser parser;
            uassertStatusOK(parser.checkValidUserDocument(objNew));
        }

        uassert( 13596 , str::stream() << "cannot change _id of a document old:" << objOld << " new:" << objNew,
                objNew["_id"] == objOld["_id"]);

        /* Duplicate key check. We descend the btree twice: once for this check, and once for
           the actual inserts below. That is suboptimal, but doing it the other way without
           rollbacks is quite complicated.
        */
        OwnedPointerVector<UpdateTicket> updateTickets;
        updateTickets.mutableVector().resize(collection->details()->getTotalIndexCount());
        for (int i = 0; i < collection->details()->getTotalIndexCount(); ++i) {
            auto_ptr<IndexDescriptor> descriptor(CatalogHack::getDescriptor(collection->details(), i));
            auto_ptr<IndexAccessMethod> iam(CatalogHack::getIndex(descriptor.get()));
            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern())
                                    || descriptor->unique())
                                  || ignoreUniqueIndex(descriptor->getOnDisk());
            updateTickets.mutableVector()[i] = new UpdateTicket();
            Status ret = iam->validateUpdate(objOld, objNew, dl, options,
                                             updateTickets.mutableVector()[i]);

            if (Status::OK() != ret) {
                uasserted(ASSERT_ID_DUPKEY, "Update validation failed: " + ret.toString());
            }
        }

        if ( toupdate->netLength() < objNew.objsize() ) {
            // doesn't fit.  reallocate -----------------------------------------------------
            moveCounter.increment();
            uassert( 10003,
                     "failing update: objects in a capped ns cannot grow",
                     !(collection && collection->details()->isCapped()));
            collection->details()->paddingTooSmall();
            deleteRecord(ns, toupdate, dl);
            DiskLoc res = insert(ns, objNew.objdata(), objNew.objsize(), false, god);

            if (debug.nmoved == -1) // default of -1 rather than 0
                debug.nmoved = 1;
            else
                debug.nmoved += 1;

            return res;
        }

        collection->infoCache()->notifyOfWriteOp();
        collection->details()->paddingFits();

        debug.keyUpdates = 0;

        for (int i = 0; i < collection->details()->getTotalIndexCount(); ++i) {
            auto_ptr<IndexDescriptor> descriptor(CatalogHack::getDescriptor(collection->details(), i));
            auto_ptr<IndexAccessMethod> iam(CatalogHack::getIndex(descriptor.get()));
            int64_t updatedKeys;
            Status ret = iam->update(*updateTickets.vector()[i], &updatedKeys);
            if (Status::OK() != ret) {
                // This shouldn't happen unless something disastrous occurred.
                massert(16799, "update failed: " + ret.toString(), false);
            }
            debug.keyUpdates += updatedKeys;
        }

        //  update in place
        int sz = objNew.objsize();
        memcpy(getDur().writingPtr(toupdate->data(), sz), objNew.objdata(), sz);
        return dl;
    }
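Example #7

The same fastBuildIndex as Example #5 after the refactor onto Collection and IndexDescriptor: the NamespaceDetails/idxNo plumbing is gone, multikey state is recorded through the IndexCatalog, and the memory-info dumps now consult the logger's severity directly instead of logLevel.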
    uint64_t BtreeBasedBuilder::fastBuildIndex( Collection* collection,
                                                IndexDescriptor* idx,
                                                bool mayInterrupt ) {
        CurOp * op = cc().curop();

        Timer t;

        MONGO_TLOG(1) << "fastBuildIndex " << collection->ns() << ' ' << idx->toString() << endl;

        bool dupsAllowed = !idx->unique() || ignoreUniqueIndex(idx->getOnDisk());
        bool dropDups = idx->dropDups() || inDBRepair;
        BSONObj order = idx->keyPattern();

        getDur().writingDiskLoc(idx->getOnDisk().head).Null();

        if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(2) ) )
            printMemInfo( "before index start" );

        /* get and sort all the keys ----- */
        ProgressMeterHolder pm(op->setMessage("index: (1/3) external sort",
                                              "Index: (1/3) External Sort Progress",
                                              collection->numRecords(),
                                              10));
        SortPhaseOne phase1;
        addKeysToPhaseOne(collection, idx, order, &phase1, pm.get(), mayInterrupt );
        pm.finished();

        BSONObjExternalSorter& sorter = *(phase1.sorter);

        if( phase1.multi ) {
            collection->getIndexCatalog()->markMultikey( idx );
        }

        if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(2) ) )
            printMemInfo( "before final sort" );
        phase1.sorter->sort( mayInterrupt );
        if ( logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(2) ) )
            printMemInfo( "after final sort" );

        LOG(t.seconds() > 5 ? 0 : 1) << "\t external sort used : " << sorter.numFiles()
                                     << " files in " << t.seconds() << " secs" << endl;

        set<DiskLoc> dupsToDrop;

        /* build index --- */
        if( idx->version() == 0 )
            buildBottomUpPhases2And3<V0>(dupsAllowed,
                                         idx,
                                         sorter,
                                         dropDups,
                                         dupsToDrop,
                                         op,
                                         &phase1,
                                         pm,
                                         t,
                                         mayInterrupt);
        else if( idx->version() == 1 )
            buildBottomUpPhases2And3<V1>(dupsAllowed,
                                         idx,
                                         sorter,
                                         dropDups,
                                         dupsToDrop,
                                         op,
                                         &phase1,
                                         pm,
                                         t,
                                         mayInterrupt);
        else
            verify(false);

        if( dropDups )
            log() << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl;

        doDropDups(collection, dupsToDrop, mayInterrupt);

        return phase1.n;
    }
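Example #8

BtreeBasedBulkAccessMethod::commit is the Status-based successor to the exception-driven bulk commit in Example #10: addKey failures other than DuplicateKey abort the build; with dropDups set, duplicate locations are collected into dupsToDrop up to a hard cap of one million; and a duplicate is fatal only when duplicates are neither allowed nor being dropped.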
    Status BtreeBasedBulkAccessMethod::commit(set<DiskLoc>* dupsToDrop,
                                              CurOp* op,
                                              bool mayInterrupt) {
        DiskLoc oldHead = _real->_btreeState->head();

        // XXX: do we expect the tree to be empty but have a head set?  Looks like so from old code.
        invariant(!oldHead.isNull());
        _real->_btreeState->setHead(_txn, DiskLoc());
        _real->_btreeState->recordStore()->deleteRecord(_txn, oldHead);

        if (_isMultiKey) {
            _real->_btreeState->setMultikey( _txn );
        }

        _sorter->sort(false);

        Timer timer;
        IndexCatalogEntry* entry = _real->_btreeState;

        bool dupsAllowed = !entry->descriptor()->unique()
                           || ignoreUniqueIndex(entry->descriptor());

        bool dropDups = entry->descriptor()->dropDups() || inDBRepair;

        scoped_ptr<BSONObjExternalSorter::Iterator> i(_sorter->iterator());

        // verifies that pm and op refer to the same ProgressMeter
        ProgressMeter& pm = op->setMessage("Index Bulk Build: (2/3) btree bottom up",
                                           "Index: (2/3) BTree Bottom Up Progress",
                                           _keysInserted,
                                           10);

        scoped_ptr<BtreeBuilderInterface> builder;

        builder.reset(_interface->getBulkBuilder(_txn, dupsAllowed));

        while (i->more()) {
            // Get the next datum and add it to the builder.
            ExternalSortDatum d = i->next();
            Status status = builder->addKey(d.first, d.second);

            if (!status.isOK()) {
                if (ErrorCodes::DuplicateKey != status.code()) {
                    return status;
                }

                // If we're here it's a duplicate key.
                if (dropDups) {
                    static const size_t kMaxDupsToStore = 1000000;
                    dupsToDrop->insert(d.second);
                    if (dupsToDrop->size() > kMaxDupsToStore) {
                        return Status(ErrorCodes::InternalError,
                                      "Too many dups on index build with dropDups = true");
                    }
                }
                else if (!dupsAllowed) {
                    return status;
                }
            }

            // If we're here either it's a dup and we're cool with it or the addKey went just
            // fine.
            pm.hit();
        }

        pm.finished();

        op->setMessage("Index Bulk Build: (3/3) btree-middle",
                       "Index: (3/3) BTree Middle Progress");

        LOG(timer.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit";

        unsigned long long keysCommit = builder->commit(mayInterrupt);

        if (!dropDups && (keysCommit != _keysInserted)) {
            warning() << "not all entries were added to the index, probably some "
                      << "keys were too large" << endl;
        }
        return Status::OK();
    }
Example #9
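Collection::updateDocument is the StatusWith-based rewrite of Example #6: the same validate-then-apply ticket scheme, but errors are returned rather than thrown, and a document that no longer fits is unindexed, moved, and re-indexed on failure instead of being deleted outright.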
    StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        Record* oldRecord = getExtentManager()->recordFor( oldLocation );
        BSONObj objOld = BSONObj::make( oldRecord );

        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status s = parser.checkValidUserDocument(objNew);
            if ( !s.isOK() )
                return StatusWith<DiskLoc>( s );
        }

        /* Duplicate key check. We descend the btree twice: once for this check, and once for
           the actual inserts below. That is suboptimal, but doing it the other way without
           rollbacks is quite complicated.
        */
        OwnedPointerVector<UpdateTicket> updateTickets;
        updateTickets.mutableVector().resize(_indexCatalog.numIndexesTotal());
        for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
            IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || ignoreUniqueIndex(descriptor);
            updateTickets.mutableVector()[i] = new UpdateTicket();
            Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options,
                                             updateTickets.mutableVector()[i]);
            if ( !ret.isOK() ) {
                return StatusWith<DiskLoc>( ret );
            }
        }

        if ( oldRecord->netLength() < objNew.objsize() ) {
            // doesn't fit, have to move to new location

            if ( _details->isCapped() )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "failing update: objects in a capped ns cannot grow",
                                            10003 );

            moveCounter.increment();
            _details->paddingTooSmall();

            // unindex old record, don't delete
            // this way, if inserting new doc fails, we can re-index this one
            ClientCursor::aboutToDelete(_ns.ns(), _details, oldLocation);
            _indexCatalog.unindexRecord( objOld, oldLocation, true );

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            StatusWith<DiskLoc> loc = insertDocument( objNew, enforceQuota );

            if ( loc.isOK() ) {
                // insert successful, now lets deallocate the old location
                // remember its already unindexed
                _recordStore.deallocRecord( oldLocation, oldRecord );
            }
            else {
                // new doc insert failed, so lets re-index the old document and location
                _indexCatalog.indexRecord( objOld, oldLocation );
            }

            return loc;
        }

        _infoCache.notifyOfWriteOp();
        _details->paddingFits();

        if ( debug )
            debug->keyUpdates = 0;

        for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
            IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(*updateTickets.vector()[i], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        //  update in place
        int sz = objNew.objsize();
        memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz);
        return StatusWith<DiskLoc>( oldLocation );
    }
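Example #10

The older, exception-based bulk commit that Example #8 replaces: BtreeBuilder::addKey throws on a duplicate, interruption is re-checked when the thrown code maps to an interruption, and with dropDups the offending DiskLoc is queued in dupsToDrop, again capped at one million entries.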
        void commit( set<DiskLoc>* dupsToDrop,
                     CurOp* op,
                     bool mayInterrupt ) {

            Timer timer;

            IndexCatalogEntry* entry = _real->_btreeState;

            bool dupsAllowed = !entry->descriptor()->unique() ||
                ignoreUniqueIndex(entry->descriptor());
            bool dropDups = entry->descriptor()->dropDups() || inDBRepair;

            BtreeBuilder<V> btBuilder(dupsAllowed, entry);

            BSONObj keyLast;
            scoped_ptr<BSONObjExternalSorter::Iterator> i( _phase1.sorter->iterator() );

            // verifies that pm and op refer to the same ProgressMeter
            ProgressMeter& pm = op->setMessage("Index Bulk Build: (2/3) btree bottom up",
                                               "Index: (2/3) BTree Bottom Up Progress",
                                               _phase1.nkeys,
                                               10);

            while( i->more() ) {
                RARELY if ( mayInterrupt ) killCurrentOp.checkForInterrupt();
                ExternalSortDatum d = i->next();

                try {
                    if ( !dupsAllowed && dropDups ) {
                        LastError::Disabled led( lastError.get() );
                        btBuilder.addKey(d.first, d.second);
                    }
                    else {
                        btBuilder.addKey(d.first, d.second);
                    }
                }
                catch( AssertionException& e ) {
                    if ( dupsAllowed ) {
                        // unknown exception??
                        throw;
                    }

                    if (ErrorCodes::isInterruption(
                            DBException::convertExceptionCode(e.getCode()))) {
                        killCurrentOp.checkForInterrupt();
                    }

                    if ( ! dropDups )
                        throw;

                    /* We could queue these on disk, but normally there are very few dups,
                     * so instead we keep them in RAM, subject to a limit.
                     */
                    if ( dupsToDrop ) {
                        dupsToDrop->insert(d.second);
                        uassert( 10092,
                                 "too many dups on index build with dropDups=true",
                                 dupsToDrop->size() < 1000000 );
                    }
                }
                pm.hit();
            }
            pm.finished();
            op->setMessage("Index Bulk Build: (3/3) btree-middle",
                           "Index: (3/3) BTree Middle Progress");
            LOG(timer.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit";
            btBuilder.commit( mayInterrupt );
            if ( btBuilder.getn() != _phase1.nkeys && ! dropDups ) {
                warning() << "not all entries were added to the index, probably some "
                          << "keys were too large" << endl;
            }
        }