Example #1
0
Status Collection::validate(OperationContext* txn,
                            bool full,
                            bool scanData,
                            ValidateResults* results,
                            BSONObjBuilder* output) {
    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IS));

    MyValidateAdaptor adaptor;
    Status status = _recordStore->validate(txn, full, scanData, &adaptor, results, output);
    if (!status.isOK())
        return status;

    {  // indexes
        output->append("nIndexes", _indexCatalog.numIndexesReady(txn));
        int idxn = 0;
        try {
            // Only applicable when 'full' validation is requested.
            std::unique_ptr<BSONObjBuilder> indexDetails(full ? new BSONObjBuilder() : NULL);
            BSONObjBuilder indexes;  // not using subobjStart so this stays exception safe

            IndexCatalog::IndexIterator i = _indexCatalog.getIndexIterator(txn, false);
            while (i.more()) {
                const IndexDescriptor* descriptor = i.next();
                log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace()
                                          << endl;
                IndexAccessMethod* iam = _indexCatalog.getIndex(descriptor);
                invariant(iam);

                std::unique_ptr<BSONObjBuilder> bob(
                    indexDetails.get() ? new BSONObjBuilder(indexDetails->subobjStart(
                                             descriptor->indexNamespace()))
                                       : NULL);

                int64_t keys;
                iam->validate(txn, full, &keys, bob.get());
                indexes.appendNumber(descriptor->indexNamespace(), static_cast<long long>(keys));

                if (bob) {
                    BSONObj obj = bob->done();
                    BSONElement valid = obj["valid"];
                    if (valid.ok() && !valid.trueValue()) {
                        results->valid = false;
                    }
                }
                idxn++;
            }

            output->append("keysPerIndex", indexes.done());
            if (indexDetails.get()) {
                output->append("indexDetails", indexDetails->done());
            }
        } catch (DBException& exc) {
            string err = str::stream() << "exception during index validate idxn "
                                       << BSONObjBuilder::numStr(idxn) << ": " << exc.toString();
            results->errors.push_back(err);
            results->valid = false;
        }
    }

    return Status::OK();
}
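// Caller sketch (not part of the original source): a minimal illustration of how
// the validate() method above might be driven. It assumes a Collection* `coll`
// and an OperationContext* `txn` are already in scope and that the caller holds
// at least a MODE_IS collection lock, as the dassert above requires; the helper
// name runFullValidate is hypothetical.
Status runFullValidate(OperationContext* txn, Collection* coll, BSONObjBuilder* output) {
    ValidateResults results;
    results.valid = true;  // start optimistic; the checks below flip this on failure

    // Ask for a 'full' validation with a data scan. Per the code above, 'full'
    // also emits per-index detail under "indexDetails" in the output builder.
    Status status = coll->validate(txn, /*full=*/true, /*scanData=*/true, &results, output);
    if (!status.isOK())
        return status;

    // Structural problems are reported through ValidateResults, not the Status.
    if (!results.valid) {
        for (const std::string& err : results.errors) {
            log() << "validate error: " << err;
        }
    }
    return Status::OK();
}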
Example #2
0
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {
            Lock::GlobalWrite globalWriteLock(txn->lockState());
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );

            // We stay in source context the whole time. This is mostly to set the CurOp namespace.
            Client::Context ctx(txn, source);

            if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
                errmsg = "invalid collection name: " + target;
                return false;
            }
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            if (!fromRepl) { // If it got through on the master, we need to allow it here too
                Status sourceStatus = userAllowedWriteNS(source);
                if (!sourceStatus.isOK()) {
                    errmsg = "error with source namespace: " + sourceStatus.reason();
                    return false;
                }

                Status targetStatus = userAllowedWriteNS(target);
                if (!targetStatus.isOK()) {
                    errmsg = "error with target namespace: " + targetStatus.reason();
                    return false;
                }
            }

            if (NamespaceString(source).coll() == "system.indexes"
                || NamespaceString(target).coll() == "system.indexes") {
                errmsg = "renaming system.indexes is not allowed";
                return false;
            }

            Database* const sourceDB = dbHolder().get(txn, nsToDatabase(source));
            Collection* const sourceColl = sourceDB ? sourceDB->getCollection(txn, source)
                                                    : NULL;
            if (!sourceColl) {
                errmsg = "source namespace does not exist";
                return false;
            }

            {
                // Ensure that collection name does not exceed maximum length.
                // Ensure that index names do not push the length over the max.
                // Iterator includes unfinished indexes.
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( txn, true );
                int longestIndexNameLength = 0;
                while ( sourceIndIt.more() ) {
                    int thisLength = sourceIndIt.next()->indexName().length();
                    if ( thisLength > longestIndexNameLength )
                        longestIndexNameLength = thisLength;
                }

                unsigned int longestAllowed =
                    min(int(NamespaceString::MaxNsCollectionLen),
                        int(NamespaceString::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength);
                if (target.size() > longestAllowed) {
                    StringBuilder sb;
                    sb << "collection name length of " << target.size()
                       << " exceeds maximum length of " << longestAllowed
                       << ", allowing for index names";
                    errmsg = sb.str();
                    return false;
                }
            }

            const std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, sourceDB, cmdObj);
            // Dismissed on success
            ScopeGuard indexBuildRestorer = MakeGuard(IndexBuilder::restoreIndexes, indexesInProg);

            Database* const targetDB = dbHolder().openDb(txn, nsToDatabase(target));

            {
                WriteUnitOfWork wunit(txn);

                // If the target namespace already exists, it may only be replaced
                // when dropTarget is true; otherwise fail the command.
                if (targetDB->getCollection(txn, target)) {
                    if (!cmdObj["dropTarget"].trueValue()) {
                        errmsg = "target namespace exists";
                        return false;
                    }

                    Status s = targetDB->dropCollection(txn, target);
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        return false;
                    }
                }

                // If we are renaming in the same database, just
                // rename the namespace and we're done.
                if (sourceDB == targetDB) {
                    Status s = targetDB->renameCollection(txn,
                                                          source,
                                                          target,
                                                          cmdObj["stayTemp"].trueValue() );
                    if (!s.isOK()) {
                        return appendCommandStatus(result, s);
                    }

                    if (!fromRepl) {
                        repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
                    }

                    wunit.commit();
                    indexBuildRestorer.Dismiss();
                    return true;
                }

                wunit.commit();
            }

            // If we get here, we are renaming across databases, so we must copy all the data and
            // indexes, then remove the source collection.

            // Create the target collection. It will be removed if we fail to copy the collection.
            // TODO use a temp collection and unset the temp flag on success.
            Collection* targetColl = NULL;
            {
                CollectionOptions options;
                options.setNoIdIndex();

                if (sourceColl->isCapped()) {
                    const CollectionOptions sourceOpts =
                        sourceColl->getCatalogEntry()->getCollectionOptions(txn);

                    options.capped = true;
                    options.cappedSize = sourceOpts.cappedSize;
                    options.cappedMaxDocs = sourceOpts.cappedMaxDocs;
                }

                WriteUnitOfWork wunit(txn);

                // No logOp necessary because the entire renameCollection command is one logOp.
                targetColl = targetDB->createCollection(txn, target, options);
                if (!targetColl) {
                    errmsg = "Failed to create target collection.";
                    return false;
                }

                wunit.commit();
            }

            // Dismissed on success
            ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target);

            MultiIndexBlock indexer(txn, targetColl);
            indexer.allowInterruption();

            // Copy the index descriptions from the source collection, adjusting the ns field.
            {
                std::vector<BSONObj> indexesToCopy;
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( txn, true );
                while (sourceIndIt.more()) {
                    const BSONObj currIndex = sourceIndIt.next()->infoObj();

                    // Copy the source index spec, rewriting the ns field to point at the target.
                    BSONObjBuilder newIndex;
                    newIndex.append("ns", target);
                    newIndex.appendElementsUnique(currIndex);
                    indexesToCopy.push_back(newIndex.obj());
                }
                indexer.init(indexesToCopy);
            }

            {
                // Copy over all the data from source collection to target collection.
                boost::scoped_ptr<RecordIterator> sourceIt(sourceColl->getIterator(txn));
                while (!sourceIt->isEOF()) {
                    txn->checkForInterrupt(false);

                    const BSONObj obj = sourceColl->docFor(txn, sourceIt->getNext());

                    WriteUnitOfWork wunit(txn);
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    Status status = targetColl->insertDocument(txn, obj, &indexer, true).getStatus();
                    if (!status.isOK())
                        return appendCommandStatus(result, status);
                    wunit.commit();
                }
            }

            Status status = indexer.doneInserting();
            if (!status.isOK())
                return appendCommandStatus(result, status);

            {
                // Getting here means we successfully built the target copy. We now remove the
                // source collection and finalize the rename.
                WriteUnitOfWork wunit(txn);

                Status status = sourceDB->dropCollection(txn, source);
                if (!status.isOK())
                    return appendCommandStatus(result, status);

                indexer.commit();

                if (!fromRepl) {
                    repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
                }

                wunit.commit();
            }

            indexBuildRestorer.Dismiss();
            targetCollectionDropper.Dismiss();
            return true;
        }
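// Illustrative sketch (not from the original source): the shape of the command
// object that run() above consumes. The field names come straight from the code:
// the command name carries the source namespace, "to" carries the target, and
// "dropTarget"/"stayTemp" are read with trueValue(). The namespaces are
// placeholders and makeRenameCmd is a hypothetical helper.
BSONObj makeRenameCmd() {
    return BSON("renameCollection" << "test.srcColl"
                                   << "to" << "test.dstColl"
                                   << "dropTarget" << false   // refuse to clobber an existing target
                                   << "stayTemp" << false);   // clear the temp flag on a same-db rename
}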
Example #3
0
    StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn,
                                                    const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson();

        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status s = parser.checkValidUserDocument(objNew);
            if ( !s.isOK() )
                return StatusWith<DiskLoc>( s );
        }

        /* Duplicate key check. We descend the btree twice: once for this check and
           once for the actual index updates further below. That is suboptimal, but
           doing it in a single pass is complicated without rollback support.
        */
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket );
            if ( !ret.isOK() ) {
                return StatusWith<DiskLoc>( ret );
            }
        }

        // This can call back into Collection::recordStoreGoingToMove.
        StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn,
                                                                      oldLocation,
                                                                      objNew.objdata(),
                                                                      objNew.objsize(),
                                                                      _enforceQuota( enforceQuota ),
                                                                      this );

        if ( !newLocation.isOK() ) {
            return newLocation;
        }

        _infoCache.notifyOfWriteOp();

        if ( newLocation.getValue() != oldLocation ) {

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());

            return newLocation;
        }

        if ( debug )
            debug->keyUpdates = 0;

        ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION);

        return newLocation;
    }
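// Hypothetical helper (not in the original source) restating the dupsAllowed
// computation used by both updateDocument() variants in this file: duplicates are
// allowed unless the index is the _id index or a unique index, except when the
// replication coordinator says unique constraints should be ignored for it.
static bool computeDupsAllowed(const IndexDescriptor* descriptor) {
    const bool enforcesUniqueness =
        KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique();
    return !enforcesUniqueness ||
        repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
}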
Example #4
0
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // MODERATE validation means we also have to check the old document.
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        Status s = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
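// Hypothetical helper (not in the original source) restating the document
// validation policy at the top of updateDocument() above: with STRICT validation
// any failure on the new document rejects the update, while with MODERATE
// validation a failing new document is accepted only if the old document already
// failed, so only good -> bad transitions are refused.
static Status resolveValidationOutcome(const Status& newDocStatus,
                                       const Status& oldDocStatus,
                                       bool strictLevel) {
    if (newDocStatus.isOK())
        return Status::OK();   // new document passes; nothing more to decide
    if (strictLevel)
        return newDocStatus;   // STRICT: always reject
    if (oldDocStatus.isOK())
        return newDocStatus;   // MODERATE: good -> bad is not allowed
    return Status::OK();       // MODERATE: bad -> bad is tolerated
}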
Example #5
0
    void fillOutPlannerParams(Collection* collection,
                              CanonicalQuery* canonicalQuery,
                              QueryPlannerParams* plannerParams) {
        // The collection may have indices; access the catalog and fill out an
        // IndexEntry for each one.
        IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
        while (ii.more()) {
            const IndexDescriptor* desc = ii.next();
            plannerParams->indices.push_back(IndexEntry(desc->keyPattern(),
                                                        desc->getAccessMethodName(),
                                                        desc->isMultikey(),
                                                        desc->isSparse(),
                                                        desc->indexName(),
                                                        desc->infoObj()));
        }

        // If query supports index filters, filter params.indices by indices in query settings.
        QuerySettings* querySettings = collection->infoCache()->getQuerySettings();
        AllowedIndices* allowedIndicesRaw;

        // Filter index catalog if index filters are specified for query.
        // Also, signal to planner that application hint should be ignored.
        if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) {
            boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw);
            filterAllowedIndexEntries(*allowedIndices, &plannerParams->indices);
            plannerParams->indexFiltersApplied = true;
        }

        // We will not output collection scans unless there are no indexed solutions. NO_TABLE_SCAN
        // overrides this behavior by not outputting a collscan even if there are no indexed
        // solutions.
        if (storageGlobalParams.noTableScan) {
            const string& ns = canonicalQuery->ns();
            // There are certain cases where we ignore this restriction:
            bool ignore = canonicalQuery->getQueryObj().isEmpty()
                          || (string::npos != ns.find(".system."))
                          || (0 == ns.find("local."));
            if (!ignore) {
                plannerParams->options |= QueryPlannerParams::NO_TABLE_SCAN;
            }
        }

        // If the caller wants a shard filter, make sure we're actually sharded.
        if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
            CollectionMetadataPtr collMetadata =
                shardingState.getCollectionMetadata(canonicalQuery->ns());

            if (collMetadata) {
                plannerParams->shardKey = collMetadata->getKeyPattern();
            }
            else {
                // If there's no metadata, don't bother with the shard filter since
                // we won't know what the key pattern is anyway.
                plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
            }
        }

        if (internalQueryPlannerEnableIndexIntersection) {
            plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION;
        }

        plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS;
        plannerParams->options |= QueryPlannerParams::SPLIT_LIMITED_SORT;
    }
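// Caller sketch (hypothetical, not in the original source): one way the query
// execution path might use fillOutPlannerParams() before handing off to the
// planner. QueryPlanner::plan() is invoked with the same signature the distinct
// path below uses; the wrapper name is made up for illustration.
Status planWithCollectionIndexes(Collection* collection,
                                 CanonicalQuery* canonicalQuery,
                                 std::vector<QuerySolution*>* solutionsOut) {
    QueryPlannerParams plannerParams;
    fillOutPlannerParams(collection, canonicalQuery, &plannerParams);

    // The planner honors the options bitmask filled in above (NO_TABLE_SCAN,
    // INDEX_INTERSECTION, shard filtering, and so on).
    return QueryPlanner::plan(*canonicalQuery, plannerParams, solutionsOut);
}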
Example #6
0
    Status getRunnerDistinct(Collection* collection,
                             const BSONObj& query,
                             const string& field,
                             Runner** out) {
        // This should have been checked by the distinct command.
        verify(collection);

        // TODO: check for idhack here?

        // When can we do a fast distinct hack?
        // 1. There is a plan with just one leaf and that leaf is an ixscan.
        // 2. The ixscan indexes the field we're interested in.
        // 2a. Any index containing the field would be correct, but for now we
        //     require the field to be a prefix of the key pattern.
        // 3. The query is covered/no fetch.
        //
        // We go through normal planning (with limited parameters) to see if we can produce
        // a soln with the above properties.

        QueryPlannerParams plannerParams;
        plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN;

        IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
        while (ii.more()) {
            const IndexDescriptor* desc = ii.next();
            // The distinct hack can work if the field appears anywhere in the index,
            // but it's only clearly a win when it is the first field.
            if (desc->keyPattern().firstElement().fieldName() == field) {
                plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                           desc->getAccessMethodName(),
                                                           desc->isMultikey(),
                                                           desc->isSparse(),
                                                           desc->indexName(),
                                                           desc->infoObj()));
            }
        }

        // If there are no suitable indices for the distinct hack, bail out now into
        // regular planning with no projection.
        if (plannerParams.indices.empty()) {
            CanonicalQuery* cq;
            Status status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                         query,
                                                         BSONObj(),
                                                         BSONObj(),
                                                         &cq);
            if (!status.isOK()) {
                return status;
            }

            // Takes ownership of cq.
            return getRunner(collection, cq, out);
        }

        //
        // If we're here, we have an index prefixed by the field we're distinct-ing over.
        //

        // Applying a projection allows the planner to try to give us covered plans that we can turn
        // into the projection hack.  getDistinctProjection deals with .find() projection semantics
        // (i.e., _id:1 being implied by default).
        BSONObj projection = getDistinctProjection(field);

        // Apply a projection of the key.  Empty BSONObj() is for the sort.
        CanonicalQuery* cq;
        Status status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                     query,
                                                     BSONObj(),
                                                     projection,
                                                     &cq);
        if (!status.isOK()) {
            return status;
        }

        // If there's no query, we can just distinct-scan one of the indices.
        // Not every index in plannerParams.indices may be suitable. Refer to
        // getDistinctNodeIndex().
        size_t distinctNodeIndex = 0;
        if (query.isEmpty() &&
            getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) {
            DistinctNode* dn = new DistinctNode();
            dn->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern;
            dn->direction = 1;
            IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds);
            dn->fieldNo = 0;

            QueryPlannerParams params;

            // Takes ownership of 'dn'.
            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn);
            verify(soln);

            LOG(2) << "Using fast distinct: " << cq->toStringShort()
                   << ", planSummary: " << getPlanSummary(*soln);

            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(collection, *soln, &root, &ws));
            *out = new SingleSolutionRunner(collection, cq, soln, root, ws);
            return Status::OK();
        }

        // See if we can answer the query in a fast-distinct compatible fashion.
        vector<QuerySolution*> solutions;
        status = QueryPlanner::plan(*cq, plannerParams, &solutions);
        if (!status.isOK()) {
            return getRunner(collection, cq, out);
        }

        // We look for a solution that has an ixscan we can turn into a distinctixscan
        for (size_t i = 0; i < solutions.size(); ++i) {
            if (turnIxscanIntoDistinctIxscan(solutions[i], field)) {
                // Great, we can use solutions[i].  Clean up the other QuerySolution(s).
                for (size_t j = 0; j < solutions.size(); ++j) {
                    if (j != i) {
                        delete solutions[j];
                    }
                }

                LOG(2) << "Using fast distinct: " << cq->toStringShort()
                       << ", planSummary: " << getPlanSummary(*solutions[i]);

                // Build and return the SSR over solutions[i].
                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(collection, *solutions[i], &root, &ws));
                *out = new SingleSolutionRunner(collection, cq, solutions[i], root, ws);
                return Status::OK();
            }
        }

        // If we're here, the planner made a soln with the restricted index set but we couldn't
        // translate any of them into a distinct-compatible soln.  So, delete the solutions and just
        // go through normal planning.
        for (size_t i = 0; i < solutions.size(); ++i) {
            delete solutions[i];
        }

        // We drop the projection from the 'cq'.  Unfortunately this is not trivial.
        delete cq;
        status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                              query,
                                              BSONObj(),
                                              BSONObj(),
                                              &cq);
        if (!status.isOK()) {
            return status;
        }

        // Takes ownership of cq.
        return getRunner(collection, cq, out);
    }
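// Hypothetical restatement (not in the original source) of the projection that
// the getDistinctProjection() call above is described as producing; the older
// version of this function in Example #9 builds the same projection inline.
// Distinct over "_id" keeps only _id; any other field suppresses _id and keeps
// the field, matching .find() projection semantics.
BSONObj makeDistinctProjection(const std::string& field) {
    if ("_id" == field) {
        return BSON("_id" << 1);
    }
    return BSON("_id" << 0 << field << 1);
}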
Example #7
0
    Status MMAPV1Engine::repairDatabase( OperationContext* txn,
                                         const std::string& dbName,
                                         bool preserveClonedFilesOnFailure,
                                         bool backupOriginalFiles ) {
        // We must hold some form of lock here
        invariant(txn->lockState()->threadState());
        invariant( dbName.find( '.' ) == string::npos );

        scoped_ptr<RepairFileDeleter> repairFileDeleter;

        log() << "repairDatabase " << dbName << endl;

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        txn->recoveryUnit()->syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        txn->checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( txn,
                                                            dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase =
                            dbHolder().get(txn, dbName);
            if (originalDatabase == NULL) {
                return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
            }

            scoped_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
            scoped_ptr<Database> tempDatabase;
            {
                dbEntry.reset( new MMAPV1DatabaseCatalogEntry( txn,
                                                               dbName,
                                                               reservedPathString,
                                                               storageGlobalParams.directoryperdb,
                                                               true ) );
                invariant( !dbEntry->exists() );
                tempDatabase.reset( new Database( txn,
                                                  dbName,
                                                  dbEntry.get() ) );

            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx(txn,  ns );
                Collection* coll = originalDatabase->getCollection( txn, ns );
                if ( coll ) {
                    scoped_ptr<RecordIterator> it( coll->getIterator( txn,
                                                                      DiskLoc(),
                                                                      false,
                                                                      CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext(txn, ns, tempDatabase );
                    WriteUnitOfWork wunit(txn);
                    tempCollection = tempDatabase->createCollection(txn, ns, options, true, false);
                    wunit.commit();
                }

                Client::Context readContext(txn, ns, originalDatabase);
                Collection* originalCollection = originalDatabase->getCollection( txn, ns );
                invariant( originalCollection );

                // Copy the data and rebuild the indexes for this namespace.

                // TODO SERVER-14812 add a mode that drops duplicates rather than failing
                MultiIndexBlock indexer(txn, tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext(txn, ns, tempDatabase);
                    Status status = indexer.init( indexes );
                    if ( !status.isOK() )
                        return status;
                }

                scoped_ptr<RecordIterator> iterator(
                    originalCollection->getIterator( txn, DiskLoc(), false,
                                                     CollectionScanParams::FORWARD ));
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext(txn, ns, tempDatabase);
                    
                    WriteUnitOfWork wunit(txn);
                    StatusWith<DiskLoc> result = tempCollection->insertDocument(txn,
                                                                                doc,
                                                                                &indexer,
                                                                                false);
                    if ( !result.isOK() )
                        return result.getStatus();

                    wunit.commit();
                    txn->checkForInterrupt(false);
                }
                
                Status status = indexer.doneInserting();
                if (!status.isOK())
                    return status;

                {
                    Client::Context tempContext(txn, ns, tempDatabase);
                    WriteUnitOfWork wunit(txn);
                    indexer.commit();
                    wunit.commit();
                }

            }

            txn->recoveryUnit()->syncDataAndTruncateJournal();
            globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled

            txn->checkForInterrupt(false);
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        dbHolder().close( txn, dbName );

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
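// Hypothetical helper (not in the original source) isolating the disk-space
// precondition used by both repairDatabase implementations in this section: a
// negative freeSize means the free space is unknown and is not treated as an
// error; only a known-too-small repairpath aborts the repair.
static Status checkRepairDiskSpace(const std::string& dbName,
                                   intmax_t totalSize,
                                   intmax_t freeSize) {
    if (freeSize > -1 && freeSize < totalSize) {
        return Status(ErrorCodes::OutOfDiskSpace,
                      str::stream() << "Cannot repair database " << dbName
                                    << " having size: " << totalSize
                                    << " (bytes) because free disk space is: "
                                    << freeSize << " (bytes)");
    }
    return Status::OK();
}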
Example #8
0
Status renameCollection(OperationContext* txn,
                        const NamespaceString& source,
                        const NamespaceString& target,
                        bool dropTarget,
                        bool stayTemp) {
    DisableDocumentValidation validationDisabler(txn);

    ScopedTransaction transaction(txn, MODE_X);
    Lock::GlobalWrite globalWriteLock(txn->lockState());
    // We stay in source context the whole time. This is mostly to set the CurOp namespace.
    OldClientContext ctx(txn, source.ns());

    bool userInitiatedWritesAndNotPrimary = txn->writesAreReplicated() &&
        !repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(source);

    if (userInitiatedWritesAndNotPrimary) {
        return Status(ErrorCodes::NotMaster,
                      str::stream() << "Not primary while renaming collection " << source.ns()
                                    << " to "
                                    << target.ns());
    }

    Database* const sourceDB = dbHolder().get(txn, source.db());
    Collection* const sourceColl = sourceDB ? sourceDB->getCollection(source.ns()) : nullptr;
    if (!sourceColl) {
        return Status(ErrorCodes::NamespaceNotFound, "source namespace does not exist");
    }

    {
        // Ensure that collection name does not exceed maximum length.
        // Ensure that index names do not push the length over the max.
        // Iterator includes unfinished indexes.
        IndexCatalog::IndexIterator sourceIndIt =
            sourceColl->getIndexCatalog()->getIndexIterator(txn, true);
        int longestIndexNameLength = 0;
        while (sourceIndIt.more()) {
            int thisLength = sourceIndIt.next()->indexName().length();
            if (thisLength > longestIndexNameLength)
                longestIndexNameLength = thisLength;
        }

        unsigned int longestAllowed =
            std::min(int(NamespaceString::MaxNsCollectionLen),
                     int(NamespaceString::MaxNsLen) - 2 /*strlen(".$")*/ - longestIndexNameLength);
        if (target.size() > longestAllowed) {
            StringBuilder sb;
            sb << "collection name length of " << target.size() << " exceeds maximum length of "
               << longestAllowed << ", allowing for index names";
            return Status(ErrorCodes::InvalidLength, sb.str());
        }
    }

    BackgroundOperation::assertNoBgOpInProgForNs(source.ns());

    Database* const targetDB = dbHolder().openDb(txn, target.db());

    {
        WriteUnitOfWork wunit(txn);

        // If the target namespace already exists, it may only be replaced when
        // dropTarget is true; otherwise fail with NamespaceExists.
        if (targetDB->getCollection(target)) {
            if (!dropTarget) {
                return Status(ErrorCodes::NamespaceExists, "target namespace exists");
            }

            Status s = targetDB->dropCollection(txn, target.ns());
            if (!s.isOK()) {
                return s;
            }
        }

        // If we are renaming in the same database, just
        // rename the namespace and we're done.
        if (sourceDB == targetDB) {
            Status s = targetDB->renameCollection(txn, source.ns(), target.ns(), stayTemp);
            if (!s.isOK()) {
                return s;
            }

            getGlobalServiceContext()->getOpObserver()->onRenameCollection(
                txn, NamespaceString(source), NamespaceString(target), dropTarget, stayTemp);

            wunit.commit();
            return Status::OK();
        }

        wunit.commit();
    }

    // If we get here, we are renaming across databases, so we must copy all the data and
    // indexes, then remove the source collection.

    // Create the target collection. It will be removed if we fail to copy the collection.
    // TODO use a temp collection and unset the temp flag on success.
    Collection* targetColl = nullptr;
    {
        CollectionOptions options = sourceColl->getCatalogEntry()->getCollectionOptions(txn);

        WriteUnitOfWork wunit(txn);

        // No logOp necessary because the entire renameCollection command is one logOp.
        bool shouldReplicateWrites = txn->writesAreReplicated();
        txn->setReplicatedWrites(false);
        targetColl = targetDB->createCollection(txn,
                                                target.ns(),
                                                options,
                                                false);  // _id index build with others later.
        txn->setReplicatedWrites(shouldReplicateWrites);
        if (!targetColl) {
            return Status(ErrorCodes::OutOfDiskSpace, "Failed to create target collection.");
        }

        wunit.commit();
    }

    // Dismissed on success
    ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target.ns());

    MultiIndexBlock indexer(txn, targetColl);
    indexer.allowInterruption();

    // Copy the index descriptions from the source collection, adjusting the ns field.
    {
        std::vector<BSONObj> indexesToCopy;
        IndexCatalog::IndexIterator sourceIndIt =
            sourceColl->getIndexCatalog()->getIndexIterator(txn, true);
        while (sourceIndIt.more()) {
            const BSONObj currIndex = sourceIndIt.next()->infoObj();

            // Copy the source index spec, rewriting the ns field to point at the target.
            BSONObjBuilder newIndex;
            newIndex.append("ns", target.ns());
            newIndex.appendElementsUnique(currIndex);
            indexesToCopy.push_back(newIndex.obj());
        }
        indexer.init(indexesToCopy);
    }

    {
        // Copy over all the data from source collection to target collection.
        auto cursor = sourceColl->getCursor(txn);
        while (auto record = cursor->next()) {
            txn->checkForInterrupt();

            const auto obj = record->data.releaseToBson();

            WriteUnitOfWork wunit(txn);
            // No logOp necessary because the entire renameCollection command is one logOp.
            bool shouldReplicateWrites = txn->writesAreReplicated();
            txn->setReplicatedWrites(false);
            Status status = targetColl->insertDocument(txn, obj, &indexer, true);
            txn->setReplicatedWrites(shouldReplicateWrites);
            if (!status.isOK())
                return status;
            wunit.commit();
        }
    }

    Status status = indexer.doneInserting();
    if (!status.isOK())
        return status;

    {
        // Getting here means we successfully built the target copy. We now remove the
        // source collection and finalize the rename.
        WriteUnitOfWork wunit(txn);

        bool shouldReplicateWrites = txn->writesAreReplicated();
        txn->setReplicatedWrites(false);
        Status status = sourceDB->dropCollection(txn, source.ns());
        txn->setReplicatedWrites(shouldReplicateWrites);
        if (!status.isOK())
            return status;

        indexer.commit();

        getGlobalServiceContext()->getOpObserver()->onRenameCollection(
            txn, NamespaceString(source), NamespaceString(target), dropTarget, stayTemp);

        wunit.commit();
    }

    targetCollectionDropper.Dismiss();
    return Status::OK();
}
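// Hypothetical helper (not in the original source) restating the target-name
// length check that appears in both rename implementations above: the target
// collection name must fit within MaxNsCollectionLen and must also leave room in
// a full namespace (MaxNsLen) for ".$" plus the longest existing index name.
static bool targetNameFits(size_t targetLen, int longestIndexNameLength) {
    const unsigned int longestAllowed =
        std::min(int(NamespaceString::MaxNsCollectionLen),
                 int(NamespaceString::MaxNsLen) - 2 /*strlen(".$")*/ - longestIndexNameLength);
    return targetLen <= longestAllowed;
}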
Example #9
0
Status getRunnerDistinct(Collection* collection,
                         const BSONObj& query,
                         const string& field,
                         Runner** out) {

    Database* db = cc().database();
    verify(db);

    // This should have been checked by the distinct command.
    verify(collection);

    // TODO: check for idhack here?

    // When can we do a fast distinct hack?
    // 1. There is a plan with just one leaf and that leaf is an ixscan.
    // 2. The ixscan indexes the field we're interested in.
    // 2a. Any index containing the field would be correct, but for now we
    //     require the field to be a prefix of the key pattern.
    // 3. The query is covered/no fetch.
    //
    // We go through normal planning (with limited parameters) to see if we can produce
    // a soln with the above properties.

    QueryPlannerParams plannerParams;
    plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN;

    IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
    while (ii.more()) {
        const IndexDescriptor* desc = ii.next();
        // The distinct hack can work if the field appears anywhere in the index,
        // but it's only clearly a win when it is the first field.
        if (desc->keyPattern().firstElement().fieldName() == field) {
            plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                       desc->isMultikey(),
                                                       desc->isSparse(),
                                                       desc->indexName(),
                                                       desc->infoObj()));
        }
    }

    // We only care about the field we're projecting over. We have to drop the _id field
    // explicitly because .find() semantics include it by default.
    //
    // Applying a projection allows the planner to try to give us covered plans.
    BSONObj projection;
    if ("_id" == field) {
        projection = BSON("_id" << 1);
    }
    else {
        projection = BSON("_id" << 0 << field << 1);
    }

    // Apply a projection of the key.  Empty BSONObj() is for the sort.
    CanonicalQuery* cq;
    Status status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), projection, &cq);
    if (!status.isOK()) {
        return status;
    }

    // No index has the field we're looking for.  Punt to normal planning.
    if (plannerParams.indices.empty()) {
        // Takes ownership of cq.
        return getRunner(cq, out);
    }

    // If we're here, we have an index prefixed by the field we're distinct-ing over.

    // If there's no query, we can just distinct-scan one of the indices.
    if (query.isEmpty()) {
        DistinctNode* dn = new DistinctNode();
        dn->indexKeyPattern = plannerParams.indices[0].keyPattern;
        dn->direction = 1;
        IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds);
        dn->fieldNo = 0;

        QueryPlannerParams params;

        // Takes ownership of 'dn'.
        QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn);
        verify(soln);

        WorkingSet* ws;
        PlanStage* root;
        verify(StageBuilder::build(*soln, &root, &ws));
        *out = new SingleSolutionRunner(cq, soln, root, ws);
        return Status::OK();
    }

    // See if we can answer the query in a fast-distinct compatible fashion.
    vector<QuerySolution*> solutions;
    status = QueryPlanner::plan(*cq, plannerParams, &solutions);
    if (!status.isOK()) {
        return getRunner(cq, out);
    }

    // XXX: why do we need to do this?  The planner should probably do this internally.
    cq->root()->resetTag();

    // We look for a solution that has an ixscan we can turn into a distinctixscan
    for (size_t i = 0; i < solutions.size(); ++i) {
        if (turnIxscanIntoDistinctIxscan(solutions[i], field)) {
            // Great, we can use solutions[i].  Clean up the other QuerySolution(s).
            for (size_t j = 0; j < solutions.size(); ++j) {
                if (j != i) {
                    delete solutions[j];
                }
            }

            // Build and return the SSR over solutions[i].
            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*solutions[i], &root, &ws));
            *out = new SingleSolutionRunner(cq, solutions[i], root, ws);
            return Status::OK();
        }
    }

    // If we're here, the planner made a soln with the restricted index set but we couldn't
    // translate any of them into a distinct-compatible soln.  So, delete the solutions and just
    // go through normal planning.
    for (size_t i = 0; i < solutions.size(); ++i) {
        delete solutions[i];
    }

    return getRunner(cq, out);
}
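// Hypothetical helper (not in the original source) capturing the cleanup pattern
// used by both getRunnerDistinct() versions when one usable solution is found
// among the raw QuerySolution pointers: keep solutions[keep] and delete the rest.
static void deleteAllExcept(std::vector<QuerySolution*>& solutions, size_t keep) {
    for (size_t j = 0; j < solutions.size(); ++j) {
        if (j != keep) {
            delete solutions[j];
        }
    }
}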
Example #10
0
    Status repairDatabase( string dbName,
                           bool preserveClonedFilesOnFailure,
                           bool backupOriginalFiles ) {
        scoped_ptr<RepairFileDeleter> repairFileDeleter;
        doingRepair dr;
        dbName = nsToDatabase( dbName );

        log() << "repairDatabase " << dbName << endl;

        invariant( cc().database()->name() == dbName );
        invariant( cc().database()->path() == storageGlobalParams.dbpath );

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        getDur().syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        killCurrentOp.checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath );
            if ( originalDatabase == NULL )
                return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" );

            Database* tempDatabase = NULL;
            {
                bool justCreated = false;
                tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated );
                invariant( justCreated );
            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx( ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(),
                                                                          false,
                                                                          CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext( ns, tempDatabase );
                    tempCollection = tempDatabase->createCollection( ns, options, true, false );
                }

                Client::Context readContext( ns, originalDatabase );
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // Copy the collection's data and rebuild its indexes in the temp database.

                MultiIndexBlock indexBlock( tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.init( indexes );
                    if ( !status.isOK() )
                        return status;

                }

                scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(),
                                                                                          false,
                                                                                          CollectionScanParams::FORWARD ) );
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext( ns, tempDatabase );
                    StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock );
                    if ( !result.isOK() )
                        return result.getStatus();

                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }

                {
                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.commit();
                    if ( !status.isOK() )
                        return status;
                }

            }

            getDur().syncDataAndTruncateJournal();
            MongoFile::flushAll(true); // need both in case journaling is disabled

            killCurrentOp.checkForInterrupt(false);

            Client::Context tempContext( dbName, reservedPathString );
            Database::closeDatabase( dbName, reservedPathString );
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        Client::Context ctx( dbName );
        Database::closeDatabase(dbName, storageGlobalParams.dbpath);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
Example #11
0
/**
 * For a given query, get a runner.  The runner could be an EOFRunner, an IDHackRunner, a
 * SingleSolutionRunner, a CachedPlanRunner, or a MultiPlanRunner, depending on the
 * cache/query solver/etc.
 */
Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery,
                 Runner** out, size_t plannerOptions) {

    verify(rawCanonicalQuery);
    auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);

    // A NULL collection can happen here, since internal clients call this as well.
    if (NULL == collection) {
        const string& ns = canonicalQuery->ns();
        *out = new EOFRunner(canonicalQuery.release(), ns);
        return Status::OK();
    }

    // If we have an _id index we can use the idhack runner.
    if (canUseIDHack(*canonicalQuery) && collection->getIndexCatalog()->findIdIndex()) {
        *out = new IDHackRunner(collection, canonicalQuery.release());
        return Status::OK();
    }

    // The collection is non-NULL, so it may have indices.  Access the catalog and fill out an
    // IndexEntry for each.
    QueryPlannerParams plannerParams;

    IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
    while (ii.more()) {
        const IndexDescriptor* desc = ii.next();
        plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                        desc->isMultikey(),
                                        desc->isSparse(),
                                        desc->indexName(),
                                        desc->infoObj()));
    }

    // If query supports admin hint, filter params.indices by indexes in query settings.
    QuerySettings* querySettings = collection->infoCache()->getQuerySettings();
    AllowedIndices* allowedIndicesRaw;

    // Filter index catalog if admin hint is specified for query.
    // Also, signal to planner that application hint should be ignored.
    if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) {
        boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw);
        filterAllowedIndexEntries(*allowedIndices, &plannerParams.indices);
        plannerParams.adminHintApplied = true;
    }

    // Tailable: If the query requests a tailable cursor, the collection must be capped.
    if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) {
        if (!collection->isCapped()) {
            return Status(ErrorCodes::BadValue,
                          "error processing query: " + canonicalQuery->toString() +
                          " tailable cursor requested on non capped collection");
        }

        // If a sort is specified it must be equal to expectedSort.
        const BSONObj expectedSort = BSON("$natural" << 1);
        const BSONObj& actualSort = canonicalQuery->getParsed().getSort();
        if (!actualSort.isEmpty() && !(actualSort == expectedSort)) {
            return Status(ErrorCodes::BadValue,
                          "error processing query: " + canonicalQuery->toString() +
                          " invalid sort specified for tailable cursor: "
                          + actualSort.toString());
        }
    }

    // Process the planning options.
    plannerParams.options = plannerOptions;
    if (storageGlobalParams.noTableScan) {
        const string& ns = canonicalQuery->ns();
        // There are certain cases where we ignore this restriction:
        bool ignore = canonicalQuery->getQueryObj().isEmpty()
                      || (string::npos != ns.find(".system."))
                      || (0 == ns.find("local."));
        if (!ignore) {
            plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN;
        }
    }

    if (!(plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) {
        plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN;
    }

    // If the caller wants a shard filter, make sure we're actually sharded.
    if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
        CollectionMetadataPtr collMetadata =
            shardingState.getCollectionMetadata(canonicalQuery->ns());

        if (collMetadata) {
            plannerParams.shardKey = collMetadata->getKeyPattern();
        }
        else {
            // If there's no metadata, don't bother with the shard filter since we won't know
            // what the key pattern is anyway.
            plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
        }
    }

    // Try to look up a cached solution for the query.
    //
    // Skip cache look up for non-cacheable queries.
    // See PlanCache::shouldCacheQuery()
    //
    // TODO: Can the cache have negative data about a solution?
    CachedSolution* rawCS;
    if (PlanCache::shouldCacheQuery(*canonicalQuery) &&
            collection->infoCache()->getPlanCache()->get(*canonicalQuery, &rawCS).isOK()) {
        // We have a CachedSolution.  Have the planner turn it into a QuerySolution.
        boost::scoped_ptr<CachedSolution> cs(rawCS);
        QuerySolution *qs, *backupQs;
        Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs,
                        &qs, &backupQs);
        if (status.isOK()) {
            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*qs, &root, &ws));
            CachedPlanRunner* cpr = new CachedPlanRunner(canonicalQuery.release(), qs,
                    root, ws);

            if (NULL != backupQs) {
                WorkingSet* backupWs;
                PlanStage* backupRoot;
                verify(StageBuilder::build(*backupQs, &backupRoot, &backupWs));
                cpr->setBackupPlan(backupQs, backupRoot, backupWs);
            }

            *out = cpr;
            return Status::OK();
        }
    }

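    // Couldn't use a cached plan (or there wasn't one); plan from scratch below, allowing
    // index intersection plans and keep-mutations stages.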
    plannerParams.options |= QueryPlannerParams::INDEX_INTERSECTION;
    plannerParams.options |= QueryPlannerParams::KEEP_MUTATIONS;

    vector<QuerySolution*> solutions;
    Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions);
    if (!status.isOK()) {
        return Status(ErrorCodes::BadValue,
                      "error processing query: " + canonicalQuery->toString() +
                      " planner returned error: " + status.reason());
    }

    /*
    for (size_t i = 0; i < solutions.size(); ++i) {
        QLOG() << "solution " << i << " is " << solutions[i]->toString() << endl;
    }
    */

    // We cannot figure out how to answer the query.  Should this ever happen?
    if (0 == solutions.size()) {
        return Status(ErrorCodes::BadValue,
                      "error processing query: " + canonicalQuery->toString() +
                      " No query solutions");
    }

    if (1 == solutions.size()) {
        // Only one possible plan.  Run it.  Build the stages from the solution.
        WorkingSet* ws;
        PlanStage* root;
        verify(StageBuilder::build(*solutions[0], &root, &ws));

        // And, run the plan.
        *out = new SingleSolutionRunner(canonicalQuery.release(), solutions[0], root, ws);
        return Status::OK();
    }
    else {
        // Many solutions.  Let the MultiPlanRunner pick the best, update the cache, and so on.
        auto_ptr<MultiPlanRunner> mpr(new MultiPlanRunner(canonicalQuery.release()));
        for (size_t i = 0; i < solutions.size(); ++i) {
            WorkingSet* ws;
            PlanStage* root;
            if (solutions[i]->cacheData.get()) {
                solutions[i]->cacheData->adminHintApplied = plannerParams.adminHintApplied;
            }
            verify(StageBuilder::build(*solutions[i], &root, &ws));
            // Takes ownership of all arguments.
            mpr->addPlan(solutions[i], root, ws);
        }
        *out = mpr.release();
        return Status::OK();
    }
}
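
A minimal, hypothetical caller for the function above, assuming the same-era Runner interface (Runner::getNext(BSONObj*, DiskLoc*) returning RUNNER_ADVANCED/RUNNER_EOF); the helper name and variables are illustrative, not part of the example:

Status runQuery(Collection* collection, CanonicalQuery* cq, std::vector<BSONObj>* results) {
    // getRunner takes ownership of 'cq' and hands back a runner on success.
    Runner* rawRunner = NULL;
    Status status = getRunner(collection, cq, &rawRunner, 0);
    if (!status.isOK()) {
        return status;
    }
    boost::scoped_ptr<Runner> runner(rawRunner);

    // Drain the runner, collecting each matching document.
    BSONObj obj;
    while (Runner::RUNNER_ADVANCED == runner->getNext(&obj, NULL)) {
        results->push_back(obj.getOwned());
    }
    return Status::OK();
}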
Example #12
0
void CollectionInfoCache::computeIndexKeys(OperationContext* opCtx) {
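    // Recompute the set of indexed paths for this collection, and refresh the TTL
    // registration if the presence of a TTL index has changed.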
    _indexedPaths.clear();

    bool hadTTLIndex = _hasTTLIndex;
    _hasTTLIndex = false;

    IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(opCtx, true);
    while (i.more()) {
        IndexDescriptor* descriptor = i.next();

        if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
            BSONObj key = descriptor->keyPattern();
            const BSONObj& infoObj = descriptor->infoObj();
            if (infoObj.hasField("expireAfterSeconds")) {
                _hasTTLIndex = true;
            }
            BSONObjIterator j(key);
            while (j.more()) {
                BSONElement e = j.next();
                _indexedPaths.addPath(e.fieldName());
            }
        } else {
            fts::FTSSpec ftsSpec(descriptor->infoObj());

            if (ftsSpec.wildcard()) {
                _indexedPaths.allPathsIndexed();
            } else {
                for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraBefore(i));
                }
                for (fts::Weights::const_iterator it = ftsSpec.weights().begin();
                     it != ftsSpec.weights().end();
                     ++it) {
                    _indexedPaths.addPath(it->first);
                }
                for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraAfter(i));
                }
                // Any update to a path containing "language" as a component could change the
                // language of a subdocument.  Add the override field as a path component.
                _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
            }
        }

        // handle partial indexes
        const IndexCatalogEntry* entry = i.catalogEntry(descriptor);
        const MatchExpression* filter = entry->getFilterExpression();
        if (filter) {
            unordered_set<std::string> paths;
            QueryPlannerIXSelect::getFields(filter, "", &paths);
            for (auto it = paths.begin(); it != paths.end(); ++it) {
                _indexedPaths.addPath(*it);
            }
        }
    }

    TTLCollectionCache& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext());

    if (_hasTTLIndex != hadTTLIndex) {
        if (_hasTTLIndex) {
            ttlCollectionCache.registerCollection(_collection->ns());
        } else {
            ttlCollectionCache.unregisterCollection(_collection->ns());
        }
    }

    _keysComputed = true;
}
Example #13
0
    StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        Record* oldRecord = _recordStore->recordFor( oldLocation );
        BSONObj objOld( oldRecord->accessed()->data() );

        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status s = parser.checkValidUserDocument(objNew);
            if ( !s.isOK() )
                return StatusWith<DiskLoc>( s );
        }

        /* Duplicate key check. We descend the btree twice - once for this check, and once for
           the actual inserts further below. That is suboptimal, but it's pretty complicated to
           do it the other way without rollbacks.
        */
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || ignoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options, updateTicket );
            if ( !ret.isOK() ) {
                return StatusWith<DiskLoc>( ret );
            }
        }

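        // If the new version no longer fits in the old record, the document must move to a new
        // location; otherwise it is updated in place further below.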
        if ( oldRecord->netLength() < objNew.objsize() ) {
            // doesn't fit, have to move to new location

            if ( _details->isCapped() )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "failing update: objects in a capped ns cannot grow",
                                            10003 );

            moveCounter.increment();
            _details->paddingTooSmall();

            // unindex old record, don't delete
            // this way, if inserting new doc fails, we can re-index this one
            _cursorCache.invalidateDocument(oldLocation, INVALIDATION_DELETION);
            _indexCatalog.unindexRecord( objOld, oldLocation, true );

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            StatusWith<DiskLoc> loc = _insertDocument( objNew, enforceQuota );

            if ( loc.isOK() ) {
                // insert successful, now lets deallocate the old location
                // remember its already unindexed
                _recordStore->deleteRecord( oldLocation );
            }
            else {
                // new doc insert failed, so lets re-index the old document and location
                _indexCatalog.indexRecord( objOld, oldLocation );
            }

            return loc;
        }

        _infoCache.notifyOfWriteOp();
        _details->paddingFits();

        if ( debug )
            debug->keyUpdates = 0;

        ii = _indexCatalog.getIndexIterator( true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(*updateTickets.mutableMap()[descriptor], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION);

        //  update in place
        int sz = objNew.objsize();
        memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz);

        return StatusWith<DiskLoc>( oldLocation );
    }
Example #14
0
        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );

            if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
                errmsg = "invalid collection name: " + target;
                return false;
            }
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            if (!fromRepl) { // If it got through on the master, need to allow it here too
                Status sourceStatus = userAllowedWriteNS(source);
                if (!sourceStatus.isOK()) {
                    errmsg = "error with source namespace: " + sourceStatus.reason();
                    return false;
                }

                Status targetStatus = userAllowedWriteNS(target);
                if (!targetStatus.isOK()) {
                    errmsg = "error with target namespace: " + targetStatus.reason();
                    return false;
                }
            }

            string sourceDB = nsToDatabase(source);
            string targetDB = nsToDatabase(target);

            bool capped = false;
            long long size = 0;
            std::vector<BSONObj> indexesInProg;

            Lock::GlobalWrite globalWriteLock;
            DurTransaction txn;

            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );

                if ( !sourceColl ) {
                    errmsg = "source namespace does not exist";
                    return false;
                }

                // Ensure that collection name does not exceed maximum length.
                // Ensure that index names do not push the length over the max.
                // Iterator includes unfinished indexes.
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( true );
                int longestIndexNameLength = 0;
                while ( sourceIndIt.more() ) {
                    int thisLength = sourceIndIt.next()->indexName().length();
                    if ( thisLength > longestIndexNameLength )
                        longestIndexNameLength = thisLength;
                }

                unsigned int longestAllowed =
                    min(int(Namespace::MaxNsColletionLen),
                        int(Namespace::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength);
                if (target.size() > longestAllowed) {
                    StringBuilder sb;
                    sb << "collection name length of " << target.size()
                       << " exceeds maximum length of " << longestAllowed
                       << ", allowing for index names";
                    errmsg = sb.str();
                    return false;
                }

                {

                    indexesInProg = stopIndexBuilds( srcCtx.db(), cmdObj );
                    capped = sourceColl->isCapped();
                    if ( capped ) {
                        size = sourceColl->storageSize();
                    }
                }
            }

            {
                Client::Context ctx( target );

                // Check if the target namespace exists and if dropTarget is true.
                // If target exists and dropTarget is not true, return false.
                if ( ctx.db()->getCollection( target ) ) {
                    if ( !cmdObj["dropTarget"].trueValue() ) {
                        errmsg = "target namespace exists";
                        return false;
                    }

                    Status s = ctx.db()->dropCollection( &txn, target );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        restoreIndexBuildsOnSource( indexesInProg, source );
                        return false;
                    }
                }

                // If we are renaming in the same database, just
                // rename the namespace and we're done.
                if ( sourceDB == targetDB ) {
                    Status s = ctx.db()->renameCollection( &txn, source, target,
                                                           cmdObj["stayTemp"].trueValue() );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        restoreIndexBuildsOnSource( indexesInProg, source );
                        return false;
                    }
                    return true;
                }

                // Otherwise, we are renaming across databases, so we must copy all
                // the data and then remove the source collection.

                // Create the target collection.
                Collection* targetColl = NULL;
                if ( capped ) {
                    CollectionOptions options;
                    options.capped = true;
                    options.cappedSize = size;
                    options.setNoIdIndex();

                    targetColl = ctx.db()->createCollection( &txn, target, options );
                }
                else {
                    CollectionOptions options;
                    options.setNoIdIndex();
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    targetColl = ctx.db()->createCollection( &txn, target, options );
                }
                if ( !targetColl ) {
                    errmsg = "Failed to create target collection.";
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            // Copy over all the data from source collection to target collection.
            bool insertSuccessful = true;
            boost::scoped_ptr<RecordIterator> sourceIt;
            Collection* sourceColl = NULL;

            {
                Client::Context srcCtx( source );
                sourceColl = srcCtx.db()->getCollection( source );
                sourceIt.reset( sourceColl->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) );
            }

            Collection* targetColl = NULL;
            while ( !sourceIt->isEOF() ) {
                BSONObj o;
                {
                    Client::Context srcCtx( source );
                    o = sourceColl->docFor(sourceIt->getNext());
                }
                // Insert and check return status of insert.
                {
                    Client::Context ctx( target );
                    if ( !targetColl )
                        targetColl = ctx.db()->getCollection( target );
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    Status s = targetColl->insertDocument( &txn, o, true ).getStatus();
                    if ( !s.isOK() ) {
                        insertSuccessful = false;
                        errmsg = s.toString();
                        break;
                    }
                }
            }

            // If inserts were unsuccessful, drop the target collection and return false.
            if ( !insertSuccessful ) {
                Client::Context ctx( target );
                Status s = ctx.db()->dropCollection( &txn, target );
                if ( !s.isOK() )
                    errmsg = s.toString();
                restoreIndexBuildsOnSource( indexesInProg, source );
                return false;
            }

            // Copy over the indexes to temp storage and then to the target.
            vector<BSONObj> copiedIndexes;
            bool indexSuccessful = true;
            {
                Client::Context srcCtx( source );
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( true );

                while ( sourceIndIt.more() ) {
                    BSONObj currIndex = sourceIndIt.next()->infoObj();

                    // Process the source index.
                    BSONObjBuilder b;
                    BSONObjIterator i( currIndex );
                    while( i.moreWithEOO() ) {
                        BSONElement e = i.next();
                        if ( e.eoo() )
                            break;
                        else if ( strcmp( e.fieldName(), "ns" ) == 0 )
                            b.append( "ns", target );
                        else
                            b.append( e );
                    }

                    BSONObj newIndex = b.obj();
                    copiedIndexes.push_back( newIndex );
                }
            }

            {
                Client::Context ctx( target );
                if ( !targetColl )
                    targetColl = ctx.db()->getCollection( target );

                for ( vector<BSONObj>::iterator it = copiedIndexes.begin();
                                                it != copiedIndexes.end(); ++it ) {
                    Status s = targetColl->getIndexCatalog()->createIndex( *it, true );
                    if ( !s.isOK() ) {
                        indexSuccessful = false;
                        errmsg = s.toString();
                        break;
                    }
                }

                // If indexes were unsuccessful, drop the target collection and return false.
                if ( !indexSuccessful ) {
                    Status s = ctx.db()->dropCollection( &txn, target );
                    if ( !s.isOK() )
                        errmsg = s.toString();
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            // Drop the source collection.
            {
                Client::Context srcCtx( source );
                Status s = srcCtx.db()->dropCollection( &txn, source );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            return true;
        }
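
For reference, a hypothetical command document of the shape this handler reads; the field names come from the cmdObj accesses above, the command name is assumed to be renameCollection, and the values are illustrative:

    BSONObj cmdObj = BSON( "renameCollection" << "test.source"
                           << "to" << "test.target"
                           << "dropTarget" << true
                           << "stayTemp" << false );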
Example #15
0
    Status MMAPV1Engine::repairDatabase( OperationContext* txn,
                                         const std::string& dbName,
                                         bool preserveClonedFilesOnFailure,
                                         bool backupOriginalFiles ) {
        unique_ptr<RepairFileDeleter> repairFileDeleter;

        // Must be done before and after repair
        getDur().syncDataAndTruncateJournal(txn);

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        txn->checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        bool created = false;
        MONGO_ASSERT_ON_EXCEPTION( created = boost::filesystem::create_directory( reservedPath ) );
        invariant( created );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( txn,
                                                            dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().openDb(txn, dbName);
            if (originalDatabase == NULL) {
                return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
            }

            unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
            unique_ptr<Database> tempDatabase;

            // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files
            ON_BLOCK_EXIT(&dur::DurableInterface::syncDataAndTruncateJournal, &getDur(), txn);

            {
                dbEntry.reset(new MMAPV1DatabaseCatalogEntry(txn,
                                                             dbName,
                                                             reservedPathString,
                                                             storageGlobalParams.directoryperdb,
                                                             true));
                tempDatabase.reset( new Database(txn, dbName, dbEntry.get()));
            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                OldClientContext ctx(txn,  ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    auto cursor = coll->getCursor(txn);
                    while (auto record = cursor->next()) {
                        BSONObj obj = record->data.releaseToBson();

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    WriteUnitOfWork wunit(txn);
                    tempCollection = tempDatabase->createCollection(txn, ns, options, false);
                    wunit.commit();
                }

                OldClientContext readContext(txn, ns, originalDatabase);
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // Copy the collection's data and rebuild its indexes in the temp database.

                // TODO SERVER-14812 add a mode that drops duplicates rather than failing
                MultiIndexBlock indexer(txn, tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( txn, false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Status status = indexer.init( indexes );
                    if (!status.isOK()) {
                        return status;
                    }
                }

                auto cursor = originalCollection->getCursor(txn);
                while (auto record = cursor->next()) {
                    BSONObj doc = record->data.releaseToBson();

                    WriteUnitOfWork wunit(txn);
                    StatusWith<RecordId> result = tempCollection->insertDocument(txn,
                                                                                 doc,
                                                                                 &indexer,
                                                                                 false);
                    if ( !result.isOK() )
                        return result.getStatus();

                    wunit.commit();
                    txn->checkForInterrupt();
                }
                
                Status status = indexer.doneInserting();
                if (!status.isOK())
                    return status;

                {
                    WriteUnitOfWork wunit(txn);
                    indexer.commit();
                    wunit.commit();
                }

            }

            getDur().syncDataAndTruncateJournal(txn);

            // need both in case journaling is disabled
            MongoFile::flushAll(true);

            txn->checkForInterrupt();
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        // Close the database so we can rename/delete the original data files
        dbHolder().close(txn, dbName);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if (!backupOriginalFiles) {
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath));
        }

        // Reopen the database so it's discoverable
        dbHolder().openDb(txn, dbName);

        return Status::OK();
    }
Example #16
0
Status appendCollectionStorageStats(OperationContext* opCtx,
                                    const NamespaceString& nss,
                                    const BSONObj& param,
                                    BSONObjBuilder* result) {
    int scale = 1;
    if (param["scale"].isNumber()) {
        scale = param["scale"].numberInt();
        if (scale < 1) {
            return {ErrorCodes::BadValue, "scale has to be >= 1"};
        }
    } else if (param["scale"].trueValue()) {
        return {ErrorCodes::BadValue, "scale has to be a number >= 1"};
    }

    bool verbose = param["verbose"].trueValue();

    AutoGetCollectionForReadCommand ctx(opCtx, nss);
    Collection* collection = ctx.getCollection();  // Will be set if present
    if (!ctx.getDb() || !collection) {
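        // The database or the collection does not exist: append zeroed stats and
        // return NamespaceNotFound.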
        result->appendNumber("size", 0);
        result->appendNumber("count", 0);
        result->appendNumber("storageSize", 0);
        result->append("nindexes", 0);
        result->appendNumber("totalIndexSize", 0);
        result->append("indexDetails", BSONObj());
        result->append("indexSizes", BSONObj());
        std::string errmsg = !(ctx.getDb()) ? "Database [" + nss.db().toString() + "] not found."
                                            : "Collection [" + nss.toString() + "] not found.";
        return {ErrorCodes::NamespaceNotFound, errmsg};
    }

    long long size = collection->dataSize(opCtx) / scale;
    result->appendNumber("size", size);
    long long numRecords = collection->numRecords(opCtx);
    result->appendNumber("count", numRecords);

    if (numRecords)
        result->append("avgObjSize", collection->averageObjectSize(opCtx));

    RecordStore* recordStore = collection->getRecordStore();
    result->appendNumber(
        "storageSize",
        static_cast<long long>(recordStore->storageSize(opCtx, result, verbose ? 1 : 0)) / scale);

    recordStore->appendCustomStats(opCtx, result, scale);

    IndexCatalog* indexCatalog = collection->getIndexCatalog();
    result->append("nindexes", indexCatalog->numIndexesReady(opCtx));

    BSONObjBuilder indexDetails;

    IndexCatalog::IndexIterator i = indexCatalog->getIndexIterator(opCtx, false);
    while (i.more()) {
        const IndexDescriptor* descriptor = i.next();
        IndexAccessMethod* iam = indexCatalog->getIndex(descriptor);
        invariant(iam);

        BSONObjBuilder bob;
        if (iam->appendCustomStats(opCtx, &bob, scale)) {
            indexDetails.append(descriptor->indexName(), bob.obj());
        }
    }

    result->append("indexDetails", indexDetails.obj());

    BSONObjBuilder indexSizes;
    long long indexSize = collection->getIndexSize(opCtx, &indexSizes, scale);

    result->appendNumber("totalIndexSize", indexSize / scale);
    result->append("indexSizes", indexSizes.obj());

    return Status::OK();
}
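
A hypothetical invocation of the function above, assuming the signature shown; the namespace and scale are illustrative (sizes would be reported in kilobytes with scale 1024):

    BSONObjBuilder result;
    Status status = appendCollectionStorageStats(opCtx,
                                                 NamespaceString("test.coll"),
                                                 BSON("scale" << 1024 << "verbose" << false),
                                                 &result);
    if (!status.isOK()) {
        // e.g. NamespaceNotFound when the collection is absent
    }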
Example #17
0
    StatusWith<RecordId> Collection::updateDocument( OperationContext* txn,
                                                    const RecordId& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).releaseToBson();

        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<RecordId>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        /* Duplicate key check. We descend the btree twice - once for this check, and once for
           the actual inserts further below. That is suboptimal, but it's pretty complicated to
           do it the other way without rollbacks.
        */

        // At the end of this step, we will have a map of UpdateTickets, one per index, which
        // represent the index updates needed to be done, based on the changes between objOld and
        // objNew.
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket );
            if ( !ret.isOK() ) {
                return StatusWith<RecordId>( ret );
            }
        }

        // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
        // object is removed from all indexes.
        StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn,
                                                                      oldLocation,
                                                                      objNew.objdata(),
                                                                      objNew.objsize(),
                                                                      _enforceQuota( enforceQuota ),
                                                                      this );

        if ( !newLocation.isOK() ) {
            return newLocation;
        }

        // At this point, the old object may or may not still be indexed, depending on if it was
        // moved.

        _infoCache.notifyOfWriteOp();

        // If the object did move, we need to add the new location to all indexes.
        if ( newLocation.getValue() != oldLocation ) {

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            Status s = _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());
            if (!s.isOK())
                return StatusWith<RecordId>(s);

            return newLocation;
        }

        // Object did not move.  We update each index with each respective UpdateTicket.

        if ( debug )
            debug->keyUpdates = 0;

        ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<RecordId>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument(txn, oldLocation, INVALIDATION_MUTATION);

        return newLocation;
    }