예제 #1
0
// Looks up the record stored at 'loc' and, when a record header is obtained, writes a view of
// it into '*rd'.
//
// Returns true if '*rd' was filled in, false if recordFor() produced no header. 'txn' is not
// used by this implementation.
bool RecordStoreV1Base::findRecord(OperationContext* txn,
                                   const RecordId& loc,
                                   RecordData* rd) const {
    // With this storage engine the caller is expected to pass a location that really exists,
    // and there is no way to verify that here, so the location is trusted as-is.
    const auto diskLoc = DiskLoc::fromRecordId(loc);
    MmapV1RecordHeader* const header = recordFor(diskLoc);

    if (header != nullptr) {
        *rd = header->toRecordData();
        return true;
    }

    return false;
}
예제 #2
0
// Applies a set of in-place byte patches ("damages") to the record stored at 'loc'.
//
// For every entry in 'damages', 'size' bytes are copied from 'damageSource + sourceOffset' to
// 'targetOffset' bytes past the start of the record's data. Each destination range is obtained
// through the recovery unit's writingPtr() before it is written.
//
// Returns the record's data as it stands after all patches were applied. 'oldRec' is not read
// by this implementation.
StatusWith<RecordData> RecordStoreV1Base::updateWithDamages(
    OperationContext* txn,
    const RecordId& loc,
    const RecordData& oldRec,
    const char* damageSource,
    const mutablebson::DamageVector& damages) {
    MmapV1RecordHeader* const header = recordFor(DiskLoc::fromRecordId(loc));
    char* const base = header->data();

    // Every update is in place: declare each target range to the recovery unit, then copy the
    // replacement bytes into the pointer it hands back.
    for (const auto& damage : damages) {
        const char* const from = damageSource + damage.sourceOffset;
        void* const to = txn->recoveryUnit()->writingPtr(base + damage.targetOffset, damage.size);
        std::memcpy(to, from, damage.size);
    }

    return header->toRecordData();
}
예제 #3
0
// Compacts one extent: every record in the extent at 'extentLoc' is either re-inserted through
// insertRecordWithDocWriter() or, if it fails validation, dropped; afterwards the now-empty
// extent is unlinked from the extent list and handed back to the extent manager.
//
// Parameters:
//   txn            - operation context; supplies the recovery unit and interrupt checks.
//   extentLoc      - location of the extent to compact. The invariants below require it to be
//                    the first, but not the only, extent of the collection once emptied.
//   extentNumber   - only used in log messages.
//   adaptor        - used to validate documents, compute their raw data size, and to be
//                    notified (inserted()) of each record's new location.
//   compactOptions - controls document validation and the padding mode of the new records.
//   stats          - 'corruptDocuments' is incremented for each record that fails validation.
//
// Each record is moved inside its own WriteUnitOfWork; unlinking/freeing the extent uses a
// separate one.
void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
                                         const DiskLoc extentLoc,
                                         int extentNumber,
                                         RecordStoreCompactAdaptor* adaptor,
                                         const CompactOptions* compactOptions,
                                         CompactStats* stats) {
    log() << "compact begin extent #" << extentNumber << " for namespace " << _ns << " "
          << extentLoc;

    // Running totals over the records that are kept; used only to report the old padding
    // factor in the final log line.
    unsigned oldObjSize = 0;  // we'll report what the old padding was
    unsigned oldObjSizeWithPadding = 0;

    // Refuse to proceed on an extent that does not pass its own sanity checks.
    Extent* const sourceExtent = _extentManager->getExtent(extentLoc);
    sourceExtent->assertOk();
    fassert(17437, sourceExtent->validates(extentLoc));

    {
        // The next/prev MmapV1RecordHeader pointers within the Extent might not be in order so we
        // first page in the whole Extent sequentially.
        // TODO benchmark on slow storage to verify this is measurably faster.
        log() << "compact paging in len=" << sourceExtent->length / 1000000.0 << "MB" << endl;
        Timer t;
        size_t length = sourceExtent->length;

        touch_pages(reinterpret_cast<const char*>(sourceExtent), length);
        int ms = t.millis();
        // Only report throughput when paging took longer than one second.
        if (ms > 1000)
            log() << "compact end paging in " << ms << "ms "
                  << sourceExtent->length / 1000000.0 / t.seconds() << "MB/sec" << endl;
    }

    {
        // Move each MmapV1RecordHeader out of this extent and insert it in to the "new" extents.
        log() << "compact copying records" << endl;
        long long totalNetSize = 0;  // sum of netLength() over the newly inserted records
        long long nrecords = 0;      // number of records copied (corrupt ones are not counted)
        DiskLoc nextSourceLoc = sourceExtent->firstRecord;
        while (!nextSourceLoc.isNull()) {
            txn->checkForInterrupt();

            // One unit of work per record: the copy, the unlink and the stats adjustment are
            // committed together.
            WriteUnitOfWork wunit(txn);
            MmapV1RecordHeader* recOld = recordFor(nextSourceLoc);
            RecordData oldData = recOld->toRecordData();
            // Advance to the following record now, before the current one is unlinked below.
            nextSourceLoc = getNextRecordInExtent(txn, nextSourceLoc);

            if (compactOptions->validateDocuments && !adaptor->isDataValid(oldData)) {
                // object is corrupt! It is not copied, so it disappears with the extent.
                log() << "compact removing corrupt document!";
                stats->corruptDocuments++;
            } else {
                // How much data is in the record. Excludes padding and MmapV1RecordHeader headers.
                const unsigned rawDataSize = adaptor->dataSize(oldData);

                nrecords++;
                oldObjSize += rawDataSize;
                oldObjSizeWithPadding += recOld->netLength();

                // Allocation sizes include the headers and possibly some padding.
                const unsigned minAllocationSize = rawDataSize + MmapV1RecordHeader::HeaderSize;
                unsigned allocationSize = minAllocationSize;
                switch (compactOptions->paddingMode) {
                    case CompactOptions::NONE:  // default padding
                        if (shouldPadInserts()) {
                            allocationSize = quantizeAllocationSpace(minAllocationSize);
                        }
                        break;

                    case CompactOptions::PRESERVE:  // keep original padding
                        allocationSize = recOld->lengthWithHeaders();
                        break;

                    case CompactOptions::MANUAL:  // user specified how much padding to use
                        allocationSize = compactOptions->computeRecordSize(minAllocationSize);
                        // Fall back to the minimum if the computed size is too small or
                        // exceeds half of BSONObjMaxUserSize.
                        if (allocationSize < minAllocationSize ||
                            allocationSize > BSONObjMaxUserSize / 2) {
                            allocationSize = minAllocationSize;
                        }
                        break;
                }
                invariant(allocationSize >= minAllocationSize);

                // Copy the data to a new record. Because we orphaned the record freelist at the
                // start of the compact, this insert will allocate a record in a new extent.
                // See the comment in compact() for more details.
                CompactDocWriter writer(recOld, rawDataSize, allocationSize);
                StatusWith<RecordId> status = insertRecordWithDocWriter(txn, &writer);
                uassertStatusOK(status.getStatus());
                const MmapV1RecordHeader* newRec =
                    recordFor(DiskLoc::fromRecordId(status.getValue()));
                invariant(unsigned(newRec->netLength()) >= rawDataSize);
                totalNetSize += newRec->netLength();

                // Tells the caller that the record has been moved, so it can do things such as
                // add it to indexes.
                adaptor->inserted(newRec->toRecordData(), status.getValue());
            }

            // Remove the old record from the linked list of records within the sourceExtent.
            // The old record is not added to the freelist as we will be freeing the whole
            // extent at the end.
            *txn->recoveryUnit()->writing(&sourceExtent->firstRecord) = nextSourceLoc;
            if (nextSourceLoc.isNull()) {
                // Just moved the last record out of the extent. Mark extent as empty.
                *txn->recoveryUnit()->writing(&sourceExtent->lastRecord) = DiskLoc();
            } else {
                // The following record is now the head of the list, so it has no predecessor.
                MmapV1RecordHeader* newFirstRecord = recordFor(nextSourceLoc);
                txn->recoveryUnit()->writingInt(newFirstRecord->prevOfs()) = DiskLoc::NullOfs;
            }

            // Adjust the stats to reflect the removal of the old record. The insert above
            // handled adjusting the stats for the new record.
            _details->incrementStats(txn, -(recOld->netLength()), -1);

            wunit.commit();
        }

        // The extent must now be empty.
        invariant(sourceExtent->firstRecord.isNull());
        invariant(sourceExtent->lastRecord.isNull());

        // We are still the first extent, but we must not be the only extent.
        invariant(_details->firstExtent(txn) == extentLoc);
        invariant(_details->lastExtent(txn) != extentLoc);

        // Remove the newly emptied sourceExtent from the extent linked list and return it to
        // the extent manager.
        WriteUnitOfWork wunit(txn);
        const DiskLoc newFirst = sourceExtent->xnext;
        _details->setFirstExtent(txn, newFirst);
        // The new first extent must not point back at the extent being freed.
        *txn->recoveryUnit()->writing(&_extentManager->getExtent(newFirst)->xprev) = DiskLoc();
        _extentManager->freeExtent(txn, extentLoc);
        wunit.commit();

        {
            // Ratio of padded size to raw data size across the records that were copied.
            const double oldPadding = oldObjSize ? double(oldObjSizeWithPadding) / oldObjSize
                                                 : 1.0;  // defining 0/0 as 1 for this.

            log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                  << " documents (" << totalNetSize / (1024 * 1024.0) << "MB)"
                  << " oldPadding: " << oldPadding;
        }
    }
}
예제 #4
0
// Validates the on-disk structure of this record store and reports what it finds.
//
// Parameters:
//   txn      - operation context; used for extent/metadata access and interrupt checks.
//   full     - when true, additionally dumps every extent, validates each record through
//              'adaptor' (only while scanning, i.e. when 'scanData' is also true) and reports
//              per-bucket deleted-list sizes.
//   scanData - when true, iterates over every record and reports size/count statistics.
//   adaptor  - used to validate individual records when 'full' is set.
//   results  - 'valid' is set to false and a message is appended to 'errors' for each problem
//              that is detected.
//   output   - receives the collected statistics as BSON fields.
//
// Returns Status::OK() on every path visible here; problems are reported through 'results'
// rather than through the return value.
//
// The work is done in five phases:
//   1) basic status that requires no iteration
//   2) extent level info
//   3) check extent start and end
//   4) check each non-deleted record
//   5) check deleted list
Status RecordStoreV1Base::validate(OperationContext* txn,
                                   bool full,
                                   bool scanData,
                                   ValidateAdaptor* adaptor,
                                   ValidateResults* results,
                                   BSONObjBuilder* output) {
    // -------------
    // Phase 1: basic status that requires no iteration.
    if (isCapped()) {
        output->appendBool("capped", true);
        output->appendNumber("max", _details->maxCappedDocs());
    }

    output->appendNumber("datasize", _details->dataSize());
    output->appendNumber("nrecords", _details->numRecords());
    output->appendNumber("lastExtentSize", _details->lastExtentSize(txn));

    if (_details->firstExtent(txn).isNull())
        output->append("firstExtent", "null");
    else
        output->append("firstExtent",
                       str::stream()
                           << _details->firstExtent(txn).toString() << " ns:"
                           << _getExtent(txn, _details->firstExtent(txn))->nsDiagnostic.toString());
    if (_details->lastExtent(txn).isNull())
        output->append("lastExtent", "null");
    else
        output->append("lastExtent",
                       str::stream()
                           << _details->lastExtent(txn).toString() << " ns:"
                           << _getExtent(txn, _details->lastExtent(txn))->nsDiagnostic.toString());

    // -------------
    // Phase 2: extent level info. Walk the extent list from firstExtent via 'xnext', checking
    // each extent and the consistency of the 'xprev' back-links and of 'lastExtent'.
    {  // validate extent basics
        BSONArrayBuilder extentData;
        int extentCount = 0;
        DiskLoc extentDiskLoc;
        try {
            if (!_details->firstExtent(txn).isNull()) {
                _getExtent(txn, _details->firstExtent(txn))->assertOk();
                _getExtent(txn, _details->lastExtent(txn))->assertOk();
            }

            extentDiskLoc = _details->firstExtent(txn);
            while (!extentDiskLoc.isNull()) {
                Extent* thisExtent = _getExtent(txn, extentDiskLoc);
                if (full) {
                    extentData << thisExtent->dump();
                }
                if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
                    results->valid = false;
                }
                DiskLoc nextDiskLoc = thisExtent->xnext;

                // The back-link of the next extent must point at the current one. This check
                // is skipped for the very first extent (extentCount == 0).
                if (extentCount > 0 && !nextDiskLoc.isNull() &&
                    _getExtent(txn, nextDiskLoc)->xprev != extentDiskLoc) {
                    StringBuilder sb;
                    sb << "'xprev' pointer " << _getExtent(txn, nextDiskLoc)->xprev.toString()
                       << " in extent " << nextDiskLoc.toString() << " does not point to extent "
                       << extentDiskLoc.toString();
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
                // The extent that ends the chain must be the one recorded as 'lastExtent'.
                if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) {
                    StringBuilder sb;
                    sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString()
                       << " does not point to last extent in list " << extentDiskLoc.toString();
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
                extentDiskLoc = nextDiskLoc;
                extentCount++;
                txn->checkForInterrupt();
            }
        } catch (const DBException& e) {
            // A failure while walking the extents ends validation early: the error is
            // recorded and the remaining phases are skipped.
            // NOTE(review): checkForInterrupt() is called inside this try block, so if it
            // reports interruption via a DBException it is also converted into a validation
            // error here - confirm this is intended.
            StringBuilder sb;
            sb << "exception validating extent " << extentCount << ": " << e.what();
            results->errors.push_back(sb.str());
            results->valid = false;
            return Status::OK();
        }
        output->append("extentCount", extentCount);

        if (full)
            output->appendArray("extents", extentData.arr());
    }

    try {
        // -------------
        // Phase 3: check extent start and end. The first extent must have a null 'xprev' and
        // the last extent a null 'xnext'.
        bool testingLastExtent = false;  // selects the name used in the error message below
        try {
            DiskLoc firstExtentLoc = _details->firstExtent(txn);
            if (firstExtentLoc.isNull()) {
                // this is ok
            } else {
                output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump());
                if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) {
                    StringBuilder sb;
                    sb << "'xprev' pointer in 'firstExtent' "
                       << _details->firstExtent(txn).toString() << " is "
                       << _getExtent(txn, firstExtentLoc)->xprev.toString() << ", should be null";
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
            }
            testingLastExtent = true;
            DiskLoc lastExtentLoc = _details->lastExtent(txn);
            if (lastExtentLoc.isNull()) {
                // this is ok
            } else {
                // When there is a single extent, the last extent is not reported or checked
                // separately.
                if (firstExtentLoc != lastExtentLoc) {
                    output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump());
                    if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) {
                        StringBuilder sb;
                        sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
                           << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString()
                           << ", should be null";
                        results->errors.push_back(sb.str());
                        results->valid = false;
                    }
                }
            }
        } catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception processing '" << (testingLastExtent ? "lastExtent" : "firstExtent")
               << "': " << e.what();
            results->errors.push_back(sb.str());
            results->valid = false;
        }

        // -------------
        // Phase 4: check each non-deleted record.

        // Locations of scanned records, consulted in phase 5 to detect live records that also
        // appear in a deleted list. Stays empty when 'scanData' is false.
        set<DiskLoc> recs;
        if (scanData) {
            int n = 0;                     // records visited
            int nInvalid = 0;              // records rejected by the adaptor (full only)
            long long nQuantizedSize = 0;  // records whose total length is quantized
            long long len = 0;             // sum of lengthWithHeaders()
            long long nlen = 0;            // sum of netLength()
            long long bsonLen = 0;         // sum of sizes reported by the adaptor (full only)
            int outOfOrder = 0;            // capped only: records located before their predecessor
            DiskLoc dl_last;

            auto cursor = getCursor(txn);
            while (auto record = cursor->next()) {
                const auto dl = DiskLoc::fromRecordId(record->id);
                n++;

                // Only locations seen while n is below 1000000 are remembered; later records
                // are therefore not cross-checked against the deleted lists.
                if (n < 1000000)
                    recs.insert(dl);
                if (isCapped()) {
                    if (dl < dl_last)
                        outOfOrder++;
                    dl_last = dl;
                }

                MmapV1RecordHeader* r = recordFor(dl);
                len += r->lengthWithHeaders();
                nlen += r->netLength();

                if (isQuantized(r->lengthWithHeaders())) {
                    // Count the number of records having a size consistent with
                    // the quantizeAllocationSpace quantization implementation.
                    ++nQuantizedSize;
                }

                if (full) {
                    size_t dataSize = 0;
                    const Status status = adaptor->validate(r->toRecordData(), &dataSize);
                    if (!status.isOK()) {
                        results->valid = false;
                        if (nInvalid == 0)  // only add the error entry once
                            results->errors.push_back("invalid object detected (see logs)");

                        nInvalid++;
                        log() << "Invalid object detected in " << _ns << ": " << status.reason();
                    } else {
                        bsonLen += dataSize;
                    }
                }
            }

            // Ordering is only reported for capped collections that have not looped; more
            // than one out-of-order record is treated as an error.
            if (isCapped() && !_details->capLooped()) {
                output->append("cappedOutOfOrder", outOfOrder);
                if (outOfOrder > 1) {
                    results->valid = false;
                    results->errors.push_back("too many out of order records");
                }
            }
            output->append("objectsFound", n);

            if (full) {
                output->append("invalidObjects", nInvalid);
            }

            output->appendNumber("nQuantizedSize", nQuantizedSize);
            output->appendNumber("bytesWithHeaders", len);
            output->appendNumber("bytesWithoutHeaders", nlen);

            if (full) {
                output->appendNumber("bytesBson", bsonLen);
            }
        }  // end scanData

        // -------------
        // Phase 5: check deleted list.

        // NOTE(review): this array is populated but never appended to 'output' or otherwise
        // read in this function.
        BSONArrayBuilder deletedListArray;
        for (int i = 0; i < Buckets; i++) {
            deletedListArray << _details->deletedListEntry(i).isNull();
        }

        int ndel = 0;            // deleted records encountered across all buckets
        long long delSize = 0;   // sum of lengthWithHeaders() over deleted records
        BSONArrayBuilder delBucketSizes;
        int incorrect = 0;       // deleted-list entries that were also seen as live records
        for (int i = 0; i < Buckets; i++) {
            DiskLoc loc = _details->deletedListEntry(i);
            try {
                int k = 0;  // position within this bucket's chain
                while (!loc.isNull()) {
                    if (recs.count(loc))
                        incorrect++;
                    ndel++;

                    if (loc.questionable()) {
                        if (isCapped() && !loc.isValid() && i == 1) {
                            // The constructor for NamespaceDetails intentionally sets
                            // deletedList[1] to invalid; see comments in namespace.h.
                            break;
                        }

                        string err(str::stream()
                                   << "bad pointer in deleted record list: " << loc.toString()
                                   << " bucket: " << i << " k: " << k);
                        results->errors.push_back(err);
                        results->valid = false;
                        break;
                    }

                    const DeletedRecord* d = deletedRecordFor(loc);
                    delSize += d->lengthWithHeaders();
                    loc = d->nextDeleted();
                    k++;
                    txn->checkForInterrupt();
                }
                delBucketSizes << k;
            } catch (...) {
                // Best effort: a failure in one bucket is recorded and the remaining buckets
                // are still checked.
                // NOTE(review): this handler also catches anything thrown by
                // checkForInterrupt() above - confirm that is intended.
                results->errors.push_back((string) "exception in deleted chain for bucket " +
                                          BSONObjBuilder::numStr(i));
                results->valid = false;
            }
        }
        output->appendNumber("deletedCount", ndel);
        output->appendNumber("deletedSize", delSize);
        if (full) {
            output->append("delBucketSizes", delBucketSizes.arr());
        }

        if (incorrect) {
            results->errors.push_back(BSONObjBuilder::numStr(incorrect) +
                                      " records from datafile are in deleted list");
            results->valid = false;
        }

    } catch (const AssertionException&) {
        // Caught by const reference so the exception object is neither copied nor sliced.
        results->errors.push_back("exception during validate");
        results->valid = false;
    }

    return Status::OK();
}