Example #1
int64_t CopyOnWriteContext::handleStreamMore(TupleOutputStreamProcessor &outputStreams,
                                             std::vector<int> &retPositions) {
    int64_t remaining = serializeMore(outputStreams);
    // If more was streamed, copy current positions for return.
    // Can this copy be avoided?
    for (size_t i = 0; i < outputStreams.size(); i++) {
        retPositions.push_back((int)outputStreams.at(i).position());
    }
    return remaining;
}
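
handleStreamMore() is a thin wrapper: it delegates to serializeMore() and then copies each stream's byte position into retPositions so the caller can tell how much each buffer received. Below is a minimal sketch of the calling contract, using a hypothetical FakeStreamSource in place of the real context; the names and the 8-bytes-per-tuple position are illustrative assumptions, not VoltDB API.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for the streaming context: each call emits up to
// `batch` tuples and reports how many remain. Not VoltDB API.
struct FakeStreamSource {
    int64_t tuplesLeft;
    int64_t streamMore(std::vector<int> &retPositions, int64_t batch) {
        int64_t emitted = std::min(tuplesLeft, batch);
        tuplesLeft -= emitted;
        // Report the buffer position the same way handleStreamMore does,
        // here faked as 8 bytes per emitted tuple.
        retPositions.push_back(static_cast<int>(emitted * 8));
        return tuplesLeft;
    }
};

int main() {
    FakeStreamSource source{25};
    int64_t remaining = source.tuplesLeft;
    while (remaining > 0) {
        std::vector<int> retPositions;  // fresh positions for each batch
        remaining = source.streamMore(retPositions, 10);
        std::cout << "batch bytes: " << retPositions[0]
                  << ", remaining: " << remaining << std::endl;
    }
    return 0;
}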
Example #2
/**
 * Mandatory TableStreamContext override.
 */
int64_t RecoveryContext::handleStreamMore(TupleOutputStreamProcessor &outputStreams,
                                          std::vector<int> &retPositions) {
    if (outputStreams.size() != 1) {
        throwFatalException("RecoveryContext::handleStreamMore: Expect 1 output stream "
                            "for recovery, received %ld", outputStreams.size());
    }
    /*
     * Table ids don't change during recovery because
     * catalog changes are not allowed.
     */
    bool hasMore = nextMessage(&outputStreams[0]);
    // Non-zero if some tuples remain; we're just not sure how many.
    int64_t remaining = (hasMore ? 1 : 0);
    for (size_t i = 0; i < outputStreams.size(); i++) {
        retPositions.push_back((int)outputStreams.at(i).position());
    }
    return remaining;
}
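
Unlike the copy-on-write case, the remaining value here is only a has-more flag (1 or 0), never an exact count, so a driver must loop on zero/nonzero rather than counting down. A minimal sketch of that contract, with FakeRecoverySource as a hypothetical stand-in for the real message source:

#include <cstdint>
#include <iostream>

// Hypothetical message source; nextMessage() emits one message and reports
// whether more follow, mimicking the nextMessage() call above.
struct FakeRecoverySource {
    int messages;
    bool nextMessage() {
        if (messages == 0) return false;
        --messages;
        return messages > 0;
    }
};

int main() {
    FakeRecoverySource source{3};
    int64_t remaining = 1;  // nonzero means "more to do, count unknown"
    while (remaining != 0) {
        remaining = source.nextMessage() ? 1 : 0;
        std::cout << "remaining flag: " << remaining << std::endl;
    }
    return 0;
}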
Example #3
/*
 * Serialize to multiple output streams.
 * Return remaining tuple count, 0 if done, or -1 on error.
 */
int64_t CopyOnWriteContext::serializeMore(TupleOutputStreamProcessor &outputStreams) {
    // Don't expect to be re-called after streaming all the tuples.
    if (m_tuplesRemaining == 0) {
        throwFatalException("serializeMore() was called again after streaming completed.")
    }

    // Need to initialize the output stream list.
    if (outputStreams.empty()) {
        throwFatalException("serializeMore() expects at least one output stream.");
    }
    outputStreams.open(m_table, m_maxTupleLength, m_partitionId, m_predicates);

    //=== Tuple processing loop

    TableTuple tuple(m_table.schema());

    // Set to true to break out of the loop after the tuples dry up
    // or the byte count threshold is hit.
    bool yield = false;
    while (!yield) {

        // Next tuple?
        bool hasMore = m_iterator->next(tuple);
        if (hasMore) {

            // -1 is used as a sentinel value to disable counting for tests.
            if (m_tuplesRemaining > 0) {
                m_tuplesRemaining--;
            }

            /*
             * Write the tuple to all the output streams.
             * Done if any of the buffers filled up.
             * The returned copy count helps decide when to delete if m_doDelete is true.
             */
            int32_t numCopiesMade = 0;
            yield = outputStreams.writeRow(m_serializer, tuple, numCopiesMade);

            /*
             * May want to delete tuple if processing the actual table.
             */
            if (!m_finishedTableScan) {
                /*
                 * If this is the table scan, check whether the tuple is pending
                 * delete and release its storage if it is.
                 */
                if (tuple.isPendingDelete()) {
                    assert(!tuple.isPendingDeleteOnUndoRelease());
                    CopyOnWriteIterator *iter = static_cast<CopyOnWriteIterator*>(m_iterator.get());
                    //Save the extra lookup if possible
                    m_table.deleteTupleStorage(tuple, iter->m_currentBlock);
                }

                /*
                 * Delete a moved tuple?
                 * This is used for Elastic rebalancing, which is wrapped in a transaction.
                 * The delete for undo is generic enough to support this operation.
                 */
                else if (m_doDelete && numCopiesMade > 0) {
                    m_table.deleteTupleForUndo(tuple.address(), true);
                }
            }

        } else if (!m_finishedTableScan) {
            /*
             * After scanning the persistent table switch to scanning the temp
             * table with the tuples that were backed up.
             */
            m_finishedTableScan = true;
            m_iterator.reset(m_backedUpTuples.get()->makeIterator());

        } else {
            /*
             * No more tuples in the temp table and had previously finished the
             * persistent table.
             */
            if (m_tuplesRemaining > 0) {
#ifdef DEBUG
                throwFatalException("serializeMore(): tuple count > 0 after streaming:\n"
                                    "Table name: %s\n"
                                    "Table type: %s\n"
                                    "Original tuple count: %jd\n"
                                    "Active tuple count: %jd\n"
                                    "Remaining tuple count: %jd\n"
                                    "Compacted block count: %jd\n"
                                    "Dirty insert count: %jd\n"
                                    "Dirty update count: %jd\n"
                                    "Partition column: %d\n",
                                    m_table.name().c_str(),
                                    m_table.tableType().c_str(),
                                    (intmax_t)m_totalTuples,
                                    (intmax_t)m_table.activeTupleCount(),
                                    (intmax_t)m_tuplesRemaining,
                                    (intmax_t)m_blocksCompacted,
                                    (intmax_t)m_inserts,
                                    (intmax_t)m_updates,
                                    m_table.partitionColumn());
#else
                char message[1024 * 16];
                snprintf(message, 1024 * 16,
                        "serializeMore(): tuple count > 0 after streaming:\n"
                        "Table name: %s\n"
                        "Table type: %s\n"
                        "Original tuple count: %jd\n"
                        "Active tuple count: %jd\n"
                        "Remaining tuple count: %jd\n"
                        "Compacted block count: %jd\n"
                        "Dirty insert count: %jd\n"
                        "Dirty update count: %jd\n"
                        "Partition column: %d\n",
                        m_table.name().c_str(),
                        m_table.tableType().c_str(),
                        (intmax_t)m_totalTuples,
                        (intmax_t)m_table.activeTupleCount(),
                        (intmax_t)m_tuplesRemaining,
                        (intmax_t)m_blocksCompacted,
                        (intmax_t)m_inserts,
                        (intmax_t)m_updates,
                        m_table.partitionColumn());
                LogManager::getThreadLogger(LOGGERID_HOST)->log(LOGLEVEL_ERROR, message);
#endif
            }
            // -1 is used for tests when we don't bother counting. Need to force it to 0 here.
            if (m_tuplesRemaining < 0)  {
                m_tuplesRemaining = 0;
            }
        }

        // All tuples serialized, bail
        if (m_tuplesRemaining == 0) {
            /*
             * CAUTION: m_iterator->next() is NOT side-effect free!!! It also
             * returns the block back to the table if the call causes it to go
             * over the boundary of used tuples. In case it actually returned
             * the very last tuple in the table last time it's called, the block
             * is still hanging around. So we need to call it again to return
             * the block here.
             */
            if (hasMore) {
                hasMore = m_iterator->next(tuple);
                if (hasMore) {
                    assert(false);
                }
            }
            yield = true;
        }
    }
    // end tuple processing while loop

    // Need to close the output streams and insert row counts.
    outputStreams.close();

    m_serializationBatches++;

    // Handle the sentinel value of -1 which is passed in from tests that don't
    // care about the active tuple count. Return max int as if there are always
    // tuples remaining (until the counter is forced to zero when done).
    if (m_tuplesRemaining < 0) {
        return std::numeric_limits<int32_t>::max();
    }

    // Done when the table scan is finished and iteration is complete.
    return m_tuplesRemaining;
}
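
The -1 sentinel threads through serializeMore() in two ways: while tuples may still exist it is reported as max int so callers keep polling, and once iteration completes it is forced to 0 so the done condition still fires. A minimal sketch of that contract, with normalizeRemaining() as a hypothetical helper, not VoltDB code:

#include <cassert>
#include <cstdint>
#include <limits>

// Same sentinel convention as above: -1 disables counting, 0 means done.
int64_t normalizeRemaining(int64_t tuplesRemaining, bool scanExhausted) {
    if (scanExhausted && tuplesRemaining < 0) {
        tuplesRemaining = 0;  // force the sentinel to "done" once iteration ends
    }
    if (tuplesRemaining < 0) {
        // Counting disabled: report "effectively unbounded" work remaining.
        return std::numeric_limits<int32_t>::max();
    }
    return tuplesRemaining;
}

int main() {
    assert(normalizeRemaining(-1, false) == std::numeric_limits<int32_t>::max());
    assert(normalizeRemaining(-1, true) == 0);   // sentinel collapses to done
    assert(normalizeRemaining(42, false) == 42); // real counts pass through
    return 0;
}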
Example #4
/**
 * Exercise the multi-COW.
 */
TEST_F(CopyOnWriteTest, MultiStreamTest) {

    // Constants
    const int32_t npartitions = 7;
    const int tupleCount = TUPLE_COUNT;

    DefaultTupleSerializer serializer;

    initTable(true);
    addRandomUniqueTuples(m_table, tupleCount);

    MultiStreamTestTool tool(*m_table, npartitions);

    for (size_t iteration = 0; iteration < NUM_REPETITIONS; iteration++) {

        // The last repetition does the delete after streaming.
        bool doDelete = (iteration == NUM_REPETITIONS - 1);

        tool.iterate();

        int totalInserted = 0;              // Total tuple counter.
        boost::scoped_ptr<char> buffers[npartitions];   // Stream buffers.
        std::vector<std::string> strings(npartitions);  // Range strings.
        TupleSet expected[npartitions]; // Expected tuple values by partition.
        TupleSet actual[npartitions];   // Actual tuple values by partition.
        int totalSkipped = 0;

        // Prepare streams by generating ranges and range strings based on
        // the desired number of partitions/predicates.
        // Since integer hashes use a simple modulus we just need to provide
        // the partition number for the range.
        // Also prepare a buffer for each stream.
        // Skip one partition to make it interesting.
        int32_t skippedPartition = npartitions / 2;
        for (int32_t i = 0; i < npartitions; i++) {
            buffers[i].reset(new char[BUFFER_SIZE]);
            if (i != skippedPartition) {
                strings[i] = tool.generatePredicateString(i);
            }
            else {
                strings[i] = tool.generatePredicateString(-1);
            }
        }

        char buffer[1024 * 256];
        ReferenceSerializeOutput output(buffer, 1024 * 256);
        output.writeByte((int8_t)(doDelete ? 1 : 0));
        output.writeInt(npartitions);
        for (std::vector<std::string>::iterator i = strings.begin(); i != strings.end(); i++) {
            output.writeTextString(*i);
        }

        tool.context("precalculate");

        // Map original tuples to expected partitions.
        voltdb::TableIterator& iterator = m_table->iterator();
        int partCol = m_table->partitionColumn();
        TableTuple tuple(m_table->schema());
        while (iterator.next(tuple)) {
            int64_t value = *reinterpret_cast<int64_t*>(tuple.address() + 1);
            int32_t ipart = (int32_t)(ValuePeeker::peekAsRawInt64(tuple.getNValue(partCol)) % npartitions);
            if (ipart != skippedPartition) {
                bool inserted = expected[ipart].insert(value).second;
                if (!inserted) {
                    int32_t primaryKey = ValuePeeker::peekAsInteger(tuple.getNValue(0));
                    tool.error("Duplicate primary key %d iteration=%lu", primaryKey, iteration);
                }
                ASSERT_TRUE(inserted);
            }
            else {
                totalSkipped++;
            }
        }

        tool.context("activate");

        ReferenceSerializeInput input(buffer, output.position());
        bool alreadyActivated = m_table->activateStream(serializer, TABLE_STREAM_SNAPSHOT, 0, m_tableId, input);
        if (alreadyActivated) {
            tool.error("COW was previously activated");
        }
        ASSERT_FALSE(alreadyActivated);

        int64_t remaining = tupleCount;
        while (remaining > 0) {

            // Prepare output streams and their buffers.
            TupleOutputStreamProcessor outputStreams;
            for (int32_t i = 0; i < npartitions; i++) {
                outputStreams.add((void*)buffers[i].get(), BUFFER_SIZE);
            }

            std::vector<int> retPositions;
            remaining = m_table->streamMore(outputStreams, retPositions);
            if (remaining >= 0) {
                ASSERT_EQ(outputStreams.size(), retPositions.size());
            }

            // Per-predicate iterators.
            TupleOutputStreamProcessor::iterator outputStream = outputStreams.begin();

            // Record the final result of streaming to each partition/predicate.
            for (size_t ipart = 0; ipart < npartitions; ipart++) {

                tool.context("serialize: partition=%lu remaining=%lld", ipart, remaining);

                const int serialized = static_cast<int>(outputStream->position());
                if (serialized > 0) {
                    // Skip partition id, row count and first tuple length.
                    int ibuf = sizeof(int32_t) * 3;
                    while (ibuf < (serialized - sizeof(int32_t))) {
                        int32_t values[2];
                        values[0] = ntohl(*reinterpret_cast<const int32_t*>(buffers[ipart].get()+ibuf));
                        values[1] = ntohl(*reinterpret_cast<const int32_t*>(buffers[ipart].get()+ibuf+4));
                        int64_t value = *reinterpret_cast<int64_t*>(values);
                        const bool inserted = actual[ipart].insert(value).second;
                        if (!inserted) {
                            tool.valueError(values, "Buffer duplicate: ipart=%lu totalInserted=%d ibuf=%d",
                                            ipart, totalInserted, ibuf);
                        }
                        ASSERT_TRUE(inserted);

                        totalInserted++;

                        // Account for tuple data and second tuple length.
                        ibuf += static_cast<int>(m_tupleWidth + sizeof(int32_t));
                    }
                }

                // Mosey along to the next predicate/partition.
                // Do a silly cross-check that the iterator doesn't end prematurely.
                ++outputStream;
                ASSERT_TRUE(ipart == npartitions - 1 || outputStream != outputStreams.end());
            }

            // Mutate the table.
            if (!doDelete) {
                for (size_t imutation = 0; imutation < NUM_MUTATIONS; imutation++) {
                    doRandomTableMutation(m_table);
                }
            }
        }

        // Summarize partitions with incorrect tuple counts.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("check size: partition=%lu", ipart);
            if (expected[ipart].size() != actual[ipart].size()) {
                tool.error("Size mismatch: expected=%lu actual=%lu",
                           expected[ipart].size(), actual[ipart].size());
            }
        }

        // Summarize partitions where expected and actual aren't equal.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("check equality: partition=%lu", ipart);
            if (expected[ipart] != actual[ipart]) {
                tool.error("Not equal");
            }
        }

        // Look for tuples that are missing from partitions.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("missing: partition=%lu", ipart);
            tool.diff(expected[ipart], actual[ipart]);
        }

        // Look for extra tuples that don't belong in partitions.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("extra: partition=%lu", ipart);
            tool.diff(actual[ipart], expected[ipart]);
        }

        // Check tuple diff for each predicate/partition.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("check equality: partition=%lu", ipart);
            ASSERT_EQ(expected[ipart].size(), actual[ipart].size());
            ASSERT_TRUE(expected[ipart] == actual[ipart]);
        }

        // Check for dirty tuples.
        tool.context("check dirty");
        int numTuples = 0;
        iterator = m_table->iterator();
        while (iterator.next(tuple)) {
            if (tuple.isDirty()) {
                tool.error("Found tuple %d is active and dirty at end of COW",
                           ValuePeeker::peekAsInteger(tuple.getNValue(0)));
            }
            numTuples++;
            ASSERT_FALSE(tuple.isDirty());
        }

        // If deleting check the tuples remaining in the table.
        if (doDelete) {
            ASSERT_EQ(numTuples, totalSkipped);
        }
        else {
            ASSERT_EQ(numTuples, tupleCount + (m_tuplesInserted - m_tuplesDeleted));
        }
        ASSERT_EQ(tool.nerrors, 0);
    }
}
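
The inner decoding loop in this test assumes a specific serialized layout: a three-int32 header (partition id, row count, first tuple length), then fixed-width rows, each followed by the next row's length field. A compilable sketch of that decoding follows; decodeFixedWidthRows() is a hypothetical helper and the 8-byte row width is an assumption for the synthetic buffer.

#include <arpa/inet.h>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical helper mirroring the test's decoding loop; not VoltDB API.
std::vector<int64_t> decodeFixedWidthRows(const char *buf, int serialized, int tupleWidth) {
    std::vector<int64_t> values;
    int ibuf = static_cast<int>(sizeof(int32_t)) * 3;  // skip the header
    while (ibuf < serialized - static_cast<int>(sizeof(int32_t))) {
        int32_t words[2];
        words[0] = ntohl(*reinterpret_cast<const int32_t*>(buf + ibuf));
        words[1] = ntohl(*reinterpret_cast<const int32_t*>(buf + ibuf + 4));
        int64_t value;
        std::memcpy(&value, words, sizeof(value));  // reassemble the 64-bit value
        values.push_back(value);
        ibuf += tupleWidth + static_cast<int>(sizeof(int32_t));  // row + next length field
    }
    return values;
}

int main() {
    // One synthetic row: 12-byte header, 8-byte value, trailing length field.
    char buf[12 + 8 + 4] = {};
    const int32_t hi = htonl(0x01020304), lo = htonl(0x05060708);
    std::memcpy(buf + 12, &hi, sizeof(hi));
    std::memcpy(buf + 16, &lo, sizeof(lo));
    std::vector<int64_t> rows = decodeFixedWidthRows(buf, sizeof(buf), 8);
    assert(rows.size() == 1);
    return 0;
}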