/**
 * Mandatory TableStreamContext override.
 * Streams the next recovery message into the (single) output stream and
 * reports each stream's final write position through retPositions.
 */
int64_t RecoveryContext::handleStreamMore(TupleOutputStreamProcessor &outputStreams,
                                          std::vector<int> &retPositions) {
    // Recovery streaming works with exactly one target stream.
    if (outputStreams.size() != 1) {
        throwFatalException("RecoveryContext::handleStreamMore: Expect 1 output stream "
                            "for recovery, received %ld", outputStreams.size());
    }
    /*
     * Table ids don't change during recovery because
     * catalog changes are not allowed.
     */
    const bool moreAvailable = nextMessage(&outputStreams[0]);
    // Hand the caller the final position of every stream.
    for (size_t streamIndex = 0; streamIndex < outputStreams.size(); ++streamIndex) {
        retPositions.push_back((int)outputStreams.at(streamIndex).position());
    }
    // Non-zero if some tuples remain, we're just not sure how many.
    return moreAvailable ? 1 : 0;
}
/**
 * Serializes more tuples via serializeMore() and reports each output
 * stream's resulting position through retPositions. Returns the count
 * serializeMore() produced (how much work remains).
 */
int64_t CopyOnWriteContext::handleStreamMore(TupleOutputStreamProcessor &outputStreams,
                                             std::vector<int> &retPositions) {
    const int64_t tuplesRemaining = serializeMore(outputStreams);
    // If more was streamed copy current positions for return.
    // Can this copy be avoided?
    for (size_t streamIndex = 0; streamIndex < outputStreams.size(); ++streamIndex) {
        retPositions.push_back((int)outputStreams.at(streamIndex).position());
    }
    return tuplesRemaining;
}
/**
 * Exercise the multi-COW.
 *
 * Runs NUM_REPETITIONS rounds of copy-on-write snapshot streaming against
 * npartitions per-partition predicates (one partition deliberately skipped),
 * mutating the table between streamMore() calls, then verifies that the
 * streamed tuples exactly match the precalculated expectation for every
 * partition and that no tuple is left dirty.
 */
TEST_F(CopyOnWriteTest, MultiStreamTest) {

    // Constants
    const int32_t npartitions = 7;
    const int tupleCount = TUPLE_COUNT;

    DefaultTupleSerializer serializer;

    initTable(true);
    addRandomUniqueTuples(m_table, tupleCount);
    MultiStreamTestTool tool(*m_table, npartitions);

    for (size_t iteration = 0; iteration < NUM_REPETITIONS; iteration++) {

        // The last repetition does the delete after streaming.
        bool doDelete = (iteration == NUM_REPETITIONS - 1);

        tool.iterate();

        int totalInserted = 0;          // Total tuple counter.
        // Stream buffers - one heap buffer per partition.
        // FIX: was boost::scoped_ptr<char> holding "new char[BUFFER_SIZE]",
        // which destroys the array with scalar delete instead of delete[]
        // (undefined behavior). std::vector<char> pairs allocation and
        // release correctly; its storage is contiguous so &buf[0] is a
        // drop-in replacement for the raw pointer.
        std::vector<std::vector<char> > buffers(npartitions);
        std::vector<std::string> strings(npartitions);  // Range strings.
        TupleSet expected[npartitions]; // Expected tuple values by partition.
        TupleSet actual[npartitions];   // Actual tuple values by partition.
        int totalSkipped = 0;

        // Prepare streams by generating ranges and range strings based on
        // the desired number of partitions/predicates.
        // Since integer hashes use a simple modulus we just need to provide
        // the partition number for the range.
        // Also prepare a buffer for each stream.
        // Skip one partition to make it interesting.
        int32_t skippedPartition = npartitions / 2;
        for (int32_t i = 0; i < npartitions; i++) {
            buffers[i].resize(BUFFER_SIZE);
            if (i != skippedPartition) {
                strings[i] = tool.generatePredicateString(i);
            } else {
                strings[i] = tool.generatePredicateString(-1);
            }
        }

        // Serialize the activation parameters: delete flag, predicate count
        // and the predicate range strings.
        char buffer[1024 * 256];
        ReferenceSerializeOutput output(buffer, 1024 * 256);
        output.writeByte((int8_t)(doDelete ? 1 : 0));
        output.writeInt(npartitions);
        for (std::vector<std::string>::iterator i = strings.begin(); i != strings.end(); i++) {
            output.writeTextString(*i);
        }

        tool.context("precalculate");

        // Map original tuples to expected partitions.
        voltdb::TableIterator& iterator = m_table->iterator();
        int partCol = m_table->partitionColumn();
        TableTuple tuple(m_table->schema());
        while (iterator.next(tuple)) {
            int64_t value = *reinterpret_cast<int64_t*>(tuple.address() + 1);
            int32_t ipart = (int32_t)(ValuePeeker::peekAsRawInt64(tuple.getNValue(partCol)) % npartitions);
            if (ipart != skippedPartition) {
                bool inserted = expected[ipart].insert(value).second;
                if (!inserted) {
                    int32_t primaryKey = ValuePeeker::peekAsInteger(tuple.getNValue(0));
                    tool.error("Duplicate primary key %d iteration=%lu", primaryKey, iteration);
                }
                ASSERT_TRUE(inserted);
            } else {
                totalSkipped++;
            }
        }

        tool.context("activate");

        // Activation must succeed exactly once per repetition.
        ReferenceSerializeInput input(buffer, output.position());
        bool alreadyActivated = m_table->activateStream(serializer, TABLE_STREAM_SNAPSHOT, 0, m_tableId, input);
        if (alreadyActivated) {
            tool.error("COW was previously activated");
        }
        ASSERT_FALSE(alreadyActivated);

        int64_t remaining = tupleCount;
        while (remaining > 0) {

            // Prepare output streams and their buffers.
            TupleOutputStreamProcessor outputStreams;
            for (int32_t i = 0; i < npartitions; i++) {
                outputStreams.add((void*)&buffers[i][0], BUFFER_SIZE);
            }

            std::vector<int> retPositions;
            remaining = m_table->streamMore(outputStreams, retPositions);
            if (remaining >= 0) {
                ASSERT_EQ(outputStreams.size(), retPositions.size());
            }

            // Per-predicate iterators.
            TupleOutputStreamProcessor::iterator outputStream = outputStreams.begin();

            // Record the final result of streaming to each partition/predicate.
            for (size_t ipart = 0; ipart < npartitions; ipart++) {
                tool.context("serialize: partition=%lu remaining=%lld", ipart, remaining);

                const int serialized = static_cast<int>(outputStream->position());
                if (serialized > 0) {
                    // Skip partition id, row count and first tuple length.
                    int ibuf = sizeof(int32_t) * 3;
                    // FIX: keep the comparison signed. The original
                    // "ibuf < (serialized - sizeof(int32_t))" promoted the
                    // right side to size_t, which would wrap to a huge value
                    // if serialized were ever < 4.
                    while (ibuf < serialized - static_cast<int>(sizeof(int32_t))) {
                        int32_t values[2];
                        values[0] = ntohl(*reinterpret_cast<const int32_t*>(&buffers[ipart][0] + ibuf));
                        values[1] = ntohl(*reinterpret_cast<const int32_t*>(&buffers[ipart][0] + ibuf + 4));
                        int64_t value = *reinterpret_cast<int64_t*>(values);
                        const bool inserted = actual[ipart].insert(value).second;
                        if (!inserted) {
                            tool.valueError(values, "Buffer duplicate: ipart=%lu totalInserted=%d ibuf=%d",
                                            ipart, totalInserted, ibuf);
                        }
                        ASSERT_TRUE(inserted);

                        totalInserted++;

                        // Account for tuple data and second tuple length.
                        ibuf += static_cast<int>(m_tupleWidth + sizeof(int32_t));
                    }
                }

                // Mosey along to the next predicate/partition.
                // Do a silly cross-check that the iterator doesn't end prematurely.
                ++outputStream;
                ASSERT_TRUE(ipart == npartitions - 1 || outputStream != outputStreams.end());
            }

            // Mutate the table between streaming passes (skipped on the
            // final repetition, which tests delete-after-stream instead).
            if (!doDelete) {
                for (size_t imutation = 0; imutation < NUM_MUTATIONS; imutation++) {
                    doRandomTableMutation(m_table);
                }
            }
        }

        // Summarize partitions with incorrect tuple counts.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("check size: partition=%lu", ipart);
            if (expected[ipart].size() != actual[ipart].size()) {
                tool.error("Size mismatch: expected=%lu actual=%lu",
                           expected[ipart].size(), actual[ipart].size());
            }
        }

        // Summarize partitions where expected and actual aren't equal.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("check equality: partition=%lu", ipart);
            if (expected[ipart] != actual[ipart]) {
                tool.error("Not equal");
            }
        }

        // Look for tuples that are missing from partitions.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("missing: partition=%lu", ipart);
            tool.diff(expected[ipart], actual[ipart]);
        }

        // Look for extra tuples that don't belong in partitions.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("extra: partition=%lu", ipart);
            tool.diff(actual[ipart], expected[ipart]);
        }

        // Check tuple diff for each predicate/partition.
        for (size_t ipart = 0; ipart < npartitions; ipart++) {
            tool.context("check equality: partition=%lu", ipart);
            ASSERT_EQ(expected[ipart].size(), actual[ipart].size());
            ASSERT_TRUE(expected[ipart] == actual[ipart]);
        }

        // Check for dirty tuples.
        tool.context("check dirty");
        int numTuples = 0;
        iterator = m_table->iterator();
        while (iterator.next(tuple)) {
            if (tuple.isDirty()) {
                tool.error("Found tuple %d is active and dirty at end of COW",
                           ValuePeeker::peekAsInteger(tuple.getNValue(0)));
            }
            numTuples++;
            ASSERT_FALSE(tuple.isDirty());
        }

        // If deleting check the tuples remaining in the table.
        if (doDelete) {
            ASSERT_EQ(numTuples, totalSkipped);
        } else {
            ASSERT_EQ(numTuples, tupleCount + (m_tuplesInserted - m_tuplesDeleted));
        }

        ASSERT_EQ(tool.nerrors, 0);
    }
}