/** * Parse and save predicates. */ void ElasticContext::updatePredicates(const std::vector<std::string> &predicateStrings) { //If there is already a predicate and thus presumably an index, make sure the request is a subset of what exists //That should always be the case, but wrong answers will follow if we are wrong if (m_predicates.size() > 0 && dynamic_cast<HashRangeExpression*>(&m_predicates[0]) != NULL && predicateStrings.size() > 0) { PlannerDomRoot domRoot(predicateStrings[0].c_str()); if (!domRoot.isNull()) { PlannerDomValue predicateObject = domRoot.rootObject(); HashRangeExpression *expression = dynamic_cast<HashRangeExpression*>(&m_predicates[0]); if (predicateObject.hasKey("predicateExpression")) { PlannerDomValue predicateExpression = predicateObject.valueForKey("predicateExpression"); PlannerDomValue rangesArray = predicateExpression.valueForKey("RANGES"); for (int ii = 0; ii < rangesArray.arrayLen(); ii++) { PlannerDomValue arrayObject = rangesArray.valueAtIndex(ii); PlannerDomValue rangeStartValue = arrayObject.valueForKey("RANGE_START"); PlannerDomValue rangeEndValue = arrayObject.valueForKey("RANGE_END"); if (!expression->binarySearch(rangeStartValue.asInt()).isTrue()) { throwFatalException("ElasticContext activate failed because a context already existed with conflicting ranges, conflicting range start is %d", rangeStartValue.asInt()); } if (!expression->binarySearch(rangeEndValue.asInt()).isTrue()) { throwFatalException("ElasticContext activate failed because a context already existed with conflicting ranges, conflicting range end is %d", rangeStartValue.asInt()); } } } } } m_predicateStrings = predicateStrings; // retain for possible clone after TRUNCATE TABLE TableStreamerContext::updatePredicates(predicateStrings); }
/** * Write a tuple to the output streams. * Expects buffer space was already checked. * Returns true when the caller should yield to allow other work to proceed. */ bool TupleOutputStreamProcessor::writeRow(TupleSerializer &tupleSerializer, TableTuple &tuple, bool *deleteRow) { if (m_table == NULL) { throwFatalException("TupleOutputStreamProcessor::writeRow() was called before open()."); } // Predicates, if supplied, are one per output stream (previously asserted). StreamPredicateList::iterator ipredicate; std::vector<bool>::iterator iDeleteFlag; assert(m_predicates != NULL); if (!m_predicates->empty()) { ipredicate = m_predicates->begin(); iDeleteFlag = m_predicateDeletes->begin(); } bool yield = false; for (TupleOutputStreamProcessor::iterator iter = begin(); iter != end(); ++iter) { // Get approval from corresponding output stream predicate, if provided. bool accepted = true; if (!m_predicates->empty()) { if (!boost::is_null(ipredicate)) { accepted = ipredicate->eval(&tuple).isTrue(); } // Keep walking through predicates in lock-step with the streams. // As with first() we expect a predicate to be available for each and every stream. // It was already checked, so just assert here. assert(ipredicate != m_predicates->end()); if (accepted && deleteRow != NULL) { (*deleteRow) = (*deleteRow) || *iDeleteFlag; } ++ipredicate; ++iDeleteFlag; } if (accepted) { if (!iter->canFit(m_maxTupleLength)) { throwFatalException( "TupleOutputStreamProcessor::writeRow() failed because buffer has no space."); } iter->writeRow(tupleSerializer, tuple); // Check if we'll need to yield after handling this row. if (!yield) { // Yield when the buffer is not capable of handling another tuple // or when the total bytes serialized threshold is exceeded. yield = ( !iter->canFit(m_maxTupleLength) || iter->getTotalBytesSerialized() > m_bytesSerializedThreshold); } } } return yield; }
/**
 * Open the anti-cache store: a Berkeley DB environment rooted at db_dir and,
 * inside it, a hash database named ANTICACHE_DB_NAME. Both are created if
 * they do not exist.
 *
 * @param ctx       executor context, stored as-is.
 * @param db_dir    directory that holds the Berkeley DB environment.
 * @param blockSize stored in m_blockSize.
 *
 * Any DbException raised while opening is logged and converted into a fatal
 * exception.
 */
AntiCacheDB::AntiCacheDB(ExecutorContext *ctx, std::string db_dir, long blockSize) :
    m_executorContext(ctx),
    m_dbDir(db_dir),
    m_blockSize(blockSize),
    m_nextBlockId(0)
{
    // Environment flags. DB_AUTO_COMMIT, DB_TXN_NOSYNC, DB_INIT_LOCK and
    // DB_INIT_TXN are intentionally left out.
    u_int32_t env_flags = DB_CREATE;   // Create the environment if it does not exist
    env_flags |= DB_INIT_MPOOL;        // Initialize the memory pool (in-memory cache)
    env_flags |= DB_PRIVATE;
    env_flags |= DB_THREAD;            // allow multiple threads
    env_flags |= DB_DIRECT_DB;         // Use O_DIRECT

    try {
        // Environment first ...
        m_dbEnv = new DbEnv(0);
        m_dbEnv->open(m_dbDir.c_str(), env_flags, 0);

        // ... then the database instance that lives in it.
        m_db = new Db(m_dbEnv, 0);
        m_db->open(NULL, ANTICACHE_DB_NAME, NULL, DB_HASH, DB_CREATE, 0);
    }
    catch (DbException &openError) {
        VOLT_ERROR("Anti-Cache initialization error: %s", openError.what());
        VOLT_ERROR("Failed to initialize anti-cache database in directory %s", db_dir.c_str());
        throwFatalException("Failed to initialize anti-cache database in directory %s: %s",
                            db_dir.c_str(), openError.what());
    }
}
/*
 * Insert a tuple into the evicted table without creating any UNDO action.
 *
 * The source is checked with checkNulls() first; a failure is fatal. The tuple
 * is then copied into the next free slot, marked not-deleted and marked
 * NVM-evicted.
 *
 * Returns the address of the newly inserted tuple.
 */
const void* NVMEvictedTable::insertNVMEvictedTuple(TableTuple &source)
{
    // Not-null constraints are verified before any storage is claimed.
    const bool nullsAcceptable = checkNulls(source);
    if (!nullsAcceptable) {
        throwFatalException("Failed to insert tuple into table %s for undo:"
                            " null constraint violation\n%s\n",
                            m_name.c_str(), source.debugNoHeader().c_str());
    }

    // Claim the next free tuple slot (either from the free slot list or from
    // the end of our chunk of memory) and account for it.
    nextFreeTuple(&m_tmpTarget1);
    ++m_tupleCount;

    // Fill the slot from the source.
    m_tmpTarget1.copyForPersistentInsert(source, m_pool);
    m_tmpTarget1.setDeletedFalse();

    // Flag the tuple as evicted so it can be recognized as such when the
    // index is iterated.
    m_tmpTarget1.setNVMEvictedTrue();
    assert(m_tmpTarget1.isNVMEvicted());

    return m_tmpTarget1.address();
}
/**
 * Evict blocks from a table to disk and report what this call evicted.
 *
 * The table's eviction counters are sampled before and after
 * evictBlockToDisk(); the differences are written as a single row into
 * m_evictResultTable (table name, tuples evicted, blocks evicted, bytes
 * evicted). Any previous content of the result table is removed first.
 *
 * @param table     table to evict from.
 * @param blockSize forwarded to PersistentTable::evictBlockToDisk().
 * @param numBlocks forwarded to PersistentTable::evictBlockToDisk().
 * @return m_evictResultTable holding the one-row summary.
 *
 * A false return from evictBlockToDisk() is fatal.
 */
Table* AntiCacheEvictionManager::evictBlock(PersistentTable *table, long blockSize, int numBlocks)
{
    const int32_t lastTuplesEvicted = table->getTuplesEvicted();
    const int32_t lastBlocksEvicted = table->getBlocksEvicted();
    const int64_t lastBytesEvicted  = table->getBytesEvicted();

    if (table->evictBlockToDisk(blockSize, numBlocks) == false) {
        throwFatalException("Failed to evict tuples from table '%s'", table->name().c_str());
    }

    const int32_t tuplesEvicted = table->getTuplesEvicted() - lastTuplesEvicted;
    const int32_t blocksEvicted = table->getBlocksEvicted() - lastBlocksEvicted;
    const int64_t bytesEvicted  = table->getBytesEvicted() - lastBytesEvicted;

    m_evictResultTable->deleteAllTuples(false);
    TableTuple tuple = m_evictResultTable->tempTuple();

    int idx = 0;
    tuple.setNValue(idx++, ValueFactory::getStringValue(table->name()));
    tuple.setNValue(idx++, ValueFactory::getIntegerValue(tuplesEvicted));
    tuple.setNValue(idx++, ValueFactory::getIntegerValue(blocksEvicted));
    // The byte count is 64-bit; pass it through unnarrowed so large values
    // are not truncated on their way into the BIGINT column.
    tuple.setNValue(idx++, ValueFactory::getBigIntValue(bytesEvicted));
    m_evictResultTable->insertTuple(tuple);

    return (m_evictResultTable);
}
/**
 * Construct a copy-on-write context over a table.
 *
 * @param table            table to be streamed; its schema sizes m_tuple and
 *                         m_maxTupleLength, and its block map is copied into
 *                         m_blocks.
 * @param serializer       tuple serializer, also used to compute the maximum
 *                         serialized tuple size.
 * @param partitionId      stored in m_partitionId.
 * @param predicateStrings serialized predicates parsed into m_predicates.
 * @param totalTuples      initial value for both m_totalTuples and
 *                         m_tuplesRemaining.
 * @param doDelete         stored in m_doDelete.
 *
 * Throws a fatal exception when the predicate strings cannot be parsed.
 */
CopyOnWriteContext::CopyOnWriteContext(
        PersistentTable &table,
        TupleSerializer &serializer,
        int32_t partitionId,
        const std::vector<std::string> &predicateStrings,
        int64_t totalTuples,
        bool doDelete) :
    m_table(table),
    m_backedUpTuples(TableFactory::getCopiedTempTable(table.databaseId(),
                                                      "COW of " + table.name(),
                                                      &table, NULL)),
    m_serializer(serializer),
    m_pool(2097152, 320),
    m_blocks(m_table.m_data),
    m_iterator(new CopyOnWriteIterator(&table, m_blocks.begin(), m_blocks.end())),
    m_maxTupleLength(serializer.getMaxSerializedTupleSize(table.schema())),
    m_tuple(table.schema()),
    m_finishedTableScan(false),
    m_partitionId(partitionId),
    m_totalTuples(totalTuples),
    m_tuplesRemaining(totalTuples),
    m_blocksCompacted(0),
    m_serializationBatches(0),
    m_inserts(0),
    m_updates(0),
    m_doDelete(doDelete)
{
    // Parse predicate strings. The factory type determines the kind of
    // predicates that get generated.
    // Throws an exception to be handled by caller on errors.
    std::ostringstream errmsg;
    if (!m_predicates.parseStrings(predicateStrings, errmsg)) {
        // Surface whatever the parser wrote into errmsg instead of dropping it.
        // NOTE(review): assumes parseStrings() reports its diagnostics through
        // the stream argument - confirm against its declaration.
        throwFatalException("CopyOnWriteContext() failed to parse predicate strings: %s",
                            errmsg.str().c_str());
    }
}
/**
 * Create a tuple block for a table and allocate its backing storage.
 *
 * Storage of table->m_tableAllocationSize bytes comes from an anonymous
 * private mmap() when USE_MMAP is defined and MEMCHECK is not; in every other
 * configuration it comes from new char[]. A failed mmap() prints strerror(errno)
 * to stdout and is fatal. The global tupleBlocksAllocated counter is bumped on
 * success.
 *
 * @param table  owning table; supplies tuple length, tuples per block and
 *               allocation size.
 * @param bucket stored in m_bucket.
 */
TupleBlock::TupleBlock(Table *table, TBBucketPtr bucket) :
        m_references(0),
        m_table(table),
        m_storage(NULL),
        m_tupleLength(table->m_tupleLength),
        m_tuplesPerBlock(table->m_tuplesPerBlock),
        m_activeTuples(0),
        m_nextFreeTuple(0),
        m_lastCompactionOffset(0),
        m_tuplesPerBlockDivNumBuckets(m_tuplesPerBlock / static_cast<double>(TUPLE_BLOCK_NUM_BUCKETS)),
        m_bucketIndex(0),
        m_bucket(bucket)
{
#if !defined(MEMCHECK) && defined(USE_MMAP)
    void *mapping = ::mmap(0, table->m_tableAllocationSize,
                           PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANON,
                           -1, 0);
    m_storage = static_cast<char*>(mapping);
    if (m_storage == MAP_FAILED) {
        std::cout << strerror( errno ) << std::endl;
        throwFatalException("Failed mmap");
    }
#else
    // MEMCHECK builds, and builds without USE_MMAP, use the plain heap.
    m_storage = new char[table->m_tableAllocationSize];
#endif
    tupleBlocksAllocated++;
}
/**
 * Install the limit expression for this node.
 *
 * Replacing an already-set expression with a different one is treated as a
 * programming error and raises a fatal exception; setting the same pointer
 * again is accepted.
 *
 * @param expression expression to store in limitExpression.
 */
void LimitPlanNode::setLimitExpression(AbstractExpression* expression)
{
    if (limitExpression && limitExpression != expression) {
        // The throw leaves the function, so nothing may follow it here (the
        // old "delete limitExpression" after this line could never run).
        throwFatalException("limitExpression initialized twice in LimitPlanNode");
    }
    this->limitExpression = expression;
}
/**
 * Fetch the pool that serves objects of the given size.
 *
 * The requested size is first mapped to an allocation size via
 * getAllocationSizeForObject(); a result of 0 is treated as "too large" and is
 * fatal. Otherwise the pool for exactly that allocation size is returned.
 *
 * @param size requested object size in bytes.
 * @return the pool returned by getExact() for the computed allocation size.
 */
boost::shared_ptr<boost::pool<voltdb_pool_allocator_new_delete> > ThreadLocalPool::get(std::size_t size)
{
    size_t alloc_size = getAllocationSizeForObject(size);
    if (alloc_size == 0) {
        // %zu is the standard printf length modifier for size_t.
        throwFatalException("Attempted to allocate an object larger than the 1 meg limit. Requested size was %zu", size);
    }
    return getExact(alloc_size);
}
/**
 * Close and release the Berkeley DB database, then its environment.
 *
 * A DbException from either close is logged and converted into a fatal
 * exception.
 */
AntiCacheDB::~AntiCacheDB()
{
    // NOTE: the database has to be closed before the environment is.
    try {
        m_db->close(0);
        delete m_db;
    }
    catch (DbException &dbCloseError) {
        VOLT_ERROR("Anti-Cache database closing error: %s", dbCloseError.what());
        throwFatalException("Failed to close anti-cache database: %s", dbCloseError.what());
    }

    try {
        m_dbEnv->close(0);
        delete m_dbEnv;
    }
    catch (DbException &envCloseError) {
        VOLT_ERROR("Anti-Cache environment closing error: %s", envCloseError.what());
        throwFatalException("Failed to close anti-cache database environment: %s", envCloseError.what());
    }
}
/**
 * Updates are not supported on temp tables: this always raises a fatal
 * exception and never returns a value. None of the arguments are used.
 */
bool TempTable::updateTupleWithSpecificIndexes(TableTuple &targetTupleToUpdate,
                                               TableTuple &sourceTupleWithNewValues,
                                               std::vector<TableIndex*> const &indexesToUpdate,
                                               bool)
{
    throwFatalException("TempTable does not support update");
    // If a use case ever turns up, the implementation would copy the source
    // tuple into the target:
    //     targetTupleToUpdate.copy(sourceTupleWithNewValues);
}
/**
 * Serialize one chunk of tuples into the output buffer.
 *
 * Chunk layout as written here: partition id, CRC of the partition id, a
 * reserved 4-byte slot that receives the chunk CRC, the serialized tuples, and
 * finally the row count. The chunk CRC covers the tuples and the row count.
 *
 * Tuples come from m_iterator; once the persistent table is exhausted the
 * iterator is switched to the backed-up tuples in m_backedUpTuples.
 *
 * @param out destination buffer; must have room for at least one maximum-size
 *            tuple plus an int32, otherwise a fatal exception is thrown.
 * @return true when the chunk was closed because the buffer filled up (more
 *         data may follow); false when both sources are exhausted.
 */
bool CopyOnWriteContext::serializeMore(ReferenceSerializeOutput *out)
{
    boost::crc_32_type crc;
    boost::crc_32_type partitionIdCRC;

    // Header: partition id, its checksum, and a placeholder for the chunk CRC.
    out->writeInt(m_partitionId);
    partitionIdCRC.process_bytes(out->data() + out->position() - 4, 4);
    out->writeInt(partitionIdCRC.checksum());
    const std::size_t crcPosition = out->reserveBytes(4);

    int rowsSerialized = 0;
    TableTuple tuple(m_table->schema());

    if (out->remaining() < (m_maxTupleLength + sizeof(int32_t))) {
        throwFatalException("Serialize more should never be called "
                            "a 2nd time after return indicating there is no more data");
    }

    bool moreToCome = true;
    while (out->remaining() >= (m_maxTupleLength + sizeof(int32_t))) {
        if (!m_iterator->next(tuple)) {
            if (m_finishedTableScan) {
                // Both the table and the backed-up tuples are drained.
                moreToCome = false;
                break;
            }
            // The persistent table is done; continue with the temp table that
            // holds the tuples that were backed up.
            m_finishedTableScan = true;
            m_iterator.reset(new TableIterator(m_backedUpTuples.get()));
            continue;
        }

        const std::size_t tupleBegin = out->position();
        m_serializer->serializeTo(tuple, out);
        const std::size_t tupleEnd = out->position();
        crc.process_block(out->data() + tupleBegin, out->data() + tupleEnd);

        m_tuplesSerialized++;
        rowsSerialized++;
    }

    /*
     * The row count is only known now. It is written last so that it can be
     * part of the CRC; it will be moved back to the front to match the table
     * serialization format when the chunk is read later.
     */
    out->writeInt(rowsSerialized);
    crc.process_bytes(out->data() + out->position() - 4, 4);
    out->writeIntAt(crcPosition, crc.checksum());

    return moreToCome;
}
/**
 * Map a constraint type to its display name.
 *
 * @param type constraint type to name.
 * @return "ForeignKey", "Main", "Unique", "Check", "PrimaryKey", "NotNull" or
 *         "Partitioning".
 *
 * Any other value raises a fatal exception.
 */
std::string getTypeName(voltdb::ConstraintType type)
{
    switch (type) {
        case (voltdb::CONSTRAINT_TYPE_FOREIGN_KEY):
            return "ForeignKey";
        case (voltdb::CONSTRAINT_TYPE_MAIN):
            return "Main";
        case (voltdb::CONSTRAINT_TYPE_UNIQUE):
            return "Unique";
        case (voltdb::CONSTRAINT_TYPE_CHECK):
            return "Check";
        case (voltdb::CONSTRAINT_TYPE_PRIMARY_KEY):
            return "PrimaryKey";
        case (voltdb::CONSTRAINT_TYPE_NOT_NULL):
            return "NotNull";
        case (voltdb::CONSTRAINT_TYPE_PARTITIONING):
            return "Partitioning";
        default:
            break;
    }
    // Unknown constraint type.
    throwFatalException ( "Invalid Constraint type '%d'", type);
}
/** * Parse and save predicates. */ void TableStreamerContext::updatePredicates(const std::vector<std::string> &predicateStrings) { // Parse predicate strings. The factory type determines the kind of // predicates that get generated. // Throws an exception to be handled by caller on errors. std::ostringstream errmsg; m_predicates.clear(); if (!m_predicates.parseStrings(predicateStrings, errmsg, m_predicateDeleteFlags)) { throwFatalException("TableStreamerContext() failed to parse predicate strings."); } }
void ThreadLocalPool::freeExactSizedObject(std::size_t sz, void* object) { PoolsByObjectSize& pools = *(static_cast< PairTypePtr >(pthread_getspecific(m_key))->second); PoolsByObjectSize::iterator iter = pools.find(sz); if (iter == pools.end()) { throwFatalException( "Failed to locate an allocated object of size %ld to free it.", static_cast<long>(sz)); } PoolForObjectSize* pool = iter->second.get(); pool->free(object); }
/**
 * Construct an elastic streaming context.
 *
 * All arguments except nTuplesPerCall are forwarded to the
 * TableStreamerContext base; nTuplesPerCall is kept in m_nTuplesPerCall and
 * the index starts out inactive.
 *
 * Exactly one predicate string is required; any other count is fatal.
 */
ElasticContext::ElasticContext(PersistentTable &table,
                               PersistentTableSurgeon &surgeon,
                               int32_t partitionId,
                               TupleSerializer &serializer,
                               const std::vector<std::string> &predicateStrings,
                               size_t nTuplesPerCall) :
    TableStreamerContext(table, surgeon, partitionId, serializer, predicateStrings),
    m_nTuplesPerCall(nTuplesPerCall),
    m_indexActive(false)
{
    const bool exactlyOnePredicate = (predicateStrings.size() == 1);
    if (!exactlyOnePredicate) {
        throwFatalException("ElasticContext::ElasticContext() expects a single predicate.");
    }
}
/**
 * Pick a pseudo-random tuple from a table.
 *
 * An index in [0, activeTupleCount) is drawn with rand() and the table is
 * iterated until that many tuples have been skipped.
 *
 * @param table table to sample from.
 * @param out   receives the selected tuple (it is also used as the iteration
 *              cursor, so it is overwritten even while skipping).
 * @return true when a tuple was selected; false when the table reports no
 *         active tuples.
 *
 * Running out of tuples before the drawn index is reached is fatal.
 */
bool getRandomTuple(const voltdb::Table* table, voltdb::TableTuple &out)
{
    const int cnt = static_cast<int>(table->activeTupleCount());
    if (cnt <= 0) {
        return false;
    }

    int idx = (rand() % cnt);
    // tableIterator() is only available on a non-const table.
    voltdb::Table* table2 = const_cast<voltdb::Table*>(table);
    voltdb::TableIterator it = table2->tableIterator();
    while (it.next(out)) {
        if (idx-- == 0) {
            return true;
        }
    }
    // The two literals are concatenated by the compiler, so the separating
    // space has to be part of one of them.
    throwFatalException("Unable to retrieve a random tuple. "
                        "Iterated entire table below active tuple count but ran out of tuples");
}
/**
 * Start serializing.
 *
 * Records the table, the maximum tuple length and the predicate list, then
 * calls startRows(partitionId) on every output stream.
 *
 * @param table          table being streamed; its address is kept in m_table.
 * @param maxTupleLength stored in m_maxTupleLength.
 * @param partitionId    passed to each stream's startRows().
 * @param predicates     either empty or exactly one entry per output stream;
 *                       its address is kept in m_predicates.
 *
 * A non-empty predicate list whose size differs from the stream count is fatal.
 */
void TupleOutputStreamProcessor::open(PersistentTable &table,
                                      std::size_t maxTupleLength,
                                      int32_t partitionId,
                                      StreamPredicateList &predicates)
{
    m_table = &table;
    m_maxTupleLength = maxTupleLength;

    // It must be either one predicate per output stream or none at all.
    if (!predicates.empty() && predicates.size() != size()) {
        throwFatalException("serializeMore() expects either no predicates or one per output stream.");
    }
    m_predicates = &predicates;

    TupleOutputStreamProcessor::iterator stream = begin();
    while (stream != end()) {
        stream->startRows(partitionId);
        ++stream;
    }
}
/**
 * Return a relocatable allocation to the pool it came from.
 *
 * The pool is selected by the allocation size derived from the size cached in
 * the object itself (sized->m_size).
 *
 * @param sized allocation to free.
 *
 * An allocation size with no matching pool is fatal.
 */
void ThreadLocalPool::freeRelocatable(Sized* sized)
{
    // The cached size tells us which pool owns the allocation.
    int32_t alloc_size = getAllocationSizeForObject(sized->m_size);
    CompactingStringStorage& poolMap = getStringPoolMap();

    CompactingStringStorage::iterator owner = poolMap.find(alloc_size);
    if (owner != poolMap.end()) {
        // Free the raw allocation from the found pool.
        owner->second->free(sized);
        return;
    }

    // No pool for this size means nothing of this size was ever allocated
    // here: either the caller passed a bogus data pointer or the data
    // pointer's size header has been corrupted.
    throwFatalException("Attempted to free an object of an unrecognized size. Requested size was %d",
                        alloc_size);
}
/**
 * Mandatory TableStreamContext override.
 *
 * Produces the next recovery message into the single output stream and
 * appends the position of every output stream to retPositions.
 *
 * @param outputStreams must contain exactly one stream; anything else is fatal.
 * @param retPositions  receives one position per output stream (appended).
 * @return 1 when nextMessage() reported more data, otherwise 0. A non-zero
 *         value only says that some tuples remain, not how many.
 */
int64_t RecoveryContext::handleStreamMore(TupleOutputStreamProcessor &outputStreams,
                                          std::vector<int> &retPositions)
{
    if (outputStreams.size() != 1) {
        // Cast explicitly so the argument matches the %ld conversion.
        throwFatalException("RecoveryContext::handleStreamMore: Expect 1 output stream "
                            "for recovery, received %ld",
                            static_cast<long>(outputStreams.size()));
    }

    /*
     * Table ids don't change during recovery because
     * catalog changes are not allowed.
     */
    bool hasMore = nextMessage(&outputStreams[0]);

    // Non-zero if some tuples remain, we're just not sure how many.
    int64_t remaining = (hasMore ? 1 : 0);

    for (size_t i = 0; i < outputStreams.size(); i++) {
        retPositions.push_back(static_cast<int>(outputStreams.at(i).position()));
    }

    return remaining;
}
/**
 * Release the block's backing storage.
 *
 * The storage is unmapped with munmap() when USE_MMAP is defined and MEMCHECK
 * is not; in every other configuration it is released with delete[]. A failed
 * munmap() prints strerror(errno) to stdout and is fatal.
 *
 * tupleBlocksAllocated is not decremented here.
 */
TupleBlock::~TupleBlock()
{
#if !defined(MEMCHECK) && defined(USE_MMAP)
    const int unmapResult = ::munmap(m_storage, m_table->m_tableAllocationSize);
    if (unmapResult != 0) {
        std::cout << strerror( errno ) << std::endl;
        throwFatalException("Failed munmap");
    }
#else
    // MEMCHECK builds, and builds without USE_MMAP, allocated with new[].
    delete []m_storage;
#endif
}
static int32_t getAllocationSizeForObject(int length) { static const int32_t NVALUE_LONG_OBJECT_LENGTHLENGTH = 4; static const int32_t MAX_ALLOCATION = ThreadLocalPool::POOLED_MAX_VALUE_LENGTH + NVALUE_LONG_OBJECT_LENGTHLENGTH + CompactingPool::FIXED_OVERHEAD_PER_ENTRY(); int length_to_fit = length + NVALUE_LONG_OBJECT_LENGTHLENGTH + CompactingPool::FIXED_OVERHEAD_PER_ENTRY(); // The -1 and repeated shifting and + 1 are part of the rounding algorithm // that produces the nearest power of 2 greater than or equal to the value. int target = length_to_fit - 1; target |= target >> 1; target |= target >> 2; target |= target >> 4; target |= target >> 8; target |= target >> 16; target++; // Try to shrink the target to "midway" down to the previous power of 2, // if the length fits. // Strictly speaking, a geometric mean (dividing the even power by sqrt(2)) // would give a more consistently proportional over-allocation for values // at slightly different scales, but the arithmetic mean (3/4 of the power) // is fast to calculate and close enough for our purposes. int threeQuartersTarget = target - (target>>2); if (length_to_fit < threeQuartersTarget) { target = threeQuartersTarget; } if (target <= MAX_ALLOCATION) { return target; } if (length_to_fit <= MAX_ALLOCATION) { return MAX_ALLOCATION; } throwFatalException("Attempted to allocate an object larger than the 1 MB limit. Requested size was %d", length); }
/**
 * Look up the view row that matches a source tuple's group-by columns.
 *
 * The search key is built from the group-by columns of oldTuple and looked up
 * through m_index; the result is left in m_existingTuple.
 *
 * @param oldTuple source tuple whose group-by column values form the key.
 * @param expected when true, a missing row is fatal.
 * @return true when a matching row was found, false otherwise.
 */
bool MaterializedViewMetadata::findExistingTuple(TableTuple &oldTuple, bool expected)
{
    // Build the key for this tuple from the group-by columns.
    for (int col = 0; col < m_groupByColumnCount; ++col) {
        m_searchKey.setNValue(col, oldTuple.getNValue(m_groupByColumns[col]));
    }

    // Probe the index for that key.
    m_index->moveToKey(&m_searchKey);
    m_existingTuple = m_index->nextValueAtKey();

    if (!m_existingTuple.isNullTuple()) {
        return true;
    }

    if (expected) {
        std::string name = m_target->name();
        throwFatalException("MaterializedViewMetadata for table %s went"
                            " looking for a tuple in the view and"
                            " expected to find it but didn't", name.c_str());
    }
    return false;
}
/**
 * Create a default-constructed plan node of the requested type.
 *
 * @param type plan node type to instantiate. PLAN_NODE_TYPE_HASHAGGREGATE and
 *             PLAN_NODE_TYPE_AGGREGATE both yield an AggregatePlanNode, which
 *             is constructed with the type.
 * @return a newly allocated plan node (allocated with new).
 *
 * An unrecognized type raises a fatal exception.
 */
voltdb::AbstractPlanNode* getEmptyPlanNode(voltdb::PlanNodeType type)
{
    VOLT_TRACE("Creating an empty PlanNode of type '%s'", plannodeutil::getTypeName(type).c_str());

    switch (type) {
        // Scans and counts
        case (voltdb::PLAN_NODE_TYPE_SEQSCAN):
            return new voltdb::SeqScanPlanNode();
        case (voltdb::PLAN_NODE_TYPE_INDEXSCAN):
            return new voltdb::IndexScanPlanNode();
        case (voltdb::PLAN_NODE_TYPE_INDEXCOUNT):
            return new voltdb::IndexCountPlanNode();
        case (voltdb::PLAN_NODE_TYPE_TABLECOUNT):
            return new voltdb::TableCountPlanNode();

        // Joins
        case (voltdb::PLAN_NODE_TYPE_NESTLOOP):
            return new voltdb::NestLoopPlanNode();
        case (voltdb::PLAN_NODE_TYPE_NESTLOOPINDEX):
            return new voltdb::NestLoopIndexPlanNode();

        // DML
        case (voltdb::PLAN_NODE_TYPE_UPDATE):
            return new voltdb::UpdatePlanNode();
        case (voltdb::PLAN_NODE_TYPE_INSERT):
            return new voltdb::InsertPlanNode();
        case (voltdb::PLAN_NODE_TYPE_DELETE):
            return new voltdb::DeletePlanNode();

        // Aggregate (both flavors share one node class)
        case (voltdb::PLAN_NODE_TYPE_HASHAGGREGATE):
        case (voltdb::PLAN_NODE_TYPE_AGGREGATE):
            return new voltdb::AggregatePlanNode(type);

        // Remaining operators
        case (voltdb::PLAN_NODE_TYPE_UNION):
            return new voltdb::UnionPlanNode();
        case (voltdb::PLAN_NODE_TYPE_ORDERBY):
            return new voltdb::OrderByPlanNode();
        case (voltdb::PLAN_NODE_TYPE_PROJECTION):
            return new voltdb::ProjectionPlanNode();
        case (voltdb::PLAN_NODE_TYPE_MATERIALIZE):
            return new voltdb::MaterializePlanNode();
        case (voltdb::PLAN_NODE_TYPE_SEND):
            return new voltdb::SendPlanNode();
        case (voltdb::PLAN_NODE_TYPE_LIMIT):
            return new voltdb::LimitPlanNode();
        case (voltdb::PLAN_NODE_TYPE_DISTINCT):
            return new voltdb::DistinctPlanNode();
        case (voltdb::PLAN_NODE_TYPE_RECEIVE):
            return new voltdb::ReceivePlanNode();

        default:
            break;
    }
    // Unknown plan node type.
    throwFatalException("Invalid PlanNode type '%d'", type);
}
/* * Serialize to multiple output streams. * Return remaining tuple count, 0 if done, or -1 on error. */ int64_t CopyOnWriteContext::serializeMore(TupleOutputStreamProcessor &outputStreams) { // Don't expect to be re-called after streaming all the tuples. if (m_tuplesRemaining == 0) { throwFatalException("serializeMore() was called again after streaming completed.") } // Need to initialize the output stream list. if (outputStreams.empty()) { throwFatalException("serializeMore() expects at least one output stream."); } outputStreams.open(m_table, m_maxTupleLength, m_partitionId, m_predicates); //=== Tuple processing loop TableTuple tuple(m_table.schema()); // Set to true to break out of the loop after the tuples dry up // or the byte count threshold is hit. bool yield = false; while (!yield) { // Next tuple? bool hasMore = m_iterator->next(tuple); if (hasMore) { // -1 is used as a sentinel value to disable counting for tests. if (m_tuplesRemaining > 0) { m_tuplesRemaining--; } /* * Write the tuple to all the output streams. * Done if any of the buffers filled up. * The returned copy count helps decide when to delete if m_doDelete is true. */ int32_t numCopiesMade = 0; yield = outputStreams.writeRow(m_serializer, tuple, numCopiesMade); /* * May want to delete tuple if processing the actual table. */ if (!m_finishedTableScan) { /* * If this is the table scan, check to see if the tuple is pending * delete and return the tuple if it iscop */ if (tuple.isPendingDelete()) { assert(!tuple.isPendingDeleteOnUndoRelease()); CopyOnWriteIterator *iter = static_cast<CopyOnWriteIterator*>(m_iterator.get()); //Save the extra lookup if possible m_table.deleteTupleStorage(tuple, iter->m_currentBlock); } /* * Delete a moved tuple? * This is used for Elastic rebalancing, which is wrapped in a transaction. * The delete for undo is generic enough to support this operation. 
*/ else if (m_doDelete && numCopiesMade > 0) { m_table.deleteTupleForUndo(tuple.address(), true); } } } else if (!m_finishedTableScan) { /* * After scanning the persistent table switch to scanning the temp * table with the tuples that were backed up. */ m_finishedTableScan = true; m_iterator.reset(m_backedUpTuples.get()->makeIterator()); } else { /* * No more tuples in the temp table and had previously finished the * persistent table. */ if (m_tuplesRemaining > 0) { #ifdef DEBUG throwFatalException("serializeMore(): tuple count > 0 after streaming:\n" "Table name: %s\n" "Table type: %s\n" "Original tuple count: %jd\n" "Active tuple count: %jd\n" "Remaining tuple count: %jd\n" "Compacted block count: %jd\n" "Dirty insert count: %jd\n" "Dirty update count: %jd\n" "Partition column: %d\n", m_table.name().c_str(), m_table.tableType().c_str(), (intmax_t)m_totalTuples, (intmax_t)m_table.activeTupleCount(), (intmax_t)m_tuplesRemaining, (intmax_t)m_blocksCompacted, (intmax_t)m_inserts, (intmax_t)m_updates, m_table.partitionColumn()); #else char message[1024 * 16]; snprintf(message, 1024 * 16, "serializeMore(): tuple count > 0 after streaming:\n" "Table name: %s\n" "Table type: %s\n" "Original tuple count: %jd\n" "Active tuple count: %jd\n" "Remaining tuple count: %jd\n" "Compacted block count: %jd\n" "Dirty insert count: %jd\n" "Dirty update count: %jd\n" "Partition column: %d\n", m_table.name().c_str(), m_table.tableType().c_str(), (intmax_t)m_totalTuples, (intmax_t)m_table.activeTupleCount(), (intmax_t)m_tuplesRemaining, (intmax_t)m_blocksCompacted, (intmax_t)m_inserts, (intmax_t)m_updates, m_table.partitionColumn()); LogManager::getThreadLogger(LOGGERID_HOST)->log(LOGLEVEL_ERROR, message); #endif } // -1 is used for tests when we don't bother counting. Need to force it to 0 here. if (m_tuplesRemaining < 0) { m_tuplesRemaining = 0; } } // All tuples serialized, bail if (m_tuplesRemaining == 0) { /* * CAUTION: m_iterator->next() is NOT side-effect free!!! 
It also * returns the block back to the table if the call causes it to go * over the boundary of used tuples. In case it actually returned * the very last tuple in the table last time it's called, the block * is still hanging around. So we need to call it again to return * the block here. */ if (hasMore) { hasMore = m_iterator->next(tuple); if (hasMore) { assert(false); } } yield = true; } } // end tuple processing while loop // Need to close the output streams and insert row counts. outputStreams.close(); m_serializationBatches++; // Handle the sentinel value of -1 which is passed in from tests that don't // care about the active tuple count. Return max int as if there are always // tuples remaining (until the counter is forced to zero when done). if (m_tuplesRemaining < 0) { return std::numeric_limits<int32_t>::max(); } // Done when the table scan is finished and iteration is complete. return m_tuplesRemaining; }
/**
 * Per-tuple deletion entry point for TempTable.
 *
 * Both parameters are unnamed and ignored: every call reports a fatal
 * exception through throwFatalException, with a message stating that
 * TempTable does not support deleting individual tuples.
 */
bool TempTable::deleteTuple(TableTuple &, bool) {
    throwFatalException("TempTable does not support deleting individual tuples");
}
/**
 * Translate a plan node type into its upper-case display name.
 *
 * @param type the plan node type to name
 * @return the name for the type, e.g. "SEQSCAN" for PLAN_NODE_TYPE_SEQSCAN
 *
 * Any type without an explicit case below is reported through
 * throwFatalException.
 */
std::string getTypeName(voltdb::PlanNodeType type) {
    std::string ret;
    switch (type) {
        // --------------------------------------------------------------
        // Scans and counts
        // --------------------------------------------------------------
        case (voltdb::PLAN_NODE_TYPE_SEQSCAN):
            ret = "SEQSCAN";
            break;
        case (voltdb::PLAN_NODE_TYPE_INDEXSCAN):
            ret = "INDEXSCAN";
            break;
        case (voltdb::PLAN_NODE_TYPE_INDEXCOUNT):
            ret = "INDEXCOUNT";
            break;
        // TableCount
        case (voltdb::PLAN_NODE_TYPE_TABLECOUNT):
            ret = "TABLECOUNT";
            break;
        // --------------------------------------------------------------
        // Joins
        // --------------------------------------------------------------
        case (voltdb::PLAN_NODE_TYPE_NESTLOOP):
            ret = "NESTLOOP";
            break;
        case (voltdb::PLAN_NODE_TYPE_NESTLOOPINDEX):
            ret = "NESTLOOPINDEX";
            break;
        // --------------------------------------------------------------
        // DML
        // --------------------------------------------------------------
        case (voltdb::PLAN_NODE_TYPE_UPDATE):
            ret = "UPDATE";
            break;
        case (voltdb::PLAN_NODE_TYPE_INSERT):
            ret = "INSERT";
            break;
        case (voltdb::PLAN_NODE_TYPE_DELETE):
            ret = "DELETE";
            break;
        // --------------------------------------------------------------
        // Communication
        // --------------------------------------------------------------
        case (voltdb::PLAN_NODE_TYPE_SEND):
            ret = "SEND";
            break;
        case (voltdb::PLAN_NODE_TYPE_RECEIVE):
            ret = "RECEIVE";
            break;
        // --------------------------------------------------------------
        // Everything else
        // --------------------------------------------------------------
        case (voltdb::PLAN_NODE_TYPE_AGGREGATE):
            ret = "AGGREGATE";
            break;
        case (voltdb::PLAN_NODE_TYPE_HASHAGGREGATE):
            ret = "HASHAGGREGATE";
            break;
        case (voltdb::PLAN_NODE_TYPE_UNION):
            ret = "UNION";
            break;
        case (voltdb::PLAN_NODE_TYPE_ORDERBY):
            ret = "ORDERBY";
            break;
        case (voltdb::PLAN_NODE_TYPE_PROJECTION):
            ret = "PROJECTION";
            break;
        case (voltdb::PLAN_NODE_TYPE_MATERIALIZE):
            ret = "MATERIALIZE";
            break;
        case (voltdb::PLAN_NODE_TYPE_LIMIT):
            ret = "LIMIT";
            break;
        case (voltdb::PLAN_NODE_TYPE_DISTINCT):
            ret = "DISTINCT";
            break;
        // --------------------------------------------------------------
        // UNKNOWN
        // --------------------------------------------------------------
        default: {
            // Cast explicitly so the argument type matches the %d conversion,
            // the same way getEmptyPlanNode() formats its error.
            throwFatalException("Invalid PlanNode type '%d'", static_cast<int>(type));
        }
    }
    return (ret);
}
/**
 * Factory: allocate a default-constructed plan node for the given type.
 *
 * @param type the kind of plan node to create
 * @return a plan node allocated with new; the pointer is handed to the caller
 *
 * PLAN_NODE_TYPE_INVALID, and any value that leaves the switch without
 * producing a node, is reported through throwFatalException.
 */
voltdb::AbstractPlanNode* getEmptyPlanNode(voltdb::PlanNodeType type) {
    VOLT_TRACE("Creating an empty PlanNode of type '%s'", planNodeToString(type).c_str());
    voltdb::AbstractPlanNode* node = NULL;

    // NOTE: there is intentionally no "default:" label. Leaving it out lets the
    // compiler enforce complete coverage of the enum.
    switch (type) {
        case voltdb::PLAN_NODE_TYPE_INVALID: {
            throwFatalException("INVALID plan node type");
        }
            break;

        // ---- scans and counts ----
        case voltdb::PLAN_NODE_TYPE_SEQSCAN:
            node = new voltdb::SeqScanPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_INDEXSCAN:
            node = new voltdb::IndexScanPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_INDEXCOUNT:
            node = new voltdb::IndexCountPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_TABLECOUNT:
            node = new voltdb::TableCountPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_MATERIALIZEDSCAN:
            node = new voltdb::MaterializedScanPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_TUPLESCAN:
            node = new voltdb::TupleScanPlanNode();
            break;

        // ---- joins ----
        case voltdb::PLAN_NODE_TYPE_NESTLOOP:
            node = new voltdb::NestLoopPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_NESTLOOPINDEX:
            node = new voltdb::NestLoopIndexPlanNode();
            break;

        // ---- DML ----
        case voltdb::PLAN_NODE_TYPE_UPDATE:
            node = new voltdb::UpdatePlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_INSERT:
            node = new voltdb::InsertPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_DELETE:
            node = new voltdb::DeletePlanNode();
            break;

        // ---- aggregation: all three flavours share one node class ----
        case voltdb::PLAN_NODE_TYPE_HASHAGGREGATE:
        case voltdb::PLAN_NODE_TYPE_AGGREGATE:
        case voltdb::PLAN_NODE_TYPE_PARTIALAGGREGATE:
            node = new voltdb::AggregatePlanNode(type);
            break;

        // ---- set operations, ordering, projection ----
        case voltdb::PLAN_NODE_TYPE_UNION:
            node = new voltdb::UnionPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_ORDERBY:
            node = new voltdb::OrderByPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_PROJECTION:
            node = new voltdb::ProjectionPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_MATERIALIZE:
            node = new voltdb::MaterializePlanNode();
            break;

        // ---- send / limit / receive ----
        case voltdb::PLAN_NODE_TYPE_SEND:
            node = new voltdb::SendPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_LIMIT:
            node = new voltdb::LimitPlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_RECEIVE:
            node = new voltdb::ReceivePlanNode();
            break;
        case voltdb::PLAN_NODE_TYPE_MERGERECEIVE:
            node = new voltdb::MergeReceivePlanNode();
            break;

        // ---- windowing ----
        case voltdb::PLAN_NODE_TYPE_PARTITIONBY:
            node = new voltdb::PartitionByPlanNode();
            break;
    }

    // No case produced a node: the value is not a plan node type handled above.
    if (node == NULL) {
        throwFatalException("Undefined plan node type '%d'", static_cast<int>(type));
    }
    //VOLT_TRACE("created plannode : %s ", typeid(*ret).name());
    return node;
}
void AntiCacheDB::initializeNVM() { char nvm_file_name[150]; char partition_str[50]; m_totalBlocks = 0; #ifdef ANTICACHE_DRAM VOLT_INFO("Allocating anti-cache in DRAM."); m_NVMBlocks = new char[aligned_file_size]; return; #endif // use executor context to figure out which partition we are at int partition_id = (int)m_executorContext->getPartitionId(); sprintf(partition_str, "%d", partition_id); strcpy(nvm_file_name, m_dbDir.c_str()); // there will be one NVM anti-cache file per partition, saved in /mnt/pmfs/anticache-XX strcat(nvm_file_name, "/anticache-"); strcat(nvm_file_name, partition_str); VOLT_INFO("Creating nvm file: %s", nvm_file_name); nvm_file = fopen(nvm_file_name, "w"); if(nvm_file == NULL) { VOLT_ERROR("Anti-Cache initialization error."); VOLT_ERROR("Failed to open PMFS file %s: %s.", nvm_file_name, strerror(errno)); throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str()); } fclose(nvm_file); nvm_file = fopen(nvm_file_name, "rw+"); if(nvm_file == NULL) { VOLT_ERROR("Anti-Cache initialization error."); VOLT_ERROR("Failed to open PMFS file %s: %s.", nvm_file_name, strerror(errno)); throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str()); } nvm_fd = fileno(nvm_file); if(nvm_fd < 0) { VOLT_ERROR("Anti-Cache initialization error."); VOLT_ERROR("Failed to allocate anti-cache PMFS file in directory %s.", m_dbDir.c_str()); throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str()); } if(ftruncate(nvm_fd, NVM_FILE_SIZE) < 0) { VOLT_ERROR("Anti-Cache initialization error."); VOLT_ERROR("Failed to ftruncate anti-cache PMFS file %s: %s", nvm_file_name, strerror(errno)); throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str()); } //off_t aligned_file_size = (((NVM_FILE_SIZE) + MMAP_PAGE_SIZE - 1) / MMAP_PAGE_SIZE * MMAP_PAGE_SIZE); off_t aligned_file_size = NVM_FILE_SIZE; m_NVMBlocks = 
(char*)mmap(NULL, aligned_file_size, PROT_READ | PROT_WRITE, MAP_SHARED, nvm_fd, 0); if(m_NVMBlocks == MAP_FAILED) { VOLT_ERROR("Anti-Cache initialization error."); VOLT_ERROR("Failed to mmap PMFS file %s: %s", nvm_file_name, strerror(errno)); throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str()); } close(nvm_fd); // can safely close file now, mmap creates new reference /* // write out NULL characters to ensure entire file has been fetchted from memory for(int i = 0; i < NVM_FILE_SIZE; i++) { m_NVMBlocks[i] = '\0'; } */ }
/**
 * Serialize the next chunk of tuples into the supplied output buffer.
 *
 * Layout written to out: the partition id, a 4-byte slot for the row count,
 * then the serialized tuples. Tuples come from m_iterator; once that iterator
 * is exhausted for the first time it is replaced by an iterator over
 * m_backedUpTuples and the scan continues.
 *
 * @param out destination buffer; it must have room for at least one tuple
 *            plus an int32, otherwise a fatal exception is raised
 * @return false once the iterator is exhausted after the table scan had
 *         already been marked finished; true when the chunk ended because
 *         the buffer cannot take another tuple or 512Kb of tuple data has
 *         been written
 */
bool CopyOnWriteContext::serializeMore(ReferenceSerializeOutput *out) {
    out->writeInt(m_partitionId);
    int rowCount = 0;
    // Slot that receives the row count once it is known.
    const std::size_t rowCountSlot = out->reserveBytes(4);

    TableTuple tuple(m_table->schema());
    if (out->remaining() < (m_maxTupleLength + sizeof(int32_t))) {
        throwFatalException("Serialize more should never be called "
                "a 2nd time after return indicating there is no more data");
    }

    std::size_t chunkBytes = 0;
    while (out->remaining() >= (m_maxTupleLength + sizeof(int32_t))) {
        if (!m_iterator->next(tuple)) {
            if (!m_finishedTableScan) {
                // The persistent table is done; carry on with the tuples
                // that were backed up in the temp table.
                m_finishedTableScan = true;
                m_iterator.reset(m_backedUpTuples.get()->makeIterator());
                continue;
            }
            // Both sources are exhausted: record the count and report completion.
            out->writeIntAt(rowCountSlot, rowCount);
            return false;
        }

        const std::size_t startOffset = out->position();
        m_serializer->serializeTo(tuple, out);
        const std::size_t endOffset = out->position();
        m_tuplesSerialized++;
        ++rowCount;

        // During the table scan, a tuple flagged as pending delete has its
        // storage released now that it has been serialized.
        if (!m_finishedTableScan && tuple.isPendingDelete()) {
            assert(!tuple.isPendingDeleteOnUndoRelease());
            if (m_table->m_schema->getUninlinedObjectColumnCount() != 0) {
                m_table->decreaseStringMemCount(tuple.getNonInlinedMemorySize());
            }
            tuple.setPendingDeleteFalse();
            tuple.freeObjectColumns();
            CopyOnWriteIterator *cowIterator = static_cast<CopyOnWriteIterator*>(m_iterator.get());
            // Save the extra lookup if possible
            m_table->deleteTupleStorage(tuple, cowIterator->m_currentBlock);
        }

        // If we have serialized more than 512Kb of tuple data, stop for a while
        chunkBytes += endOffset - startOffset;
        if (chunkBytes >= 1024 * 512) {
            break;
        }
    }

    // The row count is only known once the loop is done, so it is filled
    // into the slot reserved above.
    out->writeIntAt(rowCountSlot, rowCount);
    return true;
}