/**
 * Activation handler.
 */
TableStreamerContext::ActivationReturnCode
ElasticContext::handleActivation(TableStreamType streamType)
{
    // Can't activate an indexing stream during a snapshot.
    if (m_surgeon.hasStreamType(TABLE_STREAM_SNAPSHOT)) {
        VOLT_ERROR("Elastic context activation is not allowed while a snapshot is in progress.");
        return ACTIVATION_FAILED;
    }

    // Create the index?
    if (streamType == TABLE_STREAM_ELASTIC_INDEX) {
        // Don't allow activation if there's an existing index.
        if (m_surgeon.hasIndex()) {
            VOLT_ERROR("Elastic context activation is not allowed while an index is "
                       "present that has not been completely consumed.");
            return ACTIVATION_FAILED;
        }
        m_surgeon.createIndex();
        m_scanner.reset(new ElasticScanner(getTable(), m_surgeon.getData()));
        m_indexActive = true;
        return ACTIVATION_SUCCEEDED;
    }

    // Clear the index?
    if (streamType == TABLE_STREAM_ELASTIC_INDEX_CLEAR) {
        if (!m_surgeon.isIndexEmpty()) {
            VOLT_ERROR("Elastic index clear is not allowed while an index is "
                       "present that has not been completely consumed.");
            return ACTIVATION_FAILED;
        }
        m_surgeon.dropIndex();
        m_scanner.reset();
        m_indexActive = false;
        return ACTIVATION_SUCCEEDED;
    }

    // It wasn't one of the supported stream types.
    return ACTIVATION_UNSUPPORTED;
}
void AntiCacheDB::shutdownBerkeleyDB() {
    // NOTE: You have to close the database first before closing the environment
    try {
        m_db->close(0);
        delete m_db;
    } catch (DbException &e) {
        VOLT_ERROR("Anti-Cache database closing error: %s", e.what());
        throwFatalException("Failed to close anti-cache database: %s", e.what());
    }

    try {
        m_dbEnv->close(0);
        delete m_dbEnv;
    } catch (DbException &e) {
        VOLT_ERROR("Anti-Cache environment closing error: %s", e.what());
        throwFatalException("Failed to close anti-cache database environment: %s", e.what());
    }
}
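// A minimal standalone sketch of the matching setup/teardown order with the
// Berkeley DB C++ API, since the close ordering above (database before
// environment) is easy to get backwards. The environment path, flags, and
// file name here are illustrative only (the directory must already exist).
#include <db_cxx.h>

int main() {
    // Bring up the environment first, then a database handle inside it.
    DbEnv env(0);
    env.open("/tmp/bdb-demo-env", DB_CREATE | DB_INIT_MPOOL, 0);

    Db db(&env, 0);
    db.open(NULL, "demo.db", NULL, DB_BTREE, DB_CREATE, 0);

    // Tear down in reverse: close the database before its environment,
    // exactly as shutdownBerkeleyDB() does above.
    db.close(0);
    env.close(0);
    return 0;
}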
void SwapTablesPlanNode::loadFromJSONObject(PlannerDomValue obj) {
    AbstractOperationPlanNode::loadFromJSONObject(obj);
    m_otherTargetTableName = obj.valueForKey("OTHER_TARGET_TABLE_NAME").asStr();
    loadStringArrayFromJSONObject("INDEXES", obj, m_theIndexes);
    loadStringArrayFromJSONObject("OTHER_INDEXES", obj, m_otherIndexes);

    VoltDBEngine* engine = ExecutorContext::getEngine();
    m_otherTcd = engine->getTableDelegate(m_otherTargetTableName);
    if ( ! m_otherTcd) {
        VOLT_ERROR("Failed to retrieve second target table from execution engine for PlanNode: %s",
                   debug().c_str());
        //TODO: throw something
    }
}
void warmUpL3(uint32_t* array, uint32_t size) {
    const uint32_t range = L3CacheSize / sizeof(uint32_t);
    uint32_t i, j;
    if (range > size) {
        VOLT_ERROR("Array size is less than L3 cache");
    } else {
        for (i = 0, j = 0; i < range * WARMUPROUND; ++i) {
            j = array[j];
        }
        // Empty format string: keeps j observable so the compiler
        // cannot optimize the warm-up loop away.
        printf("", j);
    }
}
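// Hypothetical driver for warmUpL3(): the loop `j = array[j]` implies the
// array holds indices forming a chain, so a caller has to build such a
// permutation first. A minimal sketch, assuming a single-cycle permutation
// built with Sattolo's algorithm (the size and seed are illustrative).
#include <cstdint>
#include <cstdio>
#include <numeric>
#include <random>
#include <utility>
#include <vector>

int main() {
    const uint32_t size = 1u << 22; // assumed to exceed the L3 range
    std::vector<uint32_t> array(size);
    std::iota(array.begin(), array.end(), 0);

    // Sattolo's algorithm yields one big cycle, so the chase below visits
    // every element once per lap instead of getting stuck in a short cycle.
    std::mt19937 rng(42);
    for (uint32_t i = size - 1; i > 0; --i) {
        std::uniform_int_distribution<uint32_t> dist(0, i - 1);
        std::swap(array[i], array[dist(rng)]);
    }

    uint32_t j = 0;
    for (uint32_t i = 0; i < size; ++i) {
        j = array[j]; // dependent loads defeat hardware prefetching
    }
    printf("%u\n", j); // keep j observable
    return 0;
}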
bool SendExecutor::p_execute(const NValueArray &params) {
    // cout << "Send" << endl;
    VOLT_DEBUG("started SEND");

    Table* inputTable = m_abstractNode->getInputTable();
    assert(inputTable);
    //inputTable->setDependencyId(m_dependencyId);//Multiple send executors sharing the same input table apparently.
    // Just blast the input table on through VoltDBEngine!
    if (!m_engine->send(inputTable)) {
        VOLT_ERROR("Failed to send table '%s'", inputTable->name().c_str());
        return false;
    }
    VOLT_DEBUG("SEND TABLE: %s", inputTable->debug().c_str());

    return true;
}
std::string JNITopend::planForFragmentId(int64_t fragmentId) {
    VOLT_DEBUG("fetching plan for id %d", (int) fragmentId);

    JNILocalFrameBarrier jni_frame = JNILocalFrameBarrier(m_jniEnv, 10);
    if (jni_frame.checkResult() < 0) {
        VOLT_ERROR("Unable to load dependency: jni frame error.");
        throw std::exception();
    }

    jbyteArray jbuf = (jbyteArray)(m_jniEnv->CallObjectMethod(m_javaExecutionEngine,
                                                              m_planForFragmentIdMID,
                                                              fragmentId));
    // jbuf might be NULL or might have 0 length here.  In that case
    // we'll return a 0-length string to the caller, who will return
    // an appropriate error.
    return jbyteArrayToStdString(m_jniEnv, jni_frame, jbuf);
}
std::string JNITopend::decodeBase64AndDecompress(const std::string& base64Str) {
    JNILocalFrameBarrier jni_frame = JNILocalFrameBarrier(m_jniEnv, 2);
    if (jni_frame.checkResult() < 0) {
        VOLT_ERROR("Unable to load dependency: jni frame error.");
        throw std::exception();
    }

    jstring jBase64Str = m_jniEnv->NewStringUTF(base64Str.c_str());
    if (m_jniEnv->ExceptionCheck()) {
        m_jniEnv->ExceptionDescribe();
        throw std::exception();
    }

    jbyteArray jbuf = (jbyteArray)m_jniEnv->CallStaticObjectMethod(m_encoderClass,
                                                                   m_decodeBase64AndDecompressToBytesMID,
                                                                   jBase64Str);
    return jbyteArrayToStdString(m_jniEnv, jni_frame, jbuf);
}
void JNITopend::fallbackToEEAllocatedBuffer(char *buffer, size_t length) {
    JNILocalFrameBarrier jni_frame = JNILocalFrameBarrier(m_jniEnv, 1);
    if (jni_frame.checkResult() < 0) {
        VOLT_ERROR("Unable to load dependency: jni frame error.");
        throw std::exception();
    }

    jobject jbuffer = m_jniEnv->NewDirectByteBuffer(buffer, length);
    if (jbuffer == NULL) {
        m_jniEnv->ExceptionDescribe();
        throw std::exception();
    }

    m_jniEnv->CallVoidMethod(m_javaExecutionEngine, m_fallbackToEEAllocatedBufferMID, jbuffer);
    if (m_jniEnv->ExceptionCheck()) {
        m_jniEnv->ExceptionDescribe();
        throw std::exception();
    }
}
bool ExceptIntersectSetOperator::processTuplesDo() {
    // Map to keep candidate tuples. The key is the tuple itself.
    // The value is the tuple's repeat count in the final table.
    TupleMap tuples;

    // Collect all tuples from the first set
    assert(!m_input_tables.empty());
    Table* input_table = m_input_tables[0];
    collectTuples(*input_table, tuples);

    //
    // For each remaining input table, collect its tuples into a separate map
    // and subtract/intersect it from/with the first one
    //
    TupleMap next_tuples;
    for (size_t ctr = 1, cnt = m_input_tables.size(); ctr < cnt; ctr++) {
        next_tuples.clear();
        Table* input_table = m_input_tables[ctr];
        assert(input_table);
        collectTuples(*input_table, next_tuples);
        if (m_is_except) {
            exceptTupleMaps(tuples, next_tuples);
        } else {
            intersectTupleMaps(tuples, next_tuples);
        }
    }

    // Insert the remaining tuples into our output table
    for (TupleMap::const_iterator mapIt = tuples.begin(); mapIt != tuples.end(); ++mapIt) {
        TableTuple tuple = mapIt->first;
        for (size_t i = 0; i < mapIt->second; ++i) {
            if (!m_output_table->insertTuple(tuple)) {
                VOLT_ERROR("Failed to insert tuple from input table '%s' into"
                           " output table '%s'",
                           m_input_tables[0]->name().c_str(),
                           m_output_table->name().c_str());
                return false;
            }
        }
    }
    return true;
}
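// The except/intersect helpers above are not shown; as an illustration of the
// multiset ("ALL") semantics they presumably implement over the tuple -> repeat
// count maps, here is a self-contained sketch using std::string keys in place
// of tuples (the types and names are hypothetical).
#include <algorithm>
#include <cstddef>
#include <map>
#include <string>

typedef std::map<std::string, std::size_t> CountMap;

// EXCEPT ALL: subtract b's count from a's, erasing entries that hit zero.
static void exceptMaps(CountMap& a, const CountMap& b) {
    for (CountMap::const_iterator it = b.begin(); it != b.end(); ++it) {
        CountMap::iterator found = a.find(it->first);
        if (found == a.end()) continue;
        if (found->second <= it->second) a.erase(found);
        else found->second -= it->second;
    }
}

// INTERSECT ALL: keep only keys present in both maps, at the minimum count.
static void intersectMaps(CountMap& a, const CountMap& b) {
    for (CountMap::iterator it = a.begin(); it != a.end(); ) {
        CountMap::const_iterator found = b.find(it->first);
        if (found == b.end()) a.erase(it++);
        else { it->second = std::min(it->second, found->second); ++it; }
    }
}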
void JNITopend::crashVoltDB(FatalException e) {
    // Enough references for the reason string, traces array, and traces strings
    JNILocalFrameBarrier jni_frame =
        JNILocalFrameBarrier(m_jniEnv, static_cast<int32_t>(e.m_traces.size()) + 4);
    if (jni_frame.checkResult() < 0) {
        VOLT_ERROR("Unable to load dependency: jni frame error.");
        throw std::exception();
    }

    jstring jReason = m_jniEnv->NewStringUTF(e.m_reason.c_str());
    if (m_jniEnv->ExceptionCheck()) {
        m_jniEnv->ExceptionDescribe();
        throw std::exception();
    }

    jstring jFilename = m_jniEnv->NewStringUTF(e.m_filename);
    if (m_jniEnv->ExceptionCheck()) {
        m_jniEnv->ExceptionDescribe();
        throw std::exception();
    }

    jobjectArray jTracesArray = m_jniEnv->NewObjectArray(static_cast<jsize>(e.m_traces.size()),
                                                         m_jniEnv->FindClass("java/lang/String"),
                                                         NULL);
    if (m_jniEnv->ExceptionCheck()) {
        m_jniEnv->ExceptionDescribe();
        throw std::exception();
    }

    for (size_t ii = 0; ii < e.m_traces.size(); ii++) {
        jstring traceString = m_jniEnv->NewStringUTF(e.m_traces[ii].c_str());
        m_jniEnv->SetObjectArrayElement(jTracesArray, static_cast<jsize>(ii), traceString);
    }

    m_jniEnv->CallStaticVoidMethod(m_jniEnv->GetObjectClass(m_javaExecutionEngine),
                                   m_crashVoltDBMID,
                                   jReason,
                                   jTracesArray,
                                   jFilename,
                                   static_cast<int32_t>(e.m_lineno));
    throw std::exception();
}
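// A hedged sketch of what the JNILocalFrameBarrier capacity math above is for:
// JNI only guarantees a small number of local references per native frame, so
// code that creates one reference per trace plus a few extras reserves them up
// front. This standalone helper uses the raw PushLocalFrame/PopLocalFrame API
// (the helper function itself is hypothetical).
#include <jni.h>
#include <string>
#include <vector>

jobjectArray buildTraceArray(JNIEnv* env, const std::vector<std::string>& traces) {
    // One local ref per trace string, plus the array, the class, and slack.
    if (env->PushLocalFrame(static_cast<jint>(traces.size()) + 4) < 0) {
        return NULL; // frame allocation failed; an OutOfMemoryError is pending
    }
    jobjectArray arr = env->NewObjectArray(static_cast<jsize>(traces.size()),
                                           env->FindClass("java/lang/String"),
                                           NULL);
    for (jsize i = 0; i < static_cast<jsize>(traces.size()); ++i) {
        env->SetObjectArrayElement(arr, i, env->NewStringUTF(traces[i].c_str()));
    }
    // Pop the frame while promoting `arr` into the caller's frame.
    return static_cast<jobjectArray>(env->PopLocalFrame(arr));
}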
/**
 * Get statistics for the specified resources
 * @param sst StatisticsSelectorType of the resources
 * @param catalogIds CatalogIds of the resources statistics should be retrieved for
 * @param interval Whether to return counters since the beginning or since the last time this was called
 * @param now Timestamp to embed in each row
 */
Table* StatsAgent::getStats(voltdb::StatisticsSelectorType sst,
                            std::vector<voltdb::CatalogId> catalogIds,
                            bool interval, int64_t now)
{
    assert (catalogIds.size() > 0);
    if (catalogIds.size() < 1) {
        return NULL;
    }

    std::map<voltdb::CatalogId, voltdb::StatsSource*> *statsSources =
        &m_statsCategoryByStatsSelector[sst];
    Table *statsTable = m_statsTablesByStatsSelector[sst];
    if (statsTable == NULL) {
        /*
         * Initialize the output table the first time.
         */
        voltdb::StatsSource *ss = (*statsSources)[catalogIds[0]];
        voltdb::Table *table = ss->getStatsTable(interval, now);
        statsTable = reinterpret_cast<Table*>(
            voltdb::TableFactory::getTempTable(
                table->databaseId(),
                std::string("Persistent Table aggregated stats temp table"),
                TupleSchema::createTupleSchema(table->schema()),
                table->columnNames(),
                NULL));
        m_statsTablesByStatsSelector[sst] = statsTable;
    }

    statsTable->deleteAllTuples(false);

    for (size_t ii = 0; ii < catalogIds.size(); ii++) {
        voltdb::StatsSource *ss = (*statsSources)[catalogIds[ii]];
        assert (ss != NULL);
        if (ss == NULL) {
            VOLT_ERROR("Missing StatsSource for CatalogId #%d\n", catalogIds[ii]);
            continue;
        }
        voltdb::TableTuple *statsTuple = ss->getStatsTuple(interval, now);
        statsTable->insertTuple(*statsTuple);
    }
    return statsTable;
}
/*
 * Recalculate how many tuples are remaining and compare to the countdown value.
 * This method does not work once we're in the middle of the temp table.
 * Only call it while m_finishedTableScan == false.
 */
void CopyOnWriteContext::checkRemainingTuples(const std::string &label) {
    assert(m_iterator != NULL);
    assert(!m_finishedTableScan);
    intmax_t count1 = static_cast<CopyOnWriteIterator*>(m_iterator.get())->countRemaining();
    TableTuple tuple(getTable().schema());
    boost::scoped_ptr<TupleIterator> iter(m_backedUpTuples.get()->makeIterator());
    intmax_t count2 = 0;
    while (iter->next(tuple)) {
        count2++;
    }
    if (m_tuplesRemaining != count1 + count2) {
        VOLT_ERROR("CopyOnWriteContext::%s remaining tuple count mismatch: "
                   "table=%s partcol=%d count=%jd count1=%jd count2=%jd "
                   "expected=%jd compacted=%jd batch=%jd "
                   "inserts=%jd updates=%jd",
                   label.c_str(),
                   getTable().name().c_str(),
                   getTable().partitionColumn(),
                   count1 + count2, count1, count2,
                   (intmax_t)m_tuplesRemaining,
                   (intmax_t)m_blocksCompacted,
                   (intmax_t)m_serializationBatches,
                   (intmax_t)m_inserts,
                   (intmax_t)m_updates);
    }
}
std::string JNITopend::planForFragmentId(int64_t fragmentId) {
    VOLT_DEBUG("fetching plan for id %d", (int) fragmentId);

    JNILocalFrameBarrier jni_frame = JNILocalFrameBarrier(m_jniEnv, 10);
    if (jni_frame.checkResult() < 0) {
        VOLT_ERROR("Unable to load dependency: jni frame error.");
        throw std::exception();
    }

    jbyteArray jbuf = (jbyteArray)(m_jniEnv->CallObjectMethod(m_javaExecutionEngine,
                                                              m_planForFragmentIdMID,
                                                              fragmentId));
    if (!jbuf) {
        // this will be trapped later ;-)
        return std::string("");
    }

    jsize length = m_jniEnv->GetArrayLength(jbuf);
    if (length > 0) {
        jboolean is_copy;
        jbyte *bytes = m_jniEnv->GetByteArrayElements(jbuf, &is_copy);
        // Add the plan buffer info to the stack object
        // so it'll get cleaned up if loadTuplesFrom throws
        jni_frame.addDependencyRef(is_copy, jbuf, bytes);

        // make a null terminated copy
        boost::scoped_array<char> strdata(new char[length + 1]);
        memcpy(strdata.get(), bytes, length);
        strdata.get()[length] = '\0';

        return std::string(strdata.get());
    }
    else {
        // this will be trapped later ;-)
        return std::string("");
    }
}
AntiCacheBlock AntiCacheDB::readBlock(std::string tableName, int16_t blockId) {
    Dbt key;
    key.set_data(&blockId);
    key.set_size(sizeof(int16_t));

    Dbt value;
    value.set_flags(DB_DBT_MALLOC);

    VOLT_DEBUG("Reading evicted block with id %d", blockId);

    int ret_value = m_db->get(NULL, &key, &value, 0);
    if (ret_value != 0) {
        VOLT_ERROR("Invalid anti-cache blockId '%d' for table '%s'", blockId, tableName.c_str());
        throw UnknownBlockAccessException(tableName, blockId);
    }
    else {
        // m_db->del(NULL, &key, 0); // if we have this the benchmark won't end
        assert(value.get_data() != NULL);
    }

    AntiCacheBlock block(blockId, value);
    return (block);
}
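// Sketch of the DB_DBT_MALLOC read pattern used above: with that flag set,
// Berkeley DB allocates the value buffer and the caller owns it (and must
// free it once done). The handle and key type here are illustrative.
#include <cstdlib>
#include <db_cxx.h>

bool readDemo(Db& db, int16_t blockId) {
    Dbt key(&blockId, sizeof(int16_t));
    Dbt value;
    value.set_flags(DB_DBT_MALLOC); // the library allocates value.get_data()

    if (db.get(NULL, &key, &value, 0) != 0) {
        return false; // e.g. DB_NOTFOUND
    }
    // ... consume value.get_data() / value.get_size() here ...
    free(value.get_data()); // release the library-allocated buffer
    return true;
}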
void addAntiCacheDB(std::string &dbDir, long blockSize, AntiCacheDBType dbType,
                    bool blocking, long maxSize, bool blockMerge) {
    assert(m_antiCacheEnabled == true);
    m_dbType[m_levels] = dbType;

    // MJG: need a better error return (throw exception?)
    if (dbType == ANTICACHEDB_BERKELEY) {
        m_antiCacheDB[m_levels] = new BerkeleyAntiCacheDB(this, dbDir, blockSize, maxSize);
        // m_antiCacheEvictionManager->addAntiCacheDB(new BerkeleyAntiCacheDB(this, dbDir, blockSize, maxSize));
    } else if (dbType == ANTICACHEDB_NVM) {
        m_antiCacheDB[m_levels] = new NVMAntiCacheDB(this, dbDir, blockSize, maxSize);
        //m_antiCacheEvictionManager->addAntiCacheDB(new NVMAntiCacheDB(this, dbDir, blockSize, maxSize));
    } else if (dbType == ANTICACHEDB_ALLOCATORNVM) {
        m_antiCacheDB[m_levels] = new AllocatorNVMAntiCacheDB(this, dbDir, blockSize, maxSize);
        //m_antiCacheEvictionManager->addAntiCacheDB(new NVMAntiCacheDB(this, dbDir, blockSize, maxSize));
    } else {
        VOLT_ERROR("Invalid AntiCacheDBType: %d! Aborting...", (int)dbType);
        assert(m_antiCacheEnabled == false);
    }

    m_antiCacheDB[m_levels]->setBlocking(blocking);
    m_antiCacheDB[m_levels]->setBlockMerge(blockMerge);
    m_blockMerge[m_levels] = blockMerge;
    m_antiCacheEvictionManager->addAntiCacheDB(m_antiCacheDB[m_levels]);
    m_levels++;
}
bool InsertExecutor::p_init(AbstractPlanNode* abstractNode,
                            const ExecutorVector& executorVector)
{
    VOLT_TRACE("init Insert Executor");

    m_node = dynamic_cast<InsertPlanNode*>(abstractNode);
    assert(m_node);
    assert(m_node->getTargetTable());
    assert(m_node->getInputTableCount() == (m_node->isInline() ? 0 : 1));

    Table* targetTable = m_node->getTargetTable();
    m_isUpsert = m_node->isUpsert();

    //
    // The insert node's input schema is fixed.  But if this is an
    // inline node we don't set it here.  We let the parent node set
    // it in p_execute_init.
    //
    // Also, we don't want to set the input table for inline insert nodes.
    //
    if ( ! m_node->isInline()) {
        setDMLCountOutputTable(executorVector.limits());
        m_inputTable = dynamic_cast<AbstractTempTable*>(m_node->getInputTable()); //input table should be temptable
        assert(m_inputTable);
    }
    else {
        m_inputTable = NULL;
    }

    // Target table can be StreamedTable or PersistentTable and must not be NULL
    PersistentTable *persistentTarget = dynamic_cast<PersistentTable*>(targetTable);
    m_partitionColumn = -1;
    StreamedTable *streamTarget = dynamic_cast<StreamedTable*>(targetTable);
    m_hasStreamView = false;
    if (streamTarget != NULL) {
        m_isStreamed = true;
        //See if we have any views.
        m_hasStreamView = streamTarget->hasViews();
        m_partitionColumn = streamTarget->partitionColumn();
    }

    if (m_isUpsert) {
        VOLT_TRACE("init Upsert Executor actually");
        assert( ! m_node->isInline() );
        if (m_isStreamed) {
            VOLT_ERROR("UPSERT is not supported for Stream table %s", targetTable->name().c_str());
        }
        // look up the tuple whether it exists already
        if (persistentTarget->primaryKeyIndex() == NULL) {
            VOLT_ERROR("No primary keys were found in our target table '%s'",
                       targetTable->name().c_str());
        }
    }

    if (persistentTarget) {
        m_partitionColumn = persistentTarget->partitionColumn();
        m_replicatedTableOperation = persistentTarget->isCatalogTableReplicated();
    }

    m_multiPartition = m_node->isMultiPartition();
    m_sourceIsPartitioned = m_node->sourceIsPartitioned();

    // allocate memory for template tuple, set defaults for all columns
    m_templateTupleStorage.init(targetTable->schema());

    TableTuple tuple = m_templateTupleStorage.tuple();

    std::set<int> fieldsExplicitlySet(m_node->getFieldMap().begin(), m_node->getFieldMap().end());
    // These default values are used for an INSERT including the INSERT sub-case of an UPSERT.
    // The defaults are purposely ignored in favor of existing column values
    // for the UPDATE subcase of an UPSERT.
    m_node->initTupleWithDefaultValues(m_engine,
                                       &m_memoryPool,
                                       fieldsExplicitlySet,
                                       tuple,
                                       m_nowFields);
    m_hasPurgeFragment = persistentTarget ? persistentTarget->hasPurgeFragment() : false;

    return true;
}
bool AbstractExecutor::init(VoltDBEngine* engine, const ExecutorVector& executorVector) {
    assert (m_abstractNode);

    //
    // Grab the input tables directly from this node's children
    //
    vector<Table*> input_tables;
    for (int ctr = 0,
             cnt = static_cast<int>(m_abstractNode->getChildren().size());
         ctr < cnt; ctr++) {
        Table* table = m_abstractNode->getChildren()[ctr]->getOutputTable();
        if (table == NULL) {
            VOLT_ERROR("Output table from PlanNode '%s' is NULL",
                       m_abstractNode->getChildren()[ctr]->debug().c_str());
            return false;
        }
        input_tables.push_back(table);
    }
    m_abstractNode->setInputTables(input_tables);

    // Some tables have target tables (scans + operations) that are
    // based on tables under the control of the local storage manager
    // (as opposed to an intermediate result table).  We'll grab them
    // from the VoltDBEngine.  This is kind of a hack job here... is
    // there a better way?
    AbstractScanPlanNode* scan_node = dynamic_cast<AbstractScanPlanNode*>(m_abstractNode);
    AbstractOperationPlanNode* oper_node = dynamic_cast<AbstractOperationPlanNode*>(m_abstractNode);
    if (scan_node || oper_node) {
        Table* target_table = NULL;
        string targetTableName;
        if (scan_node) {
            targetTableName = scan_node->getTargetTableName();
            target_table = scan_node->getTargetTable();
        }
        else if (oper_node) {
            targetTableName = oper_node->getTargetTableName();
            target_table = oper_node->getTargetTable();
        }

        // If the target_table is NULL, then we need to ask the engine
        // for a reference to what we need.
        // Really, we can't enforce this when we load the plan? --izzy 7/3/2010
        bool isPersistentTableScan = (scan_node != NULL && scan_node->isPersistentTableScan());
        if (target_table == NULL && isPersistentTableScan) {
            target_table = engine->getTableByName(targetTableName);
            if (target_table == NULL) {
                VOLT_ERROR("Failed to retrieve target table '%s' "
                           "from execution engine for PlanNode '%s'",
                           targetTableName.c_str(),
                           m_abstractNode->debug().c_str());
                return false;
            }
            TableCatalogDelegate * tcd = engine->getTableDelegate(targetTableName);
            assert(tcd != NULL);
            if (scan_node) {
                scan_node->setTargetTableDelegate(tcd);
            }
            else if (oper_node) {
                oper_node->setTargetTableDelegate(tcd);
            }
        }
    }

    // Call the p_init() method on our derived class
    if (!p_init(m_abstractNode, executorVector)) {
        return false;
    }

    if (m_tmpOutputTable == NULL) {
        m_tmpOutputTable = dynamic_cast<AbstractTempTable*>(m_abstractNode->getOutputTable());
    }
    return true;
}
bool UpdateExecutor::p_init(AbstractPlanNode *abstract_node,
                            const catalog::Database* catalog_db,
                            int* tempTableMemoryInBytes) {
    VOLT_TRACE("init Update Executor");

    UpdatePlanNode* node = dynamic_cast<UpdatePlanNode*>(abstract_node);
    assert(node);
    assert(node->getTargetTable());
    assert(node->getInputTables().size() == 1);
    m_inputTable = dynamic_cast<TempTable*>(node->getInputTables()[0]); //input table should be temptable
    assert(m_inputTable);
    m_targetTable = dynamic_cast<PersistentTable*>(node->getTargetTable()); //target table should be persistenttable
    assert(m_targetTable);
    assert(node->getTargetTable());

    // Our output is just our input table (regardless if plan is single-sited or not)
    node->setOutputTable(node->getInputTables()[0]);

    // record if a full index update is needed, or if these checks can be skipped
    m_updatesIndexes = node->doesUpdateIndexes();

    AbstractPlanNode *child = node->getChildren()[0];
    ProjectionPlanNode *proj_node = NULL;
    if (NULL == child) {
        VOLT_ERROR("Attempted to initialize update executor with NULL child");
        return false;
    }

    PlanNodeType pnt = child->getPlanNodeType();
    if (pnt == PLAN_NODE_TYPE_PROJECTION) {
        proj_node = dynamic_cast<ProjectionPlanNode*>(child);
    }
    else if (pnt == PLAN_NODE_TYPE_SEQSCAN || pnt == PLAN_NODE_TYPE_INDEXSCAN) {
        proj_node = dynamic_cast<ProjectionPlanNode*>(child->getInlinePlanNode(PLAN_NODE_TYPE_PROJECTION));
        assert(NULL != proj_node);
    }

    std::vector<std::string> output_column_names = proj_node->getOutputColumnNames();
    std::string targetTableName = node->getTargetTableName();

    catalog::Table *targetTable = NULL;
    catalog::CatalogMap<catalog::Table> tables = catalog_db->tables();
    for (catalog::CatalogMap<catalog::Table>::field_map_iter i = tables.begin();
         i != tables.end(); i++) {
        catalog::Table *table = (*i).second;
        if (table->name().compare(targetTableName) == 0) {
            targetTable = table;
            break;
        }
    }
    assert(targetTable != NULL);

    catalog::CatalogMap<catalog::Column> columns = targetTable->columns();

    /*
     * The first output column is the tuple address expression and it isn't part of our output,
     * so we skip it when generating the map from input columns to the target table columns.
     */
    for (size_t ii = 1; ii < output_column_names.size(); ii++) {
        std::string outputColumnName = output_column_names[ii];
        catalog::Column *column = columns.get(outputColumnName);
        assert (column != NULL);
        m_inputTargetMap.push_back(std::pair<int, int>(static_cast<int>(ii), column->index()));
    }

    m_inputTargetMapSize = (int)m_inputTargetMap.size();
    m_inputTuple = TableTuple(m_inputTable->schema());
    m_targetTuple = TableTuple(m_targetTable->schema());

    m_partitionColumn = m_targetTable->partitionColumn();
    m_partitionColumnIsString = false;
    if (m_partitionColumn != -1) {
        if (m_targetTable->schema()->columnType(m_partitionColumn) == voltdb::VALUE_TYPE_VARCHAR) {
            m_partitionColumnIsString = true;
        }
    }
    return true;
}
bool UpdateExecutor::p_execute(const NValueArray &params, ReadWriteTracker *tracker) {
    assert(m_inputTable);
    assert(m_targetTable);

    VOLT_TRACE("INPUT TABLE: %s\n", m_inputTable->debug().c_str());
    VOLT_TRACE("TARGET TABLE - BEFORE: %s\n", m_targetTable->debug().c_str());

    assert(m_inputTuple.sizeInValues() == m_inputTable->columnCount());
    assert(m_targetTuple.sizeInValues() == m_targetTable->columnCount());
    TableIterator input_iterator(m_inputTable);
    while (input_iterator.next(m_inputTuple)) {
        //
        // OPTIMIZATION: Single-Sited Query Plans
        // If our beloved UpdatePlanNode is a part of a single-site query plan,
        // then the first column in the input table will be the address of a
        // tuple on the target table that we will want to update. This saves us
        // the trouble of having to do an index lookup.
        //
        void *target_address = m_inputTuple.getNValue(0).castAsAddress();
        m_targetTuple.move(target_address);

        // Read/Write Set Tracking
        if (tracker != NULL) {
            tracker->markTupleWritten(m_targetTable, &m_targetTuple);
        }

        // Loop through the INPUT_COL_IDX->TARGET_COL_IDX mapping and only update
        // the values that we need to. The key thing to note here is that we
        // grab a temp tuple that is a copy of the target tuple (i.e., the tuple
        // we want to update). This ensures that if the input tuple is somehow
        // bringing garbage with it, we're only going to copy what we really
        // need to into the target tuple.
        //
        TableTuple &tempTuple = m_targetTable->getTempTupleInlined(m_targetTuple);
        for (int map_ctr = 0; map_ctr < m_inputTargetMapSize; map_ctr++) {
            tempTuple.setNValue(m_inputTargetMap[map_ctr].second,
                                m_inputTuple.getNValue(m_inputTargetMap[map_ctr].first));
        }

        // if there is a partition column for the target table
        if (m_partitionColumn != -1) {
            // check for partition problems:
            // get the value for the partition column
            NValue value = tempTuple.getNValue(m_partitionColumn);
            bool isLocal = m_engine->isLocalSite(value);

            // if it doesn't map to this site
            if (!isLocal) {
                VOLT_ERROR("Mispartitioned tuple in single-partition plan for"
                           " table '%s'", m_targetTable->name().c_str());
                return false;
            }
        }

#ifdef ARIES
        if (m_engine->isARIESEnabled()) {
            // add persistency check:
            PersistentTable* table = dynamic_cast<PersistentTable*>(m_targetTable);

            // only log if we are writing to a persistent table.
            if (table != NULL) {
                // before image -- target is old val with no updates
                // XXX: what about uninlined fields?
                // should we not be doing
                // m_targetTable->getTempTupleInlined(m_targetTuple); instead?
                TableTuple *beforeImage = &m_targetTuple;

                // after image -- temp is NEW, created using target and input
                TableTuple *afterImage = &tempTuple;

                TableTuple *keyTuple = NULL;
                char *keydata = NULL;
                std::vector<int32_t> modifiedCols;
                int32_t numCols = -1;

                // See if we can do better by using an index instead
                TableIndex *index = table->primaryKeyIndex();
                if (index != NULL) {
                    // First construct tuple for primary key
                    keydata = new char[index->getKeySchema()->tupleLength()];
                    keyTuple = new TableTuple(keydata, index->getKeySchema());

                    for (int i = 0; i < index->getKeySchema()->columnCount(); i++) {
                        keyTuple->setNValue(i, beforeImage->getNValue(index->getColumnIndices()[i]));
                    }

                    // no before image need be recorded, just the primary key
                    beforeImage = NULL;
                }

                // Set the modified column list
                numCols = m_inputTargetMapSize;
                modifiedCols.resize(m_inputTargetMapSize, -1);
                for (int map_ctr = 0; map_ctr < m_inputTargetMapSize; map_ctr++) {
                    // can't use column-id directly, otherwise we would go over vector bounds
                    int pos = m_inputTargetMap[map_ctr].first - 1;
                    modifiedCols.at(pos) = m_inputTargetMap[map_ctr].second;
                }

                // Next, let the input tuple be the diff after image
                afterImage = &m_inputTuple;

                LogRecord *logrecord = new LogRecord(computeTimeStamp(),
                                                     LogRecord::T_UPDATE,  // this is an update record
                                                     LogRecord::T_FORWARD, // the system is running normally
                                                     -1,                   // XXX: prevLSN must be fetched from table!
                                                     m_engine->getExecutorContext()->currentTxnId(), // txn id
                                                     m_engine->getSiteId(), // which execution site
                                                     m_targetTable->name(), // the table affected
                                                     keyTuple,              // primary key
                                                     numCols,
                                                     (numCols > 0) ? &modifiedCols : NULL,
                                                     beforeImage,
                                                     afterImage);

                size_t logrecordLength = logrecord->getEstimatedLength();
                char *logrecordBuffer = new char[logrecordLength];

                FallbackSerializeOutput output;
                output.initializeWithPosition(logrecordBuffer, logrecordLength, 0);

                logrecord->serializeTo(output);

                LogManager* m_logManager = this->m_engine->getLogManager();
                Logger m_ariesLogger = m_logManager->getAriesLogger();
                //VOLT_WARN("m_logManager : %p AriesLogger : %p", &m_logManager, &m_ariesLogger);
                const Logger *logger = m_logManager->getThreadLogger(LOGGERID_MM_ARIES);

                logger->log(LOGLEVEL_INFO, output.data(), output.position());

                delete[] logrecordBuffer;
                logrecordBuffer = NULL;

                delete logrecord;
                logrecord = NULL;

                if (keydata != NULL) {
                    delete[] keydata;
                    keydata = NULL;
                }

                if (keyTuple != NULL) {
                    delete keyTuple;
                    keyTuple = NULL;
                }
            }
        }
#endif

        if (!m_targetTable->updateTuple(tempTuple, m_targetTuple, m_updatesIndexes)) {
            VOLT_INFO("Failed to update tuple from table '%s'",
                      m_targetTable->name().c_str());
            return false;
        }
    }

    VOLT_TRACE("TARGET TABLE - AFTER: %s\n", m_targetTable->debug().c_str());
    // TODO: let's output the result table here, not in the result executor.
    // Same thing in delete/insert.

    // add to the planfragments count of modified tuples
    m_engine->m_tuplesModified += m_inputTable->activeTupleCount();

    return true;
}
bool TempTable::deleteTuple(TableTuple &target, bool deleteAllocatedStrings) {
    VOLT_ERROR("TempTable does not support deleting individual tuples");
    return false;
}
bool DeleteExecutor::p_execute(const NValueArray &params) {
    // target table should be persistenttable
    // update target table reference from table delegate
    PersistentTable* targetTable = dynamic_cast<PersistentTable*>(m_node->getTargetTable());
    assert(targetTable);

    TableTuple targetTuple(targetTable->schema());

    int64_t modified_tuples = 0;

    if (m_truncate) {
        VOLT_TRACE("truncating table %s...", targetTable->name().c_str());
        // count the truncated tuples as deleted
        modified_tuples = targetTable->visibleTupleCount();

        VOLT_TRACE("Delete all rows from table : %s with %d active, %d visible, %d allocated",
                   targetTable->name().c_str(),
                   (int)targetTable->activeTupleCount(),
                   (int)targetTable->visibleTupleCount(),
                   (int)targetTable->allocatedTupleCount());

        // empty the table either by table swap or iteratively deleting tuple-by-tuple
        targetTable->truncateTable(m_engine);
    }
    else {
        assert(m_inputTable);
        assert(m_inputTuple.columnCount() == m_inputTable->columnCount());
        assert(targetTuple.columnCount() == targetTable->columnCount());

        TableIterator inputIterator = m_inputTable->iterator();
        while (inputIterator.next(m_inputTuple)) {
            //
            // OPTIMIZATION: Single-Sited Query Plans
            // If our beloved DeletePlanNode is a part of a single-site query plan,
            // then the first column in the input table will be the address of a
            // tuple on the target table that we will want to blow away. This saves
            // us the trouble of having to do an index lookup.
            //
            void *targetAddress = m_inputTuple.getNValue(0).castAsAddress();
            targetTuple.move(targetAddress);

            // Delete from target table
            targetTable->deleteTuple(targetTuple, true);
        }
        modified_tuples = m_inputTable->tempTableTupleCount();
        VOLT_TRACE("Deleted %d rows from table : %s with %d active, %d visible, %d allocated",
                   (int)modified_tuples,
                   targetTable->name().c_str(),
                   (int)targetTable->activeTupleCount(),
                   (int)targetTable->visibleTupleCount(),
                   (int)targetTable->allocatedTupleCount());
    }

    TableTuple& count_tuple = m_node->getOutputTable()->tempTuple();
    count_tuple.setNValue(0, ValueFactory::getBigIntValue(modified_tuples));
    // try to put the tuple into the output table
    if (!m_node->getOutputTable()->insertTuple(count_tuple)) {
        VOLT_ERROR("Failed to insert tuple count (%ld) into"
                   " output table '%s'",
                   static_cast<long int>(modified_tuples),
                   m_node->getOutputTable()->name().c_str());
        return false;
    }
    m_engine->addToTuplesModified(modified_tuples);

    return true;
}
bool UnionExecutor::p_init(AbstractPlanNode* abstract_node, TempTableLimits* limits) {
    VOLT_TRACE("init Union Executor");

    UnionPlanNode* node = dynamic_cast<UnionPlanNode*>(abstract_node);
    assert(node);

    //
    // First check to make sure they have the same number of columns
    //
    assert(node->getInputTables().size() > 0);
    for (int table_ctr = 1, table_cnt = (int)node->getInputTables().size();
         table_ctr < table_cnt; table_ctr++) {
        if (node->getInputTables()[0]->columnCount() != node->getInputTables()[table_ctr]->columnCount()) {
            VOLT_ERROR("Table '%s' has %d columns, but table '%s' has %d"
                       " columns",
                       node->getInputTables()[0]->name().c_str(),
                       node->getInputTables()[0]->columnCount(),
                       node->getInputTables()[table_ctr]->name().c_str(),
                       node->getInputTables()[table_ctr]->columnCount());
            return false;
        }
    }

    //
    // Then check that they have the same types.
    // The two loops here are broken out so that we don't have to keep grabbing
    // the same column for input_table[0].
    //

    // get the first table
    const TupleSchema *table0Schema = node->getInputTables()[0]->schema();
    // iterate over all columns in the first table
    for (int col_ctr = 0, col_cnt = table0Schema->columnCount();
         col_ctr < col_cnt; col_ctr++) {
        // get the type for the current column
        ValueType type0 = table0Schema->columnType(col_ctr);

        // iterate through all the other tables, comparing one column at a time
        for (int table_ctr = 1, table_cnt = (int)node->getInputTables().size();
             table_ctr < table_cnt; table_ctr++) {
            // get another table
            const TupleSchema *table1Schema = node->getInputTables()[table_ctr]->schema();
            ValueType type1 = table1Schema->columnType(col_ctr);
            if (type0 != type1) {
                // TODO: DEBUG
                VOLT_ERROR("Table '%s' has value type '%s' for column '%d',"
                           " table '%s' has value type '%s' for column '%d'",
                           node->getInputTables()[0]->name().c_str(),
                           getTypeName(type0).c_str(), col_ctr,
                           node->getInputTables()[table_ctr]->name().c_str(),
                           getTypeName(type1).c_str(), col_ctr);
                return false;
            }
        }
    }

    //
    // Create our output table that will hold all the tuples that we are appending into.
    // Since we are assuming that all of the tables have the same number of columns with
    // the same format, we will just grab the first table in the list.
    //
    node->setOutputTable(TableFactory::getCopiedTempTable(node->databaseId(),
                                                          node->getInputTables()[0]->name(),
                                                          node->getInputTables()[0],
                                                          limits));

    m_setOperator = detail::SetOperator::getSetOperator(node);
    return true;
}
int TableCatalogDelegate::init(ExecutorContext *executorContext,
                               catalog::Database &catalogDatabase,
                               catalog::Table &catalogTable) {
    // Create a persistent table for this table in our catalog
    int32_t table_id = catalogTable.relativeIndex();

    // Columns:
    // Column is stored as map<String, Column*> in Catalog. We have to
    // sort it by Column index to preserve column order.
    const int numColumns = static_cast<int>(catalogTable.columns().size());
    vector<ValueType> columnTypes(numColumns);
    vector<int32_t> columnLengths(numColumns);
    vector<bool> columnAllowNull(numColumns);
    // GWW
    vector<bool> columnisEscrow(numColumns);
    map<string, catalog::Column*>::const_iterator col_iterator;
    string *columnNames = new string[numColumns];
    for (col_iterator = catalogTable.columns().begin();
         col_iterator != catalogTable.columns().end(); col_iterator++) {
        const catalog::Column *catalog_column = col_iterator->second;
        const int columnIndex = catalog_column->index();

        const ValueType type = static_cast<ValueType>(catalog_column->type());
        columnTypes[columnIndex] = type;
        const int32_t size = static_cast<int32_t>(catalog_column->size());
        // String lengths are provided; other lengths are derived from the type
        bool varlength = (type == VALUE_TYPE_VARCHAR) || (type == VALUE_TYPE_VARBINARY);
        const int32_t length = varlength ? size
                                         : static_cast<int32_t>(NValue::getTupleStorageSize(type));
        columnLengths[columnIndex] = length;
        columnAllowNull[columnIndex] = catalog_column->nullable();

        // GWW
        columnisEscrow[columnIndex] = catalog_column->escrowColumn();

        columnNames[catalog_column->index()] = catalog_column->name();
    }

    /*
    TupleSchema *schema = TupleSchema::createTupleSchema(columnTypes,
                                                         columnLengths,
                                                         columnAllowNull, true);
    */
    TupleSchema *schema = TupleSchema::createTupleSchema(columnTypes,
                                                         columnLengths,
                                                         columnAllowNull,
                                                         columnisEscrow, true);

    // Indexes
    map<string, TableIndexScheme> index_map;
    map<string, catalog::Index*>::const_iterator idx_iterator;
    for (idx_iterator = catalogTable.indexes().begin();
         idx_iterator != catalogTable.indexes().end(); idx_iterator++) {
        catalog::Index *catalog_index = idx_iterator->second;
        vector<int> index_columns;
        vector<ValueType> column_types;

        // The catalog::Index object now has a list of columns that are to be used
        if (catalog_index->columns().size() == (size_t)0) {
            VOLT_ERROR("Index '%s' in table '%s' does not declare any columns"
                       " to use",
                       catalog_index->name().c_str(),
                       catalogTable.name().c_str());
            delete [] columnNames;
            return -1; // nonzero signals failure (0 means success)
        }

        // Since the columns are not going to come back in the proper order from
        // the catalogs, we'll use the index attribute to make sure we put them
        // in the right order
        index_columns.resize(catalog_index->columns().size());
        column_types.resize(catalog_index->columns().size());
        bool isIntsOnly = true;
        map<string, catalog::ColumnRef*>::const_iterator colref_iterator;
        for (colref_iterator = catalog_index->columns().begin();
             colref_iterator != catalog_index->columns().end(); colref_iterator++) {
            catalog::ColumnRef *catalog_colref = colref_iterator->second;
            if (catalog_colref->index() < 0) {
                VOLT_ERROR("Invalid column '%d' for index '%s' in table '%s'",
                           catalog_colref->index(),
                           catalog_index->name().c_str(),
                           catalogTable.name().c_str());
                delete [] columnNames;
                return -1;
            }
            // check if the column does not have an int type
            if ((catalog_colref->column()->type() != VALUE_TYPE_TINYINT) &&
                (catalog_colref->column()->type() != VALUE_TYPE_SMALLINT) &&
                (catalog_colref->column()->type() != VALUE_TYPE_INTEGER) &&
                (catalog_colref->column()->type() != VALUE_TYPE_BIGINT)) {
                isIntsOnly = false;
            }
            index_columns[catalog_colref->index()] = catalog_colref->column()->index();
            column_types[catalog_colref->index()] = (ValueType) catalog_colref->column()->type();
        }

        TableIndexScheme index_scheme(catalog_index->name(),
                                      (TableIndexType)catalog_index->type(),
                                      index_columns,
                                      column_types,
                                      catalog_index->unique(),
                                      isIntsOnly,
                                      schema);
        index_map[catalog_index->name()] = index_scheme;
    }

    // Constraints
    string pkey_index_id;
    map<string, catalog::Constraint*>::const_iterator constraint_iterator;
    for (constraint_iterator = catalogTable.constraints().begin();
         constraint_iterator != catalogTable.constraints().end();
         constraint_iterator++) {
        catalog::Constraint *catalog_constraint = constraint_iterator->second;

        // Constraint Type
        ConstraintType type = (ConstraintType)catalog_constraint->type();
        switch (type) {
            case CONSTRAINT_TYPE_PRIMARY_KEY:
                // Make sure we have an index to use
                if (catalog_constraint->index() == NULL) {
                    VOLT_ERROR("The '%s' constraint '%s' on table '%s' does"
                               " not specify an index",
                               constraintutil::getTypeName(type).c_str(),
                               catalog_constraint->name().c_str(),
                               catalogTable.name().c_str());
                    delete [] columnNames;
                    return -1;
                }
                // Make sure they didn't declare more than one primary key index
                else if (pkey_index_id.size() > 0) {
                    VOLT_ERROR("Trying to declare a primary key on table '%s'"
                               " using index '%s' but '%s' was already set as"
                               " the primary key",
                               catalogTable.name().c_str(),
                               catalog_constraint->index()->name().c_str(),
                               pkey_index_id.c_str());
                    delete [] columnNames;
                    return -1;
                }
                pkey_index_id = catalog_constraint->index()->name();
                break;
            case CONSTRAINT_TYPE_UNIQUE:
                // Make sure we have an index to use
                // TODO: In the future I would like bring back my Constraint
                //       object so that we can keep track of everything that a
                //       table has...
                if (catalog_constraint->index() == NULL) {
                    VOLT_ERROR("The '%s' constraint '%s' on table '%s' does"
                               " not specify an index",
                               constraintutil::getTypeName(type).c_str(),
                               catalog_constraint->name().c_str(),
                               catalogTable.name().c_str());
                    delete [] columnNames;
                    return -1;
                }
                break;
            // Unsupported
            case CONSTRAINT_TYPE_CHECK:
            case CONSTRAINT_TYPE_FOREIGN_KEY:
            case CONSTRAINT_TYPE_MAIN:
                VOLT_WARN("Unsupported type '%s' for constraint '%s'",
                          constraintutil::getTypeName(type).c_str(),
                          catalog_constraint->name().c_str());
                break;
            // Unknown
            default:
                VOLT_ERROR("Invalid constraint type '%s' for '%s'",
                           constraintutil::getTypeName(type).c_str(),
                           catalog_constraint->name().c_str());
                delete [] columnNames;
                return -1;
        }
    }

    // Build the index array
    vector<TableIndexScheme> indexes;
    TableIndexScheme pkey_index;
    map<string, TableIndexScheme>::const_iterator index_iterator;
    for (index_iterator = index_map.begin(); index_iterator != index_map.end();
         index_iterator++) {
        // Exclude the primary key
        if (index_iterator->first.compare(pkey_index_id) == 0) {
            pkey_index = index_iterator->second;
        // Just add it to the list
        } else {
            indexes.push_back(index_iterator->second);
        }
    }

    // partition column:
    const catalog::Column* partitionColumn = catalogTable.partitioncolumn();
    int partitionColumnIndex = -1;
    if (partitionColumn != NULL) {
        partitionColumnIndex = partitionColumn->index();
    }

    if (pkey_index_id.size() == 0) {
        int32_t databaseId = catalogDatabase.relativeIndex();
        m_table = TableFactory::getPersistentTable(databaseId, executorContext,
                                                   catalogTable.name(), schema, columnNames,
                                                   indexes, partitionColumnIndex,
                                                   isExportEnabledForTable(catalogDatabase, table_id),
                                                   isTableExportOnly(catalogDatabase, table_id));
    }
    else {
        int32_t databaseId = catalogDatabase.relativeIndex();
        m_table = TableFactory::getPersistentTable(databaseId, executorContext,
                                                   catalogTable.name(), schema, columnNames,
                                                   pkey_index, indexes, partitionColumnIndex,
                                                   isExportEnabledForTable(catalogDatabase, table_id),
                                                   isTableExportOnly(catalogDatabase, table_id));
    }

    delete[] columnNames;

    m_exportEnabled = isExportEnabledForTable(catalogDatabase, table_id);
    m_table->incrementRefcount();
    return 0;
}
BOOST_FOREACH (void* entry, m_allocations) {
    VOLT_ERROR("Missing deallocation for %p at:", entry);
}
bool UnionExecutor::p_init(AbstractPlanNode* abstract_node,
                           const ExecutorVector& executorVector) {
    VOLT_TRACE("init Union Executor");
    assert(! executorVector.isLargeQuery());

    UnionPlanNode* node = dynamic_cast<UnionPlanNode*>(abstract_node);
    assert(node);

    //
    // First check to make sure they have the same number of columns
    //
    assert(node->getInputTableCount() > 0);

    Table* input_table_0 = node->getInputTable(0);
    const TupleSchema *table_0_schema = input_table_0->schema();

    for (int table_ctr = 1, table_cnt = (int)node->getInputTableCount();
         table_ctr < table_cnt; ++table_ctr) {
        Table* input_table_n = node->getInputTable(table_ctr);
        if (input_table_0->columnCount() != input_table_n->columnCount()) {
            VOLT_ERROR("Table '%s' has %d columns, but table '%s' has %d"
                       " columns",
                       input_table_0->name().c_str(),
                       input_table_0->columnCount(),
                       input_table_n->name().c_str(),
                       input_table_n->columnCount());
            return false;
        }

        //
        // Then check that they have the same types
        //

        // iterate over all columns in the first table
        for (int col_ctr = 0, col_cnt = table_0_schema->columnCount();
             col_ctr < col_cnt; col_ctr++) {
            // get the type for the current column
            ValueType type_0 = table_0_schema->columnType(col_ctr);

            const TupleSchema *table_n_schema = input_table_n->schema();
            ValueType type_n = table_n_schema->columnType(col_ctr);
            if (type_0 != type_n) {
                VOLT_ERROR("Table '%s' has value type '%s' for column '%d',"
                           " table '%s' has value type '%s' for column '%d'",
                           input_table_0->name().c_str(), getTypeName(type_0).c_str(), col_ctr,
                           input_table_n->name().c_str(), getTypeName(type_n).c_str(), col_ctr);
                return false;
            }
        }
    }

    //
    // Create our output table that will hold all the tuples that we are appending into.
    // Since we are assuming that all of the tables have the same number of columns with
    // the same format, we will just grab the first table in the list.
    //
    node->setOutputTable(TableFactory::buildCopiedTempTable(node->getInputTable(0)->name(),
                                                            node->getInputTable(0),
                                                            executorVector));

    m_setOperator.reset(detail::SetOperator::getSetOperator(node));
    return true;
}
void NVMAntiCacheDB::initializeDB() {
    char nvm_file_name[150];
    char partition_str[50];

    m_blockIndex = 0;
    m_nextFreeBlock = 0;
    m_monoBlockID = 0;

    // TODO: Make DRAM based store a separate type
#ifdef ANTICACHE_DRAM
    VOLT_INFO("Allocating anti-cache in DRAM.");
    m_NVMBlocks = new char[m_maxDBSize];
    return;
#endif

    int partition_id;
    // use executor context to figure out which partition we are at.
    // if there is no executor context, assume this is a test and let it go
    if (!m_executorContext) {
        VOLT_WARN("NVMAntiCacheDB has no executor context. If this is an EE test, don't worry\n");
        partition_id = 0;
    } else {
        partition_id = (int)m_executorContext->getPartitionId();
    }

    sprintf(partition_str, "%d", partition_id);

    strcpy(nvm_file_name, m_dbDir.c_str());
    // there will be one NVM anti-cache file per partition, saved in /mnt/pmfs/anticache-XX
    strcat(nvm_file_name, "/anticache-");
    strcat(nvm_file_name, partition_str);
    VOLT_INFO("Creating size %ld nvm file: %s", m_maxDBSize, nvm_file_name);
    nvm_file = fopen(nvm_file_name, "w");

    if (nvm_file == NULL) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to open PMFS file %s: %s.", nvm_file_name, strerror(errno));
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    fclose(nvm_file);
    nvm_file = fopen(nvm_file_name, "rw+");

    if (nvm_file == NULL) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to open PMFS file %s: %s.", nvm_file_name, strerror(errno));
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    nvm_fd = fileno(nvm_file);
    if (nvm_fd < 0) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to allocate anti-cache PMFS file in directory %s.", m_dbDir.c_str());
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    if (ftruncate(nvm_fd, m_maxDBSize) < 0) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to ftruncate anti-cache PMFS file %s: %s", nvm_file_name, strerror(errno));
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    //off_t aligned_file_size = (((NVM_FILE_SIZE) + MMAP_PAGE_SIZE - 1) / MMAP_PAGE_SIZE * MMAP_PAGE_SIZE);
    off_t aligned_file_size = (off_t)m_maxDBSize;

    m_NVMBlocks = (char*)mmap(NULL, aligned_file_size, PROT_READ | PROT_WRITE, MAP_SHARED, nvm_fd, 0);

    if (m_NVMBlocks == MAP_FAILED) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to mmap PMFS file %s: %s", nvm_file_name, strerror(errno));
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    close(nvm_fd); // can safely close the file now; mmap holds its own reference

    // write out NUL characters to ensure the entire file has been fetched into memory
    for (long i = 0; i < m_maxDBSize; i++) {
        m_NVMBlocks[i] = '\0';
    }
}
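// The create/size/map dance above, reduced to a standalone POSIX sketch
// (the path and size are illustrative). The key points it mirrors: ftruncate
// fixes the file's size before mapping, the descriptor can be closed once
// mmap succeeds, and touching every byte faults the whole file in.
#include <cstdio>
#include <cstring>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main() {
    const off_t size = 1 << 20;
    int fd = open("/tmp/anticache-demo", O_RDWR | O_CREAT, 0644);
    if (fd < 0) { perror("open"); return 1; }
    if (ftruncate(fd, size) < 0) { perror("ftruncate"); return 1; }

    void* mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (mem == MAP_FAILED) { perror("mmap"); return 1; }
    char* blocks = static_cast<char*>(mem);
    close(fd); // the mapping holds its own reference to the file

    memset(blocks, 0, size); // touch every page, like the zeroing loop above
    munmap(blocks, size);
    return 0;
}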
bool AbstractExecutor::init(VoltDBEngine *engine,
                            const catalog::Database* catalog_db,
                            int* tempTableMemoryInBytes) {
    assert (abstract_node);

    //
    // Grab the input tables directly from this node's children
    //
    std::vector<Table*> input_tables;
    for (int ctr = 0, cnt = (int)abstract_node->getChildren().size(); ctr < cnt; ctr++) {
        Table* table = abstract_node->getChildren()[ctr]->getOutputTable();
        if (table == NULL) {
            VOLT_ERROR("Output table from PlanNode '%s' is NULL",
                       abstract_node->getChildren()[ctr]->debug().c_str());
            return false;
        }
        input_tables.push_back(table);
    }
    abstract_node->setInputTables(input_tables);

    // Some tables have target tables (scans + operations) that are
    // based on tables under the control of the local storage manager
    // (as opposed to an intermediate result table). We'll grab them
    // from the HStoreEngine. This is kind of a hack job here... is
    // there a better way?
    AbstractScanPlanNode *scan_node = dynamic_cast<AbstractScanPlanNode*>(abstract_node);
    AbstractOperationPlanNode *oper_node = dynamic_cast<AbstractOperationPlanNode*>(abstract_node);
    bool requires_target_table = false;
    if (scan_node || oper_node) {
        requires_target_table = true;
        Table* target_table = NULL;
        std::string targetTableName;
        if (scan_node) {
            targetTableName = scan_node->getTargetTableName();
            target_table = scan_node->getTargetTable();
        }
        else if (oper_node) {
            targetTableName = oper_node->getTargetTableName();
            target_table = oper_node->getTargetTable();
        }

        // If the target_table is NULL, then we need to ask the engine
        // for a reference to what we need
        if (target_table == NULL) {
            target_table = engine->getTable(targetTableName);
            if (target_table == NULL) {
                VOLT_ERROR("Failed to retrieve target table '%s' "
                           "from execution engine for PlanNode '%s'",
                           targetTableName.c_str(),
                           abstract_node->debug().c_str());
                return false;
            }
            if (scan_node) {
                scan_node->setTargetTable(target_table);
            }
            else if (oper_node) {
                oper_node->setTargetTable(target_table);
            }
        }
    }
    this->needs_outputtable_clear_cached = needsOutputTableClear();

    // Call the p_init() method on our derived class
    try {
        if (!this->p_init(abstract_node, catalog_db, tempTableMemoryInBytes))
            return false;
    } catch (std::exception& err) {
        char message[128];
        // snprintf guards against overflowing the fixed-size message buffer
        snprintf(message, sizeof message,
                 "The Executor failed to initialize PlanNode '%s'",
                 abstract_node->debug().c_str());
        throw SerializableEEException(VOLT_EE_EXCEPTION_TYPE_EEEXCEPTION, message);
    }
    Table *tmp_output_table_base = abstract_node->getOutputTable();
    this->tmp_output_table = dynamic_cast<TempTable*>(tmp_output_table_base);

    // determines whether the output table should be cleared or not.
    // a specific executor might not need (and must not do) clearing.
    if (!this->needs_outputtable_clear_cached) {
        VOLT_TRACE("Did not clear output table because the derived class"
                   " answered so");
        this->tmp_output_table = NULL;
    }
    return true;
}
void AntiCacheDB::initializeNVM() {
    char nvm_file_name[150];
    char partition_str[50];

    m_totalBlocks = 0;

#ifdef ANTICACHE_DRAM
    VOLT_INFO("Allocating anti-cache in DRAM.");
    // NOTE: this originally allocated `aligned_file_size` bytes, but that
    // variable is only declared further down; NVM_FILE_SIZE is the value it
    // ends up holding, so allocate that much here.
    m_NVMBlocks = new char[NVM_FILE_SIZE];
    return;
#endif

    // use executor context to figure out which partition we are at
    int partition_id = (int)m_executorContext->getPartitionId();
    sprintf(partition_str, "%d", partition_id);

    strcpy(nvm_file_name, m_dbDir.c_str());
    // there will be one NVM anti-cache file per partition, saved in /mnt/pmfs/anticache-XX
    strcat(nvm_file_name, "/anticache-");
    strcat(nvm_file_name, partition_str);
    VOLT_INFO("Creating nvm file: %s", nvm_file_name);
    nvm_file = fopen(nvm_file_name, "w");

    if (nvm_file == NULL) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to open PMFS file %s: %s.", nvm_file_name, strerror(errno));
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    fclose(nvm_file);
    nvm_file = fopen(nvm_file_name, "rw+");

    if (nvm_file == NULL) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to open PMFS file %s: %s.", nvm_file_name, strerror(errno));
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    nvm_fd = fileno(nvm_file);
    if (nvm_fd < 0) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to allocate anti-cache PMFS file in directory %s.", m_dbDir.c_str());
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    if (ftruncate(nvm_fd, NVM_FILE_SIZE) < 0) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to ftruncate anti-cache PMFS file %s: %s", nvm_file_name, strerror(errno));
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    //off_t aligned_file_size = (((NVM_FILE_SIZE) + MMAP_PAGE_SIZE - 1) / MMAP_PAGE_SIZE * MMAP_PAGE_SIZE);
    off_t aligned_file_size = NVM_FILE_SIZE;

    m_NVMBlocks = (char*)mmap(NULL, aligned_file_size, PROT_READ | PROT_WRITE, MAP_SHARED, nvm_fd, 0);

    if (m_NVMBlocks == MAP_FAILED) {
        VOLT_ERROR("Anti-Cache initialization error.");
        VOLT_ERROR("Failed to mmap PMFS file %s: %s", nvm_file_name, strerror(errno));
        throwFatalException("Failed to initialize anti-cache PMFS file in directory %s.", m_dbDir.c_str());
    }

    close(nvm_fd); // can safely close the file now; mmap holds its own reference

    /*
    // write out NUL characters to ensure the entire file has been fetched into memory
    for (int i = 0; i < NVM_FILE_SIZE; i++) {
        m_NVMBlocks[i] = '\0';
    }
    */
}
bool DeleteExecutor::p_execute(const NValueArray &params, ReadWriteTracker *tracker) {
    assert(m_targetTable);

    if (m_truncate) {
        VOLT_TRACE("truncating table %s...", m_targetTable->name().c_str());
        // count the truncated tuples as deleted
        m_engine->m_tuplesModified += m_inputTable->activeTupleCount();

#ifdef ARIES
        if (m_engine->isARIESEnabled()) {
            // no need of persistency check, m_targetTable is
            // always persistent for deletes
            LogRecord *logrecord = new LogRecord(computeTimeStamp(),
                                                 LogRecord::T_TRUNCATE, // this is a truncate record
                                                 LogRecord::T_FORWARD,  // the system is running normally
                                                 -1,                    // XXX: prevLSN must be fetched from table!
                                                 m_engine->getExecutorContext()->currentTxnId(), // txn id
                                                 m_engine->getSiteId(), // which execution site
                                                 m_targetTable->name(), // the table affected
                                                 NULL,                  // primary key irrelevant
                                                 -1,                    // irrelevant numCols
                                                 NULL,                  // list of modified cols irrelevant
                                                 NULL,                  // before image irrelevant
                                                 NULL                   // after image irrelevant
                                                 );

            size_t logrecordLength = logrecord->getEstimatedLength();
            char *logrecordBuffer = new char[logrecordLength];

            FallbackSerializeOutput output;
            output.initializeWithPosition(logrecordBuffer, logrecordLength, 0);

            logrecord->serializeTo(output);

            LogManager* m_logManager = this->m_engine->getLogManager();
            Logger m_ariesLogger = m_logManager->getAriesLogger();
            //VOLT_WARN("m_logManager : %p AriesLogger : %p", &m_logManager, &m_ariesLogger);
            const Logger *logger = m_logManager->getThreadLogger(LOGGERID_MM_ARIES);

            logger->log(LOGLEVEL_INFO, output.data(), output.position());

            delete[] logrecordBuffer;
            logrecordBuffer = NULL;

            delete logrecord;
            logrecord = NULL;
        }
#endif

        //m_engine->context().incrementTuples(m_targetTable->activeTupleCount());
        // actually delete all the tuples
        m_targetTable->deleteAllTuples(true);
        return true;
    }
    // XXX : ARIES : Not sure if else is needed?

    assert(m_inputTable);
    assert(m_inputTuple.sizeInValues() == m_inputTable->columnCount());
    assert(m_targetTuple.sizeInValues() == m_targetTable->columnCount());
    TableIterator inputIterator(m_inputTable);
    while (inputIterator.next(m_inputTuple)) {
        //
        // OPTIMIZATION: Single-Sited Query Plans
        // If our beloved DeletePlanNode is a part of a single-site query plan,
        // then the first column in the input table will be the address of a
        // tuple on the target table that we will want to blow away. This saves
        // us the trouble of having to do an index lookup.
        //
        void *targetAddress = m_inputTuple.getNValue(0).castAsAddress();
        m_targetTuple.move(targetAddress);

        // Read/Write Set Tracking
        if (tracker != NULL) {
            tracker->markTupleWritten(m_targetTable, &m_targetTuple);
        }

#ifdef ARIES
        if (m_engine->isARIESEnabled()) {
            // no need of persistency check, m_targetTable is
            // always persistent for deletes

            // before image -- target is the tuple to be deleted.
            TableTuple *beforeImage = &m_targetTuple;

            TableTuple *keyTuple = NULL;
            char *keydata = NULL;

            // See if we use an index instead
            TableIndex *index = m_targetTable->primaryKeyIndex();
            if (index != NULL) {
                // First construct tuple for primary key
                keydata = new char[index->getKeySchema()->tupleLength()];
                keyTuple = new TableTuple(keydata, index->getKeySchema());

                for (int i = 0; i < index->getKeySchema()->columnCount(); i++) {
                    keyTuple->setNValue(i, beforeImage->getNValue(index->getColumnIndices()[i]));
                }

                // no before image need be recorded, just the primary key
                beforeImage = NULL;
            }

            LogRecord *logrecord = new LogRecord(computeTimeStamp(),
                                                 LogRecord::T_DELETE,  // this is a delete record
                                                 LogRecord::T_FORWARD, // the system is running normally
                                                 -1,                   // XXX: prevLSN must be fetched from table!
                                                 m_engine->getExecutorContext()->currentTxnId(), // txn id
                                                 m_engine->getSiteId(), // which execution site
                                                 m_targetTable->name(), // the table affected
                                                 keyTuple,              // primary key
                                                 -1,                    // must delete all columns
                                                 NULL,                  // no list of modified cols
                                                 beforeImage,
                                                 NULL                   // no after image
                                                 );

            size_t logrecordLength = logrecord->getEstimatedLength();
            char *logrecordBuffer = new char[logrecordLength];

            FallbackSerializeOutput output;
            output.initializeWithPosition(logrecordBuffer, logrecordLength, 0);

            logrecord->serializeTo(output);

            LogManager* m_logManager = this->m_engine->getLogManager();
            Logger m_ariesLogger = m_logManager->getAriesLogger();
            //VOLT_WARN("m_logManager : %p AriesLogger : %p", &m_logManager, &m_ariesLogger);
            const Logger *logger = m_logManager->getThreadLogger(LOGGERID_MM_ARIES);

            logger->log(LOGLEVEL_INFO, output.data(), output.position());

            delete[] logrecordBuffer;
            logrecordBuffer = NULL;

            delete logrecord;
            logrecord = NULL;

            if (keydata != NULL) {
                delete[] keydata;
                keydata = NULL;
            }

            if (keyTuple != NULL) {
                delete keyTuple;
                keyTuple = NULL;
            }
        }
#endif

        // Delete from target table
        if (!m_targetTable->deleteTuple(m_targetTuple, true)) {
            VOLT_ERROR("Failed to delete tuple from table '%s'",
                       m_targetTable->name().c_str());
            return false;
        }
    }

    // add to the planfragments count of modified tuples
    m_engine->m_tuplesModified += m_inputTable->activeTupleCount();
    //m_engine->context().incrementTuples(m_inputTable->activeTupleCount());

    return true;
}
Table* TableCatalogDelegate::constructTableFromCatalog(catalog::Database const& catalogDatabase,
                                                       catalog::Table const& catalogTable,
                                                       bool isXDCR,
                                                       int tableAllocationTargetSize,
                                                       bool forceNoDR) {
    // Create a persistent table for this table in our catalog
    int32_t tableId = catalogTable.relativeIndex();

    // get an array of table column names
    const int numColumns = static_cast<int>(catalogTable.columns().size());
    std::map<std::string, catalog::Column*>::const_iterator colIterator;
    std::vector<std::string> columnNames(numColumns);
    for (colIterator = catalogTable.columns().begin();
         colIterator != catalogTable.columns().end(); colIterator++) {
        auto catalogColumn = colIterator->second;
        columnNames[catalogColumn->index()] = catalogColumn->name();
    }

    // get the schema for the table
    TupleSchema* schema = createTupleSchema(catalogTable, isXDCR);

    // Indexes
    std::map<std::string, TableIndexScheme> index_map;
    std::map<std::string, catalog::Index*>::const_iterator idxIterator;
    for (idxIterator = catalogTable.indexes().begin();
         idxIterator != catalogTable.indexes().end(); idxIterator++) {
        auto catalogIndex = idxIterator->second;

        TableIndexScheme index_scheme;
        if (getIndexScheme(catalogTable, *catalogIndex, schema, &index_scheme)) {
            index_map[catalogIndex->name()] = index_scheme;
        }
    }

    // Constraints
    std::string pkeyIndexId;
    std::map<std::string, catalog::Constraint*>::const_iterator constraintIterator;
    for (constraintIterator = catalogTable.constraints().begin();
         constraintIterator != catalogTable.constraints().end(); constraintIterator++) {
        auto catalogConstraint = constraintIterator->second;

        // Constraint Type
        ConstraintType type = (ConstraintType) catalogConstraint->type();
        switch (type) {
            case CONSTRAINT_TYPE_PRIMARY_KEY:
                // Make sure we have an index to use
                assert(catalogConstraint->index());
                // Make sure they didn't declare more than one primary key index
                assert(pkeyIndexId.empty());
                pkeyIndexId = catalogConstraint->index()->name();
                break;
            case CONSTRAINT_TYPE_UNIQUE:
                // Make sure we have an index to use
                // TODO: In the future I would like bring back my Constraint
                //       object so that we can keep track of everything that a
                //       table has...
                assert(catalogConstraint->index());
                break;
            // Unsupported
            case CONSTRAINT_TYPE_CHECK:
            case CONSTRAINT_TYPE_FOREIGN_KEY:
            case CONSTRAINT_TYPE_MAIN:
                VOLT_WARN("Unsupported type '%s' for constraint '%s'",
                          constraintutil::getTypeName(type).c_str(),
                          catalogConstraint->name().c_str());
                break;
            // Unknown
            default:
                VOLT_ERROR("Invalid constraint type '%s' for '%s'",
                           constraintutil::getTypeName(type).c_str(),
                           catalogConstraint->name().c_str());
                assert(false);
                return NULL;
        }
    }

    // Build the index array.
    // Please note the index array should follow the order of primary key first,
    // all unique indices afterwards, and all the non-unique indices at the end.
    std::deque<TableIndexScheme> indexes;
    TableIndexScheme pkeyIndex_scheme;
    std::map<std::string, TableIndexScheme>::const_iterator indexIterator;
    for (indexIterator = index_map.begin(); indexIterator != index_map.end();
         indexIterator++) {
        // Exclude the primary key
        if (indexIterator->second.name.compare(pkeyIndexId) == 0) {
            pkeyIndex_scheme = indexIterator->second;
        // Just add it to the list
        } else {
            if (indexIterator->second.unique) {
                indexes.push_front(indexIterator->second);
            } else {
                indexes.push_back(indexIterator->second);
            }
        }
    }

    // partition column:
    catalog::Column const* partitionColumn = catalogTable.partitioncolumn();
    int partitionColumnIndex = -1;
    if (partitionColumn != NULL) {
        partitionColumnIndex = partitionColumn->index();
    }

    bool exportEnabled = isExportEnabledForTable(catalogDatabase, tableId);
    bool tableIsExportOnly = isTableExportOnly(catalogDatabase, tableId);
    bool drEnabled = !forceNoDR && catalogTable.isDRed();
    bool isReplicated = catalogTable.isreplicated();
    m_materialized = isTableMaterialized(catalogTable);
    std::string const& tableName = catalogTable.name();
    int32_t databaseId = catalogDatabase.relativeIndex();
    SHA1_CTX shaCTX;
    SHA1Init(&shaCTX);
    SHA1Update(&shaCTX,
               reinterpret_cast<const uint8_t*>(catalogTable.signature().c_str()),
               (uint32_t)::strlen(catalogTable.signature().c_str()));
    SHA1Final(reinterpret_cast<unsigned char*>(m_signatureHash), &shaCTX);

    // Persistent table will use default size (2MB) if tableAllocationTargetSize is zero.
    if (m_materialized) {
        catalog::MaterializedViewInfo* mvInfo =
            catalogTable.materializer()->views().get(catalogTable.name());
        if (mvInfo && mvInfo->groupbycols().size() == 0) {
            // ENG-8490: If the materialized view came with no group by, set table block size to 64KB
            // to achieve better space efficiency.
            // FYI: maximum column count = 1024, largest fixed length data type is short varchars (64 bytes)
            tableAllocationTargetSize = 1024 * 64;
        }
    }
    VOLT_DEBUG("Creating %s %s as %s",
               m_materialized ? "VIEW" : "TABLE",
               tableName.c_str(),
               isReplicated ? "REPLICATED" : "PARTITIONED");
    Table* table = TableFactory::getPersistentTable(databaseId, tableName,
                                                    schema, columnNames, m_signatureHash,
                                                    m_materialized,
                                                    partitionColumnIndex, exportEnabled,
                                                    tableIsExportOnly,
                                                    tableAllocationTargetSize,
                                                    catalogTable.tuplelimit(),
                                                    m_compactionThreshold,
                                                    drEnabled,
                                                    isReplicated);

    PersistentTable* persistentTable = dynamic_cast<PersistentTable*>(table);
    if ( ! persistentTable) {
        assert(pkeyIndexId.empty());
        assert(indexes.empty());
        return table;
    }

    // add a pkey index if one exists
    if ( ! pkeyIndexId.empty()) {
        TableIndex* pkeyIndex = TableIndexFactory::getInstance(pkeyIndex_scheme);
        assert(pkeyIndex);
        persistentTable->addIndex(pkeyIndex);
        persistentTable->setPrimaryKeyIndex(pkeyIndex);
    }

    // add other indexes
    BOOST_FOREACH (TableIndexScheme& scheme, indexes) {
        TableIndex* index = TableIndexFactory::getInstance(scheme);
        assert(index);
        persistentTable->addIndex(index);
    }