/**
 * Lazily create the streaming context for this streamer's stream type.
 *
 * @param table    the table being streamed (passed to the context ctor)
 * @param tableId  catalog id of the table (used by the recovery context)
 * @return true if a context exists after the call (already present or
 *         successfully created), false if creation failed.
 */
bool TableStreamer::activateStream(PersistentTable &table, CatalogId tableId) {
    if (m_context == NULL) {
        // This is the only place that can create a streaming context based on
        // the stream type. Other places shouldn't need to know about the
        // context sub-types.
        try {
            switch (m_streamType) {
                case TABLE_STREAM_SNAPSHOT:
                    // The constructor can throw when it parses the predicates.
                    // If it throws, operator delete reclaims the allocation and
                    // m_context is left untouched (NULL).
                    m_context.reset(new CopyOnWriteContext(table,
                                                           m_tupleSerializer,
                                                           m_partitionId,
                                                           m_predicateStrings,
                                                           table.activeTupleCount()));
                    break;
                case TABLE_STREAM_RECOVERY:
                    m_context.reset(new RecoveryContext(table, tableId));
                    break;
                case TABLE_STREAM_ELASTIC:
                    m_context.reset(new ElasticContext(table, m_predicateStrings));
                    break;
                default:
                    assert(false);
            }
        } catch (const SerializableEEException &) {
            // Swallow the exception: m_context stays NULL and the failure is
            // reported through the return value below.
        }
    }
    return (m_context != NULL);
}
bool EvictionIterator::hasNext() { VOLT_TRACE("Size: %lu\n", (long unsigned int)m_size); PersistentTable* ptable = static_cast<PersistentTable*>(table); VOLT_TRACE("Count: %lu %lu\n", ptable->usedTupleCount(), ptable->activeTupleCount()); if(ptable->usedTupleCount() == 0) return false; #ifndef ANTICACHE_TIMESTAMPS if(current_tuple_id == ptable->getNewestTupleID()) return false; if(ptable->getNumTuplesInEvictionChain() == 0) { // there are no tuples in the chain VOLT_DEBUG("There are no tuples in the eviction chain."); return false; } #else if (current_tuple_id == m_size) return false; #endif return true; }
bool DeleteExecutor::p_execute(const NValueArray ¶ms) { // target table should be persistenttable // update target table reference from table delegate PersistentTable* targetTable = dynamic_cast<PersistentTable*>(m_node->getTargetTable()); assert(targetTable); TableTuple targetTuple(targetTable->schema()); int64_t modified_tuples = 0; if (m_truncate) { VOLT_TRACE("truncating table %s...", targetTable->name().c_str()); // count the truncated tuples as deleted modified_tuples = targetTable->visibleTupleCount(); VOLT_TRACE("Delete all rows from table : %s with %d active, %d visible, %d allocated", targetTable->name().c_str(), (int)targetTable->activeTupleCount(), (int)targetTable->visibleTupleCount(), (int)targetTable->allocatedTupleCount()); // empty the table either by table swap or iteratively deleting tuple-by-tuple targetTable->truncateTable(m_engine); } else { assert(m_inputTable); assert(m_inputTuple.sizeInValues() == m_inputTable->columnCount()); assert(targetTuple.sizeInValues() == targetTable->columnCount()); TableIterator inputIterator = m_inputTable->iterator(); while (inputIterator.next(m_inputTuple)) { // // OPTIMIZATION: Single-Sited Query Plans // If our beloved DeletePlanNode is apart of a single-site query plan, // then the first column in the input table will be the address of a // tuple on the target table that we will want to blow away. 
This saves // us the trouble of having to do an index lookup // void *targetAddress = m_inputTuple.getNValue(0).castAsAddress(); targetTuple.move(targetAddress); // Delete from target table if (!targetTable->deleteTuple(targetTuple, true)) { VOLT_ERROR("Failed to delete tuple from table '%s'", targetTable->name().c_str()); return false; } } modified_tuples = m_inputTable->tempTableTupleCount(); VOLT_TRACE("Deleted %d rows from table : %s with %d active, %d visible, %d allocated", (int)modified_tuples, targetTable->name().c_str(), (int)targetTable->activeTupleCount(), (int)targetTable->visibleTupleCount(), (int)targetTable->allocatedTupleCount()); } TableTuple& count_tuple = m_node->getOutputTable()->tempTuple(); count_tuple.setNValue(0, ValueFactory::getBigIntValue(modified_tuples)); // try to put the tuple into the output table if (!m_node->getOutputTable()->insertTuple(count_tuple)) { VOLT_ERROR("Failed to insert tuple count (%ld) into" " output table '%s'", static_cast<long int>(modified_tuples), m_node->getOutputTable()->name().c_str()); return false; } m_engine->addToTuplesModified(modified_tuples); return true; }
/**
 * Reserve some tuples when an eviction requested.
 *
 * Translates a byte budget (`amount`) into a number of tuples to evict, then
 * fills `candidates` with sampled (timestamp, address) pairs and sorts them so
 * the oldest candidates come first. Side effects: resets m_size and
 * current_tuple_id, allocates `candidates`, and (in the PRIME variant) updates
 * the table's per-block scan state (m_stepPrime / m_evictPosition).
 *
 * NOTE(review): `candidates` is assigned with `new[]` and no prior delete is
 * visible here — confirm ownership/cleanup happens elsewhere.
 */
void EvictionIterator::reserve(int64_t amount) {
    VOLT_DEBUG("amount: %ld\n", amount);
    char* addr = NULL;
    PersistentTable* ptable = static_cast<PersistentTable*>(table);
    // Per-tuple footprint: inlined data plus the tuple header.
    int tuple_size = ptable->m_schema->tupleLength() + TUPLE_HEADER_SIZE;
    int active_tuple = (int)ptable->activeTupleCount();
    int evict_num = 0;
    int64_t used_tuple = ptable->usedTupleCount();
#ifdef ANTICACHE_TIMESTAMPS_PRIME
    uint32_t tuples_per_block = ptable->m_tuplesPerBlock;
#endif
    // Convert the byte budget to a tuple count; when the table has active
    // tuples, amortize the non-inlined (out-of-line) memory over them.
    if (active_tuple)
        evict_num = (int)(amount / (tuple_size + ptable->nonInlinedMemorySize() / active_tuple));
    else
        evict_num = (int)(amount / tuple_size);

    VOLT_DEBUG("Count: %lu %lu\n", ptable->usedTupleCount(), ptable->activeTupleCount());

    // Can't evict more tuples than are active.
    if (evict_num > active_tuple)
        evict_num = active_tuple;

    // Oversample by RANDOM_SCALE so the sort below can pick the oldest subset.
    int pick_num = evict_num * RANDOM_SCALE;

    int block_num = (int)ptable->m_data.size();
    int block_size = ptable->m_tuplesPerBlock;
    int location_size;
#ifndef ANTICACHE_TIMESTAMPS_PRIME
    int block_location;
#endif
    // NOTE(review): re-seeding with time(0) on every call makes back-to-back
    // calls within the same second sample identically — confirm intended.
    srand((unsigned int)time(0));

    VOLT_INFO("evict pick num: %d %d\n", evict_num, pick_num);
    VOLT_INFO("active_tuple: %d\n", active_tuple);
    VOLT_INFO("block number: %d\n", block_num);

    m_size = 0;
    current_tuple_id = 0;

#ifdef ANTICACHE_TIMESTAMPS_PRIME
    // Split the sample quota proportionally: pick_num_block samples per full
    // block, remainder (pick_num_last_block) for the trailing partial block.
    int pick_num_block = (int)(((int64_t)pick_num * tuples_per_block) / used_tuple);
    int last_full_block = (int)(used_tuple / block_size);
    VOLT_INFO("LOG: %d %d %ld\n", last_full_block, tuples_per_block, used_tuple);
    int last_block_size = (int)(used_tuple % block_size);
    int pick_num_last_block = pick_num - pick_num_block * last_full_block;
#endif

    // If we'll evict the entire table, we should do a scan instead of sampling.
    // The main reason we should do that is to past the test...
    if (evict_num < active_tuple) {
        candidates = new EvictionTuple[pick_num];
#ifdef ANTICACHE_TIMESTAMPS_PRIME
        // Sample each full block by striding through it with a prime step so
        // that successive calls visit (nearly) the whole block before repeating.
        for (int i = 0; i < last_full_block; ++i) {
            /**
             * if this is a beginning of a loop of scan, find a proper step to let it sample tuples from almost the whole block
             * TODO: Here we use a method that every time try a different prime number from what we use last time. Is it better?
             * That would need further analysis.
             */
            // A negative m_stepPrime[i] marks "previous scan completed a full
            // cycle"; its magnitude is the prime used last time.
            if (ptable->m_stepPrime[i] < 0) {
                int ideal_step = (rand() % 5) * tuples_per_block / pick_num_block;
                int old_prime = - ptable->m_stepPrime[i];
                // Walk the prime table downward; keep the largest prime that is
                // different from the old one and does not divide the block size,
                // stopping once we pass the ideal step size.
                for (int j = prime_size - 1; j >= 0; --j) {
                    if (prime_list[j] != old_prime && (tuples_per_block % prime_list[j]) > 0) {
                        ptable->m_stepPrime[i] = prime_list[j];
                        VOLT_TRACE("DEBUG: %d %d\n", tuples_per_block, ptable->m_stepPrime[i]);
                    }
                    if (prime_list[j] <= ideal_step)
                        break;
                }
                VOLT_INFO("Prime of block %d: %d %d\n", i, tuples_per_block, ptable->m_stepPrime[i]);
            }

            // now scan the block with a step of we select.
            // if we go across the boundry, minus it back to the beginning (like a mod operation)
            int step_prime = ptable->m_stepPrime[i];
            int step_offset = step_prime * tuple_size;
            int block_size_bytes = block_size * tuple_size;
            // Resume where the previous call left off in this block.
            addr = ptable->m_data[i] + ptable->m_evictPosition[i];
            uint64_t end_of_block = (uint64_t)ptable->m_data[i] + block_size_bytes;
            bool flag_new = false;

            for (int j = 0; j < pick_num_block; ++j) {
                VOLT_TRACE("Flip addr: %p %p %lu\n", addr, ptable->m_data[i], ((uint64_t)addr - (uint64_t)ptable->m_data[i]) / 1024);
                current_tuple->move(addr);
                // Only active tuples become candidates.
                if (current_tuple->isActive()) {
                    candidates[m_size].setTuple(current_tuple->getTimeStamp(), addr);
                    m_size++;
                }
                addr += step_offset;
                // Wrap around the block boundary (prime step guarantees we
                // eventually land back exactly on the block start).
                if ((uint64_t)addr >= end_of_block)
                    addr -= block_size_bytes;
                if (addr == ptable->m_data[i])
                    flag_new = true;
            }
            // Persist the scan cursor; negate the prime if we completed a full
            // cycle so the next call picks a fresh prime.
            int new_position = (int)((uint64_t)addr - (uint64_t)ptable->m_data[i]);
            ptable->m_evictPosition[i] = new_position;
            if (flag_new)
                ptable->m_stepPrime[i] = - ptable->m_stepPrime[i];
        }

        // The trailing partial block is sampled uniformly at random instead of
        // with the prime stride.
        // NOTE(review): if last_block_size == 0 while pick_num_last_block > 0,
        // `rand() % last_block_size` divides by zero — confirm this case is
        // excluded by the quota arithmetic above.
        if (last_full_block < block_num) {
            addr = ptable->m_data[last_full_block];
            char* current_addr;
            for (int j = 0; j < pick_num_last_block; ++j) {
                current_addr = addr + (rand() % last_block_size) * tuple_size;
                current_tuple->move(current_addr);
                // Skip inactive or already-evicted tuples (no retry, so the
                // actual sample may be smaller than the quota).
                if (!current_tuple->isActive() || current_tuple->isEvicted())
                    continue;
                candidates[m_size].setTuple(current_tuple->getTimeStamp(), current_addr);
                m_size++;
            }
        }
#else
        // Non-PRIME variant: pick_num independent uniform samples, each choosing
        // a random block then a random in-use slot within it.
        for (int i = 0; i < pick_num; i++) {
            // should we use a faster random generator?
            block_location = rand() % block_num;
            addr = ptable->m_data[block_location];

            // The last block may be only partially used.
            if ((block_location + 1) * block_size > used_tuple)
                location_size = (int)(used_tuple - block_location * block_size);
            else
                location_size = block_size;
            addr += (rand() % location_size) * tuple_size;
            current_tuple->move(addr);

            VOLT_DEBUG("Flip addr: %p\n", addr);

            if (!current_tuple->isActive() || current_tuple->isEvicted())
                continue;
            candidates[m_size].setTuple(current_tuple->getTimeStamp(), addr);
            m_size++;
        }
#endif
    } else {
        // Evicting everything: do a full linear scan of every block instead of
        // sampling, collecting all active, non-evicted tuples.
        candidates = new EvictionTuple[active_tuple];
        for (int i = 0; i < block_num; ++i) {
            addr = ptable->m_data[i];
            if ((i + 1) * block_size > ptable->usedTupleCount())
                location_size = (int)(ptable->usedTupleCount() - i * block_size);
            else
                location_size = block_size;
            for (int j = 0; j < location_size; j++) {
                current_tuple->move(addr);
                if (!current_tuple->isActive() || current_tuple->isEvicted()) {
                    addr += tuple_size;
                    continue;
                }
                VOLT_TRACE("Flip addr: %p\n", addr);
                candidates[m_size].setTuple(current_tuple->getTimeStamp(), addr);
                m_size++;
                addr += tuple_size;
            }
        }
    }

    // Order candidates by the EvictionTuple comparator (timestamp order) so the
    // iterator hands out the best eviction victims first.
    sort(candidates, candidates + m_size, less <EvictionTuple>());

    //VOLT_INFO("Size of eviction candidates: %lu %d %d\n", (long unsigned int)m_size, activeN, evictedN);
}