Example #1
bool CopyOnWriteContext::notifyTupleDelete(TableTuple &tuple) {
    assert(m_iterator != NULL);

    if (tuple.isDirty() || m_finishedTableScan) {
        return true;
    }

    /**
     * Find out which block the address is contained in. lower_bound returns the first entry
     * in the index >= the address. Unless the address is exactly equal to a block's start address,
     * the block we are looking for is the previous entry, so check whether the address fits
     * in the previous entry. If it doesn't, the address belongs to a block that isn't tracked here.
     */
    TBPtr block = PersistentTable::findBlock(tuple.address(), m_blocks, getTable().getTableAllocationSize());
    if (block.get() == NULL) {
        // tuple not in snapshot region, don't care about this tuple
        return true;
    }

    /**
     * Now check where this is relative to the COWIterator.
     */
    CopyOnWriteIterator *iter = reinterpret_cast<CopyOnWriteIterator*>(m_iterator.get());
    return !iter->needToDirtyTuple(block->address(), tuple.address());
}
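
The lower_bound lookup described in the comment above can be illustrated with a small standalone sketch. Nothing here is VoltDB code: the DemoBlock map and the findContainingBlock helper are hypothetical, and a fixed block size stands in for getTableAllocationSize().

#include <cstddef>
#include <map>

// Hypothetical sketch: blocks keyed by their start address; find the block
// that contains addr, or end() if no tracked block contains it.
struct DemoBlock { char *start; std::size_t size; };
typedef std::map<char*, DemoBlock> DemoBlockMap;

DemoBlockMap::const_iterator findContainingBlock(const DemoBlockMap &blocks,
                                                 char *addr,
                                                 std::size_t blockSize) {
    // lower_bound returns the first entry whose key is >= addr.
    DemoBlockMap::const_iterator it = blocks.lower_bound(addr);
    if (it != blocks.end() && it->first == addr) {
        return it;                      // addr is exactly a block start
    }
    if (it == blocks.begin()) {
        return blocks.end();            // no earlier block could contain addr
    }
    --it;                               // otherwise the candidate is the previous entry
    if (addr < it->first + blockSize) {
        return it;                      // addr falls inside the previous block
    }
    return blocks.end();                // addr is outside every tracked block
}
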
Example #2
Table* TableFactory::getPersistentTable(
            voltdb::CatalogId databaseId,
            const std::string &name,
            TupleSchema* schema,
            const std::vector<std::string> &columnNames,
            char *signature,
            bool tableIsMaterialized,
            int partitionColumn,
            bool exportEnabled,
            bool exportOnly,
            int tableAllocationTargetSize,
            int tupleLimit,
            int32_t compactionThreshold,
            bool drEnabled)
{
    Table *table = NULL;
    StreamedTable *streamedTable = NULL;
    PersistentTable *persistentTable = NULL;

    if (exportOnly) {
        table = streamedTable = new StreamedTable(partitionColumn);
    }
    else {
        table = persistentTable = new PersistentTable(partitionColumn,
                                                      signature,
                                                      tableIsMaterialized,
                                                      tableAllocationTargetSize,
                                                      tupleLimit,
                                                      drEnabled);
    }

    initCommon(databaseId,
               table,
               name,
               schema,
               columnNames,
               true,  // table will take ownership of TupleSchema object
               compactionThreshold);

    TableStats *stats;
    if (exportOnly) {
        stats = streamedTable->getTableStats();
    }
    else {
        stats = persistentTable->getTableStats();
        // Allocate and assign the tuple storage block to the persistent table ahead of time instead
        // of doing so at the time of the first tuple insertion. Allocating the block up front avoids
        // paying the allocation cost during tuple insertion.
        TBPtr block = persistentTable->allocateNextBlock();
        assert(block->hasFreeTuples());
        persistentTable->m_blocksWithSpace.insert(block);
    }

    // initialize stats for the table
    configureStats(name, stats);

    return table;
}
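
The block pre-allocation in the non-export branch boils down to: allocate the first storage block at construction time so the first insert pays no allocation cost. A minimal sketch of that idea, using illustrative SimpleBlock/SimpleTable types rather than VoltDB's:

#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

struct SimpleBlock {
    explicit SimpleBlock(std::size_t capacity) : used(0), capacity(capacity) {}
    bool hasFreeSlots() const { return used < capacity; }
    std::size_t used;
    std::size_t capacity;
};

class SimpleTable {
public:
    explicit SimpleTable(std::size_t blockCapacity) {
        // Allocate the first block eagerly instead of on the first insert.
        m_blocks.push_back(std::unique_ptr<SimpleBlock>(new SimpleBlock(blockCapacity)));
        assert(m_blocks.back()->hasFreeSlots());
    }
    void insertRow() {
        SimpleBlock *block = m_blocks.back().get();
        if (!block->hasFreeSlots()) {
            // Only later inserts ever pay for a new block allocation.
            m_blocks.push_back(std::unique_ptr<SimpleBlock>(new SimpleBlock(block->capacity)));
            block = m_blocks.back().get();
        }
        ++block->used;   // claim one slot for the new row
    }
private:
    std::vector<std::unique_ptr<SimpleBlock> > m_blocks;
};
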
Example #3
void CopyOnWriteContext::markTupleDirty(TableTuple tuple, bool newTuple) {
    assert(m_iterator != NULL);

    if (newTuple) {
        m_inserts++;
    }
    else {
        m_updates++;
    }

    /**
     * If this is an update or a delete of a tuple that is already dirty then no further action is
     * required.
     */
    if (!newTuple && tuple.isDirty()) {
        return;
    }

    /**
     * If the table has already been scanned there is no need to continue marking tuples dirty.
     * If the tuple is dirty then it has already been backed up.
     */
    if (m_finishedTableScan) {
        tuple.setDirtyFalse();
        return;
    }

    /**
     * Find out which block the address is contained in.
     */
    TBPtr block = PersistentTable::findBlock(tuple.address(), m_blocks, getTable().getTableAllocationSize());
    if (block.get() == NULL) {
        // tuple not in snapshot region, don't care about this tuple, no need to dirty it
        tuple.setDirtyFalse();
        return;
    }

    /**
     * Now check where this is relative to the COWIterator.
     */
    CopyOnWriteIterator *iter = reinterpret_cast<CopyOnWriteIterator*>(m_iterator.get());
    if (iter->needToDirtyTuple(block->address(), tuple.address())) {
        tuple.setDirtyTrue();
        /**
         * Don't back up a newly introduced tuple, just mark it as dirty.
         */
        if (!newTuple) {
            m_backedUpTuples->insertTupleNonVirtualWithDeepCopy(tuple, &m_pool);
        }
    } else {
        tuple.setDirtyFalse();
        return;
    }
}
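
The decision above, back up the old image only when the scan has not reached the tuple yet and never back up brand-new tuples, can be sketched in isolation. The CowSketch type below is a hypothetical simplification (rows addressed by index, a single scan cursor), not the VoltDB structures:

#include <cstddef>
#include <vector>

// Rows with index below scanCursor have already been copied into the snapshot;
// anything at or beyond it has not been visited yet.
struct CowSketch {
    std::size_t scanCursor = 0;                    // first row index not yet scanned
    std::vector<std::vector<char> > backedUpRows;  // deep copies of pre-images

    void noteMutation(const std::vector<char> &oldImage, std::size_t rowIndex, bool isNewRow) {
        if (rowIndex < scanCursor) {
            return;                                // already captured by the scan
        }
        if (!isNewRow) {
            backedUpRows.push_back(oldImage);      // deep copy of the old image
        }
        // A brand-new row never existed in the snapshot, so there is no
        // pre-image to preserve; only the caller's dirty mark is needed.
    }
};
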
Example #4
Table* TableFactory::getPersistentTable(
            voltdb::CatalogId databaseId,
            const std::string &name,
            TupleSchema* schema,
            const std::vector<std::string> &columnNames,
            char *signature,
            bool tableIsMaterialized,
            int partitionColumn,
            bool exportEnabled,
            bool exportOnly,
            int tableAllocationTargetSize,
            int tupleLimit,
            int32_t compactionThreshold,
            bool drEnabled)
{
    Table *table = NULL;

    if (exportOnly) {
        table = new StreamedTable(exportEnabled);
    }
    else {
        table = new PersistentTable(partitionColumn, signature, tableIsMaterialized, tableAllocationTargetSize, tupleLimit, drEnabled);
    }

    initCommon(databaseId,
               table,
               name,
               schema,
               columnNames,
               true,  // table will take ownership of TupleSchema object
               compactionThreshold);

    // initialize stats for the table
    configureStats(databaseId, name, table);

    if (!exportOnly) {
        // Allocate the tuple storage block for the persistent table ahead of time
        // instead of waiting until the first tuple insertion. The intent of allocating
        // the block up front is to improve performance of the first tuple insertion.
        PersistentTable *persistentTable = static_cast<PersistentTable*>(table);
        TBPtr block = persistentTable->allocateNextBlock();
        assert(block->hasFreeTuples());
        persistentTable->m_blocksWithSpace.insert(block);
    }
    return table;
}
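
The two factory variants (Example #2 and Example #4) differ mainly in how the derived pointer is recovered for the post-construction block allocation: one keeps a typed pointer from the start, the other downcasts afterwards. A hypothetical Base/Derived pair (not VoltDB types) makes the trade-off concrete:

#include <cassert>

struct Base { virtual ~Base() {} };
struct Derived : Base { void warmUp() {} };

// In the style of Example #2: keep the derived pointer around during construction.
Base *makeKeepingTypedPointer() {
    Derived *derived = new Derived();
    derived->warmUp();                  // no cast needed for derived-only setup
    return derived;
}

// In the style of Example #4: recover the type with a static_cast, which is
// safe only because this branch guarantees the dynamic type.
Base *makeWithDowncast(bool wantDerived) {
    Base *base = wantDerived ? static_cast<Base*>(new Derived()) : new Base();
    if (wantDerived) {
        static_cast<Derived*>(base)->warmUp();
    }
    return base;
}
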
Example #5
void CopyOnWriteContext::notifyBlockWasCompactedAway(TBPtr block) {
    assert(!m_finishedTableScan);
    m_blocksCompacted++;
    CopyOnWriteIterator *iter = static_cast<CopyOnWriteIterator*>(m_iterator.get());
    if (iter->m_blockIterator != m_blocks.end()) {
        TBPtr nextBlock = iter->m_blockIterator.data();
        //The next block is the one that was compacted away
        //Need to move the iterator forward to skip it
        if (nextBlock == block) {
            iter->m_blockIterator++;

            //There is another block after the one that was compacted away
            if (iter->m_blockIterator != m_blocks.end()) {
                TBPtr newNextBlock = iter->m_blockIterator.data();
                m_blocks.erase(block->address());
                iter->m_blockIterator = m_blocks.find(newNextBlock->address());
                iter->m_end = m_blocks.end();
                assert(iter->m_blockIterator != m_blocks.end());
            } else {
                //No block after the one compacted away
                //set everything to end
                m_blocks.erase(block->address());
                iter->m_blockIterator = m_blocks.end();
                iter->m_end = m_blocks.end();
            }
        } else {
            //Some random block was compacted away. Remove it and regenerate the iterator
            m_blocks.erase(block->address());
            iter->m_blockIterator = m_blocks.find(nextBlock->address());
            iter->m_end = m_blocks.end();
            assert(iter->m_blockIterator != m_blocks.end());
        }
    }
}
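
The iterator fix-up above can be shown on a plain std::map. The eraseAndReseat helper below is a hypothetical standalone version; the explicit re-find mirrors the B+-tree block index used above, whose erase can invalidate iterators (std::map itself only invalidates the erased element's iterator).

#include <map>
#include <string>

typedef std::map<int, std::string> DemoBlockMap;

// Remove erasedKey from the map while keeping `cursor` pointing at the block
// the scan should visit next.
void eraseAndReseat(DemoBlockMap &blocks, DemoBlockMap::iterator &cursor, int erasedKey) {
    if (cursor == blocks.end()) {
        return;                              // scan already finished; nothing to fix up
    }
    if (cursor->first == erasedKey) {
        ++cursor;                            // step past the block being removed
        if (cursor == blocks.end()) {
            blocks.erase(erasedKey);         // nothing follows; cursor stays at end()
            return;
        }
    }
    int resumeKey = cursor->first;           // remember where the scan should resume
    blocks.erase(erasedKey);
    cursor = blocks.find(resumeKey);         // re-seat the cursor after the erase
}

For example, compacting away the block the cursor currently points at leaves the cursor on the following block, exactly as the first branch above does.
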
Example #6
/**
 * Block compaction hook.
 */
void ElasticScanner::notifyBlockWasCompactedAway(TBPtr block) {
    if (!m_scanComplete && m_blockIterator != m_blockEnd) {
        TBPtr nextBlock = m_blockIterator.data();
        if (nextBlock == block) {
            // The next block was compacted away.
            m_blockIterator++;
            if (m_blockIterator != m_blockEnd) {
                // There is a block to skip to.
                TBPtr newNextBlock = m_blockIterator.data();
                m_blockMap.erase(block->address());
                m_blockIterator = m_blockMap.find(newNextBlock->address());
                m_blockEnd = m_blockMap.end();
                assert(m_blockIterator != m_blockMap.end());
            }
            else {
                // There isn't a block to skip to, so we're done.
                m_blockMap.erase(block->address());
                m_blockIterator = m_blockMap.end();
                m_blockEnd = m_blockMap.end();
            }
        } else {
            // Some random block was compacted away.
            // Remove it and regenerate the iterator.
            m_blockMap.erase(block->address());
            m_blockIterator = m_blockMap.find(nextBlock->address());
            m_blockEnd = m_blockMap.end();
            assert(m_blockIterator != m_blockMap.end());
        }
    }
}
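
CopyOnWriteContext::notifyBlockWasCompactedAway and ElasticScanner::notifyBlockWasCompactedAway apply the same fix-up to different members. Purely as an illustration of that shared pattern (not how VoltDB factors it), the logic can be written once over any ordered key-to-block map:

#include <map>

// Hypothetical generic form of the compaction hook: step the cursor past the
// compacted block if necessary, erase the block, then re-seat the cursor and
// refresh the cached end iterator.
template <typename BlockMap>
void skipCompactedBlock(BlockMap &blocks,
                        typename BlockMap::iterator &cursor,
                        typename BlockMap::iterator &cachedEnd,
                        const typename BlockMap::key_type &compactedKey) {
    if (cursor == cachedEnd) {
        return;                              // scan already finished
    }
    if (cursor->first == compactedKey) {
        ++cursor;                            // the cursor's own block was compacted away
    }
    if (cursor == blocks.end()) {
        blocks.erase(compactedKey);
        cursor = blocks.end();               // nothing left to visit
    } else {
        typename BlockMap::key_type resumeKey = cursor->first;
        blocks.erase(compactedKey);
        cursor = blocks.find(resumeKey);     // re-seat after the erase
    }
    cachedEnd = blocks.end();                // keep the cached end iterator in sync
}
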
Example #7
/**
 * When a tuple is "dirty" it is still active, but it will never be a "found" tuple
 * since it is skipped. The tuple may be dirty because it was deleted (this is why it is always
 * skipped). In that case the CopyOnWriteContext calls this to ensure that the iteration finds the
 * correct number of tuples in the used portion of the table blocks and doesn't overrun into the
 * uninitialized block memory because it skipped a dirty tuple and didn't end up with the right
 * found-tuple count upon reaching the end.
 */
bool CopyOnWriteIterator::needToDirtyTuple(char *tupleAddress) {
    if (m_tableEmpty) {
        // Snapshot was activated when the table was empty.
        // Tuple is not in the snapshot region, don't care about this tuple.
        assert(m_currentBlock == NULL);
        return false;
    }
    /**
     * Find out which block the address is contained in. lower_bound returns the first entry
     * in the index >= the address. Unless the address is exactly equal to a block's start address,
     * the block we are looking for is the previous entry, so check whether the address fits
     * in the previous entry. If it doesn't, the address belongs to a block that isn't tracked here.
     */
    TBPtr block = PersistentTable::findBlock(tupleAddress, m_blocks, m_table->getTableAllocationSize());
    if (block.get() == NULL) {
        // tuple not in snapshot region, don't care about this tuple
        return false;
    }

    assert(m_currentBlock != NULL);

    /**
     * Now check where this is relative to the COWIterator.
     */
    const char *blockAddress = block->address();
    if (blockAddress > m_currentBlock->address()) {
        return true;
    }

    assert(blockAddress == m_currentBlock->address());
    if (tupleAddress >= m_location) {
        return true;
    } else {
        return false;
    }
}
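
The block/offset comparison above amounts to asking whether the tuple is still ahead of the scan cursor. A standalone sketch of that test, with a hypothetical ScanCursor standing in for m_currentBlock and m_location:

// Hypothetical stand-ins for m_currentBlock->address() and m_location.
struct ScanCursor {
    const char *currentBlockStart;   // start address of the block being scanned
    const char *nextUnscanned;       // first address in that block the scan has not reached
};

bool mustDirty(const ScanCursor &cursor, const char *rowBlockStart, const char *rowAddress) {
    if (rowBlockStart > cursor.currentBlockStart) {
        return true;                 // the row's block is still ahead of the scan
    }
    if (rowBlockStart < cursor.currentBlockStart) {
        return false;                // that block was already scanned; the code above
                                     // asserts this case away because scanned blocks
                                     // are removed from m_blocks
    }
    return rowAddress >= cursor.nextUnscanned;   // same block: compare offsets
}
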
Example #8
// ------------------------------------------------------------------
// OPERATIONS
// ------------------------------------------------------------------
void PersistentTable::nextFreeTuple(TableTuple *tuple) {
    // First check whether we have any blocks with free tuples in our list.
    // In the MEMCHECK build the heap is used instead of a free list to help Valgrind.
    if (!m_blocksWithSpace.empty()) {
        VOLT_TRACE("GRABBED FREE TUPLE!\n");
        stx::btree_set<TBPtr >::iterator begin = m_blocksWithSpace.begin();
        TBPtr block = (*begin);
        std::pair<char*, int> retval = block->nextFreeTuple();

        /**
         * Check to see if the block needs to move to a new bucket
         */
        if (retval.second != -1) {
            //Check if the block is currently not pending snapshot
            if (m_blocksNotPendingSnapshot.find(block) != m_blocksNotPendingSnapshot.end()) {
                block->swapToBucket(m_blocksNotPendingSnapshotLoad[retval.second]);
            //Check if the block goes into the pending snapshot set of buckets
            } else if (m_blocksPendingSnapshot.find(block) != m_blocksPendingSnapshot.end()) {
                block->swapToBucket(m_blocksPendingSnapshotLoad[retval.second]);
            } else {
                //In this case the block is actively being snapshotted and isn't eligible for merge operations at all
                //do nothing, once the block is finished by the iterator, the iterator will return it
            }
        }

        tuple->move(retval.first);
        if (!block->hasFreeTuples()) {
            m_blocksWithSpace.erase(block);
        }
        assert (m_columnCount == tuple->sizeInValues());
        return;
    }

    // if there are no tuples free, we need to grab another chunk of memory
    // Allocate a new set of tuples
    TBPtr block = allocateNextBlock();

    // get free tuple
    assert (m_columnCount == tuple->sizeInValues());

    std::pair<char*, int> retval = block->nextFreeTuple();

    /**
     * Check to see if the block needs to move to a new bucket
     */
    if (retval.second != -1) {
        //Check if the block goes into the pending snapshot set of buckets
        if (m_blocksPendingSnapshot.find(block) != m_blocksPendingSnapshot.end()) {
            //std::cout << "Swapping block to nonsnapshot bucket " << static_cast<void*>(block.get()) << " to bucket " << retval.second << std::endl;
            block->swapToBucket(m_blocksPendingSnapshotLoad[retval.second]);
        //Now check if it goes in with the others
        } else if (m_blocksNotPendingSnapshot.find(block) != m_blocksNotPendingSnapshot.end()) {
            //std::cout << "Swapping block to snapshot bucket " << static_cast<void*>(block.get()) << " to bucket " << retval.second << std::endl;
            block->swapToBucket(m_blocksNotPendingSnapshotLoad[retval.second]);
        } else {
            //In this case the block is actively being snapshotted and isn't eligible for merge operations at all
            //do nothing, once the block is finished by the iterator, the iterator will return it
        }
    }

    tuple->move(retval.first);
    //cout << "table::nextFreeTuple(" << reinterpret_cast<const void *>(this) << ") m_usedTuples == " << m_usedTuples << endl;

    if (block->hasFreeTuples()) {
        m_blocksWithSpace.insert(block);
    }
}
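
The bucket bookkeeping in both branches follows one pattern: after an allocation changes a block's free-space count, recompute which load bucket it belongs to and move it if that changed. A simplified, hypothetical sketch of that pattern (plain int bucket indices, one bucket per 16 free slots, nothing VoltDB-specific):

#include <set>
#include <vector>

// Hypothetical block type carrying only what the bucketing needs.
struct BucketedBlock { int freeSlots; int bucket; };   // bucket == -1 means unbucketed

// After one slot is consumed, recompute the block's load bucket and move it if
// that changed. The caller is assumed to size `buckets` so that every possible
// freeSlots / 16 value is a valid index.
void rebucketAfterAllocation(std::vector<std::set<BucketedBlock*> > &buckets,
                             BucketedBlock &block) {
    --block.freeSlots;
    int newBucket = block.freeSlots / 16;        // coarse load classification
    if (newBucket != block.bucket) {
        if (block.bucket >= 0) {
            buckets[block.bucket].erase(&block); // leave the old bucket
        }
        buckets[newBucket].insert(&block);       // join the bucket matching the new load
        block.bucket = newBucket;
    }
}
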
Example #9
std::pair<int, int> TupleBlock::merge(Table *table, TBPtr source) {
    assert(source != this);
    /*
      std::cout << "Attempting to merge " << static_cast<void*> (this)
                << "(" << m_activeTuples << ") with " << static_cast<void*>(source.get())
                << "(" << source->m_activeTuples << ")";
      std::cout << " source last compaction offset is " << source->lastCompactionOffset()
                << " and active tuple count is " << source->m_activeTuples << std::endl;
    */

    uint32_t m_nextTupleInSourceOffset = source->lastCompactionOffset();
    int sourceTuplesPendingDeleteOnUndoRelease = 0;
    while (hasFreeTuples() && !source->isEmpty()) {
        TableTuple sourceTuple(table->schema());
        TableTuple destinationTuple(table->schema());

        bool foundSourceTuple = false;
        //Iterate further into the block looking for active tuples
        //Stop when running into the unused tuple boundary
        while (m_nextTupleInSourceOffset < source->unusedTupleBoundry()) {
            sourceTuple.move(&source->address()[m_tupleLength * m_nextTupleInSourceOffset]);
            m_nextTupleInSourceOffset++;
            if (sourceTuple.isActive()) {
                foundSourceTuple = true;
                break;
            }
        }

        if (!foundSourceTuple) {
            //The block isn't empty, but there are no more active tuples.
            //Some of the tuples that make it register as not empty must have been
            //pending delete and those aren't mergeable
            assert(sourceTuplesPendingDeleteOnUndoRelease);
            break;
        }

        //Can't move a tuple with a pending undo action, it would invalidate the pointer
        //Keep a count so that the block can be notified of the number
        //of tuples pending delete on undo release when calculating the correct bucket
        //index. If all the active tuples are pending delete on undo release the block
        //is effectively empty and shouldn't be considered for merge ops.
        //It will be completely discarded once the undo log releases the block.
        if (sourceTuple.isPendingDeleteOnUndoRelease()) {
            sourceTuplesPendingDeleteOnUndoRelease++;
            continue;
        }

        destinationTuple.move(nextFreeTuple().first);
        table->swapTuples(sourceTuple, destinationTuple);
        source->freeTuple(sourceTuple.address());
    }
    source->lastCompactionOffset(m_nextTupleInSourceOffset);

    int newBucketIndex = calculateBucketIndex();
    if (newBucketIndex != m_bucketIndex) {
        m_bucketIndex = newBucketIndex;
        //std::cout << "Merged " << static_cast<void*> (this) << "(" << m_activeTuples << ") with " << static_cast<void*>(source.get())  << "(" << source->m_activeTuples << ")";
        //std::cout << " found " << sourceTuplesPendingDeleteOnUndoRelease << " tuples pending delete on undo release "<< std::endl;
        return std::pair<int, int>(newBucketIndex, source->calculateBucketIndex(sourceTuplesPendingDeleteOnUndoRelease));
    } else {
        //std::cout << "Merged " << static_cast<void*> (this) << "(" << m_activeTuples << ") with " << static_cast<void*>(source.get()) << "(" << source->m_activeTuples << ")";
        //std::cout << " found " << sourceTuplesPendingDeleteOnUndoRelease << " tuples pending delete on undo release "<< std::endl;
        return std::pair<int, int>( -1, source->calculateBucketIndex(sourceTuplesPendingDeleteOnUndoRelease));
    }
}
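
The merge loop above can be reduced to a minimal standalone sketch: copy active rows from a sparse source block into free slots of the destination, skip rows that are pinned (the analogue of pending-delete-on-undo-release), and remember how far the pass got so the next pass resumes there. The CompactRow/CompactBlock types are hypothetical simplifications, not VoltDB's TupleBlock:

#include <cstddef>
#include <vector>

// "pinned" stands in for pending-delete-on-undo-release.
struct CompactRow { bool active; bool pinned; int payload; };

struct CompactBlock {
    std::vector<CompactRow> rows;
    std::size_t compactionOffset;    // resume point for the next merge pass
};

// Move active, unpinned rows from `source` into free slots of `dest`; return how
// many pinned rows were passed over (they keep the source from emptying).
int mergeBlocks(CompactBlock &dest, CompactBlock &source) {
    int pinnedSeen = 0;
    std::size_t cursor = source.compactionOffset;
    for (std::size_t d = 0; d < dest.rows.size(); ++d) {
        if (dest.rows[d].active) {
            continue;                             // destination slot already occupied
        }
        // Scan forward for the next movable source row, counting pinned ones.
        while (cursor < source.rows.size() &&
               (!source.rows[cursor].active || source.rows[cursor].pinned)) {
            if (source.rows[cursor].active) {
                ++pinnedSeen;                     // active but pinned: cannot move yet
            }
            ++cursor;
        }
        if (cursor == source.rows.size()) {
            break;                                // nothing left to move this pass
        }
        dest.rows[d] = source.rows[cursor];       // "move" the row into the free slot
        source.rows[cursor].active = false;       // free it in the source block
        ++cursor;
    }
    source.compactionOffset = cursor;             // next pass resumes here
    return pinnedSeen;
}
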