예제 #1
0
void MaterializedArray::materialize(const shared_ptr<Query>& query,
                                    MemChunk& materializedChunk,
                                    ConstChunk const& chunk,
                                    MaterializeFormat format)
    {
        nMaterializedChunks += 1;
        materializedChunk.initialize(chunk);
        materializedChunk.setBitmapChunk((Chunk*)chunk.getBitmapChunk());
        boost::shared_ptr<ConstChunkIterator> src 
            = chunk.getConstIterator(ChunkIterator::IGNORE_DEFAULT_VALUES|ChunkIterator::IGNORE_EMPTY_CELLS|
                                     (chunk.isSolid() ? ChunkIterator::INTENDED_TILE_MODE : 0));
        boost::shared_ptr<ChunkIterator> dst 
            = materializedChunk.getIterator(query,
                                            (src->getMode() & ChunkIterator::TILE_MODE)|ChunkIterator::ChunkIterator::NO_EMPTY_CHECK|ChunkIterator::SEQUENTIAL_WRITE);
        size_t count = 0;
        while (!src->end()) {
            if (!dst->setPosition(src->getPosition()))
                throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
            dst->writeItem(src->getItem());
            count += 1;
            ++(*src);
        }
        if (!(src->getMode() & ChunkIterator::TILE_MODE) &&
            !chunk.getArrayDesc().hasOverlap()) {
            materializedChunk.setCount(count);
        }
        dst->flush();
    }
예제 #2
0
파일: ShiftArray.cpp 프로젝트: cerbo/scidb
 /**
  * Attach a new input chunk and recompute this chunk's bounds.
  *
  * After delegating to DelegateChunk, the chunk is flagged as a clone and
  * the input chunk's first/last positions (requested with the flag set to
  * false) are translated through array.in2out() into firstPos / lastPos.
  *
  * @param inputChunk chunk of the input array to wrap
  */
 void ShiftChunk::setInputChunk(ConstChunk const& inputChunk)
 {
     // Argument passed to get{First,Last}Position(); false here, as in every call below.
     const bool overlapFlag = false;

     DelegateChunk::setInputChunk(inputChunk);
     isClone = true;

     // Map the input-space corners into output space.
     array.in2out(inputChunk.getFirstPosition(overlapFlag), firstPos);
     array.in2out(inputChunk.getLastPosition(overlapFlag), lastPos);
 }
예제 #3
0
/**
 * Attach a new input chunk belonging to the given array version.
 *
 * Marks the chunk as a clone, remembers the version, and derives all four
 * bounding positions (with and without overlap) from the input chunk by
 * passing each through prependVersion() together with `version`.
 *
 * @param inputChunk chunk of the versioned input array to wrap
 * @param version    version the input chunk belongs to
 */
void AllVersionsChunk::setInputChunk(ConstChunk const& inputChunk, VersionID version)
{
    // Named values for the boolean argument of get{First,Last}Position().
    const bool noOverlap = false;
    const bool withOverlap = true;

    DelegateChunk::setInputChunk(inputChunk);
    isClone = true;
    currVersion = version;

    // Bounds without overlap.
    prependVersion(firstPos, inputChunk.getFirstPosition(noOverlap), version);
    prependVersion(lastPos, inputChunk.getLastPosition(noOverlap), version);

    // Bounds including overlap.
    prependVersion(firstPosWithOverlap, inputChunk.getFirstPosition(withOverlap), version);
    prependVersion(lastPosWithOverlap, inputChunk.getLastPosition(withOverlap), version);
}
예제 #4
0
    /**
     * Merge the aggregate states stored in `with` into this chunk.
     *
     * If this chunk has no data yet, the payload of `with` is copied
     * verbatim (including its sparse/RLE flags) and written out. Otherwise
     * every non-null state of `with` is combined, via aggregate->merge(),
     * with the state already present at the same position (when that one
     * is non-null) and the result is written back at that position.
     *
     * @param with      chunk providing the states to merge in
     * @param aggregate aggregate whose state type must equal the attribute type
     * @param query     forwarded to the chunk iterator and to write()
     * @throws USER_EXCEPTION   if the chunk already has a disk chunk or is read-only
     * @throws SYSTEM_EXCEPTION on an aggregate/attribute type mismatch, a
     *                          non-nullable attribute, or a failed setPosition
     */
    void Chunk::aggregateMerge(ConstChunk const& with, AggregatePtr const& aggregate, boost::shared_ptr<Query>& query)
    {
        if (getDiskChunk() != NULL) {
            throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CHUNK_ALREADY_EXISTS);
        }
        if (isReadOnly()) {
            throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CANT_UPDATE_READ_ONLY_CHUNK);
        }

        AttributeDesc const& attrDesc = getAttributeDesc();
        if (aggregate->getStateType().typeId() != attrDesc.getType()) {
            throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_TYPE_MISMATCH_BETWEEN_AGGREGATE_AND_CHUNK);
        }
        if (!attrDesc.isNullable()) {
            // enforce equivalency w above merge()
            throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_AGGREGATE_STATE_MUST_BE_NULLABLE);
        }

        setCount(0);

        char* existing = (char*)getData();
        if (existing == NULL)
        {
            // Nothing to combine with: adopt the other chunk's payload as is.
            PinBuffer pin(with);
            char* incoming = (char*)with.getData();
            allocate(with.getSize());
            setSparse(with.isSparse());
            setRLE(with.isRLE());
            memcpy(getData(), incoming, getSize());
            write(query);
            return;
        }

        int writeMode = isSparse() ? ChunkIterator::SPARSE_CHUNK : 0;
        writeMode = writeMode|ChunkIterator::APPEND_CHUNK|ChunkIterator::NO_EMPTY_CHECK;
        boost::shared_ptr<ChunkIterator> target = getIterator(query, writeMode);
        boost::shared_ptr<ConstChunkIterator> source = with.getConstIterator(ChunkIterator::IGNORE_NULL_VALUES);

        for (; !source->end(); ++(*source))
        {
            Value& incomingState = source->getItem();
            if (incomingState.isNull()) {
                continue;
            }
            if (!target->setPosition(source->getPosition())) {
                throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
            }
            // Fold the state already stored here into the incoming one, then store the result.
            Value& storedState = target->getItem();
            if (!storedState.isNull()) {
                aggregate->merge(incomingState, storedState);
            }
            target->writeItem(incomingState);
        }
        target->flush();
    }
예제 #5
0
 /**
  * Merge the contents of `with` into this chunk.
  *
  * Two strategies are used:
  *  - raw: when this chunk has no data it takes a byte copy of `with`;
  *    when both chunks are dense, non-RLE, fixed-size, non-nullable and the
  *    attribute default is zero, the payloads are OR-ed byte by byte;
  *  - cell by cell: in every other case the cells of `with` are appended
  *    through a chunk iterator. If the array has no empty-bitmap attribute,
  *    cells equal to the attribute default are skipped.
  *
  * @param with  chunk whose cells are merged in
  * @param query forwarded to the chunk iterator and to write()
  * @throws USER_EXCEPTION   if the chunk already has a disk chunk, or the raw
  *                          payloads differ in size
  * @throws SYSTEM_EXCEPTION if the destination iterator rejects a position
  */
 void Chunk::merge(ConstChunk const& with, boost::shared_ptr<Query>& query)
 {
     if (getDiskChunk() != NULL) {
         throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CHUNK_ALREADY_EXISTS);
     }
     setCount(0); // unknown

     AttributeDesc const& attrDesc = getAttributeDesc();
     char* target = (char*)getData();
     Value const& defaultValue = attrDesc.getDefaultValue();

     // Anything that is not a plain dense array of zero-default fixed-size
     // values has to be merged through iterators.
     bool const cellByCell = target != NULL
         && (isSparse() || isRLE() || with.isSparse() || with.isRLE() || attrDesc.isNullable()
             || TypeLibrary::getType(attrDesc.getType()).variableSize()
             || !defaultValue.isZero());

     if (!cellByCell) {
         PinBuffer pin(with);
         char* source = (char*)with.getData();
         if (target == NULL) {
             // Empty destination: adopt the other chunk's payload and layout flags.
             allocate(with.getSize());
             setSparse(with.isSparse());
             setRLE(with.isRLE());
             memcpy(getData(), source, getSize());
         } else {
             if (getSize() != with.getSize()) {
                 throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CANT_MERGE_CHUNKS_WITH_VARYING_SIZE);
             }
             size_t const nBytes = getSize();
             for (size_t k = 0; k < nBytes; ++k) {
                 target[k] |= source[k];
             }
         }
         write(query);
         return;
     }

     int writeMode = isSparse() ? ChunkIterator::SPARSE_CHUNK : 0;
     writeMode = writeMode|ChunkIterator::APPEND_CHUNK|ChunkIterator::NO_EMPTY_CHECK;
     boost::shared_ptr<ChunkIterator> writer = getIterator(query, writeMode);
     boost::shared_ptr<ConstChunkIterator> reader =
         with.getConstIterator(ChunkIterator::IGNORE_EMPTY_CELLS|ChunkIterator::IGNORE_DEFAULT_VALUES);

     if (getArrayDesc().getEmptyBitmapAttribute() != NULL) {
         // Every cell delivered by the reader is written.
         for (; !reader->end(); ++(*reader)) {
             if (!writer->setPosition(reader->getPosition())) {
                 throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
             }
             Value const& cell = reader->getItem();
             writer->writeItem(cell);
         }
     } else {
         // ignore default values
         for (; !reader->end(); ++(*reader)) {
             Value const& cell = reader->getItem();
             if (cell != defaultValue) {
                 if (!writer->setPosition(reader->getPosition())) {
                     throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
                 }
                 writer->writeItem(cell);
             }
         }
     }
     writer->flush();
 }
    /**
     * Bitmap-compress a chunk into `dst`.
     *
     * Unless FORMAT_SENSITIVE_COMPRESSORS is defined this is a no-op that
     * returns `size`. With it defined, RLE chunks are also returned
     * untouched (`size`); all other chunks are handed to a local Bitmap
     * encoder and its result is returned.
     *
     * @param dst   output buffer
     * @param chunk chunk to compress
     * @param size  size of the chunk payload in bytes
     * @return the value produced by Bitmap::compress(), or `size` when no
     *         compression is attempted
     */
    size_t BitmapEncoding::compress(void* dst, const ConstChunk& chunk, size_t size) 
    {
#ifndef FORMAT_SENSITIVE_COMPRESSORS
        return size;
#else
        if (chunk.isRLE()) {
            return size;
        }
        Bitmap encoder;
        return encoder.compress(dst, chunk, size);
#endif
    }
예제 #7
0
    /**
     * Attach a new input chunk and derive this chunk's bounds from it.
     *
     * The chunk is treated as a clone only when the concatenation dimension
     * of the input array has zero chunk overlap. All four bounding positions
     * are copied from the input chunk and then moved along CONCAT_DIM by the
     * shift recorded in the owning ConcatArrayIterator.
     *
     * @param inputChunk chunk of one of the concatenated arrays
     */
    void ConcatChunk::setInputChunk(ConstChunk const& inputChunk)
    {
        DelegateChunk::setInputChunk(inputChunk);

        isClone = (inputChunk.getArrayDesc().getDimensions()[CONCAT_DIM].getChunkOverlap() == 0);
        direct = true;

        // Start from the input chunk's own bounds...
        firstPos = inputChunk.getFirstPosition(false);
        lastPos = inputChunk.getLastPosition(false);
        firstPosWithOverlap = inputChunk.getFirstPosition(true);
        lastPosWithOverlap = inputChunk.getLastPosition(true);

        // ...and translate them along the concatenation dimension if required.
        ConcatArrayIterator const& owner = (ConcatArrayIterator const&)iterator;
        Coordinate offset = owner.shift;
        if (offset == 0) {
            return;
        }
        firstPos[CONCAT_DIM] += offset;
        lastPos[CONCAT_DIM] += offset;
        firstPosWithOverlap[CONCAT_DIM] += offset;
        lastPosWithOverlap[CONCAT_DIM] += offset;
    }
예제 #8
0
 /**
  * Return the materialized copy of `inputChunk`, creating and caching it
  * on a miss.
  *
  * Lookups and cache updates are done while holding _mutex; the
  * materialization itself runs with the mutex released. Chunks are cached
  * per attribute and position, empty bitmaps per position. When a cache
  * has reached _cacheSize entries, its first entry is dropped before the
  * new one is stored.
  *
  * @param inputChunk chunk to materialize
  * @return the cached or freshly materialized chunk
  */
 boost::shared_ptr<MemChunk> MaterializedArray::getMaterializedChunk(ConstChunk const& inputChunk)
 {
     Coordinates const& chunkPos = inputChunk.getFirstPosition(false);
     AttributeID attrId = inputChunk.getAttributeDesc().getId();

     boost::shared_ptr<MemChunk> result;
     boost::shared_ptr<ConstRLEEmptyBitmap> emptyBitmap;
     {
         ScopedMutexLock cs(_mutex);
         result = _chunkCache[attrId][chunkPos];
         if (result) {
             // Cache hit: nothing else to do.
             return result;
         }
         result.reset(new MemChunk());
         emptyBitmap = _bitmapCache[chunkPos];
     }

     // Cache miss: build the chunk outside the critical section.
     boost::shared_ptr<Query> query(Query::getValidQueryPtr(_query));
     materialize(query, *result, inputChunk, _format);
     if (!emptyBitmap) {
         // No bitmap cached for this position yet: use the one of the new chunk.
         emptyBitmap = result->getEmptyBitmap();
     }
     result->setEmptyBitmap(emptyBitmap);

     {
         ScopedMutexLock cs(_mutex);
         if (_chunkCache[attrId].size() >= _cacheSize) {
             _chunkCache[attrId].erase(_chunkCache[attrId].begin());
         }
         _chunkCache[attrId][chunkPos] = result;

         if (_bitmapCache.size() >= _cacheSize) {
             _bitmapCache.erase(_bitmapCache.begin());
         }
         _bitmapCache[chunkPos] = emptyBitmap;
     }
     return result;
 }
    /**
     * Dictionary-encode the payload of `chunk` into `dst`.
     *
     * Elements of 1..8 bytes in dense, non-RLE, non-nullable chunks of
     * immutable arrays are encoded one element at a time; in every other
     * case the payload is treated as a sequence of single bytes. The
     * dictionary is emitted by createDictionary(), followed by one
     * fixed-width code per element.
     *
     * @param dst       output buffer; at most chunkSize - 1 bytes are offered
     *                  to the output iterator
     * @param chunk     chunk to compress
     * @param chunkSize size of the chunk payload in bytes
     * @return the size reported by the output iterator on success, or
     *         `chunkSize` when there is nothing to encode, no dictionary
     *         could be built, or the projected result would not be at least
     *         twice as small as the input
     */
    size_t DictionaryEncoding::Dictionary::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
    {
        uint8_t *readPtr = (uint8_t *)chunk.getData();
        TypeId type = chunk.getAttributeDesc().getType();        
        size_t elementSize = TypeLibrary::getType(type).byteSize();
        size_t nElems;

        if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse() || chunk.getAttributeDesc().isNullable())
        {
            // Not a plain array of small fixed-size values: encode byte by byte.
            nElems = chunkSize;
            elementSize = 1;
        }
        else
        {
            nElems = chunkSize / elementSize;
        }

        // Bail out before the output iterator exists. With chunkSize == 0 the
        // capacity expression `chunkSize - 1` below would wrap around to the
        // largest size_t, and createDictionary() is handed that iterator.
        if(nElems == 0)
        {
            return chunkSize;
        }

        ByteOutputItr out((uint8_t *) dst, chunkSize - 1);
        BitOutputItr outBits(&out);

        uint32_t uniques = (uint32_t) createDictionary(readPtr, elementSize, nElems, out);
        if(uniques == 0)
        {
            return chunkSize;
        }

        // 0-indexed, so values span from 0...uniques-1, log is 0-based, so bring it back to 1...n bits
        size_t codeLength;
        if(uniques <= 2)
        {
            codeLength = 1;
        }
        else
        {
            codeLength = ceil(log2(uniques-1)) + 1;
        }

        // project size and terminate if it will be too large
        size_t codesSize = (nElems * codeLength + 7) >> 3;
        size_t totalCompressed = 1 + uniques * elementSize + codesSize;

        if(totalCompressed*2 >= chunkSize) // if we can't get at least 2:1 it is not worth doing
        {
            return chunkSize;
        }

        // Only the low elementSize bytes of `value` are overwritten per element;
        // the remaining bytes keep their initial zero.
        uint64_t value = 0;
        // NOTE(review): the code is held in 8 bits, which presumably relies on
        // createDictionary() never reporting more than 256 distinct values - confirm.
        uint8_t code = 0;
        for(size_t i = 0; i < nElems; ++i)
        {
            memcpy((uint8_t *) &value, readPtr, elementSize);
            code = _encodeDictionary[value];
            outBits.put(code, codeLength);
            readPtr += elementSize;
        }

        outBits.flush();
        size_t compressedSize = out.close();

        return compressedSize;
    }
예제 #10
0
 /**
  *  Private predicate: true iff the given value has to be passed to the
  *  aggregate.
  *
  *  A value is skipped in two cases: it is null and the aggregate ignores
  *  nulls, or isDefaultFor() reports it as the default for the input
  *  chunk's attribute type and the aggregate ignores zeroes.
  */
 inline bool WindowChunk::valueIsNeededForAggregate ( const Value & val, const ConstChunk & inputChunk ) const
 {
     if (val.isNull() && _aggregate->ignoreNulls()) {
         return false;
     }
     if (isDefaultFor(val, inputChunk.getAttributeDesc().getType()) && _aggregate->ignoreZeroes()) {
         return false;
     }
     return true;
 }
/**
 * Dictionary-encode the payload of `chunk` into `dst`.
 *
 * Only active when FORMAT_SENSITIVE_COMPRESSORS is defined; otherwise the
 * function returns `chunkSize` without touching `dst`.
 *
 * Output layout: a 4-byte count of unique values, the unique values
 * themselves (their order defines their codes), then one code of
 * _codeLength bytes per group of _entriesPerCode elements. A trailing
 * partial group is padded with _values[0].
 *
 * Element sizes of 0 or more than 8 bytes, RLE or sparse chunks, and chunks
 * of non-immutable arrays are encoded as a sequence of single bytes.
 *
 * @param dst       output buffer; at most chunkSize - 1 bytes are offered to
 *                  the output iterator
 * @param chunk     chunk to compress
 * @param chunkSize size of the chunk payload in bytes
 * @return the size reported by the output iterator on success, or
 *         `chunkSize` when the input is empty, the dictionary is useless
 *         (no values, or every element unique), or the output does not fit
 */
size_t DictionaryEncoding::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
{
#ifdef FORMAT_SENSITIVE_COMPRESSORS
    uint8_t *src = (uint8_t *)chunk.getData();
    TypeId type = chunk.getAttributeDesc().getType();
    size_t elementSize = TypeLibrary::getType(type).byteSize();

    // An element size of 0 is an expected input (it is handled right below),
    // so it must never be used as a divisor.
    size_t nElems = (elementSize != 0) ? chunkSize / elementSize : chunkSize;
    if(!nElems) {
        return chunkSize;
    }

    if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse()) // too big or too small or sparse = regard it as a string
    {
        nElems = chunkSize;
        elementSize = 1;
    }

    ByteOutputItr out((uint8_t *) dst, chunkSize-1);

    uint32_t uniqueValues = createDictionary(src, elementSize, nElems);
    // Checked before the block arithmetic so that an empty dictionary never
    // reaches the division by _entriesPerCode.
    if(uniqueValues == 0 || uniqueValues == nElems) {
        return chunkSize;
    }

    // dictionary-specific
    assert(_entriesPerCode);
    uint32_t blocks = static_cast<uint32_t>(nElems / _entriesPerCode);
    uint32_t remainder = nElems % _entriesPerCode;
    size_t blockEntriesSize = _entriesPerCode * elementSize;

    std::string toEncode;
    toEncode.reserve(blockEntriesSize);

    if(out.putArray((uint8_t *) &uniqueValues, 4) == -1) {
        return chunkSize;
    }

    // output a list of unique values; we infer their codes by the order that they are read in
    // i.e., first elementSize bytes translate to code 0 and so on
    for(uint32_t i = 0; i < uniqueValues; ++i)
    {
        if(out.putArray((uint8_t *) _values[i].data(), elementSize) == -1) {
            return chunkSize;
        }
    }// end dictionary output

    // now output encoded data, one code per full block of entries
    uint8_t *readPtr = src;
    uint32_t code;
    for(uint32_t i = 0; i < blocks; ++i)
    {
        toEncode.assign((char *) readPtr, blockEntriesSize);
        readPtr += blockEntriesSize;
        code = _encodeDictionary[toEncode];

        if(out.putArray((uint8_t *) &code, _codeLength) == -1) {
            return chunkSize;
        }
    }

    if(remainder)
    {
        // output the last few entries, padded with _values[0] up to a full block
        toEncode.assign((char *) readPtr, elementSize * remainder);
        for(uint32_t i = 0; i < _entriesPerCode - remainder; ++i)
        {
            toEncode.append(_values[0]);
        }
        code = _encodeDictionary[toEncode];
        if(out.putArray((uint8_t *) &code, _codeLength) == -1) {
            return chunkSize;
        }
    }

    size_t compressed_size = out.close();

    return compressed_size;
#else
    return chunkSize;
#endif
}
예제 #12
0
    /**
     * Bitmap-encode the payload of `chunk` into `dst`.
     *
     * The payload is always handled as a string of single bytes. For every
     * distinct byte value a bitmap ("bucket") with one bit per payload byte
     * is kept in _bitmaps, and the bit at each position where that value
     * occurs is set. Once all bytes are processed the buckets are written
     * to the output via fillOutput().
     *
     * Encoding is abandoned, returning `chunkSize`, when the payload is
     * empty or when more buckets would be needed than the limit derived
     * from chunkSize and the per-bucket entry length.
     *
     * @param dst       output buffer; at most chunkSize - 1 bytes are
     *                  offered to the output iterator
     * @param chunk     chunk to compress
     * @param chunkSize size of the chunk payload in bytes
     * @return the size reported by the output iterator, or `chunkSize`
     *         when encoding was abandoned
     */
    size_t BitmapEncoding::Bitmap::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
    {
        char const* payload = (char const*)chunk.getData();
        TypeId type = chunk.getAttributeDesc().getType();
        _elementSize = TypeLibrary::getType(type).byteSize();

        /* No more immutable arrays, to keep consistent with old code, always treat data as string
         */
        _bitmapElements = chunkSize;
        _elementSize = 1;

        if(!_bitmapElements) {
            return chunkSize;
        }

        ByteOutputItr out((uint8_t *) dst, chunkSize-1);
        uint32_t bucketBytes = (_bitmapElements + 7) >> 3;   // one bit per element, rounded up

        clearBitmapCache();

        // Each bucket costs its bitmap plus the value it stands for.
        size_t entryLength = bucketBytes + _elementSize;
        assert(entryLength);
        uint32_t bucketLimit = floor(chunkSize / entryLength);
        if(bucketLimit * entryLength == chunkSize)
        {
            // we want to beat the uncompressed case
            --bucketLimit;
        }

        // make the key of our hash a string so that
        // we can compare variable-length element sizes
        std::string key;
        char const* cursor = payload;
        uint32_t bucketsInUse = 0;

        for(uint32_t pos = 0; pos < _bitmapElements; ++pos)
        {
            key.assign(cursor, _elementSize);
            cursor += _elementSize;

            uint8_t *bucket = NULL;
            std::map<std::string, uint8_t*>::iterator found = _bitmaps.find(key);

            if(found != _bitmaps.end())
            {
                // value seen before: reuse its bitmap
                bucket = found->second;
            }
            else
            {
                ++bucketsInUse;
                if(bucketsInUse > bucketLimit)
                {
                    return chunkSize;
                }

                // first occurrence: start a zeroed bitmap for this value
                bucket = new uint8_t[bucketBytes];
                _bitmaps[key] = bucket;
                for(uint32_t k = 0; k < bucketBytes; ++k)
                {
                    bucket[k] = 0;
                }
            }
            assert(bucket!=NULL);
            setBit(bucket, pos);
        }

        // drop all of bitmaps to dst
        fillOutput(&out);

        size_t compressedSize = out.close();
        return compressedSize;
    }