Exemplo n.º 1
0
/**
 * Copy the cells of `chunk` into `materializedChunk`.
 *
 * The source is read with default values and empty cells skipped (and with
 * INTENDED_TILE_MODE requested when the chunk reports itself as solid); the
 * destination is written sequentially, without empty checks, in tile mode
 * iff the source iterator ended up in tile mode.
 *
 * @param query             query handle forwarded to the destination iterator
 * @param materializedChunk chunk that is initialized from `chunk` and filled
 * @param chunk             source chunk
 * @param format            not referenced by this implementation
 * @throws SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) when the
 *         destination iterator cannot be positioned on a source coordinate
 */
void MaterializedArray::materialize(const shared_ptr<Query>& query,
                                    MemChunk& materializedChunk,
                                    ConstChunk const& chunk,
                                    MaterializeFormat format)
    {
        ++nMaterializedChunks;

        materializedChunk.initialize(chunk);
        materializedChunk.setBitmapChunk((Chunk*)chunk.getBitmapChunk());

        // Reader side: skip defaults and empty cells; ask for tile mode on solid chunks.
        int const readMode = ChunkIterator::IGNORE_DEFAULT_VALUES
                           | ChunkIterator::IGNORE_EMPTY_CELLS
                           | (chunk.isSolid() ? ChunkIterator::INTENDED_TILE_MODE : 0);
        boost::shared_ptr<ConstChunkIterator> src = chunk.getConstIterator(readMode);

        // Writer side: inherit only the TILE_MODE bit from the reader.
        boost::shared_ptr<ChunkIterator> dst = materializedChunk.getIterator(
            query,
            (src->getMode() & ChunkIterator::TILE_MODE)
                | ChunkIterator::NO_EMPTY_CHECK
                | ChunkIterator::SEQUENTIAL_WRITE);

        size_t nCopied = 0;
        for (; !src->end(); ++(*src)) {
            if (!dst->setPosition(src->getPosition())) {
                throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
            }
            dst->writeItem(src->getItem());
            ++nCopied;
        }

        // The number of iterations is only recorded as the element count when
        // the source was not iterated in tile mode and the array has no overlap.
        bool const tiled = (src->getMode() & ChunkIterator::TILE_MODE) != 0;
        if (!tiled && !chunk.getArrayDesc().hasOverlap()) {
            materializedChunk.setCount(nCopied);
        }

        dst->flush();
    }
Exemplo n.º 2
0
    /**
     * Attach a new input chunk and recompute this chunk's cached bounds.
     *
     * The four cached positions (first/last, with and without overlap) are
     * taken from the input chunk and then translated along CONCAT_DIM by the
     * shift stored in the owning ConcatArrayIterator.
     *
     * @param inputChunk chunk to delegate to
     */
    void ConcatChunk::setInputChunk(ConstChunk const& inputChunk)
    {
        DelegateChunk::setInputChunk(inputChunk);

        // Flagged as a clone only when the concatenation dimension has no chunk overlap.
        isClone = (0 == inputChunk.getArrayDesc().getDimensions()[CONCAT_DIM].getChunkOverlap());
        direct = true;

        firstPos            = inputChunk.getFirstPosition(false);
        firstPosWithOverlap = inputChunk.getFirstPosition(true);
        lastPos             = inputChunk.getLastPosition(false);
        lastPosWithOverlap  = inputChunk.getLastPosition(true);

        Coordinate const offset = ((ConcatArrayIterator const&)iterator).shift;
        if (offset == 0) {
            return; // nothing to translate
        }

        firstPos[CONCAT_DIM]            += offset;
        firstPosWithOverlap[CONCAT_DIM] += offset;
        lastPos[CONCAT_DIM]             += offset;
        lastPosWithOverlap[CONCAT_DIM]  += offset;
    }
    /**
     * Dictionary-encode the payload of `chunk` into `dst`.
     *
     * Elements are fixed-width values of 1..8 bytes; chunks that are RLE,
     * sparse, nullable, mutable, or whose type reports a byte size of 0 or
     * more than 8 are treated as a plain sequence of single bytes.
     * createDictionary() is handed the output iterator and its result is used
     * as the number of distinct values; each element is then emitted as a
     * fixed-width bit code looked up in _encodeDictionary.
     *
     * @param dst        output buffer; at most chunkSize - 1 bytes are offered
     *                   to the output iterator
     * @param chunk      chunk whose raw data is encoded
     * @param chunkSize  size of the chunk data in bytes
     * @return the value reported by the output iterator's close() on success,
     *         or chunkSize when there is nothing to encode, the dictionary
     *         came back empty, or the projected size is not at least 2:1
     */
    size_t DictionaryEncoding::Dictionary::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
    {
        uint8_t *readPtr = (uint8_t *)chunk.getData();
        TypeId type = chunk.getAttributeDesc().getType();
        size_t elementSize = TypeLibrary::getType(type).byteSize();
        size_t nElems;

        if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse() || chunk.getAttributeDesc().isNullable())
        {
            // fall back to byte-wise encoding
            nElems = chunkSize;
            elementSize = 1;
        }
        else
        {
            nElems = chunkSize / elementSize;
        }

        // Bail out on empty input *before* building the output iterator:
        // with chunkSize == 0 the bound "chunkSize - 1" below would wrap
        // around to SIZE_MAX and the dictionary builder would be given an
        // effectively unbounded destination.
        if(!nElems)
        {
            return chunkSize;
        }

        ByteOutputItr out((uint8_t *) dst, chunkSize - 1);
        BitOutputItr outBits(&out);

        uint32_t uniques = (uint32_t) createDictionary(readPtr, elementSize, nElems, out);
        if(!uniques)
        {
            return chunkSize;
        }

        // 0-indexed, so values span from 0...uniques-1, log is 0-based, so bring it back to 1...n bits
        size_t codeLength;
        if(uniques <= 2)
        {
            codeLength = 1;
        }
        else
        {
            codeLength = ceil(log2(uniques-1)) + 1;
        }

        // project size and terminate if it will be too large
        size_t codesSize = (nElems * codeLength + 7) >> 3;
        size_t totalCompressed = 1 + uniques * elementSize + codesSize;

        if(totalCompressed*2 >= chunkSize) // if we can't get at least 2:1 it is not worth doing
        {
            return chunkSize;
        }

        // Only the low elementSize bytes of 'value' are overwritten per
        // element; the remaining bytes stay zero from the initialisation.
        uint64_t value = 0;
        // NOTE(review): 'code' is 8 bits wide, so this assumes codeLength <= 8
        // (i.e. createDictionary never reports more than 256 values) -- confirm.
        uint8_t code = 0;

        for(size_t i = 0; i < nElems; ++i)
        {
            memcpy((uint8_t *) &value, readPtr, elementSize);
            code = _encodeDictionary[value];
            outBits.put(code, codeLength);
            readPtr += elementSize;
        }

        outBits.flush();
        size_t compressedSize = out.close();

        return compressedSize;
    }
/**
 * Dictionary-encode the payload of `chunk` into `dst`.
 *
 * Only active when FORMAT_SENSITIVE_COMPRESSORS is defined; otherwise the
 * function returns chunkSize without touching `dst`.
 *
 * Output layout produced here: a 4-byte count of unique values, the unique
 * values themselves (elementSize bytes each, in _values order), then one
 * _codeLength-byte code per group of _entriesPerCode elements. A trailing
 * partial group is padded with _values[0] before it is looked up.
 *
 * @param dst        output buffer; at most chunkSize - 1 bytes are offered to
 *                   the output iterator
 * @param chunk      chunk whose raw data is encoded
 * @param chunkSize  size of the chunk data in bytes
 * @return the value reported by the output iterator's close() on success, or
 *         chunkSize when the input is empty, every element is unique, the
 *         dictionary is empty, or the output iterator rejects a write
 */
size_t DictionaryEncoding::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
{
#ifdef FORMAT_SENSITIVE_COMPRESSORS
    uint8_t *src = (uint8_t *)chunk.getData();
    TypeId type = chunk.getAttributeDesc().getType();
    size_t elementSize = TypeLibrary::getType(type).byteSize();

    // A byte size of 0 is handled by the byte-wise fallback below, so it must
    // not be used as a divisor here (integer division by zero is undefined).
    size_t nElems = (elementSize == 0) ? chunkSize : chunkSize / elementSize;

    if(!nElems) {
        return chunkSize;
    }

    if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse()) // too big or too small or sparse = regard it as a string
    {
        nElems = chunkSize;
        elementSize = 1;
    }

    // nElems != 0 implies chunkSize != 0, so chunkSize-1 cannot wrap.
    ByteOutputItr out((uint8_t *) dst, chunkSize-1);

    uint32_t uniqueValues = createDictionary(src, elementSize, nElems);
    if(uniqueValues == nElems) {
        return chunkSize;
    }
    // Checked before the per-code arithmetic so that an empty dictionary
    // never reaches the division by _entriesPerCode.
    if(uniqueValues == 0) {
        return chunkSize;
    }

    // dictionary-specific
    assert(_entriesPerCode);
    uint32_t blocks = static_cast<uint32_t>(nElems / _entriesPerCode);
    uint32_t remainder = nElems % _entriesPerCode;
    size_t blockEntriesSize = _entriesPerCode * elementSize;

    if(out.putArray((uint8_t *) &uniqueValues, 4) == -1) {
        return chunkSize;
    }

    // output a list of unique values; we infer their codes by the order that they are read in
    // i.e., first elementSize bytes translate to code 0 and so on
    uint32_t i;
    for(i = 0; i < uniqueValues; ++i)
    {
        // put value
        if(out.putArray((uint8_t *) _values[i].data(), elementSize) == -1) {
            return chunkSize;
        }
    }// end dictionary output

    // now output encoded data, one code per full group of _entriesPerCode elements
    std::string toEncode;
    toEncode.reserve(blockEntriesSize);
    uint8_t *readPtr = src;
    uint32_t code;

    for(i = 0; i < blocks; ++i)
    {
        toEncode.assign((char *) readPtr, blockEntriesSize);
        readPtr += blockEntriesSize;
        code = _encodeDictionary[toEncode];

        // NOTE(review): writes the first _codeLength bytes of a 32-bit value,
        // which presumably relies on little-endian layout -- confirm.
        if(out.putArray((uint8_t *) &code, _codeLength) == -1) {
            return chunkSize;
        }
    }

    if(remainder)
    {
        // output the last few entries --
        toEncode.assign((char *) readPtr, elementSize * remainder);
        // pad it with _value[0]
        for(i = 0; i < _entriesPerCode - remainder; ++i)
        {
            toEncode.append(_values[0]);
        }
        code = _encodeDictionary[toEncode];
        if(out.putArray((uint8_t *) &code, _codeLength) == -1) {
            return chunkSize;
        }
    }

    size_t compressed_size = out.close();

    return compressed_size;
#else
    return chunkSize;
#endif
}
    /**
     * Bitmap-encode the payload of `chunk` into `dst`.
     *
     * The data is split into fixed-width elements (single bytes when the type
     * reports a size of 0 or more than 8, or the chunk is sparse, nullable or
     * mutable). Each distinct element value gets its own zero-initialised
     * bucket of ceil(elements / 8) bytes in _bitmaps, and setBit() is called
     * on that bucket with the element's index. fillOutput() then emits the
     * collected bitmaps through the output iterator.
     *
     * @param dst        output buffer; at most chunkSize-1 bytes are offered
     *                   to the output iterator
     * @param chunk      chunk whose raw data is encoded
     * @param chunkSize  size of the chunk data in bytes
     * @return the value reported by the output iterator's close(), or
     *         chunkSize when there are no elements or more distinct values
     *         appear than the bucket budget allows
     */
    size_t BitmapEncoding::Bitmap::compress(void* dst, const ConstChunk& chunk, size_t chunkSize) 
    {
        char const* cursor = (char const*)chunk.getData();
        TypeId type = chunk.getAttributeDesc().getType();
        _elementSize = TypeLibrary::getType(type).byteSize();

        bool const byteWise = _elementSize == 0
                           || _elementSize > 8
                           || chunk.isSparse()
                           || !chunk.getArrayDesc().isImmutable()
                           || chunk.getAttributeDesc().isNullable();
        if (byteWise) {
            _bitmapElements = chunkSize;
            _elementSize = 1;
        } else {
            _bitmapElements = chunkSize / _elementSize;
        }

        if (_bitmapElements == 0) {
            return chunkSize;
        }

        ByteOutputItr out((uint8_t *) dst, chunkSize-1);
        uint32_t bucketSize = (_bitmapElements + 7) >> 3;   // bytes per bitmap
        uint32_t bucketsInUse = 0;

        clearBitmapCache();

        // Budget: how many (bitmap + value) entries fit in chunkSize bytes.
        // An exact fit is reduced by one so the result is strictly smaller
        // than the uncompressed data.
        size_t entryBytes = bucketSize + _elementSize;
        assert(entryBytes);
        uint32_t maxBuckets = floor(chunkSize / entryBytes);
        if (maxBuckets * entryBytes == chunkSize) {
            --maxBuckets;
        }

        // The map is keyed by the element's raw bytes held in a string, so
        // elements of any width can be compared.
        std::string key;
        for (uint32_t idx = 0; idx < _bitmapElements; ++idx) {
            key.assign(cursor, _elementSize);
            cursor += _elementSize;

            uint8_t *bucket = NULL;
            std::map<std::string, uint8_t*>::iterator hit = _bitmaps.find(key);
            if (hit != _bitmaps.end()) {
                bucket = hit->second;
            } else {
                // first occurrence of this value: needs a fresh bucket
                ++bucketsInUse;
                if (bucketsInUse > maxBuckets) {
                    return chunkSize;
                }
                bucket = new uint8_t[bucketSize]();   // value-initialised => all zero
                _bitmaps[key] = bucket;
            }

            assert(bucket!=NULL);
            setBit(bucket, idx);
        }

        // drop all of bitmaps to dst
        fillOutput(&out);

        return out.close();
    }