Exemplo n.º 1
0
 /**
  * Merge the cells of another chunk into this chunk.
  *
  * Two strategies are used:
  *  - a raw buffer path (copy when this chunk has no data yet, byte-wise OR
  *    otherwise), followed by write(query);
  *  - a cell-by-cell path through chunk iterators, taken when this chunk
  *    already has data and either chunk is sparse or RLE, the attribute is
  *    nullable or variable-size, or the default value is not zero.
  *
  * Throws USER_EXCEPTION if this chunk already has a disk chunk or if the
  * byte-wise path sees chunks of different sizes; throws SYSTEM_EXCEPTION if
  * the destination iterator refuses a position.
  *
  * @param with  chunk whose content is merged into this one
  * @param query query handle forwarded to getIterator() / write()
  */
 void Chunk::merge(ConstChunk const& with, boost::shared_ptr<Query>& query)
 {
     if (getDiskChunk() != NULL) {
         throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CHUNK_ALREADY_EXISTS);
     }
     setCount(0); // unknown

     AttributeDesc const& attrDesc = getAttributeDesc();
     char* target = (char*)getData();
     Value const& dflt = attrDesc.getDefaultValue();

     // The iterator-based merge is only needed when there is existing data
     // AND some property rules out treating the buffers as plain bytes.
     bool const cellByCell = target != NULL
         && (isSparse() || isRLE() || with.isSparse() || with.isRLE()
             || attrDesc.isNullable()
             || TypeLibrary::getType(attrDesc.getType()).variableSize()
             || !dflt.isZero());

     if (!cellByCell) {
         // Raw buffer path; the source stays pinned until write() has returned.
         PinBuffer scope(with);
         char* source = (char*)with.getData();
         if (target != NULL) {
             if (getSize() != with.getSize()) {
                 throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CANT_MERGE_CHUNKS_WITH_VARYING_SIZE);
             }
             size_t const total = getSize();
             for (size_t pos = 0; pos < total; ++pos) {
                 target[pos] |= source[pos];
             }
         } else {
             // Nothing here yet: adopt the source's size, flags and bytes.
             allocate(with.getSize());
             setSparse(with.isSparse());
             setRLE(with.isRLE());
             memcpy(getData(), source, getSize());
         }
         write(query);
         return;
     }

     int const iterMode = (isSparse() ? ChunkIterator::SPARSE_CHUNK : 0)
         | ChunkIterator::APPEND_CHUNK | ChunkIterator::NO_EMPTY_CHECK;
     boost::shared_ptr<ChunkIterator> writer = getIterator(query, iterMode);
     boost::shared_ptr<ConstChunkIterator> reader =
         with.getConstIterator(ChunkIterator::IGNORE_EMPTY_CELLS|ChunkIterator::IGNORE_DEFAULT_VALUES);

     if (getArrayDesc().getEmptyBitmapAttribute() == NULL) {
         // No empty-bitmap attribute: ignore default values.
         for (; !reader->end(); ++(*reader)) {
             Value const& cell = reader->getItem();
             if (!(cell != dflt)) {
                 continue;
             }
             if (!writer->setPosition(reader->getPosition())) {
                 throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
             }
             writer->writeItem(cell);
         }
     } else {
         // Empty-bitmap attribute present: copy every cell the source iterator yields.
         for (; !reader->end(); ++(*reader)) {
             if (!writer->setPosition(reader->getPosition())) {
                 throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
             }
             Value const& cell = reader->getItem();
             writer->writeItem(cell);
         }
     }
     writer->flush();
 }
Exemplo n.º 2
0
    /**
     * Merge another chunk of aggregate states into this chunk.
     *
     * When this chunk has no data yet, the source buffer (with its sparse/RLE
     * flags) is copied verbatim and written. Otherwise every non-null source
     * item is combined, via aggregate->merge(), with the item already stored
     * at the same position (if that one is non-null) and written back.
     *
     * Throws USER_EXCEPTION if this chunk already has a disk chunk or is
     * read-only; throws SYSTEM_EXCEPTION if the aggregate state type differs
     * from the attribute type, if the attribute is not nullable, or if the
     * destination iterator refuses a position.
     *
     * @param with      chunk whose aggregate states are merged in
     * @param aggregate aggregate used to combine two states
     * @param query     query handle forwarded to getIterator() / write()
     */
    void Chunk::aggregateMerge(ConstChunk const& with, AggregatePtr const& aggregate, boost::shared_ptr<Query>& query)
    {
        if (getDiskChunk() != NULL) {
            throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CHUNK_ALREADY_EXISTS);
        }
        if (isReadOnly()) {
            throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CANT_UPDATE_READ_ONLY_CHUNK);
        }

        AttributeDesc const& attrDesc = getAttributeDesc();
        if (aggregate->getStateType().typeId() != attrDesc.getType()) {
            throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_TYPE_MISMATCH_BETWEEN_AGGREGATE_AND_CHUNK);
        }
        if (!attrDesc.isNullable()) {
            // enforce equivalency with merge() above
            throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_AGGREGATE_STATE_MUST_BE_NULLABLE);
        }

        setCount(0);

        if (getData() == NULL) {
            // Nothing stored yet: take over the source buffer as-is.
            PinBuffer scope(with);
            char* source = (char*)with.getData();
            allocate(with.getSize());
            setSparse(with.isSparse());
            setRLE(with.isRLE());
            memcpy(getData(), source, getSize());
            write(query);
            return;
        }

        int const iterMode = (isSparse() ? ChunkIterator::SPARSE_CHUNK : 0)
            | ChunkIterator::APPEND_CHUNK | ChunkIterator::NO_EMPTY_CHECK;
        boost::shared_ptr<ChunkIterator> writer = getIterator(query, iterMode);
        boost::shared_ptr<ConstChunkIterator> reader = with.getConstIterator(ChunkIterator::IGNORE_NULL_VALUES);

        for (; !reader->end(); ++(*reader))
        {
            Value& incoming = reader->getItem();
            if (incoming.isNull()) {
                continue;
            }
            if (!writer->setPosition(reader->getPosition())) {
                throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
            }
            Value& existing = writer->getItem();
            if (!existing.isNull()) {
                // The combined state is accumulated into the incoming value,
                // which is what gets written below.
                aggregate->merge(incoming, existing);
            }
            writer->writeItem(incoming);
        }
        writer->flush();
    }
/**
 * Compress a chunk's payload into dst using dictionary encoding.
 *
 * Output layout, as written below: a 4-byte count of unique values, the
 * unique values themselves (their order defines their code), then one
 * _codeLength-byte code for each group of _entriesPerCode elements. A trailing
 * partial group is padded with _values[0] before being looked up.
 *
 * The implementation is only compiled when FORMAT_SENSITIVE_COMPRESSORS is
 * defined; otherwise the function just returns chunkSize.
 *
 * @param dst       output buffer; the output iterator is bounded to
 *                  chunkSize-1 bytes
 * @param chunk     chunk whose data is compressed
 * @param chunkSize size in bytes of the chunk data
 * @return the size reported by the output iterator on success, or chunkSize
 *         when compression is skipped (no elements, no unique values, every
 *         element unique) or a write to the output reports failure
 */
size_t DictionaryEncoding::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
{
#ifdef FORMAT_SENSITIVE_COMPRESSORS
    uint8_t *src = (uint8_t *)chunk.getData();
    TypeId type = chunk.getAttributeDesc().getType();
    size_t elementSize = TypeLibrary::getType(type).byteSize();

    // elementSize can be 0 (that case is handled just below by falling back
    // to byte granularity), so the division must be guarded.
    size_t nElems = (elementSize != 0) ? chunkSize / elementSize : chunkSize;
    if(!nElems) {
        return chunkSize;
    }

    // too big or too small or sparse = regard it as a string of bytes
    if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse())
    {
        nElems = chunkSize;
        elementSize = 1;
    }

    ByteOutputItr out((uint8_t *) dst, chunkSize-1);

    uint32_t uniqueValues = createDictionary(src, elementSize, nElems);
    // Nothing to gain if every element is distinct, and nothing to encode
    // with an empty dictionary. Checked before _entriesPerCode is used as a
    // divisor below.
    if(uniqueValues == nElems || uniqueValues == 0) {
        return chunkSize;
    }

    // dictionary-specific
    assert(_entriesPerCode);
    uint32_t blocks = static_cast<uint32_t>(nElems / _entriesPerCode);
    uint32_t remainder = nElems % _entriesPerCode;
    size_t blockEntriesSize = _entriesPerCode * elementSize;

    // Scratch key holding one full group of entries.
    std::string toEncode;
    toEncode.reserve(blockEntriesSize);

    if(out.putArray((uint8_t *) &uniqueValues, 4) == -1) {
        return chunkSize;
    }

    // output a list of unique values; we infer their codes by the order that they are read in
    // i.e., first elementSize bytes translate to code 0 and so on
    uint32_t i;
    for(i = 0; i < uniqueValues; ++i)
    {
        if(out.putArray((uint8_t *) _values[i].data(), elementSize) == -1) {
            return chunkSize;
        }
    }// end dictionary output

    // now output encoded data, one code per full group of entries
    uint8_t *readPtr = src;
    uint32_t code;
    for(i = 0; i < blocks; ++i)
    {
        toEncode.assign((char *) readPtr, blockEntriesSize);
        readPtr += blockEntriesSize;
        code = _encodeDictionary[toEncode];

        if(out.putArray((uint8_t *) &code, _codeLength) == -1) {
            return chunkSize;
        }
    }

    if(remainder)
    {
        // output the last few entries, padded to a full group with _values[0]
        toEncode.assign((char *) readPtr, elementSize * remainder);
        for(i = 0; i < _entriesPerCode - remainder; ++i)
        {
            toEncode.append(_values[0]);
        }
        code = _encodeDictionary[toEncode];
        if(out.putArray((uint8_t *) &code, _codeLength) == -1) {
            return chunkSize;
        }
    }

    return out.close();
#else
    return chunkSize;
#endif
}
    size_t DictionaryEncoding::Dictionary::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
    {
        uint8_t *readPtr = (uint8_t *)chunk.getData();
        TypeId type = chunk.getAttributeDesc().getType();        
        size_t elementSize = TypeLibrary::getType(type).byteSize();
        size_t nElems;

        if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse() || chunk.getAttributeDesc().isNullable())
        {
            nElems = chunkSize;
            elementSize = 1;
        }
        else
        {
            nElems = chunkSize / elementSize;
        }

        size_t i;
        uint64_t value = 0;
        uint8_t code = 0;
        ByteOutputItr out((uint8_t *) dst, chunkSize - 1);
        BitOutputItr outBits(&out);




        uint32_t uniques = (uint32_t) createDictionary(readPtr, elementSize, nElems, out);
  
        size_t codeLength;
        uniques <= 2 ? codeLength = 1 : codeLength = ceil(log2(uniques-1)) + 1;  // 0-indexed, so values span from 0...uniques-1, log is 0-based, so bring it back to 1...n bits
    
     
  
        // project size and terminate if it will be too large
        size_t codesSize = (nElems * codeLength + 7) >> 3;
        size_t totalCompressed = 1 + uniques * elementSize + codesSize;

        if(totalCompressed*2 >= chunkSize) // if we can't get at least 2:1 it is not worth doing
        {
            return chunkSize;
        }



        if(!nElems || !uniques) 
        {
            return chunkSize;
        }

        for(i = 0; i < nElems; ++i)
        {
            memcpy((uint8_t *) &value, readPtr, elementSize);
            code = _encodeDictionary[value];
            outBits.put(code, codeLength);
            readPtr += elementSize;
        }
  
        outBits.flush();
        size_t compressedSize = out.close();

  
        return compressedSize;

    }
    size_t BitmapEncoding::Bitmap::compress(void* dst, const ConstChunk& chunk, size_t chunkSize) 
    {
        char const* dataSrc = (char const*)chunk.getData();
        TypeId type = chunk.getAttributeDesc().getType();        
        _elementSize = TypeLibrary::getType(type).byteSize();

        if(_elementSize == 0 || _elementSize > 8 || chunk.isSparse() || !chunk.getArrayDesc().isImmutable() || chunk.getAttributeDesc().isNullable())
        {
            _bitmapElements = chunkSize;
            _elementSize = 1;
        }
        else
        {
            _bitmapElements = chunkSize / _elementSize;
        }

        if(!_bitmapElements) { return chunkSize; }

       
       

        char *readPos = const_cast<char *>(dataSrc);
        ByteOutputItr out((uint8_t *) dst, chunkSize-1);
        uint32_t i;
        uint32_t bucketSize = (_bitmapElements + 7) >> 3;
        uint32_t bucketCount = 0;
        std::string key;

        clearBitmapCache();

        // make the key of our hash a string so that 
        // we can compare variable-length element sizes

        size_t bitmapEntryLength = bucketSize + _elementSize;
        assert(bitmapEntryLength);
        uint32_t maxBuckets = floor(chunkSize / bitmapEntryLength);
        if(maxBuckets * bitmapEntryLength == chunkSize)
        {
            // we want to beat the uncompressed case
            --maxBuckets;
        }

        for(i = 0; i < _bitmapElements; ++i)
        { 
            key.clear();

            for(uint32_t j = 0; j < _elementSize; ++j)
            {
                key.push_back(*readPos);
                ++readPos;
            }

            uint8_t *bucket = NULL;
            // check to see if a bucket exists, if so grab and pass on
            std::map<std::string, uint8_t*>::iterator iter  =
                _bitmaps.find(key);

            if(iter == _bitmaps.end() ) {
                ++bucketCount;
                if(bucketCount > maxBuckets)
                {
                    return chunkSize;
                }

                // create a new one             
                bucket = new uint8_t[bucketSize];
                _bitmaps[key] = bucket;
                for(uint32_t k = 0; k < bucketSize; ++k) { *(bucket+k) = 0;} 

            } else {
                bucket = iter->second;
            }
            assert(bucket!=NULL);
            setBit(bucket, i);
        }
        // drop all of bitmaps to dst
        fillOutput(&out);

        size_t compressedSize = out.close();
        return compressedSize;
    }