Exemplo n.º 1
0
 /**
  * Return the in-memory (materialized) counterpart of inputChunk.
  *
  * The chunk cache is looked up by attribute id and by the chunk's first
  * position. On a hit the cached chunk is returned as is. On a miss a new
  * MemChunk is materialized outside the lock, paired with an empty bitmap
  * (the one cached for this position if present, otherwise the chunk's own),
  * and both are stored in their caches.
  *
  * When a cache already holds _cacheSize or more entries, its first entry
  * (container order) is erased before the new one is stored.
  *
  * @param inputChunk chunk to materialize
  * @return shared pointer to the cached or newly materialized chunk
  */
 boost::shared_ptr<MemChunk> MaterializedArray::getMaterializedChunk(ConstChunk const& inputChunk)
 {
     boost::shared_ptr<MemChunk> materialized;
     boost::shared_ptr<ConstRLEEmptyBitmap> emptyBitmap;
     Coordinates const& firstPos = inputChunk.getFirstPosition(false);
     AttributeID attrId = inputChunk.getAttributeDesc().getId();

     // Step 1: probe the caches while holding the mutex.
     {
         ScopedMutexLock guard(_mutex);
         materialized = _chunkCache[attrId][firstPos];
         if (materialized) {
             // Cache hit: nothing to build.
             return materialized;
         }
         materialized.reset(new MemChunk());
         emptyBitmap = _bitmapCache[firstPos];
     }

     // Step 2: build the chunk without holding the mutex.
     boost::shared_ptr<Query> query(Query::getValidQueryPtr(_query));
     materialize(query, *materialized, inputChunk, _format);
     if (!emptyBitmap) {
         // No bitmap cached for this position: use the one the chunk carries.
         emptyBitmap = materialized->getEmptyBitmap();
     }
     materialized->setEmptyBitmap(emptyBitmap);

     // Step 3: publish the results, evicting one entry per cache if it is full.
     {
         ScopedMutexLock guard(_mutex);
         if (_chunkCache[attrId].size() >= _cacheSize) {
             _chunkCache[attrId].erase(_chunkCache[attrId].begin());
         }
         _chunkCache[attrId][firstPos] = materialized;

         if (_bitmapCache.size() >= _cacheSize) {
             _bitmapCache.erase(_bitmapCache.begin());
         }
         _bitmapCache[firstPos] = emptyBitmap;
     }
     return materialized;
 }
    /**
     * Dictionary-encode the payload of a chunk into dst.
     *
     * The payload is treated as nElems fixed-width elements when the attribute
     * type is 1..8 bytes wide and the chunk is dense, non-RLE, immutable and
     * non-nullable; otherwise it is treated as a plain byte string
     * (elementSize == 1). Each element is then replaced by a fixed-width bit
     * code looked up in _encodeDictionary.
     *
     * @param dst       output buffer; at most chunkSize - 1 bytes are made
     *                  available to the output iterator
     * @param chunk     chunk whose data is to be compressed
     * @param chunkSize size in bytes of the chunk's data
     * @return the size reported by the output iterator on success, or chunkSize
     *         when the input is empty, the dictionary is empty, or the projected
     *         result would not reach a 2:1 ratio
     */
    size_t DictionaryEncoding::Dictionary::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
    {
        uint8_t *readPtr = (uint8_t *)chunk.getData();
        TypeId type = chunk.getAttributeDesc().getType();
        size_t elementSize = TypeLibrary::getType(type).byteSize();
        size_t nElems;

        if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse() || chunk.getAttributeDesc().isNullable())
        {
            // not a small fixed-width dense payload: regard it as a byte string
            nElems = chunkSize;
            elementSize = 1;
        }
        else
        {
            nElems = chunkSize / elementSize;
        }

        // Nothing to encode. This must be checked before the output iterator is
        // created: with chunkSize == 0 the capacity "chunkSize - 1" would wrap
        // around to the largest size_t value.
        if(!nElems)
        {
            return chunkSize;
        }

        ByteOutputItr out((uint8_t *) dst, chunkSize - 1);
        BitOutputItr outBits(&out);

        // NOTE(review): createDictionary presumably fills _encodeDictionary and
        // emits the dictionary header through 'out' -- confirm against its definition.
        uint32_t uniques = (uint32_t) createDictionary(readPtr, elementSize, nElems, out);
        if(!uniques)
        {
            return chunkSize;
        }

        // 0-indexed, so values span from 0...uniques-1, log is 0-based, so bring it back to 1...n bits
        size_t codeLength;
        if(uniques <= 2)
        {
            codeLength = 1;
        }
        else
        {
            codeLength = static_cast<size_t>(ceil(log2(uniques - 1)) + 1);
        }

        // project size and terminate if it will be too large
        size_t codesSize = (nElems * codeLength + 7) >> 3;
        size_t totalCompressed = 1 + uniques * elementSize + codesSize;

        if(totalCompressed * 2 >= chunkSize) // if we can't get at least 2:1 it is not worth doing
        {
            return chunkSize;
        }

        // 'value' starts zeroed and only its first elementSize bytes are ever
        // overwritten, so the remaining bytes stay zero for every lookup.
        uint64_t value = 0;
        for(size_t i = 0; i < nElems; ++i)
        {
            memcpy((uint8_t *) &value, readPtr, elementSize);
            uint8_t code = _encodeDictionary[value];
            outBits.put(code, codeLength);
            readPtr += elementSize;
        }

        outBits.flush();
        return out.close();
    }
Exemplo n.º 3
0
 /**
  *  Private function that returns true iff the value passed in is needed by the aggregate.
  *
  *  A value is skipped when it is null and the aggregate ignores nulls, or when
  *  it is the default value for the chunk's attribute type and the aggregate
  *  ignores zeroes.
  */
 inline bool WindowChunk::valueIsNeededForAggregate ( const Value & val, const ConstChunk & inputChunk ) const
 {
     if (val.isNull() && _aggregate->ignoreNulls()) {
         return false;
     }
     if (isDefaultFor(val, inputChunk.getAttributeDesc().getType()) && _aggregate->ignoreZeroes()) {
         return false;
     }
     return true;
 }
/**
 * Dictionary-encode the payload of a chunk into dst.
 *
 * Only active when FORMAT_SENSITIVE_COMPRESSORS is defined; otherwise the
 * function returns chunkSize without touching dst.
 *
 * Output layout as written below: a 4-byte count of unique values, the unique
 * values themselves (elementSize bytes each, their order defines their codes),
 * then one _codeLength-byte code per group of _entriesPerCode elements. A
 * trailing partial group is padded with _values[0].
 *
 * @param dst       output buffer; at most chunkSize - 1 bytes are made
 *                  available to the output iterator
 * @param chunk     chunk whose data is to be compressed
 * @param chunkSize size in bytes of the chunk's data
 * @return the size reported by the output iterator on success, or chunkSize
 *         when the input is empty, no dictionary could be built, every element
 *         is unique, or the output does not fit
 */
size_t DictionaryEncoding::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
{
#ifdef FORMAT_SENSITIVE_COMPRESSORS
    uint8_t *src = (uint8_t *)chunk.getData();
    uint8_t *readPtr = src;
    TypeId type = chunk.getAttributeDesc().getType();
    size_t elementSize = TypeLibrary::getType(type).byteSize();

    // byteSize() may be 0 (the condition below handles that case explicitly),
    // so the element count must not be derived by dividing by it.
    size_t nElems = (elementSize == 0) ? chunkSize : chunkSize / elementSize;

    if(!nElems) {
        return chunkSize;
    }

    if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse()) // too big or too small or sparse = regard it as a string
    {
        nElems = chunkSize;
        elementSize = 1;
    }

    // nElems != 0 implies chunkSize >= 1 here, so chunkSize-1 cannot wrap
    ByteOutputItr out((uint8_t *) dst, chunkSize-1);

    uint32_t uniqueValues = createDictionary(src, elementSize, nElems);
    if(uniqueValues == nElems) {
        // every element is distinct: a dictionary cannot shrink the data
        return chunkSize;
    }

    // dictionary-specific
    assert(_entriesPerCode);
    uint32_t blocks = static_cast<uint32_t>(nElems / _entriesPerCode);
    uint32_t remainder = nElems % _entriesPerCode;
    size_t blockEntriesSize = _entriesPerCode * elementSize;

    // each lookup key holds one full group of entries
    std::string toEncode;
    toEncode.reserve(blockEntriesSize);

    if(uniqueValues == 0) {
        return chunkSize;
    }
    if(out.putArray((uint8_t *) &uniqueValues, 4) == -1) {
        return chunkSize;
    }

    // output a list of unique values; we infer their codes by the order that they are read in
    // i.e., first elementSize bytes translate to code 0 and so on
    uint32_t i;
    for(i = 0; i < uniqueValues; ++i)
    {
        // put value
        if(out.putArray((uint8_t *) _values[i].data(), elementSize) == -1) {
            return chunkSize;
        }
    }// end dictionary output

    // now output encoded data
    uint32_t code;
    for(i = 0; i < blocks; ++i)
    {
        toEncode.assign((char *) readPtr, blockEntriesSize);
        readPtr += blockEntriesSize;
        code = _encodeDictionary[toEncode];

        if(out.putArray((uint8_t *) &code, _codeLength) == -1) {
            return chunkSize;
        }
    }

    if(remainder)
    {
        // output the last few entries --
        toEncode.assign((char *) readPtr, elementSize * remainder);
        // pad it with _value[0]
        for(i = 0; i < _entriesPerCode - remainder; ++i)
        {
            toEncode.append(_values[0]);
        }
        code = _encodeDictionary[toEncode];
        if(out.putArray((uint8_t *) &code, _codeLength) == -1) {
            return chunkSize;
        }
    }

    return out.close();
#else
    return chunkSize;
#endif
}
Exemplo n.º 5
0
    /**
     * Bitmap-encode the payload of a chunk into dst.
     *
     * The payload is treated as a string of single bytes. Every distinct byte
     * value gets its own bitmap ("bucket") of one bit per element, with bit i
     * set when element i holds that value. The buckets are then written to dst
     * through fillOutput().
     *
     * @param dst       output buffer; at most chunkSize - 1 bytes are made
     *                  available to the output iterator
     * @param chunk     chunk whose data is to be compressed
     * @param chunkSize size in bytes of the chunk's data
     * @return the size reported by the output iterator on success, or chunkSize
     *         when the input is empty or more buckets would be needed than the
     *         computed limit allows
     */
    size_t BitmapEncoding::Bitmap::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
    {
        char const* dataSrc = (char const*)chunk.getData();
        TypeId type = chunk.getAttributeDesc().getType();
        _elementSize = TypeLibrary::getType(type).byteSize();

        /* No more immutable arrays, to keep consistent with old code, always treat data as string
         */
        _bitmapElements = chunkSize;
        _elementSize = 1;

        if(!_bitmapElements)
        {
            return chunkSize;
        }

        char const* cursor = dataSrc;
        ByteOutputItr out((uint8_t *) dst, chunkSize-1);
        uint32_t bytesPerBucket = (_bitmapElements + 7) >> 3;   // one bit per element, rounded up to bytes

        // NOTE(review): presumably drops the buckets left over from a previous call -- confirm
        clearBitmapCache();

        // Upper bound on the number of buckets: each one costs its bitmap plus
        // the value it stands for, and the total has to stay below chunkSize.
        size_t bitmapEntryLength = bytesPerBucket + _elementSize;
        assert(bitmapEntryLength);
        uint32_t bucketLimit = floor(chunkSize / bitmapEntryLength);
        if(bucketLimit * bitmapEntryLength == chunkSize)
        {
            // we want to beat the uncompressed case
            --bucketLimit;
        }

        // the hash key is a string so that variable-length element sizes can be compared
        std::string key;
        uint32_t bucketsInUse = 0;
        for(uint32_t elem = 0; elem < _bitmapElements; ++elem)
        {
            key.assign(cursor, _elementSize);
            cursor += _elementSize;

            uint8_t *bucket = NULL;
            std::map<std::string, uint8_t*>::iterator found = _bitmaps.find(key);
            if(found != _bitmaps.end())
            {
                // value seen before: reuse its bucket
                bucket = found->second;
            }
            else
            {
                ++bucketsInUse;
                if(bucketsInUse > bucketLimit)
                {
                    return chunkSize;
                }

                // first occurrence of this value: start from an all-zero bitmap
                bucket = new uint8_t[bytesPerBucket]();
                _bitmaps[key] = bucket;
            }
            assert(bucket!=NULL);
            setBit(bucket, elem);
        }

        // drop all of bitmaps to dst
        fillOutput(&out);

        return out.close();
    }