void MaterializedArray::materialize(const shared_ptr<Query>& query, MemChunk& materializedChunk, ConstChunk const& chunk, MaterializeFormat format) { nMaterializedChunks += 1; materializedChunk.initialize(chunk); materializedChunk.setBitmapChunk((Chunk*)chunk.getBitmapChunk()); boost::shared_ptr<ConstChunkIterator> src = chunk.getConstIterator(ChunkIterator::IGNORE_DEFAULT_VALUES|ChunkIterator::IGNORE_EMPTY_CELLS| (chunk.isSolid() ? ChunkIterator::INTENDED_TILE_MODE : 0)); boost::shared_ptr<ChunkIterator> dst = materializedChunk.getIterator(query, (src->getMode() & ChunkIterator::TILE_MODE)|ChunkIterator::ChunkIterator::NO_EMPTY_CHECK|ChunkIterator::SEQUENTIAL_WRITE); size_t count = 0; while (!src->end()) { if (!dst->setPosition(src->getPosition())) throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition"; dst->writeItem(src->getItem()); count += 1; ++(*src); } if (!(src->getMode() & ChunkIterator::TILE_MODE) && !chunk.getArrayDesc().hasOverlap()) { materializedChunk.setCount(count); } dst->flush(); }
/**
 * Bind this delegate chunk to 'inputChunk' and translate its coordinates into
 * the concatenated array's space.
 *
 * The owning iterator supplies 'shift', the offset of this input along the
 * concatenation dimension; all cached first/last positions are shifted by it.
 * The chunk is a clone (shares storage) only when the concatenation dimension
 * has no chunk overlap.
 *
 * FIX: replaced the C-style reference cast of 'iterator' with a named
 * static_cast, per the named-casts-only convention.
 */
void ConcatChunk::setInputChunk(ConstChunk const& inputChunk)
{
    DelegateChunk::setInputChunk(inputChunk);
    ConcatArrayIterator const& arrayIterator = static_cast<ConcatArrayIterator const&>(iterator);
    Coordinate shift = arrayIterator.shift;
    isClone = inputChunk.getArrayDesc().getDimensions()[CONCAT_DIM].getChunkOverlap() == 0;
    direct = true;
    firstPos = inputChunk.getFirstPosition(false);
    firstPosWithOverlap = inputChunk.getFirstPosition(true);
    lastPos = inputChunk.getLastPosition(false);
    lastPosWithOverlap = inputChunk.getLastPosition(true);
    if (shift != 0) {
        firstPos[CONCAT_DIM] += shift;
        firstPosWithOverlap[CONCAT_DIM] += shift;
        lastPos[CONCAT_DIM] += shift;
        lastPosWithOverlap[CONCAT_DIM] += shift;
    }
}
size_t DictionaryEncoding::Dictionary::compress(void* dst, const ConstChunk& chunk, size_t chunkSize) { uint8_t *readPtr = (uint8_t *)chunk.getData(); TypeId type = chunk.getAttributeDesc().getType(); size_t elementSize = TypeLibrary::getType(type).byteSize(); size_t nElems; if(elementSize == 0 || elementSize > 8 || chunk.isRLE() || !chunk.getArrayDesc().isImmutable() || chunk.isSparse() || chunk.getAttributeDesc().isNullable()) { nElems = chunkSize; elementSize = 1; } else { nElems = chunkSize / elementSize; } size_t i; uint64_t value = 0; uint8_t code = 0; ByteOutputItr out((uint8_t *) dst, chunkSize - 1); BitOutputItr outBits(&out); uint32_t uniques = (uint32_t) createDictionary(readPtr, elementSize, nElems, out); size_t codeLength; uniques <= 2 ? codeLength = 1 : codeLength = ceil(log2(uniques-1)) + 1; // 0-indexed, so values span from 0...uniques-1, log is 0-based, so bring it back to 1...n bits // project size and terminate if it will be too large size_t codesSize = (nElems * codeLength + 7) >> 3; size_t totalCompressed = 1 + uniques * elementSize + codesSize; if(totalCompressed*2 >= chunkSize) // if we can't get at least 2:1 it is not worth doing { return chunkSize; } if(!nElems || !uniques) { return chunkSize; } for(i = 0; i < nElems; ++i) { memcpy((uint8_t *) &value, readPtr, elementSize); code = _encodeDictionary[value]; outBits.put(code, codeLength); readPtr += elementSize; } outBits.flush(); size_t compressedSize = out.close(); return compressedSize; }
/**
 * Block-oriented dictionary compression: groups of '_entriesPerCode' elements
 * are mapped to a single '_codeLength'-byte code via '_encodeDictionary'.
 *
 * Output layout: 4-byte unique-value count, then the unique values in code
 * order (code 0 first), then one code per block. Returns the compressed size,
 * or 'chunkSize' unchanged whenever compression is skipped or the output
 * buffer would overflow.
 *
 * BUG FIX: the original computed nElems = chunkSize / elementSize BEFORE the
 * 'elementSize == 0' check — variable-size types report byteSize() == 0, so
 * that divided by zero. The suitability check now runs first (matching the
 * ordering in Dictionary::compress).
 */
size_t DictionaryEncoding::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
{
#ifdef FORMAT_SENSITIVE_COMPRESSORS
    uint8_t* src = (uint8_t*)chunk.getData();
    uint8_t* readPtr = (uint8_t*)chunk.getData();
    TypeId type = chunk.getAttributeDesc().getType();
    size_t elementSize = TypeLibrary::getType(type).byteSize();
    size_t nElems;
    uint32_t i;
    uint32_t uniqueValues;
    uint32_t code;
    std::string toEncode = "";

    if (elementSize == 0 || elementSize > 8 || chunk.isRLE() ||
        !chunk.getArrayDesc().isImmutable() || chunk.isSparse())
    {
        // too big or too small or sparse = regard it as a string of bytes
        nElems = chunkSize;
        elementSize = 1;
    }
    else
    {
        nElems = chunkSize / elementSize;
    }
    if (!nElems) {
        return chunkSize;
    }

    ByteOutputItr out((uint8_t*)dst, chunkSize - 1);
    uniqueValues = createDictionary(src, elementSize, nElems);
    if (uniqueValues == nElems || uniqueValues == 0) {
        return chunkSize;  // no repetition to exploit, or nothing to encode
    }
    toEncode.reserve(elementSize);

    assert(_entriesPerCode);
    uint32_t blocks = floor(nElems / _entriesPerCode);
    uint32_t remainder = nElems % _entriesPerCode;
    size_t blockEntriesSize = _entriesPerCode * elementSize;

    if (out.putArray((uint8_t*)&uniqueValues, 4) == -1) {
        return chunkSize;
    }
    // Emit the unique values; their codes are implied by read order —
    // the first elementSize bytes translate to code 0, and so on.
    for (i = 0; i < uniqueValues; ++i) {
        if (out.putArray((uint8_t*)_values[i].data(), elementSize) == -1) {
            return chunkSize;
        }
    }
    // Emit one code per full block of entries.
    for (i = 0; i < blocks; ++i) {
        toEncode.assign((char*)readPtr, blockEntriesSize);
        readPtr += blockEntriesSize;
        code = _encodeDictionary[toEncode];
        if (out.putArray((uint8_t*)&code, _codeLength) == -1) {
            return chunkSize;
        }
    }
    if (remainder) {
        // Final partial block: pad with _values[0] up to a full block.
        toEncode.assign((char*)readPtr, elementSize * remainder);
        for (i = 0; i < _entriesPerCode - remainder; ++i) {
            toEncode.append(_values[0]);
        }
        code = _encodeDictionary[toEncode];
        if (out.putArray((uint8_t*)&code, _codeLength) == -1) {
            return chunkSize;
        }
    }
    size_t compressed_size = out.close();
    return compressed_size;
#else
    return chunkSize;
#endif
}
size_t BitmapEncoding::Bitmap::compress(void* dst, const ConstChunk& chunk, size_t chunkSize) { char const* dataSrc = (char const*)chunk.getData(); TypeId type = chunk.getAttributeDesc().getType(); _elementSize = TypeLibrary::getType(type).byteSize(); if(_elementSize == 0 || _elementSize > 8 || chunk.isSparse() || !chunk.getArrayDesc().isImmutable() || chunk.getAttributeDesc().isNullable()) { _bitmapElements = chunkSize; _elementSize = 1; } else { _bitmapElements = chunkSize / _elementSize; } if(!_bitmapElements) { return chunkSize; } char *readPos = const_cast<char *>(dataSrc); ByteOutputItr out((uint8_t *) dst, chunkSize-1); uint32_t i; uint32_t bucketSize = (_bitmapElements + 7) >> 3; uint32_t bucketCount = 0; std::string key; clearBitmapCache(); // make the key of our hash a string so that // we can compare variable-length element sizes size_t bitmapEntryLength = bucketSize + _elementSize; assert(bitmapEntryLength); uint32_t maxBuckets = floor(chunkSize / bitmapEntryLength); if(maxBuckets * bitmapEntryLength == chunkSize) { // we want to beat the uncompressed case --maxBuckets; } for(i = 0; i < _bitmapElements; ++i) { key.clear(); for(uint32_t j = 0; j < _elementSize; ++j) { key.push_back(*readPos); ++readPos; } uint8_t *bucket = NULL; // check to see if a bucket exists, if so grab and pass on std::map<std::string, uint8_t*>::iterator iter = _bitmaps.find(key); if(iter == _bitmaps.end() ) { ++bucketCount; if(bucketCount > maxBuckets) { return chunkSize; } // create a new one bucket = new uint8_t[bucketSize]; _bitmaps[key] = bucket; for(uint32_t k = 0; k < bucketSize; ++k) { *(bucket+k) = 0;} } else { bucket = iter->second; } assert(bucket!=NULL); setBit(bucket, i); } // drop all of bitmaps to dst fillOutput(&out); size_t compressedSize = out.close(); return compressedSize; }