void MaterializedArray::materialize(const shared_ptr<Query>& query,
                                    MemChunk& materializedChunk,
                                    ConstChunk const& chunk,
                                    MaterializeFormat format)
{
    nMaterializedChunks += 1;

    materializedChunk.initialize(chunk);
    materializedChunk.setBitmapChunk((Chunk*)chunk.getBitmapChunk());
    boost::shared_ptr<ConstChunkIterator> src =
        chunk.getConstIterator(ChunkIterator::IGNORE_DEFAULT_VALUES |
                               ChunkIterator::IGNORE_EMPTY_CELLS |
                               (chunk.isSolid() ? ChunkIterator::INTENDED_TILE_MODE : 0));
    boost::shared_ptr<ChunkIterator> dst =
        materializedChunk.getIterator(query,
                                      (src->getMode() & ChunkIterator::TILE_MODE) |
                                      ChunkIterator::NO_EMPTY_CHECK |
                                      ChunkIterator::SEQUENTIAL_WRITE);
    size_t count = 0;
    while (!src->end()) {
        if (!dst->setPosition(src->getPosition()))
            throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
        dst->writeItem(src->getItem());
        count += 1;
        ++(*src);
    }
    // In tile mode each item is a whole tile rather than a cell, and overlap
    // regions inflate the count, so the element count is only valid otherwise.
    if (!(src->getMode() & ChunkIterator::TILE_MODE) &&
        !chunk.getArrayDesc().hasOverlap())
    {
        materializedChunk.setCount(count);
    }
    dst->flush();
}
void ShiftChunk::setInputChunk(ConstChunk const& inputChunk)
{
    DelegateChunk::setInputChunk(inputChunk);
    isClone = true;
    array.in2out(inputChunk.getFirstPosition(false), firstPos);
    array.in2out(inputChunk.getLastPosition(false), lastPos);
}
void AllVersionsChunk::setInputChunk(ConstChunk const& inputChunk, VersionID version)
{
    DelegateChunk::setInputChunk(inputChunk);
    isClone = true;
    currVersion = version;
    prependVersion(firstPos, inputChunk.getFirstPosition(false), version);
    prependVersion(lastPos, inputChunk.getLastPosition(false), version);
    prependVersion(firstPosWithOverlap, inputChunk.getFirstPosition(true), version);
    prependVersion(lastPosWithOverlap, inputChunk.getLastPosition(true), version);
}
void Chunk::aggregateMerge(ConstChunk const& with,
                           AggregatePtr const& aggregate,
                           boost::shared_ptr<Query>& query)
{
    if (getDiskChunk() != NULL)
        throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CHUNK_ALREADY_EXISTS);
    if (isReadOnly())
        throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CANT_UPDATE_READ_ONLY_CHUNK);

    AttributeDesc const& attr = getAttributeDesc();
    if (aggregate->getStateType().typeId() != attr.getType())
        throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_TYPE_MISMATCH_BETWEEN_AGGREGATE_AND_CHUNK);
    if (!attr.isNullable())
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_AGGREGATE_STATE_MUST_BE_NULLABLE); // enforce equivalence with merge()

    setCount(0); // count is unknown after the merge
    char* dst = (char*)getData();
    if (dst != NULL) {
        // This chunk already holds data: merge the aggregate states cell by cell.
        int sparseMode = isSparse() ? ChunkIterator::SPARSE_CHUNK : 0;
        boost::shared_ptr<ChunkIterator> dstIterator =
            getIterator(query, sparseMode | ChunkIterator::APPEND_CHUNK | ChunkIterator::NO_EMPTY_CHECK);
        boost::shared_ptr<ConstChunkIterator> srcIterator =
            with.getConstIterator(ChunkIterator::IGNORE_NULL_VALUES);
        while (!srcIterator->end()) {
            Value& val = srcIterator->getItem();
            if (!val.isNull()) {
                if (!dstIterator->setPosition(srcIterator->getPosition()))
                    throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
                Value& val2 = dstIterator->getItem();
                if (!val2.isNull()) {
                    aggregate->merge(val, val2);
                }
                dstIterator->writeItem(val);
            }
            ++(*srcIterator);
        }
        dstIterator->flush();
    } else {
        // This chunk is still empty: take over the source chunk's payload wholesale.
        PinBuffer scope(with);
        char* src = (char*)with.getData();
        allocate(with.getSize());
        setSparse(with.isSparse());
        setRLE(with.isRLE());
        memcpy(getData(), src, getSize());
        write(query);
    }
}
void Chunk::merge(ConstChunk const& with, boost::shared_ptr<Query>& query)
{
    if (getDiskChunk() != NULL)
        throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CHUNK_ALREADY_EXISTS);
    setCount(0); // count is unknown after the merge
    AttributeDesc const& attr = getAttributeDesc();
    char* dst = (char*)getData();
    Value const& defaultValue = attr.getDefaultValue();
    // A cell-by-cell merge is required whenever a simple byte-wise OR of the
    // two payloads would be incorrect: sparse or RLE encodings, nullable or
    // variable-size types, or a non-zero default value.
    if (dst != NULL &&
        (isSparse() || isRLE() || with.isSparse() || with.isRLE() ||
         attr.isNullable() || TypeLibrary::getType(attr.getType()).variableSize() ||
         !defaultValue.isZero()))
    {
        int sparseMode = isSparse() ? ChunkIterator::SPARSE_CHUNK : 0;
        boost::shared_ptr<ChunkIterator> dstIterator =
            getIterator(query, sparseMode | ChunkIterator::APPEND_CHUNK | ChunkIterator::NO_EMPTY_CHECK);
        boost::shared_ptr<ConstChunkIterator> srcIterator =
            with.getConstIterator(ChunkIterator::IGNORE_EMPTY_CELLS | ChunkIterator::IGNORE_DEFAULT_VALUES);
        if (getArrayDesc().getEmptyBitmapAttribute() != NULL) {
            while (!srcIterator->end()) {
                if (!dstIterator->setPosition(srcIterator->getPosition()))
                    throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
                Value const& value = srcIterator->getItem();
                dstIterator->writeItem(value);
                ++(*srcIterator);
            }
        } else {
            // No empty bitmap: skip cells holding the default value explicitly.
            while (!srcIterator->end()) {
                Value const& value = srcIterator->getItem();
                if (value != defaultValue) {
                    if (!dstIterator->setPosition(srcIterator->getPosition()))
                        throw SYSTEM_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_OPERATION_FAILED) << "setPosition";
                    dstIterator->writeItem(value);
                }
                ++(*srcIterator);
            }
        }
        dstIterator->flush();
    } else {
        PinBuffer scope(with);
        char* src = (char*)with.getData();
        if (dst == NULL) {
            // This chunk is still empty: copy the source payload wholesale.
            allocate(with.getSize());
            setSparse(with.isSparse());
            setRLE(with.isRLE());
            memcpy(getData(), src, getSize());
        } else {
            // Dense, zero-default, fixed-size data: merge by byte-wise OR.
            if (getSize() != with.getSize())
                throw USER_EXCEPTION(SCIDB_SE_MERGE, SCIDB_LE_CANT_MERGE_CHUNKS_WITH_VARYING_SIZE);
            for (size_t j = 0, n = getSize(); j < n; j++) {
                dst[j] |= src[j];
            }
        }
        write(query);
    }
}
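/*
 * Illustrative sketch (not part of the SciDB sources): why the byte-wise OR
 * fallback in Chunk::merge() above is a valid merge for dense, fixed-size,
 * zero-default payloads. Unwritten cells are all-zero bytes, and the two
 * chunks are assumed not to overlap in written cells, so each byte is
 * non-zero in at most one input and OR simply selects the written one.
 */
#include <cstdint>
#include <cstddef>
#include <cassert>

static void orMergeDemo()
{
    // Two 8-byte payloads with disjoint written cells (zeros elsewhere).
    uint8_t a[8] = { 0x2A, 0,    0, 0x11, 0, 0, 0,    0 };
    uint8_t b[8] = { 0,    0x07, 0, 0,    0, 0, 0x5C, 0 };
    for (size_t j = 0; j < sizeof(a); ++j) {
        a[j] |= b[j];   // same loop shape as the merge() fallback path
    }
    assert(a[0] == 0x2A && a[1] == 0x07 && a[3] == 0x11 && a[6] == 0x5C);
}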
size_t BitmapEncoding::compress(void* dst, const ConstChunk& chunk, size_t size)
{
#ifdef FORMAT_SENSITIVE_COMPRESSORS
    if (chunk.isRLE()) {
        return size; // RLE payloads are left uncompressed
    }
    Bitmap bitmap;
    return bitmap.compress(dst, chunk, size);
#else
    return size;
#endif
}
void ConcatChunk::setInputChunk(ConstChunk const& inputChunk)
{
    DelegateChunk::setInputChunk(inputChunk);
    ConcatArrayIterator const& arrayIterator((ConcatArrayIterator const&)iterator);
    Coordinate shift = arrayIterator.shift;
    // The input chunk can be passed through unchanged only if there is no
    // overlap along the concatenation dimension.
    isClone = inputChunk.getArrayDesc().getDimensions()[CONCAT_DIM].getChunkOverlap() == 0;
    direct = true;
    firstPos = inputChunk.getFirstPosition(false);
    firstPosWithOverlap = inputChunk.getFirstPosition(true);
    lastPos = inputChunk.getLastPosition(false);
    lastPosWithOverlap = inputChunk.getLastPosition(true);
    if (shift != 0) {
        firstPos[CONCAT_DIM] += shift;
        firstPosWithOverlap[CONCAT_DIM] += shift;
        lastPos[CONCAT_DIM] += shift;
        lastPosWithOverlap[CONCAT_DIM] += shift;
    }
}
boost::shared_ptr<MemChunk> MaterializedArray::getMaterializedChunk(ConstChunk const& inputChunk)
{
    bool newChunk = false;
    boost::shared_ptr<MemChunk> chunk;
    boost::shared_ptr<ConstRLEEmptyBitmap> bitmap;
    Coordinates const& pos = inputChunk.getFirstPosition(false);
    AttributeID attr = inputChunk.getAttributeDesc().getId();

    // First look up the chunk in the cache under the lock.
    {
        ScopedMutexLock cs(_mutex);
        chunk = _chunkCache[attr][pos];
        if (!chunk) {
            chunk.reset(new MemChunk());
            bitmap = _bitmapCache[pos];
            newChunk = true;
        }
    }
    if (newChunk) {
        // Materialize outside the lock, then publish the result to the cache.
        boost::shared_ptr<Query> query(Query::getValidQueryPtr(_query));
        materialize(query, *chunk, inputChunk, _format);
        if (!bitmap) {
            bitmap = chunk->getEmptyBitmap();
        }
        chunk->setEmptyBitmap(bitmap);
        {
            ScopedMutexLock cs(_mutex);
            if (_chunkCache[attr].size() >= _cacheSize) {
                _chunkCache[attr].erase(_chunkCache[attr].begin());
            }
            _chunkCache[attr][pos] = chunk;
            if (_bitmapCache.size() >= _cacheSize) {
                _bitmapCache.erase(_bitmapCache.begin());
            }
            _bitmapCache[pos] = bitmap;
        }
    }
    return chunk;
}
size_t DictionaryEncoding::Dictionary::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
{
    uint8_t* readPtr = (uint8_t*)chunk.getData();
    TypeId type = chunk.getAttributeDesc().getType();
    size_t elementSize = TypeLibrary::getType(type).byteSize();
    size_t nElems;
    // Variable-size, oversized, RLE, mutable, sparse or nullable data cannot
    // be dictionary-coded element-wise; fall back to treating the chunk as a
    // string of single-byte elements.
    if (elementSize == 0 || elementSize > 8 || chunk.isRLE() ||
        !chunk.getArrayDesc().isImmutable() || chunk.isSparse() ||
        chunk.getAttributeDesc().isNullable())
    {
        nElems = chunkSize;
        elementSize = 1;
    } else {
        nElems = chunkSize / elementSize;
    }

    size_t i;
    uint64_t value = 0;
    uint8_t code = 0;
    ByteOutputItr out((uint8_t*)dst, chunkSize - 1);
    BitOutputItr outBits(&out);
    uint32_t uniques = (uint32_t)createDictionary(readPtr, elementSize, nElems, out);
    if (!nElems || !uniques) {
        return chunkSize;
    }
    // Codes are 0-indexed, so values span 0..uniques-1; log2 is 0-based, so
    // add 1 to bring it back to 1..n bits.
    size_t codeLength = (uniques <= 2) ? 1 : (size_t)ceil(log2(uniques - 1)) + 1;

    // Project the compressed size and bail out if the gain is too small:
    // anything short of 2:1 is not worth doing.
    size_t codesSize = (nElems * codeLength + 7) >> 3;
    size_t totalCompressed = 1 + uniques * elementSize + codesSize;
    if (totalCompressed * 2 >= chunkSize) {
        return chunkSize;
    }
    for (i = 0; i < nElems; ++i) {
        memcpy((uint8_t*)&value, readPtr, elementSize);
        code = _encodeDictionary[value];
        outBits.put(code, codeLength);
        readPtr += elementSize;
    }
    outBits.flush();
    size_t compressedSize = out.close();
    return compressedSize;
}
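/*
 * Illustrative sketch (not part of the SciDB sources): the bit-width rule
 * used by Dictionary::compress() above. Codes run 0..uniques-1, so e.g.
 * uniques = 5 needs ceil(log2(4)) + 1 = 3 bits and uniques = 3 needs
 * ceil(log2(2)) + 1 = 2 bits. For exact powers of two the rule allocates one
 * bit more than strictly necessary (uniques = 4 yields 3 bits rather than 2);
 * that is harmless as long as the decompressor derives the width by the same
 * rule.
 */
#include <cstdint>
#include <cstddef>
#include <cmath>
#include <cassert>

static size_t dictionaryCodeWidth(uint32_t uniques)
{
    return (uniques <= 2) ? 1 : (size_t)ceil(log2(uniques - 1)) + 1;
}

static void codeWidthDemo()
{
    assert(dictionaryCodeWidth(2) == 1);
    assert(dictionaryCodeWidth(3) == 2);
    assert(dictionaryCodeWidth(4) == 3); // one spare bit at powers of two
    assert(dictionaryCodeWidth(5) == 3);
}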
/**
 * Private function that returns true iff the value passed in is needed by the aggregate.
 */
inline bool WindowChunk::valueIsNeededForAggregate(const Value& val, const ConstChunk& inputChunk) const
{
    // A value can be skipped if it is a null the aggregate ignores, or the
    // type's default value and the aggregate ignores "zeroes".
    return !((val.isNull() && _aggregate->ignoreNulls()) ||
             (isDefaultFor(val, inputChunk.getAttributeDesc().getType()) && _aggregate->ignoreZeroes()));
}
size_t DictionaryEncoding::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
{
#ifdef FORMAT_SENSITIVE_COMPRESSORS
    uint8_t* src = (uint8_t*)chunk.getData();
    TypeId type = chunk.getAttributeDesc().getType();
    size_t elementSize = TypeLibrary::getType(type).byteSize();
    size_t nElems;
    uint32_t i;
    uint32_t uniqueValues;
    std::string toEncode = "";
    uint32_t code;
    uint8_t* readPtr = (uint8_t*)chunk.getData();

    // Too big, too small (variable-size), RLE, mutable or sparse: treat the
    // chunk as a plain byte string. Checking this before dividing also avoids
    // a division by zero for variable-size types, where byteSize() == 0.
    if (elementSize == 0 || elementSize > 8 || chunk.isRLE() ||
        !chunk.getArrayDesc().isImmutable() || chunk.isSparse())
    {
        nElems = chunkSize;
        elementSize = 1;
    } else {
        nElems = chunkSize / elementSize;
    }
    if (!nElems) {
        return chunkSize;
    }

    ByteOutputItr out((uint8_t*)dst, chunkSize - 1);
    uniqueValues = createDictionary(src, elementSize, nElems);
    if (uniqueValues == 0 || uniqueValues == nElems) {
        // Nothing to encode, or nothing repeats: compression cannot pay off.
        return chunkSize;
    }
    toEncode.reserve(elementSize);

    // Dictionary-specific bookkeeping.
    assert(_entriesPerCode);
    uint32_t blocks = nElems / _entriesPerCode; // integer division already floors
    uint32_t remainder = nElems % _entriesPerCode;
    size_t blockEntriesSize = _entriesPerCode * elementSize;

    if (out.putArray((uint8_t*)&uniqueValues, 4) == -1) {
        return chunkSize;
    }
    // Output the list of unique values; their codes are implied by the order
    // in which they are read back, i.e. the first elementSize bytes translate
    // to code 0 and so on.
    for (i = 0; i < uniqueValues; ++i) {
        if (out.putArray((uint8_t*)_values[i].data(), elementSize) == -1) {
            return chunkSize;
        }
    } // end dictionary output

    // Now output the encoded data, one code per block of _entriesPerCode entries.
    for (i = 0; i < blocks; ++i) {
        toEncode.assign((char*)readPtr, blockEntriesSize);
        readPtr += blockEntriesSize;
        code = _encodeDictionary[toEncode];
        if (out.putArray((uint8_t*)&code, _codeLength) == -1) {
            return chunkSize;
        }
    }
    if (remainder) {
        // Output the last few entries, padding the block back to full width
        // with _values[0].
        toEncode.assign((char*)readPtr, elementSize * remainder);
        for (i = 0; i < _entriesPerCode - remainder; ++i) {
            toEncode.append(_values[0]);
        }
        code = _encodeDictionary[toEncode];
        if (out.putArray((uint8_t*)&code, _codeLength) == -1) {
            return chunkSize;
        }
    }
    size_t compressed_size = out.close();
    return compressed_size;
#else
    return chunkSize;
#endif
}
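/*
 * Illustrative sketch (not part of the SciDB sources): the block/remainder
 * split used in DictionaryEncoding::compress() above. With nElems entries
 * grouped _entriesPerCode at a time, the final partial block is padded back
 * to full width with _values[0] so every emitted code stands for the same
 * number of entries.
 */
#include <cstdint>
#include <cassert>

static void blockSplitDemo()
{
    const uint32_t nElems = 10;
    const uint32_t entriesPerCode = 4;            // stands in for _entriesPerCode
    uint32_t blocks = nElems / entriesPerCode;    // 2 full blocks of 4 entries
    uint32_t remainder = nElems % entriesPerCode; // 2 trailing entries
    assert(blocks == 2 && remainder == 2);
    // The last block is padded with entriesPerCode - remainder = 2 copies of
    // the first dictionary value before it is looked up in the code table.
    assert(entriesPerCode - remainder == 2);
}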
size_t BitmapEncoding::Bitmap::compress(void* dst, const ConstChunk& chunk, size_t chunkSize)
{
    char const* dataSrc = (char const*)chunk.getData();
    /* No more immutable arrays; to stay consistent with the old code, always
       treat the data as a string of single-byte elements. */
    _bitmapElements = chunkSize;
    _elementSize = 1;
    if (!_bitmapElements) {
        return chunkSize;
    }
    char* readPos = const_cast<char*>(dataSrc);
    ByteOutputItr out((uint8_t*)dst, chunkSize - 1);
    uint32_t i;
    uint32_t bucketSize = (_bitmapElements + 7) >> 3; // one bit per element, rounded up to bytes
    uint32_t bucketCount = 0;
    std::string key;
    clearBitmapCache();
    // The hash key is a string so that variable-length element sizes compare
    // correctly.
    size_t bitmapEntryLength = bucketSize + _elementSize;
    assert(bitmapEntryLength);
    uint32_t maxBuckets = chunkSize / bitmapEntryLength;
    if (maxBuckets * bitmapEntryLength == chunkSize) {
        // We want to beat the uncompressed case, not merely tie it.
        --maxBuckets;
    }
    for (i = 0; i < _bitmapElements; ++i) {
        key.clear();
        for (uint32_t j = 0; j < _elementSize; ++j) {
            key.push_back(*readPos);
            ++readPos;
        }
        uint8_t* bucket = NULL;
        // Reuse the element's bucket if one exists; otherwise allocate a zeroed one.
        std::map<std::string, uint8_t*>::iterator iter = _bitmaps.find(key);
        if (iter == _bitmaps.end()) {
            ++bucketCount;
            if (bucketCount > maxBuckets) {
                return chunkSize; // too many distinct values for bitmaps to win
            }
            bucket = new uint8_t[bucketSize];
            _bitmaps[key] = bucket;
            memset(bucket, 0, bucketSize);
        } else {
            bucket = iter->second;
        }
        assert(bucket != NULL);
        setBit(bucket, i);
    }
    // Write all bitmaps out to dst.
    fillOutput(&out);
    size_t compressedSize = out.close();
    return compressedSize;
}
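/*
 * Illustrative sketch (not part of the SciDB sources): the break-even bound
 * used in Bitmap::compress() above. Each distinct value costs one dictionary
 * entry of _elementSize bytes plus a bucket of ceil(_bitmapElements / 8)
 * bytes, so the encoding only wins while
 * bucketCount * (bucketSize + _elementSize) stays below chunkSize; when that
 * bound divides chunkSize exactly, one bucket is shaved off so the result is
 * strictly smaller than the input.
 */
#include <cstdint>
#include <cassert>

static void bitmapBreakEvenDemo()
{
    const uint32_t chunkSize = 64;                    // bytes of raw input
    const uint32_t elements = chunkSize;              // 1-byte elements
    const uint32_t bucketSize = (elements + 7) >> 3;  // 8 bytes of bits per value
    const uint32_t entryLength = bucketSize + 1;      // bucket + 1-byte value
    uint32_t maxBuckets = chunkSize / entryLength;    // 64 / 9 = 7
    if (maxBuckets * entryLength == chunkSize) {
        --maxBuckets; // an exact fit would merely tie the uncompressed size
    }
    assert(maxBuckets == 7); // more than 7 distinct byte values -> give up
}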