/**
 * Create a read-only iterator over this chunk.
 *
 * When the chunk's attribute has type TID_INDICATOR a ConstRLEBitmaskIterator
 * is created; for every other attribute type a ConstRLEChunkIterator is
 * created. Both are constructed from the array descriptor, this chunk's
 * attribute id, the chunk itself and the caller's mode.
 *
 * @param mode  iteration mode, forwarded unchanged to the iterator constructor
 * @return shared pointer owning the newly allocated iterator
 */
boost::shared_ptr<ConstChunkIterator> ConstRLEChunk::getConstIterator(int mode) const
{
    boost::shared_ptr<ConstChunkIterator> chunkIterator;

    bool const isIndicator = (getAttributeDesc().getType() == TID_INDICATOR);
    if (isIndicator)
    {
        chunkIterator.reset(new ConstRLEBitmaskIterator(*_arrayDesc, _addr.attId, this, mode));
    }
    else
    {
        chunkIterator.reset(new ConstRLEChunkIterator(*_arrayDesc, _addr.attId, this, mode));
    }

    return chunkIterator;
}
/**
 * Private function to setPosition in a WindowChunk.
 *
 * Binds the chunk to the given array iterator, records the chunk's first
 * position, and derives the last position from each dimension's chunk
 * interval (clamped to the dimension's end-max). The chunk is then marked as
 * not materialized. If an aggregate is set and the array has an empty-bitmap
 * attribute, the function finally decides whether the input chunk should be
 * materialized up front or probed on demand.
 *
 * @param iterator  the array iterator this chunk belongs to; stored as-is
 * @param pos       coordinates of the first cell of the chunk
 */
void WindowChunk::setPosition(WindowArrayIterator const* iterator, Coordinates const& pos)
{
    _arrayIterator = iterator;
    _firstPos = pos;

    // NOTE(review): _lastPos is indexed without being resized here, so it is
    // assumed to already hold dims.size() entries - confirm against the ctor.
    Dimensions const& dims = _array._desc.getDimensions();
    for (size_t i = 0, n = dims.size(); i < n; i++) {
        _lastPos[i] = _firstPos[i] + dims[i].getChunkInterval() - 1;
        if (_lastPos[i] > dims[i].getEndMax()) {
            _lastPos[i] = dims[i].getEndMax();
        }
    }

    _materialized = false;

    // Without an aggregate there is nothing to choose an algorithm for.
    if (_aggregate.get() == 0) {
        return;
    }

    if (_array._desc.getEmptyBitmapAttribute()) {
        //
        //  At this point, we need to make a 1-bit decision about how we
        // will compute the window(...) result. Do we materialize all of
        // the cells in the inputChunk into a coords -> value map before
        // we compute the per-cell window aggregate, or do we probe the
        // inputChunk's iterator on demand?
        //
        //  The way we figure this out is to (a) compute the total size of
        // the materialization by taking the size of the inputChunk
        // (number of elements) and calculating how big the in-memory map
        // data structure would be. Then (b) we compare this size to a
        // configurable threshold, CONFIG_MATERIALIZED_WINDOW_THRESHOLD,
        // which is expressed in mebibytes.
        //
        //  Although using size estimations appears to be a significant
        // improvement over using a simple estimate of the sparsity of the
        // input, there are several problems with the mechanism
        // implemented here.
        //
        //  1. The calculation of inputChunk.count() can involve a
        //     complete iteration through the inputChunk's values, which
        //     means that we might be computing a sub-query's results
        //     for the operator twice.
        //
        //     Consider: window ( filter ( A, expr ), ... ).
        //
        //     FIXME: Need to support some kind of cheap and reasonably
        //            accurate estimate of the size of an operator's
        //            output chunk, given the size(s) of its input chunk(s).
        //
        //  2. The real thing we are trying to minimize here is the
        //     expense of all of the probe calls into the inputChunk.
        //     The total number of probe calls is a product of the input
        //     size, the number of cells, and the chunk's sparsity. Probing
        //     (or ideally scanning) a materialized inputChunk is usually
        //     a lot less expensive than probing an unmaterialized
        //     inputChunk.
        //
        //     BUT the constant overhead to materialize the inputChunk is
        //     quite high. So we would probably benefit from a smarter way
        //     to choose between the two algorithms that incorporated the
        //     fixed cost.
        //
        //  3. As the input chunk is often going to be ordered, the cost of
        //     materializing the inputChunk by using a map<> is higher than
        //     it needs to be. See detailed note in the materialize()
        //     function.
        //
        if (_arrayIterator->getMethod() == WindowArray::MATERIALIZE) {
            materialize();
        } else if (_arrayIterator->getMethod() != WindowArray::PROBE) {
            //
            //  The operator has expressed no preference about the
            // algorithm. So we figure out whether materializing the source
            // involves too much memory.
            ConstChunk const& inputChunk = _arrayIterator->iterator->getChunk();

            // count() may require a full pass over the input (see note 1
            // above), so evaluate it exactly once and reuse the result.
            size_t const cellCount = inputChunk.count();

            // Extra per-cell bytes beyond sizeof(scidb::Value).
            // The zero test has to come first: 0 also satisfies "<= 8", so
            // with the opposite order the size estimate is never applied.
            // NOTE(review): assumes getVarSize() == 0 means "size unknown"
            // and that values of at most 8 bytes need no additional storage
            // outside the Value object - confirm against AttributeDesc/Value.
            size_t varSize = getAttributeDesc().getVarSize();
            if (varSize == 0) {
                varSize = Config::getInstance()->getOption<int>(CONFIG_STRING_SIZE_ESTIMATION);
            } else if (varSize <= 8) {
                varSize = 0;
            }

            // Estimated footprint of one map entry times the number of cells.
            size_t materializedChunkSize = cellCount *
                                           ( sizeof( _Rb_tree_node_base ) +
                                             sizeof ( scidb::Value ) +
                                             sizeof ( position_t ) +
                                             varSize );

            // All config.ini params are in Mebibytes.
            size_t maxMaterializedChunkSize =
                ( Config::getInstance()->getOption<int>(CONFIG_MATERIALIZED_WINDOW_THRESHOLD) * MiB);

            if ( materializedChunkSize <= maxMaterializedChunkSize ) {
                materialize();
            } else {
                LOG4CXX_TRACE ( windowLogger,
                                "WindowChunk::setPosition(..) - NOT MATERIALIZING \n"
                                << "\t materializedChunkSize = " << materializedChunkSize
                                << " as inputChunk.count() = " << cellCount
                                << " and varSize = " << varSize
                                << " and maxMaterializedChunkSize = " << maxMaterializedChunkSize );
                LOG4CXX_TRACE ( windowLogger, "\t NOT MATERIALIZING ");
            }
        }
    }
}