Code example #1
0
boost::shared_ptr<ConstChunkIterator> ConstRLEChunk::getConstIterator(int mode) const
{
    // Pick the iterator implementation based on the attribute type:
    // the empty-bitmap ("indicator") attribute gets the bitmask-specialized
    // iterator, every other attribute gets the generic RLE chunk iterator.
    ConstChunkIterator* rawIter;
    if (getAttributeDesc().getType() == TID_INDICATOR)
    {
        rawIter = new ConstRLEBitmaskIterator(*_arrayDesc, _addr.attId, this, mode);
    }
    else
    {
        rawIter = new ConstRLEChunkIterator(*_arrayDesc, _addr.attId, this, mode);
    }
    return boost::shared_ptr<ConstChunkIterator>(rawIter);
}
Code example #2
0
    /**
     *  Private function to setPosition in a WindowChunk.
     *
     *  Records the chunk's first position from the caller, derives its last
     *  position (first + chunk interval - 1, clamped to the dimension's
     *  upper bound), and — when this chunk aggregates over an emptyable
     *  array — decides whether to materialize the input chunk up front or
     *  probe it lazily, based on the operator's stated method and an
     *  estimate of the materialization's memory footprint.
     *
     *  @param iterator  the array iterator whose chunk we are positioning on
     *  @param pos       the first (upper-left) coordinate of the chunk
     */
    void WindowChunk::setPosition(WindowArrayIterator const* iterator, Coordinates const& pos)
    {
        _arrayIterator = iterator;
        _firstPos = pos;
        Dimensions const& dims = _array._desc.getDimensions();

        // Last position is one full chunk interval past the first position,
        // clamped to each dimension's declared maximum.
        for (size_t i = 0, n = dims.size(); i < n; i++) {
            _lastPos[i] = _firstPos[i] + dims[i].getChunkInterval() - 1;
            if (_lastPos[i] > dims[i].getEndMax())
            {
                _lastPos[i] = dims[i].getEndMax();
            }
        }
        _materialized = false;
        if (_aggregate.get() == 0)
        {
            // No aggregate attached: nothing further to decide.
            return;
        }

        if (_array._desc.getEmptyBitmapAttribute())
        {
            //
            //  At this point, we need to make a 1-bit decision about how we
            // will compute the window(...) result. Do we materialize all of
            // the cells in the inputChunk into a coords -> value map before
            // we compute the per-cell window aggregate, or do we probe the
            // inputChunk's iterator on demand?
            //
            //  The way we figure this out is to (a) compute the total size of
            // the materialization by taking the size of the inputChunk
            // (number of elements) and calculating how big the in-memory map
            // data structure would be. Then (b) we compare this size to a
            // (configurable) threshold, which is a constant (configurable)
            // multiplier of the CONFIG_MEM_ARRAY_THRESHHOLD.
            //
            //  Although using size estimations appears to be a significant
            // improvement over using a simple estimate of the sparsity of the
            // input, there are several problems with the mechanism
            // implemented here.
            //
            //  1. The calculation of the inputChunk.count() can involve a
            //  complete iteration through the inputChunk's values, which
            //  means that we might be computing a sub-query's results
            //  for the operator twice.
            //
            //  Consider: window ( filter ( A, expr ), ... ).
            //
            //  FIXME: Need to support some kind of cheap and reasonably
            //         accurate estimate of the size of an operator's
            //         output chunk, given the size(s) of its input chunk(s).
            //
            //  2.  The real thing we are trying to minimize here is the
            //   expense of all of the probe calls into the inputChunk.
            //   The total number of probe calls is a product of the input
            //   size, the number of cells, and the chunk's sparsity. Probing
            //   (or ideally scanning) a materialized inputChunk is usually
            //   a lot less expensive than probing an unmaterialized
            //   inputChunk.
            //
            //    BUT the constant overhead to materialize the inputChunk is
            //   quite high. So we would probably benefit from a smarter way
            //   to choose between the two algorithms that incorporated the
            //   fixed cost.
            //
            //  3.  As input chunk is often going to be ordered, the cost of
            //   materializing the inputChunk by using a map<> is higher than
            //   it needs to be. See detailed note in the materialize()
            //   function.
            //
            if (_arrayIterator->getMethod() == WindowArray::MATERIALIZE)
            {
                materialize();
            } else if (_arrayIterator->getMethod() != WindowArray::PROBE)
            {
                //
                //  The operator has expressed no preference about the
                // algorithm. So we figure out whether materializing the source
                // involves too much memory.
                ConstChunk const& inputChunk = _arrayIterator->iterator->getChunk();
                size_t varSize = getAttributeDesc().getVarSize();

                //
                //  BUG FIX: test the variable-size marker (0) FIRST.
                // getVarSize() returns 0 for variable-size types, so the old
                // ordering (varSize <= 8 checked first) made the == 0 branch
                // unreachable and costed variable-size attributes at 0 bytes,
                // under-estimating materializedChunkSize.
                if (varSize == 0)
                {
                    // Variable-size type: use the configured per-string
                    // size estimate instead.
                    varSize = Config::getInstance()->getOption<int>(CONFIG_STRING_SIZE_ESTIMATION);
                } else if (varSize <= 8)
                {
                    // Small fixed-size values fit inside the Value object
                    // itself, so they add no extra per-cell storage.
                    varSize = 0;
                }

                // Estimated footprint of the coords -> value map: one
                // red-black tree node, one Value, and one position per cell,
                // plus any out-of-line variable-size payload.
                size_t materializedChunkSize = inputChunk.count() *
                                               ( sizeof( _Rb_tree_node_base ) +
                                               sizeof ( scidb::Value ) +
                                               sizeof ( position_t ) +
                                               varSize );

                size_t maxMaterializedChunkSize = (
                    Config::getInstance()->getOption<int>(CONFIG_MATERIALIZED_WINDOW_THRESHOLD)
                    * MiB);   // All config.ini params are in Mebibytes.

                if ( materializedChunkSize <= maxMaterializedChunkSize )
                {
                    materialize();
                } else {

                    LOG4CXX_TRACE ( windowLogger,
                                    "WindowChunk::setPosition(..) - NOT MATERIALIZING \n"
                                    << "\t materializedChunkSize = " << materializedChunkSize
                                    << " as inputChunk.count() = " << inputChunk.count() << " and varSize = " << varSize
                                    << " and maxMaterializedChunkSize = " << maxMaterializedChunkSize
                                  );
                    LOG4CXX_TRACE ( windowLogger, "\t NOT MATERIALIZING ");
                }
            }
        }
    }