Esempio n. 1
0
// Builds the single-field BSON index key (empty field name) for an S2 cell.
//
// For index version 3 and later the key is the cell's 64-bit id stored as a
// signed long long; for earlier versions it is the cell's string form.
//
// Note on the signed conversion. S2 hands us an unsigned 64-bit id:
//
//   unsigned:  0 ................. 2^63 ................ 2^64 - 1
//              000...              100...                111...
//   signed:    -2^63 ............. 0 ................... 2^63 - 1
//              100...              000...                011...
//
// Ids with the top bit set (>= 2^63) become negative once cast, so the
// relative order of ids on opposite sides of 2^63 changes. A single cell
// never spans more than one face, so an individual interval never crosses
// that threshold, and scans still produce the same results.
BSONObj S2CellIdToIndexKey(const S2CellId& cellId, S2IndexVersion indexVersion) {
    BSONObjBuilder keyBuilder;
    const bool useStringKey = indexVersion < S2_INDEX_VERSION_3;
    if (useStringKey) {
        // Pre-v3 indexes store the textual cell id.
        keyBuilder.append("", cellId.ToString());
    } else {
        // v3+ indexes store the numeric id reinterpreted as signed.
        keyBuilder.append("", static_cast<long long>(cellId.id()));
    }
    return keyBuilder.obj();
}
Esempio n. 2
0
    // Builds the query object { <field>: { $in: [ ... ] } } for a cell covering.
    //
    // The $in array contains, in this order:
    //   1. one anchored regex ("^" + cell string) per cell in 'cover', which matches
    //      that cell and every finer cell having it as a prefix;
    //   2. one exact string per distinct ancestor of the covering cells, from the
    //      immediate parent down to 'coarsestIndexedLevel' (inclusive).
    //
    // Ancestors are matched exactly rather than by prefix because everything as fine
    // as, or finer than, the covering is already matched by the regexes.
    //
    // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't
    // want to look at cells 21212[not-1] because we know they're not going to intersect
    // with 212121, but entries inserted with cell value 21212 (no trailing digits) may.
    BSONObj S2SearchUtil::coverAsBSON(const vector<S2CellId> &cover, const string& field,
                                      const int coarsestIndexedLevel) {
        BSONObjBuilder queryBuilder;
        BSONObjBuilder inBuilder(queryBuilder.subobjStart(field));
        // To have an array where elements of that array are regexes, we have to do this.
        BSONObjBuilder inArrayBuilder(inBuilder.subarrayStart("$in"));
        // The builder does not track array positions for us, so we do it ourselves.
        int arrayPos = 0;

        // Look at the cells we cover and all cells that are within our covering and
        // finer.  Anything with our cover as a strict prefix is contained within the cover
        // and should be intersection tested.
        for (size_t i = 0; i < cover.size(); ++i) {
            // First argument is position in the array as a string.
            // Third argument is options to regex.
            inArrayBuilder.appendRegex(myitoa(arrayPos++), "^" + cover[i].toString(), "");
        }

        // Collect the cells that strictly contain our covering.  Two covering cells may
        // share an ancestor, hence the set.
        //
        // Only step to a parent while the current cell is still finer than the coarsest
        // indexed level AND is not already a level-0 (face) cell.  Asking S2 for the
        // parent of a face cell would yield a cell at level < 0, which is invalid.
        unordered_set<S2CellId> parents;
        for (size_t i = 0; i < cover.size(); ++i) {
            S2CellId id = cover[i];
            while (id.level() > 0 && id.level() > coarsestIndexedLevel) {
                id = id.parent();
                parents.insert(id);
            }
        }

        for (unordered_set<S2CellId>::const_iterator it = parents.begin();
                it != parents.end(); ++it) {
            inArrayBuilder.append(myitoa(arrayPos++), it->toString());
        }

        inArrayBuilder.done();
        inBuilder.done();
        return queryBuilder.obj();
    }
Esempio n. 3
0
// Appends to 'oilOut' the index intervals needed to search for a cell covering:
//   - a point interval for every distinct ancestor of each cell in 'intervalSet',
//     from its immediate parent down to indexParams.coarsestIndexedLevel, and
//   - whatever S2CellIdsToIntervalsUnsorted() emits for the covering cells themselves.
// The combined list is then sorted with compareIntervals and validated.
//
// Ancestors are matched by equality rather than by prefix: anything as fine as, or
// finer than, the covering is handled by the intervals generated for the covering
// cells. For a fine cell 212121 whose parent is 21212, cells 21212[not-1] cannot
// intersect 212121, but an entry stored with exactly 21212 (no trailing digits) may.
void ExpressionMapping::S2CellIdsToIntervalsWithParents(const std::vector<S2CellId>& intervalSet,
                                                        const S2IndexingParams& indexParams,
                                                        OrderedIntervalList* oilOut) {
    // Two covering cells can share an ancestor, so collect ancestors in a set.
    std::unordered_set<S2CellId> ancestors;
    for (const S2CellId& coveringCell : intervalSet) {
        // Climb one level at a time, stopping once the coarsest indexed level has been
        // reached. Never step past it: asking S2 for a cell at level < 0 can fail.
        for (S2CellId cursor = coveringCell;
             cursor.level() > indexParams.coarsestIndexedLevel;) {
            cursor = cursor.parent();
            ancestors.insert(cursor);
        }
    }

    // Each ancestor becomes an exact-match (point) interval on its index key.
    for (const S2CellId& ancestor : ancestors) {
        BSONObj ancestorKey = S2CellIdToIndexKey(ancestor, indexParams.indexVersion);
        oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(ancestorKey));
    }

    // Add the intervals for the covering cells, then put everything in order.
    S2CellIdsToIntervalsUnsorted(intervalSet, indexParams.indexVersion, oilOut);
    std::sort(oilOut->intervals.begin(), oilOut->intervals.end(), compareIntervals);

    // Sanity check: the intervals must be ordered and must not overlap.
    // This perhaps should only be done in debug mode.
    if (oilOut->isValidFor(1)) {
        return;
    }
    std::cout << "check your assumptions! OIL = " << oilOut->toString() << std::endl;
    verify(0);
}
Esempio n. 4
0
// Builds the single-field BSON index key (empty field name) for an S2 cell.
//
// Index version 3 and later: the key is the cell's 64-bit id stored as a signed
// long long. Earlier versions: the key is the cell's string form.
//
// Note on the signed conversion. S2 hands us an unsigned 64-bit id:
//
//   unsigned:  0 ................. 2^63 ................ 2^64 - 1
//              000...              100...                111...
//   signed:    -2^63 ............. 0 ................... 2^63 - 1
//              100...              000...                011...
//
// Ids with the top bit set (>= 2^63) become negative once cast, so the
// relative order of ids on opposite sides of 2^63 changes. A single cell
// never spans more than one face, so an individual interval never crosses
// that threshold, and scans still produce the same results.
BSONObj S2CellIdToIndexKey(const S2CellId& cellId, S2IndexVersion indexVersion) {
    if (indexVersion < S2_INDEX_VERSION_3) {
        // Older index versions: variable-length string key. The builder is left at
        // its default initial size, so hand back a copy whose buffer is sized to
        // the finished object.
        BSONObjBuilder stringKeyBuilder;
        stringKeyBuilder.append("", cellId.ToString());
        return stringKeyBuilder.obj().copy();
    }

    // Version 3+ keys have a fixed size of 15 bytes:
    //   total size (4) | type code 0x12 (1) | field name "" 0x00 (1) |
    //   long long cell id (8) | EOO (1)
    // so the builder is created with exactly that initial size.
    const int kNumericKeySizeBytes = 15;
    BSONObjBuilder numericKeyBuilder(kNumericKeySizeBytes);
    numericKeyBuilder.append("", static_cast<long long>(cellId.id()));
    return numericKeyBuilder.obj();
}
Esempio n. 5
0
    // TODO: what should we really pass in for indexInfoObj?
    // Computes an S2 cell covering of 'region' and appends to 'oilOut' the index
    // intervals needed to find everything that may intersect it:
    //   - for each covering cell, a range interval [cell, next) over the cell's string
    //     form, which matches the cell and every finer cell having it as a prefix;
    //   - for each distinct ancestor of a covering cell, down to the coarsest indexed
    //     level, a point interval matching that ancestor exactly.
    // Intervals are emitted in ascending string order and validated at the end.
    //
    // 'indexInfoObj' may carry a numeric "coarsestIndexedLevel"; when it does not, a
    // default level is derived from S2::kAvgEdge for 100 * 1000.0 / kRadiusOfEarthInMeters.
    void ExpressionMapping::cover2dsphere(const S2Region& region,
                                          const BSONObj& indexInfoObj,
                                          OrderedIntervalList* oilOut) {

        BSONElement ce = indexInfoObj["coarsestIndexedLevel"];
        const int coarsestIndexedLevel = ce.isNumber()
            ? ce.numberInt()
            : S2::kAvgEdge.GetClosestLevel(100 * 1000.0 / kRadiusOfEarthInMeters);

        // The min level of our covering is two levels finer than the level whose average
        // edge best matches the square root of the region's bounding-rect area, capped at
        // the coarsest indexed level (whichever is smaller).  The max level is four levels
        // finer than the min level.
        double edgeLen = sqrt(region.GetRectBound().Area());
        S2RegionCoverer coverer;
        coverer.set_min_level(min(coarsestIndexedLevel,
                                  2 + S2::kAvgEdge.GetClosestLevel(edgeLen)));
        coverer.set_max_level(4 + coverer.min_level());

        std::vector<S2CellId> cover;
        coverer.GetCovering(region, &cover);

        // intervalSet: the covering cells themselves.  Anything with one of these as a
        // prefix is contained within the cover and should be intersection tested.
        //
        // exactSet: the cells that strictly contain a covering cell, down to the coarsest
        // indexed level.  These are matched by equality, not by prefix, because everything
        // as fine as or finer than the covering is already matched via intervalSet.
        //
        // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't
        // want to look at cells 21212[not-1] because we know they're not going to intersect
        // with 212121, but entries inserted with cell value 21212 (no trailing digits) may.
        std::set<std::string> intervalSet;
        std::set<std::string> exactSet;
        for (size_t i = 0; i < cover.size(); ++i) {
            intervalSet.insert(cover[i].toString());

            // Only step to a parent while the current cell is still finer than the
            // coarsest indexed level AND is not already a level-0 (face) cell.  Asking S2
            // for the parent of a face cell would yield a cell at level < 0, which is
            // invalid.
            S2CellId id = cover[i];
            while (id.level() > 0 && id.level() > coarsestIndexedLevel) {
                id = id.parent();
                exactSet.insert(id.toString());
            }
        }

        // Merge the two sorted sets into one ascending list of intervals.  When an exact
        // string and a prefix string compare equal, the prefix (range) interval goes first.
        std::set<std::string>::const_iterator exactIt = exactSet.begin();
        std::set<std::string>::const_iterator intervalIt = intervalSet.begin();
        while (exactSet.end() != exactIt || intervalSet.end() != intervalIt) {
            const bool takeExact = (intervalSet.end() == intervalIt) ||
                (exactSet.end() != exactIt && *exactIt < *intervalIt);
            if (takeExact) {
                oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(*exactIt));
                ++exactIt;
            }
            else {
                // The exclusive upper bound is the prefix with its last character bumped
                // by one, so the range covers every string that starts with the prefix.
                // NOTE(review): relies on toString() never returning an empty string.
                const std::string& ival = *intervalIt;
                std::string end = ival;
                end[end.size() - 1]++;
                oilOut->intervals.push_back(
                    IndexBoundsBuilder::makeRangeInterval(ival, end, true, false));
                ++intervalIt;
            }
        }

        // Make sure that our intervals don't overlap each other and are ordered correctly.
        // This perhaps should only be done in debug mode.
        if (!oilOut->isValidFor(1)) {
            cout << "check your assumptions! OIL = " << oilOut->toString() << std::endl;
            verify(0);
        }
    }