/**
 * Builds a query object of the form { <field>: { $in: [ ... ] } } from an S2 covering.
 *
 * The $in array holds two kinds of entries:
 *   - one anchored regex ("^" + cell string) per cell in 'cover', which matches the cell
 *     itself and everything stored with that cell's string as a prefix;
 *   - one plain string per distinct ancestor of a cover cell, for ancestor levels down to
 *     and including 'coarsestIndexedLevel'.
 *
 * @param cover                 the cells covering the query geometry
 * @param field                 name of the field the $in predicate is placed under
 * @param coarsestIndexedLevel  ancestors at levels below this value are never generated
 * @return the finished query object
 */
BSONObj S2SearchUtil::coverAsBSON(const vector<S2CellId> &cover, const string& field,
                                  const int coarsestIndexedLevel) {
    BSONObjBuilder queryBuilder;
    BSONObjBuilder inBuilder(queryBuilder.subobjStart(field));
    // To have an array where elements of that array are regexes, we have to do this.
    BSONObjBuilder inArrayBuilder(inBuilder.subarrayStart("$in"));
    // Sadly we must keep track of this ourselves.  Oh, BSONObjBuilder, you rascal!
    int arrayPos = 0;

    // Look at the cells we cover and all cells that are within our covering and
    // finer.  Anything with our cover as a strict prefix is contained within the cover and
    // should be intersection tested.
    for (size_t i = 0; i < cover.size(); ++i) {
        // First argument is position in the array as a string.
        // Third argument is options to regex.
        inArrayBuilder.appendRegex(myitoa(arrayPos++), "^" + cover[i].toString(), "");
    }

    // Look at the cells that cover us.  We want to look at every cell that
    // contains the covering we would index on if we were to insert the
    // query geometry.  We generate the would-index-with-this-covering and
    // find all the cells strictly containing the cells in that set, until we hit the
    // coarsest indexed cell.  We use $in, not a prefix match.  Why not prefix?  Because
    // we've already looked at everything finer or as fine as our initial covering.
    //
    // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't
    // want to look at cells 21212[not-1] because we know they're not going to intersect
    // with 212121, but entries inserted with cell value 21212 (no trailing digits) may.
    // And we've already looked at points with the cell id 211111 from the regex search
    // created above, so we only want things where the value of the last digit is not
    // stored (and therefore could be 1).
    unordered_set<S2CellId> parents;
    for (size_t i = 0; i < cover.size(); ++i) {
        // Only ask for a parent while the current cell is strictly finer than the coarsest
        // indexed level.  This never calls parent() on a cell that is already at (or
        // coarser than) that level, so we cannot walk past level 0.  Cells that are not
        // finer than the coarsest indexed level contribute no ancestors at all.
        S2CellId id = cover[i];
        while (id.level() > coarsestIndexedLevel) {
            id = id.parent();
            parents.insert(id);
        }
    }

    for (unordered_set<S2CellId>::const_iterator it = parents.begin(); it != parents.end();
         ++it) {
        inArrayBuilder.append(myitoa(arrayPos++), it->toString());
    }

    inArrayBuilder.done();
    inBuilder.done();
    return queryBuilder.obj();
}
void ExpressionMapping::S2CellIdsToIntervalsWithParents(const std::vector<S2CellId>& intervalSet, const S2IndexingParams& indexParams, OrderedIntervalList* oilOut) { // There may be duplicates when going up parent cells if two cells share a parent std::unordered_set<S2CellId> exactSet; for (const S2CellId& interval : intervalSet) { S2CellId coveredCell = interval; // Look at the cells that cover us. We want to look at every cell that contains the // covering we would index on if we were to insert the query geometry. We generate // the would-index-with-this-covering and find all the cells strictly containing the // cells in that set, until we hit the coarsest indexed cell. We use equality, not // a prefix match. Why not prefix? Because we've already looked at everything // finer or as fine as our initial covering. // // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't // want to look at cells 21212[not-1] because we know they're not going to intersect // with 212121, but entries inserted with cell value 21212 (no trailing digits) may. // And we've already looked at points with the cell id 211111 from the regex search // created above, so we only want things where the value of the last digit is not // stored (and therefore could be 1). while (coveredCell.level() > indexParams.coarsestIndexedLevel) { // Add the parent cell of the currently covered cell since we aren't at the // coarsest level yet // NOTE: Be careful not to generate cells strictly less than the // coarsestIndexedLevel - this can result in S2 failures when level < 0. 
coveredCell = coveredCell.parent(); exactSet.insert(coveredCell); } } for (const S2CellId& exact : exactSet) { BSONObj exactBSON = S2CellIdToIndexKey(exact, indexParams.indexVersion); oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(exactBSON)); } S2CellIdsToIntervalsUnsorted(intervalSet, indexParams.indexVersion, oilOut); std::sort(oilOut->intervals.begin(), oilOut->intervals.end(), compareIntervals); // Make sure that our intervals don't overlap each other and are ordered correctly. // This perhaps should only be done in debug mode. if (!oilOut->isValidFor(1)) { std::cout << "check your assumptions! OIL = " << oilOut->toString() << std::endl; verify(0); } }
// TODO: what should we really pass in for indexInfoObj? void ExpressionMapping::cover2dsphere(const S2Region& region, const BSONObj& indexInfoObj, OrderedIntervalList* oilOut) { int coarsestIndexedLevel; BSONElement ce = indexInfoObj["coarsestIndexedLevel"]; if (ce.isNumber()) { coarsestIndexedLevel = ce.numberInt(); } else { coarsestIndexedLevel = S2::kAvgEdge.GetClosestLevel(100 * 1000.0 / kRadiusOfEarthInMeters); } // The min level of our covering is the level whose cells are the closest match to the // *area* of the region (or the max indexed level, whichever is smaller) The max level // is 4 sizes larger. double edgeLen = sqrt(region.GetRectBound().Area()); S2RegionCoverer coverer; coverer.set_min_level(min(coarsestIndexedLevel, 2 + S2::kAvgEdge.GetClosestLevel(edgeLen))); coverer.set_max_level(4 + coverer.min_level()); std::vector<S2CellId> cover; coverer.GetCovering(region, &cover); // Look at the cells we cover and all cells that are within our covering and finer. // Anything with our cover as a strict prefix is contained within the cover and should // be intersection tested. bool considerCoarser = false; std::set<std::string> intervalSet; for (size_t i = 0; i < cover.size(); ++i) { intervalSet.insert(cover[i].toString()); // If any of our covers could be covered by something in the index, we have // to look at things coarser. if (cover[i].level() > coarsestIndexedLevel) { considerCoarser = true; } } std::set<std::string> exactSet; if (considerCoarser) { // Look at the cells that cover us. We want to look at every cell that contains the // covering we would index on if we were to insert the query geometry. We generate // the would-index-with-this-covering and find all the cells strictly containing the // cells in that set, until we hit the coarsest indexed cell. We use equality, not // a prefix match. Why not prefix? Because we've already looked at everything // finer or as fine as our initial covering. 
// // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't // want to look at cells 21212[not-1] because we know they're not going to intersect // with 212121, but entries inserted with cell value 21212 (no trailing digits) may. // And we've already looked at points with the cell id 211111 from the regex search // created above, so we only want things where the value of the last digit is not // stored (and therefore could be 1). for (size_t i = 0; i < cover.size(); ++i) { for (S2CellId id = cover[i].parent(); id.level() >= coarsestIndexedLevel; id = id.parent()) { exactSet.insert(id.toString()); } } } // We turned the cell IDs into strings which define point intervals or prefixes of // strings we want to look for. std::set<std::string>::iterator exactIt = exactSet.begin(); std::set<std::string>::iterator intervalIt = intervalSet.begin(); while (exactSet.end() != exactIt && intervalSet.end() != intervalIt) { const std::string& exact = *exactIt; const std::string& ival = *intervalIt; if (exact < ival) { // add exact oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(exact)); exactIt++; } else { std::string end = ival; end[end.size() - 1]++; oilOut->intervals.push_back( IndexBoundsBuilder::makeRangeInterval(ival, end, true, false)); intervalIt++; } } if (exactSet.end() != exactIt) { verify(intervalSet.end() == intervalIt); do { oilOut->intervals.push_back(IndexBoundsBuilder::makePointInterval(*exactIt)); exactIt++; } while (exactSet.end() != exactIt); } else if (intervalSet.end() != intervalIt) { verify(exactSet.end() == exactIt); do { const std::string& ival = *intervalIt; std::string end = ival; end[end.size() - 1]++; oilOut->intervals.push_back( IndexBoundsBuilder::makeRangeInterval(ival, end, true, false)); intervalIt++; } while (intervalSet.end() != intervalIt); } // Make sure that our intervals don't overlap each other and are ordered correctly. // This perhaps should only be done in debug mode. 
if (!oilOut->isValidFor(1)) { cout << "check your assumptions! OIL = " << oilOut->toString() << std::endl; verify(0); } }