bool PlacesPoiMergerCreator::createMergers(const MatchSet& matches, vector<Merger*>& mergers) const { bool result = false; assert(matches.size() > 0); set< pair<ElementId, ElementId> > eids; // go through all the matches for (MatchSet::const_iterator it = matches.begin(); it != matches.end(); ++it) { const PlacesPoiMatch* pm = dynamic_cast<const PlacesPoiMatch*>(*it); // check to make sure all the input matches are building matches. if (pm == 0) { // return an empty result return false; } // add all the element to element pairs to a set else { set< pair<ElementId, ElementId> > s = pm->getMatchPairs(); eids.insert(s.begin(), s.end()); } } // only add the POI merge if there are elements to merge. if (eids.size() > 0) { mergers.push_back(new PlacesPoiMerger(eids)); result = true; } return result; }
// filter extraneous matches static void filterMatches(MatchSet &matches) { set<size_t> eom; // first, collect all end-offset matches for (const auto &match : matches) { eom.insert(match.second); } // now, go through all the end-offsets and filter extra matches for (const auto &elem : eom) { // find minimum SOM for this EOM size_t min_som = -1U; for (const auto &match : matches) { // skip entries with wrong EOM if (match.second != elem) { continue; } min_som = min(min_som, match.first); } auto msit = matches.begin(); while (msit != matches.end()) { // skip everything that doesn't match if (msit->second != elem || msit->first <= min_som) { ++msit; continue; } DEBUG_PRINTF("erasing match %zu, %zu\n", msit->first, msit->second); matches.erase(msit++); } } }
/** * Creates a single match and should result in a PoiPolygonMerger */ void basicTest() { OsmMap::resetCounters(); OsmMapPtr map(new OsmMap()); Coordinate c1[] = { Coordinate(0.0, 0.0), Coordinate(20.0, 0.0), Coordinate(20.0, 20.0), Coordinate(0.0, 20.0), Coordinate(0.0, 0.0), Coordinate::getNull() }; WayPtr w1 = TestUtils::createWay(map, Status::Unknown1, c1, 5, "w1"); w1->getTags().set("area", true); w1->getTags()["name"] = "foo"; w1->getTags()["amenity"] = "bar"; NodePtr n1(new Node(Status::Unknown2, 1, 10, 10, 5)); n1->getTags()["name"] = "bar"; n1->getTags()["amenity"] = "cafe"; map->addNode(n1); PoiPolygonMatch match1( map, w1->getElementId(), n1->getElementId(), shared_ptr<MatchThreshold>()); MatchSet matches; matches.insert(&match1); vector<Merger*> mergers; PoiPolygonMergerCreator uut; uut.setOsmMap(map.get()); HOOT_STR_EQUALS(1, uut.createMergers(matches, mergers)); HOOT_STR_EQUALS(1, mergers.size()); HOOT_STR_EQUALS(1, (dynamic_cast<PoiPolygonMerger*>(mergers[0]) != 0)); }
bool PoiPolygonMergerCreator::_isConflictingSet(const MatchSet& matches) const { // _map must be set using setOsmMap() assert(_map != 0); bool conflicting = false; for (MatchSet::const_iterator it = matches.begin(); it != matches.end(); ++it) { const Match* m1 = *it; for (MatchSet::const_iterator jt = matches.begin(); jt != matches.end(); ++jt) { const Match* m2 = *jt; if (m1 != m2) { ConstOsmMapPtr map = _map->shared_from_this(); if (MergerFactory::getInstance().isConflicting(map, m1, m2)) { conflicting = true; } } } } return conflicting; }
bool HighwayMergerCreator::createMergers(const MatchSet& matches, vector<Merger*>& mergers) const { LOG_TRACE("Creating mergers with " << className() << "..."); bool result = false; assert(matches.size() > 0); set<pair<ElementId, ElementId>> eids; boost::shared_ptr<SublineStringMatcher> sublineMatcher; // go through all the matches for (MatchSet::const_iterator it = matches.begin(); it != matches.end(); ++it) { const Match* m = *it; LOG_VART(m->toString()); const HighwayMatch* hm = dynamic_cast<const HighwayMatch*>(m); // check to make sure all the input matches are building matches. if (hm == 0) { // return an empty result LOG_TRACE("Returning empty result due to match not being HighwayMatch: " << m->toString()); return false; } // add all the element to element pairs to a set else { // there should only be one HighwayMatch in a set. sublineMatcher = hm->getSublineMatcher(); set<pair<ElementId, ElementId>> s = hm->getMatchPairs(); LOG_VART(s); eids.insert(s.begin(), s.end()); } } LOG_VART(eids); // only add the highway merge if there are elements to merge. if (eids.size() > 0) { if (!ConfigOptions().getHighwayMergeTagsOnly()) { mergers.push_back(new HighwaySnapMerger(eids, sublineMatcher)); } else { mergers.push_back(new HighwayTagOnlyMerger(eids, sublineMatcher)); } result = true; } return result; }
bool PoiPolygonMergerCreator::createMergers(const MatchSet& matches, vector<Merger*>& mergers) const { bool result = false; assert(matches.size() > 0); bool foundAPoi = false; bool foundAPolygon = false; // go through all the matches for (MatchSet::const_iterator it = matches.begin(); it != matches.end(); ++it) { const Match* m = *it; if (m->getMatchMembers() & MatchMembers::Poi) { foundAPoi = true; } if (m->getMatchMembers() & MatchMembers::Polygon) { foundAPolygon = true; } } // if there is at least one POI and at least one polygon, then we need to merge things in a // special way. if (foundAPoi && foundAPolygon) { set< pair<ElementId, ElementId> > eids; // go through all the matches for (MatchSet::const_iterator it = matches.begin(); it != matches.end(); ++it) { set< pair<ElementId, ElementId> > s = (*it)->getMatchPairs(); eids.insert(s.begin(), s.end()); } if (_isConflictingSet(matches)) { mergers.push_back(new MarkForReviewMerger(eids, "Conflicting information", 1)); } else { mergers.push_back(new PoiPolygonMerger(eids)); } result = true; } return result; }
static void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state, bool allowEodMatches) { SOMMap::const_iterator it, ite; for (it = state.states.begin(), ite = state.states.end(); it != ite; ++it) { NFAGraph::adjacency_iterator ai, ae; // we can't accept anything from startDs inbetween UTF-8 codepoints if (state.utf8 && it->first == g.startDs && !isUtf8CodePoint(state.cur)) { continue; } for (tie(ai, ae) = adjacent_vertices(it->first, g); ai != ae; ++ai) { if (*ai == g.accept || (*ai == g.acceptEod && allowEodMatches)) { // check edge assertions if we are allowed to reach accept if (!canReach(g, it->first, *ai, state)) { continue; } DEBUG_PRINTF("match found at %zu\n", state.offset); assert(!g[it->first].reports.empty()); for (const auto &report_id : g[it->first].reports) { const Report &ri = state.rm.getReport(report_id); DEBUG_PRINTF("report %u has offset adjustment %d\n", report_id, ri.offsetAdjust); matches.insert( make_pair(it->second, state.offset + ri.offsetAdjust)); } } } } }
/** * Creates two matches with overlap and should create a MarkForReviewMerger */ void reviewTest() { OsmMap::resetCounters(); OsmMapPtr map(new OsmMap()); Coordinate c1[] = { Coordinate(0.0, 0.0), Coordinate(20.0, 0.0), Coordinate(20.0, 20.0), Coordinate(0.0, 20.0), Coordinate(0.0, 0.0), Coordinate::getNull() }; WayPtr w1 = TestUtils::createWay(map, Status::Unknown1, c1, 5, "w1"); w1->getTags().set("building", true); w1->getTags()["name"] = "foo"; w1->getTags()["amenity"] = "bar"; Coordinate c2[] = { Coordinate(0.0, 0.0), Coordinate(5.0, 0.0), Coordinate(5.0, 5.0), Coordinate(0.0, 5.0), Coordinate(0.0, 0.0), Coordinate::getNull() }; WayPtr w2 = TestUtils::createWay(map, Status::Unknown2, c2, 5, "w2"); w2->getTags().set("building", true); w2->getTags()["name"] = "goofie"; NodePtr n1(new Node(Status::Unknown2, 1, 19, 19, 5)); n1->getTags()["name"] = "foo"; n1->getTags()["amenity"] = "cafe"; map->addNode(n1); vector<const Match*> matchesV; PoiPolygonMatch match1(map, w1->getElementId(), n1->getElementId(), shared_ptr<MatchThreshold>()); matchesV.push_back(&match1); shared_ptr<const MatchThreshold> threshold(new MatchThreshold(0.5, 0.5, 0.5)); BuildingMatchCreator().createMatches(map, matchesV, threshold); PoiPolygonMatch match2(map, w2->getElementId(), n1->getElementId(), shared_ptr<MatchThreshold>()); LOG_VAR(match2); MatchSet matches; matches.insert(matchesV.begin(), matchesV.end()); vector<Merger*> mergers; PoiPolygonMergerCreator uut; uut.setOsmMap(map.get()); HOOT_STR_EQUALS(1, uut.createMergers(matches, mergers)); HOOT_STR_EQUALS(1, mergers.size()); LOG_VAR(*mergers[0]); HOOT_STR_EQUALS(1, (dynamic_cast<MarkForReviewMerger*>(mergers[0]) != 0)); }
bool MarkForReviewMergerCreator::createMergers(const MatchSet& matches, vector<Merger*>& mergers) const { bool result = false; set< pair<ElementId, ElementId> > eids; int reviewCount = 0; QStringList matchStrings; double score = 0; // go through all the matches for (MatchSet::const_iterator it = matches.begin(); it != matches.end(); ++it) { const Match* match = (*it); //MatchType type = _mt.getType(*(*it)); MatchType type = match->getType(); if (type == MatchType::Review) { set< pair<ElementId, ElementId> > s = (*it)->getMatchPairs(); eids.insert(s.begin(), s.end()); matchStrings.append((*it)->explain()); score = max<double>((*it)->getClassification().getReviewP(), score); reviewCount++; } } if (reviewCount > 0 && reviewCount != int(matches.size())) { throw HootException("Expected the whole set to be review matches."); } // only add the mark for review merger if there are elements to merge. if (eids.size() > 0) { mergers.push_back(new MarkForReviewMerger(eids, matchStrings.join(","), score)); result = true; } return result; }
void runFindSubgraphsTest() { // See this for a visual. // https://insightcloud.digitalglobe.com/redmine/attachments/download/1638/Hootenanny%20-%20Graph%20Based%20Conflation%20-%202013-06-21.pptx ElementId a1 = ElementId::way(1); ElementId a2 = ElementId::way(2); ElementId a3 = ElementId::way(3); // unused //ElementId a4 = ElementId::way(4); ElementId b1 = ElementId::way(5); ElementId b2 = ElementId::way(5); ElementId b3 = ElementId::way(6); vector<const Match*> matches; // force the pointers to be in order which forces the set to be consistent between runs. ConstrainedFakeMatch* fm = new ConstrainedFakeMatch[4]; MatchThresholdPtr mt(new MatchThreshold(0.5, 0.5)); matches.push_back(fm[0].init(a1, b1, 0.8, mt)->addConflict(&fm[1])); matches.push_back(fm[1].init(a2, b1, 1, mt)->addConflict(&fm[2])); matches.push_back(fm[2].init(a2, b2, 0.9, mt)); matches.push_back(fm[3].init(a3, b3, 0.9, mt)); ConstOsmMapPtr empty; GreedyConstrainedMatches uut(empty); uut.addMatches(matches.begin(), matches.end()); vector<const Match*> subsetVector = uut.calculateSubset(); MatchSet matchSet; matchSet.insert(subsetVector.begin(), subsetVector.end()); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.9, uut.getScore(), 0.001); CPPUNIT_ASSERT_EQUAL((size_t)2, matchSet.size()); CPPUNIT_ASSERT_EQUAL(true, matchSet.find(&fm[1]) != matchSet.end()); CPPUNIT_ASSERT_EQUAL(true, matchSet.find(&fm[3]) != matchSet.end()); }
bool HighwaySnapMergerCreator::createMergers(const MatchSet& matches, vector<Merger*>& mergers) const { bool result = false; assert(matches.size() > 0); set< pair<ElementId, ElementId> > eids; shared_ptr<SublineStringMatcher> sublineMatcher; // go through all the matches for (MatchSet::const_iterator it = matches.begin(); it != matches.end(); ++it) { const HighwayMatch* hm = dynamic_cast<const HighwayMatch*>(*it); // check to make sure all the input matches are building matches. if (hm == 0) { // return an empty result return false; } // add all the element to element pairs to a set else { // there should only be one HighwayMatch in a set. sublineMatcher = hm->getSublineMatcher(); set< pair<ElementId, ElementId> > s = hm->getMatchPairs(); eids.insert(s.begin(), s.end()); } } // only add the highway merge if there are elements to merge. if (eids.size() > 0) { mergers.push_back(new HighwaySnapMerger(_minSplitSize, eids, sublineMatcher)); result = true; } return result; }
static void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state, bool allowEodMatches) { auto acc_states = state.states.s; acc_states &= allowEodMatches ? state.accept_with_eod : state.accept; for (size_t i = acc_states.find_first(); i != acc_states.npos; i = acc_states.find_next(i)) { const NFAVertex u = state.vertices[i]; const size_t &som_offset = state.states.som[i]; // we can't accept anything from startDs in between UTF-8 codepoints if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) { continue; } for (const auto &e : out_edges_range(u, g)) { NFAVertex v = target(e, g); if (v == g.accept || (v == g.acceptEod && allowEodMatches)) { // check edge assertions if we are allowed to reach accept if (!canReach(g, e, state)) { continue; } DEBUG_PRINTF("match found at %zu\n", state.offset); assert(!g[u].reports.empty()); for (const auto &report_id : g[u].reports) { const Report &ri = state.rm.getReport(report_id); DEBUG_PRINTF("report %u has offset adjustment %d\n", report_id, ri.offsetAdjust); matches.emplace(som_offset, state.offset + ri.offsetAdjust); } } } } }
bool NetworkMergerCreator::createMergers(const MatchSet& matchesIn, vector<Merger*>& mergers) const { LOG_TRACE("Creating mergers with " << className() << "..."); LOG_TRACE("Creating mergers for match set: " << matchesIn); QString matchesList = ""; if (hoot::Log::Trace == hoot::Log::getInstance().getLevel()) { for (MatchSet::const_iterator it = matchesIn.begin(); it != matchesIn.end(); ++it) { const NetworkMatch* nmi = dynamic_cast<const NetworkMatch*>(*it); if (nmi) { matchesList += nmi->getEdgeMatch()->getUid() + " "; } } } LOG_TRACE(matchesList.size()); LOG_TRACE(matchesList); MatchSet matches = matchesIn; LOG_VART(matches); bool result = false; assert(matches.size() > 0); const NetworkMatch* m = dynamic_cast<const NetworkMatch*>(*matches.begin()); if (m) { const bool matchOverlap = _containsOverlap(matches); LOG_VART(matchOverlap); if (!matchOverlap) { // create a merger that can merge multiple partial matches QSet<ConstEdgeMatchPtr> edgeMatches; int count = 0; set<pair<ElementId, ElementId>> pairs; foreach (const Match* itm, matches) { const NetworkMatch* nm = dynamic_cast<const NetworkMatch*>(itm); edgeMatches.insert(nm->getEdgeMatch()); set<pair<ElementId, ElementId>> p = nm->getMatchPairs(); pairs.insert(p.begin(), p.end()); count++; if (count % 100 == 0) { PROGRESS_INFO( "Added match " << count << " / " << matches.size() << " to partial network merger..."); } } if (!ConfigOptions().getHighwayMergeTagsOnly()) { mergers.push_back(new PartialNetworkMerger(pairs, edgeMatches, m->getNetworkDetails())); } else { // TODO: We need to allow for HighwayTagOnlyMerger to spawn off PartialNetworkMerger here, // I guess...but that's kind of nasty... (applies to the rest of the calls to // HighwayTagOnlyMerger in this class as well). mergers.push_back(new HighwayTagOnlyMerger(pairs)); } } else { // If one match completely contains the rest, use the larger match. This may need to be // reverted as we play with more data, but at this point it seems like a reasonable heuristic. if (const NetworkMatch* larger = _getLargestContainer(matches))
bool ScriptMatch::_isOrderedConflicting(const ConstOsmMapPtr& map, ElementId sharedEid, ElementId other1, ElementId other2) const { Isolate* current = v8::Isolate::GetCurrent(); HandleScope handleScope(current); Context::Scope context_scope(_script->getContext(current)); set<ElementId> eids; eids.insert(sharedEid); eids.insert(other1); eids.insert(other2); OsmMapPtr copiedMap(new OsmMap(map->getProjection())); CopyMapSubsetOp(map, eids).apply(copiedMap); Handle<Object> copiedMapJs = OsmMapJs::create(copiedMap); // make sure unknown1 is always first ElementId eid11, eid12, eid21, eid22; if (map->getElement(sharedEid)->getStatus() == Status::Unknown1) { eid11 = sharedEid; eid21 = sharedEid; eid12 = other1; eid22 = other2; } else { eid11 = other1; eid21 = other2; eid12 = sharedEid; eid22 = sharedEid; } boost::shared_ptr<ScriptMatch> m1( new ScriptMatch(_script, _plugin, copiedMap, copiedMapJs, eid11, eid12, _threshold)); MatchSet ms; ms.insert(m1.get()); vector<Merger*> mergers; ScriptMergerCreator creator; creator.createMergers(ms, mergers); m1.reset(); bool conflicting = true; // if we got a merger, then check to see if it conflicts if (mergers.size() == 1) { // apply the merger to our map copy vector< pair<ElementId, ElementId> > replaced; mergers[0]->apply(copiedMap, replaced); // replace the element id in the second merger. for (size_t i = 0; i < replaced.size(); ++i) { if (replaced[i].first == eid21) { eid21 = replaced[i].second; } if (replaced[i].first == eid22) { eid22 = replaced[i].second; } } // if we can still find the second match after the merge was applied then it isn't a conflict if (copiedMap->containsElement(eid21) && copiedMap->containsElement(eid22)) { ScriptMatch m2(_script, _plugin, copiedMap, copiedMapJs, eid21, eid22, _threshold); if (m2.getType() == MatchType::Match) { conflicting = false; } } } return conflicting; }
bool ScriptMergerCreator::createMergers(const MatchSet& matches, vector<Merger*>& mergers) const { bool result = false; assert(matches.size() > 0); set< pair<ElementId, ElementId> > eids; shared_ptr<PluginContext> script; Persistent<Object> plugin; QStringList matchType; // go through all the matches for (MatchSet::const_iterator it = matches.begin(); it != matches.end(); ++it) { const ScriptMatch* sm = dynamic_cast<const ScriptMatch*>(*it); // check to make sure all the input matches are building matches. if (sm == 0) { // return an empty result return false; } // add all the element to element pairs to a set else { script = sm->getScript(); HandleScope handleScope; Context::Scope context_scope(script->getContext()); plugin = sm->getPlugin(); set< pair<ElementId, ElementId> > s = sm->getMatchPairs(); eids.insert(s.begin(), s.end()); if (matchType.contains(sm->getMatchName()) == false) { matchType.append(sm->getMatchName()); } } } ScriptMerger* sm = new ScriptMerger(script, plugin, eids); // only add the POI merge if there are elements to merge. if (sm->hasFunction("mergeSets")) { if (eids.size() >= 1) { mergers.push_back(sm); result = true; } else { delete sm; } } else { if (eids.size() == 1) { mergers.push_back(sm); result = true; } else if (eids.size() > 1) { delete sm; mergers.push_back(new MarkForReviewMerger(eids, "Overlapping matches", matchType.join(";"), 1.0)); result = true; } else { delete sm; } } return result; }