void UnifyingConflator::_reset() { if (_mergerFactory == 0) { _mergerFactory.reset(new MergerFactory()); // register the mark for review merger first so all reviews get tagged before another merger // gets a chance. _mergerFactory->registerCreator(new MarkForReviewMergerCreator()); _mergerFactory->registerDefaultCreators(); } _e2m.clear(); _deleteAll(_matches); _deleteAll(_mergers); }
/**
 * Deletes every node reachable from n: n itself, the siblings linked through
 * `next` until the walk arrives back at n, and, recursively, the `child` list of
 * each of those nodes.
 *
 * @param n any node of the sibling ring to destroy; NULL is a no-op.
 */
void _deleteAll(node<V>* n)
{
  if (n == NULL)
  {
    return;
  }

  node<V>* cursor = n;
  do
  {
    node<V>* const doomed = cursor;
    // Advance before the node is freed; its links are unusable afterwards.
    cursor = doomed->next;
    _deleteAll(doomed->child);
    delete doomed;
  }
  while (cursor != n);
}
/**
 * Runs the conflation pipeline on the given map, modifying it in place.
 *
 * Steps, in order (timings and counts are appended to _stats along the way):
 *  1. reset internal state and apply the configured unify pre ops;
 *  2. reproject the map to planar;
 *  3. create matches through _matchFactory;
 *  4. add review tags, set aside whole groups, then keep the better scoring of the
 *     optimal-constrained and greedy-constrained match subsets;
 *  5. group the remaining matches into subgraphs and create mergers from every match set;
 *  6. apply each merger, propagating replaced element ids to the remaining mergers;
 *  7. reset internal state again and apply the configured unify post ops.
 *
 * @param map the map to conflate.
 */
void UnifyingConflator::apply(shared_ptr<OsmMap>& map)
{
  Timer timer;
  _reset();

  NamedOp(ConfigOptions().getUnifyPreOps().split(";", QString::SkipEmptyParts)).apply(map);

  _stats.append(SingleStat("Apply Pre Ops Time (sec)", timer.getElapsedAndRestart()));

  // will reproject if necessary.
  MapReprojector::reprojectToPlanar(map);

  _stats.append(SingleStat("Project to Planar Time (sec)", timer.getElapsedAndRestart()));

  // Debug only: write a WGS84 copy of the planar map to a relative path so it can be inspected.
  if (Log::getInstance().isDebugEnabled())
  {
    LOG_DEBUG("Writing debug map.");
    OsmMapPtr debug(new OsmMap(map));
    MapReprojector::reprojectToWgs84(debug);
    OsmMapWriterFactory::write(debug, "tmp/debug.osm");

    _stats.append(SingleStat("Write Debug Map Time (sec)", timer.getElapsedAndRestart()));
  }

  LOG_DEBUG("Creating matches...");
  // find all the matches in this map
  if (_matchThreshold.get())
  {
    // ScoreMatches logic seems to be the only one that needs to pass in the match threshold now
    // when the optimize param is activated. Otherwise, we get the match threshold information
    // from the config.
    _matchFactory.createMatches(map, _matches, _bounds, _matchThreshold);
  }
  else
  {
    _matchFactory.createMatches(map, _matches, _bounds);
  }
  LOG_DEBUG("Match count: " << _matches.size());
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  double findMatchesTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Find Matches Time (sec)", findMatchesTime));
  _stats.append(SingleStat("Number of Matches Found", _matches.size()));
  // NOTE(review): this and the other "per Second" stats divide by an elapsed time; if the timer
  // ever reports 0 the stat becomes inf/NaN -- confirm that is acceptable to consumers.
  _stats.append(SingleStat("Number of Matches Found per Second",
    (double)_matches.size() / findMatchesTime));

  // Keep the complete list: _matches is narrowed below, but every match created above is
  // deleted through this copy once the mergers exist.
  vector<const Match*> allMatches = _matches;

  // add review tags to all matches that have some review component
  _addReviewTags(map, allMatches);
  LOG_INFO("Pre-constraining match count: " << allMatches.size());

  _stats.append(SingleStat("Number of Matches Before Whole Groups", _matches.size()));

  // If there are groups of matches that should not be optimized, remove them before optimization.
  MatchSetVector matchSets;
  _removeWholeGroups(_matches, matchSets, map);
  _stats.append(SingleStat("Number of Whole Groups", matchSets.size()));

  // Globally optimize the set of matches to maximize the conflation score.
  {
    OptimalConstrainedMatches cm(map);
    cm.addMatches(_matches.begin(), _matches.end());

    // NOTE(review): the time limit is read from ConfigOptions(_settings) while the pre/post ops
    // are read from a default-constructed ConfigOptions() -- confirm this is intentional.
    cm.setTimeLimit(ConfigOptions(_settings).getUnifyOptimizerTimeLimit());

    double cmStart = Time::getTime();
    vector<const Match*> cmMatches = cm.calculateSubset();
    LOG_INFO("CM took: " << Time::getTime() - cmStart << "s.");
    LOG_INFO("CM Score: " << cm.getScore());
    LOG_DEBUG(SystemInfo::getMemoryUsageString());

    GreedyConstrainedMatches gm(map);
    gm.addMatches(_matches.begin(), _matches.end());
    double gmStart = Time::getTime();
    vector<const Match*> gmMatches = gm.calculateSubset();
    LOG_INFO("GM took: " << Time::getTime() - gmStart << "s.");
    LOG_INFO("GM Score: " << gm.getScore());

    // Both strategies always run; the greedy subset is used only when it scores strictly higher.
    if (gm.getScore() > cm.getScore())
    {
      _matches = gmMatches;
    }
    else
    {
      _matches = cmMatches;
    }
  }

  double optimizeMatchesTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Optimize Matches Time (sec)", optimizeMatchesTime));
  _stats.append(SingleStat("Number of Optimized Matches", _matches.size()));
  // The rate is computed from the size of allMatches, not from the optimized subset.
  _stats.append(SingleStat("Number of Matches Optimized per Second",
    (double)allMatches.size() / optimizeMatchesTime));

  LOG_DEBUG(SystemInfo::getMemoryUsageString());

//  #warning validateConflictSubset is on, this is slow.
//  _validateConflictSubset(map, _matches);

  LOG_INFO("Post constraining match count: " << _matches.size());

  {
    // search the matches for groups (subgraphs) of matches. In other words, groups where all the
    // matches are interrelated by element id
    MatchGraph mg;
    mg.addMatches(_matches.begin(), _matches.end());
    vector< set<const Match*, MatchPtrComparator> > tmpMatchSets = mg.findSubgraphs(map);
    // Appended after the whole groups that _removeWholeGroups already placed in matchSets.
    matchSets.insert(matchSets.end(), tmpMatchSets.begin(), tmpMatchSets.end());
    LOG_DEBUG(SystemInfo::getMemoryUsageString());
  }

  LOG_DEBUG("Match sets count: " << matchSets.size());
  LOG_DEBUG(SystemInfo::getMemoryUsageString());
  /// @todo would it help to sort the matches so the biggest or best ones get merged first?

  // convert all the match sets into mergers.
  for (size_t i = 0; i < matchSets.size(); ++i)
  {
    _mergerFactory->createMergers(map, matchSets[i], _mergers);
  }

  LOG_DEBUG(SystemInfo::getMemoryUsageString());
  // don't need the matches any more
  _deleteAll(allMatches);
  // _matches holds a subset of the pointers just passed to _deleteAll, so it is only cleared.
  _matches.clear();

  LOG_DEBUG(SystemInfo::getMemoryUsageString());
  _mapElementIdsToMergers();
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  _stats.append(SingleStat("Create Mergers Time (sec)", timer.getElapsedAndRestart()));

  // Filled by each merger's apply() and emptied again before the next merger runs.
  vector< pair<ElementId, ElementId> > replaced;
  for (size_t i = 0; i < _mergers.size(); ++i)
  {
    _mergers[i]->apply(map, replaced);

    // update any mergers that reference the replaced values
    _replaceElementIds(replaced);
    replaced.clear();
    // Progress indicator; the trailing "\r" rewrites the same console line on each iteration.
    if (Log::getInstance().getLevel() == Log::Debug)
    {
      cout << "Applying mergers: " << i + 1 << " / " << _mergers.size() << " \r" << flush;
    }
  }
  // Finish the progress line started in the loop above.
  if (Log::getInstance().getLevel() == Log::Debug)
  {
    cout << endl;
  }
  LOG_DEBUG(SystemInfo::getMemoryUsageString());
  // Captured before _reset(), which passes _mergers to _deleteAll.
  size_t mergerCount = _mergers.size();
  // free up any used resources.
  _reset();
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  double mergersTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Apply Mergers Time (sec)", mergersTime));
  _stats.append(SingleStat("Mergers Applied per Second", (double)mergerCount / mergersTime));

  NamedOp(ConfigOptions().getUnifyPostOps().split(";", QString::SkipEmptyParts)).apply(map);

  _stats.append(SingleStat("Apply Post Ops Time (sec)", timer.getElapsedAndRestart()));
}
/**
 * Destroys the heap, releasing every node reachable from `heap` via _deleteAll.
 * Nothing is done when `heap` is null.
 */
virtual ~FibonacciHeap()
{
  if (heap != NULL)
  {
    _deleteAll(heap);
  }
}