void FindIntersectionsOp::apply(shared_ptr<OsmMap>& map)
{
  // Drop every relation up front; this op only cares about way/node geometry.
  LOG_INFO(QString("%1 Relations found.").arg(map->getRelationMap().size()));
  shared_ptr<ElementTypeCriterion> relationCriterion(
    new ElementTypeCriterion(ElementType::Relation));
  VisitorOp(new RemoveElementsVisitor(relationCriterion)).apply(map);
  LOG_INFO(QString("%1 Relations found, after removal").arg(map->getRelationMap().size()));

  /// @todo move this to a config file.
  // pragmatically remove "bad" data in OSM afghanistan
  map->removeWays(TagFilter(Filter::FilterMatches, "source", "AIMS"));

  // reproject into a planar projection centered in the middle of bounding box.
  MapReprojector::reprojectToPlanar(map);

  // Clean up redundant geometry before looking for intersections.
  DuplicateWayRemover::removeDuplicates(map);
  SuperfluousWayRemover::removeWays(map);

  // split ways up on intersections. This must come before DualWaySplitter. The DualWaySplitter
  // assumes that all intersections are on end nodes.
  IntersectionSplitter::splitIntersections(map);
  map = UnlikelyIntersectionRemover::removeIntersections(map);

  LOG_INFO("Assuming drives on right.");
  // map = DualWaySplitter::splitAll(map, DualWaySplitter::Right, 12.5);
  // map = ImpliedDividedMarker::markDivided(map);

  // LOG_INFO("removeDuplicates()");
  // DuplicateNameRemover::removeDuplicates(map);
  // LOG_INFO("SmallWayMerger::mergeWays()");
  // SmallWayMerger::mergeWays(map, 15.0);
  // LOG_INFO("RemoveEmptyAreasVisitor()");
  // VisitorOp(new RemoveEmptyAreasVisitor()).apply(map);
  // LOG_INFO("RemoveDuplicateAreaVisitor()");
  // VisitorOp(new RemoveDuplicateAreaVisitor()).apply(map);

  // Locate all remaining intersections in the cleaned map.
  // LOG_INFO("FindIntersectionsVisitor()");
  shared_ptr<FindIntersectionsVisitor> intersectionVisitor(new FindIntersectionsVisitor());
  VisitorOp(intersectionVisitor).apply(map);
  LOG_INFO(QString("%1 Intersections found.").arg(intersectionVisitor->getIntersections().size()));

  // Strip all ways first...
  shared_ptr<ElementTypeCriterion> wayCriterion(new ElementTypeCriterion(ElementType::Way));
  VisitorOp(new RemoveElementsVisitor(wayCriterion)).apply(map);

  // ...then drop everything except the intersection nodes we just found.
  shared_ptr<IntersectionFilter> intersectionKeeper(
    new IntersectionFilter(intersectionVisitor->getIntersections()));
  VisitorOp(new RemoveElementsVisitor(intersectionKeeper)).apply(map);

  // Apply any user specified operations.
  NamedOp(conf().getList(opsKey(), "")).apply(map);
}
/**
 * Runs the full unifying conflation pipeline on the given map:
 * pre-ops -> planar projection -> match creation -> match optimization ->
 * merger creation -> merger application -> post-ops.
 *
 * @param map the map to conflate; modified in place. Reprojected to planar
 *            if necessary (callers needing WGS84 must reproject afterwards).
 *
 * Side effects: populates _stats with per-phase timing/count stats, and
 * clears _matches/_mergers via _reset() when finished.
 */
void UnifyingConflator::apply(shared_ptr<OsmMap>& map)
{
  Timer timer;
  // Clear any state left over from a previous run.
  _reset();

  // Run the configured pre-conflation operations (semicolon-delimited op names).
  NamedOp(ConfigOptions().getUnifyPreOps().split(";", QString::SkipEmptyParts)).apply(map);
  _stats.append(SingleStat("Apply Pre Ops Time (sec)", timer.getElapsedAndRestart()));

  // will reproject if necessary.
  MapReprojector::reprojectToPlanar(map);
  _stats.append(SingleStat("Project to Planar Time (sec)", timer.getElapsedAndRestart()));

  if (Log::getInstance().isDebugEnabled())
  {
    // Dump a WGS84 copy of the input for debugging; the working map stays planar.
    LOG_DEBUG("Writing debug map.");
    OsmMapPtr debug(new OsmMap(map));
    MapReprojector::reprojectToWgs84(debug);
    OsmMapWriterFactory::write(debug, "tmp/debug.osm");
    _stats.append(SingleStat("Write Debug Map Time (sec)", timer.getElapsedAndRestart()));
  }

  LOG_DEBUG("Creating matches...");
  // find all the matches in this map
  if (_matchThreshold.get())
  {
    //ScoreMatches logic seems to be the only one that needs to pass in the match threshold now when
    //the optimize param is activated. Otherwise, we get the match threshold information from the
    //config.
    _matchFactory.createMatches(map, _matches, _bounds, _matchThreshold);
  }
  else
  {
    _matchFactory.createMatches(map, _matches, _bounds);
  }
  LOG_DEBUG("Match count: " << _matches.size());
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  double findMatchesTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Find Matches Time (sec)", findMatchesTime));
  _stats.append(SingleStat("Number of Matches Found", _matches.size()));
  _stats.append(SingleStat("Number of Matches Found per Second",
    (double)_matches.size() / findMatchesTime));

  // Keep a copy of the full (pre-optimization) match list; it owns the Match
  // pointers and is what gets freed via _deleteAll() below.
  vector<const Match*> allMatches = _matches;

  // add review tags to all matches that have some review component
  _addReviewTags(map, allMatches);
  LOG_INFO("Pre-constraining match count: " << allMatches.size());
  _stats.append(SingleStat("Number of Matches Before Whole Groups", _matches.size()));

  // If there are groups of matches that should not be optimized, remove them before optimization.
  // Such "whole groups" go straight into matchSets and bypass the optimizers below.
  MatchSetVector matchSets;
  _removeWholeGroups(_matches, matchSets, map);
  _stats.append(SingleStat("Number of Whole Groups", matchSets.size()));

  // Globally optimize the set of matches to maximize the conflation score.
  // Both an optimal (time-limited) and a greedy solver are run; whichever
  // produces the higher score wins.
  {
    OptimalConstrainedMatches cm(map);
    cm.addMatches(_matches.begin(), _matches.end());
    cm.setTimeLimit(ConfigOptions(_settings).getUnifyOptimizerTimeLimit());
    double cmStart = Time::getTime();
    vector<const Match*> cmMatches = cm.calculateSubset();
    LOG_INFO("CM took: " << Time::getTime() - cmStart << "s.");
    LOG_INFO("CM Score: " << cm.getScore());
    LOG_DEBUG(SystemInfo::getMemoryUsageString());

    GreedyConstrainedMatches gm(map);
    gm.addMatches(_matches.begin(), _matches.end());
    double gmStart = Time::getTime();
    vector<const Match*> gmMatches = gm.calculateSubset();
    LOG_INFO("GM took: " << Time::getTime() - gmStart << "s.");
    LOG_INFO("GM Score: " << gm.getScore());

    if (gm.getScore() > cm.getScore())
    {
      _matches = gmMatches;
    }
    else
    {
      _matches = cmMatches;
    }
  }

  double optimizeMatchesTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Optimize Matches Time (sec)", optimizeMatchesTime));
  _stats.append(SingleStat("Number of Optimized Matches", _matches.size()));
  _stats.append(SingleStat("Number of Matches Optimized per Second",
    (double)allMatches.size() / optimizeMatchesTime));
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  // #warning validateConflictSubset is on, this is slow.
  // _validateConflictSubset(map, _matches);

  LOG_INFO("Post constraining match count: " << _matches.size());

  {
    // search the matches for groups (subgraphs) of matches. In other words, groups where all the
    // matches are interrelated by element id
    MatchGraph mg;
    mg.addMatches(_matches.begin(), _matches.end());
    vector< set<const Match*, MatchPtrComparator> > tmpMatchSets = mg.findSubgraphs(map);
    matchSets.insert(matchSets.end(), tmpMatchSets.begin(), tmpMatchSets.end());
    LOG_DEBUG(SystemInfo::getMemoryUsageString());
  }

  LOG_DEBUG("Match sets count: " << matchSets.size());
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  /// @todo would it help to sort the matches so the biggest or best ones get merged first?

  // convert all the match sets into mergers.
  for (size_t i = 0; i < matchSets.size(); ++i)
  {
    _mergerFactory->createMergers(map, matchSets[i], _mergers);
  }
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  // don't need the matches any more
  // (allMatches owns the raw Match pointers; freeing it invalidates every
  // entry in _matches and matchSets as well, so nothing may touch them after this.)
  _deleteAll(allMatches);
  _matches.clear();
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  // Build the element-id -> merger index used by _replaceElementIds() below.
  _mapElementIdsToMergers();
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  _stats.append(SingleStat("Create Mergers Time (sec)", timer.getElapsedAndRestart()));

  // Apply each merger in order. Mergers report the element ids they replaced,
  // and later mergers referencing those ids must be patched before they run.
  vector< pair<ElementId, ElementId> > replaced;
  for (size_t i = 0; i < _mergers.size(); ++i)
  {
    _mergers[i]->apply(map, replaced);

    // update any mergers that reference the replaced values
    _replaceElementIds(replaced);
    replaced.clear();
    if (Log::getInstance().getLevel() == Log::Debug)
    {
      // \r + flush: in-place progress counter on the same console line.
      cout << "Applying mergers: " << i + 1 << " / " << _mergers.size() << " \r" << flush;
    }
  }
  if (Log::getInstance().getLevel() == Log::Debug)
  {
    cout << endl;
  }
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  // Capture the count before _reset() clears _mergers.
  size_t mergerCount = _mergers.size();
  // free up any used resources.
  _reset();
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  double mergersTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Apply Mergers Time (sec)", mergersTime));
  _stats.append(SingleStat("Mergers Applied per Second", (double)mergerCount / mergersTime));

  // Run the configured post-conflation operations.
  NamedOp(ConfigOptions().getUnifyPostOps().split(";", QString::SkipEmptyParts)).apply(map);
  _stats.append(SingleStat("Apply Post Ops Time (sec)", timer.getElapsedAndRestart()));
}
void CumulativeConflator::conflate(const QStringList inputs, const QString output) { assert(inputs.size() >= 3); //for NoInformationElementRemover if (ConfigOptions().getWriterCleanReviewTags()) { throw HootException( "Multi-conflation must be run with " + ConfigOptions::getWriterCleanReviewTagsKey() + "=false"); } //for TagMergerFactory if (ConfigOptions().getTagMergerDefault() != "hoot::ProvenanceAwareOverwriteTagMerger") { throw HootException( "Multi-conflation must be run with " + ConfigOptions::getTagMergerDefaultKey() + "=hoot::ProvenanceAwareOverwriteTagMerger"); } OsmMapPtr cumulativeMap(new OsmMap()); LOG_VARD(inputs.size()); for (int i = 0; i < inputs.size(); i++) { OsmMapPtr reviewCache; if (i == 0) { OsmMapReaderFactory::read( cumulativeMap, inputs[i], ConfigOptions().getReaderConflateUseDataSourceIds1(), Status::Unknown1); //keep a source tag history on the data for provenance; append to any existing source values //(this shouldn't be added to any review relations) LOG_DEBUG("Setting source tags for map " << QString::number(i + 1) << "..."); SetTagValueVisitor sourceTagVisitor(MetadataTags::HootSource(), QString::number(i + 1)); cumulativeMap->visitRw(sourceTagVisitor); } else { if (i == 1) { LOG_INFO("Conflating " << inputs[i - 1] << " with " << inputs[i] << "..."); } else { LOG_INFO("Conflating cumulative map with " << inputs[i] << "..."); } //I'm not yet sure all the projecting going on here is the right way to go about this, but //the maps must be in the same projection for the appending to work. OsmMapPtr unknown2Map(new OsmMap()); OsmMapReaderFactory::read( unknown2Map, inputs[i], ConfigOptions().getReaderConflateUseDataSourceIds2(), Status::Unknown2); MapProjector::projectToWgs84(unknown2Map); //Same as above, but do this before combining the cumulative map with the unknown2 map to //prevent incorrect tags from being added to the cumulative map. 
LOG_DEBUG("Setting source tags for map " << QString::number(i + 1) << "..."); SetTagValueVisitor sourceTagVisitor( MetadataTags::HootSource(), QString::number(i + 1)/*, true*/); unknown2Map->visitRw(sourceTagVisitor); //now combine the two maps before conflation MapProjector::projectToWgs84(cumulativeMap); MapProjector::projectToWgs84(unknown2Map); cumulativeMap->append(unknown2Map); //load in cached reviews from previous conflations - I believe this is necessary, b/c the //UnifyingConflator is ignoring any incoming reviews by design (need to verify this). It //could be argued that modifying it to optionally retain the reviews is a better design //than the caching going on here... if (reviewCache.get() && reviewCache->getElementCount() > 0) { LOG_DEBUG("Adding previous reviews..."); const RelationMap& reviews = reviewCache->getRelations(); for (RelationMap::const_iterator it = reviews.begin(); it != reviews.end(); ++it) { RelationPtr review = it->second; review->setId(cumulativeMap->createNextRelationId()); cumulativeMap->addRelation(review); } LOG_DEBUG("Added " << reviews.size() << " cached reviews."); } NamedOp(ConfigOptions().getConflatePreOps()).apply(cumulativeMap); UnifyingConflator().apply(cumulativeMap); //going to apply this at the end of all conflation jobs, but maybe we'll find out later that //it needs to be done here instead (?) //NamedOp(ConfigOptions().getConflatePostOps()).apply(cumulativeMap); if (i < inputs.size() - 1) { //Up until just before the last conflate job, set the status tag back to 1 so that the //accumulated data will conflate with the next dataset. //there is a bug here that will affect river conflation in that somehow hoot:status=3 //tags are being left in at some point which causes the SearchRadiusCalculator to skip the //features. 
LOG_DEBUG("Setting status tags for map " << QString::number(i + 1) << "..."); SetTagValueVisitor statusTagVisitor( MetadataTags::HootStatus(), QString("%1").arg(Status(Status::Unknown1).getEnum())); cumulativeMap->visitRw(statusTagVisitor); } //copy the map and save the reviews LOG_DEBUG("Caching reviews..."); reviewCache.reset(new OsmMap(cumulativeMap->getProjection())); KeepReviewsVisitor vis; reviewCache->visitRw(vis); LOG_DEBUG("Cached " << reviewCache->getElementCount() << " reviews."); } } NamedOp(ConfigOptions().getConflatePostOps()).apply(cumulativeMap); MapProjector::projectToWgs84(cumulativeMap); OsmMapWriterFactory::write(cumulativeMap, output); }