/// Reduces the map to only its road-intersection nodes.
///
/// Pipeline: strip relations and known-bad source data, reproject to planar,
/// normalize the way network (duplicate/superfluous removal, splitting ways
/// at intersections), detect intersection nodes, then delete everything
/// except those detected nodes. Finally applies any user-configured ops.
///
/// @param map the map to operate on; modified in place (and reassigned by
///            UnlikelyIntersectionRemover, hence the non-const reference)
void FindIntersectionsOp::apply(shared_ptr<OsmMap>& map)
{
  // remove all relations -- intersection detection works purely on ways/nodes
  LOG_INFO(QString("%1 Relations found.").arg(map->getRelationMap().size()));
  shared_ptr<ElementTypeCriterion> rFilter(new ElementTypeCriterion(ElementType::Relation));
  VisitorOp(new RemoveElementsVisitor(rFilter)).apply(map);
  LOG_INFO(QString("%1 Relations found, after removal").arg(map->getRelationMap().size()));


  /// @todo move this to a config file.
  // pragmatically remove "bad" data in OSM afghanistan
  map->removeWays(TagFilter(Filter::FilterMatches, "source", "AIMS"));

  // reproject into a planar projection centered in the middle of bounding box.
  MapReprojector::reprojectToPlanar(map);

  DuplicateWayRemover::removeDuplicates(map);
  SuperfluousWayRemover::removeWays(map);
  // split ways up on intersections. This must come before DualWaySplitter. The DualWaySplitter
  // assumes that all intersections are on end nodes.
  IntersectionSplitter::splitIntersections(map);
  map = UnlikelyIntersectionRemover::removeIntersections(map);
  LOG_INFO("Assuming drives on right.");
//  map = DualWaySplitter::splitAll(map, DualWaySplitter::Right, 12.5);
//  map = ImpliedDividedMarker::markDivided(map);

//  LOG_INFO("removeDuplicates()");
//  DuplicateNameRemover::removeDuplicates(map);
//  LOG_INFO("SmallWayMerger::mergeWays()");
//  SmallWayMerger::mergeWays(map, 15.0);

//  LOG_INFO("RemoveEmptyAreasVisitor()");
//  VisitorOp(new RemoveEmptyAreasVisitor()).apply(map);
//  LOG_INFO("RemoveDuplicateAreaVisitor()");
//  VisitorOp(new RemoveDuplicateAreaVisitor()).apply(map);

  // find all intersections (the visitor accumulates them; splitting above
  // guarantees intersections sit on way end nodes)
//  LOG_INFO("FindIntersectionsVisitor()");
  shared_ptr<FindIntersectionsVisitor> v(new FindIntersectionsVisitor());
  VisitorOp(v).apply(map);
  LOG_INFO(QString("%1 Intersections found.").arg(v->getIntersections().size()));

  // remove all ways first
  shared_ptr<ElementTypeCriterion> wayFilter(new ElementTypeCriterion(ElementType::Way));
  VisitorOp(new RemoveElementsVisitor(wayFilter)).apply(map);

  // then remove everything except for the intersection that we found
  // NOTE(review): this presumably relies on IntersectionFilter matching the
  // non-intersection elements (or on RemoveElementsVisitor negating the
  // criterion) -- confirm against the filter's implementation.
  shared_ptr<IntersectionFilter> intersectionFilter(new IntersectionFilter(v->getIntersections()));
  VisitorOp(new RemoveElementsVisitor(intersectionFilter)).apply(map);


  // Apply any user specified operations.
  NamedOp(conf().getList(opsKey(), "")).apply(map);
}
/// Runs the full unifying conflation pipeline on the map.
///
/// Stages (order matters throughout): pre-ops, planar reprojection, match
/// creation, review tagging, whole-group extraction, global match-subset
/// optimization (best of constrained-optimal vs. greedy), subgraph grouping,
/// merger creation, merger application, then post-ops. Timing/count stats are
/// appended to _stats after each stage.
///
/// @param map the map to conflate; modified in place
void UnifyingConflator::apply(shared_ptr<OsmMap>& map)
{
  Timer timer;
  // clear any state left over from a previous run
  _reset();

  NamedOp(ConfigOptions().getUnifyPreOps().split(";", QString::SkipEmptyParts)).apply(map);

  _stats.append(SingleStat("Apply Pre Ops Time (sec)", timer.getElapsedAndRestart()));

  // will reproject if necessary.
  MapReprojector::reprojectToPlanar(map);

  _stats.append(SingleStat("Project to Planar Time (sec)", timer.getElapsedAndRestart()));

  // optionally dump a WGS84 snapshot of the input for debugging
  if (Log::getInstance().isDebugEnabled())
  {
    LOG_DEBUG("Writing debug map.");
    OsmMapPtr debug(new OsmMap(map));
    MapReprojector::reprojectToWgs84(debug);
    OsmMapWriterFactory::write(debug, "tmp/debug.osm");

    _stats.append(SingleStat("Write Debug Map Time (sec)", timer.getElapsedAndRestart()));
  }

  LOG_DEBUG("Creating matches...");
  // find all the matches in this map
  if (_matchThreshold.get())
  {
    //ScoreMatches logic seems to be the only one that needs to pass in the match threshold now when
    //the optimize param is activated.  Otherwise, we get the match threshold information from the
    //config.
    _matchFactory.createMatches(map, _matches, _bounds, _matchThreshold);
  }
  else
  {
    _matchFactory.createMatches(map, _matches, _bounds);
  }
  LOG_DEBUG("Match count: " << _matches.size());
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  double findMatchesTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Find Matches Time (sec)", findMatchesTime));
  _stats.append(SingleStat("Number of Matches Found", _matches.size()));
  _stats.append(SingleStat("Number of Matches Found per Second",
    (double)_matches.size() / findMatchesTime));

  // keep a copy of the full match list; _matches is pruned below, but
  // allMatches is what ultimately gets freed (see _deleteAll further down)
  vector<const Match*> allMatches = _matches;

  // add review tags to all matches that have some review component
  _addReviewTags(map, allMatches);
  LOG_INFO("Pre-constraining match count: " << allMatches.size());

  _stats.append(SingleStat("Number of Matches Before Whole Groups", _matches.size()));

  // If there are groups of matches that should not be optimized, remove them before optimization.
  // (matchSets is extended again after optimization with the subgraph groups.)
  MatchSetVector matchSets;
  _removeWholeGroups(_matches, matchSets, map);
  _stats.append(SingleStat("Number of Whole Groups", matchSets.size()));

  // Globally optimize the set of matches to maximize the conflation score.
  // Both an optimal constrained solver and a greedy solver are run on the
  // same input; whichever scores higher wins.
  {
    OptimalConstrainedMatches cm(map);
    cm.addMatches(_matches.begin(), _matches.end());

    cm.setTimeLimit(ConfigOptions(_settings).getUnifyOptimizerTimeLimit());

    double cmStart = Time::getTime();
    vector<const Match*> cmMatches = cm.calculateSubset();
    LOG_INFO("CM took: " << Time::getTime() - cmStart << "s.");
    LOG_INFO("CM Score: " << cm.getScore());
    LOG_DEBUG(SystemInfo::getMemoryUsageString());

    GreedyConstrainedMatches gm(map);
    gm.addMatches(_matches.begin(), _matches.end());
    double gmStart = Time::getTime();
    vector<const Match*> gmMatches = gm.calculateSubset();
    LOG_INFO("GM took: " << Time::getTime() - gmStart << "s.");
    LOG_INFO("GM Score: " << gm.getScore());

    // keep the higher-scoring subset (greedy can beat the optimal solver
    // when the latter hits its time limit)
    if (gm.getScore() > cm.getScore())
    {
      _matches = gmMatches;
    }
    else
    {
      _matches = cmMatches;
    }
  }

  double optimizeMatchesTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Optimize Matches Time (sec)", optimizeMatchesTime));
  _stats.append(SingleStat("Number of Optimized Matches", _matches.size()));
  _stats.append(SingleStat("Number of Matches Optimized per Second",
    (double)allMatches.size() / optimizeMatchesTime));

  LOG_DEBUG(SystemInfo::getMemoryUsageString());

//  #warning validateConflictSubset is on, this is slow.
//  _validateConflictSubset(map, _matches);

  LOG_INFO("Post constraining match count: " << _matches.size());

  {
    // search the matches for groups (subgraphs) of matches. In other words, groups where all the
    // matches are interrelated by element id
    MatchGraph mg;
    mg.addMatches(_matches.begin(), _matches.end());
    vector< set<const Match*, MatchPtrComparator> > tmpMatchSets =
      mg.findSubgraphs(map);
    matchSets.insert(matchSets.end(), tmpMatchSets.begin(), tmpMatchSets.end());
    LOG_DEBUG(SystemInfo::getMemoryUsageString());
  }

  LOG_DEBUG("Match sets count: " << matchSets.size());
  LOG_DEBUG(SystemInfo::getMemoryUsageString());
  /// @todo would it help to sort the matches so the biggest or best ones get merged first?

  // convert all the match sets into mergers.
  for (size_t i = 0; i < matchSets.size(); ++i)
  {
    _mergerFactory->createMergers(map, matchSets[i], _mergers);
  }

  LOG_DEBUG(SystemInfo::getMemoryUsageString());
  // don't need the matches any more
  _deleteAll(allMatches);
  _matches.clear();

  LOG_DEBUG(SystemInfo::getMemoryUsageString());
  _mapElementIdsToMergers();
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  _stats.append(SingleStat("Create Mergers Time (sec)", timer.getElapsedAndRestart()));

  // apply the mergers one at a time; each apply may replace element ids, and
  // remaining mergers must be remapped before the next apply runs
  vector< pair<ElementId, ElementId> > replaced;
  for (size_t i = 0; i < _mergers.size(); ++i)
  {
    _mergers[i]->apply(map, replaced);

    // update any mergers that reference the replaced values
    _replaceElementIds(replaced);
    replaced.clear();
    // lightweight same-line progress indicator (debug only)
    if (Log::getInstance().getLevel() == Log::Debug)
    {
      cout << "Applying mergers: " << i + 1 << " / " << _mergers.size() << "       \r" << flush;
    }
  }
  if (Log::getInstance().getLevel() == Log::Debug)
  {
    cout << endl;
  }
  LOG_DEBUG(SystemInfo::getMemoryUsageString());
  // capture the count before _reset() clears _mergers
  size_t mergerCount = _mergers.size();
  // free up any used resources.
  _reset();
  LOG_DEBUG(SystemInfo::getMemoryUsageString());

  double mergersTime = timer.getElapsedAndRestart();
  _stats.append(SingleStat("Apply Mergers Time (sec)", mergersTime));
  _stats.append(SingleStat("Mergers Applied per Second", (double)mergerCount / mergersTime));

  NamedOp(ConfigOptions().getUnifyPostOps().split(";", QString::SkipEmptyParts)).apply(map);

  _stats.append(SingleStat("Apply Post Ops Time (sec)", timer.getElapsedAndRestart()));
}
// (Stray non-code text from the original paste: "Esempio n. 3" / "0" --
// an example-listing separator, not valid C++.)
void CumulativeConflator::conflate(const QStringList inputs, const QString output)
{
  assert(inputs.size() >= 3);

  //for NoInformationElementRemover
  if (ConfigOptions().getWriterCleanReviewTags())
  {
    throw HootException(
      "Multi-conflation must be run with " +
      ConfigOptions::getWriterCleanReviewTagsKey() + "=false");
  }

  //for TagMergerFactory
  if (ConfigOptions().getTagMergerDefault() != "hoot::ProvenanceAwareOverwriteTagMerger")
  {
    throw HootException(
      "Multi-conflation must be run with " + ConfigOptions::getTagMergerDefaultKey() +
      "=hoot::ProvenanceAwareOverwriteTagMerger");
  }

  OsmMapPtr cumulativeMap(new OsmMap());
  LOG_VARD(inputs.size());
  for (int i = 0; i < inputs.size(); i++)
  {
    OsmMapPtr reviewCache;
    if (i == 0)
    {
      OsmMapReaderFactory::read(
        cumulativeMap, inputs[i], ConfigOptions().getReaderConflateUseDataSourceIds1(),
        Status::Unknown1);

      //keep a source tag history on the data for provenance; append to any existing source values
      //(this shouldn't be added to any review relations)
      LOG_DEBUG("Setting source tags for map " << QString::number(i + 1) << "...");
      SetTagValueVisitor sourceTagVisitor(MetadataTags::HootSource(), QString::number(i + 1));
      cumulativeMap->visitRw(sourceTagVisitor);
    }
    else
    {
      if (i == 1)
      {
        LOG_INFO("Conflating " << inputs[i - 1] << " with " << inputs[i] << "...");
      }
      else
      {
        LOG_INFO("Conflating cumulative map with " << inputs[i] << "...");
      }

      //I'm not yet sure all the projecting going on here is the right way to go about this, but
      //the maps must be in the same projection for the appending to work.

      OsmMapPtr unknown2Map(new OsmMap());
      OsmMapReaderFactory::read(
        unknown2Map, inputs[i], ConfigOptions().getReaderConflateUseDataSourceIds2(),
        Status::Unknown2);
      MapProjector::projectToWgs84(unknown2Map);

      //Same as above, but do this before combining the cumulative map with the unknown2 map to
      //prevent incorrect tags from being added to the cumulative map.
      LOG_DEBUG("Setting source tags for map " << QString::number(i + 1) << "...");
      SetTagValueVisitor sourceTagVisitor(
        MetadataTags::HootSource(), QString::number(i + 1)/*, true*/);
      unknown2Map->visitRw(sourceTagVisitor);

      //now combine the two maps before conflation
      MapProjector::projectToWgs84(cumulativeMap);
      MapProjector::projectToWgs84(unknown2Map);
      cumulativeMap->append(unknown2Map);

      //load in cached reviews from previous conflations - I believe this is necessary, b/c the
      //UnifyingConflator is ignoring any incoming reviews by design (need to verify this).  It
      //could be argued that modifying it to optionally retain the reviews is a better design
      //than the caching going on here...
      if (reviewCache.get() && reviewCache->getElementCount() > 0)
      {
        LOG_DEBUG("Adding previous reviews...");
        const RelationMap& reviews = reviewCache->getRelations();
        for (RelationMap::const_iterator it = reviews.begin(); it != reviews.end(); ++it)
        {
          RelationPtr review = it->second;
          review->setId(cumulativeMap->createNextRelationId());
          cumulativeMap->addRelation(review);
        }
        LOG_DEBUG("Added " << reviews.size() << " cached reviews.");
      }

      NamedOp(ConfigOptions().getConflatePreOps()).apply(cumulativeMap);
      UnifyingConflator().apply(cumulativeMap);
      //going to apply this at the end of all conflation jobs, but maybe we'll find out later that
      //it needs to be done here instead (?)
      //NamedOp(ConfigOptions().getConflatePostOps()).apply(cumulativeMap);

      if (i < inputs.size() - 1)
      {
        //Up until just before the last conflate job, set the status tag back to 1 so that the
        //accumulated data will conflate with the next dataset.
        //there is a bug here that will affect river conflation in that somehow hoot:status=3
        //tags are being left in at some point which causes the SearchRadiusCalculator to skip the
        //features.
        LOG_DEBUG("Setting status tags for map " << QString::number(i + 1) << "...");
        SetTagValueVisitor statusTagVisitor(
          MetadataTags::HootStatus(), QString("%1").arg(Status(Status::Unknown1).getEnum()));
        cumulativeMap->visitRw(statusTagVisitor);
      }

      //copy the map and save the reviews
      LOG_DEBUG("Caching reviews...");
      reviewCache.reset(new OsmMap(cumulativeMap->getProjection()));
      KeepReviewsVisitor vis;
      reviewCache->visitRw(vis);
      LOG_DEBUG("Cached " << reviewCache->getElementCount() << " reviews.");
    }
  }

  NamedOp(ConfigOptions().getConflatePostOps()).apply(cumulativeMap);

  MapProjector::projectToWgs84(cumulativeMap);
  OsmMapWriterFactory::write(cumulativeMap, output);
}