/**
 * Loads the reference data with status Unknown1, cleans it, tags it with REF1 tags and an
 * "error:circular" tag holding the search distance, and saves the result in WGS84.
 *
 * @param referenceMapInputPath path the reference data is loaded from
 * @param referenceMapOutputPath path the tagged reference data is saved to
 * @return the loaded and tagged reference map
 */
shared_ptr<OsmMap> PertyMatchScorer::_loadReferenceMap(const QString referenceMapInputPath,
                                                       const QString referenceMapOutputPath)
{
  LOG_DEBUG("Loading the reference data with status Unknown1 and adding REF1 tags to it; Saving " <<
            "a copy to " << referenceMapOutputPath << "...");

  shared_ptr<OsmMap> referenceMap(new OsmMap());
  OsmUtils::loadMap(referenceMap, referenceMapInputPath, false, Status::Unknown1);
  //TODO: should this be removed?
  MapCleaner().apply(referenceMap);

  AddRef1Visitor addRef1Visitor;
  referenceMap->visitRw(addRef1Visitor);

  //TODO: this could eventually be replaced with a SetTagVisitor passed in from the command line
  //instead
  SetTagVisitor setAccuracyVisitor("error:circular", QString::number(_searchDistance));
  referenceMap->visitRw(setAccuracyVisitor);

  LOG_VARD(referenceMap->getNodeMap().size());
  LOG_VARD(referenceMap->getWays().size());
  //counting tags visits the whole map, so only do it when the result will actually be logged
  if (Log::getInstance().getLevel() <= Log::Debug)
  {
    TagCountVisitor tagCountVisitor;
    referenceMap->visitRo(tagCountVisitor);
    const long numTotalTags = static_cast<long>(tagCountVisitor.getStat());
    LOG_VARD(numTotalTags);
  }

  //this copies the shared pointer only; it refers to the same map object as referenceMap, so the
  //projection below is also applied to the map that gets returned
  shared_ptr<OsmMap> referenceMapCopy(referenceMap);
  MapProjector::projectToWgs84(referenceMapCopy);
  OsmUtils::saveMap(referenceMapCopy, referenceMapOutputPath);

  return referenceMap;
}
bool OsmApiDbChangesetWriter::conflictExistsInTarget(const QString boundsStr, const QString timeStr) { LOG_INFO("Checking for OSM API DB conflicts for changesets within " << boundsStr << " and " << "created after " << timeStr << "..."); const Envelope bounds = GeometryUtils::envelopeFromConfigString(boundsStr); LOG_VARD(bounds.toString()); const QDateTime time = QDateTime::fromString(timeStr, OsmApiDb::TIME_FORMAT); LOG_VARD(time); if (!time.isValid()) { throw HootException( "Invalid timestamp: " + time.toString() + ". Should be of the form " + OsmApiDb::TIME_FORMAT); } shared_ptr<QSqlQuery> changesetItr = _db.getChangesetsCreatedAfterTime(timeStr); while (changesetItr->next()) { shared_ptr<Envelope> changesetBounds( new Envelope(changesetItr->value(0).toDouble(), changesetItr->value(1).toDouble(), changesetItr->value(2).toDouble(), changesetItr->value(3).toDouble())); LOG_VARD(changesetBounds->toString()); if (changesetBounds->intersects(bounds)) { return true; } } return false; }
/**
 * Randomly picks a location on the way at which it can be split without placing the new node
 * closer than the minimum node spacing to either end of the way.
 *
 * @param way the way to pick a split location on
 * @return the selected location, or a default constructed WayLocation if the way is too short
 */
WayLocation PertyWaySplitVisitor::_calcSplitPoint(shared_ptr<const Way> way) const
{
  //the earliest allowed split location: the minimum node spacing past the start of the way
  WayLocation splitWayStart(_map->shared_from_this(), way, _minNodeSpacing);
  //the latest allowed split location: the minimum node spacing before the end of the way
  WayLocation splitWayEnd =
    WayLocation::createAtEndOfWay(_map->shared_from_this(), way).move(-1 * _minNodeSpacing);

  //only a positive distance between the two locations leaves room for a split that respects the
  //minimum node spacing
  const double splitWayLength =
    splitWayEnd.calculateDistanceOnWay() - splitWayStart.calculateDistanceOnWay();
  LOG_VARD(splitWayLength);
  if (splitWayLength > 0)
  {
    boost::uniform_real<> randomSplitPointDistribution(0.0, splitWayLength);
    const double splitPoint = randomSplitPointDistribution(*_rng);
    LOG_VARD(splitPoint);
    return splitWayStart.move(splitPoint);
  }

  //no valid split location exists
  return WayLocation();
}
void OsmApiDbChangesetWriter::_execNoPrepare(const QString sql) { QSqlQuery q(_db.getDB()); LOG_VARD(sql); if (q.exec(sql) == false) { throw HootException( QString("Error executing query: %1 (%2)").arg(q.lastError().text()).arg(sql)); } LOG_VARD(q.numRowsAffected()); }
/**
 * Renames the configured old tag key to the configured new tag key on the element, keeping the
 * tag's value. Elements without the old key are left untouched.
 *
 * @param e the element to update
 */
void TagRenameKeyVisitor::visit(const shared_ptr<Element>& e)
{
  LOG_VARD(_oldKey);
  LOG_VARD(_newKey);

  Tags& tags = e->getTags();
  if (!tags.contains(_oldKey))
  {
    return;
  }

  //grab the value before the old key is dropped
  const QString value = tags.get(_oldKey);
  tags.remove(_oldKey);
  tags.insert(_newKey, value);
  e->setTags(tags);
}
void OsmUtils::printNodes(const QString nodeCollectionName, const QList<shared_ptr<const Node> >& nodes) { if (Log::getInstance().getLevel() == Log::Debug) { LOG_DEBUG(nodeCollectionName); LOG_VARD(nodes.size()); for (QList<shared_ptr<const Node> >::const_iterator it = nodes.begin(); it != nodes.end(); it++) { shared_ptr<const Node> node = *it; LOG_VARD(node->toString()); } } }
void ImplicitTagRulesDatabaseDeriver::_writeCustomRules(long& linesWrittenCount) { // would like to know somehow if any of the custom rules overlap with the db derived // rules from the public data - #2300 LOG_DEBUG("Writing custom rules..."); long ruleCount = 0; LOG_VARD(_customRules.getCustomRulesList().size()); if (_customRules.getCustomRulesList().size() > 0) { const QMap<QString, QString> customRulesList = _customRules.getCustomRulesList(); for (QMap<QString, QString>::const_iterator customRulesItr = customRulesList.begin(); customRulesItr != customRulesList.end(); ++customRulesItr) { const QString line = QString::number(INT_MAX) % "\t" % customRulesItr.key().trimmed() % "\t" % customRulesItr.value().trimmed() % "\n"; LOG_VART(line); _filteredCountFile->write(line.toUtf8()); ruleCount++; linesWrittenCount++; } LOG_INFO("Wrote " << ruleCount << " custom rules."); } }
/**
 * Prepares and executes a SQL statement with up to three positional bind values.
 *
 * @param sql the statement to prepare and execute
 * @param v1 value bound to position 0; skipped if invalid
 * @param v2 value bound to position 1; skipped if invalid
 * @param v3 value bound to position 2; skipped if invalid
 * @return the executed query
 * @throws HootException if the statement cannot be prepared or executed
 */
QSqlQuery ServicesDb::_exec(QString sql, QVariant v1, QVariant v2, QVariant v3) const
{
  QSqlQuery q(_db);

  LOG_VARD(sql);

  if (!q.prepare(sql))
  {
    throw HootException(
      QString("Error preparing query: %1 (%2)").arg(q.lastError().text()).arg(sql));
  }

  //each value keeps its own fixed position, regardless of whether earlier values were bound
  const QVariant* const positionalValues[] = { &v1, &v2, &v3 };
  for (int position = 0; position < 3; ++position)
  {
    if (positionalValues[position]->isValid())
    {
      q.bindValue(position, *positionalValues[position]);
    }
  }

  if (!q.exec())
  {
    throw HootException(
      QString("Error executing query: %1 (%2)").arg(q.lastError().text()).arg(sql));
  }

  return q;
}
/**
 * Finds the node that a way split created.
 *
 * The new node is taken to be the node shared by the two ways resulting from the split: the last
 * node of the element at index 0 and the first node of the element at index 1.
 *
 * @param nodeIdsBeforeSplit IDs of the nodes that existed before the split; only consulted by an
 * assertion
 * @param newElementsAfterSplit the elements resulting from the split; assumed to contain only ways
 * @return the node added by the split
 */
shared_ptr<Node> PertyWaySplitVisitor::_getNodeAddedBySplit(
  const QList<long>& nodeIdsBeforeSplit, const vector<ElementPtr>& newElementsAfterSplit) const
{
  shared_ptr<const Way> firstWay = dynamic_pointer_cast<Way>(newElementsAfterSplit.at(0));
  const long lastNodeIdInFirstWay =
    firstWay->getNodeIds().at(firstWay->getNodeCount() - 1);
  LOG_VARD(lastNodeIdInFirstWay);

  shared_ptr<const Way> lastWay = dynamic_pointer_cast<Way>(newElementsAfterSplit.at(1));
  const long firstNodeIdInLastWay = lastWay->getNodeIds().at(0);
  LOG_VARD(firstNodeIdInLastWay);

  //the two ways must meet at the new node, and that node must not have existed before the split
  assert(lastNodeIdInFirstWay == firstNodeIdInLastWay);
  assert(!nodeIdsBeforeSplit.contains(lastNodeIdInFirstWay));

  return _map->getNode(firstNodeIdInLastWay);
}
/**
 * Applies BuildingOutlineUpdateOp to the map, projects it to WGS84, and saves it.
 *
 * @param map the map to save
 * @param path the path to save the map to
 */
void PertyMatchScorer::_saveMap(OsmMapPtr map, QString path)
{
  BuildingOutlineUpdateOp().apply(map);

  LOG_VARD(map->getNodeMap().size());
  LOG_VARD(map->getWays().size());
  //counting tags visits the whole map, so only do it when the result will actually be logged
  if (Log::getInstance().getLevel() <= Log::Debug)
  {
    TagCountVisitor tagCountVisitor;
    map->visitRo(tagCountVisitor);
    const long numTotalTags = static_cast<long>(tagCountVisitor.getStat());
    LOG_VARD(numTotalTags);
  }

  MapProjector::projectToWgs84(map);
  OsmUtils::saveMap(map, path);
}
void ImplicitTagRulesDatabaseDeriver::_populateSchemaTagValues() { _schemaTagValues.clear(); _wordsNotInSchema.clear(); // TODO: should the use and/or building categories be added here? const std::vector<SchemaVertex> tags = OsmSchema::getInstance().getTagByCategory(OsmSchemaCategory::poi()); StringTokenizer tokenizer; for (std::vector<SchemaVertex>::const_iterator tagItr = tags.begin(); tagItr != tags.end(); ++tagItr) { SchemaVertex tag = *tagItr; const QString tagVal = tag.value.toLower().replace("_", " "); if (!tagVal.contains("*")) //skip wildcards { if (!_customRules.getWordIgnoreList().contains(tagVal, Qt::CaseInsensitive)) { _schemaTagValues.insert(tagVal); //dealing with the uk english spellings on an as seen basis; this should be expanded and //made more extensible if (tagVal == "theatre") { _schemaTagValues.insert("theater"); } if (tagVal == "centre") { _schemaTagValues.insert("center"); } LOG_TRACE("Appended " << tagVal << " to schema tag values."); } QStringList vals = tokenizer.tokenize(tagVal); for (int i = 0; i < vals.size(); i++) { const QString val = vals.at(i); if (!_customRules.getWordIgnoreList().contains(val, Qt::CaseInsensitive)) { _schemaTagValues.insert(val); //see comment above if (val == "theatre") { _schemaTagValues.insert("theater"); } if (val == "centre") { _schemaTagValues.insert("center"); } LOG_TRACE("Appended " << val << " to schema tag values."); } } } } LOG_VARD(_schemaTagValues.size()); QStringList schemaTagValuesList = _schemaTagValues.toList(); qSort(schemaTagValuesList.begin(), schemaTagValuesList.end()); //sort for viewing only LOG_VART(schemaTagValuesList); }
/**
 * Finds the name of the first registered OsmMapReader that supports the given URL.
 *
 * @param url the URL a reader is needed for
 * @return the name of the first supporting reader, or an empty string if none supports the URL
 */
QString OsmMapReaderFactory::getReaderName(const QString url)
{
  LOG_VARD(url);
  vector<std::string> names =
    Factory::getInstance().getObjectNamesByBase(OsmMapReader::className());
  LOG_VARD(names.size());

  //each candidate is instantiated so that it can be asked whether it supports the url
  boost::shared_ptr<OsmMapReader> reader;
  for (vector<std::string>::const_iterator nameItr = names.begin(); nameItr != names.end();
       ++nameItr)
  {
    const std::string name = *nameItr;
    LOG_VART(name);
    reader.reset(Factory::getInstance().constructObject<OsmMapReader>(name));
    if (reader->isSupported(url))
    {
      return QString::fromStdString(name);
    }
  }
  return "";
}
void runWaySplitTest() { //Log::WarningLevel levelBefore = Log::getInstance().getLevel(); //Log::getInstance().setLevel(Log::Debug); OsmMap::resetCounters(); OsmReader reader; shared_ptr<OsmMap> map(new OsmMap()); reader.setDefaultStatus(Status::Unknown1); reader.setUseDataSourceIds(true); reader.read("test-files/perty/PertyWaySplitVisitorTest/PertyWaySplitVisitorTest-in-1.osm", map); const int numNodesBeforeSplitting = map->getNodeMap().size(); LOG_VARD(numNodesBeforeSplitting); const int numWaysBeforeSplitting = map->getWays().size(); LOG_VARD(numWaysBeforeSplitting) MapProjector::projectToPlanar(map); PertyWaySplitVisitor waySplitVisitor; boost::minstd_rand rng; rng.seed(1); waySplitVisitor.setRng(rng); waySplitVisitor.setWaySplitProbability(0.5); waySplitVisitor.setMinNodeSpacing(1.0); map->visitRw(waySplitVisitor); MapProjector::projectToWgs84(map); const int numNewNodesCreatedBySpliting = map->getNodeMap().size() - numNodesBeforeSplitting; LOG_VARD(numNewNodesCreatedBySpliting); const int numNewWaysCreatedBySpliting = map->getWays().size() - numWaysBeforeSplitting; LOG_VARD(numNewWaysCreatedBySpliting); const QString outDir = "test-output/perty/PertyWaySplitVisitorTest/"; QDir().mkpath(outDir); OsmWriter writer; writer.setIncludeHootInfo(true); const QString outFile = outDir + "/PertyWaySplitVisitorTest-out-1.osm"; writer.write(map, outFile); HOOT_FILE_EQUALS( "test-files/perty/PertyWaySplitVisitorTest/PertyWaySplitVisitorTest-out-1.osm", outFile); //Log::getInstance().setLevel(levelBefore); }
void ImplicitTagRawRulesDeriver::_validateInputs(const QStringList inputs, const QStringList translationScripts, const QString output) { LOG_VARD(inputs); LOG_VARD(translationScripts); LOG_VARD(output); if (!_elementCriterion.get()) { throw HootException("No element type was specified."); } if (inputs.isEmpty()) { throw HootException("No inputs were specified."); } if (inputs.size() != translationScripts.size()) { LOG_VARD(inputs.size()); LOG_VARD(translationScripts.size()); throw HootException( "The size of the input datasets list must equal the size of the list of translation scripts."); } if (output.isEmpty()) { throw HootException("No output was specified."); } _output.reset(new QFile()); _output->setFileName(output); if (_output->exists() && !_output->remove()) { throw HootException(QObject::tr("Error removing existing %1 for writing.").arg(output)); } _output->close(); if (_translateNamesToEnglish && !_translator.get()) { throw HootException("To English translation enabled but no translator was specified."); } }
/**
 * Randomly selects a member way of the relation and a location on that way at which the way can
 * be split.
 *
 * @param relation the relation to select a split location in; its members must all be ways
 * @param wayId set to the element ID of the randomly selected member; left untouched if the
 * relation has no members
 * @return the selected location, or a default constructed MultiLineStringLocation if the relation
 * has no members or no valid split location exists on the selected way
 * @throws HootException if the selected member is not a way
 */
MultiLineStringLocation PertyWaySplitVisitor::_calcSplitPoint(
  shared_ptr<const Relation> relation, ElementId& wayId) const
{
  const vector<RelationData::Entry>& members = relation->getMembers();
  LOG_VARD(members.size());

  //Nothing can be selected from an empty relation. Bail out before members.size() - 1 wraps
  //around and hands the distribution an invalid range.
  if (members.empty())
  {
    return MultiLineStringLocation();
  }

  //find the way to split on
  boost::uniform_int<> randomWayIndexDistribution(0, members.size() - 1);
  int wayIndex = randomWayIndexDistribution(*_rng);
  wayId = members.at(wayIndex).getElementId();
  LOG_VARD(wayIndex);
  LOG_VARD(wayId);
  ElementPtr element = _map->getElement(wayId);
  if (element->getElementType() != ElementType::Way)
  {
    throw HootException(
      "PERTY feature splitting for multi-line string relations may only occur on relations which contain only ways.");
  }
  WayPtr way = dynamic_pointer_cast<Way>(element);
  LOG_VARD(way->getNodeCount());

  //calculate the split point
  WayLocation wayLocation = _calcSplitPoint(way);
  //return it, if its valid
  if (wayLocation.isValid())
  {
    return MultiLineStringLocation(_map->shared_from_this(), relation, wayIndex, wayLocation);
  }
  //otherwise, return an empty location
  return MultiLineStringLocation();
}
/**
 * Creates a translator for the given script.
 *
 * One instance of every registered translator is constructed and the instances are sorted with
 * CompareSt. The first instance, in sorted order, that reports the script as valid is returned;
 * all other instances are deleted.
 *
 * @param scriptPath path to the translation script
 * @return the selected translator; the caller takes ownership
 * @throws HootException if no translator reports the script as valid
 */
ScriptTranslator* ScriptTranslatorFactory::createTranslator(QString scriptPath)
{
  LOG_VARD(scriptPath);

  _init();

  vector<ScriptTranslator*> st;
  for (size_t i = 0; i < _translators.size(); ++i)
  {
    LOG_VART(_translators[i]);
    st.push_back(Factory::getInstance().constructObject<ScriptTranslator>(_translators[i]));
  }
  sort(st.begin(), st.end(), CompareSt);
  LOG_VART(st);

  ScriptTranslator* result = 0;
  for (size_t i = 0; i < st.size(); ++i)
  {
    if (result != 0)
    {
      //A translator has already been selected. The remaining candidates still have to be
      //released; stopping the loop here would leak them.
      delete st[i];
      continue;
    }

    bool valid = false;
    try
    {
      st[i]->setScript(scriptPath);
      LOG_VART(st[i]->isValidScript());
      valid = st[i]->isValidScript();
    }
    catch (...)
    {
      LOG_WARN("isValidScript shouldn't throw an exception.");
    }

    if (valid)
    {
      result = st[i];
      //NOTE(review): st was sorted after it was filled, so _translators[i] may not be the name
      //of st[i] - confirm before relying on this message.
      LOG_TRACE("Found a valid translator: " + _translators[i]);
    }
    else
    {
      delete st[i];
    }
    LOG_VART(result);
  }

  if (result == 0)
  {
    throw HootException("Unable to find an appropriate scripting language for: " + scriptPath);
  }

  return result;
}
/**
 * Configures the removal probability and seeds the random number generator from the settings.
 *
 * @param conf the settings to read from
 */
void PertyRemoveRandomElementVisitor::setConfiguration(const Settings& conf)
{
  ConfigOptions configOptions(conf);
  setProbability(configOptions.getPertyRemoveRandomP());

  const int seed = configOptions.getPertySeed();
  LOG_VARD(seed);
  if (seed != -1)
  {
    _rng->seed(seed);
  }
  else
  {
    //a seed of -1 requests a generated seed rather than a fixed one
    _rng->seed(RandomNumberUtils::generateSeed());
  }
}
void ServicesDbWriter::writePartial(const shared_ptr<const Node>& n) { bool countChange = true; //LOG_DEBUG("Inside writePartial for Node"); Tags t = n->getTags(); // Only add tags for servicesDB, not good for OSM API if ( _sdb.getDatabaseType() == ServicesDb::DBTYPE_SERVICES) { _addElementTags(n, t); } //LOG_DEBUG("Incoming node ID: " << n->getId()); if (_remapIds) { bool alreadyThere = _nodeRemap.count(n->getId()) != 0; LOG_VARD(alreadyThere); long nodeId = _getRemappedElementId(n->getElementId()); if (alreadyThere) { _sdb.updateNode(nodeId, n->getY(), n->getX(), n->getVersion() + 1, t); } else { _sdb.insertNode(nodeId, n->getY(), n->getX(), t); } } else { if ( n->getId() < 1 ) { throw HootException("Writing non-positive IDs without remap is not supported by " "ServicesDbWriter."); } //LOG_DEBUG("Inserted node " << QString::number(n->getId()) << ", no remapping" ); _sdb.insertNode(n->getId(), n->getY(), n->getX(), t); } if (countChange) { _countChange(); _nodesWritten++; } }
/**
 * Configures the way split probability and the minimum node spacing, and seeds the random number
 * generator from the settings.
 *
 * @param conf the settings to read from
 */
void PertyWaySplitVisitor::setConfiguration(const Settings& conf)
{
  ConfigOptions configOptions(conf);
  setWaySplitProbability(configOptions.getPertyWaySplitProbability());
  setMinNodeSpacing(configOptions.getPertyWaySplitMinNodeSpacing());

  const int seed = configOptions.getPertySeed();
  LOG_VARD(seed);
  if (seed != -1)
  {
    _rng->seed(seed);
  }
  else
  {
    //a seed of -1 requests a generated seed rather than a fixed one
    _rng->seed(RandomNumberUtils::generateSeed());
  }
}
/**
 * Configures the duplicate sigma, the probability, and the move multiplier, and seeds the random
 * number generator from the settings.
 *
 * @param conf the settings to read from
 */
void PertyDuplicatePoiOp::setConfiguration(const Settings& conf)
{
  ConfigOptions configOptions(conf);
  setDuplicateSigma(configOptions.getPertyDuplicatePoiDuplicateSigma());
  setProbability(configOptions.getPertyDuplicatePoiP());
  setMoveMultiplier(configOptions.getPertyDuplicatePoiMoveMultiplier());

  const int seed = configOptions.getPertySeed();
  LOG_VARD(seed);
  if (seed != -1)
  {
    _rng->seed(seed);
  }
  else
  {
    //a seed of -1 requests a generated seed rather than a fixed one
    _rng->seed(RandomNumberUtils::generateSeed());
  }
}
/**
 * Creates a new table with the structure of an existing one, including its defaults,
 * constraints, and indexes.
 *
 * @param from name of the table whose structure is copied
 * @param to name of the table to create
 * @throws HootException if the statement fails to execute
 */
void ServicesDb::_copyTableStructure(QString from, QString to)
{
  // inserting strings in this fashion is safe b/c it is private and we closely control the table
  // names.
  const QString sql =
    QString("CREATE TABLE %1 (LIKE %2 INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING INDEXES)")
      .arg(to).arg(from);
  QSqlQuery q(_db);

  LOG_VARD(sql);

  if (!q.exec(sql))
  {
    //the failure is logged as a warning as well as thrown
    const QString error =
      QString("Error executing query: %1 (%2)").arg(q.lastError().text()).arg(sql);
    LOG_WARN(error);
    throw HootException(error);
  }
}
/**
 * Executes a SQL statement directly, without preparing it first.
 *
 * @param sql the statement to execute
 * @return the executed query
 * @throws HootException if the statement fails to execute
 */
QSqlQuery ServicesDb::_execNoPrepare(QString sql) const
{
  // inserting strings in this fashion is safe b/c it is private and we closely control the table
  // names.
  QSqlQuery q(_db);

  LOG_VARD(sql);

  const bool succeeded = q.exec(sql);
  if (!succeeded)
  {
    //the failure is logged as a warning as well as thrown
    const QString error =
      QString("Error executing query: %1 (%2)").arg(q.lastError().text()).arg(sql);
    LOG_WARN(error);
    throw HootException(error);
  }

  return q;
}
void PertyRemoveTagVisitor::setConfiguration(const Settings& conf) { setProbability(conf.getDouble(pKey(), 0.05)); ConfigOptions configOptions(conf); const int seed = configOptions.getPertySeed(); LOG_VARD(seed); if (seed == -1) { _rng->seed(RandomNumberUtils::generateSeed()); } else { _rng->seed(seed); } _exemptTagKeys = configOptions.getPertyRemoveTagVisitorExemptTagKeys(); _replacementTagKeys = configOptions.getPertyRemoveTagVisitorSubstitutionKeys(); _replacementTagValues = configOptions.getPertyRemoveTagVisitorSubstitutionValues(); }
void PertyMatchScorer::_loadPerturbedMap(const QString perturbedMapInputPath, const QString perturbedMapOutputPath) { LOG_DEBUG("Loading the reference data to be used by the data to be perturbed; " << "renaming REF1 tags to REF2..."); //load from the modified reference data output to get the added ref1 tags; don't copy the map, //since updates to the names of the ref tags on this map will propagate to the map copied from shared_ptr<OsmMap> perturbedMap(new OsmMap()); OsmUtils::loadMap(perturbedMap, perturbedMapInputPath, false, Status::Unknown2); //TODO: should this be removed? MapCleaner().apply(perturbedMap); shared_ptr<TagRenameKeyVisitor> tagRenameKeyVisitor(new TagRenameKeyVisitor("REF1", "REF2")); perturbedMap->visitRw(*tagRenameKeyVisitor); //TODO: this could eventually be replaced with a SetTagVisitor passed in from the command line //instead shared_ptr<SetTagVisitor> setAccuracyVisitor( new SetTagVisitor("error:circular", QString::number(_searchDistance))); perturbedMap->visitRw(*setAccuracyVisitor); LOG_VARD(perturbedMap->getNodeMap().size()); LOG_VARD(perturbedMap->getWays().size()); if (Log::getInstance().getLevel() <= Log::Debug) { TagCountVisitor tagCountVisitor; perturbedMap->visitRo(tagCountVisitor); const long numTotalTags = (long)tagCountVisitor.getStat(); LOG_VARD(numTotalTags); } LOG_DEBUG("Perturbing the copied reference data and saving it to: " << perturbedMapOutputPath); PertyOp pertyOp; pertyOp.setConfiguration(_settings); LOG_DEBUG("Details: " << pertyOp.toString()); pertyOp.apply(perturbedMap); LOG_VARD(perturbedMap->getNodeMap().size()); LOG_VARD(perturbedMap->getWays().size()); if (Log::getInstance().getLevel() <= Log::Debug) { TagCountVisitor tagCountVisitor; perturbedMap->visitRo(tagCountVisitor); const long numTotalTags = (long)tagCountVisitor.getStat(); LOG_VARD(numTotalTags); } MapProjector::projectToWgs84(perturbedMap); OsmUtils::saveMap(perturbedMap, perturbedMapOutputPath); }
/**
 * Opens a reader for the input and returns a stream of its elements.
 *
 * @param input the input to read from
 * @param translationScript the translation script to run each element through; "none" (in any
 * letter case) bypasses translation
 * @return a stream over the input's elements, wrapped in a translation visitor stream unless
 * translation is bypassed
 * @throws HootException if the reader created for the input is not a PartialOsmMapReader
 */
boost::shared_ptr<ElementInputStream> ImplicitTagRawRulesDeriver::_getInputStream(
  const QString input, const QString translationScript)
{
  LOG_INFO("Parsing: " << input << "...");

  _inputReader =
    boost::dynamic_pointer_cast<PartialOsmMapReader>(OsmMapReaderFactory::createReader(input));
  //The cast yields a null pointer when the created reader is not a PartialOsmMapReader. Fail with
  //a clear error instead of dereferencing it below.
  if (!_inputReader)
  {
    throw HootException("Unable to stream input; the reader is not a partial map reader: " + input);
  }
  _inputReader->open(input);
  boost::shared_ptr<ElementInputStream> inputStream =
    boost::dynamic_pointer_cast<ElementInputStream>(_inputReader);

  LOG_VARD(translationScript);
  //"none" allows for bypassing translation for an input; e.g. OSM data
  if (translationScript.toLower() != "none")
  {
    boost::shared_ptr<TranslationVisitor> translationVisitor(new TranslationVisitor());
    translationVisitor->setPath(translationScript);
    inputStream.reset(new ElementVisitorInputStream(_inputReader, translationVisitor));
  }
  return inputStream;
}
/**
 * Sorts the tab separated input file by its second column (word) and then its third column (tag)
 * with the external sort command, writing the result to the output file.
 *
 * @param input the file to sort
 * @throws HootException if the input file doesn't exist or the sort command fails
 */
void ImplicitTagRawRulesDeriver::_sortByWord(boost::shared_ptr<QTemporaryFile> input)
{
  LOG_INFO("Sorting output by word...");
  if (!input->exists())
  {
    throw HootException("Unable to sort file; file doesn't exist.");
  }

  //The file names become part of a shell command. Single quote them so that spaces and shell
  //metacharacters in a path are taken literally; a single quote inside a path is written as '\''
  //(close the quote, escaped quote, reopen the quote).
  QString quotedInputPath = input->fileName();
  quotedInputPath.replace("'", "'\\''");
  QString quotedOutputPath = _output->fileName();
  quotedOutputPath.replace("'", "'\\''");

  //sort by word, then by tag
  const QString cmd =
    "sort -t$'\t' -k2,2 -k3,3 --parallel=" + QString::number(_sortParallelCount) + " '" +
    quotedInputPath + "' -o '" + quotedOutputPath + "'";
  if (std::system(cmd.toStdString().c_str()) != 0)
  {
    throw HootException("Unable to sort input file.");
  }

  LOG_VARD(_output->fileName());
  LOG_INFO(
    "Wrote " <<
    StringUtils::formatLargeNumber(FileUtils::getNumberOfLinesInFile(_output->fileName())) <<
    " lines to final sorted file.");
}
/**
 * Reads a list of tags or tag keys from a file, one entry per line.
 *
 * Lines are trimmed and lower cased. Blank lines and lines starting with '#' are skipped. Unless
 * keysOnly is set, lines without a '=' are skipped as well.
 *
 * @param inputPath path of the file to read; a blank path results in an empty list
 * @param keysOnly if true, lines are not required to contain a '='
 * @return the entries read
 * @throws HootException if the file cannot be opened
 */
QStringList TagListReader::readList(const QString inputPath, const bool keysOnly)
{
  LOG_VARD(inputPath);
  QStringList outputList;
  if (!inputPath.trimmed().isEmpty())
  {
    QFile inputFile(inputPath);
    if (!inputFile.open(QIODevice::ReadOnly))
    {
      //the file is opened read only, so report a read failure
      throw HootException(QObject::tr("Error opening %1 for reading.").arg(inputFile.fileName()));
    }
    while (!inputFile.atEnd())
    {
      //line is already trimmed here, so no further trimming is needed for the checks below
      const QString line = QString::fromUtf8(inputFile.readLine().constData()).trimmed();
      if (!line.isEmpty() && !line.startsWith("#") && (keysOnly || line.contains("=")))
      {
        outputList.append(line.toLower());
      }
    }
    inputFile.close();
  }
  LOG_VART(outputList);
  return outputList;
}
void ImplicitTagRawRulesDeriver::_resolveCountTies() { //Any time more than one word/key combo has the same occurrence count, we need to pick just one //of them. LOG_INFO( "Resolving word/tag key/count ties for " << StringUtils::formatLargeNumber(_duplicatedWordTagKeyCountsToValues.size()) << " duplicated word/tag key/counts..."); _tieResolvedCountFile.reset( new QTemporaryFile( _tempFileDir + "/implicit-tag-raw-rules-generator-temp-XXXXXX")); _tieResolvedCountFile->setAutoRemove(!_keepTempFiles); if (!_tieResolvedCountFile->open()) { throw HootException( QObject::tr("Error opening %1 for writing.").arg(_tieResolvedCountFile->fileName())); } LOG_DEBUG("Opened tie resolve temp file: " << _tieResolvedCountFile->fileName()); if (_keepTempFiles) { LOG_WARN("Keeping temp file: " << _tieResolvedCountFile->fileName()); } if (!_dedupedCountFile->open()) { throw HootException( QObject::tr("Error opening %1 for reading.").arg(_dedupedCountFile->fileName())); } long lineCount = 0; long duplicateResolutions = 0; while (!_dedupedCountFile->atEnd()) { const QString line = QString::fromUtf8(_dedupedCountFile->readLine().constData()).trimmed(); LOG_VART(line); const QStringList lineParts = line.split("\t"); LOG_VART(lineParts); QString word = lineParts[1].trimmed(); LOG_VART(word); const QString kvp = lineParts[2].trimmed(); LOG_VART(kvp); const QString countStr = lineParts[0].trimmed(); const long count = countStr.toLong(); LOG_VART(count); const QStringList kvpParts = kvp.split("="); const QString tagKey = kvpParts[0]; LOG_VART(tagKey); const QString wordTagKey = word.trimmed() % ";" % tagKey.trimmed(); LOG_VART(wordTagKey); const QString wordTagKeyCount = word.trimmed() % ";" % tagKey.trimmed() % ";" % countStr.trimmed(); LOG_VART(wordTagKeyCount); const QString tagValue = kvpParts[1]; LOG_VART(tagValue); if (_duplicatedWordTagKeyCountsToValues.contains(wordTagKeyCount)) { LOG_TRACE("Resolving duplicated word/tag key/count for " << wordTagKeyCount << "..."); //To resolve the 
tie, we're going to pick the most specific kvp. e.g. amenity=public_hall //wins out of amenity=hall. This is not really dealing with same hierarchy level tags //(e.g. amenity=school and amenity=hall) and will just arbitrarily pick in that situation. //Duplicates do seem to be fairly rare, but there could be some perfomance gains by coming //up with a better way to handle this situation. QString lineWithMostSpecificKvp = line % "\n"; const QStringList tagValues = _duplicatedWordTagKeyCountsToValues[wordTagKeyCount]; for (int i = 0; i < tagValues.size(); i++) { const QString childKvp = tagKey % "=" % tagValues[i]; if (OsmSchema::getInstance().isAncestor(childKvp, tagKey % "=" % tagValue)) { lineWithMostSpecificKvp = countStr % "\t" % word % "\t" % childKvp % "\n"; } } LOG_VART(lineWithMostSpecificKvp); _tieResolvedCountFile->write(lineWithMostSpecificKvp.toUtf8()); duplicateResolutions++; } else { const QString updatedLine = countStr % "\t" % word % "\t" % kvp % "\n"; LOG_VART(updatedLine); _tieResolvedCountFile->write(updatedLine.toUtf8()); } lineCount++; if (lineCount % (_statusUpdateInterval * 10) == 0) { PROGRESS_INFO( "Parsed " << StringUtils::formatLargeNumber(lineCount) << " lines from input for duplicated tag key count ties."); } } LOG_VARD(lineCount); LOG_INFO( "Resolved " << StringUtils::formatLargeNumber(duplicateResolutions) << " word/tag key/count ties."); _duplicatedWordTagKeyCountsToValues.clear(); _tieResolvedCountFile->close(); }
void ImplicitTagRawRulesDeriver::deriveRawRules(const QStringList inputs, const QStringList translationScripts, const QString output) { _validateInputs(inputs, translationScripts, output); LOG_INFO( "Generating implicit tag rules raw file for inputs: " << inputs << ", translation scripts: " << translationScripts << ". Writing to output: " << output << "..."); LOG_VARD(_sortParallelCount); LOG_VARD(_skipFiltering); LOG_VARD(_translateNamesToEnglish); _init(); long eligibleFeatureCount = 0; long totalFeatureCount = 0; for (int i = 0; i < inputs.size(); i++) { boost::shared_ptr<ElementInputStream> inputStream = _getInputStream(inputs.at(i), translationScripts.at(i)); while (inputStream->hasMoreElements()) { ElementPtr element = inputStream->readNextElement(); LOG_VART(element); totalFeatureCount++; assert(_elementCriterion.get()); if (_skipFiltering || _elementCriterion->isSatisfied(element)) { QStringList names = element->getTags().getNames(); assert(!names.isEmpty()); //old_name/former_name generally indicates that an element formerly went by the name, so //not really useful here. 
if (names.removeAll("old_name") > 0) { LOG_VART("Removed old name tag."); } if (names.removeAll("former_name") > 0) { LOG_VART("Removed former name tag."); } assert(!names.isEmpty()); if (_translateNamesToEnglish) { names = ImplicitTagUtils::translateNamesToEnglish(names, element->getTags(), _translator); } LOG_VART(names); //get back only the tags that we'd be interested in applying to future elements implicitly //based on name const QStringList kvps = _elementCriterion->getEligibleKvps(element->getTags()); assert(!kvps.isEmpty()); if (kvps.isEmpty()) { throw HootException("Kvps empty."); } //parse whole names and token groups _parseNames(names, kvps); eligibleFeatureCount++; if (eligibleFeatureCount % _statusUpdateInterval == 0) { PROGRESS_INFO( "Parsed " << StringUtils::formatLargeNumber(eligibleFeatureCount) << " eligible features / " << StringUtils::formatLargeNumber(totalFeatureCount) << " total features."); } } } _inputReader->finalizePartial(); } _countFile->close(); LOG_INFO( "Parsed " << StringUtils::formatLargeNumber(eligibleFeatureCount) << " eligible features from " << StringUtils::formatLargeNumber(totalFeatureCount) << " total features."); LOG_INFO( "Wrote " << StringUtils::formatLargeNumber(_countFileLineCtr) << " lines to count file."); _sortByTagOccurrence(); //sort in descending count order _removeDuplicatedKeyTypes(); bool tieCountsNeededResolved = false; if (_duplicatedWordTagKeyCountsToValues.size() > 0) { _resolveCountTies(); tieCountsNeededResolved = true; } LOG_INFO( "Extracted " << StringUtils::formatLargeNumber(_wordKeysToCountsValues.size()) << " word/tag associations."); LOG_INFO("Clearing word/tag associations..."); _wordKeysToCountsValues.clear(); if (tieCountsNeededResolved) { _sortByWord(_tieResolvedCountFile); } else { _sortByWord(_dedupedCountFile); } }
/**
 * Calculates the heading, at the given node, of each of the given ways that is a linear highway
 * or a linear waterway.
 *
 * Other ways are ignored. For a way that starts at the node the heading is reversed.
 *
 * @param map the map containing the node and the ways
 * @param nid ID of the node the headings are calculated at
 * @param wids IDs of the ways to calculate headings for
 * @param delta passed through to WayHeading::calculateHeading
 * @return one heading per considered way that starts or ends at the node
 * @throws HootException if the node is an end node of some of the considered ways but not of
 * others
 */
vector<Radians> NodeMatcher::calculateAngles(const OsmMap* map, long nid, const set<long>& wids,
                                             Meters delta)
{
  vector<Radians> result;
  result.reserve(wids.size());

  int badSpots = 0;

  for (set<long>::const_iterator it = wids.begin(); it != wids.end(); ++it)
  {
    const shared_ptr<const Way>& w = map->getWay(*it);

    const bool consider =
      OsmSchema::getInstance().isLinearHighway(w->getTags(), w->getElementType()) ||
      OsmSchema::getInstance().isLinearWaterway(*w);
    if (!consider)
    {
      // if this isn't a highway or waterway, then don't consider it.
      continue;
    }

    if (w->getNodeId(0) == nid)
    {
      WayLocation wl(map->shared_from_this(), w, 0, 0.0);
      Radians heading = WayHeading::calculateHeading(wl, delta);
      // This is the first node so the angle is an inbound angle, reverse the value.
      heading += (heading < 0.0) ? M_PI : -M_PI;
      result.push_back(heading);
    }
    else if (w->getLastNodeId() == nid)
    {
      WayLocation wl(map->shared_from_this(), w, w->getNodeCount() - 1, 1.0);
      result.push_back(WayHeading::calculateHeading(wl, delta));
    }
    else
    {
      LOG_VARD(w->getNodeId(0));
      LOG_VARD(w->getLastNodeId());
      // count this as a bad spot. If we find some valid spots and some bad spots then that is an
      // error condition
      badSpots++;
    }
  }

  if (result.size() > 0 && badSpots > 0)
  {
    //log everything that was passed in before failing
    LOG_WARN("nid: " << nid);
    LOG_WARN(map->getNode(nid)->toString());
    LOG_WARN("wids: " << wids);
    for (set<long>::const_iterator it = wids.begin(); it != wids.end(); ++it)
    {
      LOG_WARN(map->getWay(*it)->toString());
    }
    throw HootException(
      "calculateAngles was called with a node that was not a start or end node on the specified way.");
  }

  return result;
}