bool IoUtils::isSupportedOgrFormat(const QString input, const bool allowDir) { LOG_VART(input); LOG_VART(allowDir); if (!allowDir && QFileInfo(input).isDir()) { return false; } LOG_VART(QFileInfo(input).isDir()); //input is a dir; only accepting a dir as input if it contains a shape file or is a file geodb if (QFileInfo(input).isDir()) { return input.toLower().endsWith(".gdb") || FileUtils::dirContainsFileWithExtension(QFileInfo(input).dir(), "shp"); } //single input else { //The only zip file format we support are ones containing OGR inputs. if (input.toLower().endsWith(".zip") || //We only support this type of postgres URL for OGR inputs. input.toLower().startsWith("pg:") || // Or, OGDI Vectors. Things like VPF (DNC, VMAP etc) input.toLower().startsWith("gltp:")) { return true; } LOG_VART(OgrUtilities::getInstance().getSupportedFormats(false)); LOG_VART(QFileInfo(input).suffix()); return OgrUtilities::getInstance().getSupportedFormats(false) .contains("." + QFileInfo(input).suffix()); } }
/**
 * Constructs the visitor with an explicit status and update policy.
 *
 * @param status the status assigned to visited elements
 * @param onlyUpdateIfStatusInvalid if true, visit() leaves elements alone whose status is not
 * Status::Invalid
 */
StatusUpdateVisitor::StatusUpdateVisitor(Status status, bool onlyUpdateIfStatusInvalid) :
  _status(status),
  _onlyUpdateIfStatusInvalid(onlyUpdateIfStatusInvalid)
{
  LOG_VART(_status);
  LOG_VART(_onlyUpdateIfStatusInvalid);
}
/**
 * Returns the average of the numeric tag values accumulated by this visitor.
 *
 * @return the accumulated sum divided by the number of parsed tag values, or 0.0 when no tag
 * value has been parsed
 */
double AverageNumericTagsVisitor::getStat() const
{
  LOG_VART(_sum);
  LOG_VART(_tagCount);

  // Only the count guards the division. Requiring a positive sum as well would report every
  // average at or below zero (e.g. from negative tag values) as 0.0.
  if (_tagCount > 0)
  {
    return _sum / _tagCount;
  }
  return 0.0;
}
/**
 * Creates a reader for the given URL and configures its ID and file status handling.
 *
 * @param useDataSourceIds passed to the reader's setUseDataSourceIds
 * @param useFileStatus passed to the reader's setUseFileStatus
 * @param url the URL the reader is created for
 * @return the configured reader
 */
boost::shared_ptr<OsmMapReader> OsmMapReaderFactory::createReader(bool useDataSourceIds,
                                                                  bool useFileStatus, QString url)
{
  LOG_VART(url);
  LOG_VART(useDataSourceIds);
  LOG_VART(useFileStatus);

  boost::shared_ptr<OsmMapReader> mapReader = _createReader(url);
  mapReader->setUseDataSourceIds(useDataSourceIds);
  mapReader->setUseFileStatus(useFileStatus);
  return mapReader;
}
/**
 * Creates a reader for the given URL and configures its ID handling and default status.
 *
 * @param url the URL the reader is created for
 * @param useDataSourceIds passed to the reader's setUseDataSourceIds
 * @param defaultStatus passed to the reader's setDefaultStatus
 * @return the configured reader
 */
boost::shared_ptr<OsmMapReader> OsmMapReaderFactory::createReader(QString url,
                                                                  bool useDataSourceIds,
                                                                  Status defaultStatus)
{
  LOG_VART(url);
  LOG_VART(useDataSourceIds);
  LOG_VART(defaultStatus);

  boost::shared_ptr<OsmMapReader> mapReader = _createReader(url);
  mapReader->setUseDataSourceIds(useDataSourceIds);
  mapReader->setDefaultStatus(defaultStatus);
  return mapReader;
}
/**
 * Renames the configured old tag key to the new tag key on the element, keeping the value.
 * Elements without the old key are left untouched.
 *
 * @param e the element whose tags are examined
 */
void TagRenameKeyVisitor::visit(const boost::shared_ptr<Element>& e)
{
  LOG_VART(_oldKey);
  LOG_VART(_newKey);

  if (!e->getTags().contains(_oldKey))
  {
    return;
  }

  Tags& elementTags = e->getTags();
  // Grab the value before the old key is removed so it can be stored under the new key.
  const QString oldValue = elementTags.get(_oldKey);
  elementTags.remove(_oldKey);
  elementTags.insert(_newKey, oldValue);
  e->setTags(elementTags);
}
/**
 * Constructs every registered script translator, orders them with CompareSt, and returns the
 * first one that reports the script as valid.
 *
 * All constructed translators that are not returned are deleted here, including the candidates
 * ordered after the selected one.
 *
 * @param scriptPath path of the script a translator is needed for
 * @return the first translator that accepts the script (raw pointer; presumably owned by the
 * caller - confirm against callers)
 * @throws HootException if no translator accepts the script
 */
ScriptTranslator* ScriptTranslatorFactory::createTranslator(QString scriptPath)
{
  LOG_VARD(scriptPath);

  _init();

  vector<ScriptTranslator*> st;
  for (size_t i = 0; i < _translators.size(); ++i)
  {
    LOG_VART(_translators[i]);
    st.push_back(Factory::getInstance().constructObject<ScriptTranslator>(_translators[i]));
  }

  sort(st.begin(), st.end(), CompareSt);
  LOG_VART(st);

  ScriptTranslator* result = 0;
  for (size_t i = 0; i < st.size(); ++i)
  {
    if (result != 0)
    {
      // A translator has already been selected. The remaining candidates are never handed the
      // script, but they still have to be released or they leak.
      delete st[i];
      continue;
    }

    try
    {
      st[i]->setScript(scriptPath);
      LOG_VART(st[i]->isValidScript());
      if (st[i]->isValidScript())
      {
        result = st[i];
        // NOTE(review): st has been sorted, so index i may not refer to the same entry in
        // _translators; the name logged here may not be the selected translator's name.
        LOG_TRACE("Found a valid translator: " + _translators[i]);
      }
      else
      {
        delete st[i];
      }
      LOG_VART(result);
    }
    catch (...)
    {
      LOG_WARN("isValidScript shouldn't throw an exception.");
      // Never hand back a pointer that is about to be deleted.
      if (result == st[i])
      {
        result = 0;
      }
      delete st[i];
    }
  }

  if (result == 0)
  {
    throw HootException("Unable to find an appropriate scripting language for: " + scriptPath);
  }

  return result;
}
void StatusUpdateVisitor::setConfiguration(const Settings& conf) { ConfigOptions configOptions(conf); _onlyUpdateIfStatusInvalid = configOptions.getStatusUpdateVisitorOnlyUpdateInvalidStatus(); if (configOptions.getStatusUpdateVisitorStatus().trimmed().isEmpty()) { _status = Status::Invalid; } else { _status = Status::fromString(configOptions.getStatusUpdateVisitorStatus()); } LOG_VART(_status); LOG_VART(_onlyUpdateIfStatusInvalid); }
void ImplicitTagRulesDatabaseDeriver::_writeCustomRules(long& linesWrittenCount) { // would like to know somehow if any of the custom rules overlap with the db derived // rules from the public data - #2300 LOG_DEBUG("Writing custom rules..."); long ruleCount = 0; LOG_VARD(_customRules.getCustomRulesList().size()); if (_customRules.getCustomRulesList().size() > 0) { const QMap<QString, QString> customRulesList = _customRules.getCustomRulesList(); for (QMap<QString, QString>::const_iterator customRulesItr = customRulesList.begin(); customRulesItr != customRulesList.end(); ++customRulesItr) { const QString line = QString::number(INT_MAX) % "\t" % customRulesItr.key().trimmed() % "\t" % customRulesItr.value().trimmed() % "\n"; LOG_VART(line); _filteredCountFile->write(line.toUtf8()); ruleCount++; linesWrittenCount++; } LOG_INFO("Wrote " << ruleCount << " custom rules."); } }
/**
 * Converts a geometry to an element by dispatching on the geometry type ID.
 *
 * @param g the geometry to convert
 * @param s status passed on to the type specific conversion
 * @param circularError circular error passed on to the type specific conversion
 * @return the converted element, or an empty pointer for an unsupported geometry type
 */
boost::shared_ptr<Element> GeometryConverter::convertGeometryToElement(const Geometry* g, Status s,
                                                                       double circularError)
{
  LOG_VART(g->getGeometryTypeId());

  switch (g->getGeometryTypeId())
  {
  case GEOS_POINT:
    return convertPointToNode(dynamic_cast<const Point*>(g), _map, s, circularError);
  case GEOS_LINESTRING:
  case GEOS_LINEARRING:
    return convertLineStringToWay(dynamic_cast<const LineString*>(g), _map, s, circularError);
  case GEOS_POLYGON:
    return convertPolygonToElement(dynamic_cast<const Polygon*>(g), _map, s, circularError);
  case GEOS_MULTILINESTRING:
    return
      convertMultiLineStringToElement(
        dynamic_cast<const MultiLineString*>(g), _map, s, circularError);
  case GEOS_MULTIPOLYGON:
    return
      convertMultiPolygonToRelation(dynamic_cast<const MultiPolygon*>(g), _map, s, circularError);
  case GEOS_GEOMETRYCOLLECTION:
    return
      convertGeometryCollection(dynamic_cast<const GeometryCollection*>(g), s, circularError);
  default:
    break;
  }

  // Unsupported geometry type: warn (up to the warning limit) and return an empty element.
  if (logWarnCount < Log::getWarnMessageLimit())
  {
    LOG_WARN("Unsupported geometry type.  Element will be removed from the map.  " + g->toString());
  }
  else if (logWarnCount == Log::getWarnMessageLimit())
  {
    LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE);
  }
  logWarnCount++;
  return boost::shared_ptr<Element>();
}
void ImplicitTagRawRulesDeriver::setConfiguration(const Settings& conf) { ConfigOptions options = ConfigOptions(conf); setSortParallelCount(options.getImplicitTaggingRawRulesDeriverSortParallelCount()); const int idealThreads = QThread::idealThreadCount(); LOG_VART(idealThreads); if (_sortParallelCount < 1 || _sortParallelCount > idealThreads) { setSortParallelCount(idealThreads); } setSkipFiltering(options.getImplicitTaggingRawRulesDeriverSkipFiltering()); setKeepTempFiles(options.getImplicitTaggingKeepTempFiles()); setTempFileDir(options.getApidbBulkInserterTempFileDir()); setTranslateNamesToEnglish(options.getImplicitTaggingDatabaseDeriverTranslateNamesToEnglish()); setElementCriterion(options.getImplicitTaggingElementCriterion()); if (_translateNamesToEnglish) { _translator.reset( Factory::getInstance().constructObject<ToEnglishTranslator>( options.getLanguageTranslationTranslator())); _translator->setConfiguration(conf); _translator->setSourceLanguages(options.getLanguageTranslationSourceLanguages()); _translator->setId("ImplicitTagRawRulesDeriver"); } }
/**
 * Opens a data source with GDALOpenEx.
 *
 * @param url the data source to open
 * @param readonly if false, the dataset is opened with GDAL_OF_UPDATE
 * @return the opened dataset
 * @throws HootException if the dataset could not be opened
 */
boost::shared_ptr<GDALDataset> OgrUtilities::openDataSource(const QString url, bool readonly)
{
  /* Check for the correct driver name, if unknown try all drivers.
   * This can be an issue because drivers are tried in the order that they are
   * loaded which has been known to cause issues.
   */
  OgrDriverInfo driverInfo = getDriverInfo(url, readonly);

  // GDALOpenEx opens the dataset read only unless GDAL_OF_UPDATE is specified.
  if (!readonly)
  {
    driverInfo._driverType |= GDAL_OF_UPDATE;
  }
  LOG_VART(driverInfo._driverName);
  LOG_VART(driverInfo._driverType);
  LOG_VART(url.toUtf8().data());

  const char* drivers[2] = { driverInfo._driverName, NULL };
  // With no known driver name, pass NULL instead of the list.
  const char* const* allowedDrivers = (driverInfo._driverName != NULL ? drivers : NULL);

  // Set up read options for various file types.
  OgrOptions options;
  const QString driverName(driverInfo._driverName);
  if (driverName == "CSV")
  {
    options["X_POSSIBLE_NAMES"] = ConfigOptions().getOgrReaderCsvLonfield();
    options["Y_POSSIBLE_NAMES"] = ConfigOptions().getOgrReaderCsvLatfield();
    // options["Z_POSSIBLE_NAMES"] = ConfigOptions().getOgrReaderCsvZfield();
    options["KEEP_GEOM_COLUMNS"] = ConfigOptions().getOgrReaderCsvKeepGeomFields();
  }
  if (driverName == "OGR_OGDI")
  {
    // From the GDAL docs:
    // From GDAL/OGR 1.8.0, setting the OGR_OGDI_LAUNDER_LAYER_NAMES configuration option
    // (or environment variable) to YES causes the layer names to be simplified.
    // For example : watrcrsl_hydro instead of 'watrcrsl@hydro(*)_line'
    options["OGR_OGDI_LAUNDER_LAYER_NAMES"] = ConfigOptions().getOgrReaderOgdiLaunderLayerNames();
  }

  boost::shared_ptr<GDALDataset> result(
    static_cast<GDALDataset*>(
      GDALOpenEx(
        url.toUtf8().data(), driverInfo._driverType, allowedDrivers, options.getCrypticOptions(),
        NULL)));

  if (!result)
  {
    throw HootException("Unable to open: " + url);
  }

  return result;
}
/**
 * Builds a cookie jar holding the session cookie of an authenticated user.
 *
 * The session ID is looked up in the Hootenanny API database by user name and access tokens and
 * stored in a cookie named "SESSION" for the given URL.
 *
 * @param userName name of the user to look the session up for
 * @param accessToken access token of the user
 * @param accessTokenSecret access token secret of the user
 * @param url the URL the cookie is set for
 * @return a cookie jar containing the session cookie
 * @throws HootException if no session ID is found for the user
 */
boost::shared_ptr<HootNetworkCookieJar> NetworkIoUtils::getUserSessionCookie(
  const QString userName, const QString accessToken, const QString accessTokenSecret,
  const QString url)
{
  // NOTE(review): the access token and the session ID are written to the trace log below;
  // confirm that is acceptable.
  LOG_VART(userName);
  LOG_VART(accessToken);
  LOG_VART(url);

  HootApiDb db;
  LOG_VART(HootApiDb::getBaseUrl());
  //hoot db requires a layer to open, but we don't need one here...so put anything in
  QUrl dbUrl(HootApiDb::getBaseUrl().toString() + "/blah");
  db.open(dbUrl);
  const QString sessionId = db.getSessionIdByAccessTokens(userName, accessToken, accessTokenSecret);
  LOG_VART(sessionId);
  db.close();
  if (sessionId.isEmpty())
  {
    // The message expression was garbled; the user name is the value that belongs in it.
    throw HootException("User: " + userName + " has not been authenticated.");
  }

  boost::shared_ptr<HootNetworkCookieJar> cookieJar(new HootNetworkCookieJar());
  QList<QNetworkCookie> cookies;
  QNetworkCookie sessionCookie(QString("SESSION").toUtf8(), sessionId.toUtf8());
  cookies.append(sessionCookie);
  cookieJar->setCookiesFromUrl(cookies, url);
  LOG_VART(cookieJar->size());
  LOG_VART(cookieJar->toString());
  return cookieJar;
}
void ImplicitTagRulesDatabaseDeriver::_populateSchemaTagValues() { _schemaTagValues.clear(); _wordsNotInSchema.clear(); // TODO: should the use and/or building categories be added here? const std::vector<SchemaVertex> tags = OsmSchema::getInstance().getTagByCategory(OsmSchemaCategory::poi()); StringTokenizer tokenizer; for (std::vector<SchemaVertex>::const_iterator tagItr = tags.begin(); tagItr != tags.end(); ++tagItr) { SchemaVertex tag = *tagItr; const QString tagVal = tag.value.toLower().replace("_", " "); if (!tagVal.contains("*")) //skip wildcards { if (!_customRules.getWordIgnoreList().contains(tagVal, Qt::CaseInsensitive)) { _schemaTagValues.insert(tagVal); //dealing with the uk english spellings on an as seen basis; this should be expanded and //made more extensible if (tagVal == "theatre") { _schemaTagValues.insert("theater"); } if (tagVal == "centre") { _schemaTagValues.insert("center"); } LOG_TRACE("Appended " << tagVal << " to schema tag values."); } QStringList vals = tokenizer.tokenize(tagVal); for (int i = 0; i < vals.size(); i++) { const QString val = vals.at(i); if (!_customRules.getWordIgnoreList().contains(val, Qt::CaseInsensitive)) { _schemaTagValues.insert(val); //see comment above if (val == "theatre") { _schemaTagValues.insert("theater"); } if (val == "centre") { _schemaTagValues.insert("center"); } LOG_TRACE("Appended " << val << " to schema tag values."); } } } } LOG_VARD(_schemaTagValues.size()); QStringList schemaTagValuesList = _schemaTagValues.toList(); qSort(schemaTagValuesList.begin(), schemaTagValuesList.end()); //sort for viewing only LOG_VART(schemaTagValuesList); }
/**
 * Attempts to translate the configured tag keys of an element and updates the progress
 * counters.
 *
 * @param e the element to process
 * @throws HootException if no tag keys have been configured
 */
void ToEnglishTranslationVisitor::visit(const boost::shared_ptr<Element>& e)
{
  if (_tagKeys.isEmpty())
  {
    throw HootException("No tag keys specified for language translation.");
  }

  LOG_VART(e);

  //the flag is set while parsing the previous element, so the counter is incremented now
  if (_currentElementHasSuccessfulTagTranslation)
  {
    _numElementsWithSuccessfulTagTranslation++;
  }
  _currentElementHasSuccessfulTagTranslation = false;

  const Tags& tags = e->getTags();
  bool elementProcessed = false;
  QSet<QString>::const_iterator tagKeysItr = _tagKeys.begin();
  for (; tagKeysItr != _tagKeys.end(); ++tagKeysItr)
  {
    const QString toTranslateTagKey = *tagKeysItr;
    if (!tags.contains(toTranslateTagKey))
    {
      continue;
    }

    //skipping tags that already have an english translated tag is optional, b/c many of the
    //OSM english translations seen are either just copies of the foreign language text or are
    //not very good translations
    const QString preTranslatedTagKey = toTranslateTagKey + ":en";
    const bool skipAsPreTranslated =
      !_ignorePreTranslatedTags && tags.contains(preTranslatedTagKey);
    if (skipAsPreTranslated)
    {
      LOG_TRACE(
        "Skipping element with pre-translated tag: " << preTranslatedTagKey << "=" <<
        tags.get(toTranslateTagKey).trimmed());
      continue;
    }

    _translate(e, toTranslateTagKey);
    elementProcessed = true;
  }

  if (elementProcessed)
  {
    _numProcessedElements++;
    if (_numProcessedElements % _taskStatusUpdateInterval == 0)
    {
      PROGRESS_INFO("Attempted tag translation for " << _numProcessedElements << " elements.");
    }
  }

  _numTotalElements++;
  if (_numTotalElements % _taskStatusUpdateInterval == 0)
  {
    PROGRESS_INFO("Visited " << _numTotalElements << " elements.");
  }
}
/**
 * Sets the configured status on the element and records it in the hoot status metadata tag.
 *
 * When only invalid statuses are to be updated, elements with any other status are skipped.
 *
 * @param e the element to update
 */
void StatusUpdateVisitor::visit(const boost::shared_ptr<Element>& e)
{
  LOG_VART(e->getStatus());

  const bool leaveUntouched = _onlyUpdateIfStatusInvalid && e->getStatus() != Status::Invalid;
  if (!leaveUntouched)
  {
    e->setStatus(_status);
    e->getTags()[MetadataTags::HootStatus()] = _status.toString();
  }
}
bool IoUtils::areSupportedOgrFormats(const QStringList inputs, const bool allowDir) { if (inputs.size() == 0) { return false; } for (int i = 0; i < inputs.size(); i++) { const QString input = inputs.at(i); LOG_VART(input); const QString file = input.split(";")[0]; LOG_VART(file); if (!isSupportedOgrFormat(file, allowDir)) { return false; } } return true; }
void ImplicitTagRawRulesDeriver::_parseNames(const QStringList names, const QStringList kvps) { for (int i = 0; i < names.size(); i++) { QString name = names.at(i); LOG_VART(name); //'=' is used in the map key for kvps, so it needs to be escaped in the word if (name.contains("=")) { name = name.replace("=", "%3D"); } for (int j = 0; j < kvps.size(); j++) { _updateForNewWord(name, kvps.at(j)); } QStringList nameTokens = _tokenizer.tokenize(name); LOG_VART(nameTokens.size()); //tokenization for (int j = 0; j < nameTokens.size(); j++) { QString nameToken = nameTokens.at(j); _parseNameToken(nameToken, kvps); } //going up to a token group size of two; tested up to group size three, but three didn't seem to //yield any better tagging results if (nameTokens.size() > 2) { for (int j = 0; j < nameTokens.size() - 1; j++) { QString nameToken = nameTokens.at(j) + " " + nameTokens.at(j + 1); _parseNameToken(nameToken, kvps); } } } }
/**
 * Cleans a name token, optionally translates it to English, and associates it with the given
 * key/value pairs.
 *
 * @param nameToken the token to process; modified in place (commas removed, and replaced by its
 * translation when translation is enabled and yields a non-empty result)
 * @param kvps the key/value pairs to associate with the token
 */
void ImplicitTagRawRulesDeriver::_parseNameToken(QString& nameToken, const QStringList kvps)
{
  //may eventually need to replace more punctuation chars here, but this is fine for now...need a
  //more extensible way to do it; also, that logic could be moved into ImplicitTagUtils::cleanName
  nameToken.replace(",", "");
  LOG_VART(nameToken);

  if (_translateNamesToEnglish)
  {
    const QString englishNameToken = _translator->translate(nameToken);
    LOG_VART(englishNameToken);
    const bool translationAvailable = !englishNameToken.isEmpty();
    if (translationAvailable)
    {
      nameToken = englishNameToken;
    }
  }

  for (QStringList::const_iterator kvpItr = kvps.begin(); kvpItr != kvps.end(); ++kvpItr)
  {
    _updateForNewWord(nameToken, *kvpItr);
  }
}
/**
 * Reports whether this match string is usable.
 *
 * @return true if there is at least one match and no match has a zero length subline; false
 * otherwise
 */
bool WaySublineMatchString::isValid() const
{
  if (_matches.size() == 0)
  {
    return false;
  }

  for (size_t i = 0; i < _matches.size(); i++)
  {
    const WaySublineMatch& m = _matches[i];
    LOG_VART(m.getSubline1());
    LOG_VART(m.getSubline2());

    if (m.getSubline1().isZeroLength())
    {
      return false;
    }
    if (m.getSubline2().isZeroLength())
    {
      return false;
    }
  }
  return true;
}
/**
 * Translates the non-numeric parts of an address to English.
 *
 * @param address the address to translate
 * @return the address with translated parts substituted, or an empty string if no part could be
 * translated
 */
QString AddressTranslator::translateToEnglish(const QString address) const
{
  const QStringList addressParts = address.simplified().split(" ");

  //Try to translate blocks of consecutive address tokens to cut down on the number of
  //translation calls made.
  QString combinedAddressPartToTranslate = "";
  QStringList combinedAddressPartsToTranslate;
  for (QStringList::const_iterator partItr = addressParts.begin(); partItr != addressParts.end();
       ++partItr)
  {
    const QString addressPart = *partItr;
    LOG_VART(addressPart);
    if (StringUtils::isNumber(addressPart))
    {
      // A number ends the current block of consecutive non-numeric tokens, if there is one.
      if (!combinedAddressPartToTranslate.isEmpty())
      {
        combinedAddressPartsToTranslate.append(combinedAddressPartToTranslate.trimmed());
        combinedAddressPartToTranslate = "";
      }
    }
    else
    {
      combinedAddressPartToTranslate += " " + addressPart;
      LOG_VART(combinedAddressPartToTranslate);
    }
    LOG_VART(combinedAddressPartsToTranslate);
  }
  // Flush the block still open at the end of the address.
  if (!combinedAddressPartToTranslate.isEmpty())
  {
    combinedAddressPartsToTranslate.append(combinedAddressPartToTranslate.trimmed());
  }
  LOG_VART(combinedAddressPartsToTranslate);

  bool anyAddressPartWasTranslated = false;
  QString translatedAddress = address;
  for (int i = 0; i < combinedAddressPartsToTranslate.size(); i++)
  {
    const QString combinedAddressPart = combinedAddressPartsToTranslate.at(i).trimmed();
    LOG_VART(combinedAddressPart);
    const QString translatedCombinedAddressPart = _translator->translate(combinedAddressPart);
    if (translatedCombinedAddressPart.isEmpty())
    {
      continue;
    }
    translatedAddress.replace(combinedAddressPart, translatedCombinedAddressPart);
    LOG_VART(translatedAddress);
    anyAddressPartWasTranslated = true;
  }

  if (!anyAddressPartWasTranslated)
  {
    LOG_TRACE("Address " << address << " could not be translated.");
    return "";
  }

  LOG_TRACE("Translated address from " << address << " to " << translatedAddress);
  return translatedAddress;
}
/**
 * Constructs a match string from a collection of matches.
 *
 * @param m the matches to store; every ordered pair of distinct matches is checked for overlap
 * @throws OverlappingMatchesException if any match overlaps another one
 */
WaySublineMatchString::WaySublineMatchString(const MatchCollection& m)
{
  for (size_t first = 0; first < m.size(); first++)
  {
    for (size_t second = 0; second < m.size(); second++)
    {
      if (first == second)
      {
        // a match is not compared with itself
        continue;
      }
      if (!m[first].overlaps(m[second]))
      {
        continue;
      }

      LOG_VART(m);
      throw OverlappingMatchesException(
        "The match collection must not contain overlapping matches.");
    }
  }

  _matches = m;
}
/**
 * Finds the class name of the first registered reader that supports the given URL.
 *
 * @param url the URL a reader is needed for
 * @return the name of the first supporting reader, or an empty string if none supports the URL
 */
QString OsmMapReaderFactory::getReaderName(const QString url)
{
  LOG_VARD(url);
  vector<std::string> names =
    Factory::getInstance().getObjectNamesByBase(OsmMapReader::className());
  LOG_VARD(names.size());

  boost::shared_ptr<OsmMapReader> candidateReader;
  for (vector<std::string>::const_iterator nameItr = names.begin(); nameItr != names.end();
       ++nameItr)
  {
    const std::string name = *nameItr;
    LOG_VART(name);
    candidateReader.reset(Factory::getInstance().constructObject<OsmMapReader>(name));
    if (!candidateReader->isSupported(url))
    {
      continue;
    }
    return QString::fromStdString(name);
  }
  return "";
}
void ImplicitTagRawRulesDeriver::_sortByTagOccurrence() { LOG_INFO("Sorting output by tag occurrence count..."); LOG_VART(_sortParallelCount); _sortedCountFile.reset( new QTemporaryFile(_tempFileDir + "/implicit-tag-raw-rules-generator-temp-XXXXXX")); _sortedCountFile->setAutoRemove(!_keepTempFiles); if (!_sortedCountFile->open()) { throw HootException( QObject::tr("Error opening %1 for writing.").arg(_sortedCountFile->fileName())); } LOG_DEBUG("Opened sorted temp file: " << _sortedCountFile->fileName()); if (_keepTempFiles) { LOG_WARN("Keeping temp file: " << _sortedCountFile->fileName()); } if (!_countFile->exists()) { throw HootException("Unable to sort file; file doesn't exist."); } //This counts each unique line occurrence, sorts by decreasing occurrence count (necessary for //next step which removes duplicate tag keys associated with the same word), and replaces the //space between the prepended count and the word with a tab. //sort by highest count, then by word, then by tag const QString cmd = "sort --parallel=" + QString::number(_sortParallelCount) + " " + _countFile->fileName() + " | uniq -c | sort -n -r --parallel=" + QString::number(_sortParallelCount) + " | " + "sed -e 's/^ *//;s/ /\\t/' > " + _sortedCountFile->fileName(); if (std::system(cmd.toStdString().c_str()) != 0) { throw HootException("Unable to sort file."); } LOG_INFO( "Wrote " << StringUtils::formatLargeNumber(FileUtils::getNumberOfLinesInFile(_sortedCountFile->fileName())) << " lines to sorted file."); }
/**
 * Reads a list of tag entries from a text file, one entry per line.
 *
 * Blank lines and lines starting with '#' are skipped. Every accepted line is trimmed and lower
 * cased.
 *
 * @param inputPath path of the file to read; a path that is empty after trimming yields an
 * empty list
 * @param keysOnly if false, only lines containing '=' are accepted
 * @return the accepted lines
 * @throws HootException if the file cannot be opened for reading
 */
QStringList TagListReader::readList(const QString inputPath, const bool keysOnly)
{
  LOG_VARD(inputPath);
  QStringList outputList;
  if (!inputPath.trimmed().isEmpty())
  {
    QFile inputFile(inputPath);
    if (!inputFile.open(QIODevice::ReadOnly))
    {
      // The file is opened read only, so the message has to say "reading".
      throw HootException(QObject::tr("Error opening %1 for reading.").arg(inputFile.fileName()));
    }
    while (!inputFile.atEnd())
    {
      // The line is trimmed once here; no further trimming is needed for the checks below.
      const QString line = QString::fromUtf8(inputFile.readLine().constData()).trimmed();
      if (!line.isEmpty() && !line.startsWith("#") && (keysOnly || line.contains("=")))
      {
        outputList.append(line.toLower());
      }
    }
    inputFile.close();
  }
  LOG_VART(outputList);
  return outputList;
}
double SqliteWordWeightDictionary::getWeight(const QString& word) const { LOG_VART(word); QString normalized = word.toLower().normalized(QString::NormalizationForm_C); normalized.replace(_nonWord, ""); WeightHash::const_iterator it = _weights.find(normalized); long c; if (it == _weights.end()) { long c = _reader.readCount(normalized); if (c > 0) { _weights[normalized] = c; } return c; } else { c = it->second; } return c / (double)_count; }
void AverageNumericTagsVisitor::visit(const ConstElementPtr& e) { for (int i = 0; i < _keys.size(); i++) { const QString key = _keys.at(i); LOG_VART(key); if (e->getTags().contains(key)) { bool parsed = false; const QString strValue = e->getTags().get(key); LOG_VART(strValue); const double value = strValue.toDouble(&parsed); LOG_VART(value); LOG_VART(parsed); if (parsed) { _tagCount++; _sum += value; LOG_VART(_tagCount); LOG_VART(_sum); } else { if (logWarnCount < Log::getWarnMessageLimit()) { LOG_WARN( "Unsuccessfully attempted to convert tag with key: " << key << " and value: " << strValue << " to number."); } else if (logWarnCount == Log::getWarnMessageLimit()) { LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE); } logWarnCount++; } } } }
/**
 * Calculates the headings of the ways that start or end at the given node.
 *
 * Ways that are not network feature types are skipped. Ways that contain the node at neither
 * their first nor their last position are recorded as bad spots.
 *
 * @param map the map containing the ways
 * @param nid ID of the node the headings are calculated at
 * @param wids IDs of the ways to examine
 * @param delta passed to WayHeading::calculateHeading
 * @return one heading per way starting or ending at the node
 * @throws HootException if both valid headings and bad spots were found and the configuration
 * requests failing on bad angle spots
 */
vector<Radians> NodeMatcher::calculateAngles(const OsmMap* map, long nid, const set<long>& wids,
                                             Meters delta)
{
  vector<Radians> result;
  result.reserve(wids.size());

  LOG_VART(nid);
  QSet<long> badWayIds;
  for (set<long>::const_iterator it = wids.begin(); it != wids.end(); ++it)
  {
    const ConstWayPtr& w = map->getWay(*it);
    LOG_VART(w->getId());
    LOG_VART(w->getLastNodeId());
    LOG_VART(w->getNodeId(0));

    if (!isNetworkFeatureType(w))
    {
      // if this isn't a feature from a specific list, then don't consider it.
      LOG_TRACE("calculateAngles skipping feature...");
      continue;
    }

    if (w->getNodeId(0) == nid)
    {
      LOG_TRACE("Start node: " << nid);
      WayLocation wl(map->shared_from_this(), w, 0, 0.0);
      Radians heading = WayHeading::calculateHeading(wl, delta);
      // This is the first node so the angle is an inbound angle, reverse the value.
      heading += (heading < 0.0) ? M_PI : -M_PI;
      LOG_VART(heading);
      result.push_back(heading);
    }
    else if (w->getLastNodeId() == nid)
    {
      LOG_TRACE("End node: " << nid);
      WayLocation wl(map->shared_from_this(), w, w->getNodeCount() - 1, 1.0);
      Radians heading = WayHeading::calculateHeading(wl, delta);
      LOG_VART(heading);
      result.push_back(heading);
    }
    else
    {
      // count this as a bad spot. If we find some valid spots and some bad spots then that is an
      // error condition
      badWayIds.insert(w->getId());
    }
  }

  LOG_VART(badWayIds.size());
  LOG_VART(result.size());

  // Only a mix of valid and bad spots is treated as an error condition.
  const bool mixedSpots = result.size() > 0 && badWayIds.size() > 0;
  if (!mixedSpots)
  {
    return result;
  }

  LOG_TRACE(
    "Found " << badWayIds.size() << " bad spot(s) in NodeMatcher when calculating angles " <<
    "with node: " << nid);
  LOG_TRACE("wids: " << badWayIds);
  for (QSet<long>::const_iterator it = badWayIds.begin(); it != badWayIds.end(); ++it)
  {
    const ConstWayPtr& w = map->getWay(*it);
    LOG_VART(w->getId());
    LOG_VART(w->getTags().get("REF1"));
    LOG_VART(w->getTags().get("REF2"));
    LOG_VART(w->getNodeIndex(nid));
    LOG_VART(w->getNodeId(0));
    LOG_VART(w->getLastNodeId());
  }

  if (ConfigOptions().getNodeMatcherFailOnBadAngleSpots())
  {
    throw HootException(
      QString("NodeMatcher::calculateAngles was called with a node that was not a start or ") +
      QString("end node on the specified way."));
  }

  return result;
}
double NodeMatcher::scorePair(long nid1, long nid2) { ConstNodePtr n1 = _map->getNode(nid1); ConstNodePtr n2 = _map->getNode(nid2); const set<long>& wids1 = _map->getIndex().getNodeToWayMap()->at(nid1); const set<long>& wids2 = _map->getIndex().getNodeToWayMap()->at(nid2); double acc = 0; for (set<long>::const_iterator it = wids1.begin(); it != wids1.end(); ++it) { acc = max(acc, _map->getWay(*it)->getCircularError()); } for (set<long>::const_iterator it = wids2.begin(); it != wids2.end(); ++it) { acc = max(acc, _map->getWay(*it)->getCircularError()); } vector<Radians> theta1 = calculateAngles(_map.get(), nid1, wids1, _delta); vector<Radians> theta2 = calculateAngles(_map.get(), nid2, wids2, _delta); int s1 = theta1.size(); int s2 = theta2.size(); if (s1 < 3 || s2 < 3) { return 0.0; } double d = n1->toCoordinate().distance(n2->toCoordinate()); // TODO: this isnt right; Talk to mike double distanceScore = 1 - (Normal::phi(d, acc * 1.5) - 0.5) * 2.0; LOG_VART(nid1); LOG_VART(nid2); LOG_VART(distanceScore); LOG_VART(acc); LOG_VART(d); LOG_VART(Normal::phi(d, acc * 1.5)); LOG_VART(Normal::phi(d, acc / 2.0)); if (theta1.size() < theta2.size()) { swap(theta1, theta2); } double thetaScore; // this is very unsual and will slow things down. if (theta1.size() > 6 && theta2.size() > 6) { if (logWarnCount < Log::getWarnMessageLimit()) { LOG_WARN("Greater than seven intersections at one spot? Odd. Giving it a high angleScore."); } else if (logWarnCount == Log::getWarnMessageLimit()) { LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE); } logWarnCount++; LOG_VART(nid1); LOG_VART(nid2); LOG_VART(wids1); LOG_VART(wids2); thetaScore = 1.0; } else { if (theta2.size() < theta1.size()) { vector<bool> exclude(theta2.size(), false); thetaScore = _calculateAngleScore(theta2, theta1, exclude, 0); } else { vector<bool> exclude(theta1.size(), false); thetaScore = _calculateAngleScore(theta1, theta2, exclude, 0); } } // simple stupid heuristic. 
Replace w/ some cosine fanciness later. int diff = abs((int)s1 - (int)s2); double result = (min(s1, s2) - diff) * thetaScore * distanceScore; LOG_VART(result); return result; }
void ImplicitTagRawRulesDeriver::_resolveCountTies() { //Any time more than one word/key combo has the same occurrence count, we need to pick just one //of them. LOG_INFO( "Resolving word/tag key/count ties for " << StringUtils::formatLargeNumber(_duplicatedWordTagKeyCountsToValues.size()) << " duplicated word/tag key/counts..."); _tieResolvedCountFile.reset( new QTemporaryFile( _tempFileDir + "/implicit-tag-raw-rules-generator-temp-XXXXXX")); _tieResolvedCountFile->setAutoRemove(!_keepTempFiles); if (!_tieResolvedCountFile->open()) { throw HootException( QObject::tr("Error opening %1 for writing.").arg(_tieResolvedCountFile->fileName())); } LOG_DEBUG("Opened tie resolve temp file: " << _tieResolvedCountFile->fileName()); if (_keepTempFiles) { LOG_WARN("Keeping temp file: " << _tieResolvedCountFile->fileName()); } if (!_dedupedCountFile->open()) { throw HootException( QObject::tr("Error opening %1 for reading.").arg(_dedupedCountFile->fileName())); } long lineCount = 0; long duplicateResolutions = 0; while (!_dedupedCountFile->atEnd()) { const QString line = QString::fromUtf8(_dedupedCountFile->readLine().constData()).trimmed(); LOG_VART(line); const QStringList lineParts = line.split("\t"); LOG_VART(lineParts); QString word = lineParts[1].trimmed(); LOG_VART(word); const QString kvp = lineParts[2].trimmed(); LOG_VART(kvp); const QString countStr = lineParts[0].trimmed(); const long count = countStr.toLong(); LOG_VART(count); const QStringList kvpParts = kvp.split("="); const QString tagKey = kvpParts[0]; LOG_VART(tagKey); const QString wordTagKey = word.trimmed() % ";" % tagKey.trimmed(); LOG_VART(wordTagKey); const QString wordTagKeyCount = word.trimmed() % ";" % tagKey.trimmed() % ";" % countStr.trimmed(); LOG_VART(wordTagKeyCount); const QString tagValue = kvpParts[1]; LOG_VART(tagValue); if (_duplicatedWordTagKeyCountsToValues.contains(wordTagKeyCount)) { LOG_TRACE("Resolving duplicated word/tag key/count for " << wordTagKeyCount << "..."); //To resolve the 
tie, we're going to pick the most specific kvp. e.g. amenity=public_hall //wins out of amenity=hall. This is not really dealing with same hierarchy level tags //(e.g. amenity=school and amenity=hall) and will just arbitrarily pick in that situation. //Duplicates do seem to be fairly rare, but there could be some perfomance gains by coming //up with a better way to handle this situation. QString lineWithMostSpecificKvp = line % "\n"; const QStringList tagValues = _duplicatedWordTagKeyCountsToValues[wordTagKeyCount]; for (int i = 0; i < tagValues.size(); i++) { const QString childKvp = tagKey % "=" % tagValues[i]; if (OsmSchema::getInstance().isAncestor(childKvp, tagKey % "=" % tagValue)) { lineWithMostSpecificKvp = countStr % "\t" % word % "\t" % childKvp % "\n"; } } LOG_VART(lineWithMostSpecificKvp); _tieResolvedCountFile->write(lineWithMostSpecificKvp.toUtf8()); duplicateResolutions++; } else { const QString updatedLine = countStr % "\t" % word % "\t" % kvp % "\n"; LOG_VART(updatedLine); _tieResolvedCountFile->write(updatedLine.toUtf8()); } lineCount++; if (lineCount % (_statusUpdateInterval * 10) == 0) { PROGRESS_INFO( "Parsed " << StringUtils::formatLargeNumber(lineCount) << " lines from input for duplicated tag key count ties."); } } LOG_VARD(lineCount); LOG_INFO( "Resolved " << StringUtils::formatLargeNumber(duplicateResolutions) << " word/tag key/count ties."); _duplicatedWordTagKeyCountsToValues.clear(); _tieResolvedCountFile->close(); }