Ejemplo n.º 1
0
bool IoUtils::isSupportedOgrFormat(const QString input, const bool allowDir)
{
  LOG_VART(input);
  LOG_VART(allowDir);

  if (!allowDir && QFileInfo(input).isDir())
  {
    return false;
  }

  LOG_VART(QFileInfo(input).isDir());
  //input is a dir; only accepting a dir as input if it contains a shape file or is a file geodb
  if (QFileInfo(input).isDir())
  {
    return input.toLower().endsWith(".gdb") ||
           FileUtils::dirContainsFileWithExtension(QFileInfo(input).dir(), "shp");
  }
  //single input
  else
  {
    //The only zip file format we support are ones containing OGR inputs.
    if (input.toLower().endsWith(".zip") ||
        //We only support this type of postgres URL for OGR inputs.
        input.toLower().startsWith("pg:") ||
        // Or, OGDI Vectors. Things like VPF (DNC, VMAP etc)
        input.toLower().startsWith("gltp:"))
    {
      return true;
    }
    LOG_VART(OgrUtilities::getInstance().getSupportedFormats(false));
    LOG_VART(QFileInfo(input).suffix());
    return OgrUtilities::getInstance().getSupportedFormats(false)
             .contains("." + QFileInfo(input).suffix());
  }
}
Ejemplo n.º 2
0
/**
 * Constructor
 *
 * @param status the status stored in this visitor
 * @param onlyUpdateIfStatusInvalid flag stored in this visitor alongside the status
 */
StatusUpdateVisitor::StatusUpdateVisitor(Status status, bool onlyUpdateIfStatusInvalid)
  : _status(status),
    _onlyUpdateIfStatusInvalid(onlyUpdateIfStatusInvalid)
{
  // Trace the values the visitor was constructed with.
  LOG_VART(_status);
  LOG_VART(_onlyUpdateIfStatusInvalid);
}
Ejemplo n.º 3
0
/**
 * Returns the average of the numeric tag values accumulated so far.
 *
 * @return the accumulated sum divided by the number of accumulated values, or 0.0 if no values
 * have been accumulated
 */
double AverageNumericTagsVisitor::getStat() const
{
  LOG_VART(_sum);
  LOG_VART(_tagCount);
  // Only the count guards the division. The sum may legitimately be negative, in which case the
  // average is negative as well and must not be reported as zero.
  if (_tagCount > 0)
  {
    return _sum / _tagCount;
  }
  return 0.0;
}
Ejemplo n.º 4
0
/**
 * Creates a reader for the given URL and applies the data source ID and file status options to it.
 *
 * @param useDataSourceIds passed to the reader's setUseDataSourceIds
 * @param useFileStatus passed to the reader's setUseFileStatus
 * @param url the URL to create the reader for
 * @return the configured reader
 */
boost::shared_ptr<OsmMapReader> OsmMapReaderFactory::createReader(bool useDataSourceIds,
                                                                  bool useFileStatus, QString url)
{
  LOG_VART(url);
  LOG_VART(useDataSourceIds);
  LOG_VART(useFileStatus);

  // Build the reader first, then configure it with the caller's options.
  boost::shared_ptr<OsmMapReader> configuredReader = _createReader(url);
  configuredReader->setUseDataSourceIds(useDataSourceIds);
  configuredReader->setUseFileStatus(useFileStatus);

  return configuredReader;
}
Ejemplo n.º 5
0
/**
 * Creates a reader for the given URL and applies the data source ID and default status options
 * to it.
 *
 * @param url the URL to create the reader for
 * @param useDataSourceIds passed to the reader's setUseDataSourceIds
 * @param defaultStatus passed to the reader's setDefaultStatus
 * @return the configured reader
 */
boost::shared_ptr<OsmMapReader> OsmMapReaderFactory::createReader(QString url,
                                                                  bool useDataSourceIds,
                                                                  Status defaultStatus)
{
  LOG_VART(url);
  LOG_VART(useDataSourceIds);
  LOG_VART(defaultStatus);

  // Build the reader first, then configure it with the caller's options.
  boost::shared_ptr<OsmMapReader> configuredReader = _createReader(url);
  configuredReader->setUseDataSourceIds(useDataSourceIds);
  configuredReader->setDefaultStatus(defaultStatus);

  return configuredReader;
}
Ejemplo n.º 6
0
/**
 * Renames the tag with the old key to the new key on the element, keeping the tag's value.
 * Elements that do not have the old key are left untouched.
 *
 * @param e the element to update
 */
void TagRenameKeyVisitor::visit(const boost::shared_ptr<Element>& e)
{
  LOG_VART(_oldKey);
  LOG_VART(_newKey);

  // Nothing to rename when the element does not carry the old key.
  if (!e->getTags().contains(_oldKey))
  {
    return;
  }

  Tags& elementTags = e->getTags();
  const QString preservedValue = elementTags.get(_oldKey);
  elementTags.remove(_oldKey);
  elementTags.insert(_newKey, preservedValue);
  e->setTags(elementTags);
}
Ejemplo n.º 7
0
/**
 * Creates the first registered translator, in CompareSt sort order, that reports the given script
 * as valid.
 *
 * Ownership of the returned translator passes to the caller. All other translators constructed
 * during the search are deleted before returning or throwing.
 *
 * @param scriptPath the script to find a translator for
 * @return a translator that has been initialized with the script
 * @throws HootException if no translator accepts the script
 */
ScriptTranslator* ScriptTranslatorFactory::createTranslator(QString scriptPath)
{
  LOG_VARD(scriptPath);

  _init();

  vector<ScriptTranslator*> st;
  st.reserve(_translators.size());
  for (size_t i = 0; i < _translators.size(); ++i)
  {
    LOG_VART(_translators[i]);
    st.push_back(Factory::getInstance().constructObject<ScriptTranslator>(_translators[i]));
  }

  sort(st.begin(), st.end(), CompareSt);
  LOG_VART(st);

  ScriptTranslator* result = 0;
  for (size_t i = 0; i < st.size(); ++i)
  {
    // Once a translator has been selected, the remaining candidates only need to be released;
    // stopping the loop early here would leak them.
    if (result != 0)
    {
      delete st[i];
      st[i] = 0;
      continue;
    }

    try
    {
      st[i]->setScript(scriptPath);
      LOG_VART(st[i]->isValidScript());
      if (st[i]->isValidScript())
      {
        result = st[i];
        // st has been sorted, so its indexes no longer line up with _translators; the translator
        // name is therefore not logged from _translators[i].
        LOG_TRACE("Found a valid translator.");
      }
    }
    catch (...)
    {
      LOG_WARN("isValidScript shouldn't throw an exception.");
    }

    // Release the candidate exactly once if it wasn't selected (rejected or threw).
    if (st[i] != result)
    {
      delete st[i];
      st[i] = 0;
    }
  }
  LOG_VART(result);

  if (result == 0)
  {
    throw HootException("Unable to find an appropriate scripting language for: " + scriptPath);
  }

  return result;
}
Ejemplo n.º 8
0
void StatusUpdateVisitor::setConfiguration(const Settings& conf)
{
  ConfigOptions configOptions(conf);
  _onlyUpdateIfStatusInvalid = configOptions.getStatusUpdateVisitorOnlyUpdateInvalidStatus();
  if (configOptions.getStatusUpdateVisitorStatus().trimmed().isEmpty())
  {
    _status = Status::Invalid;
  }
  else
  {
    _status = Status::fromString(configOptions.getStatusUpdateVisitorStatus());
  }
  LOG_VART(_status);
  LOG_VART(_onlyUpdateIfStatusInvalid);
}
Ejemplo n.º 9
0
void ImplicitTagRulesDatabaseDeriver::_writeCustomRules(long& linesWrittenCount)
{
  // would like to know somehow if any of the custom rules overlap with the db derived
  // rules from the public data - #2300

  LOG_DEBUG("Writing custom rules...");
  long ruleCount = 0;
  LOG_VARD(_customRules.getCustomRulesList().size());
  if (_customRules.getCustomRulesList().size() > 0)
  {
    const QMap<QString, QString> customRulesList = _customRules.getCustomRulesList();
    for (QMap<QString, QString>::const_iterator customRulesItr = customRulesList.begin();
         customRulesItr != customRulesList.end(); ++customRulesItr)
    {
      const QString line =
        QString::number(INT_MAX) % "\t" % customRulesItr.key().trimmed() % "\t" %
        customRulesItr.value().trimmed() % "\n";
      LOG_VART(line);
      _filteredCountFile->write(line.toUtf8());
      ruleCount++;
      linesWrittenCount++;
    }
    LOG_INFO("Wrote " << ruleCount << " custom rules.");
  }
}
Ejemplo n.º 10
0
/**
 * Converts a geometry to an element by dispatching on the geometry's type ID to the matching
 * conversion method.
 *
 * @param g the geometry to convert
 * @param s status passed on to the conversion method
 * @param circularError circular error passed on to the conversion method
 * @return the converted element; an empty pointer if the geometry type is not handled
 */
boost::shared_ptr<Element> GeometryConverter::convertGeometryToElement(const Geometry* g, Status s,
  double circularError)
{
  LOG_VART(g->getGeometryTypeId());

  // Remains empty when the geometry type is unsupported.
  boost::shared_ptr<Element> converted;

  switch (g->getGeometryTypeId())
  {
  case GEOS_POINT:
    converted = convertPointToNode(dynamic_cast<const Point*>(g), _map, s, circularError);
    break;

  // line strings and linear rings are both converted to ways
  case GEOS_LINESTRING:
  case GEOS_LINEARRING:
    converted = convertLineStringToWay(dynamic_cast<const LineString*>(g), _map, s, circularError);
    break;

  case GEOS_POLYGON:
    converted = convertPolygonToElement(dynamic_cast<const Polygon*>(g), _map, s, circularError);
    break;

  case GEOS_MULTILINESTRING:
    converted =
      convertMultiLineStringToElement(
        dynamic_cast<const MultiLineString*>(g), _map, s, circularError);
    break;

  case GEOS_MULTIPOLYGON:
    converted =
      convertMultiPolygonToRelation(dynamic_cast<const MultiPolygon*>(g), _map, s, circularError);
    break;

  case GEOS_GEOMETRYCOLLECTION:
    converted =
      convertGeometryCollection(dynamic_cast<const GeometryCollection*>(g), s, circularError);
    break;

  default:
    // Warn about the unsupported type until the warning limit is reached, then log the limit
    // message once.
    if (logWarnCount < Log::getWarnMessageLimit())
    {
      LOG_WARN("Unsupported geometry type. Element will be removed from the map. " + g->toString());
    }
    else if (logWarnCount == Log::getWarnMessageLimit())
    {
      LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE);
    }
    logWarnCount++;
    break;
  }

  return converted;
}
Ejemplo n.º 11
0
/**
 * Reads the deriver's settings from the configuration and, if name translation is enabled,
 * creates and configures the to English translator.
 *
 * The sort parallel count is replaced by the ideal thread count when it is less than one or
 * greater than the ideal thread count.
 *
 * @param conf the settings to read from
 */
void ImplicitTagRawRulesDeriver::setConfiguration(const Settings& conf)
{
  ConfigOptions options = ConfigOptions(conf);

  setSortParallelCount(options.getImplicitTaggingRawRulesDeriverSortParallelCount());
  int idealThreads = QThread::idealThreadCount();
  LOG_VART(idealThreads);
  // Some Qt versions document a return value of -1 when the number of cores can't be detected.
  // Never let a value below one become the parallel count used for sorting.
  if (idealThreads < 1)
  {
    idealThreads = 1;
  }
  if (_sortParallelCount < 1 || _sortParallelCount > idealThreads)
  {
    setSortParallelCount(idealThreads);
  }
  setSkipFiltering(options.getImplicitTaggingRawRulesDeriverSkipFiltering());
  setKeepTempFiles(options.getImplicitTaggingKeepTempFiles());
  setTempFileDir(options.getApidbBulkInserterTempFileDir());
  setTranslateNamesToEnglish(options.getImplicitTaggingDatabaseDeriverTranslateNamesToEnglish());
  setElementCriterion(options.getImplicitTaggingElementCriterion());

  // The translator is only needed, and therefore only created, when names are to be translated.
  if (_translateNamesToEnglish)
  {
    _translator.reset(
      Factory::getInstance().constructObject<ToEnglishTranslator>(
        options.getLanguageTranslationTranslator()));
    _translator->setConfiguration(conf);
    _translator->setSourceLanguages(options.getLanguageTranslationSourceLanguages());
    _translator->setId("ImplicitTagRawRulesDeriver");
  }
}
Ejemplo n.º 12
0
/**
 * Opens a data source with GDALOpenEx.
 *
 * @param url location of the data source to open
 * @param readonly if false, GDAL_OF_UPDATE is added to the open flags
 * @return the opened dataset
 * @throws HootException if GDALOpenEx returns no dataset
 */
boost::shared_ptr<GDALDataset> OgrUtilities::openDataSource(const QString url, bool readonly)
{
  /* Check for the correct driver name, if unknown try all drivers.
   * This can be an issue because drivers are tried in the order that they are
   * loaded which has been known to cause issues.
   */
  OgrDriverInfo driverInfo = getDriverInfo(url, readonly);

  // With GDALOpenEx, we need to specify the GDAL_OF_UPDATE option or the dataset will get opened
  // Read Only.
  if (!readonly)
  {
    driverInfo._driverType |= GDAL_OF_UPDATE;
  }
  LOG_VART(driverInfo._driverName);
  LOG_VART(driverInfo._driverType);
  LOG_VART(url.toUtf8().data());

  // NULL terminated, single entry driver list; only handed to GDAL when a driver name is known
  const char* drivers[2] = { driverInfo._driverName, NULL };
  const char* const* allowedDrivers = NULL;
  if (driverInfo._driverName != NULL)
  {
    allowedDrivers = drivers;
  }

  // Setup read options for various file types
  const QString driverName(driverInfo._driverName);
  OgrOptions options;
  if (driverName == "CSV")
  {
    options["X_POSSIBLE_NAMES"] = ConfigOptions().getOgrReaderCsvLonfield();
    options["Y_POSSIBLE_NAMES"] = ConfigOptions().getOgrReaderCsvLatfield();
//    options["Z_POSSIBLE_NAMES"] = ConfigOptions().getOgrReaderCsvZfield();
    options["KEEP_GEOM_COLUMNS"] = ConfigOptions().getOgrReaderCsvKeepGeomFields();
  }
  if (driverName == "OGR_OGDI")
  {
    // From the GDAL docs:
    // From GDAL/OGR 1.8.0, setting the OGR_OGDI_LAUNDER_LAYER_NAMES configuration option
    // (or environment variable) to YES causes the layer names to be simplified.
    // For example : watrcrsl_hydro instead of 'watrcrsl@hydro(*)_line'
    options["OGR_OGDI_LAUNDER_LAYER_NAMES"] = ConfigOptions().getOgrReaderOgdiLaunderLayerNames();
  }

  boost::shared_ptr<GDALDataset> result(
    static_cast<GDALDataset*>(
      GDALOpenEx(
        url.toUtf8().data(), driverInfo._driverType, allowedDrivers, options.getCrypticOptions(),
        NULL)));

  if (!result)
  {
    throw HootException("Unable to open: " + url);
  }

  return result;
}
Ejemplo n.º 13
0
/**
 * Looks up the session ID for a user from their access tokens and returns a cookie jar holding a
 * "SESSION" cookie with that ID, set for the given URL.
 *
 * @param userName name of the user to look up the session for
 * @param accessToken the user's access token
 * @param accessTokenSecret the user's access token secret
 * @param url the URL the session cookie is set for
 * @return a cookie jar containing the session cookie
 * @throws HootException if no session ID is found for the user
 */
boost::shared_ptr<HootNetworkCookieJar> NetworkIoUtils::getUserSessionCookie(
  const QString userName, const QString accessToken, const QString accessTokenSecret,
  const QString url)
{
  LOG_VART(userName);
  LOG_VART(accessToken);
  LOG_VART(url);

  HootApiDb db;
  LOG_VART(HootApiDb::getBaseUrl());
  //hoot db requires a layer to open, but we don't need one here...so put anything in
  QUrl dbUrl(HootApiDb::getBaseUrl().toString() + "/blah");
  db.open(dbUrl);
  const QString sessionId = db.getSessionIdByAccessTokens(userName, accessToken, accessTokenSecret);
  LOG_VART(sessionId);
  db.close();
  if (sessionId.isEmpty())
  {
    // An empty session ID means no session was found for the supplied user and tokens.
    throw HootException("User: " + userName + " has not been authenticated.");
  }

  boost::shared_ptr<HootNetworkCookieJar> cookieJar(new HootNetworkCookieJar());
  QList<QNetworkCookie> cookies;
  QNetworkCookie sessionCookie(QString("SESSION").toUtf8(), sessionId.toUtf8());
  cookies.append(sessionCookie);
  cookieJar->setCookiesFromUrl(cookies, url);
  LOG_VART(cookieJar->size());
  LOG_VART(cookieJar->toString());
  return cookieJar;
}
Ejemplo n.º 14
0
void ImplicitTagRulesDatabaseDeriver::_populateSchemaTagValues()
{
  _schemaTagValues.clear();
  _wordsNotInSchema.clear();

  // TODO: should the use and/or building categories be added here?
  const std::vector<SchemaVertex> tags =
    OsmSchema::getInstance().getTagByCategory(OsmSchemaCategory::poi());
  StringTokenizer tokenizer;
  for (std::vector<SchemaVertex>::const_iterator tagItr = tags.begin();
       tagItr != tags.end(); ++tagItr)
  {
    SchemaVertex tag = *tagItr;
    const QString tagVal = tag.value.toLower().replace("_", " ");
    if (!tagVal.contains("*"))  //skip wildcards
    {
      if (!_customRules.getWordIgnoreList().contains(tagVal, Qt::CaseInsensitive))
      {
        _schemaTagValues.insert(tagVal);
        //dealing with the uk english spellings on an as seen basis; this should be expanded and
        //made more extensible
        if (tagVal == "theatre")
        {
          _schemaTagValues.insert("theater");
        }
        if (tagVal == "centre")
        {
          _schemaTagValues.insert("center");
        }
        LOG_TRACE("Appended " << tagVal << " to schema tag values.");
      }
      QStringList vals = tokenizer.tokenize(tagVal);
      for (int i = 0; i < vals.size(); i++)
      {
        const QString val = vals.at(i);
        if (!_customRules.getWordIgnoreList().contains(val, Qt::CaseInsensitive))
        {
          _schemaTagValues.insert(val);
          //see comment above
          if (val == "theatre")
          {
            _schemaTagValues.insert("theater");
          }
          if (val == "centre")
          {
            _schemaTagValues.insert("center");
          }
          LOG_TRACE("Appended " << val << " to schema tag values.");
        }
      }
    }
  }
  LOG_VARD(_schemaTagValues.size());
  QStringList schemaTagValuesList = _schemaTagValues.toList();
  qSort(schemaTagValuesList.begin(), schemaTagValuesList.end()); //sort for viewing only
  LOG_VART(schemaTagValuesList);
}
Ejemplo n.º 15
0
/**
 * Attempts translation of the element's tags whose keys are in the configured tag key list and
 * updates the processing counters.
 *
 * @param e the element to translate tags for
 * @throws HootException if no tag keys have been specified
 */
void ToEnglishTranslationVisitor::visit(const boost::shared_ptr<Element>& e)
{
  if (_tagKeys.isEmpty())
  {
    throw HootException("No tag keys specified for language translation.");
  }

  LOG_VART(e);

  //if this var was set while parsing the previous element, increment the counter now
  if (_currentElementHasSuccessfulTagTranslation)
  {
    _numElementsWithSuccessfulTagTranslation++;
  }
  _currentElementHasSuccessfulTagTranslation = false;

  const Tags& tags = e->getTags();
  bool translationAttempted = false;
  QSet<QString>::const_iterator keyItr = _tagKeys.begin();
  for (; keyItr != _tagKeys.end(); ++keyItr)
  {
    const QString sourceKey = *keyItr;
    if (!tags.contains(sourceKey))
    {
      continue;
    }

    //making skipping tags that already have an english translated tag optional, b/c a many of the
    //OSM english translations I've seen are either just copies of the foreign language text or are
    //not very good translations
    const QString englishKey = sourceKey + ":en";
    const bool skipPreTranslated = !_ignorePreTranslatedTags && tags.contains(englishKey);
    if (skipPreTranslated)
    {
      LOG_TRACE(
        "Skipping element with pre-translated tag: " << englishKey << "=" <<
        tags.get(sourceKey).trimmed());
      continue;
    }

    _translate(e, sourceKey);
    translationAttempted = true;
  }

  // Progress is reported separately for elements with an attempted translation and for all
  // visited elements.
  if (translationAttempted)
  {
    _numProcessedElements++;
    if (_numProcessedElements % _taskStatusUpdateInterval == 0)
    {
      PROGRESS_INFO("Attempted tag translation for " << _numProcessedElements << " elements.");
    }
  }

  _numTotalElements++;
  if (_numTotalElements % _taskStatusUpdateInterval == 0)
  {
    PROGRESS_INFO("Visited " << _numTotalElements << " elements.");
  }
}
Ejemplo n.º 16
0
/**
 * Sets the configured status on the element and records it in the element's hoot status tag.
 * When only invalid statuses are to be updated, elements with any other status are left as is.
 *
 * @param e the element to update
 */
void StatusUpdateVisitor::visit(const boost::shared_ptr<Element>& e)
{
  LOG_VART(e->getStatus());

  const bool leaveUnchanged = _onlyUpdateIfStatusInvalid && e->getStatus() != Status::Invalid;
  if (!leaveUnchanged)
  {
    e->setStatus(_status);
    e->getTags()[MetadataTags::HootStatus()] = _status.toString();
  }
}
Ejemplo n.º 17
0
bool IoUtils::areSupportedOgrFormats(const QStringList inputs, const bool allowDir)
{
  if (inputs.size() == 0)
  {
    return false;
  }

  for (int i = 0; i < inputs.size(); i++)
  {
    const QString input = inputs.at(i);
    LOG_VART(input);
    const QString file = input.split(";")[0];
    LOG_VART(file);
    if (!isSupportedOgrFormat(file, allowDir))
    {
      return false;
    }
  }
  return true;
}
Ejemplo n.º 18
0
void ImplicitTagRawRulesDeriver::_parseNames(const QStringList names, const QStringList kvps)
{
  for (int i = 0; i < names.size(); i++)
  {
    QString name = names.at(i);
    LOG_VART(name);

    //'=' is used in the map key for kvps, so it needs to be escaped in the word
    if (name.contains("="))
    {
      name = name.replace("=", "%3D");
    }
    for (int j = 0; j < kvps.size(); j++)
    {
      _updateForNewWord(name, kvps.at(j));
    }

    QStringList nameTokens = _tokenizer.tokenize(name);
    LOG_VART(nameTokens.size());

    //tokenization

    for (int j = 0; j < nameTokens.size(); j++)
    {
      QString nameToken = nameTokens.at(j);
      _parseNameToken(nameToken, kvps);
    }

    //going up to a token group size of two; tested up to group size three, but three didn't seem to
    //yield any better tagging results
    if (nameTokens.size() > 2)
    {
      for (int j = 0; j < nameTokens.size() - 1; j++)
      {
        QString nameToken = nameTokens.at(j) + " " + nameTokens.at(j + 1);
        _parseNameToken(nameToken, kvps);
      }
    }
  }
}
Ejemplo n.º 19
0
/**
 * Cleans a name token, optionally translates it to English, and associates it with every one of
 * the given kvps.
 *
 * @param nameToken the token to parse; modified in place (commas removed, and replaced by its
 * translation when translation is enabled and yields a non-empty result)
 * @param kvps the key value pairs to associate with the token
 */
void ImplicitTagRawRulesDeriver::_parseNameToken(QString& nameToken, const QStringList kvps)
{
  //may eventually need to replace more punctuation chars here, but this is fine for now...need a
  //more extensible way to do it; also, that logic could moved into ImplicitTagUtils::cleanName
  nameToken.replace(",", "");
  LOG_VART(nameToken);

  if (_translateNamesToEnglish)
  {
    const QString englishNameToken = _translator->translate(nameToken);
    LOG_VART(englishNameToken);
    // An empty translation leaves the original token in place.
    const bool translationFound = !englishNameToken.isEmpty();
    if (translationFound)
    {
      nameToken = englishNameToken;
    }
  }

  for (QStringList::const_iterator kvpItr = kvps.begin(); kvpItr != kvps.end(); ++kvpItr)
  {
    _updateForNewWord(nameToken, *kvpItr);
  }
}
Ejemplo n.º 20
0
/**
 * Determines whether this match string is valid.
 *
 * @return true if there is at least one match and no match has a zero length subline; false
 * otherwise
 */
bool WaySublineMatchString::isValid() const
{
  // An empty match string is never valid.
  if (_matches.size() == 0)
  {
    return false;
  }

  for (size_t i = 0; i < _matches.size(); i++)
  {
    const WaySublineMatch& m = _matches[i];
    LOG_VART(m.getSubline1());
    LOG_VART(m.getSubline2());
    const bool hasZeroLengthSubline =
      m.getSubline1().isZeroLength() || m.getSubline2().isZeroLength();
    if (hasZeroLengthSubline)
    {
      return false;
    }
  }
  return true;
}
Ejemplo n.º 21
0
/**
 * Translates the non-numeric parts of an address to English.
 *
 * @param address the address to translate
 * @return the address with its translated parts substituted in, or an empty string if no part of
 * the address was translated
 */
QString AddressTranslator::translateToEnglish(const QString address) const
{
  const QStringList addressParts = address.simplified().split(" ");

  //Try to translate blocks of consecutive address tokens to cut down on the number of
  //translation calls made.
  QString combinedAddressPartToTranslate = "";
  QStringList combinedAddressPartsToTranslate;
  for (QStringList::const_iterator partItr = addressParts.begin(); partItr != addressParts.end();
       ++partItr)
  {
    const QString addressPart = *partItr;
    LOG_VART(addressPart);
    if (StringUtils::isNumber(addressPart))
    {
      // A number ends the current block of consecutive non-numeric tokens, if there is one.
      if (!combinedAddressPartToTranslate.isEmpty())
      {
        combinedAddressPartsToTranslate.append(combinedAddressPartToTranslate.trimmed());
        combinedAddressPartToTranslate = "";
      }
    }
    else
    {
      combinedAddressPartToTranslate += " " + addressPart;
      LOG_VART(combinedAddressPartToTranslate);
    }
    LOG_VART(combinedAddressPartsToTranslate);
  }
  // Don't lose a block that runs to the end of the address.
  if (!combinedAddressPartToTranslate.isEmpty())
  {
    combinedAddressPartsToTranslate.append(combinedAddressPartToTranslate.trimmed());
  }
  LOG_VART(combinedAddressPartsToTranslate);

  bool anyAddressPartWasTranslated = false;
  QString translatedAddress = address;
  for (int i = 0; i < combinedAddressPartsToTranslate.size(); i++)
  {
    const QString combinedAddressPart = combinedAddressPartsToTranslate.at(i).trimmed();
    LOG_VART(combinedAddressPart);
    const QString translatedCombinedAddressPart = _translator->translate(combinedAddressPart);
    if (translatedCombinedAddressPart.isEmpty())
    {
      continue;
    }

    translatedAddress.replace(combinedAddressPart, translatedCombinedAddressPart);
    LOG_VART(translatedAddress);
    anyAddressPartWasTranslated = true;
  }

  if (!anyAddressPartWasTranslated)
  {
    LOG_TRACE("Address " << address << " could not be translated.");
    return "";
  }

  LOG_TRACE("Translated address from " << address << " to " << translatedAddress);
  return translatedAddress;
}
Ejemplo n.º 22
0
/**
 * Constructs a match string from a collection of matches.
 *
 * @param m the matches to store
 * @throws OverlappingMatchesException if any two distinct matches in the collection overlap
 */
WaySublineMatchString::WaySublineMatchString(const MatchCollection& m)
{
  // Every ordered pair of distinct matches is checked before the collection is accepted.
  for (size_t first = 0; first < m.size(); first++)
  {
    for (size_t second = 0; second < m.size(); second++)
    {
      if (first == second)
      {
        continue;
      }

      if (m[first].overlaps(m[second]))
      {
        LOG_VART(m);
        throw OverlappingMatchesException(
          "The match collection must not contain overlapping matches.");
      }
    }
  }
  _matches = m;
}
Ejemplo n.º 23
0
/**
 * Finds the class name of the first registered reader that supports the given URL.
 *
 * @param url the URL to find a reader for
 * @return the reader's class name, or an empty string if no registered reader supports the URL
 */
QString OsmMapReaderFactory::getReaderName(const QString url)
{
  LOG_VARD(url);
  vector<std::string> names =
    Factory::getInstance().getObjectNamesByBase(OsmMapReader::className());
  LOG_VARD(names.size());

  // Each candidate is constructed only to ask it whether it supports the URL.
  boost::shared_ptr<OsmMapReader> candidate;
  for (vector<std::string>::const_iterator nameItr = names.begin(); nameItr != names.end();
       ++nameItr)
  {
    const std::string name = *nameItr;
    LOG_VART(name);
    candidate.reset(Factory::getInstance().constructObject<OsmMapReader>(name));
    if (candidate->isSupported(url))
    {
      return QString::fromStdString(name);
    }
  }
  return "";
}
Ejemplo n.º 24
0
/**
 * Sorts the count file by occurrence count into a new temp file using the system's sort, uniq,
 * and sed commands.
 *
 * @throws HootException if the sorted temp file can't be opened, the count file doesn't exist, or
 * the sort command returns a non-zero status
 */
void ImplicitTagRawRulesDeriver::_sortByTagOccurrence()
{
  LOG_INFO("Sorting output by tag occurrence count...");
  LOG_VART(_sortParallelCount);

  // Set up the temp file that receives the sorted output.
  const QString sortedFileTemplate = _tempFileDir + "/implicit-tag-raw-rules-generator-temp-XXXXXX";
  _sortedCountFile.reset(new QTemporaryFile(sortedFileTemplate));
  _sortedCountFile->setAutoRemove(!_keepTempFiles);
  if (!_sortedCountFile->open())
  {
    throw HootException(
      QObject::tr("Error opening %1 for writing.").arg(_sortedCountFile->fileName()));
  }
  LOG_DEBUG("Opened sorted temp file: " << _sortedCountFile->fileName());
  if (_keepTempFiles)
  {
    LOG_WARN("Keeping temp file: " << _sortedCountFile->fileName());
  }
  if (!_countFile->exists())
  {
    throw HootException("Unable to sort file; file doesn't exist.");
  }

  //This counts each unique line occurrence, sorts by decreasing occurrence count (necessary for
  //next step which removes duplicate tag keys associated with the same word), and replaces the
  //space between the prepended count and the word with a tab.

  //sort by highest count, then by word, then by tag
  const QString parallelOption = "--parallel=" + QString::number(_sortParallelCount);
  const QString cmd =
    "sort " + parallelOption + " " + _countFile->fileName() +
    " | uniq -c | sort -n -r " + parallelOption + " | " +
    "sed -e 's/^ *//;s/ /\\t/' > " + _sortedCountFile->fileName();
  const int commandStatus = std::system(cmd.toStdString().c_str());
  if (commandStatus != 0)
  {
    throw HootException("Unable to sort file.");
  }
  LOG_INFO(
    "Wrote " <<
    StringUtils::formatLargeNumber(FileUtils::getNumberOfLinesInFile(_sortedCountFile->fileName())) <<
    " lines to sorted file.");
}
Ejemplo n.º 25
0
/**
 * Reads a list of tags from a file, one entry per line.
 *
 * Blank lines and lines starting with '#' are skipped. Unless keysOnly is set, lines that don't
 * contain '=' are skipped as well. Entries are trimmed and lower cased.
 *
 * @param inputPath path of the file to read; an empty (after trimming) path yields an empty list
 * @param keysOnly if true, lines without '=' are accepted
 * @return the entries read from the file
 * @throws HootException if the file can't be opened
 */
QStringList TagListReader::readList(const QString inputPath, const bool keysOnly)
{
  LOG_VARD(inputPath);
  QStringList outputList;
  if (!inputPath.trimmed().isEmpty())
  {
    QFile inputFile(inputPath);
    if (!inputFile.open(QIODevice::ReadOnly))
    {
      // The file is opened read-only, so report a read failure.
      throw HootException(QObject::tr("Error opening %1 for reading.").arg(inputFile.fileName()));
    }
    while (!inputFile.atEnd())
    {
      const QString line = QString::fromUtf8(inputFile.readLine().constData()).trimmed();
      if (!line.isEmpty() && !line.startsWith("#") && (keysOnly || line.contains("=")))
      {
        outputList.append(line.toLower());
      }
    }
    inputFile.close();
  }
  LOG_VART(outputList);
  return outputList;
}
/**
 * Returns the weight of a word: its count divided by the dictionary's total count.
 *
 * The word is lower cased, normalized (form C), and stripped of non-word characters before the
 * lookup. Counts greater than zero that had to be read from the reader are cached.
 *
 * @param word the word to get the weight for
 * @return the word's count divided by _count
 */
double SqliteWordWeightDictionary::getWeight(const QString& word) const
{
  LOG_VART(word);
  QString normalized = word.toLower().normalized(QString::NormalizationForm_C);
  normalized.replace(_nonWord, "");

  long c;
  WeightHash::const_iterator it = _weights.find(normalized);
  if (it != _weights.end())
  {
    c = it->second;
  }
  else
  {
    // Cache miss: read the count and remember it. The count must still be converted to a weight
    // below so that cached and uncached lookups of the same word return the same value.
    c = _reader.readCount(normalized);
    if (c > 0)
    {
      _weights[normalized] = c;
    }
  }

  return c / (double)_count;
}
Ejemplo n.º 27
0
/**
 * Adds the numeric values of the element's tags with the configured keys to the running sum and
 * count. Values that can't be parsed as a number are skipped with a (rate limited) warning.
 *
 * @param e the element to read tag values from
 */
void AverageNumericTagsVisitor::visit(const ConstElementPtr& e)
{
  for (int i = 0; i < _keys.size(); i++)
  {
    const QString key = _keys.at(i);
    LOG_VART(key);
    if (!e->getTags().contains(key))
    {
      continue;
    }

    bool parsed = false;
    const QString strValue = e->getTags().get(key);
    LOG_VART(strValue);
    const double value = strValue.toDouble(&parsed);
    LOG_VART(value);
    LOG_VART(parsed);

    if (!parsed)
    {
      // Warn until the warning limit is reached, then log the limit message once.
      if (logWarnCount < Log::getWarnMessageLimit())
      {
        LOG_WARN(
          "Unsuccessfully attempted to convert tag with key: " << key << " and value: " <<
          strValue << " to number.");
      }
      else if (logWarnCount == Log::getWarnMessageLimit())
      {
        LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE);
      }
      logWarnCount++;
      continue;
    }

    _tagCount++;
    _sum += value;
    LOG_VART(_tagCount);
    LOG_VART(_sum);
  }
}
Ejemplo n.º 28
0
/**
 * Calculates the headings of the given ways at the given node.
 *
 * Ways that are not network feature types are skipped. A heading is calculated only for ways
 * whose first or last node is the given node; any other way is recorded as a "bad spot".
 *
 * @param map the map containing the node and ways
 * @param nid ID of the node to calculate the angles at
 * @param wids IDs of the ways to calculate headings for
 * @param delta passed to WayHeading::calculateHeading
 * @return one heading per way that starts or ends at the node
 * @throws HootException if both headings and bad spots were found and the
 * node matcher "fail on bad angle spots" option is enabled
 */
vector<Radians> NodeMatcher::calculateAngles(const OsmMap* map, long nid,
                                             const set<long>& wids, Meters delta)
{
  vector<Radians> result;
  result.reserve(wids.size());

  LOG_VART(nid);
  // IDs of ways that contain neither nid as their first nor as their last node
  QSet<long> badWayIds;
  for (set<long>::const_iterator it = wids.begin(); it != wids.end(); ++it)
  {
    const ConstWayPtr& w = map->getWay(*it);
    LOG_VART(w->getId());
    LOG_VART(w->getLastNodeId());
    LOG_VART(w->getNodeId(0));

    if (!isNetworkFeatureType(w))
    {
      // if this isn't a feature from a specific list, then don't consider it.
      LOG_TRACE("calculateAngles skipping feature...");
    }
    else if (w->getNodeId(0) == nid)
    {
      LOG_TRACE("Start node: " << nid);
      // heading at the very start of the way (segment 0, fraction 0.0)
      WayLocation wl(map->shared_from_this(), w, 0, 0.0);
      Radians heading = WayHeading::calculateHeading(wl, delta);
      // This is the first node so the angle is an inbound angle, reverse the value.
      if (heading < 0.0)
      {
        heading += M_PI;
      }
      else
      {
        heading -= M_PI;
      }
      LOG_VART(heading);
      result.push_back(heading);
    }
    else if (w->getLastNodeId() == nid)
    {
      LOG_TRACE("End node: " << nid);
      // heading at the last node of the way; used as is (not reversed)
      WayLocation wl(map->shared_from_this(), w, w->getNodeCount() - 1, 1.0);
      Radians heading = WayHeading::calculateHeading(wl, delta);
      LOG_VART(heading);
      result.push_back(heading);
    }
    else
    {
      // count this as a bad spot. If we find some valid spots and some bad spots then that is an
      // error condition
      badWayIds.insert(w->getId());
    }
  }

  LOG_VART(badWayIds.size());
  LOG_VART(result.size());
  // Bad spots are only reported when at least one valid heading was also found.
  if (result.size() > 0 && badWayIds.size() > 0)
  {
    LOG_TRACE(
      "Found " << badWayIds.size() << " bad spot(s) in NodeMatcher when calculating angles " <<
      "with node: " << nid);
    LOG_TRACE("wids: " << badWayIds);
    // Trace details of each bad way to help with debugging.
    for (QSet<long>::const_iterator it = badWayIds.begin(); it != badWayIds.end(); ++it)
    {
      const ConstWayPtr& w = map->getWay(*it);
      LOG_VART(w->getId());
      LOG_VART(w->getTags().get("REF1"));
      LOG_VART(w->getTags().get("REF2"));
      LOG_VART(w->getNodeIndex(nid));
      LOG_VART(w->getNodeId(0));
      LOG_VART(w->getLastNodeId());
    }

    // Failing on bad spots is optional and controlled by configuration.
    if (ConfigOptions().getNodeMatcherFailOnBadAngleSpots())
    {
      throw HootException(
        QString("NodeMatcher::calculateAngles was called with a node that was not a start or ") +
        QString("end node on the specified way."));
    }
  }

  return result;
}
Ejemplo n.º 29
0
/**
 * Scores how well two nodes match based on the distance between them and the angles of the ways
 * meeting at each of them.
 *
 * @param nid1 ID of the first node
 * @param nid2 ID of the second node
 * @return 0.0 if fewer than three way angles are found at either node; otherwise
 * (min(angle counts) - |difference of angle counts|) * angle score * distance score
 */
double NodeMatcher::scorePair(long nid1, long nid2)
{
  ConstNodePtr n1 = _map->getNode(nid1);
  ConstNodePtr n2 = _map->getNode(nid2);

  // the IDs of the ways that each node belongs to
  const set<long>& wids1 = _map->getIndex().getNodeToWayMap()->at(nid1);
  const set<long>& wids2 = _map->getIndex().getNodeToWayMap()->at(nid2);

  // acc ends up as the largest circular error of any way touching either node
  double acc = 0;
  for (set<long>::const_iterator it = wids1.begin(); it != wids1.end(); ++it)
  {
    acc = max(acc, _map->getWay(*it)->getCircularError());
  }
  for (set<long>::const_iterator it = wids2.begin(); it != wids2.end(); ++it)
  {
    acc = max(acc, _map->getWay(*it)->getCircularError());
  }

  vector<Radians> theta1 = calculateAngles(_map.get(), nid1, wids1, _delta);
  vector<Radians> theta2 = calculateAngles(_map.get(), nid2, wids2, _delta);

  // The angle counts are captured before theta1/theta2 are possibly swapped below.
  int s1 = theta1.size();
  int s2 = theta2.size();

  // Nodes with fewer than three angles are not scored.
  if (s1 < 3 || s2 < 3)
  {
    return 0.0;
  }

  double d = n1->toCoordinate().distance(n2->toCoordinate());

  // TODO: this isnt right; Talk to mike
  double distanceScore = 1 - (Normal::phi(d, acc * 1.5) - 0.5) * 2.0;
  LOG_VART(nid1);
  LOG_VART(nid2);
  LOG_VART(distanceScore);
  LOG_VART(acc);
  LOG_VART(d);
  LOG_VART(Normal::phi(d, acc * 1.5));
  LOG_VART(Normal::phi(d, acc / 2.0));

  // After this swap theta1 is never smaller than theta2.
  if (theta1.size() < theta2.size())
  {
    swap(theta1, theta2);
  }

  double thetaScore;
  // this is very unsual and will slow things down.
  // (the condition holds when both nodes have more than six angles)
  if (theta1.size() > 6 && theta2.size() > 6)
  {
    if (logWarnCount < Log::getWarnMessageLimit())
    {
      LOG_WARN("Greater than seven intersections at one spot? Odd.  Giving it a high angleScore.");
    }
    else if (logWarnCount == Log::getWarnMessageLimit())
    {
      LOG_WARN(className() << ": " << Log::LOG_WARN_LIMIT_REACHED_MESSAGE);
    }
    logWarnCount++;
    LOG_VART(nid1);
    LOG_VART(nid2);
    LOG_VART(wids1);
    LOG_VART(wids2);
    // The angle score calculation is skipped here and the maximum score is assumed.
    thetaScore = 1.0;
  }
  else
  {
    // The smaller angle list is passed first; the exclude vector has one entry per angle in it.
    if (theta2.size() < theta1.size())
    {
      vector<bool> exclude(theta2.size(), false);
      thetaScore = _calculateAngleScore(theta2, theta1, exclude, 0);
    }
    else
    {
      vector<bool> exclude(theta1.size(), false);
      thetaScore = _calculateAngleScore(theta1, theta2, exclude, 0);
    }
  }

  // simple stupid heuristic. Replace w/ some cosine fanciness later.
  int diff = abs((int)s1 - (int)s2);

  // A difference in the number of angles reduces the score.
  double result = (min(s1, s2) - diff) * thetaScore * distanceScore;

  LOG_VART(result);
  return result;
}
Ejemplo n.º 30
0
/**
 * Copies the deduped count file to a new tie resolved temp file, line by line. For lines whose
 * word/tag key/count is in the duplicates map, the line written out may be replaced by one using
 * a more specific tag value from the duplicates.
 *
 * Each input line is expected to have the form: count, tab, word, tab, key=value.
 *
 * @throws HootException if the tie resolved temp file or the deduped count file can't be opened
 */
void ImplicitTagRawRulesDeriver::_resolveCountTies()
{
  //Any time more than one word/key combo has the same occurrence count, we need to pick just one
  //of them.

  LOG_INFO(
    "Resolving word/tag key/count ties for " <<
    StringUtils::formatLargeNumber(_duplicatedWordTagKeyCountsToValues.size()) <<
    " duplicated word/tag key/counts...");

  // Set up the output temp file; it is removed automatically unless temp files are being kept.
  _tieResolvedCountFile.reset(
    new QTemporaryFile(
      _tempFileDir + "/implicit-tag-raw-rules-generator-temp-XXXXXX"));
  _tieResolvedCountFile->setAutoRemove(!_keepTempFiles);
  if (!_tieResolvedCountFile->open())
  {
    throw HootException(
      QObject::tr("Error opening %1 for writing.").arg(_tieResolvedCountFile->fileName()));
  }
  LOG_DEBUG("Opened tie resolve temp file: " << _tieResolvedCountFile->fileName());
  if (_keepTempFiles)
  {
    LOG_WARN("Keeping temp file: " << _tieResolvedCountFile->fileName());
  }
  if (!_dedupedCountFile->open())
  {
    throw HootException(
      QObject::tr("Error opening %1 for reading.").arg(_dedupedCountFile->fileName()));
  }

  long lineCount = 0;
  long duplicateResolutions = 0;
  while (!_dedupedCountFile->atEnd())
  {
    // Split the line into its count, word, and kvp columns.
    // NOTE(review): lineParts and kvpParts are indexed without a size check, so this appears to
    // assume every line is well formed - confirm against the code that writes the deduped file.
    const QString line = QString::fromUtf8(_dedupedCountFile->readLine().constData()).trimmed();
    LOG_VART(line);
    const QStringList lineParts = line.split("\t");
    LOG_VART(lineParts);
    QString word = lineParts[1].trimmed();
    LOG_VART(word);
    const QString kvp = lineParts[2].trimmed();
    LOG_VART(kvp);
    const QString countStr = lineParts[0].trimmed();
    const long count = countStr.toLong();
    LOG_VART(count);
    const QStringList kvpParts = kvp.split("=");
    const QString tagKey = kvpParts[0];
    LOG_VART(tagKey);
    const QString wordTagKey = word.trimmed() % ";" % tagKey.trimmed();
    LOG_VART(wordTagKey);
    // lookup key into the duplicates map: word;tag key;count
    const QString wordTagKeyCount =
      word.trimmed() % ";" % tagKey.trimmed() % ";" % countStr.trimmed();
    LOG_VART(wordTagKeyCount);
    const QString tagValue = kvpParts[1];
    LOG_VART(tagValue);

    if (_duplicatedWordTagKeyCountsToValues.contains(wordTagKeyCount))
    {
      LOG_TRACE("Resolving duplicated word/tag key/count for " << wordTagKeyCount << "...");

      //To resolve the tie, we're going to pick the most specific kvp.  e.g. amenity=public_hall
      //wins out of amenity=hall.  This is not really dealing with same hierarchy level tags
      //(e.g. amenity=school and amenity=hall) and will just arbitrarily pick in that situation.
      //Duplicates do seem to be fairly rare, but there could be some perfomance gains by coming
      //up with a better way to handle this situation.
      // Start with the current line; it is kept if no duplicate value passes the ancestor check.
      QString lineWithMostSpecificKvp = line % "\n";
      const QStringList tagValues = _duplicatedWordTagKeyCountsToValues[wordTagKeyCount];
      for (int i = 0; i < tagValues.size(); i++)
      {
        const QString childKvp = tagKey % "=" % tagValues[i];
        if (OsmSchema::getInstance().isAncestor(childKvp, tagKey % "=" % tagValue))
        {
          lineWithMostSpecificKvp = countStr % "\t" % word % "\t" % childKvp % "\n";
        }
      }
      LOG_VART(lineWithMostSpecificKvp);
      _tieResolvedCountFile->write(lineWithMostSpecificKvp.toUtf8());
      duplicateResolutions++;
    }
    else
    {
      // Not a duplicate; the line is written out rebuilt from its trimmed columns.
      const QString updatedLine = countStr % "\t" % word % "\t" % kvp % "\n";
      LOG_VART(updatedLine);
      _tieResolvedCountFile->write(updatedLine.toUtf8());
    }

    lineCount++;
    if (lineCount % (_statusUpdateInterval * 10) == 0)
    {
      PROGRESS_INFO(
        "Parsed " << StringUtils::formatLargeNumber(lineCount) <<
        " lines from input for duplicated tag key count ties.");
    }
  }
  LOG_VARD(lineCount);
  LOG_INFO(
    "Resolved " << StringUtils::formatLargeNumber(duplicateResolutions) <<
    " word/tag key/count ties.");
  // The duplicates map is no longer needed once all ties have been resolved.
  _duplicatedWordTagKeyCountsToValues.clear();
  _tieResolvedCountFile->close();
}