Example #1
0
/**
 * Selects the name from a tag set that is most likely to be English.
 *
 * A non-empty "name:en" tag is returned directly. Otherwise every name reported by
 * Tags::getNames() is rated with scoreName() and the highest rated one is returned; on a tie
 * the earliest name wins. An empty QString is returned when no name achieves a score above the
 * lowest representable double (for example when there are no names at all).
 *
 * @param tags the tags to search for a name
 * @return the explicit English name, or the best scoring name, or an empty string
 */
QString MostEnglishName::getMostEnglishName(const Tags& tags)
{
  // An explicit, non-empty English name short-circuits the scoring below.
  if (tags.contains("name:en"))
  {
    const QString explicitEnglish = tags.get("name:en");
    if (!explicitEnglish.isEmpty())
    {
      return explicitEnglish;
    }
  }

  const QStringList candidates = tags.getNames();

  QString winner;
  double winningScore = -numeric_limits<double>::max();

  for (QStringList::const_iterator it = candidates.constBegin(); it != candidates.constEnd(); ++it)
  {
    const double candidateScore = scoreName(*it);

    // Strictly greater: an equal score never displaces an earlier candidate.
    if (candidateScore > winningScore)
    {
      winningScore = candidateScore;
      winner = *it;
    }
  }

  return winner;
}
Example #2
0
/**
 * Compares the names found in two tag sets and reports a similarity score plus a weight.
 *
 * Each name is first passed through Translator::translateStreet(). Every pairing of a name
 * from t1 with a name from t2 is then rated with LevenshteinDistance::score() and the pairs are
 * consumed greedily from a priority queue, never reusing a name from either side, until
 * round(min(#names1, #names2) / 2) pairs have been taken. The reported score is the mean of the
 * selected pair scores.
 *
 * @param t1 first set of tags
 * @param t2 second set of tags
 * @param score [out] mean score of the selected pairs. When no pair can be selected (at least
 *   one side has no names) it is 0.2 if strict is set and exactly one side has names, and 1
 *   otherwise.
 * @param weight [out] number of name pairs that contributed to the score; 0 when at least one
 *   side has no names.
 * @param strict if true, a name present on only one side is penalised (score 0.2) instead of
 *   being treated as a perfect match (score 1).
 */
void TagComparator::compareNames(const Tags& t1, const Tags& t2, double& score, double& weight,
                                 bool strict)
{
  //double score = LevenshteinDistance::score();
  // Check out picard's coefficient
  // or sum(top scores) / min(t1.count, t2.count)
  // score for the same and score for different
  score = 0;
  weight = 0;

  QStringList n1 = t1.getNames();
  QStringList n2 = t2.getNames();

  // Translate the names in place before they are compared.
  for (int i = 0; i < n1.size(); i++)
  {
    n1[i] = Translator::getInstance().translateStreet(n1[i]);
  }
  for (int i = 0; i < n2.size(); i++)
  {
    n2[i] = Translator::getInstance().translateStreet(n2[i]);
  }

  // Holds one entry per (n1[i], n2[j]) pairing. Entry also serves as the comparator, so the
  // order in which entries come off the heap is defined by Entry itself (declared elsewhere;
  // presumably best score first).
  priority_queue<Entry, deque<Entry>, Entry> heap;

  // Rate every pairing. The entries are pushed straight onto the heap; no separate score
  // matrix is needed because nothing else reads the individual scores.
  Entry e;
  for (int i = 0; i < n1.size(); i++)
  {
    e.i = i;
    for (int j = 0; j < n2.size(); j++)
    {
      e.j = j;
      e.score = LevenshteinDistance::score(n1[i], n2[j]);
      //LOG_INFO("n1: " << n1[i].toStdString() << " n2: " << n2[j].toStdString() << " " << e.score);
      heap.push(e);
    }
  }

  // Only half (rounded to nearest, halves rounded up) of the shorter name list has to be
  // matched.
  int scoreCount =
    static_cast<int>(static_cast<double>(std::min(n1.size(), n2.size())) / 2.0 + 0.5);
  weight = scoreCount;

  // Indexes into n1 / n2 that have already been consumed by a selected pair.
  set<int> used1;
  set<int> used2;

  while (scoreCount > 0)
  {
    // The heap holds all n1.size() * n2.size() pairings and scoreCount never exceeds the size
    // of the shorter list, so a pairing with two unused names is always still available.
    assert(heap.size() > 0);

    e = heap.top();
    heap.pop();
    // Skip pairings that would reuse a name that is already part of a selected pair.
    if (used1.find(e.i) == used1.end() && used2.find(e.j) == used2.end())
    {
      //LOG_DEBUG("  " << n1[e.i].toStdString() << ", " << n2[e.j].toStdString() << " " << e.score);
      score += e.score;
      used1.insert(e.i);
      used2.insert(e.j);
      scoreCount--;
    }
  }

  if (weight > 0)
  {
    // Average over the selected pairs.
    score /= weight;
  }
  // if this is strict checking and one entry doesn't have a name.
  else if (strict && (n1.size() > 0) != (n2.size() > 0))
  {
    score = 0.2;
  }
  else
  {
    // No pair was selected and the strict penalty does not apply.
    score = 1;
  }
}