/**
 * Picks the name from the given tags that is most likely to be English.
 *
 * A non-empty "name:en" tag is returned as is. Otherwise every name reported
 * by Tags::getNames() is scored with scoreName() and the highest scoring one
 * is returned; on a tie the earliest name wins.
 *
 * @param tags the tags to search for names
 * @return the explicit English name, the best scoring name, or an empty
 *         string if there are no names to choose from
 */
QString MostEnglishName::getMostEnglishName(const Tags& tags)
{
  // An explicit, populated English name short-circuits the scoring.
  if (tags.contains("name:en"))
  {
    const QString explicitEnglish = tags.get("name:en");
    if (!explicitEnglish.isEmpty())
    {
      return explicitEnglish;
    }
  }

  const QStringList candidates = tags.getNames();

  QString winner;
  // Start below any finite score so the first finite-scoring candidate is accepted.
  double winnerScore = -numeric_limits<double>::max();

  for (int idx = 0; idx < candidates.size(); ++idx)
  {
    const QString& candidate = candidates.at(idx);
    const double candidateScore = scoreName(candidate);

    // Strictly greater: an equal score never displaces an earlier candidate.
    if (candidateScore > winnerScore)
    {
      winnerScore = candidateScore;
      winner = candidate;
    }
  }

  return winner;
}
void TagComparator::compareNames(const Tags& t1, const Tags& t2, double& score, double& weight, bool strict) { //double score = LevenshteinDistance::score(); // Check out picard's coefficient // or sum(top scores) / min(t1.count, t2.count) // score for the same and score for different score = 0; weight = 0; QStringList n1 = t1.getNames(); QStringList n2 = t2.getNames(); for (int i = 0; i < n1.size(); i++) { n1[i] = Translator::getInstance().translateStreet(n1[i]); } for (int i = 0; i < n2.size(); i++) { n2[i] = Translator::getInstance().translateStreet(n2[i]); } priority_queue<Entry, deque<Entry>, Entry> heap; // create a n x m matrix of scores vector< vector<double> > scores; scores.resize(n1.size()); Entry e; for (int i = 0; i < n1.size(); i++) { scores[i].resize(n2.size()); e.i = i; for (int j = 0; j < n2.size(); j++) { e.j = j; e.score = LevenshteinDistance::score(n1[i], n2[j]); //LOG_INFO("n1: " << n1[i].toStdString() << " n2: " << n2[j].toStdString() << " " << e.score); heap.push(e); } } int scoreCount = (int)((double)std::min(n1.size(), n2.size()) / 2.0 + 0.5); weight = scoreCount; set<int> used1; set<int> used2; while (scoreCount > 0) { assert(heap.size() > 0); e = heap.top(); heap.pop(); if (used1.find(e.i) == used1.end() && used2.find(e.j) == used2.end()) { //LOG_DEBUG(" " << n1[e.i].toStdString() << ", " << n2[e.j].toStdString() << " " << e.score); score += e.score; used1.insert(e.i); used2.insert(e.j); scoreCount--; } } if (weight > 0) { score /= weight; } // if this is strict checking and one entry doesn't have a name. else if (strict && (n1.size() > 0) != (n2.size() > 0)) { score = 0.2; } else { score = 1; } }