// Find the club in the database whose name best matches `name`.
//
// The lookup proceeds in stages, returning as soon as one stage yields a
// single answer:
//   1. `name` is split into canonical tokens (canonizeSplitName) and every
//      token is looked up in the token -> club-index multimap `cnhash`.
//      - A token longer than 3 characters that maps to exactly one club
//        decides the match immediately.
//      - Otherwise the candidate set is narrowed to the clubs shared by the
//        tokens seen so far; a single remaining candidate is returned.
//   2. The remaining candidates are compared case-insensitively against the
//      raw `name`.
//   3. They are compared after canonizeName() has been applied to both sides.
//   4. Finally the candidate with the smallest stringDistance() is accepted,
//      but only if it is clearly good (< 0.2) and the runner-up is clearly
//      worse (> 0.4).
//
// Returns a pointer into `cdb`, or 0 when no club can be chosen
// unambiguously.
//
// NOTE(review): setupCNHash() presumably (re)builds `cnhash` on demand, and
// stringDistance() appears to yield smaller values for more similar strings
// -- confirm against their definitions.
oClub *RunnerDB::getClub(const string &name) const
{
  setupCNHash();

  vector<string> names;
  canonizeSplitName(name, names);

  // Indices (into cdb) of the clubs that are still possible matches.
  set<int> iset;

  for (size_t k = 0; k < names.size(); k++) {
    const string &token = names[k];

    // Collect every club index registered under this token.
    // lower_bound() is used rather than find(): for a multimap, find() may
    // return any of the equivalent elements, so a forward scan starting from
    // it is not guaranteed to visit all of them.
    vector<int> matches;
    multimap<string, int>::const_iterator it = cnhash.lower_bound(token);
    while (it != cnhash.end() && token == it->first) {
      matches.push_back(it->second);
      ++it;
    }

    // A sufficiently long token that belongs to exactly one club is decisive.
    if (matches.size() == 1 && token.length() > 3)
      return pClub(&cdb[matches[0]]);

    if (iset.empty()) {
      // Nothing collected yet: this token seeds the candidate set.
      iset.insert(matches.begin(), matches.end());
    }
    else {
      // Intersect the current candidates with this token's clubs.
      set<int> im;
      for (size_t j = 0; j < matches.size(); j++) {
        if (iset.count(matches[j]) == 1)
          im.insert(matches[j]);
      }

      if (im.size() == 1)
        return pClub(&cdb[*im.begin()]);

      // Only narrow the set when the intersection is non-empty; a token with
      // no overlap is ignored instead of discarding every candidate.
      if (!im.empty())
        iset.swap(im);
    }
  }

  // Exact (case-insensitive) compare against the name as given.
  for (set<int>::iterator it = iset.begin(); it != iset.end(); ++it) {
    pClub pc = pClub(&cdb[*it]);
    if (_stricmp(pc->getName().c_str(), name.c_str()) == 0)
      return pc;
  }

  // Copy the canonical form into a string so that it stays valid across the
  // further canonizeName() calls below.
  string cname = canonizeName(name.c_str());

  // Looser compare: both names in canonical form.
  for (set<int>::iterator it = iset.begin(); it != iset.end(); ++it) {
    pClub pc = pClub(&cdb[*it]);
    if (strcmp(canonizeName(pc->getName().c_str()), cname.c_str()) == 0)
      return pc;
  }

  // Fuzzy compare: track the smallest and second smallest distance.
  double best = 1;
  double secondBest = 1;
  int bestIndex = -1;
  for (set<int>::iterator it = iset.begin(); it != iset.end(); ++it) {
    pClub pc = pClub(&cdb[*it]);
    double d = stringDistance(cname.c_str(), canonizeName(pc->getName().c_str()));

    if (d < best) {
      bestIndex = *it;
      secondBest = best;
      best = d;
    }
    else if (d < secondBest) {
      secondBest = d;
      if (d <= 0.4)
        return 0; // Two possible clubs are too close. We cannot choose.
    }
  }

  // Accept only a clearly good best match with a clearly worse runner-up.
  if (best < 0.2 && secondBest > 0.4)
    return pClub(&cdb[bestIndex]);

  return 0;
}
// Program entry point: evaluates stringDistance() once on a fixed pair of
// sample strings. The computed value is intentionally discarded.
int main()
{
  static_cast<void>(
      stringDistance("appropriate meaning", "approximate matching"));
  return 0;
}