示例#1
0
// Looks up a club in the database by (possibly inexact) name.
//
// The lookup proceeds in stages, returning as soon as one stage is decisive:
//   1. The name is split into tokens by canonizeSplitName() and every token
//      is looked up in cnhash; the mapped values are used as indices into cdb.
//      A token longer than three characters that maps to exactly one index
//      selects that club immediately. Otherwise the candidate index sets of
//      successive tokens are intersected, and a single survivor is returned.
//   2. Remaining candidates are compared case-insensitively to `name`.
//   3. Remaining candidates are compared via their canonizeName() form.
//   4. Finally stringDistance() is used: the closest candidate is accepted
//      only if its distance is below 0.2 and the runner-up is above 0.4.
//
// Returns a pointer to the matching entry in cdb, or 0 when no candidate can
// be chosen unambiguously.
// NOTE(review): setupCNHash() presumably builds cnhash lazily -- confirm.
oClub *RunnerDB::getClub(const string &name) const
{
  // Fuzzy-match thresholds for the stringDistance() stage.
  const double maxBestDistance = 0.2;      // winner must be closer than this
  const double minRunnerUpDistance = 0.4;  // runner-up must be farther than this

  setupCNHash();
  vector<string> names;
  canonizeSplitName(name, names);
  vector< vector<int> > ix(names.size());
  set<int> iset;

  for (size_t k = 0; k<names.size(); k++) {
    // Use lower_bound rather than find: with duplicate keys, find may return
    // any equivalent element, so walking forward from it could miss matches.
    // lower_bound is guaranteed to be the first element with this key.
    multimap<string, int>::const_iterator it = cnhash.lower_bound(names[k]);

    while (it != cnhash.end() && names[k] == it->first) {
      ix[k].push_back(it->second);
      ++it;
    }

    // A sufficiently long token that identifies exactly one club is decisive.
    if (ix[k].size() == 1 && names[k].length()>3)
      return pClub(&cdb[ix[k][0]]);

    if (iset.empty())
      iset.insert(ix[k].begin(), ix[k].end());
    else {
      // Intersect the running candidate set with this token's matches.
      set<int> im;
      for (size_t j = 0; j<ix[k].size(); j++) {
        if (iset.count(ix[k][j])==1)
          im.insert(ix[k][j]);
      }
      if (im.size() == 1) {
        int i = *im.begin();
        return pClub(&cdb[i]);
      }
      else if (!im.empty())
        swap(iset, im);
      // An empty intersection leaves the previous candidates untouched.
    }
  }

  // Exact compare (case-insensitive) against the raw name
  for (set<int>::iterator it = iset.begin(); it != iset.end(); ++it) {
    pClub pc = pClub(&cdb[*it]);
    if (_stricmp(pc->getName().c_str(), name.c_str())==0)
      return pc;
  }

  // Copy the canonized query into a string so later canonizeName() calls
  // cannot affect it.
  string cname = canonizeName(name.c_str());
  // Looser compare: equality of the canonized forms
  for (set<int>::iterator it = iset.begin(); it != iset.end(); ++it) {
    pClub pc = pClub(&cdb[*it]);
    if (strcmp(canonizeName(pc->getName().c_str()), cname.c_str()) == 0 )
      return pc;
  }

  // Fuzzy compare: track the smallest and second smallest distance.
  double best = 1;
  double secondBest = 1;
  int bestIndex = -1;
  for (set<int>::iterator it = iset.begin(); it != iset.end(); ++it) {
    pClub pc = pClub(&cdb[*it]);

    double d = stringDistance(cname.c_str(), canonizeName(pc->getName().c_str()));

    if (d<best) {
      bestIndex = *it;
      secondBest = best;
      best = d;
    }
    else if (d<secondBest) {
      secondBest = d;
      if (d<=minRunnerUpDistance)
        return 0; // Two possible clubs are too close. We cannot choose.
    }
  }

  // best < maxBestDistance implies bestIndex was assigned (best starts at 1).
  if (best < maxBestDistance && secondBest>minRunnerUpDistance)
    return pClub(&cdb[bestIndex]);

  return 0;
}
// Minimal driver: invokes stringDistance() once on two sample phrases.
// The computed value is intentionally discarded.
int main()
{
	static_cast<void>(
		stringDistance("appropriate meaning", "approximate matching"));
	return 0;
}