bool CPokerTrackerThread::QueryName(const char * query_name, const char * scraped_name, char * best_name)
{
	int				lev_dist = 0, bestLD = 0, bestLDindex = 0;
	PGresult		*res = NULL;
	bool			result = false;
	CLevDistance	myLD;
	int				siteid = 0;
	double			Levenshtein_distance_matching_factor = 0.3;

	//No more unnecessary queries when we don't even have a siteid to check
	siteid = pt_lookup.GetSiteId();
	if (siteid == kUndefined)  return false;

	if (!_connected || PQstatus(_pgconn) != CONNECTION_OK)
		return false;

	if (0 == strlen(query_name))
		return false;

	CString query;
	query.Format("SELECT player_name FROM player WHERE player_name like '%s' AND id_site=%i",
		query_name, siteid);
	res = PQexec(_pgconn, query);

  // We got nothing, return false
	if (PQntuples(res) == 0)
	{
		result = false;
	}

	// If we get one tuple, we still check for Levenshtein distance if the name is short.
	// If the name is long it is probably displayed truncated so Levenshtein distance would be too high.
	if (PQntuples(res) == 1)
	{
		char *found_name = PQgetvalue(res, 0, 0);
		lev_dist = myLD.LD(scraped_name, found_name);
		if (	strlen(found_name) >= k_min_name_length_to_skip_lev_dist
			 || lev_dist <= (int)strlen(found_name) * Levenshtein_distance_matching_factor )
		{
			strncpy_s(best_name, kMaxLengthOfPlayername, found_name, _TRUNCATE);
			// Max length names are possibly truncated so we replace the last character by %.
			if (strlen(best_name)==(kMaxLengthOfPlayername-1)) {
				best_name[(kMaxLengthOfPlayername-2)] = '%';
			}
			result = true;
		}
		else {
			result = false;
		}
	}

	// We got more than one tuple, figure the Levenshtein distance for all of them
	// and return the best
	else if (PQntuples(res) > 1)
	{
		bestLD = 999;
		for (int i=0; i<PQntuples(res); i++)
		{
			lev_dist = myLD.LD(scraped_name, PQgetvalue(res, i, 0));

			if (lev_dist<bestLD && lev_dist<(int)strlen(PQgetvalue(res, i, 0))*Levenshtein_distance_matching_factor ) 
			{
				bestLD = lev_dist;
				bestLDindex = i;
			}
		}
		if (bestLD != 999)
		{
			strncpy_s(best_name, kMaxLengthOfPlayername, PQgetvalue(res, bestLDindex, 0), _TRUNCATE);
			// Max length names are possibly truncated so we replace the last character by %.
			if (strlen(best_name)==(kMaxLengthOfPlayername-1)) {
				best_name[(kMaxLengthOfPlayername-2)] = '%';
			}
			result = true;
		}
		else
		{
			result = false;
		}
	}

	PQclear(res);
	return result;
}
Beispiel #2
0
bool CPokerTrackerThread::QueryName(const char * query_name, const char * scraped_name, char * best_name)
{
	int				lev_dist = 0, bestLD = 0, bestLDindex = 0;
	PGresult		*res = NULL;
	bool			result = false;
	CLevDistance	myLD;
	int				siteid = 0;
	double			Levenshtein_distance_matching_factor = 0.2;

	//No more unnecessary queries when we don't even have a siteid to check
	siteid = pt_lookup.GetSiteId();
	if (siteid == k_undefined)  return false;

	if (!_connected || PQstatus(_pgconn) != CONNECTION_OK)
		return false;

	if (0 == strlen(query_name))
		return false;

	CString query;
	query.Format("SELECT player_name FROM player WHERE player_name like '%s' AND id_site=%i",
		query_name, siteid);
	try
	{
		res = PQexec(_pgconn, query);
	}
	catch (_com_error &e)
	{
		write_log(preferences.debug_pokertracker(), "[PokerTracker] Postgres Query error:\n");
		write_log(preferences.debug_pokertracker(), "[PokerTracker] \tCode = %08lx\n", e.Error());
		write_log(preferences.debug_pokertracker(), "[PokerTracker] \tCode meaning = %s\n", e.ErrorMessage());
		_bstr_t bstrSource(e.Source());
		_bstr_t bstrDescription(e.Description());
		write_log(preferences.debug_pokertracker(), "[PokerTracker] \tSource = %s\n", (LPCTSTR) bstrSource);
		write_log(preferences.debug_pokertracker(), "[PokerTracker] \tDescription = %s\n", (LPCTSTR) bstrDescription);
		write_log(preferences.debug_pokertracker(), "[PokerTracker] \tQuery = [%s]\n", query);
	}

	// We got nothing, return false
	if (PQntuples(res) == 0)
	{
		result = false;
	}

	// If we get one tuple, all is good - return the one name
	if (PQntuples(res) == 1)
	{
		lev_dist = myLD.LD(scraped_name, PQgetvalue(res, 0, 0));

		if (lev_dist<=(int)strlen(PQgetvalue(res, 0, 0))*Levenshtein_distance_matching_factor )
		{
	     	strcpy_s(best_name, k_max_length_of_playername, PQgetvalue(res, 0, 0));
			result = true;
		}
		else {
			result = false;
		}
	}

	// We got more than one tuple, figure the Levenshtein distance for all of them
	// and return the best
	else if ((PQntuples(res) > 1))
	{
		bestLD = 999;
		for (int i=0; i<PQntuples(res); i++)
		{
			lev_dist = myLD.LD(scraped_name, PQgetvalue(res, i, 0));

			if (lev_dist<bestLD && lev_dist<(int)strlen(PQgetvalue(res, i, 0))*Levenshtein_distance_matching_factor ) 
			{
				bestLD = lev_dist;
				bestLDindex = i;
			}
		}
		if (bestLD != 999)
		{
			strcpy_s(best_name, k_max_length_of_playername, PQgetvalue(res, bestLDindex, 0));
			result = true;
		}
		else
		{
			result = false;
		}
	}

	PQclear(res);
	return result;
}
Beispiel #3
0
bool CPokerTrackerThread::QueryName(const char * query_name, const char * scraped_name, char * best_name)
{
	char			strQry[k_max_length_of_query] = {0};
	int				lev_dist = 0, bestLD = 0, bestLDindex = 0;
	PGresult		*res = NULL;
	char			siteidstr[k_max_length_of_site_id] = {0};
	bool			result = false;
	CLevDistance	myLD;
	int				siteid = 0;
	static int		_last_siteid = -1;
	double			Levenshtein_distance_matching_factor = 0.2;

	//No more unnecessary queries when we don't even have a siteid to check
	siteid = pt_lookup.GetSiteId();
	if (siteid == k_undefined)  return false;

	// siteid has changed -- we're using ManualMode
	if (siteid != _last_siteid)
	{
		ClearAllStats();
		_last_siteid = siteid;
	}

	if (!_connected || PQstatus(_pgconn) != CONNECTION_OK)
		return false;

	if (0 == strlen(query_name))
		return false;

	sprintf_s(siteidstr, k_max_length_of_site_id, "%d", siteid);

	// PT version 3 name query
	strcpy_s(strQry, k_max_length_of_query, "SELECT player_name FROM player WHERE player_name like '");
	strcat_s(strQry, k_max_length_of_query, query_name);
	strcat_s(strQry, k_max_length_of_query, "' AND id_site=");
	strcat_s(strQry, k_max_length_of_query, siteidstr);
	
	try
	{
		res = PQexec(_pgconn, strQry);
	}
	catch (_com_error &e)
	{
		write_log_pokertracker(prefs.debug_pokertracker(), "Postgres Query error:\n");
		write_log_pokertracker(prefs.debug_pokertracker(), "\tCode = %08lx\n", e.Error());
		write_log_pokertracker(prefs.debug_pokertracker(), "\tCode meaning = %s\n", e.ErrorMessage());
		_bstr_t bstrSource(e.Source());
		_bstr_t bstrDescription(e.Description());
		write_log_pokertracker(prefs.debug_pokertracker(), "\tSource = %s\n", (LPCTSTR) bstrSource);
		write_log_pokertracker(prefs.debug_pokertracker(), "\tDescription = %s\n", (LPCTSTR) bstrDescription);
		write_log_pokertracker(prefs.debug_pokertracker(), "\tQuery = [%s]\n", strQry);
	}

	// We got nothing, return false
	if (PQntuples(res) == 0)
	{
		result = false;
	}

	// If we get one tuple, all is good - return the one name
	if (PQntuples(res) == 1)
	{
		lev_dist = myLD.LD(scraped_name, PQgetvalue(res, 0, 0));

		if (lev_dist<=(int)strlen(PQgetvalue(res, 0, 0))*Levenshtein_distance_matching_factor )
		{
	     	strcpy_s(best_name, k_max_length_of_playername, PQgetvalue(res, 0, 0));
			result = true;
		}
		else {
			result = false;
		}
	}

	// We got more than one tuple, figure the Levenshtein distance for all of them
	// and return the best
	else if ((PQntuples(res) > 1))
	{
		bestLD = 999;
		for (int i=0; i<PQntuples(res); i++)
		{
			lev_dist = myLD.LD(scraped_name, PQgetvalue(res, i, 0));

			if (lev_dist<bestLD && lev_dist<(int)strlen(PQgetvalue(res, i, 0))*Levenshtein_distance_matching_factor ) 
			{
				bestLD = lev_dist;
				bestLDindex = i;
			}
		}
		if (bestLD != 999)
		{
			strcpy_s(best_name, k_max_length_of_playername, PQgetvalue(res, bestLDindex, 0));
			result = true;
		}
		else
		{
			result = false;
		}
	}

	PQclear(res);
	return result;
}