コード例 #1
0
ファイル: bayes.cpp プロジェクト: Seldom/miranda-ng
double get_msg_score(TCHAR *msg)
{
	char *message, *token;
	double spam_prob, ham_prob, tmp1 = 1, tmp2 = 1;
	double *scores = NULL;
	int spam_msgcount, ham_msgcount, n = 0, i;

	if (bayesdb == NULL)
		return 0;

	message = mir_u2a(msg);
	spam_msgcount = get_msg_count(SPAM);
	ham_msgcount = get_msg_count(HAM);
	token = strtok(message, DELIMS);
	while (token)
	{
		if (!is_token_valid(token)) {
			token = strtok(NULL, DELIMS);
			continue;
		}
		scores = (double*)realloc(scores, sizeof(double)*(n + 1));
		spam_prob = spam_msgcount == 0 ? 0 : (double)get_token_score(SPAM, token) / (double)spam_msgcount;
		ham_prob = ham_msgcount == 0 ? 0 : (double)get_token_score(HAM, token) / (double)ham_msgcount;
		if (ham_prob == 0 && spam_prob == 0) {
			spam_prob = 0.4; ham_prob = 0.6;
		}
		spam_prob = spam_prob > 1.0 ? 1.0 : (spam_prob < 0.01 ? 0.01 : spam_prob);
		ham_prob = ham_prob > 1.0 ? 1.0 : (ham_prob < 0.01 ? 0.01 : ham_prob);
		scores[n++] = spam_prob / (spam_prob + ham_prob);
		
		token = strtok(NULL, DELIMS);
	}
	
	for (i = 0; i < n; i++) {
		tmp1 *= scores[i];
		tmp2 *= 1-scores[i];
	}
	
	mir_free(message);
	free(scores);
	return tmp1 / (tmp1 + tmp2);
}
コード例 #2
0
ファイル: bayes.cpp プロジェクト: Seldom/miranda-ng
/* Learn one message as either SPAM or HAM as specified in type parameter */
void learn(int type, TCHAR *msg)
{
	char *tok, *message;
	BYTE digest[16];
	char sql_select[200], sql_update[200], sql_insert[200], sql_counter[200];
	sqlite3_stmt *stmt;
#ifdef _DEBUG
	sqlite3_stmt *stmtdbg;
#endif

	if (_getOptB("BayesEnabled", defaultBayesEnabled) == 0) 
		return;
	if (bayesdb == NULL)
		OpenBayes();

	message = mir_u2a(msg);
	tok = strtok(message, DELIMS);
	mir_snprintf(sql_counter, "UPDATE stats SET value=value+1 WHERE key='%s'", type == SPAM ? "spam_msgcount" : "ham_msgcount");
	mir_snprintf(sql_select, "SELECT 1 FROM %s WHERE token=?", type == SPAM ? "spam" : "ham");
	mir_snprintf(sql_update, "UPDATE %s SET num=num+1 WHERE token=?", type ? "spam" : "ham");
	mir_snprintf(sql_insert, "INSERT INTO %s VALUES(?, 1)", type ? "spam" : "ham");
#ifdef _DEBUG
	sqlite3_exec(bayesdbg, "BEGIN", NULL, NULL, NULL);
#endif
	sqlite3_exec(bayesdb, "BEGIN", NULL, NULL, NULL);
	while (tok) {
		if (!is_token_valid(tok)) {
			tok = strtok(NULL, DELIMS);
			continue;
		}
		tokenhash(tok, digest);
		sqlite3_prepare_v2(bayesdb, sql_select, -1, &stmt, NULL);
		sqlite3_bind_blob(stmt, 1, digest, 16, SQLITE_STATIC);
		if (SQLITE_ROW == sqlite3_step(stmt)) {
			sqlite3_finalize(stmt);
			sqlite3_prepare_v2(bayesdb, sql_update, -1, &stmt, NULL);
		} else {
			sqlite3_finalize(stmt);
			sqlite3_prepare_v2(bayesdb, sql_insert, -1, &stmt, NULL);
		}
		sqlite3_bind_blob(stmt, 1, digest, 16, SQLITE_STATIC);
		sqlite3_step(stmt);
		sqlite3_finalize(stmt);

#ifdef _DEBUG
		sqlite3_prepare_v2(bayesdbg, sql_select, -1, &stmtdbg, NULL);
		sqlite3_bind_text(stmtdbg, 1, tok, (int)mir_strlen(tok), NULL);
		if (SQLITE_ROW == sqlite3_step(stmtdbg)) {
			sqlite3_finalize(stmtdbg);
			sqlite3_prepare_v2(bayesdbg, sql_update, -1, &stmtdbg, NULL);
		} else {
			sqlite3_finalize(stmtdbg);
			sqlite3_prepare_v2(bayesdbg, sql_insert, -1, &stmtdbg, NULL);
		}
		sqlite3_bind_text(stmtdbg, 1, tok, (int)mir_strlen(tok), SQLITE_STATIC);
		sqlite3_step(stmtdbg);
		sqlite3_finalize(stmtdbg);
#endif

		tok = strtok(NULL, DELIMS);
	}
	sqlite3_exec(bayesdb, sql_counter, NULL, NULL, NULL);
	sqlite3_exec(bayesdb, "COMMIT", NULL, NULL, NULL);
#ifdef _DEBUG
	sqlite3_exec(bayesdbg, "COMMIT", NULL, NULL, NULL);
#endif
	mir_free(message);
}
コード例 #3
0
int process_args(int argc, char *argv[], char batch_mode)
{
	int c;
	char mode = 0;
	char time_ok = 0;
	char room = 0;
	char *filename;
	record *rec = calloc(1, sizeof(record));
	record *first = NULL;
	record *last = NULL;
	EVP_CIPHER_CTX en_ctx, de_ctx;

	optind = 0;
	opterr = 0;
	while ((c = getopt(argc, argv, ":T:K:E:G:ALR:B:")) != -1)
		switch (c)
		{
		case 'T':
			if (mode == BATCH_MODE) 
				return print_error(ARG_ERROR);
			mode = DEFAULT_MODE;
			if (strtoi(optarg, &(rec->timestamp)) || !rec->timestamp)
				return print_error(ARG_ERROR);
			time_ok = 1;
			break;
		case 'K':
			if (mode == BATCH_MODE)
				return print_error(ARG_ERROR);
			mode = DEFAULT_MODE;
			if (!is_token_valid(optarg))
				return print_error(ARG_ERROR);
			rec->token = optarg;
			break;
		case 'E':
			if (mode == BATCH_MODE)
				return print_error(ARG_ERROR);
			mode = DEFAULT_MODE;
			if (rec->ptype == GUEST)
				return print_error(ARG_ERROR);
			rec->ptype = EMPLOYEE;
			if (!is_name_valid(optarg))
				return print_error(ARG_ERROR);
			rec->name = optarg;
			break;
		case 'G':
			if (mode == BATCH_MODE)
				return print_error(ARG_ERROR);
			mode = DEFAULT_MODE;
			if (rec->ptype == EMPLOYEE)
				return print_error(ARG_ERROR);
			rec->ptype = GUEST;
			if (!is_name_valid(optarg))
				return print_error(ARG_ERROR);
			rec->name = optarg;
			break;
		case 'A':
			if (mode == BATCH_MODE)
				return print_error(ARG_ERROR);
			mode = DEFAULT_MODE;
			if (rec->etype == DEPARTURE)
				return print_error(ARG_ERROR);
			rec->etype = ARRIVAL;
			break;
		case 'L':
			if (mode == BATCH_MODE)
				return print_error(ARG_ERROR);
			mode = DEFAULT_MODE;
			if (rec->etype == ARRIVAL)
				return print_error(ARG_ERROR);
			rec->etype = DEPARTURE;
			break;
		case 'R':
			if (mode == BATCH_MODE)
				return print_error(ARG_ERROR);
			mode = DEFAULT_MODE;
			if (strtoi(optarg, &(rec->room_id)))
				return print_error(ARG_ERROR);
			room = 1;
			break;
		case 'B':
			if (mode == DEFAULT_MODE || batch_mode)
				return print_error(ARG_ERROR);
			mode = BATCH_MODE;
			filename = optarg;
			break;
		default:
			return print_error(ARG_ERROR);
		}
	
	if (mode == BATCH_MODE)
	{
		free(rec);
		if (batch(filename))
			return print_error(ARG_ERROR);
		return NO_ERROR;
	}

	//Checa opcoes mandatorias
	if (mode != DEFAULT_MODE || !time_ok || rec->token == NULL || rec->name == NULL 
	  || rec->etype == UNDEF_E || optind >= argc)
		return print_error(ARG_ERROR);

	//Ajusta o evento de acordo com a presenca de sala
	if (!room)
	{
		if (rec->etype == ARRIVAL)
			rec->etype = G_ARRIVAL;
		else
			rec->etype = G_DEPARTURE;
	}

	//Inicia contextos de seguranca
	if ((aes_init((unsigned char *) rec->token, strlen(rec->token), &en_ctx, &de_ctx)))
		return print_error(ARG_ERROR);

	//validacoes de consistencia
	c = read_records(&first, argv[optind], &last, &de_ctx);
	if (c)
	{
		EVP_CIPHER_CTX_cleanup(&en_ctx);
		EVP_CIPHER_CTX_cleanup(&de_ctx);
		return c;
	}

	if ((c = check_record(first, rec)))
	{
		del_records(&first);
		free(rec);
		EVP_CIPHER_CTX_cleanup(&en_ctx);
		EVP_CIPHER_CTX_cleanup(&de_ctx);
		return print_error(ARG_ERROR);
	}

	if (!first)
		first = rec;
	else
	{
		if (!last)
			last = first;
		last->next = rec;
	}
	write_record(first, argv[optind], &en_ctx);

	//TODO em batch, esses registros poderiam ser mantidos para evitar reler o arquivo.
	if (first != rec)
	{
		last->next = NULL;
		del_records(&first);
	}

	free(rec);
	EVP_CIPHER_CTX_cleanup(&en_ctx);
	EVP_CIPHER_CTX_cleanup(&de_ctx);

	return NO_ERROR;
}