/*
 * Compute a combined spam score for a message.
 *
 * The message is converted with mir_u2a() and split on DELIMS. Every token
 * accepted by is_token_valid() gets a per-token score
 *     spam_prob / (spam_prob + ham_prob)
 * where each probability is the token's count divided by the number of
 * learned messages of that kind, clamped to [0.01, 1.0]. A token with both
 * probabilities equal to zero is given the fixed pair 0.4 / 0.6.
 *
 * Returns P / (P + Q), where P is the product of the per-token scores and Q
 * the product of their complements. A message with no valid tokens yields
 * 0.5. Returns 0 when bayesdb is NULL or the message could not be converted.
 */
double get_msg_score(TCHAR *msg)
{
	/* 2^512 and 2^-512: multiplying by a power of two is exact, so rescaling
	 * both products leaves their ratio untouched. */
	const double rescale = 1.3407807929942597e154;
	const double tiny = 1.0 / rescale;
	char *message, *token;
	double spam_prob, ham_prob, score;
	double tmp1 = 1, tmp2 = 1;
	int spam_msgcount, ham_msgcount;

	if (bayesdb == NULL)
		return 0;

	message = mir_u2a(msg);
	/* NOTE(review): assumes mir_u2a() signals failure with NULL; without this
	 * guard strtok() would be handed a NULL string. */
	if (message == NULL)
		return 0;

	spam_msgcount = get_msg_count(SPAM);
	ham_msgcount = get_msg_count(HAM);

	for (token = strtok(message, DELIMS); token != NULL; token = strtok(NULL, DELIMS)) {
		if (!is_token_valid(token))
			continue;

		spam_prob = spam_msgcount == 0 ? 0 : (double)get_token_score(SPAM, token) / (double)spam_msgcount;
		ham_prob = ham_msgcount == 0 ? 0 : (double)get_token_score(HAM, token) / (double)ham_msgcount;

		/* Token never seen in either corpus: use the fixed prior. */
		if (ham_prob == 0 && spam_prob == 0) {
			spam_prob = 0.4;
			ham_prob = 0.6;
		}

		/* Clamp to [0.01, 1.0] so a single token can never force 0 or 1. */
		spam_prob = spam_prob > 1.0 ? 1.0 : (spam_prob < 0.01 ? 0.01 : spam_prob);
		ham_prob = ham_prob > 1.0 ? 1.0 : (ham_prob < 0.01 ? 0.01 : ham_prob);

		/* Accumulate directly instead of buffering every score: same
		 * multiplication order, but no heap allocation that can fail. */
		score = spam_prob / (spam_prob + ham_prob);
		tmp1 *= score;
		tmp2 *= 1 - score;

		/* On very long messages both products would underflow to zero and
		 * the final division would be 0/0. Scale both up together. */
		if (tmp1 < tiny && tmp2 < tiny) {
			tmp1 *= rescale;
			tmp2 *= rescale;
		}
	}

	mir_free(message);
	return tmp1 / (tmp1 + tmp2);
}
/* Learn one message as either SPAM or HAM as specified in type parameter */ void learn(int type, TCHAR *msg) { char *tok, *message; BYTE digest[16]; char sql_select[200], sql_update[200], sql_insert[200], sql_counter[200]; sqlite3_stmt *stmt; #ifdef _DEBUG sqlite3_stmt *stmtdbg; #endif if (_getOptB("BayesEnabled", defaultBayesEnabled) == 0) return; if (bayesdb == NULL) OpenBayes(); message = mir_u2a(msg); tok = strtok(message, DELIMS); mir_snprintf(sql_counter, "UPDATE stats SET value=value+1 WHERE key='%s'", type == SPAM ? "spam_msgcount" : "ham_msgcount"); mir_snprintf(sql_select, "SELECT 1 FROM %s WHERE token=?", type == SPAM ? "spam" : "ham"); mir_snprintf(sql_update, "UPDATE %s SET num=num+1 WHERE token=?", type ? "spam" : "ham"); mir_snprintf(sql_insert, "INSERT INTO %s VALUES(?, 1)", type ? "spam" : "ham"); #ifdef _DEBUG sqlite3_exec(bayesdbg, "BEGIN", NULL, NULL, NULL); #endif sqlite3_exec(bayesdb, "BEGIN", NULL, NULL, NULL); while (tok) { if (!is_token_valid(tok)) { tok = strtok(NULL, DELIMS); continue; } tokenhash(tok, digest); sqlite3_prepare_v2(bayesdb, sql_select, -1, &stmt, NULL); sqlite3_bind_blob(stmt, 1, digest, 16, SQLITE_STATIC); if (SQLITE_ROW == sqlite3_step(stmt)) { sqlite3_finalize(stmt); sqlite3_prepare_v2(bayesdb, sql_update, -1, &stmt, NULL); } else { sqlite3_finalize(stmt); sqlite3_prepare_v2(bayesdb, sql_insert, -1, &stmt, NULL); } sqlite3_bind_blob(stmt, 1, digest, 16, SQLITE_STATIC); sqlite3_step(stmt); sqlite3_finalize(stmt); #ifdef _DEBUG sqlite3_prepare_v2(bayesdbg, sql_select, -1, &stmtdbg, NULL); sqlite3_bind_text(stmtdbg, 1, tok, (int)mir_strlen(tok), NULL); if (SQLITE_ROW == sqlite3_step(stmtdbg)) { sqlite3_finalize(stmtdbg); sqlite3_prepare_v2(bayesdbg, sql_update, -1, &stmtdbg, NULL); } else { sqlite3_finalize(stmtdbg); sqlite3_prepare_v2(bayesdbg, sql_insert, -1, &stmtdbg, NULL); } sqlite3_bind_text(stmtdbg, 1, tok, (int)mir_strlen(tok), SQLITE_STATIC); sqlite3_step(stmtdbg); sqlite3_finalize(stmtdbg); #endif tok = 
strtok(NULL, DELIMS); } sqlite3_exec(bayesdb, sql_counter, NULL, NULL, NULL); sqlite3_exec(bayesdb, "COMMIT", NULL, NULL, NULL); #ifdef _DEBUG sqlite3_exec(bayesdbg, "COMMIT", NULL, NULL, NULL); #endif mir_free(message); }
int process_args(int argc, char *argv[], char batch_mode) { int c; char mode = 0; char time_ok = 0; char room = 0; char *filename; record *rec = calloc(1, sizeof(record)); record *first = NULL; record *last = NULL; EVP_CIPHER_CTX en_ctx, de_ctx; optind = 0; opterr = 0; while ((c = getopt(argc, argv, ":T:K:E:G:ALR:B:")) != -1) switch (c) { case 'T': if (mode == BATCH_MODE) return print_error(ARG_ERROR); mode = DEFAULT_MODE; if (strtoi(optarg, &(rec->timestamp)) || !rec->timestamp) return print_error(ARG_ERROR); time_ok = 1; break; case 'K': if (mode == BATCH_MODE) return print_error(ARG_ERROR); mode = DEFAULT_MODE; if (!is_token_valid(optarg)) return print_error(ARG_ERROR); rec->token = optarg; break; case 'E': if (mode == BATCH_MODE) return print_error(ARG_ERROR); mode = DEFAULT_MODE; if (rec->ptype == GUEST) return print_error(ARG_ERROR); rec->ptype = EMPLOYEE; if (!is_name_valid(optarg)) return print_error(ARG_ERROR); rec->name = optarg; break; case 'G': if (mode == BATCH_MODE) return print_error(ARG_ERROR); mode = DEFAULT_MODE; if (rec->ptype == EMPLOYEE) return print_error(ARG_ERROR); rec->ptype = GUEST; if (!is_name_valid(optarg)) return print_error(ARG_ERROR); rec->name = optarg; break; case 'A': if (mode == BATCH_MODE) return print_error(ARG_ERROR); mode = DEFAULT_MODE; if (rec->etype == DEPARTURE) return print_error(ARG_ERROR); rec->etype = ARRIVAL; break; case 'L': if (mode == BATCH_MODE) return print_error(ARG_ERROR); mode = DEFAULT_MODE; if (rec->etype == ARRIVAL) return print_error(ARG_ERROR); rec->etype = DEPARTURE; break; case 'R': if (mode == BATCH_MODE) return print_error(ARG_ERROR); mode = DEFAULT_MODE; if (strtoi(optarg, &(rec->room_id))) return print_error(ARG_ERROR); room = 1; break; case 'B': if (mode == DEFAULT_MODE || batch_mode) return print_error(ARG_ERROR); mode = BATCH_MODE; filename = optarg; break; default: return print_error(ARG_ERROR); } if (mode == BATCH_MODE) { free(rec); if (batch(filename)) return print_error(ARG_ERROR); return 
NO_ERROR; } //Checa opcoes mandatorias if (mode != DEFAULT_MODE || !time_ok || rec->token == NULL || rec->name == NULL || rec->etype == UNDEF_E || optind >= argc) return print_error(ARG_ERROR); //Ajusta o evento de acordo com a presenca de sala if (!room) { if (rec->etype == ARRIVAL) rec->etype = G_ARRIVAL; else rec->etype = G_DEPARTURE; } //Inicia contextos de seguranca if ((aes_init((unsigned char *) rec->token, strlen(rec->token), &en_ctx, &de_ctx))) return print_error(ARG_ERROR); //validacoes de consistencia c = read_records(&first, argv[optind], &last, &de_ctx); if (c) { EVP_CIPHER_CTX_cleanup(&en_ctx); EVP_CIPHER_CTX_cleanup(&de_ctx); return c; } if ((c = check_record(first, rec))) { del_records(&first); free(rec); EVP_CIPHER_CTX_cleanup(&en_ctx); EVP_CIPHER_CTX_cleanup(&de_ctx); return print_error(ARG_ERROR); } if (!first) first = rec; else { if (!last) last = first; last->next = rec; } write_record(first, argv[optind], &en_ctx); //TODO em batch, esses registros poderiam ser mantidos para evitar reler o arquivo. if (first != rec) { last->next = NULL; del_records(&first); } free(rec); EVP_CIPHER_CTX_cleanup(&en_ctx); EVP_CIPHER_CTX_cleanup(&de_ctx); return NO_ERROR; }