/*
 * Reports whether `word` is present in the words store.
 *
 * Returns 1 when vwt_get_word_id() succeeds and yields a positive id.
 * Returns 0 in every other case: a NULL argument, an unavailable words
 * store, a failed lookup, or a non-positive id.
 */
int varnam_is_known_word(varnam* handle, const char* word)
{
    sqlite3_int64 id;

    if (handle == NULL || word == NULL)
        return 0;

    if (!is_words_store_available (handle))
        return 0;

    reset_pool (handle);

    if (vwt_get_word_id (handle, word, &id) != VARNAM_SUCCESS)
        return 0;

    return (id > 0) ? 1 : 0;
}
/*
 * Learns `word` and persists `pattern` for it inside a single change set.
 *
 * Steps: start a change set, learn the word with confidence 1, look up the
 * id of the learned word, persist the pattern against that id, then end
 * the change set.
 *
 * Returns VARNAM_SUCCESS when every step succeeds, otherwise the status of
 * the first step that failed. Any failure after the change set was started
 * discards it, so a failed call does not leave a change set open.
 */
int varnam_train(varnam *handle, const char *pattern, const char *word)
{
    int rc;
    sqlite3_int64 word_id;

    reset_pool (handle);

    rc = vwt_start_changes (handle);
    if (rc != VARNAM_SUCCESS)
        return rc;

    rc = varnam_learn_internal (handle, word, 1);
    if (rc != VARNAM_SUCCESS)
        goto discard;

    rc = vwt_get_word_id (handle, word, &word_id);
    if (rc != VARNAM_SUCCESS)
        goto discard;

    rc = vwt_persist_pattern (handle, pattern, word_id, false);
    if (rc != VARNAM_SUCCESS)
        goto discard;

    /* Report a failure to end the change set instead of claiming success. */
    return vwt_end_changes (handle);

discard:
    vwt_discard_changes (handle);
    return rc;
}
/*
 * Learns a single word with confidence 1 inside its own change set.
 *
 * Returns VARNAM_ERROR when the words store is not available, the failing
 * status when starting the change set, learning, or ending the change set
 * fails, and VARNAM_SUCCESS otherwise. A learning failure discards the
 * change set before returning.
 *
 * When _RECORD_EXEC_TIME is defined the call is wrapped in the
 * V_BEGIN_TIMING / V_REPORT_TIME_TAKEN macros; the report is only reached
 * on the success path.
 */
int varnam_learn(varnam *handle, const char *word)
{
    int rc;

#ifdef _RECORD_EXEC_TIME
    V_BEGIN_TIMING
#endif

    reset_pool (handle);

    if (!is_words_store_available (handle))
        return VARNAM_ERROR;

    rc = vwt_start_changes (handle);
    if (rc != VARNAM_SUCCESS)
        return rc;

    rc = varnam_learn_internal (handle, word, 1);
    if (rc == VARNAM_SUCCESS) {
        rc = vwt_end_changes (handle);
    } else {
        vwt_discard_changes (handle);
    }

    if (rc != VARNAM_SUCCESS)
        return rc;

#ifdef _RECORD_EXEC_TIME
    V_REPORT_TIME_TAKEN("varnam_learn")
#endif

    return VARNAM_SUCCESS;
}
/*
 * The public function to reset pool.
 *
 * Emits a LOG record with the pool's capacity and its used size (both as an
 * absolute value and as a percentage of capacity), then delegates the
 * actual work to reset_pool().
 */
PJ_DEF(void) pj_pool_reset(pj_pool_t *pool)
{
    /* Usage is reported before the reset, i.e. it describes the state being
     * discarded. */
    LOG((pool->obj_name, "reset(): cap=%d, used=%d(%d%%)",
         pool->capacity, pj_pool_get_used_size(pool),
         pj_pool_get_used_size(pool)*100/pool->capacity));

    reset_pool(pool);
}
/*
 * Deletes `word` by delegating to vwt_delete_word().
 *
 * Returns VARNAM_ARGS_ERROR when `handle` or `word` is NULL; otherwise
 * resets the handle's pool and returns whatever vwt_delete_word() returns.
 */
int varnam_delete_word(varnam *handle, const char *word)
{
    if (handle != NULL && word != NULL) {
        reset_pool (handle);
        return vwt_delete_word (handle, word);
    }

    return VARNAM_ARGS_ERROR;
}
/*
 * Exports words into `out_dir`, `words_per_file` words per file.
 *
 * `export_type` selects the exporter: VARNAM_EXPORT_FULL uses
 * vwt_full_export(), any other value uses vwt_export_words(). `callback`
 * is forwarded unchanged to the selected exporter.
 *
 * Returns VARNAM_ARGS_ERROR when `handle` or `out_dir` is NULL or
 * `words_per_file` is not positive; otherwise the exporter's return value.
 */
int varnam_export_words(varnam* handle, int words_per_file, const char* out_dir, int export_type,
                        void (*callback)(int total_words, int processed, const char *current_word))
{
    if (!handle || !out_dir || words_per_file < 1)
        return VARNAM_ARGS_ERROR;

    reset_pool (handle);

    if (export_type != VARNAM_EXPORT_FULL)
        return vwt_export_words (handle, words_per_file, out_dir, callback);

    return vwt_full_export (handle, words_per_file, out_dir, callback);
}
/*
 * Destroy the pool.
 *
 * Logs the pool's capacity, used size and the buffer range of the first
 * block in its block list, resets the pool, and then hands the pool's
 * memory to the factory policy's block_free callback (when one is set).
 */
PJ_DEF(void) pj_pool_destroy_int(pj_pool_t *pool)
{
    pj_size_t initial_size;

    LOG((pool->obj_name, "destroy(): cap=%d, used=%d(%d%%), block0=%p-%p",
         pool->capacity, pj_pool_get_used_size(pool),
         pj_pool_get_used_size(pool)*100/pool->capacity,
         ((pj_pool_block*)pool->block_list.next)->buf,
         ((pj_pool_block*)pool->block_list.next)->end));

    /* NOTE(review): presumably reset_pool() releases every block except the
     * first one, so block_list.next below is the initial block - confirm. */
    reset_pool(pool);

    /* Size passed to block_free: the distance from the start of the pool
     * object to the end of the first block in the list. */
    initial_size = ((pj_pool_block*)pool->block_list.next)->end -
                   (unsigned char*)pool;

    /* block_free is optional; nothing is released when it is not set. */
    if (pool->factory->policy.block_free)
        (*pool->factory->policy.block_free)(pool->factory, pool, initial_size);
}
/*
 * Imports previously exported learnings from the file at `filepath`.
 *
 * The file type reported by get_file_type() selects the importer:
 * _WORDS_IMPORT uses vwt_import_words(), _PATTERNS_IMPORT uses
 * vwt_import_patterns(). `onfailure` is forwarded unchanged to the
 * importer. The whole import runs inside one change set, with the
 * huge-transaction optimization switched on for its duration.
 *
 * Returns:
 *   VARNAM_ARGS_ERROR  when `handle` or `filepath` is NULL.
 *   VARNAM_ERROR       when the file cannot be opened or its type is not
 *                      recognised (the last error is set on the handle).
 *   the failing status when any vwt_* step fails.
 *   VARNAM_SUCCESS     otherwise.
 *
 * Every exit path closes the file. Every failure after the change set was
 * started also switches the optimization back off, and failures before the
 * commit discard the change set.
 */
int varnam_import_learnings_from_file(varnam *handle, const char *filepath,
                                      void (*onfailure)(const char* line))
{
    int rc;
    int filetype;
    FILE *infile;

    if (handle == NULL || filepath == NULL)
        return VARNAM_ARGS_ERROR;

    reset_pool (handle);

    infile = fopen (filepath, "r");
    if (!infile) {
        set_last_error (handle, "Couldn't open file '%s' for reading", filepath);
        return VARNAM_ERROR;
    }

    rc = vwt_optimize_for_huge_transaction (handle);
    if (rc != VARNAM_SUCCESS) {
        fclose (infile);
        return rc;
    }

    varnam_log (handle, "Starting to import from %s", filepath);
    rc = vwt_start_changes (handle);
    if (rc != VARNAM_SUCCESS) {
        vwt_turn_off_optimization_for_huge_transaction (handle);
        fclose (infile);
        return rc;
    }

    filetype = get_file_type (infile);
    switch (filetype) {
    case _WORDS_IMPORT:
        rc = vwt_import_words (handle, infile, onfailure);
        break;
    case _PATTERNS_IMPORT:
        rc = vwt_import_patterns (handle, infile, onfailure);
        break;
    default:
        /* Includes the -1 "unknown" result; nothing can be imported. */
        set_last_error (handle, "Couldn't read file '%s'. Unknown file type", filepath);
        rc = VARNAM_ERROR;
        break;
    }

    /* The file is not needed past this point, whatever the outcome. */
    fclose (infile);

    if (rc != VARNAM_SUCCESS) {
        /* Undo the partially applied change set and restore the normal
         * settings; rc keeps the original failure. */
        vwt_discard_changes (handle);
        vwt_turn_off_optimization_for_huge_transaction (handle);
        return rc;
    }

    varnam_log (handle, "Writing changes to disk");
    rc = vwt_end_changes (handle);
    if (rc != VARNAM_SUCCESS) {
        varnam_log (handle, "Writing changes to disk failed");
        vwt_turn_off_optimization_for_huge_transaction (handle);
        return rc;
    }

    varnam_log (handle, "Ensuring file integrity");
    rc = vwt_turn_off_optimization_for_huge_transaction (handle);
    if (rc != VARNAM_SUCCESS) {
        varnam_log (handle, "Failed to check file integrity");
        return rc;
    }

    return VARNAM_SUCCESS;
}
/*
 * Learns every word listed in the file at `filepath`.
 *
 * Each line holds either "word" or "word confidence" separated by a single
 * space. A line with one part is learned with confidence 1; with two
 * parts the second is parsed with atoi() and used as the confidence. Lines
 * with zero or more than two parts are counted as failures.
 *
 * status   - optional; when non-NULL its total_words and failed counters
 *            are reset and then updated for every line read.
 * callback - optional; when non-NULL it is invoked once per line with the
 *            word (or the whole trimmed line for a malformed line), that
 *            line's status code and `object`.
 *
 * All lines are learned inside one change set with the huge-transaction
 * optimization switched on. Afterwards the change set is ended, the
 * optimization is switched off and the file is compacted; each of these
 * steps is attempted even when an earlier one failed.
 *
 * Returns VARNAM_ERROR when the file cannot be opened, the failing status
 * when the optimization or the change set cannot be started, and otherwise
 * the status of the first finishing step that failed (0 when all of them
 * succeeded). Failures of individual words are reported only through
 * `status` and `callback`.
 */
int varnam_learn_from_file(varnam *handle,
                           const char *filepath,
                           vlearn_status *status,
                           void (*callback)(varnam *handle, const char *word, int status_code, void *object),
                           void *object)
{
    int rc;
    /* 0 means "no finishing step has failed so far"; the vwt_* calls used
     * here report failure as a non-zero status. */
    int first_failure = 0;
    FILE *infile;
    char line_buffer[10000];
    strbuf *word;
    varray *word_parts;
    int confidence;
    int parts;

    infile = fopen(filepath, "r");
    if (!infile) {
        set_last_error (handle, "Couldn't open file '%s' for reading.\n", filepath);
        return VARNAM_ERROR;
    }

    if (status != NULL) {
        status->total_words = 0;
        status->failed = 0;
    }

    rc = vwt_optimize_for_huge_transaction(handle);
    if (rc) {
        fclose (infile);
        return rc;
    }

    /* Learning from file will be mostly new words. Optimizing for that */
    v_->_config_mostly_learning_new_words = 1;

    varnam_log (handle, "Starting to learn from %s", filepath);
    rc = vwt_start_changes (handle);
    if (rc) {
        vwt_turn_off_optimization_for_huge_transaction(handle);
        fclose (infile);
        return rc;
    }

    while (fgets(line_buffer, sizeof(line_buffer), infile)) {
        /* Pooled objects from the previous line are no longer needed. */
        reset_pool (handle);

        word = get_pooled_string (handle);
        strbuf_add (word, trimwhitespace (line_buffer));
        word_parts = strbuf_split (word, handle, ' ');
        parts = varray_length (word_parts);

        if (parts > 0 && parts <= 2) {
            confidence = 1;
            if (parts == 2) {
                word = varray_get (word_parts, 1);
                confidence = atoi (strbuf_to_s (word));
            }

            word = varray_get (word_parts, 0);
            rc = varnam_learn_internal (handle, strbuf_to_s (word), confidence);
            if (rc) {
                if (status != NULL) status->failed++;
            }
        }
        else {
            /* Malformed line: `word` still refers to the whole trimmed line. */
            rc = VARNAM_ERROR;
            if (status != NULL) status->failed++;
        }

        if (status != NULL) status->total_words++;
        if (callback != NULL) callback (handle, strbuf_to_s (word), rc, object);
    }

    /* Reading is finished; close the file here so that no later return path
     * can leak it. */
    fclose (infile);

    varnam_log (handle, "Writing changes to disk");
    rc = vwt_end_changes (handle);
    if (rc) {
        varnam_log (handle, "Writing changes to disk failed");
        first_failure = rc;
    }

    varnam_log (handle, "Ensuring file integrity");
    rc = vwt_turn_off_optimization_for_huge_transaction(handle);
    if (rc) {
        varnam_log (handle, "Failed to check file integrity");
        if (!first_failure) first_failure = rc;
    }

    varnam_log (handle, "Compacting file");
    rc = vwt_compact_file (handle);
    if (rc && !first_failure)
        first_failure = rc;

    return first_failure;
}
// Presumably the constructor of lock_free_pool (the enclosing class header is
// not visible here - confirm). Forwards `poolsize`, which defaults to 0, to
// reset_pool(); what a size of 0 means is decided by reset_pool().
lock_free_pool(size_t poolsize = 0) { reset_pool(poolsize); }