/**
 * Imports learnings stored in a file into the word store attached to handle.
 *
 * The file is opened for reading, the store is switched into its
 * "huge transaction" mode, a change set is started, and the file type
 * reported by get_file_type() selects either vwt_import_words() or
 * vwt_import_patterns(). On success the changes are ended and the
 * huge-transaction optimization is switched off again.
 *
 * @param handle    varnam handle; must not be NULL.
 * @param filepath  path of the file to import; must not be NULL.
 * @param onfailure forwarded unchanged to the importer
 *                  (presumably called for lines that fail to import -
 *                  confirm against vwt_import_words/vwt_import_patterns).
 *
 * @return VARNAM_SUCCESS when every step succeeds; VARNAM_ARGS_ERROR when
 *         handle or filepath is NULL; VARNAM_ERROR when the file cannot be
 *         opened or its type is not recognised; otherwise the code returned
 *         by the step that failed.
 */
int varnam_import_learnings_from_file(varnam *handle, const char *filepath, void (*onfailure)(const char* line))
{
    int rc;
    int filetype;
    int unknown_type = 0;
    FILE *infile;

    if (handle == NULL || filepath == NULL)
        return VARNAM_ARGS_ERROR;

    reset_pool (handle);

    infile = fopen (filepath, "r");
    if (!infile) {
        set_last_error (handle, "Couldn't open file '%s' for reading", filepath);
        return VARNAM_ERROR;
    }

    rc = vwt_optimize_for_huge_transaction (handle);
    if (rc) {
        fclose (infile);
        return rc;
    }

    varnam_log (handle, "Starting to import from %s", filepath);
    rc = vwt_start_changes (handle);
    if (rc) {
        vwt_turn_off_optimization_for_huge_transaction (handle);
        fclose (infile);
        return rc;
    }

    filetype = get_file_type (infile);
    switch (filetype) {
    case _WORDS_IMPORT:
        rc = vwt_import_words (handle, infile, onfailure);
        break;
    case _PATTERNS_IMPORT:
        rc = vwt_import_patterns (handle, infile, onfailure);
        break;
    default:
        /* Covers -1 and any other value we have no importer for. */
        unknown_type = 1;
        rc = VARNAM_ERROR;
        break;
    }

    /* The input is not needed past this point on any path. */
    fclose (infile);

    if (rc != VARNAM_SUCCESS) {
        /* Every failure after the optimization was enabled must switch it
         * off again; the unknown-type path used to skip this.
         * NOTE(review): the change set opened by vwt_start_changes() is not
         * rolled back here - no rollback routine is visible in this file. */
        vwt_turn_off_optimization_for_huge_transaction (handle);

        /* Set the message last so the cleanup call cannot overwrite it. */
        if (unknown_type)
            set_last_error (handle, "Couldn't read file '%s'. Unknown file type", filepath);

        return rc;
    }

    varnam_log (handle, "Writing changes to disk");
    rc = vwt_end_changes (handle);
    if (rc != VARNAM_SUCCESS) {
        varnam_log (handle, "Writing changes to disk failed");
        /* Still leave huge-transaction mode; report the original failure. */
        vwt_turn_off_optimization_for_huge_transaction (handle);
        return rc;
    }

    varnam_log (handle, "Ensuring file integrity");
    rc = vwt_turn_off_optimization_for_huge_transaction (handle);
    if (rc) {
        varnam_log (handle, "Failed to check file integrity");
        return rc;
    }

    return VARNAM_SUCCESS;
}
/**
 * Stores descriptive metadata for a scheme.
 *
 * Each argument that is non-NULL and non-empty is written with
 * vst_add_metadata() under its corresponding VARNAM_METADATA_SCHEME_* key
 * and logged. NULL or empty arguments are skipped. Processing stops at the
 * first failure, so earlier fields may already have been written.
 *
 * @param handle        varnam handle; must not be NULL.
 * @param language_code language code; when supplied it must be exactly two
 *                      characters long.
 * @param identifier    scheme identifier.
 * @param display_name  scheme display name.
 * @param author        scheme author.
 * @param compiled_date scheme compilation date.
 *
 * @return VARNAM_SUCCESS when all supplied fields were stored;
 *         VARNAM_ARGS_ERROR when handle is NULL; VARNAM_ERROR when
 *         language_code has a length other than two; otherwise the code
 *         returned by the failing vst_add_metadata() call.
 */
int varnam_set_scheme_details( varnam *handle, const char *language_code, const char *identifier, const char *display_name, const char *author, const char *compiled_date)
{
    int rc;

    /* Guard before the first use of handle, matching the argument check
     * done by varnam_import_learnings_from_file(). */
    if (handle == NULL)
        return VARNAM_ARGS_ERROR;

    set_last_error (handle, NULL);

    if (language_code != NULL && strlen(language_code) > 0) {
        if (strlen(language_code) != 2) {
            set_last_error (handle, "Language code should be one of ISO 639-1 two letter codes.");
            return VARNAM_ERROR;
        }

        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_LANGUAGE_CODE, language_code);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set language code to : %s", language_code);
    }

    if (identifier != NULL && strlen(identifier) > 0) {
        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_IDENTIFIER, identifier);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set language identifier to : %s", identifier);
    }

    if (display_name != NULL && strlen(display_name) > 0) {
        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_DISPLAY_NAME, display_name);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set language display name to : %s", display_name);
    }

    if (author != NULL && strlen(author) > 0) {
        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_AUTHOR, author);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set author to : %s", author);
    }

    if (compiled_date != NULL && strlen(compiled_date) > 0) {
        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_COMPILED_DATE, compiled_date);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set compiled date to : %s", compiled_date);
    }

    return VARNAM_SUCCESS;
}
/**
 * Learns words listed in a text file, one entry per line.
 *
 * Each line is trimmed and split on a single space. A line with one token
 * is learned with confidence 1; a line with two tokens is learned using the
 * second token, converted with atoi(), as the confidence. Any other token
 * count is counted as a failure without attempting to learn.
 *
 * After the file has been read the changes are ended, the huge-transaction
 * optimization is switched off and the store is compacted. All three steps
 * are always attempted; the first failure among them is what gets returned.
 *
 * @param handle   varnam handle; must not be NULL.
 * @param filepath path of the file to read; must not be NULL.
 * @param status   optional; when non-NULL, total_words and failed are reset
 *                 to zero once the file is open and updated per line.
 * @param callback optional; when non-NULL it is called once per line with
 *                 the handle, the word text, that line's result code and
 *                 object. For lines with one or two tokens the word is the
 *                 first token; otherwise it is the pooled buffer that was
 *                 filled with the trimmed line.
 * @param object   opaque pointer handed back to callback.
 *
 * @return VARNAM_SUCCESS when the finishing steps all succeed (individual
 *         line failures are reported only through status and callback);
 *         VARNAM_ARGS_ERROR when handle or filepath is NULL; VARNAM_ERROR
 *         when the file cannot be opened; otherwise the code of the first
 *         step that failed.
 */
int varnam_learn_from_file(varnam *handle, const char *filepath, vlearn_status *status, void (*callback)(varnam *handle, const char *word, int status_code, void *object), void *object)
{
    int rc;
    int result = VARNAM_SUCCESS;
    FILE *infile;
    char line_buffer[10000];
    strbuf *word;
    varray *word_parts;
    int confidence;
    int parts;

    /* fopen(NULL, ...) and dereferencing a NULL handle are both undefined;
     * reject them the same way varnam_import_learnings_from_file() does. */
    if (handle == NULL || filepath == NULL)
        return VARNAM_ARGS_ERROR;

    infile = fopen (filepath, "r");
    if (!infile) {
        set_last_error (handle, "Couldn't open file '%s' for reading.\n", filepath);
        return VARNAM_ERROR;
    }

    if (status != NULL) {
        status->total_words = 0;
        status->failed = 0;
    }

    rc = vwt_optimize_for_huge_transaction (handle);
    if (rc) {
        fclose (infile);
        return rc;
    }

    /* Learning from file will be mostly new words. Optimizing for that */
    v_->_config_mostly_learning_new_words = 1;

    varnam_log (handle, "Starting to learn from %s", filepath);
    rc = vwt_start_changes (handle);
    if (rc) {
        vwt_turn_off_optimization_for_huge_transaction (handle);
        fclose (infile);
        return rc;
    }

    while (fgets (line_buffer, sizeof(line_buffer), infile)) {
        /* Pooled objects from the previous line are released each round. */
        reset_pool (handle);

        word = get_pooled_string (handle);
        strbuf_add (word, trimwhitespace (line_buffer));
        word_parts = strbuf_split (word, handle, ' ');

        parts = varray_length (word_parts);
        if (parts > 0 && parts <= 2) {
            confidence = 1;
            if (parts == 2) {
                /* Second token is the confidence for this word. */
                word = varray_get (word_parts, 1);
                confidence = atoi (strbuf_to_s (word));
            }

            word = varray_get (word_parts, 0);
            rc = varnam_learn_internal (handle, strbuf_to_s (word), confidence);
            if (rc) {
                if (status != NULL)
                    status->failed++;
            }
        }
        else {
            /* Empty line or more than two tokens: not a valid entry. */
            rc = VARNAM_ERROR;
            if (status != NULL)
                status->failed++;
        }

        if (status != NULL)
            status->total_words++;

        if (callback != NULL)
            callback (handle, strbuf_to_s (word), rc, object);
    }

    /* Reading is finished; close now so no later return can leak the file. */
    fclose (infile);

    varnam_log (handle, "Writing changes to disk");
    rc = vwt_end_changes (handle);
    if (rc) {
        varnam_log (handle, "Writing changes to disk failed");
        result = rc;
    }

    varnam_log (handle, "Ensuring file integrity");
    rc = vwt_turn_off_optimization_for_huge_transaction (handle);
    if (rc) {
        varnam_log (handle, "Failed to check file integrity");
        if (result == VARNAM_SUCCESS)
            result = rc;
    }

    varnam_log (handle, "Compacting file");
    rc = vwt_compact_file (handle);
    if (rc && result == VARNAM_SUCCESS)
        result = rc;

    /* Earlier failures are no longer masked by a later successful step. */
    return result;
}