Пример #1
0
int
varnam_import_learnings_from_file(varnam *handle, const char *filepath,
        void (*onfailure)(const char* line))
{
    int rc, filetype = -1;
    FILE *infile;

    if (handle == NULL || filepath == NULL)
        return VARNAM_ARGS_ERROR;

    reset_pool (handle);

    infile = fopen(filepath, "r");
    if (!infile) {
        set_last_error (handle, "Couldn't open file '%s' for reading", filepath);
        return VARNAM_ERROR;
    }

    rc = vwt_optimize_for_huge_transaction(handle);
    if (rc) {
        fclose (infile);
        return rc;
    }

    varnam_log (handle, "Starting to import from %s", filepath);
    rc = vwt_start_changes (handle);
    if (rc) {
        vwt_turn_off_optimization_for_huge_transaction(handle);
        fclose (infile);
        return rc;
    }

    filetype = get_file_type (infile);
    switch (filetype) {
        case _WORDS_IMPORT:
            rc = vwt_import_words (handle, infile, onfailure);
            if (rc != VARNAM_SUCCESS) {
                fclose (infile);
                vwt_turn_off_optimization_for_huge_transaction (handle);
                return rc;
            }
            break;
        case _PATTERNS_IMPORT:
            rc = vwt_import_patterns (handle, infile, onfailure);
            if (rc != VARNAM_SUCCESS) {
                fclose (infile);
                vwt_turn_off_optimization_for_huge_transaction (handle);
                return rc;
            }
            break;
        case -1:
            set_last_error (handle, "Couldn't read file '%s'. Unknown file type", filepath);
            fclose (infile);
            return VARNAM_ERROR;
    }

    fclose (infile);

    varnam_log (handle, "Writing changes to disk");
    rc = vwt_end_changes (handle);
    if (rc != VARNAM_SUCCESS) {
        varnam_log (handle, "Writing changes to disk failed");
        return rc;
    }

    varnam_log (handle, "Ensuring file integrity");
    rc = vwt_turn_off_optimization_for_huge_transaction(handle);
    if (rc) {
        varnam_log (handle, "Failed to check file integrity");
        return rc;
    }

    return VARNAM_SUCCESS;
}
Пример #2
0
int
varnam_set_scheme_details(
    varnam *handle,
    const char *language_code,
    const char *identifier,
    const char *display_name,
    const char *author,
    const char *compiled_date)
{
    int rc;

    set_last_error (handle, NULL);

    if (language_code != NULL && strlen(language_code) > 0)
    {
        if (strlen(language_code) != 2)
        {
            set_last_error (handle, "Language code should be one of ISO 639-1 two letter codes.");
            return VARNAM_ERROR;
        }

        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_LANGUAGE_CODE, language_code);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set language code to : %s", language_code);
    }

    if (identifier != NULL && strlen(identifier) > 0)
    {
        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_IDENTIFIER, identifier);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set language identifier to : %s", identifier);
    }

    if (display_name != NULL && strlen(display_name) > 0)
    {
        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_DISPLAY_NAME, display_name);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set language display name to : %s", display_name);
    }

    if (author != NULL && strlen(author) > 0)
    {
        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_AUTHOR, author);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set author to : %s", author);
    }

    if (compiled_date != NULL && strlen(compiled_date) > 0)
    {
        rc = vst_add_metadata (handle, VARNAM_METADATA_SCHEME_COMPILED_DATE, compiled_date);
        if (rc != VARNAM_SUCCESS)
            return rc;

        varnam_log (handle, "Set compiled date to : %s", compiled_date);
    }

    return VARNAM_SUCCESS;
}
Пример #3
0
int
varnam_learn_from_file(varnam *handle,
                       const char *filepath,
                       vlearn_status *status,
                       void (*callback)(varnam *handle, const char *word, int status_code, void *object),
                       void *object)
{
    int rc;
    FILE *infile;
    char line_buffer[10000];
    strbuf *word;
    varray *word_parts;
    int confidence;
    int parts;

    infile = fopen(filepath, "r");
    if (!infile) {
        set_last_error (handle, "Couldn't open file '%s' for reading.\n", filepath);
        return VARNAM_ERROR;
    }

    if (status != NULL)
    {
        status->total_words = 0;
        status->failed = 0;
    }

    rc = vwt_optimize_for_huge_transaction(handle);
    if (rc) {
        fclose (infile);
        return rc;
    }

    /* Learning from file will be mostly new words. Optimizing for that */
    v_->_config_mostly_learning_new_words = 1;

    varnam_log (handle, "Starting to learn from %s", filepath);
    rc = vwt_start_changes (handle);
    if (rc) {
        vwt_turn_off_optimization_for_huge_transaction(handle);
        fclose (infile);
        return rc;
    }

    while (fgets(line_buffer, sizeof(line_buffer), infile))
    {
        reset_pool (handle);

        word = get_pooled_string (handle);
        strbuf_add (word, trimwhitespace (line_buffer));
        word_parts = strbuf_split (word, handle, ' ');
        parts = varray_length (word_parts);
        if (parts > 0 && parts <= 2)
        {
            confidence = 1;
            if (parts == 2) {
                word = varray_get (word_parts, 1);
                confidence = atoi (strbuf_to_s (word));
            }

            word = varray_get (word_parts, 0);
            rc = varnam_learn_internal (handle, strbuf_to_s (word), confidence);
            if (rc) {
                if (status != NULL) status->failed++;
            }
        }
        else {
            rc = VARNAM_ERROR;
            if (status != NULL) status->failed++;
        }

        if (status   != NULL) status->total_words++;
        if (callback != NULL) callback (handle, strbuf_to_s (word), rc, object);
    }

    varnam_log (handle, "Writing changes to disk");
    rc = vwt_end_changes (handle);
    if (rc) {
        varnam_log (handle, "Writing changes to disk failed");
    }

    varnam_log (handle, "Ensuring file integrity");
    rc = vwt_turn_off_optimization_for_huge_transaction(handle);
    if (rc) {
        varnam_log (handle, "Failed to check file integrity");
    }

    varnam_log (handle, "Compacting file");
    rc = vwt_compact_file (handle);
    if (rc) return rc;

    fclose (infile);
    return rc;
}