/*
 * Measures top-1 accuracy of the language classifier on a labeled data set.
 *
 * Each example read from the data set opened for `filename` is handled as
 * follows:
 *   - if its expected language is not one of the classifier's labels, it is
 *     skipped and not counted;
 *   - if classification yields no response or zero languages, a line
 *     "expected<TAB>NULL<TAB>address" is printed and it is not counted;
 *   - otherwise the first language in the response is compared with the
 *     expected one; mismatches are printed as
 *     "expected<TAB>predicted<TAB>address" and the example is counted.
 *
 * Terminates the process with EXIT_FAILURE if the data set, the classifier
 * or the label lookup trie is unavailable.
 *
 * @param filename  passed unchanged to language_classifier_data_set_init.
 * @return correct / total as a double. If no example was counted the
 *         division is 0.0 / 0, which is NaN on IEEE 754 platforms.
 */
double test_accuracy(char *filename) {
    language_classifier_data_set_t *data_set = language_classifier_data_set_init(filename);
    if (data_set == NULL) {
        log_error("Error creating data set\n");
        exit(EXIT_FAILURE);
    }

    uint32_t correct = 0;
    uint32_t total = 0;

    language_classifier_t *classifier = get_language_classifier();
    if (classifier == NULL) {
        log_error("Language classifier is not available\n");
        exit(EXIT_FAILURE);
    }

    trie_t *label_ids = trie_new_from_cstring_array(classifier->labels);
    if (label_ids == NULL) {
        log_error("Error creating label lookup trie\n");
        exit(EXIT_FAILURE);
    }

    while (language_classifier_data_set_next(data_set)) {
        char *address = char_array_get_string(data_set->address);
        char *language = char_array_get_string(data_set->language);

        /* Only the presence of the label matters; the id itself is unused. */
        uint32_t label_id;
        if (!trie_get_data(label_ids, language, &label_id)) {
            continue;
        }

        language_classifier_response_t *response = classify_languages(address);
        if (response == NULL || response->num_languages == 0) {
            printf("%s\tNULL\t%s\n", language, address);
            /* An empty (but allocated) response must still be released. */
            if (response != NULL) {
                language_classifier_response_destroy(response);
            }
            continue;
        }

        char *top_lang = response->languages[0];

        if (string_equals(top_lang, language)) {
            correct++;
        } else {
            printf("%s\t%s\t%s\n", language, top_lang, address);
        }

        total++;

        language_classifier_response_destroy(response);
    }

    /* total is uint32_t; convert explicitly so the argument matches %zu. */
    log_info("total=%zu\n", (size_t)total);

    trie_destroy(label_ids);

    /*
     * NOTE(review): data_set is never released in this function and no
     * destroy routine for it is visible in this chunk -- confirm whether one
     * exists and call it here.
     */

    return (double) correct / total;
}
Example #2
0
/*
 * Looks up `feature` in self->state_features via trie_get_data.
 *
 * feature_id is handed to trie_get_data as the output location for the
 * stored value. Returns the lookup result: true when the feature is found.
 */
static inline bool crf_get_feature_id(crf_t *self, char *feature, uint32_t *feature_id) {
    bool found = trie_get_data(self->state_features, feature, feature_id);
    return found;
}
/*
 * Looks up `feature` in self->features via trie_get_data.
 *
 * feature_id is handed to trie_get_data as the output location for the
 * stored value. Returns the lookup result: true when the feature is found.
 */
static inline bool averaged_perceptron_get_feature_id(averaged_perceptron_t *self, char *feature, uint32_t *feature_id) {
    bool found = trie_get_data(self->features, feature, feature_id);
    return found;
}
/*
 * Registers `expansion` in the global address dictionary under a key built
 * from `language` and `name`.
 *
 * Key layout:
 *   [language NAMESPACE_SEPARATOR_CHAR] body
 * where the language part is present only when `language` is non-NULL and
 * body depends on the dictionary types listed in the expansion:
 *   - any DICTIONARY_CONCATENATED_PREFIX_SEPARABLE or DICTIONARY_ELISION:
 *       TRIE_PREFIX_CHAR followed by name (takes precedence over suffix);
 *   - otherwise any DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE or
 *     DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE:
 *       TRIE_SUFFIX_CHAR followed by name reversed;
 *   - otherwise: name as-is.
 *
 * If the key already exists, the expansion is appended to the existing value
 * and its address components are OR-ed into value->components. Otherwise a
 * new value is created, appended to address_dict->values, and its index is
 * stored in the trie under the key.
 *
 * Returns true on success. Returns false when the dictionary is not set up,
 * `name` is NULL, the key cannot be built, the new value cannot be created,
 * or the key cannot be added to the trie.
 */
bool address_dictionary_add_expansion(char *name, char *language, address_expansion_t expansion) {
    if (address_dict == NULL || address_dict->values == NULL) {
        log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
        return false;
    }

    if (name == NULL) return false;

    bool is_prefix = false;
    bool is_suffix = false;

    for (size_t i = 0; i < expansion.num_dictionaries; i++) {
        dictionary_type_t dict = expansion.dictionary_ids[i];
        if (dict == DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE ||
            dict == DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE) {
            is_suffix = true;
        } else if (dict == DICTIONARY_CONCATENATED_PREFIX_SEPARABLE ||
                   dict == DICTIONARY_ELISION) {
            is_prefix = true;
        }
    }

    char_array *array = char_array_new_size(strlen(name));
    if (array == NULL) {
        return false;
    }

    if (language != NULL) {
        char_array_cat(array, language);
        char_array_cat(array, NAMESPACE_SEPARATOR_CHAR);
    }

    if (is_prefix) {
        char_array_cat(array, TRIE_PREFIX_CHAR);
        char_array_cat(array, name);
    } else if (is_suffix) {
        /* Suffix keys hold the name reversed. */
        char_array_cat(array, TRIE_SUFFIX_CHAR);
        char_array_cat_reversed(array, name);
    } else {
        char_array_cat(array, name);
    }

    /*
     * NOTE(review): presumably char_array_to_string hands the buffer over to
     * the caller and disposes of `array` itself (the original code never
     * releases `array` separately) -- confirm against its definition.
     */
    char *key = char_array_to_string(array);
    if (key == NULL) {
        return false;
    }

    log_debug("key=%s\n", key);

    bool added = false;
    uint32_t expansion_index;

    if (trie_get_data(address_dict->trie, key, &expansion_index)) {
        address_expansion_value_t *value = address_dict->values->a[expansion_index];
        value->components |= expansion.address_components;
        address_expansion_array_push(value->expansions, expansion);
        added = true;
    } else {
        address_expansion_value_t *value = address_expansion_value_new_with_expansion(expansion);
        /*
         * Never store a NULL value: a later call with the same key would
         * fetch it from values and dereference it.
         */
        if (value != NULL) {
            expansion_index = (uint32_t)address_dict->values->n;
            address_expansion_value_array_push(address_dict->values, value);

            if (trie_add(address_dict->trie, key, expansion_index)) {
                added = true;
            } else {
                /*
                 * The value stays in address_dict->values (owned by the
                 * dictionary) even though no key refers to it.
                 */
                log_warn("Key %s could not be added to trie\n", key);
            }
        }
    }

    /* Single exit: the key is released on success and failure alike. */
    free(key);
    return added;
}