/*
 * Removes low-priority alternatives from the token lists until the number
 * of possible patterns is small enough to learn.
 *
 * tokens is an array whose elements are arrays of vtoken. The lists are
 * visited in order. Lists holding exactly one token are left untouched.
 * From every other list, each token whose priority is
 * <= VARNAM_TOKEN_PRIORITY_LOW is removed. After a list has been trimmed
 * the total pattern count is recomputed and the function stops as soon as
 * it is <= MAXIMUM_PATTERNS_TO_LEARN.
 *
 * Returns true when the limit was reached, false when every list has been
 * visited and the count is still above the limit.
 */
static bool
remove_low_priority_tokens(varray *tokens)
{
    int i, j;
    varray *item;
    vtoken *t;

    for (i = 0; i < varray_length (tokens); i++) {
        item = varray_get (tokens, i);
        if (varray_length (item) == 1)
            continue;

        /* Walk backwards so that removing an element never shifts one that
         * has not been examined yet. This needs no scratch buffer of
         * indices, so there is no limit on how many tokens a list holds. */
        for (j = varray_length (item) - 1; j >= 0; j--) {
            t = varray_get (item, j);
            if (t->priority <= VARNAM_TOKEN_PRIORITY_LOW)
                varray_remove_at (item, j);
        }

        if (get_total_possible_patterns (tokens) <= MAXIMUM_PATTERNS_TO_LEARN)
            return true;
    }

    return false;
}
/*
 * Picks the token list that should be reduced next.
 *
 * Only lists with more than one token are candidates (lists of length 1
 * are skipped). Each candidate is represented by its last token. The list
 * whose last token has the lowest priority is selected; on a tie the
 * earliest such list wins.
 *
 * When every candidate's last token has the same priority (this includes
 * the case of no candidates at all), the largest list is selected
 * instead. If that list is missing or holds a single token, there is
 * nothing left to reduce and NULL is returned.
 */
static varray*
get_next_array(varray *tokens)
{
    varray *item, *candidate = NULL;
    int i;
    bool same_priority = true;
    vtoken *last, *lowest = NULL;

    for (i = 0; i < varray_length (tokens); i++) {
        item = varray_get (tokens, i);
        if (varray_length (item) == 1)
            continue;

        last = varray_get (item, varray_length (item) - 1);
        if (lowest == NULL) {
            lowest = last;
            candidate = item;
        }
        else if (last->priority != lowest->priority) {
            /* Record the difference before updating the minimum. Comparing
             * after the update would hide it whenever a later list lowers
             * the minimum, making the result depend on list order. */
            same_priority = false;
            if (last->priority < lowest->priority) {
                lowest = last;
                candidate = item;
            }
        }
    }

    if (same_priority) {
        candidate = get_largest_array (tokens);
        if (candidate == NULL || varray_length (candidate) == 1)
            return NULL;
    }

    return candidate;
}
/*
 * Filters every token list according to the position it occupies.
 *
 * The first list is matched against VARNAM_TOKEN_ACCEPT_IF_STARTS_WITH,
 * the last against VARNAM_TOKEN_ACCEPT_IF_ENDS_WITH and all others against
 * VARNAM_TOKEN_ACCEPT_IF_IN_BETWEEN (a single list counts as the first).
 *
 * Within a list:
 *   - VIRAMA, VISARGA, ANUSVARA, NON_JOINER and NUMBER tokens are always
 *     removed;
 *   - any other token is removed when the list originally held more than
 *     one token and its accept condition is neither
 *     VARNAM_TOKEN_ACCEPT_ALL nor the condition for this position.
 *
 * Lists that end up empty are removed from tokens.
 */
static void
apply_acceptance_condition(varray *tokens)
{
    /* Positions are judged against the original number of lists, so it is
     * captured before anything gets removed. */
    const int total = varray_length (tokens);
    int i, j, state, item_length;
    bool drop;
    varray *item;
    vtoken *t;

    /* Both loops run backwards: removing the element at the current index
     * then never shifts an element that is still to be visited, and no
     * fixed-size buffer of pending indices is needed. */
    for (i = total - 1; i >= 0; i--) {
        if (i == 0)
            state = VARNAM_TOKEN_ACCEPT_IF_STARTS_WITH;
        else if ((i + 1) == total)
            state = VARNAM_TOKEN_ACCEPT_IF_ENDS_WITH;
        else
            state = VARNAM_TOKEN_ACCEPT_IF_IN_BETWEEN;

        item = varray_get (tokens, i);

        /* The "only token in the list" exemption refers to the list as it
         * was before filtering started. */
        item_length = varray_length (item);

        for (j = item_length - 1; j >= 0; j--) {
            t = varray_get (item, j);
            switch (t->type) {
            case VARNAM_TOKEN_VIRAMA:
            case VARNAM_TOKEN_VISARGA:
            case VARNAM_TOKEN_ANUSVARA:
            case VARNAM_TOKEN_NON_JOINER:
            case VARNAM_TOKEN_NUMBER:
                drop = true;
                break;
            default:
                drop = item_length != 1
                    && t->accept_condition != VARNAM_TOKEN_ACCEPT_ALL
                    && t->accept_condition != state;
                break;
            }

            if (drop)
                varray_remove_at (item, j);
        }

        /* This happens when all the items in this list are VIRAMA,
         * VISARGA etc. */
        if (varray_length (item) == 0)
            varray_remove_at (tokens, i);
    }
}
/*
 * Decides whether a tokenized word is worth learning.
 *
 * tokens is an array of token lists produced for word. The word is
 * rejected, with the reason stored through set_last_error, when:
 *   - there are fewer than two token lists;
 *   - every token examined is a vowel;
 *   - a token of type VARNAM_TOKEN_OTHER is found (scanning stops there);
 *   - the run of identical consecutive token ids that is open when
 *     scanning ends has accumulated three or more repeats.
 *
 * Returns true when none of the above applies.
 */
static bool
can_learn_from_tokens (varnam *handle, varray *tokens, const char *word)
{
    bool only_vowels = true;
    int outer, inner;
    int run_length = 0, previous_id = 0;
    vtoken *current, *unknown = NULL;
    varray *alternatives;

    if (varray_length (tokens) < 2) {
        set_last_error (handle, "Nothing to learn from '%s'", word);
        return false;
    }

    /* The outer loop ends as soon as an unknown token has been recorded. */
    for (outer = 0; unknown == NULL && outer < varray_length (tokens); outer++) {
        alternatives = varray_get (tokens, outer);

        for (inner = 0; inner < varray_length (alternatives); inner++) {
            current = varray_get (alternatives, inner);

            if (current->type != VARNAM_TOKEN_VOWEL)
                only_vowels = false;

            if (current->type == VARNAM_TOKEN_OTHER) {
                unknown = current;
                break;
            }

            if (current->id != previous_id) {
                /* A different id starts a new run. */
                previous_id = current->id;
                run_length = 0;
            }
            else {
                ++run_length;
            }
        }
    }

    if (only_vowels) {
        set_last_error (handle, "Word contains only vowels. Nothing to learn from '%s'", word);
        return false;
    }

    if (unknown != NULL) {
        set_last_error (handle, "Can't process '%s'. One or more characters in '%s' are not known", unknown->pattern, word);
        return false;
    }

    if (run_length >= 3) {
        set_last_error (handle, "'%s' looks incorrect. Not learning anything", word);
        return false;
    }

    return true;
}
/*
 * Returns the number of distinct patterns that can be formed by picking
 * one token from every list in tokens, i.e. the product of the list
 * lengths. An empty tokens array yields 1; any empty list yields 0.
 *
 * The product saturates at the largest int value instead of overflowing.
 * Signed overflow is undefined behaviour and could otherwise wrap to a
 * small number that wrongly looks acceptable to callers comparing the
 * result against an upper limit.
 */
static int
get_total_possible_patterns(varray *tokens)
{
    /* Largest value an int can hold, written without needing <limits.h>. */
    const int ceiling = (int) (((unsigned int) -1) >> 1);
    int total = 1, i, length;
    varray *item;

    for (i = 0; i < varray_length (tokens); i++) {
        item = varray_get (tokens, i);
        length = varray_length (item);

        /* One empty list means no pattern can be formed at all. */
        if (length <= 0)
            return 0;

        if (total > ceiling / length)
            total = ceiling;    /* would overflow: clamp and keep scanning */
        else
            total *= length;
    }

    return total;
}
void model_draw(struct Model *model, float *mMat, float *vMat, float *pMat) { glEnable(GL_CULL_FACE); glEnable(GL_DEPTH_TEST); mat4 t_matrix; mat4_identity(t_matrix); mat4_mult(mMat, t_matrix, t_matrix); mat4_mult(vMat, t_matrix, t_matrix); mat4_mult(pMat, t_matrix, t_matrix); //mat4_transpose(t_matrix); int i; for(i = 0; i < varray_length(&model->features); i++) { static int FALSE = 0; const ModelFeature *feature = varray_get(&model->features, i); glbProgramTexture(glbdrawmodel, GLB_FRAGMENT_SHADER, 0, feature->color); glbProgramUniformMatrix(glbdrawmodel, GLB_VERTEX_SHADER, 0, sizeof(int), true, &FALSE); glbProgramUniformMatrix(glbdrawmodel, GLB_VERTEX_SHADER, 1, sizeof(float[16]), true, t_matrix); glbProgramDrawIndexed(glbdrawmodel, feature->mesh->vbuffer, feature->mesh->ibuffer); } }
/*
 * Returns the element of tokens (itself an array) with the greatest
 * length. When several share that length the first one wins. Returns NULL
 * when tokens has no elements.
 */
static varray*
get_largest_array(varray *tokens)
{
    varray *best = NULL;
    int pos = 0;

    while (pos < varray_length (tokens)) {
        varray *current = varray_get (tokens, pos++);

        /* Keep the present best unless this one is strictly longer. */
        if (best != NULL && varray_length (current) <= varray_length (best))
            continue;

        best = current;
    }

    return best;
}
/*
 * Debug helper: prints every element of v to stdout as a quoted string
 * followed by ", ", then ends the line. Elements are treated as C strings.
 */
void _varray_dump(varray_t* v)
{
    int pos = 0;

    while (pos < varray_length(v)) {
        const char *text = (char*)varray_get(v, pos);
        printf("'%s', ", text);
        pos++;
    }

    printf("\n");
}
/*
 * Empties array without releasing its storage: every slot currently in
 * use is set to NULL, used is reset to 0 and index to -1. The elements
 * themselves are not freed.
 */
void
varray_clear(varray *array)
{
    int slot = 0;

    while (slot < varray_length(array)) {
        array->memory[slot] = NULL;
        ++slot;
    }

    array->index = -1;
    array->used = 0;
}
END_TEST START_TEST(test_sort) { srand(12345); int i, j; uint64_t t1, t2, tt; tt = 0; int num_repeats = 10; int max_size = 20000; for(i = 0; i < num_repeats; i++) { varray_t* v = varray_new(); int size = _random64() % max_size; for(j = 0; j < size; j++) { varray_add(v, _new_int64(_random64())); } t1 = gettimeusec(); varray_sort(v, _cmp_int64); t2 = gettimeusec(); tt += t2 - t1; for(j = 0; j < varray_length(v) - 1; j++) { fail_unless(_cmp_int64( varray_get(v, j), varray_get(v, j+1) ) <= 0, "array not sorted correctly"); } for(j = varray_length(v) - 1; j >= 0; j--) { free( varray_get(v, j) ); varray_remove(v, j); } varray_free(v); } if (verbose) { printf("sort loop: %f /sec (%d repeats, %d array size)\n", (1000000*(double)num_repeats)/tt, num_repeats, max_size/2); } }
/*
 * Reports whether array holds an element that equals considers the same
 * as item. Elements are tested in order; equals receives the stored
 * element as its left argument and item as its right. The search stops at
 * the first match.
 */
bool
varray_exists (varray *array, void *item, bool (*equals)(void *left, void *right))
{
    bool found = false;
    int pos = 0;

    while (!found && pos < varray_length (array)) {
        if (equals (varray_get (array, pos), item))
            found = true;
        pos++;
    }

    return found;
}
// expects a NULL-terminated string array int _varray_cmp_to_string_array(varray_t* v, const char** strs) { int i = 0; int result = 0; while (strs[i] != NULL) { char* e = (char*)varray_get(v, i); if (e == NULL) { result = -2; continue; } if (result == 0) { result = strcmp(e, strs[i]); } i++; } if (varray_length(v) != i) { result = -3; } return result; }
/*
 * Appends every element of source to destination, in order. Only the
 * element pointers are pushed; nothing is duplicated. Does nothing when
 * either argument is NULL.
 */
void
varray_copy(varray *source, varray *destination)
{
    int pos;

    if (source == NULL || destination == NULL)
        return;

    for (pos = 0; pos < varray_length (source); pos++)
        varray_push (destination, varray_get (source, pos));
}
END_TEST START_TEST (indic_digit_rendering) { int rc; vword* word; varray *words; rc = varnam_transliterate (varnam_instance, "01", &words); assert_success (rc); ck_assert_int_eq (varray_length (words), 1); word = varray_get (words, 0); ck_assert_str_eq (word->text, "01"); rc = varnam_config (varnam_instance, VARNAM_CONFIG_USE_INDIC_DIGITS, 1); assert_success (rc); rc = varnam_transliterate (varnam_instance, "01", &words); assert_success (rc); ck_assert_int_eq (varray_length (words), 1); word = varray_get (words, 0); ck_assert_str_eq (word->text, "०१"); }
END_TEST

/*
 * With '_' between "aa" and "a", the single transliteration of "aa_a" is
 * expected to be "aa-value1a-value1".
 */
START_TEST (cancellation_character_should_force_independent_vowel_form)
{
    varray *results;
    vword *first;
    int status;

    status = varnam_transliterate (varnam_instance, "aa_a", &results);
    assert_success (status);

    ck_assert_int_eq (varray_length (results), 1);

    first = varray_get (results, 0);
    ck_assert_str_eq (first->text, "aa-value1a-value1");
}
END_TEST

/*
 * The single transliteration of "aaa" is expected to be
 * "aa-value1a-value2".
 */
START_TEST (dependent_vowel_rendering)
{
    varray *results;
    vword *first;
    int status;

    status = varnam_transliterate (varnam_instance, "aaa", &results);
    assert_success (status);

    ck_assert_int_eq (varray_length (results), 1);

    first = varray_get (results, 0);
    ck_assert_str_eq (first->text, "aa-value1a-value2");
}
/*
 * Removes the element at position index by shifting every later element
 * one slot towards the front, then decrements used and index. The removed
 * element is not freed. Does nothing when index is negative or greater
 * than array->index.
 */
void
varray_remove_at(varray *array, int index)
{
    int src, dst, count;

    if (!(index >= 0 && index <= array->index))
        return;

    count = varray_length(array);

    /* dst trails src by one: each element moves into its predecessor's slot. */
    dst = index;
    for (src = index + 1; src < count; src++, dst++) {
        array->memory[dst] = array->memory[src];
    }

    array->index--;
    array->used--;
}
END_TEST

/*
 * After learning a word, transliterating "kagha" is expected to return two
 * words, one of which is the learned word.
 */
START_TEST (basic_learning)
{
    const char *word_to_learn = "കഖ";
    varray *results;
    int status;

    status = varnam_learn (varnam_instance, word_to_learn);
    assert_success (status);

    /* "gha" could also be rendered differently, but the learned word must
     * be among the suggestions. */
    status = varnam_transliterate (varnam_instance, "kagha", &results);
    assert_success (status);

    ck_assert_int_eq (varray_length (results), 2);
    ensure_word_list_contains (results, word_to_learn);
}
END_TEST START_TEST (confidence_should_get_updated_for_existing_words) { int rc; varray* words; vword* word; const char *word_to_learn = "കഖ"; rc = varnam_learn (varnam_instance, word_to_learn); assert_success (rc); rc = varnam_learn (varnam_instance, word_to_learn); assert_success (rc); rc = varnam_transliterate (varnam_instance, "kagha", &words); assert_success (rc); ck_assert_int_eq (varray_length (words), 2); word = varray_get (words, 0); ck_assert_int_eq (word->confidence, 2); }
/*
 * Test helper: aborts the running test unless one of the vword entries in
 * words has text equal to word.
 */
void
ensure_word_list_contains(varray *words, const char *word)
{
    int i;
    vword *w;

    for (i = 0; i < varray_length (words); i++) {
        w = varray_get (words, i);
        if (strcmp (w->text, word) == 0)
            return;
    }

    /* The message is formatted by ck_abort_msg itself, with a literal
     * format string. Passing a pre-built string as the format would
     * misbehave as soon as word contained a '%'. */
    ck_abort_msg ("Expected word list to contain '%s'", word);
}
/*
 * Releases array and its slot storage. When destructor is not NULL it is
 * called once for every non-NULL element first. Passing a NULL array is
 * allowed and does nothing.
 */
void
varray_free(varray *array, void (*destructor)(void*))
{
    int pos;
    void *element;

    if (array == NULL)
        return;

    for (pos = 0; destructor != NULL && pos < varray_length(array); pos++) {
        element = varray_get (array, pos);
        if (element == NULL)
            continue;
        destructor(element);
    }

    /* free(NULL) is a no-op, so memory needs no separate check. */
    free(array->memory);
    free(array);
}
/* Returns true when array holds no elements, false otherwise. */
bool
varray_is_empty (varray *array)
{
    if (varray_length (array) != 0)
        return false;

    return true;
}
/*
 * Learns every word listed in the text file at filepath.
 *
 * Each line is trimmed and split on ' '. A line must consist of one or two
 * parts: the word, optionally followed by a confidence that is parsed with
 * atoi (confidence is 1 when absent). Any other line is counted as failed.
 *
 * handle    varnam instance to learn into.
 * filepath  file to read, opened in text mode.
 * status    optional; when not NULL, total_words and failed are reset and
 *           then updated for every line read.
 * callback  optional; when not NULL it is invoked after every line with
 *           the word (the first part of a valid line), that line's result
 *           code and object.
 * object    passed through to callback untouched.
 *
 * All lines are processed between vwt_start_changes and vwt_end_changes,
 * with the huge-transaction optimisation turned on. Failures of
 * vwt_end_changes and of turning the optimisation off are only logged.
 *
 * Returns VARNAM_ERROR when the file cannot be opened, the error of the
 * failing setup step when preparing the transaction fails, and otherwise
 * the result of vwt_compact_file.
 */
int
varnam_learn_from_file(varnam *handle,
                       const char *filepath,
                       vlearn_status *status,
                       void (*callback)(varnam *handle, const char *word, int status_code, void *object),
                       void *object)
{
    int rc;
    FILE *infile;
    char line_buffer[10000];
    strbuf *word;
    varray *word_parts;
    int confidence;
    int parts;

    infile = fopen(filepath, "r");
    if (!infile) {
        set_last_error (handle, "Couldn't open file '%s' for reading.\n", filepath);
        return VARNAM_ERROR;
    }

    if (status != NULL)
    {
        status->total_words = 0;
        status->failed = 0;
    }

    rc = vwt_optimize_for_huge_transaction(handle);
    if (rc) {
        fclose (infile);
        return rc;
    }

    /* Learning from file will be mostly new words. Optimizing for that */
    v_->_config_mostly_learning_new_words = 1;

    varnam_log (handle, "Starting to learn from %s", filepath);
    rc = vwt_start_changes (handle);
    if (rc) {
        vwt_turn_off_optimization_for_huge_transaction(handle);
        fclose (infile);
        return rc;
    }

    while (fgets(line_buffer, sizeof(line_buffer), infile))
    {
        reset_pool (handle);

        word = get_pooled_string (handle);
        strbuf_add (word, trimwhitespace (line_buffer));
        word_parts = strbuf_split (word, handle, ' ');
        parts = varray_length (word_parts);
        if (parts > 0 && parts <= 2)
        {
            confidence = 1;
            if (parts == 2) {
                word = varray_get (word_parts, 1);
                confidence = atoi (strbuf_to_s (word));
            }

            word = varray_get (word_parts, 0);
            rc = varnam_learn_internal (handle, strbuf_to_s (word), confidence);
            if (rc) {
                if (status != NULL) status->failed++;
            }
        }
        else {
            rc = VARNAM_ERROR;
            if (status != NULL) status->failed++;
        }

        if (status != NULL) status->total_words++;
        if (callback != NULL) callback (handle, strbuf_to_s (word), rc, object);
    }

    /* The input is fully consumed. Closing it here means no later exit
     * path, including a failing compaction, can leak the stream. */
    fclose (infile);

    varnam_log (handle, "Writing changes to disk");
    rc = vwt_end_changes (handle);
    if (rc) {
        varnam_log (handle, "Writing changes to disk failed");
    }

    varnam_log (handle, "Ensuring file integrity");
    rc = vwt_turn_off_optimization_for_huge_transaction(handle);
    if (rc) {
        varnam_log (handle, "Failed to check file integrity");
    }

    varnam_log (handle, "Compacting file");
    rc = vwt_compact_file (handle);

    return rc;
}