/*
 * Interactive console driver for the trie.
 *
 * Repeatedly prints a three-entry menu and acts on the numeric choice:
 *   1 - read a word and insert it with trie_add()
 *   2 - read a word and report whether trie_lookup() finds it
 *   3 - leave the loop
 *
 * Words are read into 10-byte buffers, so every word read is bounded to
 * 9 characters (plus the terminator).  End of input terminates the loop; a
 * menu entry that is not a number is discarded up to the end of the line and
 * the menu is shown again.
 *
 * Returns 0.
 */
int main() {
    trie* a;
    char str[10];
    char key[10];
    int ch, res;

    a = init();
    do {
        printf("\n1.Insert into trie\n2.Search for a word in trie\n3.Exit\nEnter choice:\n");

        int fields = scanf("%d", &ch);
        if (fields == EOF) {
            /* No more input: without this the loop would spin forever. */
            break;
        }
        if (fields != 1) {
            /* Not a number: drop the rest of the line and ask again. */
            int skipped;
            while ((skipped = getchar()) != '\n' && skipped != EOF) {
                /* discard */
            }
            ch = 0;
            continue;
        }

        switch (ch) {
        case 1:
            printf("Enter the word to be inserted\n");
            /* Width 9 keeps the write inside the 10-byte buffer. */
            if (scanf("%9s", str) == 1) {
                trie_add(a, str);
            }
            break;
        case 2:
            printf("Enter the word to be searched\n");
            if (scanf("%9s", key) == 1) {
                res = trie_lookup(a, key);
                if (res != 0)
                    printf("Word present!\n");
                else
                    printf("Word not in trie!\n");
            }
            break;
        case 3:
            printf("Exitting.....\n");
            break;
        default:
            /* Unknown menu entry: just show the menu again. */
            break;
        }
    } while (ch != 3);

    return 0;
}
/*
 * Builds a trie from the lines of `pool`, then answers one membership query
 * per line of `check`, writing "yes" or "no" (one per line) to `result`.
 *
 * Every line is first passed through trimString(); a non-zero return marks
 * the line as rejected and it is skipped (rejected query lines produce no
 * output).  Accepted lines are passed through reverseString() before being
 * handed to trie_add() / trie_check().
 *
 * pool   - stream of entries to load
 * check  - stream of entries to query
 * result - stream receiving the answers
 */
void trie(FILE *pool,FILE *check,FILE *result)
{
    TRIE *head = trie_create();
    char line[BUFFERSIZE];
    int exitflag = 0;
    int i;

    while (fgets(line, BUFFERSIZE, pool)) {
        /* delete the useless character '\r' */
        exitflag = trimString(line);
        if (!exitflag) {
            reverseString(line);
            trie_add(&head, line);
        }
    }

    while (fgets(line, BUFFERSIZE, check)) {
        /*
         * Cut the line at the first CR or LF.  The scan must also stop at the
         * terminator: the last line of a file may have no line ending, and an
         * over-long line is returned by fgets() without one.
         */
        i = 0;
        while (line[i] != '\0' && line[i] != '\r' && line[i] != '\n')
            i++;
        line[i] = '\0';

        exitflag = trimString(line);
        if (!exitflag) {
            reverseString(line);
            if (trie_check(&head, line)) {
                fprintf(result, "yes\n");
            } else {
                fprintf(result, "no\n");
            }
        }
    }

    trie_destroy(&head);
}
/* Interface. */

/*
 * Allocates a zero-initialised node, gives it the next index in tms->nodes,
 * an empty consequence list and an empty label trie, and appends it to
 * tms->nodes.  When `assumption` is non-zero the label is seeded with the
 * environment that contains only the new node.
 *
 * Returns -1 when the node cannot be allocated, otherwise the node's index
 * minus one.
 */
int atms_add_node(atms tms, const signed char assumption, const signed char contradiction)
{
    atms_node fresh = (atms_node)malloc(sizeof(struct str_atms_node));

    if (fresh == NULL) {
        return -1;
    }
    memset(fresh, 0, sizeof(struct str_atms_node));

    fresh->index = tms->nodes->sz;
    fresh->assumption = assumption;
    fresh->contradiction = contradiction;
    fresh->consequences = array_new(NULL, NULL);
    fresh->label = trie_new((trie_node_destroy_func_t)array_free,
                            (trie_node_clone_func_t)array_copy);

    array_append(tms->nodes, fresh);

    if (assumption != 0) {
        /* An assumption's label starts as the singleton environment {node}. */
        array singleton = array_append(array_new(NULL, NULL), fresh);
        array singleton_key = get_environment_key(singleton);

        trie_add(fresh->label, singleton_key, singleton);
        array_free(singleton_key);
    }

    return fresh->index - 1;
}
/*
 * Records `word` in the `deligatured_words` trie under its ligature-marked
 * spelling.
 *
 * Nothing happens when word_ligatures() reports no ligatures.  Otherwise the
 * marked spelling from mark_ligatures() is the key, and the stored value is a
 * freshly allocated LigatureData holding a copy of the word together with the
 * ligature position/id arrays and count.
 *
 * If the key is already present with a different source word, a warning is
 * written to stderr, the old entry is removed, and the stored word becomes
 * "{existing|new}" so that the user is forced to edit the generated file.
 */
void add_deligatured_word( unsigned char *word, Trie *deligatured_words )
{
    int *ligature_positions;
    int *ligature_ids;
    int ligature_count = word_ligatures( word, &ligature_positions, &ligature_ids );

    if ( ligature_count <= 0 ) {
        return;
    }

    unsigned char *marked = mark_ligatures( word, ligature_positions, ligature_ids, ligature_count );
    unsigned char *merged = 0;

    // warn user when 2 "real" words map to the same deligatured word
    Trie *hit = trie_contains( deligatured_words, marked );
    if ( hit ) {
        unsigned char *known = ( (LigatureData*) hit->data )->word;

        // TODO: once a second source word has been merged into the entry, a
        // source word that shows up again gets appended again; harmless,
        // because the user has to edit this entry anyway, but it could be
        // done better.
        if ( strcmp( word, known ) != 0 ) {
            // Replace the existing entry by one whose "real" word is the
            // concatenation of both candidates.
            fprintf( stderr, "warning: deligatured word '%s' has multiple source words: %s, %s; EDIT FILE!\n", marked, word, known );

            merged = (unsigned char*) malloc( 2 + strlen( known ) + 1 + strlen( word ) + 1 );
            strcpy( merged, "{" );
            strcat( merged, known );
            strcat( merged, "|" );
            strcat( merged, word );
            strcat( merged, "}" );
            word = merged;

            trie_remove( deligatured_words, marked, _free_ligature_data_callback2 );
        }
    }

    LigatureData *entry = (LigatureData*) malloc( sizeof( LigatureData ) );
    entry->word = (unsigned char*) strdup( word );
    entry->ligature_positions = ligature_positions;
    entry->ligature_ids = ligature_ids;
    entry->ligature_count = ligature_count;
    trie_add( deligatured_words, marked, entry );

    free( marked );
    free( merged );
}
/*
 * Adds every non-empty line of the text file `filename` to the trie `t`.
 *
 * Lines are read in pieces of at most 31 characters; each piece is passed to
 * trie_add() together with its length, not counting the line terminator.
 * A file that cannot be opened is silently ignored.
 */
void trie_addfile(trie* t, char* filename) {
    char line[32];
    FILE *f = fopen(filename, "r");

    if (f == NULL) {
        return;
    }

    /* Loop on fgets() itself: feof() only turns true after a failed read. */
    while (fgets(line, sizeof line, f) != NULL) {
        int i = 0;

        while (line[i] != '\n' && line[i] != '\0') {
            i++;
        }
        if (i > 0) {
            trie_add(t, line, i);
        }
    }

    fclose(f);
}
/*
 * Timed variant of the pool/check driver.
 *
 * Builds a trie from the lines of `pool`, answers one membership query per
 * line of `check` by writing "yes" or "no" (one per line) to `result`,
 * destroys the trie, and finally prints the elapsed processor time of the
 * whole run, in seconds, to stdout.
 *
 * Every line is first passed through trimString(); a non-zero return marks
 * the line as rejected and it is skipped.  Accepted lines are passed through
 * reverseString() before trie_add() / trie_check().
 */
void trie(FILE *pool,FILE *check,FILE *result)
{
    clock_t start, end;
    start = clock();

    TRIE *head = trie_create();
    char line[BUFFERSIZE];
    int i = 0;
    int exitflag = 0;

    while (fgets(line, BUFFERSIZE, pool)) {
        /* delete the useless character '\r' */
        exitflag = trimString(line);
        if (!exitflag) {
            reverseString(line);
            trie_add(&head, line);
        }
    }

    while (fgets(line, BUFFERSIZE, check)) {
        /*
         * Cut the line at the first CR or LF.  The scan must also stop at the
         * terminator: the last line of a file may have no line ending, and an
         * over-long line is returned by fgets() without one.
         */
        i = 0;
        while (line[i] != '\0' && line[i] != '\r' && line[i] != '\n')
            i++;
        line[i] = '\0';

        exitflag = trimString(line);
        if (!exitflag) {
            reverseString(line);
            if (trie_check(&head, line)) {
                fprintf(result, "yes\n");
            } else {
                fprintf(result, "no\n");
            }
        }
    }

    trie_destroy(&head);

    end = clock();
    printf("%f\n", (double)(end - start) / CLOCKS_PER_SEC);
}
/*
   Initialize our global 'deligatured_words' trie by loading the
   pre-generated ligatures from a file; see generate_ligatures().

   Each line is expected to look like "<deligatured word>\t<real word>\n".
   The part before the tab becomes the key; a heap copy of the part after it,
   without the trailing newline, becomes the value.  Lines without a tab are
   skipped.

   Returns 1 on success, 0 if the file cannot be opened or a value cannot be
   allocated.
*/
int load_ligatures( char *ligature_file )
{
    FILE *ligatures_fp = fopen( ligature_file, "r" );
    if ( !ligatures_fp ) {
        return 0;
    }

    deligatured_words = trie_new( 0, (void*) strdup( "" ) );

    unsigned char buf[MAX_WORD_LEN * 2 + 2];
    while ( fgets( (char*) buf, sizeof buf, ligatures_fp ) ) {
        unsigned char *deligatured_word = buf;

        char *delimiter = strchr( (char*) buf, '\t' );
        if ( !delimiter ) {
            // malformed line: there is no value to split off
            continue;
        }
        delimiter[0] = 0;

        unsigned char *real_word = (unsigned char*) strdup( delimiter + 1 );
        if ( !real_word ) {
            fclose( ligatures_fp );
            return 0;
        }
        // chomp newline -- only if there is one, so that a final line
        // without a line ending keeps its last character
        real_word[strcspn( (char*) real_word, "\n" )] = 0;

        trie_add( deligatured_words, deligatured_word, real_word );
    }

    fclose( ligatures_fp );
    return 1;
}
/*
 * Registers the implication `cl` with the truth maintenance system as a new
 * justification and propagates it.
 *
 * Every antecedent index of `cl` is mapped to tms->nodes->arr[index + 1];
 * that node is appended to the justification's antecedents and the
 * justification is appended to the node's consequences.  A consequent of -1
 * selects tms->nodes->arr[0], any other value selects arr[consequent + 1].
 * Propagation starts from a temporary trie that holds only the empty
 * environment.
 *
 * Returns 0 when the justification cannot be created, 1 otherwise.
 */
int atms_add_justification(atms tms, const_material_implication cl)
{
    register unsigned int pos;
    atms_justification just = atms_justification_new();
    trie seed;
    array empty_key;

    if (just == NULL) {
        return 0;
    }

    for (pos = 0; pos < cl->antecedents->sz; pos++) {
        atms_node ante = tms->nodes->arr[cl->antecedents->arr[pos] + 1];

        array_append(just->antecedents, ante);
        array_append(ante->consequences, just);
    }

    if (cl->consequent != -1) {
        just->consequent = tms->nodes->arr[cl->consequent + 1];
    } else {
        just->consequent = tms->nodes->arr[0];
    }

    array_append(tms->justifications, just);

    seed = trie_new((trie_node_destroy_func_t)array_free,
                    (trie_node_clone_func_t)array_copy);

    /* Append the empty environment. */
    empty_key = array_new(NULL, NULL);
    trie_add(seed, empty_key, array_new(NULL, NULL));
    array_free(empty_key);

    propagate(tms, just, NULL, seed);

    trie_free(seed);

    return 1;
}
/*
 * Reads a count followed by that many whitespace-separated tokens from
 * stdin, passes each through normalize() into a 9-byte buffer, and adds the
 * result to a trie rooted at a local copy of the next free trie_nodes slot.
 * Afterwards each of the root's first ten child slots that is set is handed
 * to print_trie_leaf(), and "No duplicates." is printed when has_dup is
 * unset.
 *
 * A missing or unreadable count is treated as 0; reading stops early if the
 * input ends before the announced number of tokens.
 */
int main(){
    int c;
    struct node root = trie_nodes[size++];
    root.ch = ' ';
    root.is_leaf = false;

    if (scanf("%d", &c) != 1) {
        /* Without a count there is nothing to read. */
        c = 0;
    }

    for (int i = 0; i < c; i++) {
        char temp[20000];
        char result[9];

        /* Width 19999 keeps the token inside temp. */
        if (scanf("%19999s", temp) != 1) {
            break;
        }
        normalize(temp, result);
        trie_add(&root, result);
    }

    char t[9];
    for (int i = 0; i < 10; i++) {
        if (root.children[i]) {
            print_trie_leaf(root.children[i], t, 0);
        }
    }

    if (!has_dup) {
        printf("No duplicates.\n");
    }
    return 0;
}
/*
 * Stores a private copy of `object` in `cache`, keyed by the object's name.
 *
 * With cache->lock held: if the trie already has a node for that name and
 * the node carries data, the old object is freed and replaced by the copy;
 * otherwise the copy is added as a new entry and cache->count is
 * incremented.
 *
 * Returns FALSE when `cache` or `object` is NULL or the copy cannot be made,
 * TRUE otherwise.
 */
gboolean falcon_cache_add(falcon_cache_t *cache, falcon_object_t *object)
{
    trie_node_t *old_node = NULL;
    falcon_object_t *old = NULL;
    falcon_object_t *dup = NULL;

    g_return_val_if_fail(cache, FALSE);
    g_return_val_if_fail(object, FALSE);

    /*
     * Copy only after the arguments have been validated: copying first would
     * hand a possibly NULL object to falcon_object_copy() and would leak the
     * copy whenever one of the guards above returns.
     */
    dup = falcon_object_copy(object);
    if (!dup) {
        return FALSE;
    }

    g_mutex_lock(cache->lock);

    old_node = trie_find(cache->objects, falcon_object_get_name(dup));
    if (old_node && (old = trie_data(old_node))) {
        falcon_object_free(old);
        trie_set_data(old_node, dup);
    } else {
        trie_add(cache->objects, falcon_object_get_name(dup), dup);
        cache->count++;
    }

    g_mutex_unlock(cache->lock);

    return TRUE;
}
int main(int argc, char **argv) { char *filename; if (argc == 2) { filename = argv[1]; } else { filename = DEFAULT_TRANSLITERATION_PATH; } FILE *f = fopen(filename, "wb"); if (f == NULL) { log_error("File could not be opened, ensure directory exists: %s", filename); exit(1); } size_t num_source_transliterators = sizeof(transliterators_source) / sizeof(transliterator_source_t); char *key; size_t key_len; context_type_t pre_context_type; size_t pre_context_max_len; char *pre_context; size_t pre_context_len; context_type_t post_context_type; size_t post_context_max_len; char *post_context; size_t post_context_len; char *replacement; size_t replacement_len; char *revisit; size_t revisit_len; char *group_regex_str; size_t group_regex_len; transliteration_module_init(); transliteration_table_t *trans_table = get_transliteration_table(); trie_t *trie = trans_table->trie; for (int i = 0; i < num_source_transliterators; i++) { transliterator_source_t trans_source = transliterators_source[i]; size_t trans_name_len = strlen(trans_source.name); log_info("Doing transliterator: %s\n", trans_source.name); char_array *trans_key = char_array_from_string(trans_source.name); char_array_cat(trans_key, NAMESPACE_SEPARATOR_CHAR); char *trans_name = strdup(trans_source.name); if (trans_name == NULL) { log_error("strdup returned NULL on trans_source.name\n"); goto exit_teardown; } transliterator_t *trans = transliterator_new(trans_name, trans_source.internal, trans_table->steps->n, trans_source.steps_length); for (int j = 0; j < trans_source.steps_length; j++) { transliteration_step_source_t step_source = steps_source[trans_source.steps_start + j]; size_t step_name_len = strlen(step_source.name); log_debug("Doing step: %s, type=%d\n", step_source.name, step_source.type); if (!transliteration_table_add_step(trans_table, step_source.type, step_source.name)) { log_error("Step couldn't be added\n"); goto exit_teardown; } if (step_source.type != STEP_RULESET) { continue; } char_array *step_key 
= char_array_from_string(char_array_get_string(trans_key)); char_array_cat(step_key, step_source.name); char_array_cat(step_key, NAMESPACE_SEPARATOR_CHAR); char *step_key_str = char_array_get_string(step_key); size_t step_key_len = strlen(step_key_str); for (int k = 0; k < step_source.rules_length; k++) { transliteration_rule_source_t rule_source = rules_source[step_source.rules_start + k]; key = rule_source.key; key_len = rule_source.key_len; pre_context_type = rule_source.pre_context_type; pre_context_max_len = rule_source.pre_context_max_len; pre_context = rule_source.pre_context; pre_context_len = rule_source.pre_context_len; post_context_type = rule_source.post_context_type; post_context_max_len = rule_source.post_context_max_len; post_context = rule_source.post_context; post_context_len = rule_source.post_context_len; replacement = rule_source.replacement; replacement_len = rule_source.replacement_len; revisit = rule_source.revisit; revisit_len = rule_source.revisit_len; group_regex_str = rule_source.group_regex_str; group_regex_len = rule_source.group_regex_len; uint32_t data = trans_table->replacements->n; char_array *rule_key = char_array_from_string(step_key_str); uint32_t replacement_string_index = cstring_array_num_strings(trans_table->replacement_strings); cstring_array_add_string_len(trans_table->replacement_strings, replacement, replacement_len); uint32_t revisit_index = 0; if (revisit != NULL && revisit_len > 0) { revisit_index = cstring_array_num_strings(trans_table->revisit_strings); cstring_array_add_string_len(trans_table->revisit_strings, revisit, revisit_len); } group_capture_array *groups = parse_groups(group_regex_str, group_regex_len); transliteration_replacement_t *trans_repl = transliteration_replacement_new(replacement_string_index, revisit_index, groups); uint32_t replacement_index = trans_table->replacements->n; transliteration_replacement_array_push(trans_table->replacements, trans_repl); int c; char *token; log_debug("Doing rule: 
%s\n", key); string_tree_t *tree = regex_string_tree(key, key_len); string_tree_t *pre_context_tree = NULL; string_tree_iterator_t *pre_context_iter = NULL; cstring_array *pre_context_strings = NULL; if (pre_context_type != CONTEXT_TYPE_NONE) { pre_context_strings = cstring_array_new(); } if (pre_context_type == CONTEXT_TYPE_REGEX) { log_debug("pre_context_type == CONTEXT_TYPE_REGEX\n"); pre_context_tree = regex_string_tree(pre_context, pre_context_len); pre_context_iter = string_tree_iterator_new(pre_context_tree); char_array *pre_context_perm = char_array_new_size(pre_context_len); for (; !string_tree_iterator_done(pre_context_iter); string_tree_iterator_next(pre_context_iter)) { char_array_clear(pre_context_perm); for (c = 0; c < pre_context_iter->num_tokens; c++) { token = string_tree_iterator_get_string(pre_context_iter, c); if (token == NULL || strlen(token) == 0) { log_warn("pre_token_context is NULL or 0 length: %s\n", token); } char_array_cat(pre_context_perm, token); } token = char_array_get_string(pre_context_perm); if (token == NULL || strlen(token) == 0) { log_warn("pre_perm is NULL or 0 length\n"); } cstring_array_add_string(pre_context_strings, token); } char_array_destroy(pre_context_perm); string_tree_iterator_destroy(pre_context_iter); string_tree_destroy(pre_context_tree); } else if (pre_context_type == CONTEXT_TYPE_STRING) { if (pre_context == NULL || strlen(pre_context) == 0) { log_warn("pre_context STRING NULL or 0 length\n"); } cstring_array_add_string(pre_context_strings, pre_context); } else if (pre_context_type == CONTEXT_TYPE_WORD_BOUNDARY) { cstring_array_add_string(pre_context_strings, WORD_BOUNDARY_CHAR); } size_t num_pre_context_strings = 0; if (pre_context_type != CONTEXT_TYPE_NONE) { num_pre_context_strings = cstring_array_num_strings(pre_context_strings); log_debug("num_pre_context_strings = %zu\n", num_pre_context_strings); } string_tree_t *post_context_tree = NULL; string_tree_iterator_t *post_context_iter = NULL; cstring_array 
*post_context_strings = NULL; if (post_context_type != CONTEXT_TYPE_NONE) { post_context_strings = cstring_array_new(); } if (post_context_type == CONTEXT_TYPE_REGEX) { log_debug("post_context_type == CONTEXT_TYPE_REGEX\n"); post_context_tree = regex_string_tree(post_context, post_context_len); post_context_iter = string_tree_iterator_new(post_context_tree); char_array *post_context_perm = char_array_new_size(post_context_len); for (; !string_tree_iterator_done(post_context_iter); string_tree_iterator_next(post_context_iter)) { char_array_clear(post_context_perm); for (c = 0; c < post_context_iter->num_tokens; c++) { token = string_tree_iterator_get_string(post_context_iter, c); if (token == NULL) { log_error ("post_token_context is NULL\n"); } else if (strlen(token) == 0) { log_error("post_token_context is 0 length\n"); } char_array_cat(post_context_perm, token); } cstring_array_add_string(post_context_strings, char_array_get_string(post_context_perm)); } char_array_destroy(post_context_perm); string_tree_iterator_destroy(post_context_iter); string_tree_destroy(post_context_tree); } else if (post_context_type == CONTEXT_TYPE_STRING) { if (post_context == NULL || strlen(post_context) == 0) { log_error("post_context STRING NULL or 0 length\n"); } cstring_array_add_string(post_context_strings, post_context); } else if (post_context_type == CONTEXT_TYPE_WORD_BOUNDARY) { cstring_array_add_string(post_context_strings, WORD_BOUNDARY_CHAR); } size_t num_post_context_strings = 0; if (post_context_type != CONTEXT_TYPE_NONE) { num_post_context_strings = cstring_array_num_strings(post_context_strings); log_debug("num_post_context_strings = %zu\n", num_post_context_strings); } cstring_array *context_strings = NULL; size_t num_context_strings = 0; char *context_start_char = NULL; bool combined_context_strings = false; int ante, post; if (num_pre_context_strings > 0 && num_post_context_strings > 0) { context_start_char = PRE_CONTEXT_CHAR; combined_context_strings = true; size_t 
max_string_size = 2 * MAX_UTF8_CHAR_SIZE + ((pre_context_max_len * MAX_UTF8_CHAR_SIZE) * (post_context_max_len * MAX_UTF8_CHAR_SIZE)); num_context_strings = num_pre_context_strings * num_post_context_strings; char_array *context = char_array_new_size(max_string_size); context_strings = cstring_array_new_size(num_context_strings * max_string_size + num_context_strings); for (ante = 0; ante < num_pre_context_strings; ante++) { char_array_clear(context); token = cstring_array_get_string(pre_context_strings, ante); if (token == NULL || strlen(token) == 0) { log_error("pre_context token was NULL or 0 length\n"); goto exit_teardown; } char_array_cat(context, token); size_t context_len = strlen(char_array_get_string(context)); for (post = 0; post < num_post_context_strings; post++) { context->n = context_len; char_array_cat(context, POST_CONTEXT_CHAR); token = cstring_array_get_string(post_context_strings, post); char_array_cat(context, token); if (token == NULL || strlen(token) == 0) { log_error("post_context token was NULL or 0 length\n"); goto exit_teardown; } token = char_array_get_string(context); cstring_array_add_string(context_strings, token); } } char_array_destroy(context); } else if (num_pre_context_strings > 0) { context_start_char = PRE_CONTEXT_CHAR; num_context_strings = num_pre_context_strings; context_strings = pre_context_strings; } else if (num_post_context_strings > 0) { context_start_char = POST_CONTEXT_CHAR; num_context_strings = num_post_context_strings; context_strings = post_context_strings; } if (num_context_strings > 0) { log_debug("num_context_strings = %zu\n", num_context_strings); } if (tree == NULL) { log_error("Tree was NULL, rule=%s\n", key); goto exit_teardown; } string_tree_iterator_t *iter = string_tree_iterator_new(tree); log_debug("iter->remaining=%d\n", iter->remaining); char *key_str; for (; !string_tree_iterator_done(iter); string_tree_iterator_next(iter)) { rule_key->n = step_key_len; for (c = 0; c < iter->num_tokens; c++) { token 
= string_tree_iterator_get_string(iter, c); if (token == NULL) { log_error("string_tree_iterator_get_string was NULL: %s\n", key); goto exit_teardown; } char_array_cat(rule_key, token); log_debug("string_tree token was %s\n", token); } log_debug("rule_key=%s\n", char_array_get_string(rule_key)); size_t context_key_len; if (num_context_strings == 0) { token = char_array_get_string(rule_key); if (trie_get(trie, token) == NULL_NODE_ID) { trie_add(trie, token, replacement_index); } else { log_warn("Key exists: %s, skipping\n", token); } } else { char_array_cat(rule_key, context_start_char); context_key_len = strlen(char_array_get_string(rule_key)); for (c = 0; c < num_context_strings; c++) { rule_key->n = context_key_len; token = cstring_array_get_string(context_strings, c); if (token == NULL) { log_error("token was NULL for c=%d\n", c); } char_array_cat(rule_key, token); token = char_array_get_string(rule_key); if (trie_get(trie, token) == NULL_NODE_ID) { trie_add(trie, token, replacement_index); } else { log_warn("Key exists: %s, skipping\n", token); } } } } string_tree_iterator_destroy(iter); string_tree_destroy(tree); char_array_destroy(rule_key); if (pre_context_strings != NULL) { cstring_array_destroy(pre_context_strings); } if (post_context_strings != NULL) { cstring_array_destroy(post_context_strings); } // Only needed if we created a combined context array if (combined_context_strings) { cstring_array_destroy(context_strings); } } char_array_destroy(step_key); } char_array_destroy(trans_key); if (!transliteration_table_add_transliterator(trans)) { goto exit_teardown; } } size_t num_source_scripts = sizeof(script_transliteration_rules) / sizeof(script_transliteration_rule_t); for (int i = 0; i < num_source_scripts; i++) { script_transliteration_rule_t rule = script_transliteration_rules[i]; if (!transliteration_table_add_script_language(rule.script_language, rule.index)) { goto exit_teardown; } transliterator_index_t index = rule.index; for (int j = 
index.transliterator_index; j < index.transliterator_index + index.num_transliterators; j++) { char *trans_name = script_transliterators[j]; if (trans_name == NULL) { goto exit_teardown; } cstring_array_add_string(trans_table->transliterator_names, trans_name); } } transliteration_table_write(f); fclose(f); transliteration_module_teardown(); log_info("Done!\n"); exit(EXIT_SUCCESS); exit_teardown: log_error("FAIL\n"); transliteration_module_teardown(); exit(EXIT_FAILURE); }
/*
 * Rewrites the literal sets of `cf` by repeated resolution, using a trie as
 * the clause database, and returns the outcome as a new tv_nf.
 *
 * NOTE(review): going by the function name this is presumably Tison's method
 * for computing prime implicates/implicants -- confirm against the callers.
 *
 * Steps visible in the code:
 *   1. Every input literal set is converted to a sorted int list and added
 *      to the trie unless the trie already subsumes it; entries it subsumes
 *      are removed first.
 *   2. If the trie is empty after that, a duplicate of `cf` is returned.
 *   3. For every variable index l, all pairs of terminal trie nodes are
 *      visited and resolve_int_list_literal() is called on their values with
 *      the literal `l * 2`; non-NULL, non-subsumed results are added to the
 *      trie in the same way as in step 1.  trie_gc() runs after each
 *      variable.
 *   4. The values of all terminal nodes are converted back to literal sets
 *      and installed as the literal sets of the result.
 *
 * cf - the normal form to process; it is only read.
 * Returns a new tv_nf (either the rewritten copy or a duplicate of cf).
 */
tv_nf pi_tison_trie(const_tv_nf cf)
{
    unsigned int i, l, q, x, y, z;

    tv_nf result = truncate_copy_nf(cf);

    tv_literal_set_list input = get_literal_sets(cf);
    tv_literal_set_list output = get_literal_sets(result);

    tv_literal_set model;

    stack s, sx, sy;
    trie_node c, cx, cy;

    trie db = trie_new((trie_node_destroy_func_t)array_free,
                       (trie_node_clone_func_t)array_copy);

    /* Load the input into the trie, keeping it free of subsumed entries. */
    for (i = 0; i < input->sz; i++) {
        /* NOTE: this `l` shadows the unsigned loop counter `l` declared above. */
        array l = literal_set_to_sorted_int_list(input->arr[i]);
        if (trie_is_subsumed(db, l)) {
            array_free(l);
            continue;
        }
        trie_remove_subsumed(db, l);
        /* The key is freed below, so the stored value is a separate copy. */
        trie_add(db, l, array_copy(l));
        array_free(l);
    }
    trie_gc(db);
/* We have the literal_sets in the trie now. */

    /* Nothing was stored: hand back an unmodified duplicate of the input. */
    if (db->root->edges == NULL) {
        trie_free(db);
        rfre_tv_nf(result);
        return rdup_tv_nf(cf);
    }

    /* One resolution pass per variable. */
    for (l = 0; l < cf->variables->sz; l++) {
        sx = stack_new(NULL, NULL);
/* Outer walk. */
        stack_push(sx, db->root);
        while (NULL != (cx = stack_pop(sx))) {
            for (x = 0; x < cx->edges->sz; x++) {
                /* Entries flagged as deleted are skipped until trie_gc() runs. */
                if (((trie_node)cx->kids->arr[x])->is_deleted) {
                    continue;
                }
                if (((trie_node)cx->kids->arr[x])->is_terminal) {
/* Inner walk. */
                    /*
                     * The inner walk starts from a copy of the outer stack
                     * plus the current node, so it covers the nodes the
                     * outer walk has not processed yet.
                     */
                    sy = stack_new(NULL, NULL);
                    for (q = 0; q < sx->sz; q++) {
                        stack_push(sy, sx->arr[q]);
                    }
                    stack_push(sy, cx); /* Finish the c level. */
                    /* For cx itself only the siblings after x are candidates;
                       every other node is scanned from its first kid (z = 0
                       below). */
                    z = x + 1;
                    while (NULL != (cy = stack_pop(sy))) {
                        for (y = z; y < cy->edges->sz; y++) {
                            if (((trie_node)cy->kids->arr[y])->is_deleted) {
                                continue;
                            }
                            if (((trie_node)cy->kids->arr[y])->is_terminal) {
                                /* NOTE(review): `l * 2` presumably encodes a
                                   literal of variable l -- confirm against
                                   resolve_int_list_literal(). */
                                array r = resolve_int_list_literal(((trie_node)cx->kids->arr[x])->value,
                                                                   ((trie_node)cy->kids->arr[y])->value,
                                                                   l * 2);
                                /* NOTE(review): the two `continue`s below also
                                   skip pushing this kid onto sy, so its
                                   subtree is not descended into from here --
                                   confirm that this is intended. */
                                if (NULL == r) {
                                    continue;
                                }
                                if (trie_is_subsumed(db, r)) {
                                    array_free(r);
                                    continue;
                                }
                                trie_remove_subsumed(db, r);
                                trie_add(db, r, array_copy(r));
                                array_free(r);
                            }
                            if (((trie_node)cy->kids->arr[y])->edges != NULL) {
                                stack_push(sy, cy->kids->arr[y]);
                            }
                        }
                        z = 0;
                    }
                    stack_free(sy);
/* End of the inner walk. */
                }
                if (((trie_node)cx->kids->arr[x])->edges != NULL) {
                    stack_push(sx, cx->kids->arr[x]);
                }
            }
        }
/* End of the outer walk. */
        stack_free(sx);

        trie_gc(db);
    }

    /* The first input set serves as a template; its literal lists are emptied. */
    assert(input->sz > 0);
    model = rdup_tv_literal_set(input->arr[0]);
    rfre_int_list(model->pos);
    rfre_int_list(model->neg);
    model->pos = int_listNIL;
    model->neg = int_listNIL;

/* Convert the trie back to a clausal form. */
    s = stack_new(NULL, NULL);
    stack_push(s, db->root);
    while (NULL != (c = stack_pop(s))) {
        for (i = 0; i < c->edges->sz; i++) {
            if (((trie_node)c->kids->arr[i])->is_terminal) {
                append_tv_literal_set_list(output,
                                           int_list_to_literal_set(((trie_node)c->kids->arr[i])->value,
                                                                   model));
            }
            if (((trie_node)c->kids->arr[i])->edges != NULL) {
                stack_push(s, c->kids->arr[i]);
            }
        }
    }
    stack_free(s);

    trie_free(db);

    fre_tv_literal_set(model);

    set_literal_sets(result, output);

    return result;
}
/*
 * Adds one field per distinct event name of an event group to the trial
 * struct `mxTrial`, and stores the list of generated field names in the
 * "signalNames" field of `mxGroupMeta`.
 *
 * The group must have exactly one signal, of type SIGNAL_TYPE_EVENTNAME;
 * otherwise an error is logged and nothing is changed.  Each sample of that
 * signal is an event name; the group timestamp of the sample is collected
 * per event name in a temporary trie.  Every event then becomes a field
 * (named "<groupName>_<eventName>" when useGroupPrefix is set, otherwise
 * "<eventName>") holding a double column vector of
 * round(timestamp - timeTrialStart).
 *
 * mxTrial         - trial struct receiving the event fields (element 0)
 * mxGroupMeta     - meta struct array receiving "signalNames"
 * pg              - the event group
 * trialIdx        - index of the trial inside the group's buffers
 * timeTrialStart  - subtracted from every event timestamp
 * useGroupPrefix  - whether field names are prefixed with the group name
 * groupMetaIndex  - element of mxGroupMeta to write to
 */
void addEventGroupFields(mxArray* mxTrial, mxArray* mxGroupMeta, const GroupInfo* pg,
        unsigned trialIdx, timestamp_t timeTrialStart, bool useGroupPrefix, unsigned groupMetaIndex)
{
    // field names will be groupName_<eventName>
    // but the signal always comes in as .eventName and the contents are the name
    // of the event

    // for now check that the event group has only 1 signal and its type is
    // EventName; this is done before anything is allocated so that the early
    // return cannot leak
    if(pg->nSignals != 1 || pg->signals[0]->type != SIGNAL_TYPE_EVENTNAME) {
        logError("Event groups must have 1 signal of type event name");
        return;
    }

    // get timestamp buffer from group buffer
    const TimestampBuffer* groupTimestamps = pg->tsBuffers + trialIdx;
    const char* groupName = pg->name;

    const SignalDataBuffer* psdb = pg->signals[0];
    const SampleBuffer* ptb = psdb->buffers + trialIdx;

    // build up a trie where the eventName is the key and an EventTrieInfo
    // (name + TimestampBuffer) is the value
    Trie* eventTrie = trie_create();

    char eventName[MAX_SIGNAL_NAME];
    char* dataPtr = (char*)ptb->data;
    for(unsigned iSample = 0; iSample < ptb->nSamples; iSample++) {
        // first copy string into buffer, then zero terminate it; names that
        // do not fit are truncated instead of overflowing eventName
        unsigned bytesThisSample = ptb->bytesEachSample[iSample];
        unsigned bytesToCopy = bytesThisSample;
        if(bytesToCopy > (unsigned)(MAX_SIGNAL_NAME - 1))
            bytesToCopy = (unsigned)(MAX_SIGNAL_NAME - 1);
        memcpy(eventName, dataPtr, bytesToCopy);
        eventName[bytesToCopy] = '\0';

        // always advance by the full sample size, truncated or not
        dataPtr += bytesThisSample;

        // search for this eventName in the trie
        EventTrieInfo* info = (EventTrieInfo*)trie_lookup(eventTrie, eventName);
        if(info == NULL) {
            // doesn't exist, give it a TimestampBuffer
            info = (EventTrieInfo*)CALLOC(sizeof(EventTrieInfo), 1);
            if(info == NULL) {
                logError("Issue building event fields\n");
                trie_flush(eventTrie, FREE);
                return;
            }
            // NOTE(review): assumes info->eventName holds at least
            // MAX_SIGNAL_NAME bytes, as the original strncpy did -- confirm
            snprintf(info->eventName, MAX_SIGNAL_NAME, "%s", eventName);
            trie_add(eventTrie, eventName, info);
        }

        // push this timestamp to the buffer
        bool success = pushTimestampToTimestampBuffer(&info->tsBuffer, groupTimestamps->timestamps[iSample]);
        if(!success) {
            logError("Issue building event fields\n");
            // release what has been collected so far
            trie_flush(eventTrie, FREE);
            return;
        }
    }

    // now iterate over the eventName trie and add each field
    unsigned nEventNames = trie_count(eventTrie);
    mxArray* mxSignalNames = mxCreateCellMatrix(nEventNames, 1);

    unsigned iEvent = 0;
    char fieldName[MAX_SIGNAL_NAME];
    Trie* trieNode = trie_get_first(eventTrie);
    while(trieNode != NULL) {
        EventTrieInfo* info = (EventTrieInfo*)trieNode->value;

        // build the groupName_eventName field name (always terminated)
        if(useGroupPrefix)
            snprintf(fieldName, MAX_SIGNAL_NAME, "%s_%s", groupName, info->eventName);
        else
            snprintf(fieldName, MAX_SIGNAL_NAME, "%s", info->eventName);

        // store the name of the field in the cell array
        mxSetCell(mxSignalNames, iEvent, mxCreateString(fieldName));

        // copy timestamps from buffer to double vector
        mxArray* mxTimestamps = mxCreateNumericMatrix(info->tsBuffer.nSamples, 1, mxDOUBLE_CLASS, mxREAL);

        // subtract off trial start time and round
        // NOTE(review): no unit scaling happens here; presumably the
        // timestamps are already in ms -- confirm
        double_t* buffer = (double_t*)mxGetData(mxTimestamps);
        for(unsigned i = 0; i < info->tsBuffer.nSamples; i++)
            buffer[i] = round((info->tsBuffer.timestamps[i] - timeTrialStart));

        // add event time list field to trial struct
        int trialFieldNum = mxAddField(mxTrial, fieldName);
        mxSetFieldByNumber(mxTrial, 0, trialFieldNum, mxTimestamps);

        // get the next event in the trie
        trieNode = trie_get_next(trieNode);
        iEvent++;
    }

    // free the event Trie resources
    trie_flush(eventTrie, FREE);

    // add signal names to the meta array, creating the field on first use
    int metaFieldNum = mxGetFieldNumber(mxGroupMeta, "signalNames");
    if(metaFieldNum == -1)
        metaFieldNum = mxAddField(mxGroupMeta, "signalNames");
    mxSetFieldByNumber(mxGroupMeta, groupMetaIndex, metaFieldNum, mxSignalNames);
}
/*
 * Registers `expansion` for the phrase `name` in the global address
 * dictionary.
 *
 * The trie key is built as
 *     [<language><NAMESPACE_SEPARATOR_CHAR>]<body>
 * where <body> is
 *     TRIE_PREFIX_CHAR + name            if any dictionary id of the
 *                                        expansion is a separable
 *                                        concatenated prefix or an elision,
 *     TRIE_SUFFIX_CHAR + reversed name   otherwise, if any id is a
 *                                        concatenated suffix,
 *     name                               otherwise.
 *
 * If the key already exists, the expansion is appended to the existing value
 * and its address components are OR-ed in; otherwise a new value is created,
 * pushed onto address_dict->values, and its index is stored in the trie.
 *
 * Returns false when the dictionary is not set up, `name` is NULL, the key
 * buffer cannot be allocated, or the key cannot be added to the trie;
 * true otherwise.
 */
bool address_dictionary_add_expansion(char *name, char *language, address_expansion_t expansion) {
    if (address_dict == NULL || address_dict->values == NULL) {
        log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
        return false;
    }

    if (name == NULL) {
        return false;
    }

    /* Classify the expansion by the dictionaries it belongs to. */
    bool has_prefix = false;
    bool has_suffix = false;

    for (size_t d = 0; d < expansion.num_dictionaries; d++) {
        dictionary_type_t dict_id = expansion.dictionary_ids[d];

        if (dict_id == DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE ||
            dict_id == DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE) {
            has_suffix = true;
        } else if (dict_id == DICTIONARY_CONCATENATED_PREFIX_SEPARABLE ||
                   dict_id == DICTIONARY_ELISION) {
            has_prefix = true;
        }
    }

    char_array *key_buf = char_array_new_size(strlen(name));
    if (key_buf == NULL) {
        return false;
    }

    if (language != NULL) {
        char_array_cat(key_buf, language);
        char_array_cat(key_buf, NAMESPACE_SEPARATOR_CHAR);
    }

    /* Prefix takes precedence over suffix when both are present. */
    if (has_prefix) {
        char_array_cat(key_buf, TRIE_PREFIX_CHAR);
        char_array_cat(key_buf, name);
    } else if (has_suffix) {
        char_array_cat(key_buf, TRIE_SUFFIX_CHAR);
        char_array_cat_reversed(key_buf, name);
    } else {
        char_array_cat(key_buf, name);
    }

    char *key = char_array_to_string(key_buf);
    log_debug("key=%s\n", key);

    bool ok = true;
    uint32_t expansion_index;
    address_expansion_value_t *value;

    if (!trie_get_data(address_dict->trie, key, &expansion_index)) {
        /* First expansion for this key: create the value and index it. */
        value = address_expansion_value_new_with_expansion(expansion);
        expansion_index = (uint32_t)address_dict->values->n;
        address_expansion_value_array_push(address_dict->values, value);

        if (!trie_add(address_dict->trie, key, expansion_index)) {
            log_warn("Key %s could not be added to trie\n", key);
            ok = false;
        }
    } else {
        /* Key already known: extend the existing value. */
        value = address_dict->values->a[expansion_index];
        value->components |= expansion.address_components;
        address_expansion_array_push(value->expansions, expansion);
    }

    free(key);
    return ok;
}
int main(int argc, char **argv) { char *filename; if (argc == 2) { filename = argv[1]; } else { filename = DEFAULT_NUMEX_PATH; } FILE *f = fopen(filename, "wb"); if (f == NULL) { log_error("File could not be opened, ensure directory exists: %s\n", filename); numex_module_teardown(); exit(1); } if (!numex_module_init()) { log_error("Numex table initialization unsuccessful\n"); numex_module_teardown(); exit(1); } numex_table_t *numex_table = get_numex_table(); size_t num_languages = sizeof(numex_languages) / sizeof(numex_language_source_t); size_t num_source_keys = sizeof(numex_keys) / sizeof(char *); size_t num_source_rules = sizeof(numex_rules) / sizeof(numex_rule_t); if (num_source_keys != num_source_rules) { log_error("num_sourcE_keys != num_source_rules, aborting\n"); numex_module_teardown(); exit(1); } size_t num_ordinal_indicator_rules = sizeof(ordinal_indicator_rules) / sizeof(ordinal_indicator_t); char_array *key = char_array_new(); for (int i = 0; i < num_languages; i++) { numex_language_source_t lang_source = numex_languages[i]; char *lang = lang_source.name; int j; size_t rule_index = lang_source.rule_index; size_t num_rules = lang_source.num_rules; size_t ordinal_indicator_index = lang_source.ordinal_indicator_index; size_t num_ordinal_indicators = lang_source.num_ordinal_indicators; numex_rule_t rule; uint32_t value; log_info("Doing language=%s\n", lang); for (j = rule_index; j < rule_index + num_rules; j++) { char *numex_key = numex_keys[j]; numex_rule_t rule = numex_rules[j]; value = rule.rule_type != NUMEX_STOPWORD ? 
numex_table->rules->n : NUMEX_STOPWORD_INDEX; numex_rule_array_push(numex_table->rules, rule); char_array_clear(key); char_array_cat(key, lang); char_array_cat(key, NAMESPACE_SEPARATOR_CHAR); char_array_cat(key, numex_key); char *str_key = char_array_get_string(key); trie_add(numex_table->trie, str_key, value); } for (j = ordinal_indicator_index; j < ordinal_indicator_index + num_ordinal_indicators; j++) { value = numex_table->ordinal_indicators->n; ordinal_indicator_t ordinal_source = ordinal_indicator_rules[j]; if (ordinal_source.key == NULL) { log_error("ordinal source key was NULL at index %d\n", j); exit(EXIT_FAILURE); } char *ordinal_indicator_key = strdup(ordinal_source.key); if (ordinal_indicator_key == NULL) { log_error("Error in strdup\n"); exit(EXIT_FAILURE); } char *suffix = NULL; if (ordinal_source.suffix != NULL) { suffix = strdup(ordinal_source.suffix); if (suffix == NULL) { log_error("Error in strdup\n"); exit(EXIT_FAILURE); } } ordinal_indicator_t *ordinal = ordinal_indicator_new(ordinal_indicator_key, ordinal_source.gender, ordinal_source.category, suffix); ordinal_indicator_array_push(numex_table->ordinal_indicators, ordinal); char_array_clear(key); char_array_cat(key, lang); char_array_cat(key, ORDINAL_NAMESPACE_PREFIX); switch (ordinal_source.gender) { case GENDER_MASCULINE: char_array_cat(key, GENDER_MASCULINE_PREFIX); break; case GENDER_FEMININE: char_array_cat(key, GENDER_FEMININE_PREFIX); break; case GENDER_NEUTER: char_array_cat(key, GENDER_NEUTER_PREFIX); break; case GENDER_NONE: default: char_array_cat(key, GENDER_NONE_PREFIX); } switch (ordinal_source.category) { case CATEGORY_PLURAL: char_array_cat(key, CATEGORY_PLURAL_PREFIX); break; case CATEGORY_DEFAULT: default: char_array_cat(key, CATEGORY_DEFAULT_PREFIX); } char_array_cat(key, NAMESPACE_SEPARATOR_CHAR); char *reversed = utf8_reversed_string(ordinal_source.key); char_array_cat(key, reversed); free(reversed); char *str_key = char_array_get_string(key); if 
(trie_get(numex_table->trie, str_key) == NULL_NODE_ID) { trie_add(numex_table->trie, str_key, value); } else { log_warn("Key exists: %s, skipping\n", str_key); } } char *name = strdup(lang_source.name); if (name == NULL) { log_error("Error in strdup\n"); exit(EXIT_FAILURE); } numex_language_t *language = numex_language_new(name, lang_source.whole_tokens_only, lang_source.rule_index, lang_source.num_rules, lang_source.ordinal_indicator_index, lang_source.num_ordinal_indicators); numex_table_add_language(language); } char_array_destroy(key); if (!numex_table_write(f)) { log_error("Error writing numex table\n"); exit(1); } fclose(f); numex_module_teardown(); log_info("Done\n"); }
/*
 * Inserts the wide-character string `word` into the dictionary by adding it
 * to the dictionary's trie (dict->tree).
 *
 * Returns whatever trie_add() returns.
 */
int dictionary_insert(struct dictionary *dict, const wchar_t *word)
{
    int status = trie_add(dict->tree, word);

    return status;
}