C++ (Cpp) trie_add 예제들

예제 #1

0

파일 보기

파일: TrieApplication.cpp 프로젝트: anshu89/C-Cpp-Code

/*
 * Interactive driver for the trie: repeatedly shows a menu and lets the user
 * insert a word, look a word up, or quit.
 *
 * Words are read into fixed 10-byte buffers, so at most 9 characters of each
 * word are accepted; the field width in the scanf format enforces that.
 * The loop ends when the user picks option 3 or when the menu choice cannot
 * be read (end of input or a non-numeric token).
 *
 * Returns 0.
 */
int main()
{
	trie* a;

	/* Automatic storage: nothing to check for allocation failure or to free. */
	char str[10];
	char key[10];
	int ch,res;

	a=init();
	do
	{
		printf("\n1.Insert into trie\n2.Search for a word in trie\n3.Exit\nEnter choice:\n");
		if(scanf("%d",&ch)!=1)
		{
			/* A token %d cannot convert is never consumed, so retrying
			   would loop forever; end of input can never succeed either. */
			break;
		}
		switch(ch)
		{
			case 1:
				printf("Enter the word to be inserted\n");
				/* %9s leaves room for the terminating NUL in str[10]. */
				if(scanf("%9s",str)==1)
					trie_add(a,str);
				break;
			case 2:
				printf("Enter the word to be searched\n");
				if(scanf("%9s",key)==1)
				{
					res=trie_lookup(a,key);
					if(res!=0)
						printf("Word present!\n");
					else
						printf("Word not in trie!\n");
				}
				break;
			case 3:
				printf("Exitting.....\n");
				break;
			default:
				/* Unknown choice: show the menu again. */
				break;
		}
	}while(ch!=3);
	return 0;
}

예제 #2

0

파일 보기

파일: trie.c 프로젝트: qhsong/EmailSearch

/*
 * Builds a trie from the lines of `pool`, then reports for every line of
 * `check` whether it is present in that trie.
 *
 * pool   - stream read line by line; each line accepted by trimString() is
 *          reversed in place and added to the trie.
 * check  - stream read line by line; each accepted line is reversed and
 *          looked up.
 * result - receives "yes\n" or "no\n" for every accepted line of `check`.
 *
 * Lines for which trimString() returns non-zero are skipped silently in both
 * phases. The trie is destroyed before returning.
 */
void trie(FILE *pool,FILE *check,FILE *result) {
    TRIE *head = trie_create();
    char line[BUFFERSIZE];
    int exitflag=0;
    int i;
    while(fgets(line,BUFFERSIZE,pool)) {
        /* trimString also removes the useless '\r' character */
        exitflag = trimString(line);
        if(!exitflag) {
            reverseString(line);
            trie_add(&head,line);
        }
    }
    while(fgets(line,BUFFERSIZE,check)) {
        /* Cut the line at the first CR/LF. The '\0' test matters: the last
           line of a file (or an over-long line) may carry no newline, and
           without it the scan would run past the end of the string. */
        i = 0;
        while(line[i]!='\0' && line[i]!='\r' && line[i]!='\n') i++;
        line[i] = '\0';
        exitflag = trimString(line);
        if(!exitflag) {
            reverseString(line);
            if(trie_check(&head,line)) {
                fprintf(result,"yes\n");
            } else {
                fprintf(result,"no\n");
            }
        }
    }
    trie_destroy(&head);
}

예제 #3

0

파일 보기

파일: atms.c 프로젝트: abfeldman/lydia

/* Interface. */
/*
 * Allocates a zero-filled node, fills in its flags, gives it an empty
 * consequence list and an empty label trie, and appends it to tms->nodes.
 *
 * When `assumption` is non-zero the label is seeded with one environment
 * that contains only the new node.
 *
 * Returns -1 if the node cannot be allocated; otherwise the node's index
 * (the size of tms->nodes before the append) minus one.
 */
int atms_add_node(atms tms, const signed char assumption,
                  const signed char contradiction)
{
    array singleton_env;
    array singleton_key;
    atms_node fresh;

    fresh = (atms_node)malloc(sizeof(struct str_atms_node));
    if (fresh == NULL) {
        return -1;
    }
    memset(fresh, 0, sizeof(struct str_atms_node));

    /* The index is taken before the node is appended below. */
    fresh->index = tms->nodes->sz;
    fresh->assumption = assumption;
    fresh->contradiction = contradiction;
    fresh->consequences = array_new(NULL, NULL);
    fresh->label = trie_new((trie_node_destroy_func_t)array_free,
                            (trie_node_clone_func_t)array_copy);

    array_append(tms->nodes, fresh);

    if (!assumption) {
        return fresh->index - 1;
    }

    /* The label of an assumption is a singleton environment holding only
       the node itself. The key is a temporary and is released after use. */
    singleton_env = array_append(array_new(NULL, NULL), fresh);
    singleton_key = get_environment_key(singleton_env);
    trie_add(fresh->label, singleton_key, singleton_env);
    array_free(singleton_key);

    return fresh->index - 1;
}

예제 #4

0

파일 보기

파일: ligature.c 프로젝트: CIIR/rexa1-pstotext

/*
 * Records `word` in the `deligatured_words` trie under the key produced by
 * mark_ligatures(). Words for which word_ligatures() reports no ligature are
 * ignored.
 *
 * If the key is already present with a different source word, a warning is
 * written to stderr, the existing entry is removed, and the new entry stores
 * the text "{existing|word}" so the user can resolve the clash by hand.
 */
void add_deligatured_word( unsigned char *word, Trie *deligatured_words ) {

  int *ligature_positions;
  int *ligature_ids;
  int ligature_count = word_ligatures( word, &ligature_positions, &ligature_ids );

  // nothing to do for a word without ligatures
  if ( ligature_count <= 0 ) {
    return;
  }

  unsigned char *marked = mark_ligatures( word,
                                          ligature_positions,
                                          ligature_ids,
                                          ligature_count );
  unsigned char *merged = 0;

  // warn user when 2 "real" words map to the same deligatured word
  Trie *hit = trie_contains( deligatured_words, marked );
  if ( hit ) {
    unsigned char *previous = ( (LigatureData*) hit->data )->word;
    // TODO: once a 2nd source word has been merged into the stored text,
    // later repeats of a source word are appended again; tolerable because
    // the user has to edit this entry anyway, but it could be smarter.
    if ( strcmp( word, previous ) != 0 ) {
      // Different source words: swap the existing node for one whose word
      // is the concatenation of both.
      fprintf( stderr, 
               "warning: deligatured word '%s' has multiple source words: %s, %s; EDIT FILE!\n",
               marked,
               word,
               previous );
      // '{' + previous + '|' + word + '}' + NUL
      merged = (unsigned char*) malloc( strlen( previous ) + strlen( word ) + 4 );
      *merged = 0;
      strcat( merged, "{" );
      strcat( merged, previous );
      strcat( merged, "|" );
      strcat( merged, word );
      strcat( merged, "}" );
      word = merged;
      // `previous` is not used after this point
      trie_remove( deligatured_words, marked, _free_ligature_data_callback2 );
    }
  }

  LigatureData *entry = (LigatureData*) malloc( sizeof( LigatureData ) );
  entry->word = (unsigned char*) strdup( word );
  entry->ligature_positions = ligature_positions;
  entry->ligature_ids = ligature_ids;
  entry->ligature_count = ligature_count;
  trie_add( deligatured_words, marked, entry );

  free( marked );
  free( merged ); // free(NULL) is a no-op when nothing was merged
}

예제 #5

0

파일 보기

파일: trie.c 프로젝트: shawnfernandes96/School

/*
 * Adds every non-empty line of the text file `filename` to trie `t`.
 *
 * Lines are read in pieces of at most 31 characters; each piece is passed to
 * trie_add() together with its length, not counting a trailing newline. A
 * longer line therefore reaches trie_add() as several consecutive pieces.
 *
 * If the file cannot be opened the function returns without adding anything
 * (it has no way to report the failure to the caller).
 */
void trie_addfile(trie* t, char* filename) {
    char line[32];
    FILE *f = fopen(filename, "r");
    if (f == NULL)
        return;
    /* Loop on fgets() itself: feof() only turns true after a read has
       already failed, so testing it first would process one stale buffer. */
    while (fgets(line, sizeof line, f) != NULL) {
        int i = 0;
        while (line[i] != '\n' && line[i] != '\0')
            i++;
        /* i is at most 31 here because fgets() always NUL-terminates. */
        if (i > 0)
            trie_add(t, line, i);
    }
    fclose(f);
}

예제 #6

0

파일 보기

파일: clisttrie2.c 프로젝트: qhsong/EmailSearch

/*
 * Builds a trie from the lines of `pool`, answers every line of `check`
 * with "yes\n" or "no\n" on `result`, and prints the elapsed CPU time of the
 * whole run (in seconds) to stdout.
 *
 * Lines for which trimString() returns non-zero are skipped in both phases.
 * Accepted lines are reversed in place before being added or looked up.
 */
void trie(FILE *pool,FILE *check,FILE *result) {
	clock_t start,end;
	start = clock();
	TRIE *head = trie_create();
	char line[BUFFERSIZE];
	int i=0;
	int exitflag=0;
	while(fgets(line,BUFFERSIZE,pool)) {
		/* trimString also removes the useless '\r' character */
		exitflag = trimString(line);
		if(!exitflag){
			reverseString(line);
			trie_add(&head,line);
		}
	}
	while(fgets(line,BUFFERSIZE,check)) {
		/* Cut the line at the first CR/LF. The '\0' test keeps the scan
		   inside the string when the line carries no newline (last line
		   of the file, or a line longer than the buffer). */
		i = 0;
		while(line[i]!='\0' && line[i]!='\r' && line[i]!='\n') i++;
		line[i] = '\0';
		exitflag = trimString(line);
		if(!exitflag){
			reverseString(line);
			if(trie_check(&head,line)) {
				fprintf(result,"yes\n");
			}else {
				fprintf(result,"no\n");
			}
		}
	}
	trie_destroy(&head);
	end = clock();
	printf("%f\n",(double)(end -start)/CLOCKS_PER_SEC);

}

예제 #7

0

파일 보기

파일: ligature.c 프로젝트: CIIR/rexa1-pstotext

/* Initialize our global 'deligatured_words' trie by loading the
   pre-generated ligatures from a file; see generate_ligatures().

   Each line is expected to look like "<deligatured word>\t<real word>\n".
   Lines without a tab are skipped. The real word is duplicated on the heap
   and handed to the trie as the entry's data.

   Returns 1 when the file was read, 0 when it could not be opened or a
   word could not be duplicated. */
int load_ligatures( char *ligature_file ) {
  FILE *ligatures_fp = fopen( ligature_file, "r" );
  if ( !ligatures_fp ) {
    return 0;
  }
  deligatured_words = trie_new( 0, (void*) strdup( "" ) );
  unsigned char buf[MAX_WORD_LEN * 2 + 2];
  while ( fgets( (char*) buf, MAX_WORD_LEN * 2 + 2, ligatures_fp ) ) {
    unsigned char *deligatured_word = buf;
    char *delimiter = index( (char*) buf, '\t' );
    if ( !delimiter ) {
      continue; // malformed line: no key/value separator
    }
    delimiter[0] = 0;
    unsigned char *real_word = (unsigned char*) strdup( delimiter + 1 );
    if ( !real_word ) {
      fclose( ligatures_fp );
      return 0;
    }
    // chomp the newline, but only if there is one: the last line of the
    // file may lack it, and the value may be empty
    size_t real_len = strlen( (char*) real_word );
    if ( real_len > 0 && real_word[real_len - 1] == '\n' ) {
      real_word[real_len - 1] = 0;
    }
    trie_add( deligatured_words, deligatured_word, real_word );
  }
  fclose( ligatures_fp );
  return 1;
}

예제 #8

0

파일 보기

파일: atms.c 프로젝트: abfeldman/lydia

/*
 * Creates a justification for the implication `cl`, links it to the nodes
 * of `tms`, records it in tms->justifications and propagates it.
 *
 * Antecedent and consequent numbers from `cl` are shifted by one when used
 * as indices into tms->nodes; a consequent of -1 selects slot 0.
 *
 * Returns 0 if the justification cannot be created, 1 otherwise.
 */
int atms_add_justification(atms tms, const_material_implication cl)
{
    atms_justification just = atms_justification_new();
    unsigned int pos;
    trie seed;
    array empty_key;

    if (just == NULL) {
        return 0;
    }

    /* Link the justification and each antecedent node in both directions. */
    for (pos = 0; pos < cl->antecedents->sz; pos++) {
        atms_node antecedent = tms->nodes->arr[cl->antecedents->arr[pos] + 1];

        array_append(just->antecedents, antecedent);
        array_append(antecedent->consequences, just);
    }

    if (cl->consequent != -1) {
        just->consequent = tms->nodes->arr[cl->consequent + 1];
    } else {
        just->consequent = tms->nodes->arr[0];
    }

    array_append(tms->justifications, just);

    /* Propagation starts from a trie holding only the empty environment,
       stored under an empty key. The key is a temporary. */
    seed = trie_new((trie_node_destroy_func_t)array_free,
                    (trie_node_clone_func_t)array_copy);
    empty_key = array_new(NULL, NULL);
    trie_add(seed, empty_key, array_new(NULL, NULL));
    array_free(empty_key);

    propagate(tms, just, NULL, seed);
    trie_free(seed);

    return 1;
}

예제 #9

0

파일 보기

파일: 1002.c 프로젝트: fengyu225/oj

/*
 * Reads a count followed by that many whitespace-separated tokens, passes
 * each token through normalize() and adds the result to a trie, then walks
 * the trie's ten top-level children with print_trie_leaf(). Prints
 * "No duplicates." when `has_dup` is still false afterwards.
 *
 * Input that ends early or cannot be parsed simply stops the reading phase;
 * whatever was read so far is still reported.
 */
int main(){
    int c;
    struct node root = trie_nodes[size++];
    root.ch = ' ';
    root.is_leaf = false;
    /* Without a readable count there is nothing to process. */
    if(scanf("%d", &c) != 1)
        c = 0;
    for(int i=0; i<c; i++){
        char temp[20000];
        char result[9];
        /* The field width keeps an over-long token from overflowing temp. */
        if(scanf("%19999s", temp) != 1)
            break;
        normalize(temp,result);
        trie_add(&root,result);
    }
    char t[9];
    for(int i=0; i<10; i++)
        if(root.children[i])
            print_trie_leaf(root.children[i],t,0); 
    if(!has_dup)
        printf("No duplicates.\n");

    return 0;
}

예제 #10

0

파일 보기

파일: cache.c 프로젝트: nshi/falcon

/*
 * Stores a private copy of `object` in `cache`, keyed by the object's name.
 *
 * If the cache already holds data under that name, the old object is freed
 * and replaced in place; otherwise a new entry is added and cache->count is
 * incremented. The lookup and update run with cache->lock held.
 *
 * Returns FALSE if either argument is NULL or the copy could not be made,
 * TRUE otherwise.
 */
gboolean falcon_cache_add(falcon_cache_t *cache, falcon_object_t *object)
{
	trie_node_t *old_node = NULL;
	falcon_object_t *old = NULL;
	falcon_object_t *dup = NULL;

	g_return_val_if_fail(cache, FALSE);
	g_return_val_if_fail(object, FALSE);

	/* Copy only after the arguments are validated: copying first would
	   hand a possibly-NULL object to falcon_object_copy() and would leak
	   the copy whenever one of the guards above returns early. */
	dup = falcon_object_copy(object);
	if (dup == NULL)
		return FALSE;

	g_mutex_lock(cache->lock);
	old_node = trie_find(cache->objects, falcon_object_get_name(dup));

	if (old_node && (old = trie_data(old_node))) {
		falcon_object_free(old);
		trie_set_data(old_node, dup);
	} else {
		trie_add(cache->objects, falcon_object_get_name(dup), dup);
		cache->count++;
	}

	g_mutex_unlock(cache->lock);

	return TRUE;
}

예제 #11

0

파일 보기

파일: transliteration_table_builder.c 프로젝트: BERENZ/libpostal

/*
 * Builds the transliteration table from the compiled-in source arrays
 * (transliterators_source, steps_source, rules_source,
 * script_transliteration_rules, script_transliterators) and writes it to
 * the file named by argv[1], or to DEFAULT_TRANSLITERATION_PATH when no
 * argument is given.
 *
 * For every rule of every STEP_RULESET step, one trie key is added per
 * expansion of the rule's key pattern, optionally suffixed with each
 * pre-/post-context string. All keys of one rule map to the same
 * replacement index. Keys that already exist in the trie are skipped with
 * a warning.
 *
 * Exit status: EXIT_SUCCESS after the table has been written; 1 if the
 * output file cannot be opened; EXIT_FAILURE (via exit_teardown) on any
 * error while building the table.
 */
int main(int argc, char **argv) {
    char *filename;

    if (argc == 2) {
        filename = argv[1];
    } else {
        filename = DEFAULT_TRANSLITERATION_PATH;
    }

    FILE *f = fopen(filename, "wb");

    if (f == NULL) {
        log_error("File could not be opened, ensure directory exists: %s", filename);
        exit(1);
    }

    size_t num_source_transliterators = sizeof(transliterators_source) / sizeof(transliterator_source_t);

    char *key;
    size_t key_len;

    context_type_t pre_context_type;
    size_t pre_context_max_len;
    char *pre_context;
    size_t pre_context_len;

    context_type_t post_context_type;
    size_t post_context_max_len;
    char *post_context;
    size_t post_context_len;

    char *replacement;
    size_t replacement_len;

    char *revisit;
    size_t revisit_len;

    char *group_regex_str;
    size_t group_regex_len;

    transliteration_module_init();

    transliteration_table_t *trans_table = get_transliteration_table();

    trie_t *trie = trans_table->trie;

    for (int i = 0; i < num_source_transliterators; i++) {
        transliterator_source_t trans_source = transliterators_source[i];

        log_info("Doing transliterator: %s\n", trans_source.name);

        /* Key prefix shared by all rules of this transliterator. */
        char_array *trans_key = char_array_from_string(trans_source.name);
        char_array_cat(trans_key, NAMESPACE_SEPARATOR_CHAR);

        char *trans_name = strdup(trans_source.name);
        if (trans_name == NULL) {
            log_error("strdup returned NULL on trans_source.name\n");
            goto exit_teardown;
        }

        transliterator_t *trans = transliterator_new(trans_name, trans_source.internal, trans_table->steps->n, trans_source.steps_length);

        for (int j = 0; j < trans_source.steps_length; j++) {
            transliteration_step_source_t step_source = steps_source[trans_source.steps_start + j];

            log_debug("Doing step: %s, type=%d\n", step_source.name, step_source.type);

            if (!transliteration_table_add_step(trans_table, step_source.type, step_source.name)) {
                log_error("Step couldn't be added\n");
                goto exit_teardown;
            }

            /* Only rule-set steps contribute keys to the trie. */
            if (step_source.type != STEP_RULESET) {
                continue;
            }

            char_array *step_key = char_array_from_string(char_array_get_string(trans_key));
            char_array_cat(step_key, step_source.name);
            char_array_cat(step_key, NAMESPACE_SEPARATOR_CHAR);

            char *step_key_str = char_array_get_string(step_key);
            size_t step_key_len = strlen(step_key_str);

            for (int k = 0; k < step_source.rules_length; k++) {
                transliteration_rule_source_t rule_source = rules_source[step_source.rules_start + k];
                key = rule_source.key;
                key_len = rule_source.key_len;

                pre_context_type = rule_source.pre_context_type;
                pre_context_max_len = rule_source.pre_context_max_len;
                pre_context = rule_source.pre_context;
                pre_context_len = rule_source.pre_context_len;

                post_context_type = rule_source.post_context_type;
                post_context_max_len = rule_source.post_context_max_len;
                post_context = rule_source.post_context;
                post_context_len = rule_source.post_context_len;

                replacement = rule_source.replacement;
                replacement_len = rule_source.replacement_len;

                revisit = rule_source.revisit;
                revisit_len = rule_source.revisit_len;

                group_regex_str = rule_source.group_regex_str;
                group_regex_len = rule_source.group_regex_len;

                char_array *rule_key = char_array_from_string(step_key_str);

                uint32_t replacement_string_index = cstring_array_num_strings(trans_table->replacement_strings);
                cstring_array_add_string_len(trans_table->replacement_strings, replacement, replacement_len);

                uint32_t revisit_index = 0;
                if (revisit != NULL && revisit_len > 0) {
                    revisit_index = cstring_array_num_strings(trans_table->revisit_strings);
                    cstring_array_add_string_len(trans_table->revisit_strings, revisit, revisit_len);
                }

                group_capture_array *groups = parse_groups(group_regex_str, group_regex_len);

                transliteration_replacement_t *trans_repl = transliteration_replacement_new(replacement_string_index, revisit_index, groups);

                /* Taken before the push, so it is the slot the new
                   replacement ends up in. */
                uint32_t replacement_index = trans_table->replacements->n;
                transliteration_replacement_array_push(trans_table->replacements, trans_repl);

                int c;

                char *token;

                log_debug("Doing rule: %s\n", key);

                string_tree_t *tree = regex_string_tree(key, key_len);

                string_tree_t *pre_context_tree = NULL;
                string_tree_iterator_t *pre_context_iter = NULL;

                cstring_array *pre_context_strings = NULL;

                if (pre_context_type != CONTEXT_TYPE_NONE) {
                    pre_context_strings = cstring_array_new();
                }

                /* Collect every concrete pre-context string of this rule. */
                if (pre_context_type == CONTEXT_TYPE_REGEX) {
                    log_debug("pre_context_type == CONTEXT_TYPE_REGEX\n");
                    pre_context_tree = regex_string_tree(pre_context, pre_context_len);

                    pre_context_iter = string_tree_iterator_new(pre_context_tree);

                    char_array *pre_context_perm = char_array_new_size(pre_context_len);

                    for (; !string_tree_iterator_done(pre_context_iter); string_tree_iterator_next(pre_context_iter)) {
                        char_array_clear(pre_context_perm);
                        for (c = 0; c < pre_context_iter->num_tokens; c++) {
                            token = string_tree_iterator_get_string(pre_context_iter, c);
                            if (token == NULL || strlen(token) == 0) {
                                log_warn("pre_token_context is NULL or 0 length: %s\n", token);
                            }
                            char_array_cat(pre_context_perm, token);
                        }
                        token = char_array_get_string(pre_context_perm);
                        if (token == NULL || strlen(token) == 0) {
                            log_warn("pre_perm is NULL or 0 length\n");
                        }
                        cstring_array_add_string(pre_context_strings, token);
                    }

                    char_array_destroy(pre_context_perm);
                    string_tree_iterator_destroy(pre_context_iter);
                    string_tree_destroy(pre_context_tree);
                } else if (pre_context_type == CONTEXT_TYPE_STRING) {
                    if (pre_context == NULL || strlen(pre_context) == 0) {
                        log_warn("pre_context STRING NULL or 0 length\n");
                    }
                    cstring_array_add_string(pre_context_strings, pre_context);
                } else if (pre_context_type == CONTEXT_TYPE_WORD_BOUNDARY) {
                    cstring_array_add_string(pre_context_strings, WORD_BOUNDARY_CHAR);
                }

                size_t num_pre_context_strings = 0;
                if (pre_context_type != CONTEXT_TYPE_NONE) {
                    num_pre_context_strings = cstring_array_num_strings(pre_context_strings);
                    log_debug("num_pre_context_strings = %zu\n", num_pre_context_strings);
                }

                string_tree_t *post_context_tree = NULL;
                string_tree_iterator_t *post_context_iter = NULL;

                cstring_array *post_context_strings = NULL;

                if (post_context_type != CONTEXT_TYPE_NONE) {
                    post_context_strings = cstring_array_new();
                }

                /* Same collection for the post-context. */
                if (post_context_type == CONTEXT_TYPE_REGEX) {
                    log_debug("post_context_type == CONTEXT_TYPE_REGEX\n");
                    post_context_tree = regex_string_tree(post_context, post_context_len);

                    post_context_iter = string_tree_iterator_new(post_context_tree);

                    char_array *post_context_perm = char_array_new_size(post_context_len);

                    for (; !string_tree_iterator_done(post_context_iter); string_tree_iterator_next(post_context_iter)) {
                        char_array_clear(post_context_perm);
                        for (c = 0; c < post_context_iter->num_tokens; c++) {
                            token = string_tree_iterator_get_string(post_context_iter, c);
                            if (token == NULL) {
                                log_error ("post_token_context is NULL\n");
                            } else if (strlen(token) == 0) {
                                log_error("post_token_context is 0 length\n");
                            }
                            char_array_cat(post_context_perm, token);
                        }

                        cstring_array_add_string(post_context_strings, char_array_get_string(post_context_perm));
                    }

                    char_array_destroy(post_context_perm);
                    string_tree_iterator_destroy(post_context_iter);
                    string_tree_destroy(post_context_tree);
                } else if (post_context_type == CONTEXT_TYPE_STRING) {
                    if (post_context == NULL || strlen(post_context) == 0) {
                        log_error("post_context STRING NULL or 0 length\n");
                    }
                    cstring_array_add_string(post_context_strings, post_context);
                } else if (post_context_type == CONTEXT_TYPE_WORD_BOUNDARY) {
                    cstring_array_add_string(post_context_strings, WORD_BOUNDARY_CHAR);
                }

                size_t num_post_context_strings = 0;
                if (post_context_type != CONTEXT_TYPE_NONE) {
                    num_post_context_strings = cstring_array_num_strings(post_context_strings);
                    log_debug("num_post_context_strings = %zu\n", num_post_context_strings);
                }

                cstring_array *context_strings = NULL;
                size_t num_context_strings = 0;
                char *context_start_char = NULL;
                bool combined_context_strings = false;

                int ante, post;

                if (num_pre_context_strings > 0 && num_post_context_strings > 0) {
                    /* Both contexts present: build every pre x post
                       combination, joined by POST_CONTEXT_CHAR. */
                    context_start_char = PRE_CONTEXT_CHAR;
                    combined_context_strings = true;
                    size_t max_string_size = 2 * MAX_UTF8_CHAR_SIZE + 
                                             ((pre_context_max_len * MAX_UTF8_CHAR_SIZE) * 
                                             (post_context_max_len * MAX_UTF8_CHAR_SIZE));
                    num_context_strings = num_pre_context_strings * num_post_context_strings;
                    char_array *context = char_array_new_size(max_string_size);
                    context_strings = cstring_array_new_size(num_context_strings * max_string_size + num_context_strings);
                    for (ante = 0; ante < num_pre_context_strings; ante++) {
                        char_array_clear(context);

                        token = cstring_array_get_string(pre_context_strings, ante);
                        if (token == NULL || strlen(token) == 0) {
                            log_error("pre_context token was NULL or 0 length\n");
                            goto exit_teardown;
                        }

                        char_array_cat(context, token);
                        size_t context_len = strlen(char_array_get_string(context));

                        for (post = 0; post < num_post_context_strings; post++) {
                            /* Rewind to just after the pre-context token. */
                            context->n = context_len;
                            char_array_cat(context, POST_CONTEXT_CHAR);
                            token = cstring_array_get_string(post_context_strings, post);
                            /* Validate before concatenating: the token must
                               not reach char_array_cat() when it is NULL. */
                            if (token == NULL || strlen(token) == 0) {
                                log_error("post_context token was NULL or 0 length\n");
                                goto exit_teardown;
                            }
                            char_array_cat(context, token);

                            token = char_array_get_string(context);
                            cstring_array_add_string(context_strings, token);

                        }

                    }

                    char_array_destroy(context);

                } else if (num_pre_context_strings > 0) {
                    context_start_char = PRE_CONTEXT_CHAR;
                    num_context_strings = num_pre_context_strings;
                    context_strings = pre_context_strings;
                } else if (num_post_context_strings > 0) {
                    context_start_char = POST_CONTEXT_CHAR;
                    num_context_strings = num_post_context_strings;
                    context_strings = post_context_strings;
                }

                if (num_context_strings > 0) {
                    log_debug("num_context_strings = %zu\n", num_context_strings);
                }


                if (tree == NULL) {
                    log_error("Tree was NULL, rule=%s\n", key);
                    goto exit_teardown;
                }

                string_tree_iterator_t *iter = string_tree_iterator_new(tree);

                log_debug("iter->remaining=%d\n", iter->remaining);

                /* One trie key per expansion of the rule's key pattern. */
                for (; !string_tree_iterator_done(iter); string_tree_iterator_next(iter)) {
                    /* Rewind rule_key to the bare step prefix. */
                    rule_key->n = step_key_len;

                    for (c = 0; c < iter->num_tokens; c++) {
                        token = string_tree_iterator_get_string(iter, c);
                        if (token == NULL) {
                            log_error("string_tree_iterator_get_string was NULL: %s\n", key);
                            goto exit_teardown;
                        }
                        char_array_cat(rule_key, token);
                        log_debug("string_tree token was %s\n", token);
                    }

                    log_debug("rule_key=%s\n", char_array_get_string(rule_key));

                    size_t context_key_len;

                    if (num_context_strings == 0) {

                        token = char_array_get_string(rule_key);
                        if (trie_get(trie, token) == NULL_NODE_ID) {
                            trie_add(trie, token, replacement_index);
                        } else {
                            log_warn("Key exists: %s, skipping\n", token);                            
                        }
                    } else {
                        char_array_cat(rule_key, context_start_char);
                        context_key_len = strlen(char_array_get_string(rule_key));

                        for (c = 0; c < num_context_strings; c++) {
                            /* Rewind to the key plus context marker. */
                            rule_key->n = context_key_len;
                            token = cstring_array_get_string(context_strings, c);
                            if (token == NULL) {
                                /* Abort like the other NULL-token checks
                                   instead of concatenating a NULL string. */
                                log_error("token was NULL for c=%d\n", c);
                                goto exit_teardown;
                            }
                            char_array_cat(rule_key, token);
                            token = char_array_get_string(rule_key);
                            if (trie_get(trie, token) == NULL_NODE_ID) {
                                trie_add(trie, token, replacement_index);
                            } else {
                                log_warn("Key exists: %s, skipping\n", token);
                            }
                        }

                    }

                }

                string_tree_iterator_destroy(iter);
                string_tree_destroy(tree);

                char_array_destroy(rule_key);

                if (pre_context_strings != NULL) {
                    cstring_array_destroy(pre_context_strings);
                }

                if (post_context_strings != NULL) {
                    cstring_array_destroy(post_context_strings);
                }

                // Only needed if we created a combined context array
                if (combined_context_strings) {
                    cstring_array_destroy(context_strings);
                }
            }

            char_array_destroy(step_key);

        }

        char_array_destroy(trans_key);

        if (!transliteration_table_add_transliterator(trans)) {
            goto exit_teardown;
        }

    }

    size_t num_source_scripts = sizeof(script_transliteration_rules) / sizeof(script_transliteration_rule_t);

    for (int i = 0; i < num_source_scripts; i++) {
        script_transliteration_rule_t rule = script_transliteration_rules[i];

        if (!transliteration_table_add_script_language(rule.script_language, rule.index)) {
            goto exit_teardown;
        }

        transliterator_index_t index = rule.index;

        for (int j = index.transliterator_index; j < index.transliterator_index + index.num_transliterators; j++) {
            char *trans_name = script_transliterators[j];
            if (trans_name == NULL) {
                goto exit_teardown;
            }
            cstring_array_add_string(trans_table->transliterator_names, trans_name);
        }

    }

    transliteration_table_write(f);
    fclose(f);
    transliteration_module_teardown();
    log_info("Done!\n");
    exit(EXIT_SUCCESS);

exit_teardown:
    log_error("FAIL\n");
    transliteration_module_teardown();
    exit(EXIT_FAILURE);

}

예제 #12

0

파일 보기

파일: pi.c 프로젝트: abfeldman/lydia

/*
 * Computes a new normal form from `cf` by repeated pairwise resolution of
 * its literal sets, using a trie to keep only sets that are not subsumed.
 *
 * Steps, as visible below:
 *   1. Load the literal sets of `cf` into the trie `db`, skipping sets the
 *      trie already subsumes and removing sets the new one subsumes.
 *   2. For every variable index l, walk all pairs of terminal trie nodes,
 *      resolve their values on `l * 2`, and add each resolvent that is not
 *      subsumed.
 *   3. Convert the terminal nodes of the trie back into literal sets and
 *      store them in the result.
 *
 * Returns a duplicate of `cf` when the trie root has no edges after
 * loading; otherwise the truncated copy of `cf` filled with the sets from
 * step 3.
 *
 * NOTE(review): the name suggests Tison's prime-implicate method; confirm
 * against the callers before relying on that.
 */
tv_nf pi_tison_trie(const_tv_nf cf)
{
    unsigned int i, l, q, x, y, z;

    tv_nf result = truncate_copy_nf(cf);
    tv_literal_set_list input = get_literal_sets(cf);
    tv_literal_set_list output = get_literal_sets(result);
    tv_literal_set model;

    stack s, sx, sy;

    trie_node c, cx, cy;

    trie db = trie_new((trie_node_destroy_func_t)array_free,
                       (trie_node_clone_func_t)array_copy);
    for (i = 0; i < input->sz; i++) {
        /* NOTE: this `l` shadows the function-scope counter `l` for the
           duration of the loop body. */
        array l = literal_set_to_sorted_int_list(input->arr[i]);
        if (trie_is_subsumed(db, l)) {
            array_free(l);
            continue;
        }
        trie_remove_subsumed(db, l);
        /* The key `l` is a temporary; the trie receives its own copy as
           the stored value. */
        trie_add(db, l, array_copy(l));
        array_free(l);
    }
    trie_gc(db);
/* We have the literal_sets in the trie now. */

    /* Nothing was stored: hand back an unchanged duplicate of the input. */
    if (db->root->edges == NULL) {
        trie_free(db);
        rfre_tv_nf(result);
        return rdup_tv_nf(cf);
    }

    /* One resolution pass per variable. */
    for (l = 0; l < cf->variables->sz; l++) {
        sx = stack_new(NULL, NULL);
/* Outer walk. */
        stack_push(sx, db->root);
        while (NULL != (cx = stack_pop(sx))) {
            for (x = 0; x < cx->edges->sz; x++) {
                if (((trie_node)cx->kids->arr[x])->is_deleted) {
                    continue;
                }
                if (((trie_node)cx->kids->arr[x])->is_terminal) {
/* Inner walk. */
                    /* The inner walk starts from a copy of the outer
                       walk's pending nodes plus the current node, so it
                       only visits nodes the outer walk has not finished. */
                    sy = stack_new(NULL, NULL);
                    for (q = 0; q < sx->sz; q++) {
                        stack_push(sy, sx->arr[q]);
                    }
                    stack_push(sy, cx);

/* Finish the c level. */
                    /* For the first popped node (cx itself) start after
                       kid x; every later node is scanned from kid 0. */
                    z = x + 1;
                    while (NULL != (cy = stack_pop(sy))) {
                        for (y = z; y < cy->edges->sz; y++) {
                            if (((trie_node)cy->kids->arr[y])->is_deleted) {
                                continue;
                            }
                            if (((trie_node)cy->kids->arr[y])->is_terminal) {
                                array r = resolve_int_list_literal(((trie_node)cx->kids->arr[x])->value, ((trie_node)cy->kids->arr[y])->value, l * 2);
                                /* NULL: no resolvent for this pair. */
                                if (NULL == r) {
                                    continue;
                                }
                                if (trie_is_subsumed(db, r)) {
                                    array_free(r);
                                    continue;
                                }
                                trie_remove_subsumed(db, r);
                                trie_add(db, r, array_copy(r));
                                array_free(r);
                            } 
                            if (((trie_node)cy->kids->arr[y])->edges != NULL) {
                                stack_push(sy, cy->kids->arr[y]);
                            }
                        }
                        z = 0;
                    }
                    stack_free(sy);
/* End of the inner walk. */
                }
                if (((trie_node)cx->kids->arr[x])->edges != NULL) {
                    stack_push(sx, cx->kids->arr[x]);
                }
            }
        }
/* End of the outer walk. */
        stack_free(sx);
        trie_gc(db);
    }

    /* `model` is a duplicate of the first input set with its pos/neg lists
       emptied; it is passed to int_list_to_literal_set() for every
       converted set below. */
    assert(input->sz > 0);
    model = rdup_tv_literal_set(input->arr[0]);
    rfre_int_list(model->pos);
    rfre_int_list(model->neg);
    model->pos = int_listNIL;
    model->neg = int_listNIL;

/* Convert the trie back to a clausal form. */
    s = stack_new(NULL, NULL);
    stack_push(s, db->root);
    while (NULL != (c = stack_pop(s))) {
        for (i = 0; i < c->edges->sz; i++) {
            if (((trie_node)c->kids->arr[i])->is_terminal) {
                append_tv_literal_set_list(output, int_list_to_literal_set(((trie_node)c->kids->arr[i])->value, model));
            }
            if (((trie_node)c->kids->arr[i])->edges != NULL) {
                stack_push(s, c->kids->arr[i]);
            }
        }
    }
    stack_free(s);
    trie_free(db);

    fre_tv_literal_set(model);

    set_literal_sets(result, output);

    return result;
}

예제 #13

0

파일 보기

파일: writer.c 프로젝트: djoshea/matudp

// Add one field per distinct event name of an event group to a trial struct.
//
// The group must have exactly one signal, of type SIGNAL_TYPE_EVENTNAME; each
// sample of that signal is the name of an event. Samples are bucketed by name
// in a temporary trie (key: event name, value: EventTrieInfo holding a
// timestamp buffer). Every bucket then becomes a double column vector on
// mxTrial containing round(timestamp - timeTrialStart) for each occurrence.
// The generated field names are stored as a cell array in the "signalNames"
// field of mxGroupMeta at index groupMetaIndex.
//
// mxTrial         struct array that receives the per-event fields (element 0)
// mxGroupMeta     struct array that receives the "signalNames" cell array
// pg              event group; its buffers are indexed by trialIdx
// trialIdx        index into pg->tsBuffers and the signal's buffers
// timeTrialStart  value subtracted from every event timestamp
// useGroupPrefix  if true, fields are named "<groupName>_<eventName>",
//                 otherwise just "<eventName>"
// groupMetaIndex  element of mxGroupMeta that receives "signalNames"
//
// On any error the function logs, releases the temporary trie and returns
// without touching mxGroupMeta.
void addEventGroupFields(mxArray* mxTrial, mxArray* mxGroupMeta,
    const GroupInfo* pg, unsigned trialIdx, timestamp_t timeTrialStart,
    bool useGroupPrefix, unsigned groupMetaIndex)
{
    // validate the group before allocating anything so that the early return
    // cannot leak the trie: exactly 1 signal, and it must be an event name
    if(pg->nSignals != 1 || pg->signals[0]->type != SIGNAL_TYPE_EVENTNAME) {
        logError("Event groups must have 1 signal of type event name");
        return;
    }

    // get timestamp buffer from group buffer
    const TimestampBuffer* groupTimestamps = pg->tsBuffers + trialIdx;
    const char* groupName = pg->name;

    const SignalDataBuffer* psdb = pg->signals[0];
    const SampleBuffer* ptb = psdb->buffers + trialIdx;
    char eventName[MAX_SIGNAL_NAME];

    // trie where the eventName is the key and an EventTrieInfo is the value
    Trie* eventTrie = trie_create();

    const char* dataPtr = (const char*)ptb->data;
    for(unsigned iSample = 0; iSample < ptb->nSamples; iSample++) {
        unsigned bytesThisSample = ptb->bytesEachSample[iSample];

        // copy at most sizeof(eventName) - 1 bytes so the terminator always
        // fits, but still advance past the whole sample in the source data
        size_t bytesToCopy = bytesThisSample;
        if(bytesToCopy > sizeof(eventName) - 1) {
            logError("Event name too long, truncating\n");
            bytesToCopy = sizeof(eventName) - 1;
        }
        memcpy(eventName, dataPtr, bytesToCopy);
        eventName[bytesToCopy] = '\0';
        dataPtr += bytesThisSample;

        // search for this eventName in the trie
        EventTrieInfo* info = (EventTrieInfo*)trie_lookup(eventTrie, eventName);
        if(info == NULL) {
            // first occurrence of this name: give it its own zeroed record
            info = (EventTrieInfo*)CALLOC(sizeof(EventTrieInfo), 1);
            if(info == NULL) {
                logError("Issue building event fields\n");
                trie_flush(eventTrie, FREE);
                return;
            }
            // snprintf always NUL-terminates, unlike strncpy
            snprintf(info->eventName, MAX_SIGNAL_NAME, "%s", eventName);
            trie_add(eventTrie, eventName, info);
        }

        // push this timestamp to the buffer
        bool success = pushTimestampToTimestampBuffer(&info->tsBuffer, groupTimestamps->timestamps[iSample]);
        if(!success) {
            logError("Issue building event fields\n");
            trie_flush(eventTrie, FREE);
            return;
        }
    }

    // now iterate over the eventName trie and add each field
    unsigned nEventNames = trie_count(eventTrie);
    mxArray* mxSignalNames = mxCreateCellMatrix(nEventNames, 1);

    unsigned iEvent = 0;
    char fieldName[MAX_SIGNAL_NAME];
    for(Trie* trieNode = trie_get_first(eventTrie); trieNode != NULL;
        trieNode = trie_get_next(trieNode)) {
        EventTrieInfo* info = (EventTrieInfo*)trieNode->value;

        // build the groupName_eventName (or bare eventName) field name
        if(useGroupPrefix)
            snprintf(fieldName, MAX_SIGNAL_NAME, "%s_%s", groupName, info->eventName);
        else
            snprintf(fieldName, MAX_SIGNAL_NAME, "%s", info->eventName);

        // store the name of the field in the cell array
        mxSetCell(mxSignalNames, iEvent, mxCreateString(fieldName));

        // copy timestamps from buffer to double vector
        mxArray* mxTimestamps = mxCreateNumericMatrix(info->tsBuffer.nSamples, 1, mxDOUBLE_CLASS, mxREAL);

        // subtract off trial start time and round to a whole number
        double_t* buffer = (double_t*)mxGetData(mxTimestamps);
        for(unsigned i = 0; i < info->tsBuffer.nSamples; i++)
            buffer[i] = round((info->tsBuffer.timestamps[i] - timeTrialStart));

        // add event time list field to trial struct; the field number is a
        // signed int so that a -1 failure result is not mistaken for a field
        int fieldNum = mxAddField(mxTrial, fieldName);
        if(fieldNum < 0) {
            logError("Issue building event fields\n");
            mxDestroyArray(mxTimestamps);
        } else {
            mxSetFieldByNumber(mxTrial, 0, fieldNum, mxTimestamps);
        }

        iEvent++;
    }

    // free the event Trie resources
    trie_flush(eventTrie, FREE);

    // add signal names to the meta array, creating the field if needed
    int metaFieldNum = mxGetFieldNumber(mxGroupMeta, "signalNames");
    if(metaFieldNum == -1)
        metaFieldNum = mxAddField(mxGroupMeta, "signalNames");
    mxSetFieldByNumber(mxGroupMeta, groupMetaIndex, metaFieldNum, mxSignalNames);

}

예제 #14

0

파일 보기

파일: address_dictionary.c 프로젝트: tomterragni/libpostal

/*
 * Register `expansion` for the phrase `name` in the global address dictionary.
 *
 * The trie key is built as:
 *   [language NAMESPACE_SEPARATOR_CHAR] [TRIE_PREFIX_CHAR | TRIE_SUFFIX_CHAR] name
 * where the prefix marker is used if any of the expansion's dictionaries is a
 * prefix/elision dictionary, otherwise the suffix marker (with `name`
 * reversed) if any is a concatenated-suffix dictionary, otherwise no marker.
 *
 * If the key already exists, the expansion is appended to the existing value
 * and its address components are OR-ed in. Otherwise a new value is pushed
 * onto address_dict->values and its index is stored in the trie under the key.
 *
 * Returns false if the dictionary is not set up, `name` is NULL, the key
 * buffer cannot be allocated, or the key cannot be added to the trie;
 * true otherwise.
 */
bool address_dictionary_add_expansion(char *name, char *language, address_expansion_t expansion) {
    if (address_dict == NULL || address_dict->values == NULL) {
        log_error(ADDRESS_DICTIONARY_SETUP_ERROR);
        return false;
    }

    if (name == NULL) {
        return false;
    }

    /* Classify the expansion by the dictionaries it belongs to. */
    bool has_prefix_dict = false;
    bool has_suffix_dict = false;

    for (size_t d = 0; d < expansion.num_dictionaries; d++) {
        dictionary_type_t dict_id = expansion.dictionary_ids[d];

        if (dict_id == DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE ||
            dict_id == DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE) {
            has_suffix_dict = true;
        } else if (dict_id == DICTIONARY_CONCATENATED_PREFIX_SEPARABLE ||
                   dict_id == DICTIONARY_ELISION) {
            has_prefix_dict = true;
        }
    }

    char_array *key_buf = char_array_new_size(strlen(name));
    if (key_buf == NULL) {
        return false;
    }

    /* Optional language namespace comes first. */
    if (language != NULL) {
        char_array_cat(key_buf, language);
        char_array_cat(key_buf, NAMESPACE_SEPARATOR_CHAR);
    }

    /* Prefix takes precedence over suffix when both are flagged. */
    if (has_prefix_dict) {
        char_array_cat(key_buf, TRIE_PREFIX_CHAR);
        char_array_cat(key_buf, name);
    } else if (has_suffix_dict) {
        char_array_cat(key_buf, TRIE_SUFFIX_CHAR);
        char_array_cat_reversed(key_buf, name);
    } else {
        char_array_cat(key_buf, name);
    }

    char *key = char_array_to_string(key_buf);

    log_debug("key=%s\n", key);

    bool added = true;
    uint32_t expansion_index;

    if (!trie_get_data(address_dict->trie, key, &expansion_index)) {
        /* Unknown key: create a value, then index it in the trie. */
        address_expansion_value_t *fresh = address_expansion_value_new_with_expansion(expansion);
        expansion_index = (uint32_t)address_dict->values->n;
        address_expansion_value_array_push(address_dict->values, fresh);

        if (!trie_add(address_dict->trie, key, expansion_index)) {
            log_warn("Key %s could not be added to trie\n", key);
            added = false;
        }
    } else {
        /* Known key: extend the value already stored at that index. */
        address_expansion_value_t *existing = address_dict->values->a[expansion_index];
        existing->components |= expansion.address_components;
        address_expansion_array_push(existing->expansions, expansion);
    }

    /* The key string is released on both the success and failure paths. */
    free(key);
    return added;
}

예제 #15

0

파일 보기

파일: numex_table_builder.c 프로젝트: SigTill/libpostal

int main(int argc, char **argv) {
    char *filename;

    if (argc == 2) {
        filename = argv[1];
    } else {
        filename = DEFAULT_NUMEX_PATH;
    }

    FILE *f = fopen(filename, "wb");

    if (f == NULL) {
        log_error("File could not be opened, ensure directory exists: %s\n", filename);
        numex_module_teardown();
        exit(1);
    }

    if (!numex_module_init()) {
        log_error("Numex table initialization unsuccessful\n");
        numex_module_teardown();
        exit(1);
    }

    numex_table_t *numex_table = get_numex_table();

    size_t num_languages = sizeof(numex_languages) / sizeof(numex_language_source_t);

    size_t num_source_keys = sizeof(numex_keys) / sizeof(char *);
    size_t num_source_rules = sizeof(numex_rules) / sizeof(numex_rule_t);

    if (num_source_keys != num_source_rules) {
        log_error("num_sourcE_keys != num_source_rules, aborting\n");
        numex_module_teardown();
        exit(1);
    }

    size_t num_ordinal_indicator_rules = sizeof(ordinal_indicator_rules) / sizeof(ordinal_indicator_t);

    char_array *key = char_array_new();

    for (int i = 0; i < num_languages; i++) {
        numex_language_source_t lang_source = numex_languages[i];

        char *lang = lang_source.name;

        int j;

        size_t rule_index = lang_source.rule_index;
        size_t num_rules = lang_source.num_rules;
        size_t ordinal_indicator_index = lang_source.ordinal_indicator_index;
        size_t num_ordinal_indicators = lang_source.num_ordinal_indicators;

        numex_rule_t rule;

        uint32_t value;

        log_info("Doing language=%s\n", lang);

        for (j = rule_index; j < rule_index + num_rules; j++) {
            char *numex_key = numex_keys[j];
            numex_rule_t rule = numex_rules[j];

            value = rule.rule_type != NUMEX_STOPWORD ? numex_table->rules->n : NUMEX_STOPWORD_INDEX;
            numex_rule_array_push(numex_table->rules, rule);

            char_array_clear(key);
            char_array_cat(key, lang);
            char_array_cat(key, NAMESPACE_SEPARATOR_CHAR);
            char_array_cat(key, numex_key);

            char *str_key = char_array_get_string(key);

            trie_add(numex_table->trie, str_key, value);
        }

        for (j = ordinal_indicator_index; j < ordinal_indicator_index + num_ordinal_indicators; j++) {
            value = numex_table->ordinal_indicators->n;
            ordinal_indicator_t ordinal_source = ordinal_indicator_rules[j];

            if (ordinal_source.key == NULL) {
                log_error("ordinal source key was NULL at index %d\n", j);
                exit(EXIT_FAILURE);
            }

            char *ordinal_indicator_key = strdup(ordinal_source.key);
            if (ordinal_indicator_key == NULL) {
                log_error("Error in strdup\n");
                exit(EXIT_FAILURE);
            }

            char *suffix = NULL;
            if (ordinal_source.suffix != NULL) {
                suffix = strdup(ordinal_source.suffix);
                if (suffix == NULL) {
                    log_error("Error in strdup\n");
                    exit(EXIT_FAILURE);
                }
            }
            ordinal_indicator_t *ordinal = ordinal_indicator_new(ordinal_indicator_key, ordinal_source.gender, ordinal_source.category, suffix);
            ordinal_indicator_array_push(numex_table->ordinal_indicators, ordinal);            

            char_array_clear(key);
            char_array_cat(key, lang);
            char_array_cat(key, ORDINAL_NAMESPACE_PREFIX);

            switch (ordinal_source.gender) {
                case GENDER_MASCULINE:
                    char_array_cat(key, GENDER_MASCULINE_PREFIX);
                    break;
                case GENDER_FEMININE:
                    char_array_cat(key, GENDER_FEMININE_PREFIX);
                    break;
                case GENDER_NEUTER:
                    char_array_cat(key, GENDER_NEUTER_PREFIX);
                    break;
                case GENDER_NONE:
                default:
                    char_array_cat(key, GENDER_NONE_PREFIX);
            }

            switch (ordinal_source.category) {
                case CATEGORY_PLURAL:
                    char_array_cat(key, CATEGORY_PLURAL_PREFIX);
                    break;
                case CATEGORY_DEFAULT:
                default:
                    char_array_cat(key, CATEGORY_DEFAULT_PREFIX);

            }

            char_array_cat(key, NAMESPACE_SEPARATOR_CHAR);

            char *reversed = utf8_reversed_string(ordinal_source.key);
            char_array_cat(key, reversed);
            free(reversed);

            char *str_key = char_array_get_string(key);

            if (trie_get(numex_table->trie, str_key) == NULL_NODE_ID) {
                trie_add(numex_table->trie, str_key, value);
            } else {
                log_warn("Key exists: %s, skipping\n", str_key);                            
            }
        }

        char *name = strdup(lang_source.name);
        if (name == NULL) {
            log_error("Error in strdup\n");
            exit(EXIT_FAILURE);
        }

        numex_language_t *language = numex_language_new(name, lang_source.whole_tokens_only, lang_source.rule_index, lang_source.num_rules, lang_source.ordinal_indicator_index, lang_source.num_ordinal_indicators);
        numex_table_add_language(language);

    }

    char_array_destroy(key);

    if (!numex_table_write(f)) {
        log_error("Error writing numex table\n");
        exit(1);
    }

    fclose(f);

    numex_module_teardown();

    log_info("Done\n");
}

예제 #16

0

파일 보기

파일: dictionary.c 프로젝트: Pand9/IPP-2015

/*
 * Insert `word` into the dictionary by adding it to the dictionary's trie.
 * The result of trie_add() is passed back to the caller unchanged.
 */
int dictionary_insert(struct dictionary *dict, const wchar_t *word)
{
    const int status = trie_add(dict->tree, word);

    return status;
}