Пример #1
0
/**
 * Releases a pattern and everything hanging off it: the inflected form,
 * the lemma and the three code lists (grammatical, inflectional and
 * forbidden). Every release is performed with 'prv_alloc'.
 *
 * A NULL pattern is silently ignored.
 */
void free_pattern(struct pattern* p,Abstract_allocator prv_alloc) {
    if (p!=NULL) {
        /* The two strings are optional: only release the ones that are set */
        if (p->inflected!=NULL) {
            free_cb(p->inflected,prv_alloc);
        }
        if (p->lemma!=NULL) {
            free_cb(p->lemma,prv_alloc);
        }

        /* The code lists are handed over unconditionally */
        free_list_ustring(p->grammatical_codes,prv_alloc);
        free_list_ustring(p->inflectional_codes,prv_alloc);
        free_list_ustring(p->forbidden_codes,prv_alloc);

        /* Finally, the pattern structure itself */
        free_cb(p,prv_alloc);
    }
}
Пример #2
0
/**
 * Releases the lemma tree rooted at 'node': the list of inflected forms
 * attached to the node (when there is one), then the list of sons via
 * free_lemma_node_list, and finally the node itself.
 *
 * A NULL node is silently ignored.
 */
void free_lemma_node(struct lemma_node* node) {
    if (node!=NULL) {
        if (node->inflected_forms!=NULL) free_list_ustring(node->inflected_forms);
        free_lemma_node_list(node->sons);
        free(node);
    }
}
Пример #3
0
/**
 * Attaches to the cassys token list the replacement sequences produced by
 * one transducer.
 *
 * The function loads the alphabet, derives the snt file names from 'text'
 * and reads the concordance index file (snt_files->concord_ind) into a fifo.
 * Each match taken from the fifo is then processed as follows:
 *   - its label is tokenized word by word with the alphabet;
 *   - the resulting strings are turned into a cassys token list tagged
 *     with 'transducer_id';
 *   - the element where the match starts is looked up with get_element_at,
 *     using the output level 'transducer_id - 1';
 *   - add_output links the new sequence to that element.
 * The match (label and structure) and the temporary string list are
 * released at the end of each iteration.
 *
 * @param text            name used to build the snt file names
 * @param list            cassys token list to enrich
 * @param transducer_id   identifier of the transducer that produced the matches
 * @param alphabet_name   name of the alphabet file given to load_alphabet
 * @param mask_encoding_compatibility_input
 *                        forwarded unchanged to read_concord_file
 * @return the 'list' pointer that was passed in
 */
cassys_tokens_list *add_replaced_text( const char *text, cassys_tokens_list *list,
		 int transducer_id, const char *alphabet_name,int mask_encoding_compatibility_input) {

	Alphabet *alphabet = load_alphabet(alphabet_name);
	struct snt_files *snt_text_files = new_snt_files(text);
	struct fifo *stage_concord = read_concord_file(snt_text_files->concord_ind, mask_encoding_compatibility_input);

	// Performance enhancement: since matches are sorted, each lookup starts
	// from the element found for the previous match instead of the list head.
	cassys_tokens_list *current_list_position = list;
	long current_token_position = 0;

	while (!is_empty(stage_concord)) {
		locate_pos *l = (locate_pos*) take_ptr(stage_concord);

		struct list_ustring *new_sentence_lu = cassys_tokenize_word_by_word(l->label,
				alphabet);
		cassys_tokens_list *new_sentence_ctl =
				new_list(new_sentence_lu, transducer_id);

		// The search is relative to the last known position, so the token
		// position already reached is subtracted from the match start offset.
		cassys_tokens_list *list_position = get_element_at(current_list_position, transducer_id - 1,
				l->token_start_offset - current_token_position);

		// Both offsets are inclusive, hence the +1
		int replaced_sentence_length = l->token_end_offset
				- l->token_start_offset+1;
		int new_sentence_length = length(new_sentence_lu);

		add_output(list_position, new_sentence_ctl, transducer_id,
				replaced_sentence_length, new_sentence_length-1);

		// Remember where this match was found for the next lookup
		current_list_position = list_position;
		current_token_position = l->token_start_offset;

		// The match was taken out of the fifo: it is released here
		free(l->label);
		free(l);
		free_list_ustring(new_sentence_lu);
	}

	free_fifo(stage_concord);
	free_snt_files(snt_text_files);
	free_alphabet(alphabet);

	return list;
}
Пример #4
0
/**
 * Handles the dictionary matches found for the current word.
 *
 * The inf codes designated by 'inf_code' are obtained with get_inf_codes.
 * For each of them the entry is uncompressed from 'inflected' into cfg->tmp
 * and a new spell-check hypothesis is built from it and from the current
 * error counters, pairs and score stored in 'cfg'. The previous value of
 * *list is passed to the constructor and *list is updated with the result.
 *
 * Getting no inf code at all is reported as an internal fatal error.
 */
static void deal_with_matches(Dictionary* d,unichar* inflected,int inf_code,Ustring* output,
		SpellCheckConfig* cfg,int base,SpellCheckHypothesis* *list) {
    struct list_ustring* inf_codes=NULL;
    /* get_inf_codes tells whether the list it returned has to be released here */
    const int should_free=get_inf_codes(d,inf_code,output,&inf_codes,base);

    if (inf_codes==NULL) {
        fatal_error("Internal error in deal_with_matches: no inf codes associated to %S (base=%d,output=%S)\n",
                inflected,base,output->str);
    }

    for (struct list_ustring* code=inf_codes; code!=NULL; code=code->next) {
        uncompress_entry(inflected,code->string,cfg->tmp);
        *list=new_SpellCheckHypothesis(cfg->tmp->str,
                cfg->current_errors,
                cfg->current_SP_INSERT,
                cfg->current_SP_SUPPR,
                cfg->current_SP_SWAP,
                cfg->current_SP_CHANGE,
                cfg->pairs,
                cfg->score,
                *list);
    }

    if (should_free) {
        free_list_ustring(inf_codes);
    }
}
Пример #5
0
/**
 * Writes the concordance index file (snt_files->concord_ind) describing,
 * for every replaced span of the original text, the final output of the
 * cascade.
 *
 * The file starts with the line "#M". The token list is then walked in
 * parallel with the output of level 'number_of_transducer' (get_output):
 *   - tokens whose transducer_id is not 0 are accumulated in a pending
 *     replacement sentence;
 *   - when a token with transducer_id 0 is reached while a replacement is
 *     pending, the original text is advanced up to that token and one line
 *     "<start>.0.0 <end>.<last_token_length>.0 <replacement>" is written,
 *     where <end> is the position of the last replaced token and
 *     <last_token_length> is its length minus one.
 *
 * @param list                  head of the cassys token list; a NULL list
 *                              is reported as a fatal error
 * @param text_name             name used to build the snt file names
 * @param number_of_transducer  output level requested from get_output
 * @param encoding_output       forwarded to u_fopen_versatile_encoding
 * @param bom_output            forwarded to u_fopen_versatile_encoding
 * @param mask_encoding_compatibility_input
 *                              forwarded to u_fopen_versatile_encoding
 *
 * The process exits with status 1 if the index file cannot be opened.
 */
void construct_cascade_concord(cassys_tokens_list *list, const char *text_name, int number_of_transducer,
    Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input){

	fprintf(stdout, "Construct cascade concord\n");

	struct snt_files *snt_file = new_snt_files(text_name);

	U_FILE *concord_desc_file = u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input, snt_file->concord_ind,U_WRITE);
	if( concord_desc_file == NULL){
		perror("u_fopen\n");
		fprintf(stderr,"Cannot open file %s\n",snt_file->concord_ind);
		exit(1);
	}

	fprintf(stdout, "Concord File %s successfully opened\n",snt_file->concord_ind);

	if (list == NULL) {
		fatal_error("empty text");
	}

	u_fprintf(concord_desc_file,"#M\n");

	cassys_tokens_list *current_pos_in_original_text = list;
	cassys_tokens_list *output=get_output(list,number_of_transducer);
	struct list_ustring *sentence = NULL;   // pending replacement tokens
	bool output_detected = false;           // true while 'sentence' is being filled
	long token_position=0;                  // index of current_pos_in_original_text

	while(current_pos_in_original_text != NULL && output != NULL){
		if(output -> transducer_id == 0){
			if(output_detected){
				// Back on the original text after a replacement: flush it.
				int start_position = token_position;
				int last_token_length = 0;

				// Skip the replaced tokens, remembering the length of the last one
				while(current_pos_in_original_text != output){
					token_position ++;
					last_token_length = u_strlen(current_pos_in_original_text -> token)-1;
					current_pos_in_original_text = current_pos_in_original_text -> next_token;
				}

				// token_position already points to the token that follows the match
				int end_position=token_position-1;

				if(sentence == NULL){
					fatal_error("construct_cassys_concordance : Phrase de remplacement vide\n");
				}

				u_fprintf(concord_desc_file, "%d.0.0 %d.%d.0 ",start_position,end_position,last_token_length);

				for(struct list_ustring *iterator = sentence; iterator != NULL; iterator = iterator -> next){
					u_fprintf(concord_desc_file,"%S",iterator->string);
				}
				u_fprintf(concord_desc_file,"\n");

				current_pos_in_original_text
						= current_pos_in_original_text -> next_token;
				output = get_output(current_pos_in_original_text,
						number_of_transducer);
				token_position++;

				free_list_ustring(sentence);
				sentence = NULL;

				output_detected = false;
			} else {
				// Plain text token with nothing pending: just move on
				current_pos_in_original_text = current_pos_in_original_text -> next_token;
				output = get_output(current_pos_in_original_text,number_of_transducer);
				token_position++;
			}
		}
		else {
			// Token produced by a transducer: append it to the pending sentence
			sentence = insert_at_end_of_list(output->token, sentence);
			output = output -> next_token;
			output = get_output(output, number_of_transducer);
			output_detected = true;
		}

	}

	// The loop may stop (output == NULL) right after tokens were appended;
	// the pending list is never written in that case, so release it here.
	if (sentence != NULL) {
		free_list_ustring(sentence);
		sentence = NULL;
	}

	u_fclose(concord_desc_file);
	// NOTE(review): add_replaced_text releases the same kind of object with
	// free_snt_files(); confirm whether plain free() is sufficient here.
	free(snt_file);

}
/**
 * Releases a normalization tree: the output list of the given node, its
 * transitions (through free_normalization_tree_transition) and the node
 * itself. As stated by the original author, transitions are assumed to be
 * tagged with token numbers.
 *
 * A NULL tree is silently ignored.
 */
void free_normalization_tree(struct normalization_tree* n) {
    if (n!=NULL) {
        free_list_ustring(n->outputs);
        free_normalization_tree_transition(n->trans);
        free(n);
    }
}