Пример #1
0
/**
 * Builds a one-cell list of DELA entries and returns it.
 *
 * The new cell has no successor: its 'next' field is NULL.
 * If 'clone' is non-zero, the cell stores the result of
 * clone_dela_entry(entry); if it is zero, the cell stores the 'entry'
 * pointer itself, without copying the pointed entry.
 *
 * fatal_alloc_error is called if the cell cannot be allocated.
 */
struct dela_entry_list* new_dela_entry_list(struct dela_entry* entry,int clone) {
struct dela_entry_list* cell=(struct dela_entry_list*)malloc(sizeof(struct dela_entry_list));
if (cell==NULL) {
   fatal_alloc_error("new_dela_entry_list");
}
cell->next=NULL;
/* Either a private copy of the entry or the caller's pointer */
cell->entry=clone?clone_dela_entry(entry):entry;
return cell;
}
Пример #2
0
/**
 * Allocates, initializes and returns a dic_variable.
 *
 * - 'name' is duplicated with u_strdup.
 * - 'dic_entry' is stored as a clone (clone_dela_entry) if 'must_clone'
 *   is non-zero, or as the given pointer otherwise.
 * - 'next' becomes the successor of the new variable.
 *
 * fatal_alloc_error is called if the variable cannot be allocated.
 */
struct dic_variable* new_dic_variable(const unichar* name,struct dela_entry* dic_entry,
                                      struct dic_variable* next,int must_clone) {
struct dic_variable* variable=(struct dic_variable*)malloc(sizeof(struct dic_variable));
if (variable==NULL) {
   fatal_alloc_error("new_dic_variable");
}
variable->name=u_strdup(name);
variable->dic_entry=must_clone?clone_dela_entry(dic_entry):dic_entry;
variable->next=next;
return variable;
}
Пример #3
0
/**
 * Sets the dictionary variable 'name' in the list '*list' to 'dic_entry'.
 *
 * The list is searched for a variable whose name is equal to 'name'
 * (according to u_strcmp):
 * - if there is one, its previous value is released with free_dela_entry
 *   and replaced by the new one;
 * - otherwise, a variable created by new_dic_variable is linked at the
 *   end of the list.
 *
 * If 'must_clone' is non-zero, the variable stores a clone of 'dic_entry'
 * (clone_dela_entry); otherwise it stores the 'dic_entry' pointer itself.
 *
 * 'dic_entry' is allowed to be the entry that the variable already holds:
 * the new value is always computed before the old one is released, and the
 * old one is not released at all when it is the pointer being stored.
 */
void set_dic_variable(const unichar* name,struct dela_entry* dic_entry,struct dic_variable* *list,int must_clone) {
while (*list!=NULL) {
   struct dic_variable* v=*list;
   if (!u_strcmp(v->name,name)) {
      /* We have found the variable we were looking for */
      struct dela_entry* old_value=v->dic_entry;
      if (must_clone) {
         /* We clone before freeing, because dic_entry may be old_value
          * itself, and cloning a freed entry would read freed memory */
         v->dic_entry=clone_dela_entry(dic_entry);
         free_dela_entry(old_value);
      } else if (old_value!=dic_entry) {
         /* We free the previous value, then we take the given pointer */
         free_dela_entry(old_value);
         v->dic_entry=dic_entry;
      }
      /* Otherwise, the variable already stores exactly this pointer:
       * freeing it would leave the variable with a dangling value */
      return;
   }
   list=&(v->next);
}
/* The variable was absent: we add it at the end of the list */
*list=new_dic_variable(name,dic_entry,NULL,must_clone);
}
Пример #4
0
/**
 * Explores all the partial matches to produce outputs in MERGE or REPLACE mode.
 *
 * The function processes the item number 'current_item' of 'items' and calls
 * itself recursively with current_item+1. When current_item reaches
 * items->nbelems, the content of 's' is used as the output of 'element', and
 * 'element' is added to infos->matches with add_element_to_list.
 *
 * Parameters:
 * - infos: the locate context; the function reads its fst2, tfst, output
 *          policies and variables, and updates infos->matches
 * - element: the match being built; its 'm' and 'output' fields are modified
 *            during the exploration and restored afterwards
 * - items: vector whose elements are used as struct tfst_match*
 * - current_item: index in 'items' of the item to process
 * - s: the output string being built; before each text tag is explored, it
 *      is truncated back to the length it had when the function was entered
 * - last_text_dependent_tfst_tag: number of the last text dependent tfst tag
 *                                 that was met, or -1 if there is none
 * - var_starts: see below
 *
 * If *var_starts!=NULL, it means that there are pending $var_start( tags
 * that wait for being taken into account when a text dependent tag is found.
 *
 * Whatever the way the function exits after the item has been fetched, the
 * code at the 'restore_dic_variable' label is executed: it undoes the
 * characters captured into output variables and, for a capture variable,
 * puts back the previous value of the dictionary variable.
 */
void explore_match_for_MERGE_or_REPLACE_mode(struct locate_tfst_infos* infos,
                                  struct tfst_simple_match_list* element,
                                  vector_ptr* items,int current_item,Ustring* s,
                                  int last_text_dependent_tfst_tag,
                                  struct list_pointer* *var_starts) {
if (current_item==items->nbelems) {
   /* If we have finished, we can save the current output. element->output
    * only borrows s->str for the time of the call, so we reset it to NULL
    * afterwards */
   element->output=s->str;
   infos->matches=add_element_to_list(infos,infos->matches,element);
   element->output=NULL;
   return;
}
/* We save the length because it will be modified */
int len=s->len;
struct tfst_match* item=(struct tfst_match*)(items->tab[current_item]);
if (item==NULL) {
   /* NOTE(review): the message below names explore_match_for_MERGE_mode,
    * which is not the name of this function; it looks stale */
   fatal_error("Unexpected NULL item in explore_match_for_MERGE_mode\n");
}

/* The output of the fst2 tag associated to the current item */
unichar* output=infos->fst2->tags[item->fst2_transition->tag_number]->output;

unichar name[MAX_TRANSDUCTION_VAR_LENGTH];
int capture;
struct dela_entry* old_value_dela=NULL;
/* 'capture' is non-zero if the output is a capture variable; 'name' is
 * passed to is_capture_variable so that it can receive the variable name
 * (presumably filled only when capture is non-zero; confirm against
 * is_capture_variable) */
capture=is_capture_variable(output,name);
if (capture) {
	/* If we have a capture variable $:X$, we must save the previous value
	 * for this dictionary variable. We keep a clone of it, which is given
	 * back to the variable at the 'restore_dic_variable' label */
	old_value_dela=clone_dela_entry(get_dic_variable(name,infos->dic_variables));
}

/* Copy of the match positions, used to restore element->m after each
 * recursive exploration of a text tag */
Match saved_element=element->m;
struct list_int* text_tags=item->text_tag_numbers;
/* Number of characters that must be removed from output variables when
 * backtracking; it is passed to deal_with_output_tfst, which may update it */
int captured_chars=0;
/* We explore all the text tags */
while (text_tags!=NULL) {
   /* First, we restore the output string */
   s->len=len;
   s->str[len]='\0';
   captured_chars=0;
   /* We deal with the fst2 tag output, if any */
   if (item->first_time) {
	   /* We have to process the output only once,
	    * since it will have the same effect on all tfst tags.
	    *
	    * Example: the fst2 tag "cybercrime/ZZ" may match the two tfst tags "cyber" and
	    * "crime", but we must process the "ZZ" output only before the first tfst tag "cyber" */
	   if (capture) {
		   /* If we have a capture variable, then we have to check whether the tfst tag
	   	    * is a tagged token or not. If do_variable_capture returns 0, we give
	   	    * up exploring this item */
	   	   int tfst_tag_number=text_tags->n;
	   	   int fst2_tag_number=item->fst2_transition->tag_number;
	   	   if (!do_variable_capture(tfst_tag_number,fst2_tag_number,infos,name)) {
	   		   goto restore_dic_variable;
	   	   }
	   } else if (!deal_with_output_tfst(s,output,infos,&captured_chars)) {
         /* We do not take into account matches for which deal_with_output_tfst
          * returned 0. According to the original comment (which names the
          * function process_output_for_tfst_match), this happens when
          * backtracking is necessary, either because of a variable error or
          * because of a $a.SET$ or $a.UNSET$ test */
		  goto restore_dic_variable;
      }
   }
   int last_tag=last_text_dependent_tfst_tag;
   /* Stays NULL for a text independent match */
   TfstTag* current_tag=NULL;
   if (text_tags->n==-1) {
      /* We have a text independent match */
      Fst2Tag fst2_tag=infos->fst2->tags[item->fst2_transition->tag_number];
      if (fst2_tag->type==BEGIN_OUTPUT_VAR_TAG) {
    	  /* If we have an output variable start $|a(, the variable is marked
    	   * as pending for the exploration of the next items only */
    	  set_output_variable_pending(infos->output_variables,fst2_tag->variable);
          explore_match_for_MERGE_or_REPLACE_mode(infos,element,items,current_item+1,s,last_tag,var_starts);
          unset_output_variable_pending(infos->output_variables,fst2_tag->variable);
          goto restore_dic_variable;
      } else if (fst2_tag->type==END_OUTPUT_VAR_TAG) {
    	  /* If we have an output variable end $|a), the variable is unmarked
    	   * for the exploration of the next items, and marked again afterwards */
    	  unset_output_variable_pending(infos->output_variables,fst2_tag->variable);
    	  explore_match_for_MERGE_or_REPLACE_mode(infos,element,items,current_item+1,s,last_tag,var_starts);
          set_output_variable_pending(infos->output_variables,fst2_tag->variable);
          goto restore_dic_variable;
      } else if (fst2_tag->type==BEGIN_VAR_TAG) {
         /* If we have a variable start tag $a(, we add it to our
          * variable tag list */
         struct transduction_variable* v=get_transduction_variable(infos->input_variables,fst2_tag->variable);
         int old_value=v->start_in_tokens;
         /* We add the address of the start field to our list.
          * NOTE(review): *var_starts is dereferenced on the left side while
          * the right side tests var_starts==NULL; if var_starts could really
          * be NULL here, this assignment would dereference a NULL pointer.
          * Confirm that callers always pass a non-NULL var_starts */
         (*var_starts)=new_list_pointer(&(v->start_in_tokens),(var_starts==NULL)?NULL:(*var_starts));
         /* Then, we go on the next item */
         explore_match_for_MERGE_or_REPLACE_mode(infos,element,items,current_item+1,s,last_tag,var_starts);
         /* After the exploration, there are 2 cases:
          * 1) *var_starts is NULL: nothing to do
          * 2) *var_starts is not NULL: we reached the end of the items without finding any
          *                             text dependent match, so we can free the list */
         free_list_pointer(*var_starts);
         (*var_starts)=NULL;
         v->start_in_tokens=old_value;
         /* If we have a $a( tag, we know that we can only have just one text tag
          * with special value -1 */
         goto restore_dic_variable;
      } else if (fst2_tag->type==END_VAR_TAG) {
         /* If we have found a $a) tag */
         if (last_tag==-1) {
            /* If we have no tfst tag to use, then it's a variable definition error,
             * and we have nothing special to do */
            explore_match_for_MERGE_or_REPLACE_mode(infos,element,items,current_item+1,s,last_tag,var_starts);
            goto restore_dic_variable;
         } else {
            /* We can set the end of the variable, it's 'last_tag'. The
             * previous end is put back after the exploration */
            struct transduction_variable* v=get_transduction_variable(infos->input_variables,fst2_tag->variable);
            int old_value=v->end_in_tokens;
            v->end_in_tokens=last_tag;
            explore_match_for_MERGE_or_REPLACE_mode(infos,element,items,current_item+1,s,last_tag,var_starts);
            v->end_in_tokens=old_value;
            goto restore_dic_variable;
         }
      } else if (fst2_tag->type==LEFT_CONTEXT_TAG) {
         /* If we have found a $* tag, we must reset the stack string and the
          * start position, so we save them */
         unichar* old_stack=u_strdup(s->str);
         int old_pos_token=element->m.start_pos_in_token;
         int old_pos_char=element->m.start_pos_in_char;
         int old_pos_letter=element->m.start_pos_in_letter;
         /* We set the new values */
         empty(s);
         element->m.start_pos_in_token=LEFT_CONTEXT_PENDING;
         /* We must reset last_tag to -1, because if not, we will have an
          * extra space on the left of the match */
         explore_match_for_MERGE_or_REPLACE_mode(infos,element,items,current_item+1,s,-1,var_starts);
         
         /* And we restore previous values */
         element->m.start_pos_in_token=old_pos_token;
         element->m.start_pos_in_char=old_pos_char;
         element->m.start_pos_in_letter=old_pos_letter;
         u_strcpy(s,old_stack);
         free(old_stack);
         /* If we have a $* tag, we know that we can only have just one text tag
          * with special value -1 */
         goto restore_dic_variable;
      }
      /* For any other tag type, we fall through to the common code below,
       * with current_tag==NULL and last_tag unchanged */
   } else {
      /* The current text tag is not a text independent one */
      current_tag=(TfstTag*)(infos->tfst->tags->tab[text_tags->n]);
      /* We update the last tag */
      last_tag=text_tags->n;
      
      /* If there are some pending $a( tags, we set them to the current tag */
      if (var_starts!=NULL) {
         struct list_pointer* ptr=(*var_starts);
         while (ptr!=NULL) {
            /* Each pointer of the list is the address of a start_in_tokens field */
            int* start=(int*)(ptr->pointer);
            (*start)=text_tags->n;
            ptr=ptr->next;
         }
      }
      int previous_start_token,previous_start_char; 
      if (last_text_dependent_tfst_tag!=-1) {
         /* If the item is not the first, we must insert the original text that is
          * between the end of the previous merged text and the beginning of the
          * current one, typically to insert spaces */
         TfstTag* previous_tag=(TfstTag*)(infos->tfst->tags->tab[last_text_dependent_tfst_tag]);
         previous_start_token=previous_tag->m.end_pos_in_token;
         previous_start_char=previous_tag->m.end_pos_in_char;
         /* We start just after the end of the previous match */
         if (infos->tfst->token_content[previous_start_token][previous_start_char+1]!='\0') {
            /* If we were not at the end of the previous text token, we just increase
             * the char position */
            previous_start_char++;
         } else {
            /* Otherwise, we go on the next token */
            previous_start_token++;
            previous_start_char=0;
         }
      } else {
         /* Otherwise, we start on the beginning of the current text tag */
         //error("current item=%d\n",text_tags->n);
         previous_start_token=current_tag->m.start_pos_in_token;
         previous_start_char=current_tag->m.start_pos_in_char;
      }
      /* Here we have to insert the text that is between the start position
       * computed above and the end of the current tag, and then, the output
       * of the fst2 transition. This is only done in MERGE mode */
      if (infos->output_policy==MERGE_OUTPUTS) {
    	  insert_text_interval_tfst(infos,s,previous_start_token,previous_start_char,
                 current_tag->m.end_pos_in_token,current_tag->m.end_pos_in_char);
      }
   }
   /* Then, we go on the next item, with a fresh empty list of pending
    * variable starts */
   struct list_pointer* ptr2=NULL;
   if (element->m.start_pos_in_token==LEFT_CONTEXT_PENDING && current_tag!=NULL) {
      /* A $* tag was pending and we have a text dependent tag: the match
       * now starts at the beginning of this tag */
      element->m.start_pos_in_token=infos->tfst->offset_in_tokens+current_tag->m.start_pos_in_token;
      element->m.start_pos_in_char=current_tag->m.start_pos_in_char;
      element->m.start_pos_in_letter=current_tag->m.start_pos_in_letter;
   }
   explore_match_for_MERGE_or_REPLACE_mode(infos,element,items,current_item+1,s,last_tag
         ,&ptr2 /* We have encountered a text dependent tag, so there is no
                 * more pending start tag like $a( */
         );
   /* We put back the match positions saved before the loop */
   element->m=saved_element;
   /* We free the list of pending variable starts, if the recursive call
    * left any in ptr2 */
   free_list_pointer(ptr2);
   if (infos->ambiguous_output_policy==IGNORE_AMBIGUOUS_OUTPUTS) {
      /* If we don't want ambiguous outputs, then the first path is
       * enough for our purpose */
      goto restore_dic_variable;
   }
   text_tags=text_tags->next;
   remove_chars_from_output_variables(infos->output_variables,captured_chars);
   /* We reset to 0, because if we exit the while normally, we don't want to
    * modify output variables twice when reaching the 'restore_dic_variable'
    * label */
   captured_chars=0;
}
restore_dic_variable:
/* We redo this about output variables here, since we may have jumped here directly */
remove_chars_from_output_variables(infos->output_variables,captured_chars);
if (capture) {
	/* If we have a capture variable $:X$, we must restore the previous value
	 * for this dictionary variable. The clone made at the beginning is given
	 * to the variable without being cloned again (must_clone is 0) */
	set_dic_variable(name,old_value_dela,&(infos->dic_variables),0);
}
}