/**
 * Inserts a value in a sorted list, if not already present. The
 * element that contains the value is returned.
 *
 * NOTE: in the general case, a struct list_int is not supposed
 *       to be sorted.
 *
 * Time-critical function: the iterative implementation is faster!
 */
struct list_int* sorted_insert(int value,struct list_int* l,Abstract_allocator prv_alloc) {

register struct list_int* tmp;
struct list_int* tmp2;
struct list_int* last = NULL;

if (l==NULL)  /* empty list */
   return new_list_int(value,prv_alloc);

for (tmp=l; tmp!=NULL; tmp=tmp->next) {
  if (value==tmp->n) /* is in list */
     return l;
  if (value<tmp->n) { /* smaller than element tmp */
    tmp2=new_list_int(value,tmp,prv_alloc);
    if (last==NULL) /* tmp was the first element: tmp2 will get the
                       first */
      l = tmp2;
    else
      last->next = tmp2;
    return l;
  }
  last=tmp;
}
/* value not found in the list and there is no bigger element in the
   list: insert at the end of the list */
tmp2=new_list_int(value,prv_alloc);
last->next = tmp2;
return l;
}
/**
 * Returns a copy of the given list.
 */
struct list_int* clone(const struct list_int* list,Abstract_allocator prv_alloc) {
if (list==NULL) return NULL;
list_int* result=new_list_int(list->n,NULL,prv_alloc);
list=list->next;
list_int* tmp=result;
while (list!=NULL) {
   tmp->next=new_list_int(list->n,NULL,prv_alloc);
   tmp->next->next=NULL;
   list=list->next;
   tmp=tmp->next;
}

return result;
}
/**
 * Returns 1 if there is something more to do after this call or 0 if:
 * - no new information was found
 * - the main graph matches E
 */
static int resolve_all_conditions(GrfCheckInfo* chk,struct list_int* *list,int *unknown) {
*unknown=0;
struct list_int* new_list=NULL;
for (int i=1;i<chk->fst2->number_of_graphs+1;i++) {
	if (chk->graphs_matching_E[i]==CHK_DONT_KNOW) {
		/* We only need to look at the graphs we are not sure about yet */
		resolve_conditions(chk,i,*list);
		chk->graphs_matching_E[i]=get_status(chk->condition_graphs[i]);
		if (chk->graphs_matching_E[i]!=CHK_DONT_KNOW) {
			/* If we have found an answer, we note that graph #i must be
			 * looked at on the next loop */
			new_list=new_list_int(i,new_list);
		} else {
			/* The graph is still unknown */
			(*unknown)++;
		}
	}
}
/* Now we can use the new list */
free_list_int(*list);
*list=new_list;
if (chk->graphs_matching_E[1]==CHK_MATCHES_E) {
	error("Main graph matches epsilon!\n");
	return 0;
}
return ((*list)!=NULL && (*unknown)!=0);
}
Exemple #4
0
/**
 * Returns 1 and prints an error message if a recursion is found in graph #n;
 * returns 0 otherwise.
 */
int look_for_recursion(int n,struct list_int* l,Fst2* fst2,int* graphs_matching_E,U_FILE*ferr) {
if (is_in_list(n,l)) {
   /* If we find a graph that has already been visited */
   print_reversed_list(l,n,fst2->graph_names,ferr);
   error(" recalls the graph %S\n",fst2->graph_names[n]);
   if (ferr != NULL)
      u_fprintf(ferr," recalls the graph %S\n",fst2->graph_names[n]);
   return 1;
}
l=new_list_int(n,l);
int ret=explore_state(fst2->initial_states[n],l,fst2,graphs_matching_E,ferr);
delete_head(&l);
return ret;
}
/**
 * Returns the number of the state that is pointed by the $] transition that
 * closes the current context or -1 if not found.
 * Note that nested contexts are taken into account.
 * 'visited_states' is used to avoid exploring loops, which would lead to
 * a stack overflow.
 */
static int get_end_of_context__(Fst2* fst2,int state,struct list_int* *visited_states) {
int res;
if (is_in_list(state,*visited_states)) {
	/* No need to visit twice a state */
	return -1;
}
(*visited_states)=new_list_int(state,*visited_states);
for (Transition* t=fst2->states[state]->transitions;t!=NULL;t=t->next) {
	if (t->tag_number<=0) {
		/* If we have a graph call or a E transition, we look after it */
		res=get_end_of_context__(fst2,t->state_number,visited_states);
		if (res!=-1) {
			return res;
		}
		continue;
	}
	Fst2Tag tag=fst2->tags[t->tag_number];
	if (tag->type==END_CONTEXT_TAG) {
		/* We found it! */
		return t->state_number;
	}
	if (tag->type==BEGIN_POSITIVE_CONTEXT_TAG || tag->type==BEGIN_NEGATIVE_CONTEXT_TAG) {
		/* If we have a nested context, we deal with it */
		int end=get_end_of_context(fst2,t->state_number);
		if (end==-1) {
			/* No end for this nested context ? There is nothing more to be done
			 * with this transition */
			continue;
		} else {
			/* Now, we can continue to explore from the end of the nested context */
			res=get_end_of_context__(fst2,end,visited_states);
			if (res!=-1) {
				return res;
			}
			continue;
		}
	}
	/* If we have a normal transition, we explore it */
	res=get_end_of_context__(fst2,t->state_number,visited_states);
	if (res!=-1) {
		return res;
	}
	continue;
}
return -1;
}
Exemple #6
0
/**
 * Inserts the graph number 'n' in the given condition list.
 */
void insert_graph_in_conditions(int n,ConditionList* l) {
ConditionList tmp;
if (*l==NULL) {
   /* If the condition list is empty, we create one */
   tmp=(ConditionList)malloc(sizeof(struct condition_list));
   if (tmp==NULL) {
      fatal_alloc_error("insert_graph_in_conditions");
   }
   tmp->next=NULL;
   tmp->condition=new_list_int(n);
   *l=tmp;
   return;
}
/* Otherwise, we insert the graph number in all the conditions of the list */
tmp=*l;
while (tmp!=NULL) {
   tmp->condition=sorted_insert(n,tmp->condition);
   tmp=tmp->next;
}
}
/**
 * This function explores a dictionary tree in order to insert an entry.
 * 'inflected' is the inflected form to insert, and 'pos' is the current position
 * in the string 'inflected'. 'node' is the current node in the dictionary tree.
 * 'infos' is used to access to constant parameters.
 */
static void add_entry_to_dictionary_tree(const unichar* inflected,int pos,struct dictionary_node* node,
                                  struct info* infos,int /*line*/, Abstract_allocator prv_alloc) {
for (;;) {
if (inflected[pos]=='\0') {
   /* If we have reached the end of 'inflected', then we are in the
    * node where the INF code must be inserted */
   int N=get_value_index(infos->INF_code,infos->INF_code_list);
   if (node->single_INF_code_list==NULL) {
      /* If there is no INF code in the node, then
       * we add one and we return */
      node->single_INF_code_list=new_list_int(N,prv_alloc);
      node->INF_code=N;
      return;
   }
   /* If there is an INF code list in the node ...*/
   if (is_in_list(N,node->single_INF_code_list)) {
      /* If the INF code has already been taken into account for this node
       * (case of duplicates), we do nothing */
      return;
   }
   /* Otherwise, we add it to the INF code list */
   node->single_INF_code_list=head_insert(N,node->single_INF_code_list,prv_alloc);
	/* And we update the global INF line for this node */
   node->INF_code=get_value_index_for_string_colon_string(infos->INF_code_list->value[node->INF_code],infos->INF_code,infos->INF_code_list);
   return;
}
/* If we are not at the end of 'inflected', then we look for
 * the correct outgoing transition and we follow it */
struct dictionary_node_transition* t=get_transition(inflected[pos],&node,prv_alloc);
if (t->node==NULL) {
   /* We create the node if necessary */
   t->node=new_dictionary_node(prv_alloc);
   (t->node->incoming)++;
}

node=t->node;
pos++;
}
}
/**
 * This function adds a pattern number to the pattern list of a given
 * compound word tree node.
 */
void add_pattern_to_DLC_tree_node(struct DLC_tree_node* node,int pattern) {
struct list_int *previous;
if (node->patterns==NULL) {
  /* If the list is empty, we add the pattern */
  node->patterns=new_list_int(pattern);
  /* We update the length of the list */
  (node->number_of_patterns)++;
  return;
}
if (node->patterns->n==pattern)
  /* If the first element of the list is the same than 'pattern'
   * we have nothing to do */
  return;
if (node->patterns->n>pattern) {
  /* If we must insert 'pattern' at the beginning of the list */
  node->patterns=head_insert(pattern,node->patterns);
  /* We update the length of the list */
  (node->number_of_patterns)++;
  return;
}
/* General case */
previous=node->patterns;
int stop=0;
/* We parse the list until we have found the pattern or the place
 * to insert the pattern */
while (!stop && previous->next!=NULL) {
	/* If we find the pattern in the list, we have nothing to do */
	if (previous->next->n==pattern) return;
	else if (previous->next->n<pattern) previous=previous->next;
	else stop=1;
}
/* If must insert the pattern */
previous->next=head_insert(pattern,previous->next);
/* We update the length of the list */
(node->number_of_patterns)++;
return;
}
/**
 * Allocates, initializes and returns a new int list element.
 */
struct list_int* new_list_int(int value,Abstract_allocator prv_alloc) {
return new_list_int(value,NULL,prv_alloc);
}
/**
 * Inserts an element at the head of the list.
 */
struct list_int* head_insert(int value,struct list_int* old_head,Abstract_allocator prv_alloc) {
struct list_int* new_head=new_list_int(value,prv_alloc);
new_head->next=old_head;
return new_head;
}
/**
 * Returns 1 if the given .fst2 is OK to be used by the Locate program; 0 otherwise.
 * Conditions are:
 *
 * 1) no left recursion
 * 2) no loop that can recognize the empty word (<E> with an output or subgraph
 *    that can match the empty word).
 */
int OK_for_Locate_write_error(const VersatileEncodingConfig* vec,const char* name,char no_empty_graph_warning,U_FILE* ferr) {
int RESULT=1;
struct FST2_free_info fst2_free;
Fst2* fst2=load_abstract_fst2(vec,name,1,&fst2_free);
if (fst2==NULL) {
	fatal_error("Cannot load graph %s\n",name);
}
u_printf("Creating condition sets...\n");
GrfCheckInfo* chk=new_GrfCheckInfo(fst2);
/* Now, we look for a fix point in the condition graphs */
struct list_int* list=NULL;
/* To do that, we start by creating a list of all the graphs we are sure about */
int unknown=0;
for (int i=1;i<fst2->number_of_graphs+1;i++) {
	if (chk->graphs_matching_E[i]!=CHK_DONT_KNOW) {
		list=new_list_int(i,list);
	} else {
		unknown++;
	}
}
/* While there is something to do for E matching */
u_printf("Checking empty word matching...\n");
while (resolve_all_conditions(chk,&list,&unknown)) {}
if (chk->graphs_matching_E[1]==CHK_MATCHES_E) {
	if (!no_empty_graph_warning) {
       error("ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]);
       if (ferr!=NULL) {
    	   u_fprintf(ferr,"ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]);
	   }
	}
	goto evil_goto;
}
if (!no_empty_graph_warning) {
	for (int i=2;i<fst2->number_of_graphs+1;i++) {
		if (chk->graphs_matching_E[i]==CHK_MATCHES_E) {
			error("WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]);
			if (ferr!=NULL) {
				u_fprintf(ferr,"WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]);
			}
		}
	}
}
/* Now, we look for E loops and left recursions. And to do that, we need a new version
 * of the condition graphs, because a graph that does not match E would have been emptied.
 * And obviously, we can not deduce anything from an empty graph. */
rebuild_condition_graphs(chk);
u_printf("Checking E loops...\n");
if (is_any_E_loop(chk)) {
	/* Error messages have already been printed */
	goto evil_goto;
}
u_printf("Checking left recursions...\n");
if (is_any_left_recursion(chk)) {
	/* Error messages have already been printed */
	goto evil_goto;
}
evil_goto:
/* There may be something unused in the list that we need to free */
free_list_int(list);
free_GrfCheckInfo(chk);
free_abstract_Fst2(fst2,&fst2_free);
return RESULT;
}