/**
 * Inserts 'value' into the list 'l', which is expected to be sorted in
 * ascending order, unless the value is already there.
 *
 * Returns the head of the resulting list: 'l' itself in most cases, or the
 * newly created element when it had to be placed in front of the list
 * (which includes the case where 'l' is NULL).
 *
 * NOTE: in the general case, a struct list_int is not supposed
 *       to be sorted.
 *
 * Time-critical function: it is kept iterative on purpose.
 */
struct list_int* sorted_insert(int value,struct list_int* l,Abstract_allocator prv_alloc) {
    /* 'link' always designates the pointer that leads to the element
     * under examination: first the head itself, then the 'next' field
     * of the previous element */
    struct list_int** link=&l;
    while (*link!=NULL && (*link)->n<value) {
        link=&((*link)->next);
    }
    if (*link!=NULL && (*link)->n==value) {
        /* Already in the list: nothing to do */
        return l;
    }
    /* Here, *link is either NULL (end of list or empty list) or the first
     * element bigger than 'value': the new element takes its place */
    *link=new_list_int(value,*link,prv_alloc);
    return l;
}
/**
 * Builds and returns a new list that contains the same values as 'list',
 * in the same order. Returns NULL if 'list' is NULL.
 * Every element of the copy is allocated with 'prv_alloc'.
 */
struct list_int* clone(const struct list_int* list,Abstract_allocator prv_alloc) {
    struct list_int* copy=NULL;
    /* 'tail' designates the pointer where the next copied element
     * must be attached */
    struct list_int** tail=&copy;
    for (const struct list_int* src=list;src!=NULL;src=src->next) {
        /* Each element is created with a NULL successor, so the copy
         * is a well-terminated list after every iteration */
        *tail=new_list_int(src->n,NULL,prv_alloc);
        tail=&((*tail)->next);
    }
    return copy;
}
/**
 * Performs one pass over all the graphs whose status is still CHK_DONT_KNOW,
 * trying to resolve their conditions with the graphs listed in '*list'.
 *
 * On return:
 * - '*list' has been freed and replaced by the list of the graphs whose
 *   status has been determined during this pass;
 * - '*unknown' contains the number of graphs that are still CHK_DONT_KNOW.
 *
 * Returns 1 if there is something more to do after this call or 0 if:
 * - no new information was found
 * - no unknown graph remains
 * - the main graph (graph #1) matches E
 */
static int resolve_all_conditions(GrfCheckInfo* chk,struct list_int* *list,int *unknown) {
    struct list_int* resolved=NULL;
    *unknown=0;
    for (int g=1;g<=chk->fst2->number_of_graphs;g++) {
        if (chk->graphs_matching_E[g]!=CHK_DONT_KNOW) {
            /* We are already sure about this graph */
            continue;
        }
        resolve_conditions(chk,g,*list);
        chk->graphs_matching_E[g]=get_status(chk->condition_graphs[g]);
        if (chk->graphs_matching_E[g]==CHK_DONT_KNOW) {
            /* Still no answer for this graph */
            (*unknown)++;
        } else {
            /* We have an answer: graph #g must be taken into account
             * on the next pass */
            resolved=new_list_int(g,resolved);
        }
    }
    /* The old list is not needed anymore; the new one takes its place */
    free_list_int(*list);
    *list=resolved;
    if (chk->graphs_matching_E[1]==CHK_MATCHES_E) {
        error("Main graph matches epsilon!\n");
        return 0;
    }
    if (*list==NULL) {
        /* Nothing new was found during this pass */
        return 0;
    }
    return (*unknown)!=0;
}
/** * Returns 1 and prints an error message if a recursion is found in graph #n; * returns 0 otherwise. */ int look_for_recursion(int n,struct list_int* l,Fst2* fst2,int* graphs_matching_E,U_FILE*ferr) { if (is_in_list(n,l)) { /* If we find a graph that has already been visited */ print_reversed_list(l,n,fst2->graph_names,ferr); error(" recalls the graph %S\n",fst2->graph_names[n]); if (ferr != NULL) u_fprintf(ferr," recalls the graph %S\n",fst2->graph_names[n]); return 1; } l=new_list_int(n,l); int ret=explore_state(fst2->initial_states[n],l,fst2,graphs_matching_E,ferr); delete_head(&l); return ret; }
/**
 * Looks, starting from 'state', for the $] transition that closes the
 * current context. Returns the number of the state pointed by this
 * transition, or -1 if there is none.
 *
 * Nested contexts are skipped: when a context start is met, the search
 * goes on from the end of that nested context, as given by
 * get_end_of_context.
 *
 * Every explored state is added to '*visited_states', so that no state is
 * explored twice. This protects against loops, which would lead to a
 * stack overflow.
 */
static int get_end_of_context__(Fst2* fst2,int state,struct list_int* *visited_states) {
    int res;
    if (is_in_list(state,*visited_states)) {
        /* Already explored */
        return -1;
    }
    *visited_states=new_list_int(state,*visited_states);
    for (Transition* t=fst2->states[state]->transitions;t!=NULL;t=t->next) {
        /* State from which the search must go on for this transition.
         * For a graph call, a E transition (tag_number<=0) or a normal
         * transition, it is simply the destination state */
        int next_state=t->state_number;
        if (t->tag_number>0) {
            Fst2Tag tag=fst2->tags[t->tag_number];
            if (tag->type==END_CONTEXT_TAG) {
                /* This is the transition we were looking for */
                return t->state_number;
            }
            if (tag->type==BEGIN_POSITIVE_CONTEXT_TAG || tag->type==BEGIN_NEGATIVE_CONTEXT_TAG) {
                /* Nested context: we jump to its end */
                next_state=get_end_of_context(fst2,t->state_number);
                if (next_state==-1) {
                    /* The nested context has no end, so nothing more can
                     * be done with this transition */
                    continue;
                }
            }
        }
        res=get_end_of_context__(fst2,next_state,visited_states);
        if (res!=-1) {
            return res;
        }
    }
    return -1;
}
/** * Inserts the graph number 'n' in the given condition list. */ void insert_graph_in_conditions(int n,ConditionList* l) { ConditionList tmp; if (*l==NULL) { /* If the condition list is empty, we create one */ tmp=(ConditionList)malloc(sizeof(struct condition_list)); if (tmp==NULL) { fatal_alloc_error("insert_graph_in_conditions"); } tmp->next=NULL; tmp->condition=new_list_int(n); *l=tmp; return; } /* Otherwise, we insert the graph number in all the conditions of the list */ tmp=*l; while (tmp!=NULL) { tmp->condition=sorted_insert(n,tmp->condition); tmp=tmp->next; } }
/** * This function explores a dictionary tree in order to insert an entry. * 'inflected' is the inflected form to insert, and 'pos' is the current position * in the string 'inflected'. 'node' is the current node in the dictionary tree. * 'infos' is used to access to constant parameters. */ static void add_entry_to_dictionary_tree(const unichar* inflected,int pos,struct dictionary_node* node, struct info* infos,int /*line*/, Abstract_allocator prv_alloc) { for (;;) { if (inflected[pos]=='\0') { /* If we have reached the end of 'inflected', then we are in the * node where the INF code must be inserted */ int N=get_value_index(infos->INF_code,infos->INF_code_list); if (node->single_INF_code_list==NULL) { /* If there is no INF code in the node, then * we add one and we return */ node->single_INF_code_list=new_list_int(N,prv_alloc); node->INF_code=N; return; } /* If there is an INF code list in the node ...*/ if (is_in_list(N,node->single_INF_code_list)) { /* If the INF code has already been taken into account for this node * (case of duplicates), we do nothing */ return; } /* Otherwise, we add it to the INF code list */ node->single_INF_code_list=head_insert(N,node->single_INF_code_list,prv_alloc); /* And we update the global INF line for this node */ node->INF_code=get_value_index_for_string_colon_string(infos->INF_code_list->value[node->INF_code],infos->INF_code,infos->INF_code_list); return; } /* If we are not at the end of 'inflected', then we look for * the correct outgoing transition and we follow it */ struct dictionary_node_transition* t=get_transition(inflected[pos],&node,prv_alloc); if (t->node==NULL) { /* We create the node if necessary */ t->node=new_dictionary_node(prv_alloc); (t->node->incoming)++; } node=t->node; pos++; } }
/** * This function adds a pattern number to the pattern list of a given * compound word tree node. */ void add_pattern_to_DLC_tree_node(struct DLC_tree_node* node,int pattern) { struct list_int *previous; if (node->patterns==NULL) { /* If the list is empty, we add the pattern */ node->patterns=new_list_int(pattern); /* We update the length of the list */ (node->number_of_patterns)++; return; } if (node->patterns->n==pattern) /* If the first element of the list is the same than 'pattern' * we have nothing to do */ return; if (node->patterns->n>pattern) { /* If we must insert 'pattern' at the beginning of the list */ node->patterns=head_insert(pattern,node->patterns); /* We update the length of the list */ (node->number_of_patterns)++; return; } /* General case */ previous=node->patterns; int stop=0; /* We parse the list until we have found the pattern or the place * to insert the pattern */ while (!stop && previous->next!=NULL) { /* If we find the pattern in the list, we have nothing to do */ if (previous->next->n==pattern) return; else if (previous->next->n<pattern) previous=previous->next; else stop=1; } /* If must insert the pattern */ previous->next=head_insert(pattern,previous->next); /* We update the length of the list */ (node->number_of_patterns)++; return; }
/** * Allocates, initializes and returns a new int list element. */ struct list_int* new_list_int(int value,Abstract_allocator prv_alloc) { return new_list_int(value,NULL,prv_alloc); }
/**
 * Creates a new element that contains 'value', puts it in front of the
 * list 'old_head' (which may be NULL) and returns it as the new head of
 * the list.
 */
struct list_int* head_insert(int value,struct list_int* old_head,Abstract_allocator prv_alloc) {
    /* The new element is directly created with 'old_head' as successor */
    return new_list_int(value,old_head,prv_alloc);
}
/** * Returns 1 if the given .fst2 is OK to be used by the Locate program; 0 otherwise. * Conditions are: * * 1) no left recursion * 2) no loop that can recognize the empty word (<E> with an output or subgraph * that can match the empty word). */ int OK_for_Locate_write_error(const VersatileEncodingConfig* vec,const char* name,char no_empty_graph_warning,U_FILE* ferr) { int RESULT=1; struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(vec,name,1,&fst2_free); if (fst2==NULL) { fatal_error("Cannot load graph %s\n",name); } u_printf("Creating condition sets...\n"); GrfCheckInfo* chk=new_GrfCheckInfo(fst2); /* Now, we look for a fix point in the condition graphs */ struct list_int* list=NULL; /* To do that, we start by creating a list of all the graphs we are sure about */ int unknown=0; for (int i=1;i<fst2->number_of_graphs+1;i++) { if (chk->graphs_matching_E[i]!=CHK_DONT_KNOW) { list=new_list_int(i,list); } else { unknown++; } } /* While there is something to do for E matching */ u_printf("Checking empty word matching...\n"); while (resolve_all_conditions(chk,&list,&unknown)) {} if (chk->graphs_matching_E[1]==CHK_MATCHES_E) { if (!no_empty_graph_warning) { error("ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]); if (ferr!=NULL) { u_fprintf(ferr,"ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]); } } goto evil_goto; } if (!no_empty_graph_warning) { for (int i=2;i<fst2->number_of_graphs+1;i++) { if (chk->graphs_matching_E[i]==CHK_MATCHES_E) { error("WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]); if (ferr!=NULL) { u_fprintf(ferr,"WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]); } } } } /* Now, we look for E loops and left recursions. And to do that, we need a new version * of the condition graphs, because a graph that does not match E would have been emptied. * And obviously, we can not deduce anything from an empty graph. 
*/ rebuild_condition_graphs(chk); u_printf("Checking E loops...\n"); if (is_any_E_loop(chk)) { /* Error messages have already been printed */ goto evil_goto; } u_printf("Checking left recursions...\n"); if (is_any_left_recursion(chk)) { /* Error messages have already been printed */ goto evil_goto; } evil_goto: /* There may be something unused in the list that we need to free */ free_list_int(list); free_GrfCheckInfo(chk); free_abstract_Fst2(fst2,&fst2_free); return RESULT; }