Beispiel #1
0
/**
 * Returns 1 if the given dictionary entry is a "V" one that does
 * not have the inflectional code "Y".
 */
char check_V_but_not_Y(struct dela_entry* d) {
unichar t1[2];
u_strcpy(t1,"V");
unichar t2[2];
u_strcpy(t2,"Y");
return dic_entry_contain_gram_code(d,t1) && (!dic_entry_contain_inflectional_code(d,t2));
}
Beispiel #2
0
/**
 * Returns 1 if the given dictionary entry is a "V:W" one.
 */
char check_VW(const struct dela_entry* d) {
unichar t1[2];
u_strcpy(t1,"V");
unichar t2[2];
u_strcpy(t2,"W");
return dic_entry_contain_gram_code(d,t1) && dic_entry_contain_inflectional_code(d,t2);
}
Beispiel #3
0
/**
 * Checks the given DELAF entry against the code part of the given pattern.
 *
 * The entry is compatible when:
 *  - it contains every grammatical code required by the pattern,
 *  - it contains none of the pattern's forbidden codes,
 *  - it contains every inflectional code required by the pattern.
 *
 * Returns 1 if the entry is compatible, 0 otherwise. The three lists are
 * checked in that order and the function returns at the first failure.
 */
int is_compatible_code_pattern(const struct dela_entry* entry,const struct pattern* pattern) {
/* Every required grammatical code must be in the entry */
for (struct list_ustring* code=pattern->grammatical_codes;code!=NULL;code=code->next) {
   if (!dic_entry_contain_gram_code(entry,code->string)) {
      return 0;
   }
}
/* No forbidden code may be in the entry */
for (struct list_ustring* code=pattern->forbidden_codes;code!=NULL;code=code->next) {
   if (dic_entry_contain_gram_code(entry,code->string)) {
      return 0;
   }
}
/* Every required inflectional code must be in the entry */
for (struct list_ustring* code=pattern->inflectional_codes;code!=NULL;code=code->next) {
   if (!dic_entry_contain_inflectional_code(entry,code->string)) {
      return 0;
   }
}
return 1;
}
Beispiel #4
0
/**
 * Returns 1 if the line is a valid right "A" component, i.e. if the
 * codes in 's' contain the grammatical code "A" and do not contain the
 * inflectional code "sie". Returns 0 otherwise.
 *
 * 's' is the code part that is appended to the artificial prefix "x,"
 * in order to build a dictionary line that can be tokenized.
 *
 * 0 is also returned when 's' is null, when it is too long to fit in
 * the internal buffer together with the prefix, or when the artificial
 * line cannot be tokenized into a dictionary entry.
 */
char check_A_right_component(unichar* s) {
/* We produce an artificial dictionary entry with the given INF code,
 * and then, we tokenize it in order to get grammatical and inflectional
 * codes in a structured way. */
unichar temp[2000];
/* Room left for 's' once the 2 cells of "x," and the final '\0' are reserved */
const int max_length=(int)(sizeof(temp)/sizeof(temp[0]))-3;
if (!s) {
   return 0;
}
/* We measure 's' by hand and give up as soon as it is known not to fit,
 * so that the concatenation below can never write past the end of temp */
int length=0;
while (s[length]!='\0') {
   if (length>=max_length) {
      return 0;
   }
   length++;
}
u_strcpy(temp,"x,");
u_strcat(temp,s);
struct dela_entry* d=tokenize_DELAF_line(temp,0);
if (!d) {
   /* The tokenizer can fail on a malformed line; there is nothing to test then */
   return 0;
}
unichar t1[2];
u_strcpy(t1,"A");
unichar t2[4];
u_strcpy(t2,"sie");
char res=dic_entry_contain_gram_code(d,t1) && !dic_entry_contain_inflectional_code(d,t2);
/* We free the artificial dictionary entry */
free_dela_entry(d);
return res;
}
/**
 * Tests whether the dictionary entry 'd' satisfies the array of
 * composition rules 'rule'. The array is scanned until a rule with an
 * empty string is met, or until MAX_NUMBER_OF_COMPOSITION_RULES rules
 * have been examined.
 *
 * Rule semantics, as implemented here:
 *  - type 'g', YesNo set   : the grammatical code must be in the entry;
 *  - type 'g', YesNo unset : the grammatical code must not be in the entry;
 *  - type 'f', YesNo set   : positive inflectional rules are alternatives;
 *                            if there is at least one such rule, at least
 *                            one of them must be in the entry;
 *  - type 'f', YesNo unset : the inflectional code must not be in the entry.
 *
 * Returns a non-zero value if the entry matches, 0 otherwise. All rules
 * are always examined, even after a failure.
 *
 * 'debug_file' is only named, and only used, when DDEBUG > 1.
 * NOTE(review): the debug code appends to 'tmp', which is not declared in
 * this function; presumably it is declared elsewhere in the file - verify.
 */
int composition_rule_matches_entry (const struct pattern* rule,
                                    const struct dela_entry* d,U_FILE*
#if DDEBUG > 1
                                    debug_file
#endif
                                    ) {
  /* Becomes 0 when a grammatical rule or a negative inflectional rule fails */
  int rules_respected = 1;
  /* State of the positive inflectional rules:
   *   1 = none seen so far,
   *   0 = some seen, none matched so far,
   *   2 = at least one matched */
  int flex_state = 1;
#if DDEBUG > 1
  u_strcat(tmp, "   trying ");
#endif
  for (int k = 0;
       k < MAX_NUMBER_OF_COMPOSITION_RULES && rule[k].string[0] != '\0';
       k++) {
#if DDEBUG > 1
    if (rule[k].type == 'f') {
      u_strcat(tmp, ":");
    } else if (rule[k].YesNo) {
      u_strcat(tmp, "+");
    } else {
      u_strcat(tmp, "-");
    }
    u_strcat(tmp, rule[k].string);
#endif
    switch (rule[k].type) {
    case 'g':
      if (rule[k].YesNo) {
        /* '+' rule: the code is required */
        if (!dic_entry_contain_gram_code(d, rule[k].string)) {
          rules_respected = 0;
        }
      } else {
        /* '-' rule: the code is forbidden */
        if (dic_entry_contain_gram_code(d, rule[k].string)) {
          rules_respected = 0;
        }
      }
      break;
    case 'f':
      if (rule[k].YesNo) {
        if (dic_entry_contain_inflectional_code(d, rule[k].string)) {
          /* One alternative matched: remember it for good */
          flex_state = 2;
        } else if (flex_state != 2) {
          /* A miss only counts as long as nothing has matched yet */
          flex_state = 0;
        }
      } else {
        /* Handled although the rule syntax does not allow it */
        if (dic_entry_contain_inflectional_code(d, rule[k].string)) {
          rules_respected = 0;
        }
      }
      break;
    default:
      /* Other rule types are ignored */
      break;
    }
  }
#if DDEBUG > 1
  if (rules_respected && flex_state) {
    u_fprintf(debug_file,"\n   === matched ");
  } else {
    u_fprintf(debug_file,"\n   === not matched ");
  }
  if (d->semantic_codes != 0) {
    for (int c = 0; c < d->n_semantic_codes; c++) {
      u_fprintf(debug_file,"+%S",d->semantic_codes[c]);
    }
  }
  if (d->inflectional_codes != 0) {
    for (int c = 0; c < d->n_inflectional_codes; c++) {
      u_fprintf(debug_file,":%S",d->inflectional_codes[c]);
    }
  }
  u_fprintf(debug_file,"\n");
#endif
  return (rules_respected && flex_state);
}
Beispiel #6
0
/**
 * Explores the node n, dumps the corresponding lines to the output file
 * inf->f_out, recursively explores its outgoing transitions in sorted
 * order, and then frees the node.
 *
 * Parameters:
 *  - n:    the node to explore; it is freed with free_sort_tree_node
 *          before returning, except when it is NULL.
 *  - inf:  the sort context. This function reads inf->f_out and
 *          inf->factorize_inflectional_codes, uses inf->transitions as a
 *          scratch array for sorting, and increments
 *          inf->resulting_line_number (in non-factorize mode only).
 *  - last: in/out pointer to the last dictionary entry that was printed
 *          in factorize mode. Both NULL and (struct dela_entry*)-1 are
 *          treated as "no previous entry". When a new entry becomes *last,
 *          the previous one is freed here.
 *
 * In factorize mode, an entry line is printed without its final newline;
 * that newline is written when the next line is printed, so the very last
 * entry line is not terminated by this function.
 *
 * Returns DEFAULT_ERROR_CODE if n is NULL or if the node has 0x10000 or
 * more transitions. Otherwise, returns SUCCESS_RETURN_CODE if all the
 * recursive explorations succeeded, or the first non-success value
 * returned by one of them; exploration stops at the first such failure.
 */
int explore_node(struct sort_tree_node* n, struct sort_infos* inf,
    struct dela_entry* *last) {
  int i, N;
  struct sort_tree_transition* t = NULL;
  struct couple* couple = NULL;
  struct couple* tmp    = NULL;
  if (n == NULL) {
    error("Internal error in explore_node\n");
    return DEFAULT_ERROR_CODE;
  }
  if (n->couples != NULL) {
    /* If the node is a final one, we print the corresponding lines */
    couple = n->couples;
    while (couple != NULL) {
      if (inf->factorize_inflectional_codes) {
        /* We look if the previously printed line, if any, did share
         * the same information. If so, we just append the new inflectional codes.
         * Otherwise, we print the new line.
         *
         * NOTE: in factorize mode, we always ignore duplicates */
        int err;
        struct dela_entry* entry = tokenize_DELAF_line(couple->s,1,&err,0);
        if (entry==NULL) {
          /* We have a non DELAF entry line, like for instance a comment one */
          if (*last!=NULL && *last!=(struct dela_entry*)-1) {
            /* If there was at least one entry line already printed, then that
             * line is still waiting for its \n */
            u_fprintf(inf->f_out, "\n");
          }
          /* Then we print the line */
          u_fprintf(inf->f_out, "%S\n",couple->s);
          /* And we reset *last; the -1 sentinel is not a real entry, so it
           * must not be passed to free_dela_entry */
          if (*last==(struct dela_entry*)-1) {
            *last=NULL;
          } else if (*last!=NULL) {
            free_dela_entry(*last);
            *last=NULL;
          }
        } else {
          /* So, we have a dic entry. Was there a previous one ? */
          if (*last==NULL || *last==(struct dela_entry*)-1) {
            /* No ? So we print the line, and the current entry becomes *last */
            u_fputs(couple->s, inf->f_out);
            *last=entry;
          } else {
            /* Yes ? We must compare if the codes are compatible */
            if (are_compatible(*last,entry)) {
              /* We look for any code of entry if it was already in *last */
              for (int j=0;j<entry->n_inflectional_codes;j++) {
                if (!dic_entry_contain_inflectional_code(*last,entry->inflectional_codes[j])) {
                  u_fprintf(inf->f_out, ":%S",entry->inflectional_codes[j]);
                  /* We also have to add the newly printed code to *last.
                   * NOTE(review): no bound check on the inflectional_codes
                   * array is visible here - confirm its capacity cannot be
                   * exceeded */
                  (*last)->inflectional_codes[((*last)->n_inflectional_codes)++]=u_strdup(entry->inflectional_codes[j]);
                }
              }
              /* And we must free entry */
              free_dela_entry(entry);
            } else {
              /* If codes are not compatible, we print the \n for the previous
               * line, then the current line that becomes *last */
              u_fprintf(inf->f_out, "\n%S",couple->s);
              free_dela_entry(*last);
              *last=entry;
            }
          }
        }
      } else {
        /* Normal way: we print each line one after the other, as many
         * times as indicated by couple->n */
        for (i = 0; i < couple->n; i++) {
          u_fprintf(inf->f_out, "%S\n", couple->s);
          (inf->resulting_line_number)++;
        }
      }
      /* The couple has been processed: we move on and free it */
      tmp = couple;
      couple = couple->next;
      free(tmp->s);
      free(tmp);
    }
    n->couples = NULL;
  }
  /* We convert the transition list into a sorted array */
  t = n->transitions;
  N = 0;
  while (t != NULL && N < 0x10000) {
    inf->transitions[N++] = t;
    t = t->next;
  }
  /* The copy stops at 0x10000 elements, so reaching that count is
   * reported as an error, even if the list had exactly 0x10000 elements */
  if (N == 0x10000) {
    error("Internal error in explore_node: more than 0x10000 nodes\n");
    free_sort_tree_node(n);
    return DEFAULT_ERROR_CODE;
  }
  if (N > 1)
    quicksort(inf->transitions, 0, N - 1, inf);
  /* After sorting, we copy the result into the transitions of n, by
   * relinking the list in the order of the array */
  for (int j = 0; j < N - 1; j++) {
    inf->transitions[j]->next = inf->transitions[j + 1];
  }
  if (N > 0) {
    inf->transitions[N - 1]->next = NULL;
    n->transitions = inf->transitions[0];
  }
  /* Finally, we explore the outgoing transitions. This must come after
   * the relinking above, since the recursive calls reuse inf->transitions */
  t = n->transitions;
  int explore_return_value = SUCCESS_RETURN_CODE;

  while (t != NULL && explore_return_value == SUCCESS_RETURN_CODE) {
    explore_return_value = explore_node(t->node, inf, last);
    if(explore_return_value == SUCCESS_RETURN_CODE) {
      t = t->next;
    }
  }

  /* And we free the node, whether the exploration succeeded or not */
  free_sort_tree_node(n);
  return explore_return_value;
}
Beispiel #7
0
/**
 * Returns 1 if the given dictionary entry is a ":a" one.
 */
char check_a(struct dela_entry* d) {
unichar t1[2];
u_strcpy(t1,"a");
return (char)dic_entry_contain_inflectional_code(d,t1);
}