Ejemplo n.º 1
0
/*
 * replace_pron_head: rewrite the beginning of a node's pronunciation
 *
 * rules is a NULL-terminated flat array of (pattern, replacement) string pairs.
 * The first pattern for which strtopcmp(pron, pattern) returns a non-negative
 * offset wins: the new pronunciation is the replacement followed by the part
 * of the old pronunciation that starts at that offset.  Nothing is changed
 * when no pattern matches, or when the result would not fit in MAXBUFLEN bytes.
 */
static void replace_pron_head(const char *rules[], NJDNode * node)
{
   int i, offset;
   const char *pron = NJDNode_get_pron(node);
   char buff[MAXBUFLEN];

   for (i = 0; rules[i] != NULL; i += 2) {
      offset = strtopcmp(pron, rules[i]);
      if (offset < 0)
         continue;
      /* strcpy/strcat below are only safe if head + tail + '\0' fit in buff */
      if (strlen(rules[i + 1]) + strlen(&pron[offset]) >= sizeof(buff))
         return;
      strcpy(buff, rules[i + 1]);
      strcat(buff, &pron[offset]);
      /* buff is a private copy, so the setter may release the old pron safely */
      NJDNode_set_pron(node, buff);
      return;
   }
}

/*
 * convert_numerative_pron: adjust the pronunciation of a numerative (node2)
 * depending on the digit in front of it (node1)
 *
 * list is a NULL-terminated flat array of (digit string, type) pairs, the type
 * being a decimal number stored as text.  The string of node1 is looked up in
 * list; type 1 applies njd_set_digit_rule_voiced_sound_symbol_list to node2,
 * type 2 applies njd_set_digit_rule_semivoiced_sound_symbol_list, and any other
 * type (including "not found") leaves node2 untouched.  A node1 whose string is
 * "*" is ignored.
 */
static void convert_numerative_pron(const char *list[], NJDNode * node1, NJDNode * node2)
{
   int i;
   int type = 0;
   const char *str = NJDNode_get_string(node1);

   if (strcmp(str, "*") == 0)
      return;
   for (i = 0; list[i] != NULL; i += 2) {
      if (strcmp(list[i], str) == 0) {
         type = atoi(list[i + 1]);
         break;
      }
   }
   if (type == 1)
      replace_pron_head(njd_set_digit_rule_voiced_sound_symbol_list, node2);
   else if (type == 2)
      replace_pron_head(njd_set_digit_rule_semivoiced_sound_symbol_list, node2);
}
Ejemplo n.º 2
0
/*
 * convert_digit_pron: replace pronunciation, accent and mora size of a digit node
 *
 * list is a NULL-terminated flat array of 4-string records:
 *   [0] node string to match, [1] pronunciation, [2] accent, [3] mora size
 * (the last two are decimal numbers stored as text).  The first record whose
 * key equals the string of node is applied; a node whose string is "*" and a
 * node without a matching record are left unchanged.
 */
static void convert_digit_pron(const char *list[], NJDNode * node)
{
   const char **record;
   const char *surface = NJDNode_get_string(node);

   if (strcmp(surface, "*") == 0)
      return;

   for (record = list; record[0] != NULL; record += 4) {
      if (strcmp(record[0], surface) != 0)
         continue;
      NJDNode_set_pron(node, (char *) record[1]);
      NJDNode_set_acc(node, atoi(record[2]));
      NJDNode_set_mora_size(node, atoi(record[3]));
      return;
   }
}
Ejemplo n.º 3
0
/*
 * NJDNode_copy: copy every feature field of node2 into node1
 *
 * node1 is the destination, node2 the source.  Each field is transferred
 * through its NJDNode_set_* function; the prev/next links of node1 are not
 * touched by this function.
 */
void NJDNode_copy(NJDNode * node1, NJDNode * node2)
{
   NJDNode *const dst = node1;
   const NJDNode *const src = node2;

   /* surface form and part of speech */
   NJDNode_set_string(dst, src->string);
   NJDNode_set_pos(dst, src->pos);
   NJDNode_set_pos_group1(dst, src->pos_group1);
   NJDNode_set_pos_group2(dst, src->pos_group2);
   NJDNode_set_pos_group3(dst, src->pos_group3);

   /* conjugation */
   NJDNode_set_ctype(dst, src->ctype);
   NJDNode_set_cform(dst, src->cform);

   /* original form, reading and pronunciation */
   NJDNode_set_orig(dst, src->orig);
   NJDNode_set_read(dst, src->read);
   NJDNode_set_pron(dst, src->pron);

   /* accent and chaining information */
   NJDNode_set_acc(dst, src->acc);
   NJDNode_set_mora_size(dst, src->mora_size);
   NJDNode_set_chain_rule(dst, src->chain_rule);
   NJDNode_set_chain_flag(dst, src->chain_flag);
}
Ejemplo n.º 4
0
/*
 * njd_set_pronunciation: fill in / adjust reading and pronunciation of NJD nodes
 *
 * Pass 1 handles every node whose mora size is 0: its reading and
 * pronunciation are reset, then
 *   - symbols (pos == KIGOU) and numbers (pos_group1 == KAZU) are looked up in
 *     njd_set_pronunciation_symbol_list (pairs: string, reading);
 *   - otherwise, if the pronunciation reads back as "*", the node string is
 *     scanned from left to right against njd_set_pronunciation_list
 *     (triples: pattern, reading, mora count) and every match is appended to
 *     reading, pronunciation and mora size; unmatched bytes are skipped.
 * NJD_remove_silent_node() is then called on the list.
 *
 * Pass 2 looks at each node together with its successor and applies two
 * fixed rewrites (see the comments in the loop).
 */
void njd_set_pronunciation(NJD * njd)
{
   NJDNode *cur;
   NJDNode *succ;
   const char *text;
   int k;
   int matched = 0;
   int offset;
   int total;

   /* pass 1: nodes without any mora */
   for (cur = njd->head; cur != NULL; cur = cur->next) {
      if (NJDNode_get_mora_size(cur) != 0)
         continue;

      NJDNode_set_read(cur, NULL);
      NJDNode_set_pron(cur, NULL);

      if (strcmp(NJDNode_get_pos(cur), NJD_SET_PRONUNCIATION_KIGOU) == 0
          || strcmp(NJDNode_get_pos_group1(cur), NJD_SET_PRONUNCIATION_KAZU) == 0) {
         /* for symbol */
         for (k = 0; njd_set_pronunciation_symbol_list[k] != NULL; k += 2) {
            if (strcmp(NJDNode_get_string(cur), njd_set_pronunciation_symbol_list[k]) != 0)
               continue;
            NJDNode_set_read(cur, (char *) njd_set_pronunciation_symbol_list[k + 1]);
            NJDNode_set_pron(cur, (char *) njd_set_pronunciation_symbol_list[k + 1]);
            break;
         }
         continue;
      }

      if (strcmp(NJDNode_get_pron(cur), "*") != 0)
         continue;

      /* for others: greedy left-to-right table lookup over the node string */
      text = NJDNode_get_string(cur);
      total = strlen(text);
      offset = 0;
      while (offset < total) {
         matched = 0;
         for (k = 0; njd_set_pronunciation_list[k] != NULL; k += 3) {
            matched = strtopcmp(&text[offset], njd_set_pronunciation_list[k]);
            if (matched > 0)
               break;
         }
         if (matched <= 0) {
            /* nothing in the table starts here: skip one byte */
            offset++;
            continue;
         }
         offset += matched;
         NJDNode_add_read(cur, (char *) njd_set_pronunciation_list[k + 1]);
         NJDNode_add_pron(cur, (char *) njd_set_pronunciation_list[k + 1]);
         NJDNode_add_mora_size(cur, atoi(njd_set_pronunciation_list[k + 2]));
      }
   }
   NJD_remove_silent_node(njd);

   /* pass 2: rewrites that depend on the following node */
   for (cur = njd->head; cur != NULL; cur = cur->next) {
      succ = cur->next;
      if (succ == NULL)
         continue;

      /* JODOUSHI pronounced U after a DOUSHI/JODOUSHI that has morae -> CHOUON */
      if (strcmp(NJDNode_get_pron(succ), NJD_SET_PRONUNCIATION_U) == 0
          && strcmp(NJDNode_get_pos(succ), NJD_SET_PRONUNCIATION_JODOUSHI) == 0
          && (strcmp(NJDNode_get_pos(cur), NJD_SET_PRONUNCIATION_DOUSHI) == 0
              || strcmp(NJDNode_get_pos(cur), NJD_SET_PRONUNCIATION_JODOUSHI) == 0)
          && NJDNode_get_mora_size(cur) > 0) {
         NJDNode_set_pron(succ, NJD_SET_PRONUNCIATION_CHOUON);
      }

      /* JODOUSHI "desu"/"masu" directly in front of a question mark */
      if (strcmp(NJDNode_get_pos(cur), NJD_SET_PRONUNCIATION_JODOUSHI) == 0
          && strcmp(NJDNode_get_string(succ), NJD_SET_PRONUNCIATION_QUESTION) == 0) {
         if (strcmp(NJDNode_get_string(cur), NJD_SET_PRONUNCIATION_DESU_STR) == 0) {
            NJDNode_set_pron(cur, NJD_SET_PRONUNCIATION_DESU_PRON);
         } else if (strcmp(NJDNode_get_string(cur), NJD_SET_PRONUNCIATION_MASU_STR) == 0) {
            NJDNode_set_pron(cur, NJD_SET_PRONUNCIATION_MASU_PRON);
         }
      }
   }
}
Ejemplo n.º 5
0
/*
 * njd_set_digit: rewrite the nodes of an NJD list that represent numbers
 *
 * The function runs several passes over the doubly linked list starting at
 * njd->head:
 *   1. every maximal run of nodes accepted by get_digit(node, 1) is handed to
 *      convert_digit_sequence();
 *   2. a TEN1/TEN2 node between two KAZU nodes is reloaded from
 *      NJD_SET_DIGIT_TEN_FEATURE and a preceding zero/two/five gets a special
 *      pronunciation (presumably the decimal point, cf. the *_BEFORE_DP names);
 *   3. a numerative (pos_group2 JOSUUSHI or pos_group1 FUKUSHIKANOU) right after
 *      a KAZU node triggers the class1x digit tables and the class2x numerative
 *      tables;
 *   4. a KAZU node right after a KAZU node: chain flags are adjusted from
 *      numeral_list4/5 and the numeral_list6..11 tables are applied;
 *   5. a KAZU node that is not preceded by a KAZU node and is followed by a
 *      numerative: the class3, NIN, NICHI and NICHIKAN tables are applied;
 *   6. the fixed patterns TEN+FOUR, TWO+TEN and TWO+TEN+FOUR followed by
 *      NICHI/NICHIKAN are reloaded from dedicated feature strings.
 * Several passes set a pronunciation to NULL and rely on
 * NJD_remove_silent_node() afterwards (that helper is not visible here;
 * presumably it drops such nodes).
 *
 * The function returns right after pass 1 when no node has
 * pos_group1 == NJD_SET_DIGIT_KAZU, and after the following
 * NJD_remove_silent_node() call when the list has become empty.
 */
void njd_set_digit(NJD * njd)
{
   int i, j;
   NJDNode *s = NULL;
   NJDNode *e = NULL;
   NJDNode *node;
   int find = 0;

   /* convert digit sequence */
   /* s..e delimit the current run of digit nodes; find remembers whether any KAZU node exists */
   for (node = njd->head; node != NULL; node = node->next) {
      if (find == 0 && strcmp(NJDNode_get_pos_group1(node), NJD_SET_DIGIT_KAZU) == 0)
         find = 1;
      if (get_digit(node, 1) >= 0) {
         if (s == NULL)
            s = node;
         /* a run that reaches the tail ends on the tail itself */
         if (node == njd->tail)
            e = node;
      } else {
         /* first non-digit after a run: the run ended on the previous node */
         if (s != NULL)
            e = node->prev;
      }
      if (s != NULL && e != NULL) {
         convert_digit_sequence(s, e);
         s = e = NULL;
      }
   }
   if (find == 0)
      return;
   NJD_remove_silent_node(njd);
   if (njd->head == NULL)
      return;

   /* pass 2: TEN1/TEN2 between two KAZU nodes; starts at the second node so node->prev exists */
   for (node = njd->head->next; node != NULL && node->next != NULL; node = node->next) {
      if (strcmp(NJDNode_get_string(node), "*") != 0
          && strcmp(NJDNode_get_string(node->prev), "*") != 0
          && (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_TEN1) == 0
              || strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_TEN2) == 0)
          && strcmp(NJDNode_get_pos_group1(node->prev), NJD_SET_DIGIT_KAZU) == 0
          && strcmp(NJDNode_get_pos_group1(node->next), NJD_SET_DIGIT_KAZU) == 0) {
         NJDNode_load(node, NJD_SET_DIGIT_TEN_FEATURE);
         NJDNode_set_chain_flag(node, 1);
         /* zero, two and five in front of it get a 2-mora pronunciation */
         if (strcmp(NJDNode_get_string(node->prev), NJD_SET_DIGIT_ZERO1) == 0
             || strcmp(NJDNode_get_string(node->prev), NJD_SET_DIGIT_ZERO2) == 0) {
            NJDNode_set_pron(node->prev, NJD_SET_DIGIT_ZERO_BEFORE_DP);
            NJDNode_set_mora_size(node->prev, 2);
         } else if (strcmp(NJDNode_get_string(node->prev), NJD_SET_DIGIT_TWO) == 0) {
            NJDNode_set_pron(node->prev, NJD_SET_DIGIT_TWO_BEFORE_DP);
            NJDNode_set_mora_size(node->prev, 2);
         } else if (strcmp(NJDNode_get_string(node->prev), NJD_SET_DIGIT_FIVE) == 0) {
            NJDNode_set_pron(node->prev, NJD_SET_DIGIT_FIVE_BEFORE_DP);
            NJDNode_set_mora_size(node->prev, 2);
         }
      }
   }

   /* pass 3: numerative directly after a KAZU node */
   for (node = njd->head->next; node != NULL; node = node->next) {
      if (strcmp(NJDNode_get_pos_group1(node->prev), NJD_SET_DIGIT_KAZU) == 0) {
         if (strcmp(NJDNode_get_pos_group2(node), NJD_SET_DIGIT_JOSUUSHI) == 0
             || strcmp(NJDNode_get_pos_group1(node), NJD_SET_DIGIT_FUKUSHIKANOU) == 0) {
            /* convert digit pron */
            /* only the first class that contains the numerative is applied to the digit */
            if (search_numerative_class(njd_set_digit_rule_numerative_class1b, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1b, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1c1, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1c1, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1c2, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1c2, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1d, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1d, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1e, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1e, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1f, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1f, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1g, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1g, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1h, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1h, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1i, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1i, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1j, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1j, node->prev);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class1k, node) == 1)
               convert_digit_pron(njd_set_digit_rule_conv_table1k, node->prev);
            /* convert numerative pron */
            /* independent of the chain above: first matching class2x table changes the numerative */
            if (search_numerative_class(njd_set_digit_rule_numerative_class2b, node) == 1)
               convert_numerative_pron(njd_set_digit_rule_conv_table2b, node->prev, node);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class2c, node) == 1)
               convert_numerative_pron(njd_set_digit_rule_conv_table2c, node->prev, node);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class2d, node) == 1)
               convert_numerative_pron(njd_set_digit_rule_conv_table2d, node->prev, node);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class2e, node) == 1)
               convert_numerative_pron(njd_set_digit_rule_conv_table2e, node->prev, node);
            else if (search_numerative_class(njd_set_digit_rule_numerative_class2f, node) == 1)
               convert_numerative_pron(njd_set_digit_rule_conv_table2f, node->prev, node);
            /* modify accent phrase */
            NJDNode_set_chain_flag(node->prev, 0);
            NJDNode_set_chain_flag(node, 1);
         }
      }
   }

   /* pass 4: node directly after a KAZU node; find is reused as a local "pair matched" flag */
   for (node = njd->head->next; node != NULL; node = node->next) {
      if (strcmp(NJDNode_get_pos_group1(node->prev), NJD_SET_DIGIT_KAZU) == 0) {
         if (strcmp(NJDNode_get_pos_group1(node), NJD_SET_DIGIT_KAZU) == 0
             && NJDNode_get_string(node->prev) != NULL && NJDNode_get_string(node) != NULL) {
            /* modify accent phrase */
            /* prev in numeral_list4 and node in numeral_list5: chain node to prev */
            find = 0;
            for (i = 0; njd_set_digit_rule_numeral_list4[i] != NULL; i++) {
               if (strcmp(NJDNode_get_string(node->prev), njd_set_digit_rule_numeral_list4[i]) == 0) {
                  for (j = 0; njd_set_digit_rule_numeral_list5[j] != NULL; j++) {
                     if (strcmp(NJDNode_get_string(node), njd_set_digit_rule_numeral_list5[j]) == 0) {
                        NJDNode_set_chain_flag(node->prev, 0);
                        NJDNode_set_chain_flag(node, 1);
                        find = 1;
                        break;
                     }
                  }
                  break;
               }
            }
            /* otherwise, prev in numeral_list5 and node in numeral_list4: clear node's chain flag */
            if (find == 0) {
               for (i = 0; njd_set_digit_rule_numeral_list5[i] != NULL; i++) {
                  if (strcmp(NJDNode_get_string(node->prev), njd_set_digit_rule_numeral_list5[i]) ==
                      0) {
                     for (j = 0; njd_set_digit_rule_numeral_list4[j] != NULL; j++) {
                        if (strcmp(NJDNode_get_string(node), njd_set_digit_rule_numeral_list4[j]) ==
                            0) {
                           NJDNode_set_chain_flag(node, 0);
                           break;
                        }
                     }
                     break;
                  }
               }
            }
         }
         /* these three lookups are independent `if`s, so more than one may apply */
         if (search_numerative_class(njd_set_digit_rule_numeral_list8, node) == 1)
            convert_digit_pron(njd_set_digit_rule_numeral_list9, node->prev);
         if (search_numerative_class(njd_set_digit_rule_numeral_list10, node) == 1)
            convert_digit_pron(njd_set_digit_rule_numeral_list11, node->prev);
         if (search_numerative_class(njd_set_digit_rule_numeral_list6, node) == 1)
            convert_numerative_pron(njd_set_digit_rule_numeral_list7, node->prev, node);
      }
   }

   /* pass 5: a KAZU node not preceded by KAZU and followed by a numerative */
   for (node = njd->head; node != NULL; node = node->next) {
      if (node->next != NULL &&
          strcmp(NJDNode_get_string(node->next), "*") != 0 &&
          strcmp(NJDNode_get_pos_group1(node), NJD_SET_DIGIT_KAZU) == 0 &&
          (node->prev == NULL
           || strcmp(NJDNode_get_pos_group1(node->prev), NJD_SET_DIGIT_KAZU) != 0)
          && (strcmp(NJDNode_get_pos_group2(node->next), NJD_SET_DIGIT_JOSUUSHI) == 0
              || strcmp(NJDNode_get_pos_group1(node->next), NJD_SET_DIGIT_FUKUSHIKANOU) == 0)) {
         /* convert class3 */
         /* class3 entries are (string, read) pairs; conv_table3 records are (string, pron, acc, mora size) */
         for (i = 0; njd_set_digit_rule_numerative_class3[i] != NULL; i += 2) {
            if (strcmp(NJDNode_get_string(node->next), njd_set_digit_rule_numerative_class3[i]) == 0
                && strcmp(NJDNode_get_read(node->next),
                          njd_set_digit_rule_numerative_class3[i + 1]) == 0) {
               for (j = 0; njd_set_digit_rule_conv_table3[j] != NULL; j += 4) {
                  if (strcmp(NJDNode_get_string(node), njd_set_digit_rule_conv_table3[j]) == 0) {
                     NJDNode_set_read(node, (char *) njd_set_digit_rule_conv_table3[j + 1]);
                     NJDNode_set_pron(node, (char *) njd_set_digit_rule_conv_table3[j + 1]);
                     NJDNode_set_acc(node, atoi(njd_set_digit_rule_conv_table3[j + 2]));
                     NJDNode_set_mora_size(node, atoi(njd_set_digit_rule_conv_table3[j + 3]));
                     break;
                  }
               }
               break;
            }
         }
         /* person */
         /* the digit node is reloaded from a full feature string and the numerative is silenced */
         if (strcmp(NJDNode_get_string(node->next), NJD_SET_DIGIT_NIN) == 0) {
            for (i = 0; njd_set_digit_rule_conv_table4[i] != NULL; i += 2) {
               if (strcmp(NJDNode_get_string(node), njd_set_digit_rule_conv_table4[i]) == 0) {
                  NJDNode_load(node, (char *) njd_set_digit_rule_conv_table4[i + 1]);
                  NJDNode_set_pron(node->next, NULL);
                  break;
               }
            }
         }
         /* the day of month */
         if (strcmp(NJDNode_get_string(node->next), NJD_SET_DIGIT_NICHI) == 0
             && strcmp(NJDNode_get_string(node), "*") != 0) {
            /* ONE after a node whose string contains GATSU uses the TSUITACHI feature */
            if (node->prev != NULL
                && strstr(NJDNode_get_string(node->prev), NJD_SET_DIGIT_GATSU) != NULL
                && strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_ONE) == 0) {
               NJDNode_load(node, NJD_SET_DIGIT_TSUITACHI);
               NJDNode_set_pron(node->next, NULL);
            } else {
               for (i = 0; njd_set_digit_rule_conv_table5[i] != NULL; i += 2) {
                  if (strcmp(NJDNode_get_string(node), njd_set_digit_rule_conv_table5[i]) == 0) {
                     NJDNode_load(node, (char *) njd_set_digit_rule_conv_table5[i + 1]);
                     NJDNode_set_pron(node->next, NULL);
                     break;
                  }
               }
            }
         } else if (strcmp(NJDNode_get_string(node->next), NJD_SET_DIGIT_NICHIKAN) == 0) {
            for (i = 0; njd_set_digit_rule_conv_table6[i] != NULL; i += 2) {
               if (strcmp(NJDNode_get_string(node), njd_set_digit_rule_conv_table6[i]) == 0) {
                  NJDNode_load(node, (char *) njd_set_digit_rule_conv_table6[i + 1]);
                  NJDNode_set_pron(node->next, NULL);
                  break;
               }
            }
         }
      }
   }

   /* pass 6: fixed multi-node patterns; needs at least two following nodes */
   for (node = njd->head; node != NULL; node = node->next) {
      if ((node->prev == NULL
           || strcmp(NJDNode_get_pos_group1(node->prev), NJD_SET_DIGIT_KAZU) != 0)
          && node->next != NULL && node->next->next != NULL) {
         /* TEN + FOUR + NICHI/NICHIKAN: the first node carries the whole feature, the rest is silenced */
         if (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_TEN) == 0
             && strcmp(NJDNode_get_string(node->next), NJD_SET_DIGIT_FOUR) == 0) {
            if (strcmp(NJDNode_get_string(node->next->next), NJD_SET_DIGIT_NICHI) == 0) {
               NJDNode_load(node, NJD_SET_DIGIT_JUYOKKA);
               NJDNode_set_pron(node->next, NULL);
               NJDNode_set_pron(node->next->next, NULL);
            } else if (strcmp(NJDNode_get_string(node->next->next), NJD_SET_DIGIT_NICHIKAN) == 0) {
               NJDNode_load(node, NJD_SET_DIGIT_JUYOKKAKAN);
               NJDNode_set_pron(node->next, NULL);
               NJDNode_set_pron(node->next->next, NULL);
            }
         } else if (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_TWO) == 0
                    && strcmp(NJDNode_get_string(node->next), NJD_SET_DIGIT_TEN) == 0) {
            /* TWO + TEN + NICHI/NICHIKAN, or TWO + TEN + FOUR + NICHI/NICHIKAN */
            if (strcmp(NJDNode_get_string(node->next->next), NJD_SET_DIGIT_NICHI) == 0) {
               NJDNode_load(node, NJD_SET_DITIT_HATSUKA);
               NJDNode_set_pron(node->next, NULL);
               NJDNode_set_pron(node->next->next, NULL);
            } else if (strcmp(NJDNode_get_string(node->next->next), NJD_SET_DIGIT_NICHIKAN) == 0) {
               NJDNode_load(node, NJD_SET_DIGIT_HATSUKAKAN);
               NJDNode_set_pron(node->next, NULL);
               NJDNode_set_pron(node->next->next, NULL);
            } else if (strcmp(NJDNode_get_string(node->next->next), NJD_SET_DIGIT_FOUR) == 0
                       && node->next->next->next != NULL) {
               /* here the first two nodes are reloaded and the last two silenced */
               if (strcmp(NJDNode_get_string(node->next->next->next), NJD_SET_DIGIT_NICHI) == 0) {
                  NJDNode_load(node, NJD_SET_DIGIT_NIJU);
                  NJDNode_load(node->next, NJD_SET_DITIT_YOKKA);
                  NJDNode_set_pron(node->next->next, NULL);
                  NJDNode_set_pron(node->next->next->next, NULL);
               } else if (strcmp(NJDNode_get_string(node->next->next->next), NJD_SET_DIGIT_NICHIKAN)
                          == 0) {
                  NJDNode_load(node, NJD_SET_DIGIT_NIJU);
                  NJDNode_load(node->next, NJD_SET_DIGIT_YOKKAKAN);
                  NJDNode_set_pron(node->next->next, NULL);
                  NJDNode_set_pron(node->next->next->next, NULL);
               }
            }
         }
      }
   }

   NJD_remove_silent_node(njd);
   /* nothing follows, so this check has no effect beyond documenting the possibly empty list */
   if (njd->head == NULL)
      return;
}
Ejemplo n.º 6
0
/*
 * convert_digit_sequence: turn a run of digit nodes into a spoken number
 *
 * start and end are the first and last node (inclusive) of a run inside a
 * doubly linked NJD node list.  Runs of fewer than two nodes are left alone.
 *
 * If get_digit_sequence_score() is negative the run is processed digit by
 * digit: zero/two/five get their *_AFTER_DP pronunciation with a mora size of
 * 2, the chain rule is cleared, and the nodes are chained in pairs (every
 * second node gets chain_flag 1 and sets the accent of its predecessor to 3).
 *
 * Otherwise the run is read as one number.  While walking the run, index
 * counts down 3..0 inside each group of four digits and place counts the
 * groups still to come.  Unit nodes are inserted after the digits from
 * njd_set_digit_rule_numeral_list2[index] (inside a group) and
 * njd_set_digit_rule_numeral_list3[place] (after a group that contained a
 * non-zero digit); digits that must not be spoken get their pronunciation set
 * to NULL and accent / mora size set to 0.  Runs needing place > 17 are left
 * unchanged.
 *
 * If a unit node cannot be allocated the function returns immediately and the
 * remaining digits of the run stay unconverted.
 */
static void convert_digit_sequence(NJDNode * start, NJDNode * end)
{
   NJDNode *node;
   NJDNode *newnode;
   int digit;
   int place = 0;
   int index;
   int size = 0;
   int have = 0;

   for (node = start; node != end->next; node = node->next)
      size++;
   if (size <= 1)
      return;

   if (get_digit_sequence_score(start, end) < 0) {
      /* size is reused as the zero-based position inside the run */
      for (node = start, size = 0; node != end->next; node = node->next) {
         if (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_ZERO1) == 0
             || strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_ZERO2) == 0) {
            NJDNode_set_pron(node, NJD_SET_DIGIT_ZERO_AFTER_DP);
            NJDNode_set_mora_size(node, 2);
         } else if (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_TWO) == 0) {
            NJDNode_set_pron(node, NJD_SET_DIGIT_TWO_AFTER_DP);
            NJDNode_set_mora_size(node, 2);
         } else if (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_FIVE) == 0) {
            NJDNode_set_pron(node, NJD_SET_DIGIT_FIVE_AFTER_DP);
            NJDNode_set_mora_size(node, 2);
         }
         NJDNode_set_chain_rule(node, NULL);
         if (size % 2 == 0) {
            NJDNode_set_chain_flag(node, 0);
         } else {
            /* odd position: node->prev is the even-positioned digit of the same run */
            NJDNode_set_chain_flag(node, 1);
            NJDNode_set_acc(node->prev, 3);
         }
         size++;
      }
      return;
   }

   /* position of the first digit inside its group of four (3..0 after the decrement) */
   index = size % 4;
   if (index == 0)
      index = 4;
   if (size > index)
      place = (size - index) / 4;
   index--;
   if (place > 17)
      return;

   for (node = start; node != end->next; node = node->next) {
      digit = get_digit(node, 0);
      if (index == 0) {
         /* last digit of a group */
         if (digit == 0) {
            NJDNode_set_pron(node, NULL);
            NJDNode_set_acc(node, 0);
            NJDNode_set_mora_size(node, 0);
         } else {
            have = 1;
         }
         /* have == 1: this group contained a non-zero digit, so its group unit is spoken */
         if (have == 1) {
            if (place > 0) {
               newnode = (NJDNode *) calloc(1, sizeof(NJDNode));
               if (newnode == NULL)
                  return;       /* out of memory: stop instead of dereferencing NULL */
               NJDNode_initialize(newnode);
               NJDNode_load(newnode, (char *) njd_set_digit_rule_numeral_list3[place]);
               /* continue the walk from the inserted node so that it is not visited as a digit */
               node = NJDNode_insert(node, node->next, newnode);
            }
            have = 0;
         }
         place--;
      } else {
         /* zero and one are not spoken in front of a unit inside a group */
         if (digit <= 1) {
            NJDNode_set_pron(node, NULL);
            NJDNode_set_acc(node, 0);
            NJDNode_set_mora_size(node, 0);
         }
         if (digit > 0) {
            newnode = (NJDNode *) calloc(1, sizeof(NJDNode));
            if (newnode == NULL)
               return;          /* out of memory: stop instead of dereferencing NULL */
            NJDNode_initialize(newnode);
            NJDNode_load(newnode, (char *) njd_set_digit_rule_numeral_list2[index]);
            node = NJDNode_insert(node, node->next, newnode);
            have = 1;
         }
      }
      index--;
      if (index < 0)
         index = 4 - 1;
   }
}
Ejemplo n.º 7
0
/*
 * NJD_load_from_fp: read node records from a stream and append them to njd
 *
 * Each record consists of 14 tokens read with get_token_from_fp():
 *   string, pos, pos_group1, pos_group2, pos_group3, ctype, cform, orig, read,
 *   pron, acc, mora_size, chain_rule, chain_flag
 * acc is terminated by '/', every other token by ','.  Reading stops at the
 * first record in which one of the mandatory tokens (all but string, orig,
 * read, pron and chain_rule) comes back with a length <= 0; that record is
 * discarded.  acc, mora_size and chain_flag are converted with atoi().
 *
 * A NULL fp only produces a warning.  If a node cannot be allocated a warning
 * is printed and loading stops; nodes pushed so far stay in njd.
 */
void NJD_load_from_fp(NJD * njd, FILE * fp)
{
   NJDNode *node = NULL;
   char string[MAXBUFLEN];
   char pos[MAXBUFLEN];
   char pos_group1[MAXBUFLEN];
   char pos_group2[MAXBUFLEN];
   char pos_group3[MAXBUFLEN];
   char ctype[MAXBUFLEN];
   char cform[MAXBUFLEN];
   char orig[MAXBUFLEN];
   char read[MAXBUFLEN];
   char pron[MAXBUFLEN];
   char acc[MAXBUFLEN];
   char mora_size[MAXBUFLEN];
   char chain_rule[MAXBUFLEN];
   char chain_flag[MAXBUFLEN];

   if (fp == NULL) {
      fprintf(stderr, "WARNING: NJD_load_from_fp() in njd.c: File pointer should not be null.");
      return;
   }

   while (1) {
      /* string, orig, read, pron and chain_rule may be empty, so their result is not checked */
      get_token_from_fp(fp, string, ',');
      if (get_token_from_fp(fp, pos, ',') <= 0)
         break;
      if (get_token_from_fp(fp, pos_group1, ',') <= 0)
         break;
      if (get_token_from_fp(fp, pos_group2, ',') <= 0)
         break;
      if (get_token_from_fp(fp, pos_group3, ',') <= 0)
         break;
      if (get_token_from_fp(fp, ctype, ',') <= 0)
         break;
      if (get_token_from_fp(fp, cform, ',') <= 0)
         break;
      get_token_from_fp(fp, orig, ',');
      get_token_from_fp(fp, read, ',');
      get_token_from_fp(fp, pron, ',');
      /* accent and mora size are written as "acc/mora_size" */
      if (get_token_from_fp(fp, acc, '/') <= 0)
         break;
      if (get_token_from_fp(fp, mora_size, ',') <= 0)
         break;
      get_token_from_fp(fp, chain_rule, ',');
      if (get_token_from_fp(fp, chain_flag, ',') <= 0)
         break;

      node = (NJDNode *) calloc(1, sizeof(NJDNode));
      if (node == NULL) {
         fprintf(stderr, "WARNING: NJD_load_from_fp() in njd.c: Cannot allocate memory.\n");
         return;
      }
      NJDNode_initialize(node);
      NJDNode_set_string(node, string);
      NJDNode_set_pos(node, pos);
      NJDNode_set_pos_group1(node, pos_group1);
      NJDNode_set_pos_group2(node, pos_group2);
      NJDNode_set_pos_group3(node, pos_group3);
      NJDNode_set_ctype(node, ctype);
      NJDNode_set_cform(node, cform);
      NJDNode_set_orig(node, orig);
      NJDNode_set_read(node, read);
      NJDNode_set_pron(node, pron);
      NJDNode_set_acc(node, atoi(acc));
      NJDNode_set_mora_size(node, atoi(mora_size));
      NJDNode_set_chain_rule(node, chain_rule);
      NJDNode_set_chain_flag(node, atoi(chain_flag));
      NJD_push_node(njd, node);
   }
}
Ejemplo n.º 8
0
/*
 * NJD_load: parse node records from a string and append them to njd
 *
 * str holds consecutive records of 14 tokens each, read with
 * get_token_from_string():
 *   string, pos, pos_group1, pos_group2, pos_group3, ctype, cform, orig, read,
 *   pron, acc, mora_size, chain_rule, chain_flag
 * acc is terminated by '/', every other token by ','.  Parsing stops at the
 * first record in which one of the mandatory tokens (all but string, orig,
 * read, pron and chain_rule) comes back with a length <= 0; that record is
 * discarded.  acc, mora_size and chain_flag are converted with atoi().
 *
 * A NULL or empty str only produces a warning.  If a node cannot be allocated
 * a warning is printed and loading stops; nodes pushed so far stay in njd.
 */
void NJD_load(NJD * njd, char *str)
{
   int i = 0;
   NJDNode *node = NULL;
   char string[MAXBUFLEN];
   char pos[MAXBUFLEN];
   char pos_group1[MAXBUFLEN];
   char pos_group2[MAXBUFLEN];
   char pos_group3[MAXBUFLEN];
   char ctype[MAXBUFLEN];
   char cform[MAXBUFLEN];
   char orig[MAXBUFLEN];
   char read[MAXBUFLEN];
   char pron[MAXBUFLEN];
   char acc[MAXBUFLEN];
   char mora_size[MAXBUFLEN];
   char chain_rule[MAXBUFLEN];
   char chain_flag[MAXBUFLEN];

   /* NULL is treated like an empty string instead of being dereferenced */
   if (str == NULL || str[0] == '\0') {
      fprintf(stderr, "WARNING: NJD_load() in njd.c: Input string should not be empty.");
      return;
   }

   while (1) {
      /* string, orig, read, pron and chain_rule may be empty, so their result is not checked */
      get_token_from_string(str, &i, string, ',');
      if (get_token_from_string(str, &i, pos, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, pos_group1, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, pos_group2, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, pos_group3, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, ctype, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, cform, ',') <= 0)
         break;
      get_token_from_string(str, &i, orig, ',');
      get_token_from_string(str, &i, read, ',');
      get_token_from_string(str, &i, pron, ',');
      /* accent and mora size are written as "acc/mora_size" */
      if (get_token_from_string(str, &i, acc, '/') <= 0)
         break;
      if (get_token_from_string(str, &i, mora_size, ',') <= 0)
         break;
      get_token_from_string(str, &i, chain_rule, ',');
      if (get_token_from_string(str, &i, chain_flag, ',') <= 0)
         break;

      node = (NJDNode *) calloc(1, sizeof(NJDNode));
      if (node == NULL) {
         fprintf(stderr, "WARNING: NJD_load() in njd.c: Cannot allocate memory.\n");
         return;
      }
      NJDNode_initialize(node);
      NJDNode_set_string(node, string);
      NJDNode_set_pos(node, pos);
      NJDNode_set_pos_group1(node, pos_group1);
      NJDNode_set_pos_group2(node, pos_group2);
      NJDNode_set_pos_group3(node, pos_group3);
      NJDNode_set_ctype(node, ctype);
      NJDNode_set_cform(node, cform);
      NJDNode_set_orig(node, orig);
      NJDNode_set_read(node, read);
      NJDNode_set_pron(node, pron);
      NJDNode_set_acc(node, atoi(acc));
      NJDNode_set_mora_size(node, atoi(mora_size));
      NJDNode_set_chain_rule(node, chain_rule);
      NJDNode_set_chain_flag(node, atoi(chain_flag));
      NJD_push_node(njd, node);
   }
}
Ejemplo n.º 9
0
/*
 * convert_digit_sequence: turn a run of digit nodes into a spoken number
 *
 * start and end are the first and last node (inclusive) of a run inside a
 * doubly linked NJD node list.  The run is left untouched when
 * get_digit_sequence_score() is negative, when it has fewer than two nodes,
 * or when it would need place > 17.
 *
 * While walking the run, index counts down 3..0 inside each group of four
 * digits and place counts the groups still to come.  Unit nodes are inserted
 * after the digits from njd_set_digit_rule_numeral_list2[index] (inside a
 * group) and njd_set_digit_rule_numeral_list3[place] (after a group that
 * contained a non-zero digit); digits that must not be spoken get their
 * pronunciation set to NULL and accent / mora size set to 0.
 *
 * If a unit node cannot be allocated the function returns immediately and the
 * remaining digits of the run stay unconverted.
 */
static void convert_digit_sequence(NJDNode * start, NJDNode * end)
{
   NJDNode *node;
   NJDNode *newnode;
   int digit;
   int place = 0;
   int index;
   int size = 0;
   int have = 0;

   if (get_digit_sequence_score(start, end) < 0)
      return;

   for (node = start; node != end->next; node = node->next)
      size++;
   if (size <= 1)
      return;
   /* position of the first digit inside its group of four (3..0 after the decrement) */
   index = size % 4;
   if (index == 0)
      index = 4;
   if (size > index)
      place = (size - index) / 4;
   index--;
   if (place > 17)
      return;

   for (node = start; node != end->next; node = node->next) {
      digit = get_digit(node, 0);
      if (index == 0) {
         /* last digit of a group */
         if (digit == 0) {
            NJDNode_set_pron(node, NULL);
            NJDNode_set_acc(node, 0);
            NJDNode_set_mora_size(node, 0);
         } else {
            have = 1;
         }
         /* have == 1: this group contained a non-zero digit, so its group unit is spoken */
         if (have == 1) {
            if (place > 0) {
               newnode = (NJDNode *) calloc(1, sizeof(NJDNode));
               if (newnode == NULL)
                  return;       /* out of memory: stop instead of dereferencing NULL */
               NJDNode_initialize(newnode);
               NJDNode_load(newnode, (char *) njd_set_digit_rule_numeral_list3[place]);
               /* continue the walk from the inserted node so that it is not visited as a digit */
               node = NJDNode_insert(node, node->next, newnode);
            }
            have = 0;
         }
         place--;
      } else {
         /* zero and one are not spoken in front of a unit inside a group */
         if (digit <= 1) {
            NJDNode_set_pron(node, NULL);
            NJDNode_set_acc(node, 0);
            NJDNode_set_mora_size(node, 0);
         }
         if (digit > 0) {
            newnode = (NJDNode *) calloc(1, sizeof(NJDNode));
            if (newnode == NULL)
               return;          /* out of memory: stop instead of dereferencing NULL */
            NJDNode_initialize(newnode);
            NJDNode_load(newnode, (char *) njd_set_digit_rule_numeral_list2[index]);
            node = NJDNode_insert(node, node->next, newnode);
            have = 1;
         }
      }
      index--;
      if (index < 0)
         index = 4 - 1;
   }
}
Ejemplo n.º 10
0
/*
 * NJDNode_load: fill node from a comma separated feature string
 *
 * str is read with get_token_from_string() as 13 ',' separated tokens:
 *   string, pos, pos_group1, pos_group2, pos_group3, ctype, cform, orig, read,
 *   pron, acc, chain_rule, chain_flag
 * chain_flag is only changed when the token is exactly "1" or "0".
 *
 * The acc token decides how the rest is interpreted:
 *   - it contains '*' or no '/': string, orig, read and pron are stored as they
 *     are and accent / mora size are set to 0;
 *   - it contains exactly one '/': it is parsed as "accent/mora_size";
 *   - it contains several '/': the entry is a chain of words.  orig, read and
 *     pron are split at ':' and acc at ':' into "accent/mora_size" items.  The
 *     first word is stored in node; for every further word a new node is
 *     allocated, initialised as a copy of the previous one with chain_flag 0
 *     and linked behind it through prev/next.  The string of each word but the
 *     last is its orig chunk; the last word gets what is left of the string
 *     token after skipping the combined length of the earlier chunks.
 * An empty accent or mora size item produces a warning and is read as 0.
 *
 * NOTE(review): linking a new chained node overwrites prev->next without
 * looking at its old value, so a node that already has a successor loses that
 * link when a chained entry is loaded into it - confirm callers only load
 * chained entries into unlinked nodes.
 *
 * If a chained node cannot be allocated a warning is printed and the function
 * returns with the words loaded so far.
 */
void NJDNode_load(NJDNode * node, const char *str)
{
   int i, j;
   int index = 0;
   char buff[MAXBUFLEN];
   char buff_string[MAXBUFLEN];
   char buff_orig[MAXBUFLEN];
   char buff_read[MAXBUFLEN];
   char buff_pron[MAXBUFLEN];
   char buff_acc[MAXBUFLEN];
   int count;
   int index_string;
   int length_string;
   int index_orig;
   int index_read;
   int index_pron;
   int index_acc;
   NJDNode *prev = NULL;

   /* load */
   get_token_from_string(str, &index, buff_string, ',');
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_pos(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_pos_group1(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_pos_group2(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_pos_group3(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_ctype(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_cform(node, buff);
   get_token_from_string(str, &index, buff_orig, ',');
   get_token_from_string(str, &index, buff_read, ',');
   get_token_from_string(str, &index, buff_pron, ',');
   get_token_from_string(str, &index, buff_acc, ',');
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_chain_rule(node, buff);
   get_token_from_string(str, &index, buff, ',');
   if (strcmp(buff, "1") == 0)
      NJDNode_set_chain_flag(node, 1);
   else if (strcmp(buff, "0") == 0)
      NJDNode_set_chain_flag(node, 0);

   /* for symbol */
   if (strstr(buff_acc, "*") != NULL || strstr(buff_acc, "/") == NULL) {
      NJDNode_set_string(node, buff_string);
      NJDNode_set_orig(node, buff_orig);
      NJDNode_set_read(node, buff_read);
      NJDNode_set_pron(node, buff_pron);
      NJDNode_set_acc(node, 0);
      NJDNode_set_mora_size(node, 0);
      return;
   }

   /* count chained word */
   for (i = 0, count = 0; buff_acc[i] != '\0'; i++)
      if (buff_acc[i] == '/')
         count++;

   /* for single word */
   if (count == 1) {
      NJDNode_set_string(node, buff_string);
      NJDNode_set_orig(node, buff_orig);
      NJDNode_set_read(node, buff_read);
      NJDNode_set_pron(node, buff_pron);
      index_acc = 0;
      get_token_from_string(buff_acc, &index_acc, buff, '/');
      if (buff[0] == '\0') {
         j = 0;
         fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Accent is empty.\n");
      } else {
         j = atoi(buff);
      }
      NJDNode_set_acc(node, j);
      get_token_from_string(buff_acc, &index_acc, buff, ':');
      if (buff[0] == '\0') {
         j = 0;
         fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Mora size is empty.\n");
      } else {
         j = atoi(buff);
      }
      NJDNode_set_mora_size(node, j);
      return;
   }

   /* parse chained word */
   index_string = 0;
   length_string = (int) strlen(buff_string);
   index_orig = 0;
   index_read = 0;
   index_pron = 0;
   index_acc = 0;
   for (i = 0; i < count; i++) {
      if (i > 0) {
         node = (NJDNode *) calloc(1, sizeof(NJDNode));
         if (node == NULL) {
            fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Cannot allocate memory.\n");
            return;
         }
         NJDNode_initialize(node);
         NJDNode_copy(node, prev);
         NJDNode_set_chain_flag(node, 0);
         node->prev = prev;
         prev->next = node;
      }
      /* orig */
      get_token_from_string(buff_orig, &index_orig, buff, ':');
      NJDNode_set_orig(node, buff);
      /* string */
      if (i + 1 < count) {
         NJDNode_set_string(node, buff);
         index_string += strlen(buff);
      } else {
         /* the orig chunks may be longer than the string token: never start past its terminator */
         if (index_string > length_string)
            index_string = length_string;
         NJDNode_set_string(node, &buff_string[index_string]);
      }
      /* read */
      get_token_from_string(buff_read, &index_read, buff, ':');
      NJDNode_set_read(node, buff);
      /* pron */
      get_token_from_string(buff_pron, &index_pron, buff, ':');
      NJDNode_set_pron(node, buff);
      /* acc */
      get_token_from_string(buff_acc, &index_acc, buff, '/');
      if (buff[0] == '\0') {
         j = 0;
         fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Accent is empty.\n");
      } else {
         j = atoi(buff);
      }
      NJDNode_set_acc(node, j);
      /* mora size */
      get_token_from_string(buff_acc, &index_acc, buff, ':');
      if (buff[0] == '\0') {
         j = 0;
         fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Mora size is empty.\n");
      } else {
         j = atoi(buff);
      }
      NJDNode_set_mora_size(node, j);
      prev = node;
   }
}