Пример #1
0
void mecab2njd(NJD * njd, char **feature, int size)
{
    int i;
    NJDNode *node;

    for (i = 0; i < size; i++) {
        node = (NJDNode *) calloc(1, sizeof(NJDNode));
        NJDNode_initialize(node);
        NJDNode_load(node, feature[i]);
        NJD_push_node(njd, node);
    }
}
Пример #2
0
static void convert_digit_sequence(NJDNode * start, NJDNode * end)
{
   NJDNode *node;
   NJDNode *newnode;
   int digit;
   int place = 0;
   int index;
   int size = 0;
   int have = 0;

   for (node = start; node != end->next; node = node->next)
      size++;
   if (size <= 1)
      return;

   if (get_digit_sequence_score(start, end) < 0) {
      for (node = start, size = 0; node != end->next; node = node->next) {
         if (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_ZERO1) == 0
             || strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_ZERO2) == 0) {
            NJDNode_set_pron(node, NJD_SET_DIGIT_ZERO_AFTER_DP);
            NJDNode_set_mora_size(node, 2);
         } else if (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_TWO) == 0) {
            NJDNode_set_pron(node, NJD_SET_DIGIT_TWO_AFTER_DP);
            NJDNode_set_mora_size(node, 2);
         } else if (strcmp(NJDNode_get_string(node), NJD_SET_DIGIT_FIVE) == 0) {
            NJDNode_set_pron(node, NJD_SET_DIGIT_FIVE_AFTER_DP);
            NJDNode_set_mora_size(node, 2);
         }
         NJDNode_set_chain_rule(node, NULL);
         if (size % 2 == 0) {
            NJDNode_set_chain_flag(node, 0);
         } else {
            NJDNode_set_chain_flag(node, 1);
            NJDNode_set_acc(node->prev, 3);
         }
         size++;
      }
      return;
   }

   index = size % 4;
   if (index == 0)
      index = 4;
   if (size > index)
      place = (size - index) / 4;
   index--;
   if (place > 17)
      return;

   for (node = start; node != end->next; node = node->next) {
      digit = get_digit(node, 0);
      if (index == 0) {
         if (digit == 0) {
            NJDNode_set_pron(node, NULL);
            NJDNode_set_acc(node, 0);
            NJDNode_set_mora_size(node, 0);
         } else {
            have = 1;
         }
         if (have == 1) {
            if (place > 0) {
               newnode = (NJDNode *) calloc(1, sizeof(NJDNode));
               NJDNode_initialize(newnode);
               NJDNode_load(newnode, (char *) njd_set_digit_rule_numeral_list3[place]);
               node = NJDNode_insert(node, node->next, newnode);
            }
            have = 0;
         }
         place--;
      } else {
         if (digit <= 1) {
            NJDNode_set_pron(node, NULL);
            NJDNode_set_acc(node, 0);
            NJDNode_set_mora_size(node, 0);
         }
         if (digit > 0) {
            newnode = (NJDNode *) calloc(1, sizeof(NJDNode));
            NJDNode_initialize(newnode);
            NJDNode_load(newnode, (char *) njd_set_digit_rule_numeral_list2[index]);
            node = NJDNode_insert(node, node->next, newnode);
            have = 1;
         }
      }
      index--;
      if (index < 0)
         index = 4 - 1;
   }
}
Пример #3
0
void NJD_load_from_fp(NJD * njd, FILE * fp)
{
   NJDNode *node = NULL;
   char string[MAXBUFLEN];
   char pos[MAXBUFLEN];
   char pos_group1[MAXBUFLEN];
   char pos_group2[MAXBUFLEN];
   char pos_group3[MAXBUFLEN];
   char ctype[MAXBUFLEN];
   char cform[MAXBUFLEN];
   char orig[MAXBUFLEN];
   char read[MAXBUFLEN];
   char pron[MAXBUFLEN];
   char acc[MAXBUFLEN];
   char mora_size[MAXBUFLEN];
   char chain_rule[MAXBUFLEN];
   char chain_flag[MAXBUFLEN];

   if (fp == NULL) {
      fprintf(stderr, "WARNING: NJD_load_from_fp() in njd.c: File pointer should not be null.");
      return;
   }

   while (1) {
      get_token_from_fp(fp, string, ',');
      if (get_token_from_fp(fp, pos, ',') <= 0)
         break;
      if (get_token_from_fp(fp, pos_group1, ',') <= 0)
         break;
      if (get_token_from_fp(fp, pos_group2, ',') <= 0)
         break;
      if (get_token_from_fp(fp, pos_group3, ',') <= 0)
         break;
      if (get_token_from_fp(fp, ctype, ',') <= 0)
         break;
      if (get_token_from_fp(fp, cform, ',') <= 0)
         break;
      get_token_from_fp(fp, orig, ',');
      get_token_from_fp(fp, read, ',');
      get_token_from_fp(fp, pron, ',');
      if (get_token_from_fp(fp, acc, '/') <= 0)
         break;
      if (get_token_from_fp(fp, mora_size, ',') <= 0)
         break;
      get_token_from_fp(fp, chain_rule, ',');
      if (get_token_from_fp(fp, chain_flag, ',') <= 0)
         break;
      node = (NJDNode *) calloc(1, sizeof(NJDNode));
      NJDNode_initialize(node);
      NJDNode_set_string(node, string);
      NJDNode_set_pos(node, pos);
      NJDNode_set_pos_group1(node, pos_group1);
      NJDNode_set_pos_group2(node, pos_group2);
      NJDNode_set_pos_group3(node, pos_group3);
      NJDNode_set_ctype(node, ctype);
      NJDNode_set_cform(node, cform);
      NJDNode_set_orig(node, orig);
      NJDNode_set_read(node, read);
      NJDNode_set_pron(node, pron);
      NJDNode_set_acc(node, atoi(acc));
      NJDNode_set_mora_size(node, atoi(mora_size));
      NJDNode_set_chain_rule(node, chain_rule);
      NJDNode_set_chain_flag(node, atoi(chain_flag));
      NJD_push_node(njd, node);
   }
}
Пример #4
0
void NJD_load(NJD * njd, char *str)
{
   int i = 0;
   NJDNode *node = NULL;
   char string[MAXBUFLEN];
   char pos[MAXBUFLEN];
   char pos_group1[MAXBUFLEN];
   char pos_group2[MAXBUFLEN];
   char pos_group3[MAXBUFLEN];
   char ctype[MAXBUFLEN];
   char cform[MAXBUFLEN];
   char orig[MAXBUFLEN];
   char read[MAXBUFLEN];
   char pron[MAXBUFLEN];
   char acc[MAXBUFLEN];
   char mora_size[MAXBUFLEN];
   char chain_rule[MAXBUFLEN];
   char chain_flag[MAXBUFLEN];

   if (strlen(str) < 1) {
      fprintf(stderr, "WARNING: NJD_load() in njd.c: Input string should not be empty.");
      return;
   }

   while (1) {
      get_token_from_string(str, &i, string, ',');
      if (get_token_from_string(str, &i, pos, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, pos_group1, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, pos_group2, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, pos_group3, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, ctype, ',') <= 0)
         break;
      if (get_token_from_string(str, &i, cform, ',') <= 0)
         break;
      get_token_from_string(str, &i, orig, ',');
      get_token_from_string(str, &i, read, ',');
      get_token_from_string(str, &i, pron, ',');
      if (get_token_from_string(str, &i, acc, '/') <= 0)
         break;
      if (get_token_from_string(str, &i, mora_size, ',') <= 0)
         break;
      get_token_from_string(str, &i, chain_rule, ',');
      if (get_token_from_string(str, &i, chain_flag, ',') <= 0)
         break;
      node = (NJDNode *) calloc(1, sizeof(NJDNode));
      NJDNode_initialize(node);
      NJDNode_set_string(node, string);
      NJDNode_set_pos(node, pos);
      NJDNode_set_pos_group1(node, pos_group1);
      NJDNode_set_pos_group2(node, pos_group2);
      NJDNode_set_pos_group3(node, pos_group3);
      NJDNode_set_ctype(node, ctype);
      NJDNode_set_cform(node, cform);
      NJDNode_set_orig(node, orig);
      NJDNode_set_read(node, read);
      NJDNode_set_pron(node, pron);
      NJDNode_set_acc(node, atoi(acc));
      NJDNode_set_mora_size(node, atoi(mora_size));
      NJDNode_set_chain_rule(node, chain_rule);
      NJDNode_set_chain_flag(node, atoi(chain_flag));
      NJD_push_node(njd, node);
   }
}
Пример #5
0
static void convert_digit_sequence(NJDNode * start, NJDNode * end)
{
   NJDNode *node;
   NJDNode *newnode;
   int digit;
   int place = 0;
   int index;
   int size = 0;
   int have = 0;

   if (get_digit_sequence_score(start, end) < 0)
      return;

   for (node = start; node != end->next; node = node->next)
      size++;
   if (size <= 1)
      return;
   index = size % 4;
   if (index == 0)
      index = 4;
   if (size > index)
      place = (size - index) / 4;
   index--;
   if (place > 17)
      return;

   for (node = start; node != end->next; node = node->next) {
      digit = get_digit(node, 0);
      if (index == 0) {
         if (digit == 0) {
            NJDNode_set_pron(node, NULL);
            NJDNode_set_acc(node, 0);
            NJDNode_set_mora_size(node, 0);
         } else {
            have = 1;
         }
         if (have == 1) {
            if (place > 0) {
               newnode = (NJDNode *) calloc(1, sizeof(NJDNode));
               NJDNode_initialize(newnode);
               NJDNode_load(newnode, (char *) njd_set_digit_rule_numeral_list3[place]);
               node = NJDNode_insert(node, node->next, newnode);
            }
            have = 0;
         }
         place--;
      } else {
         if (digit <= 1) {
            NJDNode_set_pron(node, NULL);
            NJDNode_set_acc(node, 0);
            NJDNode_set_mora_size(node, 0);
         }
         if (digit > 0) {
            newnode = (NJDNode *) calloc(1, sizeof(NJDNode));
            NJDNode_initialize(newnode);
            NJDNode_load(newnode, (char *) njd_set_digit_rule_numeral_list2[index]);
            node = NJDNode_insert(node, node->next, newnode);
            have = 1;
         }
      }
      index--;
      if (index < 0)
         index = 4 - 1;
   }
}
Пример #6
0
void NJDNode_load(NJDNode * node, const char *str)
{
   int i, j;
   int index = 0;
   char buff[MAXBUFLEN];
   char buff_string[MAXBUFLEN];
   char buff_orig[MAXBUFLEN];
   char buff_read[MAXBUFLEN];
   char buff_pron[MAXBUFLEN];
   char buff_acc[MAXBUFLEN];
   int count;
   int index_string;
   int index_orig;
   int index_read;
   int index_pron;
   int index_acc;
   NJDNode *prev = NULL;

   /* load */
   get_token_from_string(str, &index, buff_string, ',');
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_pos(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_pos_group1(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_pos_group2(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_pos_group3(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_ctype(node, buff);
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_cform(node, buff);
   get_token_from_string(str, &index, buff_orig, ',');
   get_token_from_string(str, &index, buff_read, ',');
   get_token_from_string(str, &index, buff_pron, ',');
   get_token_from_string(str, &index, buff_acc, ',');
   get_token_from_string(str, &index, buff, ',');
   NJDNode_set_chain_rule(node, buff);
   get_token_from_string(str, &index, buff, ',');
   if (strcmp(buff, "1") == 0)
      NJDNode_set_chain_flag(node, 1);
   else if (strcmp(buff, "0") == 0)
      NJDNode_set_chain_flag(node, 0);

   /* for symbol */
   if (strstr(buff_acc, "*") != NULL || strstr(buff_acc, "/") == NULL) {
      NJDNode_set_string(node, buff_string);
      NJDNode_set_orig(node, buff_orig);
      NJDNode_set_read(node, buff_read);
      NJDNode_set_pron(node, buff_pron);
      NJDNode_set_acc(node, 0);
      NJDNode_set_mora_size(node, 0);
      return;
   }

   /* count chained word */
   for (i = 0, count = 0; buff_acc[i] != '\0'; i++)
      if (buff_acc[i] == '/')
         count++;

   /* for single word */
   if (count == 1) {
      NJDNode_set_string(node, buff_string);
      NJDNode_set_orig(node, buff_orig);
      NJDNode_set_read(node, buff_read);
      NJDNode_set_pron(node, buff_pron);
      index_acc = 0;
      get_token_from_string(buff_acc, &index_acc, buff, '/');
      if (buff[0] == '\0') {
         j = 0;
         fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Accent is empty.\n");
      } else {
         j = atoi(buff);
      }
      NJDNode_set_acc(node, j);
      get_token_from_string(buff_acc, &index_acc, buff, ':');
      if (buff[0] == '\0') {
         j = 0;
         fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Mora size is empty.\n");
      } else {
         j = atoi(buff);
      }
      NJDNode_set_mora_size(node, j);
      return;
   }

   /* parse chained word */
   index_string = 0;
   index_orig = 0;
   index_read = 0;
   index_pron = 0;
   index_acc = 0;
   for (i = 0; i < count; i++) {
      if (i > 0) {
         node = (NJDNode *) calloc(1, sizeof(NJDNode));
         NJDNode_initialize(node);
         NJDNode_copy(node, prev);
         NJDNode_set_chain_flag(node, 0);
         node->prev = prev;
         prev->next = node;
      }
      /* orig */
      get_token_from_string(buff_orig, &index_orig, buff, ':');
      NJDNode_set_orig(node, buff);
      /* string */
      if (i + 1 < count) {
         NJDNode_set_string(node, buff);
         index_string += strlen(buff);
      } else {
         NJDNode_set_string(node, &buff_string[index_string]);
      }
      /* read */
      get_token_from_string(buff_read, &index_read, buff, ':');
      NJDNode_set_read(node, buff);
      /* pron */
      get_token_from_string(buff_pron, &index_pron, buff, ':');
      NJDNode_set_pron(node, buff);
      /* acc */
      get_token_from_string(buff_acc, &index_acc, buff, '/');
      if (buff[0] == '\0') {
         j = 0;
         fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Accent is empty.\n");
      } else {
         j = atoi(buff);
      }
      NJDNode_set_acc(node, j);
      /* mora size */
      get_token_from_string(buff_acc, &index_acc, buff, ':');
      if (buff[0] == '\0') {
         j = 0;
         fprintf(stderr, "WARNING: NJDNode_load() in njd_node.c: Mora size is empty.\n");
      } else {
         j = atoi(buff);
      }
      NJDNode_set_mora_size(node, j);
      prev = node;
   }
}