int main (int argc, char **argv) { char input[] = "太郎は次郎が持っている本を花子に渡した。"; mecab_model_t *model, *another_model; mecab_t *mecab; mecab_lattice_t *lattice; const mecab_node_t *node; const char *result; int i; size_t len; model = mecab_model_new(argc, argv); CHECK(model); mecab = mecab_model_new_tagger(model); CHECK(mecab); lattice = mecab_model_new_lattice(model); CHECK(lattice); mecab_lattice_set_sentence(lattice, input); mecab_parse_lattice(mecab, lattice); printf("RESULT: %s\n", mecab_lattice_tostr(lattice)); node = mecab_lattice_get_bos_node(lattice); for (; node; node = node->next) { printf("%d ", node->id); if (node->stat == MECAB_BOS_NODE) printf("BOS"); else if (node->stat == MECAB_EOS_NODE) printf("EOS"); else fwrite (node->surface, sizeof(char), node->length, stdout); printf(" %s %d %d %d %d %d %d %d %d %f %f %f %ld\n", node->feature, (int)(node->surface - input), (int)(node->surface - input + node->length), node->rcAttr, node->lcAttr, node->posid, (int)node->char_type, (int)node->stat, (int)node->isbest, node->alpha, node->beta, node->prob, node->cost); } len = mecab_lattice_get_size(lattice); for (i = 0; i <= len; ++i) { mecab_node_t *b, *e; b = mecab_lattice_get_begin_nodes(lattice, (size_t)i); e = mecab_lattice_get_end_nodes(lattice, (size_t)i); for (; b; b = b->bnext) { printf("B[%d] %s\t%s\n", i, b->surface, b->feature); } for (; e; e = e->enext) { printf("E[%d] %s\t%s\n", i, e->surface, e->feature); } } mecab_lattice_set_sentence(lattice, input); mecab_lattice_set_request_type(lattice, MECAB_NBEST); mecab_parse_lattice(mecab, lattice); for (i = 0; i < 10; ++i) { fprintf(stdout, "%s", mecab_lattice_tostr(lattice)); if (!mecab_lattice_next(lattice)) { break; } } mecab_lattice_set_sentence(lattice, input); mecab_lattice_set_request_type(lattice, MECAB_MARGINAL_PROB); mecab_lattice_set_theta(lattice, 0.001); mecab_parse_lattice(mecab, lattice); node = mecab_lattice_get_bos_node(lattice); for (; node; node = node->next) { fwrite(node->surface, sizeof(char), node->length, stdout); fprintf(stdout, "\t%s\t%f\n", node->feature, node->prob); } mecab_set_lattice_level(mecab, 0); mecab_set_all_morphs(mecab, 1); node = mecab_sparse_tonode(mecab, input); CHECK(node); for (; node; node = node->next) { fwrite (node->surface, sizeof(char), node->length, stdout); printf("\t%s\n", node->feature); } const mecab_dictionary_info_t *d = mecab_dictionary_info(mecab); for (; d; d = d->next) { printf("filename: %s\n", d->filename); printf("charset: %s\n", d->charset); printf("size: %d\n", d->size); printf("type: %d\n", d->type); printf("lsize: %d\n", d->lsize); printf("rsize: %d\n", d->rsize); printf("version: %d\n", d->version); } mecab_destroy(mecab); mecab_lattice_destroy(lattice); mecab_model_destroy(model); return 0; }
main(int argc, char **argv){ char input_file[128]; strcpy(input_file,argv[1]); #else int mecab_analyze (char *input_file){ #endif char input[MAX_TEXT_SIZE]; char analyzed_text[MAX_TEXT_SIZE]; char wk_buff[MAX_TEXT_SIZE]; char wk_file_name[256]; char title_buff[256]; mecab_t *mecab; const mecab_node_t *node; FILE *wfp; char surface_buff[256]; char key_list[MAX_KEY_NUMBERS][MAX_KEY_LENGTH]; int key_numbers; strcpy(wk_file_name,TO_MECAB_FILE_DIR); strcat(wk_file_name,input_file); if(read_text(wk_file_name,input,title_buff)){ fprintf(stderr,"[%s] not found\n",wk_file_name); return(-1); } /**** remove(wk_file_name); ****/ // edit character e.g. ' '' { 0x0a edit_input_text(input); /** memset(analyzed_text,'\0',sizeof(analyzed_text)); if(!modify_text(analyzed_text,input)){ strcpy(wk_buff,analyzed_text); while(1){ memset(analyzed_text,'\0',sizeof(analyzed_text)); if(modify_text(analyzed_text,wk_buff)) break; strcpy(wk_buff,analyzed_text); } } **/ strcpy(wk_file_name,TO_HIBARI_FILE_DIR); strcat(wk_file_name,input_file); if((wfp = fopen(wk_file_name,"w")) == NULL){ fprintf(stderr,"[%s] could not open\n",wk_file_name); return(-1); } /* fprintf(wfp,"{\"%s\"}.\n",input); // first write message */ fprintf(wfp,"{\"%s\"}.\n",title_buff); // write wiki title mecab = mecab_new2(""); CHECK(mecab); mecab_set_lattice_level(mecab, 0); // mecab_set_lattice_level(mecab, 1); node = mecab_sparse_tonode(mecab, input); CHECK(node); memset(key_list,'\0',sizeof(key_list)); for (key_numbers=0; node; node = node->next) { strncpy(surface_buff,node->surface,node->length); surface_buff[node->length] ='\0'; #ifdef UNIT_TEST printf("名詞:[%s] 文字種:[%d] ID:[%d]\n",surface_buff,node->char_type,node->posid); #endif if (node->length <= 1) continue; if (omitted_word(surface_buff)) continue; // check charcter type switch(node->posid){ case 3: //記号 case 4: //数字 case 5: //記号 case 6: //記号 case 7: //記号 break; case 36: // ' case 37: case 38: case 39: case 40: case 41: case 42: case 43: case 44: case 45: case 46: case 47: case 48: case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: case 57: case 58: case 59: case 60: case 67: if(!check_duplicate((char *)key_list,surface_buff,key_numbers)){ fprintf(wfp,"{\"%s\"}.\n",surface_buff); key_numbers ++; } break; case 61: // 非自立 名詞 case 62: case 63: case 64: case 65: case 66: break; default: // printf("[%s] ID:[%d]\n",surface_buff,node->posid); break; } #ifdef NOT_USE printf(" %s %d %d %d %d posid:[%d] %d %d %d %f %f %f %ld\n", node->feature, (int)(node->surface - input), (int)(node->surface - input + node->length), node->rcAttr, node->lcAttr, node->posid, (int)node->char_type, (int)node->stat, (int)node->isbest, node->alpha, node->beta, node->prob, node->cost); #endif } fclose(wfp); mecab_destroy(mecab); return 0; }