Пример #1
0
int main (int argc, char **argv)  {
  char input[] = "太郎は次郎が持っている本を花子に渡した。";
  mecab_model_t *model, *another_model;
  mecab_t *mecab;
  mecab_lattice_t *lattice;
  const mecab_node_t *node;
  const char *result;
  int i;
  size_t len;

  model = mecab_model_new(argc, argv);
  CHECK(model);

  mecab = mecab_model_new_tagger(model);
  CHECK(mecab);

  lattice = mecab_model_new_lattice(model);
  CHECK(lattice);

  mecab_lattice_set_sentence(lattice, input);
  mecab_parse_lattice(mecab, lattice);

  printf("RESULT: %s\n", mecab_lattice_tostr(lattice));

  node = mecab_lattice_get_bos_node(lattice);
  for (;  node; node = node->next) {
    printf("%d ", node->id);

    if (node->stat == MECAB_BOS_NODE)
      printf("BOS");
    else if (node->stat == MECAB_EOS_NODE)
      printf("EOS");
    else
      fwrite (node->surface, sizeof(char), node->length, stdout);

    printf(" %s %d %d %d %d %d %d %d %d %f %f %f %ld\n",
	   node->feature,
	   (int)(node->surface - input),
	   (int)(node->surface - input + node->length),
	   node->rcAttr,
	   node->lcAttr,
	   node->posid,
	   (int)node->char_type,
	   (int)node->stat,
	   (int)node->isbest,
	   node->alpha,
	   node->beta,
	   node->prob,
	   node->cost);
  }

  len = mecab_lattice_get_size(lattice);
  for (i = 0; i <= len; ++i) {
    mecab_node_t *b, *e;
    b = mecab_lattice_get_begin_nodes(lattice, (size_t)i);
    e = mecab_lattice_get_end_nodes(lattice, (size_t)i);
    for (; b; b = b->bnext) {
        printf("B[%d] %s\t%s\n", i, b->surface, b->feature);
    }
    for (; e; e = e->enext) {
        printf("E[%d] %s\t%s\n", i, e->surface, e->feature);
    }
  }

  mecab_lattice_set_sentence(lattice, input);
  mecab_lattice_set_request_type(lattice, MECAB_NBEST);
  mecab_parse_lattice(mecab, lattice);
  for (i = 0; i < 10; ++i) {
    fprintf(stdout, "%s", mecab_lattice_tostr(lattice));
    if (!mecab_lattice_next(lattice)) {
      break;
    }
  }

  mecab_lattice_set_sentence(lattice, input);
  mecab_lattice_set_request_type(lattice, MECAB_MARGINAL_PROB);
  mecab_lattice_set_theta(lattice, 0.001);
  mecab_parse_lattice(mecab, lattice);
  node = mecab_lattice_get_bos_node(lattice);
  for (;  node; node = node->next) {
    fwrite(node->surface, sizeof(char), node->length, stdout);
    fprintf(stdout, "\t%s\t%f\n", node->feature, node->prob);
  }

  mecab_set_lattice_level(mecab, 0);
  mecab_set_all_morphs(mecab, 1);
  node = mecab_sparse_tonode(mecab, input);
  CHECK(node);
  for (; node; node = node->next) {
    fwrite (node->surface, sizeof(char), node->length, stdout);
    printf("\t%s\n", node->feature);
  }

  const mecab_dictionary_info_t *d = mecab_dictionary_info(mecab);
  for (; d; d = d->next) {
    printf("filename: %s\n", d->filename);
    printf("charset: %s\n", d->charset);
    printf("size: %d\n", d->size);
    printf("type: %d\n", d->type);
    printf("lsize: %d\n", d->lsize);
    printf("rsize: %d\n", d->rsize);
    printf("version: %d\n", d->version);
  }


  mecab_destroy(mecab);
  mecab_lattice_destroy(lattice);
  mecab_model_destroy(model);

  return 0;
}
Пример #2
0
main(int argc, char **argv){
char input_file[128];
int name_len;

/* Stand-alone entry point: the file name is taken from the command line.
   It must be present and must fit into input_file. */
if (argc < 2) {
	fprintf(stderr,"input file name is required\n");
	return(-1);
}
name_len = snprintf(input_file,sizeof(input_file),"%s",argv[1]);
if (name_len < 0 || (size_t)name_len >= sizeof(input_file)) {
	fprintf(stderr,"[%s] file name too long\n",argv[1]);
	return(-1);
}

#else
int
mecab_analyze (char *input_file){
#endif
	/*
	 * Reads the text file TO_MECAB_FILE_DIR + input_file, runs it through
	 * MeCab and writes TO_HIBARI_FILE_DIR + input_file.  The output starts
	 * with the title returned by read_text(), followed by one line per
	 * accepted token, each formatted as {"..."}. with a trailing newline.
	 *
	 * A token is accepted when it is longer than one byte, is not rejected
	 * by omitted_word(), has one of the part-of-speech ids selected in the
	 * switch below, and check_duplicate() returns 0 for it.
	 *
	 * Returns 0 on success and -1 on any failure (path too long, input not
	 * readable, output not writable, MeCab error).
	 */
	char input[MAX_TEXT_SIZE];
	char wk_file_name[256];
	char title_buff[256];
	mecab_t *mecab = NULL;
	const mecab_node_t *node;
	FILE *wfp;
	char surface_buff[256];
	char key_list[MAX_KEY_NUMBERS][MAX_KEY_LENGTH];
	int key_numbers;
	int path_len;
	int rc = -1;

	/* Build the input path with an explicit bound; a long file name must
	   not run past wk_file_name. */
	path_len = snprintf(wk_file_name,sizeof(wk_file_name),"%s%s",TO_MECAB_FILE_DIR,input_file);
	if (path_len < 0 || (size_t)path_len >= sizeof(wk_file_name)) {
		fprintf(stderr,"[%s] path too long\n",input_file);
		return(-1);
	}
	if(read_text(wk_file_name,input,title_buff)){
		fprintf(stderr,"[%s] not found\n",wk_file_name);
		return(-1);
	}

	// edit character e.g. ' '' { 0x0a
	edit_input_text(input);

	path_len = snprintf(wk_file_name,sizeof(wk_file_name),"%s%s",TO_HIBARI_FILE_DIR,input_file);
	if (path_len < 0 || (size_t)path_len >= sizeof(wk_file_name)) {
		fprintf(stderr,"[%s] path too long\n",input_file);
		return(-1);
	}
	if((wfp = fopen(wk_file_name,"w")) == NULL){
		fprintf(stderr,"[%s] could not open\n",wk_file_name);
		return(-1);
	}
	fprintf(wfp,"{\"%s\"}.\n",title_buff); // write wiki title

	/* Failures are handled explicitly from here on so that the output file
	   and the tagger are released on every path. */
	mecab = mecab_new2("");
	if (mecab == NULL) {
		fprintf(stderr,"Exception:%s\n",mecab_strerror(NULL));
		goto close_output;
	}

	mecab_set_lattice_level(mecab, 0);

	node = mecab_sparse_tonode(mecab, input);
	if (node == NULL) {
		fprintf(stderr,"Exception:%s\n",mecab_strerror(mecab));
		goto destroy_tagger;
	}

	/* NOTE(review): key_numbers is never compared with MAX_KEY_NUMBERS in
	   this function; confirm that check_duplicate() bounds its own use of
	   key_list. */
	memset(key_list,'\0',sizeof(key_list));
	for (key_numbers=0;  node; node = node->next) {
		/* node->surface is not NUL-terminated per token, so copy exactly
		   node->length bytes.  Tokens that would not fit together with
		   the terminator are skipped instead of overflowing the buffer. */
		if (node->length >= sizeof(surface_buff))
			continue;
		memcpy(surface_buff,node->surface,node->length);
		surface_buff[node->length] ='\0';
#ifdef UNIT_TEST
		printf("名詞:[%s] 文字種:[%d] ID:[%d]\n",surface_buff,node->char_type,node->posid);
#endif
		if (node->length <= 1)
			continue;
		if (omitted_word(surface_buff))
			continue;
		// select by part-of-speech id
		// (presumably the ids of the dictionary in use -- TODO confirm)
		switch(node->posid){
			case 3:  // symbol
			case 4:  // digit
			case 5:  // symbol
			case 6:  // symbol
			case 7:  // symbol
				break;
			case 36:
			case 37:
			case 38:
			case 39:
			case 40:
			case 41:
			case 42:
			case 43:
			case 44:
			case 45:
			case 46:
			case 47:
			case 48:
			case 49:
			case 50:
			case 51:
			case 52:
			case 53:
			case 54:
			case 55:
			case 56:
			case 57:
			case 58:
			case 59:
			case 60:
			case 67:
				// accepted ids: emit the token once
				if(!check_duplicate((char *)key_list,surface_buff,key_numbers)){
					fprintf(wfp,"{\"%s\"}.\n",surface_buff);
					key_numbers ++;
				}
				break;
			case 61: // non-independent noun
			case 62:
			case 63:
			case 64:
			case 65:
			case 66:
				break;
			default:
				break;
		}

#ifdef NOT_USE
    printf(" %s %d %d %d %d posid:[%d] %d %d %d %f %f %f %ld\n",
	   node->feature,
	   (int)(node->surface - input),
	   (int)(node->surface - input + node->length),
	   node->rcAttr,
	   node->lcAttr,
	   node->posid,
	   (int)node->char_type,
	   (int)node->stat,
	   (int)node->isbest,
	   node->alpha,
	   node->beta,
	   node->prob,
	   node->cost);
#endif
	}
	rc = 0;

destroy_tagger:
	mecab_destroy(mecab);
close_output:
	/* fclose flushes buffered output; a failure here means the file is
	   incomplete. */
	if (fclose(wfp) != 0) {
		fprintf(stderr,"[%s] could not write\n",wk_file_name);
		rc = -1;
	}

	return rc;
}