コード例 #1
0
ファイル: segment.cpp プロジェクト: joseph-chan/rqpersonalsvn
/**
 * Open the word dictionary stored under a directory.
 *
 * A '/' is appended to dictpath and the resulting string is handed to
 * scw_load_worddict(); the value it returns is cast to dict_ptr_t.
 *
 * @param dictpath  dictionary directory path; may be NULL.
 * @return the dictionary handle, or 0 on every failure (NULL path, path
 *         too long for the local buffer, or scw_load_worddict() failing).
 */
dict_ptr_t seg_dict_open( const char* dictpath )
{
	if( !dictpath )
		return 0;

	/* Room is needed for the path, the appended '/', and the terminator. */
	const size_t len = strlen(dictpath);
	if( len >= MAX_PATH_LENG - 1 )
	{
		LOG_ERROR("error: length of path is too long");
		/* Report failure with 0 like every other error path, so callers
		 * that test the handle with `!ptr` do not mistake it for valid. */
		return 0;
	}

	char path[MAX_PATH_LENG] = {0};
	memcpy(path, dictpath, len);
	path[len] = '/';
	path[len + 1] = '\0';

	dict_ptr_t ptr = (dict_ptr_t)scw_load_worddict(path);
	if( !ptr )
	{
		LOG_ERROR("error: couldn't load the dictionary");
		return 0;
	}

	return ptr;
}
コード例 #2
0
/**
 * Command-line driver: segments every line read from stdin.
 *
 * Usage: <prog> worddict_dir outtype
 *   argv[1]  dictionary location passed to scw_load_worddict()
 *   argv[2]  integer forwarded to scw_dump_out2() as its second argument
 *
 * Returns 0 after stdin is exhausted, 1 when the dictionary cannot be
 * loaded, and -1 when the output buffer cannot be created or a line fails
 * to segment. A wrong argument count exits with status -1.
 */
int main(int argc,char** argv)
{
  scw_worddict_t * pwdict;
  scw_out_t *pout;
  char line[1024000];
  u_int scw_out_flag;
  int flag = 0;

  if(argc != 3)
  {
    fprintf(stderr, "usage: %s worddict_dir outtype\n", argv[0]);
    exit(-1);
  }

  if((pwdict=scw_load_worddict(argv[1]))==NULL)
  {
    fprintf(stderr,"Load worddict failed.Filename=%s\n", argv[1]);
    return 1;
  }

  /* outtype is the second user argument; argv[0] is the program name. */
  flag = atoi(argv[2]);

  scw_out_flag = SCW_OUT_ALL | SCW_OUT_PROP;
  if((pout=scw_create_out(80000, scw_out_flag))==NULL)
  {
    fprintf(stderr,"Init the output buffer error.\n");
    return -1;
  }

  while(fgets(line,sizeof(line),stdin))
  {
    int len=(int)strlen(line);

    /* Strip trailing CR/LF. The len > 0 guard stops the scan from reading
     * line[-1] once the line has become empty (e.g. the input was "\n"). */
    while(len > 0 && (line[len-1]=='\r' || line[len-1]=='\n'))
      line[--len]=0;

    if(scw_segment_words(pwdict,pout,line,len)<0)
    {
      fprintf(stderr, "query %s error\n", line);
      scw_destroy_out(pout);
      return -1;
    }
    scw_dump_out2(pout,flag,pwdict->m_wdtype);
  }

  /* Release the output buffer on the normal path as well. */
  scw_destroy_out(pout);
  return 0;
}
コード例 #3
0
ファイル: segmentor.cpp プロジェクト: samevers/dlg_service
/*
 * Word-segmenter initialisation.
 *
 * Loads the scw configuration from scwconfile into pgconf, then the word
 * dictionary from wordictpath into pwdict (both variables are declared
 * outside this function).
 *
 * Returns 0 when both loads succeed and -1 as soon as either one fails.
 */
int segment_init(const char * scwconfile,const char * wordictpath)
{
	pgconf = scw_load_conf(scwconfile);
	if(NULL == pgconf)
	{
		UB_LOG_FATAL("scw conf load failed");
		return -1;
	}
	UB_LOG_DEBUG("scw load conf success");

	pwdict = scw_load_worddict(wordictpath);
	if(NULL != pwdict)
		return 0;

	UB_LOG_FATAL("error: loading wordict failed: %s",wordictpath);
	return -1;
}
コード例 #4
0
int main(int argc,char** argv)
{
	scw_worddict_t * pwdict;
	scw_inner_t *pir;
	scw_item_t *pitem;
	char line[1024000];
	int flag = 0;
	int tsize = 0;

	if(argc!=4){
		fprintf(stderr, "usage: %s dictfilename pos outlevel\n", argv[0]);
		exit(-1);
	}

	if((pwdict=scw_load_worddict(argv[1]))==NULL){
		fprintf(stderr,"Load worddict failed.Filename=%s\n",argv[1]);
		return 1;
	}

	flag = atoi(argv[2]);
	tsize = 10000;
	//tsize = atoi(argv[3]);
	if(tsize < 1)
	{
		fprintf(stderr,"tsize [%s] should > 1\n",argv[3]);
		return 1;
	}

	if((pir=scw_create_inner(tsize, SCW_OUT_ALL | SCW_OUT_PROP))==NULL){
		fprintf(stderr,"Init the output buffer error.\n");
		return -1;
	}

	if((pitem=scw_create_item(tsize))==NULL){
		fprintf(stderr, "Init pitem failed\n");
		return -1;
	}

	set_scw_tn();
	while(fgets(line,sizeof(line),stdin)){
		//if(++linenum%1000==0)
		//fprintf(stderr, "%d\n", linenum);

		int len=strlen(line);
		while((line[len-1]=='\r') ||(line[len-1]=='\n'))
			line[--len]=0;

		if(scw_seg(pwdict,pir,line,len,false) < 0)
		{
			fprintf(stderr, "scw_seg return -1!\n");
		}

		/*ret = get_lgt_scw_seg(pwdict, pir);
		  if(ret < 0)
		  {
		  fprintf(stderr, "get_lgt_scw_seg return -1\n");
		  continue;
		  }*/

		switch(atoi(argv[3]))
		{
			case 0:
				printf("============== Basic Word Result =============\n");
				if(scw_get_result(pitem, pwdict, pir, SCW_OUT_BASIC | SCW_OUT_PROP)<0){
					fprintf(stderr, "get basic seg result error!\n");
					continue;
				}
				if(flag == 0)
					dump_item(pitem);
				else if(flag == 1)
					dump_item1(pitem,pwdict->m_wdtype);
				break;
			case 1:
				printf("============== Word Phrase Result =============\n");
				if(scw_get_result(pitem, pwdict,pir, SCW_OUT_WPCOMP | SCW_OUT_PROP)<0){
					fprintf(stderr, "get word/phrase result error!\n");
					continue;
				}
				if(flag == 0)
					dump_item(pitem);
				else if(flag == 1)
					dump_item1(pitem,pwdict->m_wdtype);
				break;
			case 2:
				printf("============== Sub Phrase Result =============\n");
				if(scw_get_result(pitem, pwdict, pir, SCW_OUT_SUBPH | SCW_OUT_PROP)<0){
					fprintf(stderr,"get sub phrase result error!\n");
					continue;
				}
				if(flag == 0)
					dump_item(pitem);
				else if(flag == 1)
					dump_item1(pitem,pwdict->m_wdtype);
				break;
			case 3:
				printf("============== Human Name Result =============\n");
				if(scw_get_result(pitem, pwdict, pir, SCW_OUT_HUMANNAME | SCW_OUT_PROP)<0){
					fprintf(stderr, "get sub phrase result error!\n");
					continue;
				}
				if(flag == 0)
					dump_item(pitem);
				else if(flag == 1)
					dump_item1(pitem,pwdict->m_wdtype);
				break;
			case 4:
				printf("============== Book Name Result =============\n");
				if(scw_get_result(pitem, pwdict, pir, SCW_OUT_BOOKNAME | SCW_OUT_PROP)<0){
					fprintf(stderr,"get sub phrase result error!\n");
					continue;
				}
				if(flag == 0)
					dump_item(pitem);
				else if(flag == 1)
					dump_item1(pitem,pwdict->m_wdtype);
				break;

			case 5:
				printf("============== Newword Result =============\n");
				if(scw_get_result(pitem, pwdict, pir, SCW_OUT_NEWWORD | SCW_OUT_PROP)<0){
					fprintf(stderr,"get newword result error!\n");
					continue;
				}
				if(flag == 0)
					dump_item(pitem);
				else if(flag == 1)
					dump_item1(pitem,pwdict->m_wdtype);
				break;

			default: break;
		}
	}
	return 0;

}