Пример #1
0
  /**
   * Load the two segmentation models and, optionally, an external lexicon.
   *
   * @param model1        path of the file parsed into `model`.
   * @param model2        path of the file parsed into `bs_model`.
   * @param lexicon_file  optional lexicon path; when non-NULL it is handed to
   *                      load_lexicon() together with `&model->external_lexicon`.
   *                      The result of load_lexicon() is not checked here.
   * @return true when both model files were opened and parsed; false
   *         otherwise. On every failure path any model allocated by this
   *         call is deleted and its pointer reset to 0, so the object is
   *         never left half-loaded.
   *
   * On success `lexicons` receives, in this order: bs_model's internal
   * lexicon, model's internal lexicon, model's external lexicon.
   */
  bool load(const char* model1, const char* model2,
      const char * lexicon_file = NULL) {
    std::ifstream mfs(model1, std::ifstream::binary);
    if (!mfs) { return false; }

    model = new ltp::segmentor::Model;
    if (!model->load(model_header.c_str(), mfs)) {
      delete model;
      model = 0;
      return false;
    }
    mfs.close();

    // A separate stream is used for the second file: it starts with clean
    // state flags (reusing the first stream can carry over eof/fail bits on
    // pre-C++11 libraries) and is opened in binary mode like the first one.
    std::ifstream bs_mfs(model2, std::ifstream::binary);
    if (!bs_mfs) {
      // Do not keep a lone primary model around when the pair is incomplete.
      delete model;
      model = 0;
      return false;
    }

    bs_model = new ltp::segmentor::Model;
    if (!bs_model->load(model_header.c_str(), bs_mfs)) {
      delete model;     model = 0;
      delete bs_model;  bs_model = 0;
      return false;
    }

    if (NULL != lexicon_file) {
      load_lexicon(lexicon_file, &model->external_lexicon);
    }

    lexicons.push_back(&(bs_model->internal_lexicon));
    lexicons.push_back(&(model->internal_lexicon));
    lexicons.push_back(&(model->external_lexicon));
    return true;
  }
Пример #2
0
/*
 * Tag every token of an input file and write the result to an output file.
 *
 * input_filename   - file opened for binary reading; it must be accepted by
 *                    ucheckdir(), which also consumes the leading
 *                    directionality character.
 * output_filename  - file opened for binary writing; the RIGHTWAY character
 *                    is written to it first.
 * lexicon_filename - passed to load_lexicon().
 *
 * The function returns nothing. Failures are reported with a message on
 * stdout (stderr for the "not recognised as Unicode" case), except a
 * load_lexicon() failure, which is silent here. On every exit path only the
 * two files opened by this function are closed; other streams of the process
 * are left alone.
 */
void urdutag_file(const char *input_filename, const char *output_filename,
				  const char *lexicon_filename)
{
	FILE *source;
	FILE *dest;

	entry *lexicon;

	token *word = NULL;


	/* open the source file and check for (then discard) directionality character */
	if (!(source = fopen(input_filename, "rb")))
	{
		puts("Error opening original file!");
		return;
	}
	if (!( ucheckdir(source) ))
	{
		fputs("Specified source file not recognised as Unicode!", stderr);
		fclose(source);
		return;
	}

	/* open file to write, insert directionality character */
	if( !(dest = fopen(output_filename, "wb")) )
	{
		puts("Error opening processed file!");
		fclose(source);
		return;
	}
	if ( fputuc( RIGHTWAY , dest) == UERR )
	{
		puts("Error writing to processed file!");
		fclose(source);
		fclose(dest);
		return;
	}


	if (! ( lexicon = load_lexicon(lexicon_filename) ) )
	{
		/* both files are open at this point; release them before bailing out */
		fclose(source);
		fclose(dest);
		return;
	}

	while (1)
	{
		/* read a line; a NULL token ends the loop */
		if ( ! (word = load_token(source)) )
			break;


		/* urdutag that token IF it has no tags already */

		if (word->tag[0][0] == 0x0000)
			urdutag(lexicon, word);


		/* if split-signal tag ("SP") returned, perform the special actions */
		if ( word->tag[0][0] == 0x0053 && word->tag[0][1] == 0x0050 )
			word = do_the_splits(word, lexicon, dest);



		/* write the line to file; a non-zero result stops processing */
		if (write_token(word, dest))
			break;

		free(word);
	}

	/* "word" is NULL when load_token() ended the loop; it is still */
	/* allocated when write_token() ended it, and then needs freeing. */
	if (word)
		free(word);

	/* free the lexicon */
	free_lexicon(lexicon);


	/* close read and write files */
	if (fclose(source) != 0)
	{
		puts("Error closing original file!");
		fclose(dest);
		return;
	}
	if (fclose(dest) != 0)
	{
		puts("Error closing processed file!");
		return;
	}
}