bool load(const char* model1, const char* model2, const char * lexicon_file = NULL) { std::ifstream mfs(model1, std::ifstream::binary); if (!mfs) { return false; } model = new ltp::segmentor::Model; if (!model->load(model_header.c_str(), mfs)) { delete model; model = 0; return false; } mfs.close(); mfs.open(model2); if (!mfs) { return false; } bs_model = new ltp::segmentor::Model; if (!bs_model->load(model_header.c_str(), mfs)) { delete model; model = 0; delete bs_model; bs_model = 0; return false; } if (NULL != lexicon_file) { load_lexicon(lexicon_file, &model->external_lexicon); } lexicons.push_back(&(bs_model->internal_lexicon)); lexicons.push_back(&(model->internal_lexicon)); lexicons.push_back(&(model->external_lexicon)); return true; }
void urdutag_file(const char *input_filename, const char *output_filename, const char *lexicon_filename) { FILE *source; FILE *dest; entry *lexicon; token *word; /* open the source file and check for (then discard) directionality character */ if (!(source = fopen(input_filename, "rb"))) { puts("Error opening original file!"); fcloseall(); return; } if (!( ucheckdir(source) )) { fputs("Specified source file not recognised as Unicode!", stderr); fcloseall(); return; } /* open file to write, insert directionality character */ if( !(dest = fopen(output_filename, "wb")) ) { puts("Error opening processed file!"); fcloseall(); return; } if ( fputuc( RIGHTWAY , dest) == UERR ) { puts("Error writing to processed file!"); fcloseall(); return; } if (! ( lexicon = load_lexicon(lexicon_filename) ) ) return; while (1) { /* read a line */ if ( ! (word = load_token(source)) ) break; /* urdutag that token IF it has no tags already */ if (word->tag[0][0] == 0x0000) urdutag(lexicon, word); /* if split-signal tag returned , perform the special actions */ if ( word->tag[0][0] == 0x0053 && word->tag[0][1] == 0x0050 ) word = do_the_splits(word, lexicon, dest); /* write the line to file */ if (write_token(word, dest)) break; free(word); } /* check "word". If it is still alloc'd, then the loop broke prematurely */ /* and "word" will need freeing. */ if (word) free(word); /* free the lexicon */ free_lexicon(lexicon); /* close read and write files */ if (fclose(source) < 0) { puts("Error closing original file!"); fcloseall(); return; } if (fclose(dest) < 0) { puts("Error closing processed file!"); fcloseall(); return; } }