/**
 * \brief Runs the Tokenize program on a text from within Cassys
 *
 * Tokenize is invoked with, in this order: the optional encoding options
 * (-k / -q), --alphabet=alphabet_name, --word_by_word, the text name, and
 * --tokens=token_txt_name when token_txt_name is not NULL. The resulting
 * command line is echoed on stdout before the program is invoked.
 *
 * For more information about Tokenize, see the Unitex manual.
 *
 * \param [in] text_name the name of the text handed to Tokenize
 * \param [in] alphabet_name the name of the alphabet
 * \param [in] token_txt_name name of an already existing token file to
 *             reuse, or NULL to omit the --tokens option
 * \param [in] encoding_output output encoding used to build the -q option
 * \param [in] bom_output BOM setting used to build the -q option
 * \param [in] mask_encoding_compatibility_input input encoding mask used
 *             to build the -k option
 *
 * \return the value returned by invoke() for this Tokenize run
 */
int launch_tokenize_in_Cassys(const char *text_name, const char *alphabet_name, const char *token_txt_name,
        Encoding encoding_output, int bom_output, int mask_encoding_compatibility_input) {
    char encoding_text[FILENAME_MAX];
    char alphabet_option[FILENAME_MAX + 11];
    char command_line[4096];
    ProgramInvoker *tokenize;
    int status;

    fprintf(stdout, "Launch tokenize in Cassys\n");

    tokenize = new_ProgramInvoker(main_Tokenize, "main_Tokenize");

    /* Input encoding: only forwarded when a description was produced. */
    encoding_text[0] = '\0';
    get_reading_encoding_text(encoding_text, sizeof(encoding_text) - 1, mask_encoding_compatibility_input);
    if (encoding_text[0]) {
        add_argument(tokenize, "-k");
        add_argument(tokenize, encoding_text);
    }

    /* Output encoding: same rule, the scratch buffer is reused. */
    encoding_text[0] = '\0';
    get_writing_encoding_text(encoding_text, sizeof(encoding_text) - 1, encoding_output, bom_output);
    if (encoding_text[0]) {
        add_argument(tokenize, "-q");
        add_argument(tokenize, encoding_text);
    }

    /* Alphabet, word-by-word mode, then the target text. */
    sprintf(alphabet_option, "--alphabet=%s", alphabet_name);
    add_argument(tokenize, alphabet_option);
    add_argument(tokenize, "--word_by_word");
    add_argument(tokenize, text_name);

    /* Reuse an existing token file when the caller names one. */
    if (NULL != token_txt_name) {
        char tokens_option[FILENAME_MAX + 9];
        sprintf(tokens_option, "--tokens=%s", token_txt_name);
        add_argument(tokenize, tokens_option);
    }

    /* Echo the command line that is about to be run. */
    build_command_line(tokenize, command_line);
    fprintf(stdout, "%s\n", command_line);

    status = invoke(tokenize);
    free_ProgramInvoker(tokenize);
    return status;
}
int pseudo_main_SortTxt(const VersatileEncodingConfig* vec, int duplicates, int reverse, char* sort_alphabet, char* line_info, int thai, char* text, int factorize) { ProgramInvoker* invoker = new_ProgramInvoker(main_SortTxt, "main_SortTxt"); char tmp[200]; { tmp[0] = 0; get_reading_encoding_text(tmp, sizeof(tmp) - 1, vec->mask_encoding_compatibility_input); if (tmp[0] != '\0') { add_argument(invoker, "-k"); add_argument(invoker, tmp); } tmp[0] = 0; get_writing_encoding_text(tmp, sizeof(tmp) - 1, vec->encoding_output, vec->bom_output); if (tmp[0] != '\0') { add_argument(invoker, "-q"); add_argument(invoker, tmp); } } if (duplicates) { add_argument(invoker, "-d"); } else { add_argument(invoker, "-n"); } if (reverse) { add_argument(invoker, "-r"); } if (sort_alphabet != NULL) { add_argument(invoker, "-o"); add_argument(invoker, sort_alphabet); } if (line_info != NULL) { add_argument(invoker, "-l"); add_argument(invoker, line_info); } if (thai) { add_argument(invoker, "-t"); } if (factorize) { add_argument(invoker, "-f"); } add_argument(invoker, text); int ret = invoke(invoker); free_ProgramInvoker(invoker); return ret; }
/**
 * \brief Runs the Fst2Check program through a ProgramInvoker
 *
 * Arguments are handed to main_Fst2Check in this order: the fst2 name,
 * -y or -n, the optional encoding options (-k / -q), then -o output_name,
 * -p, -s, -e and -t when requested.
 *
 * \param [in] encoding_output output encoding used to build the -q option
 * \param [in] bom_output BOM setting used to build the -q option
 * \param [in] mask_encoding_compatibility_input input encoding mask used
 *             to build the -k option
 * \param [in] fst2name name passed as the first argument
 * \param [in] output_name value of the -o option, or NULL to omit it
 * \param [in] append non-zero adds -p
 * \param [in] display_statistics non-zero adds -s
 * \param [in] yes_or_no non-zero selects -y, zero selects -n
 * \param [in] no_empty_graph_warning non-zero adds -e
 * \param [in] tfst_check non-zero adds -t
 *
 * \return the value returned by invoke()
 */
int pseudo_main_Fst2Check(Encoding encoding_output, int bom_output, int mask_encoding_compatibility_input,
        const char* fst2name, const char* output_name, int append, int display_statistics,
        int yes_or_no, int no_empty_graph_warning, int tfst_check) {
    char encoding_text[FILENAME_MAX];
    ProgramInvoker* checker = new_ProgramInvoker(main_Fst2Check, "main_Fst2Check");
    int status;

    /* The grammar name and the -y / -n switch come first. */
    add_argument(checker, fst2name);
    if (yes_or_no) {
        add_argument(checker, "-y");
    } else {
        add_argument(checker, "-n");
    }

    /* Input encoding option, only when a description was produced. */
    encoding_text[0] = '\0';
    get_reading_encoding_text(encoding_text, sizeof(encoding_text) - 1, mask_encoding_compatibility_input);
    if (encoding_text[0]) {
        add_argument(checker, "-k");
        add_argument(checker, encoding_text);
    }

    /* Output encoding option, same rule. */
    encoding_text[0] = '\0';
    get_writing_encoding_text(encoding_text, sizeof(encoding_text) - 1, encoding_output, bom_output);
    if (encoding_text[0]) {
        add_argument(checker, "-q");
        add_argument(checker, encoding_text);
    }

    if (NULL != output_name) {
        add_argument(checker, "-o");
        add_argument(checker, output_name);
    }
    if (append) {
        add_argument(checker, "-p");
    }
    if (display_statistics) {
        add_argument(checker, "-s");
    }
    if (no_empty_graph_warning) {
        add_argument(checker, "-e");
    }
    if (tfst_check) {
        add_argument(checker, "-t");
    }

    status = invoke(checker);
    free_ProgramInvoker(checker);
    return status;
}
/**
 * \brief Calls the Concord program in Cassys
 *
 * Concord is called with target index_file and options
 * --merge=text_name, --alphabet=alphabet_name, preceded by the encoding
 * options (-k / -q) when the encoding helpers produce a non-empty
 * description. The resulting command line is echoed on stdout before the
 * program is invoked.
 *
 * For more information about Concord, see the Unitex manual.
 *
 * \param [in] text_name the name of the text given to --merge; must not be NULL
 * \param [in] index_file file containing all the matches found by locate
 * \param [in] alphabet_name the name of the alphabet given to --alphabet;
 *             must not be NULL
 * \param [in] encoding_output output encoding used to build the -q option
 * \param [in] bom_output BOM setting used to build the -q option
 * \param [in] mask_encoding_compatibility_input input encoding mask used
 *             to build the -k option
 *
 * \return the value returned by invoke() for this Concord run
 */
int launch_concord_in_Cassys(const char *text_name, const char *index_file, const char *alphabet_name,
        Encoding encoding_output, int bom_output, int mask_encoding_compatibility_input) {
    ProgramInvoker *invoker = new_ProgramInvoker(main_Concord, "main_Concord");

    add_argument(invoker, index_file);

    char tmp[FILENAME_MAX];
    {
        tmp[0] = 0;
        get_reading_encoding_text(tmp, sizeof(tmp) - 1, mask_encoding_compatibility_input);
        if (tmp[0] != '\0') {
            add_argument(invoker, "-k");
            add_argument(invoker, tmp);
        }

        tmp[0] = 0;
        get_writing_encoding_text(tmp, sizeof(tmp) - 1, encoding_output, bom_output);
        if (tmp[0] != '\0') {
            add_argument(invoker, "-q");
            add_argument(invoker, tmp);
        }
    }

    // "--merge=" is 8 characters, so a name of FILENAME_MAX-1 characters
    // plus the terminator needs FILENAME_MAX+8 bytes (the previous +7 was
    // one byte short). snprintf keeps the write bounded in any case.
    char text_argument[FILENAME_MAX + 8];
    snprintf(text_argument, sizeof(text_argument), "--merge=%s", text_name);
    add_argument(invoker, text_argument);

    // "--alphabet=" is 11 characters: 11 + (FILENAME_MAX-1) + terminator.
    char alphabet_argument[FILENAME_MAX + 11];
    snprintf(alphabet_argument, sizeof(alphabet_argument), "--alphabet=%s", alphabet_name);
    // The option was previously formatted but never handed to the invoker,
    // so Concord ran without the alphabet.
    add_argument(invoker, alphabet_argument);

    char line_command[4096];
    build_command_line(invoker, line_command);
    fprintf(stdout, "%s\n", line_command);

    int result = invoke(invoker);
    free_ProgramInvoker(invoker);
    return result;
}
/**
 * \brief Calls the Locate program in Cassys
 *
 * Locate is called with target the file name of the transducer and options
 * --text=text_name, --alphabet=alphabet_name, --longest_matches, --all and
 * --merge, --replace or --ignore depending on the output policy of the
 * transducer. --negation_operator=negation_operator is appended when
 * negation_operator is a non-empty string. Encoding options (-k / -q) are
 * passed first when the encoding helpers produce a non-empty description.
 * The resulting command line is echoed on stdout before the program is
 * invoked.
 *
 * For more information about Locate, see the Unitex manual.
 *
 * \param [in] text_name the name of the text; must not be NULL
 * \param [in] transducer structure containing information about the
 *             transducer to be applied (file name and output policy)
 * \param [in] alphabet_name the name of the alphabet; must not be NULL
 * \param [in] negation_operator value of the --negation_operator option;
 *             NULL or an empty string omits the option
 * \param [in] encoding_output output encoding used to build the -q option
 * \param [in] bom_output BOM setting used to build the -q option
 * \param [in] mask_encoding_compatibility_input input encoding mask used
 *             to build the -k option
 *
 * \return the value returned by invoke() for this Locate run, or 1 without
 *         running Locate when negation_operator does not fit in the option
 *         buffer
 */
int launch_locate_in_Cassys(const char *text_name, const transducer *transducer, const char* alphabet_name,
        const char*negation_operator, Encoding encoding_output, int bom_output,
        int mask_encoding_compatibility_input) {
    ProgramInvoker *invoker = new_ProgramInvoker(main_Locate, "main_Locate");

    char tmp[FILENAME_MAX];
    {
        tmp[0] = 0;
        get_reading_encoding_text(tmp, sizeof(tmp) - 1, mask_encoding_compatibility_input);
        if (tmp[0] != '\0') {
            add_argument(invoker, "-k");
            add_argument(invoker, tmp);
        }

        tmp[0] = 0;
        get_writing_encoding_text(tmp, sizeof(tmp) - 1, encoding_output, bom_output);
        if (tmp[0] != '\0') {
            add_argument(invoker, "-q");
            add_argument(invoker, tmp);
        }
    }

    add_argument(invoker, transducer->transducer_file_name);

    // add the text ("--text=" is 7 characters: 7 + (FILENAME_MAX-1) + terminator)
    char text_argument[FILENAME_MAX + 7];
    snprintf(text_argument, sizeof(text_argument), "--text=%s", text_name);
    add_argument(invoker, text_argument);

    // add the merge or replace option
    switch (transducer->output_policy) {
    case MERGE_OUTPUTS:
        add_argument(invoker, "--merge");
        break;
    case REPLACE_OUTPUTS:
        add_argument(invoker, "--replace");
        break;
    default:
        add_argument(invoker, "--ignore");
        break;
    }

    // add the alphabet
    char alphabet_argument[FILENAME_MAX + 11];
    snprintf(alphabet_argument, sizeof(alphabet_argument), "--alphabet=%s", alphabet_name);
    add_argument(invoker, alphabet_argument);

    // look for the longest match argument
    add_argument(invoker, "--longest_matches");

    // look for all the occurrences
    add_argument(invoker, "--all");

    // A NULL operator is treated like an empty one instead of being dereferenced.
    if (negation_operator != NULL && negation_operator[0] != '\0') {
        // The prefix alone takes 20 of the 64 bytes, so the write must be
        // bounded: an unbounded sprintf overflowed the buffer for operators
        // longer than 43 characters.
        char negation_operator_argument[0x40];
        int written = snprintf(negation_operator_argument, sizeof(negation_operator_argument),
                "--negation_operator=%s", negation_operator);
        if (written < 0 || (size_t) written >= sizeof(negation_operator_argument)) {
            // Refuse to run Locate with a truncated option.
            // NOTE(review): 1 is assumed to be read as a failure by callers - confirm.
            fprintf(stderr, "Negation operator is too long: %s\n", negation_operator);
            free_ProgramInvoker(invoker);
            return 1;
        }
        add_argument(invoker, negation_operator_argument);
    }

    char line_command[4096];
    build_command_line(invoker, line_command);
    fprintf(stdout, "%s\n", line_command);

    int result = invoke(invoker);
    free_ProgramInvoker(invoker);
    return result;
}
int pseudo_main_Concord(const VersatileEncodingConfig* vec, const char* index_file,const char* font,int fontsize, int left_context,int right_context,const char* sort_order, const char* output,const char* directory,const char* alphabet, int thai,int only_ambiguous,int only_matches) { ProgramInvoker* invoker=new_ProgramInvoker(main_Concord,"main_Concord"); char tmp[256]; { tmp[0]=0; get_reading_encoding_text(tmp,sizeof(tmp)-1,vec->mask_encoding_compatibility_input); if (tmp[0] != '\0') { add_argument(invoker,"-k"); add_argument(invoker,tmp); } tmp[0]=0; get_writing_encoding_text(tmp,sizeof(tmp)-1,vec->encoding_output,vec->bom_output); if (tmp[0] != '\0') { add_argument(invoker,"-q"); add_argument(invoker,tmp); } } if (font!=NULL) { add_argument(invoker,"-f"); add_argument(invoker,font); sprintf(tmp,"%d",fontsize); add_argument(invoker,"-s"); add_argument(invoker,tmp); } sprintf(tmp,"%d",left_context); add_argument(invoker,"-l"); add_argument(invoker,tmp); sprintf(tmp,"%d",right_context); add_argument(invoker,"-r"); add_argument(invoker,tmp); if (sort_order==NULL) { add_argument(invoker,"--TO"); } else { add_argument(invoker,sort_order); } add_argument(invoker,output); if (directory!=NULL) { add_argument(invoker,"-d"); add_argument(invoker,directory); } if (alphabet!=NULL) { add_argument(invoker,"-a"); add_argument(invoker,alphabet); } if (thai) { add_argument(invoker,"-T"); } if (only_ambiguous) { add_argument(invoker,"--only_ambiguous"); } if (only_matches) { add_argument(invoker,"--only_matches"); } add_argument(invoker,index_file); int ret=invoke(invoker); free_ProgramInvoker(invoker); return ret; }