Пример #1
0
/**
 * \brief Calls the Tokenize program in Cassys
 *
 *	Tokenize is invoked on text_name with the options --alphabet=alphabet_name and
 *	--word_by_word, plus --tokens=token_txt_name when token_txt_name is not NULL.
 *	When the encoding helpers return a non-empty string, it is forwarded with
 *	-k (reading encoding) and/or -q (writing encoding). The command line that was
 *	built is echoed on stdout before the program is invoked.
 *	For more information about Tokenize, see the Unitex manual.
 *
 * \param [in] text_name the name of the text to tokenize
 * \param [in] alphabet_name the name of the alphabet file (must not be NULL)
 * \param [in] token_txt_name name of an existing token file passed with --tokens, or NULL
 * \param [in] encoding_output forwarded to get_writing_encoding_text
 * \param [in] bom_output forwarded to get_writing_encoding_text
 * \param [in] mask_encoding_compatibility_input forwarded to get_reading_encoding_text
 *
 * \return the value returned by invoke(), or 1 when an option string does not
 *         fit in its buffer (in that case Tokenize is not invoked)
 */
int launch_tokenize_in_Cassys(const char *text_name, const char *alphabet_name, const char *token_txt_name,
    Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input){

	fprintf(stdout,"Launch tokenize in Cassys\n");

	// Format the option strings first, with bounded writes, so that an
	// over-long name is rejected before any resource has been acquired.
	char alphabet_argument[FILENAME_MAX + 11];
	int written = snprintf(alphabet_argument, sizeof(alphabet_argument), "--alphabet=%s", alphabet_name);
	if (written < 0 || (size_t)written >= sizeof(alphabet_argument)) {
		fprintf(stderr, "launch_tokenize_in_Cassys: alphabet name is too long\n");
		return 1;
	}

	char token_argument[FILENAME_MAX + 9];
	token_argument[0] = '\0';
	if (token_txt_name != NULL) {
		written = snprintf(token_argument, sizeof(token_argument), "--tokens=%s", token_txt_name);
		if (written < 0 || (size_t)written >= sizeof(token_argument)) {
			fprintf(stderr, "launch_tokenize_in_Cassys: token file name is too long\n");
			return 1;
		}
	}

	ProgramInvoker *invoker = new_ProgramInvoker(main_Tokenize,"main_Tokenize");

	// Encoding options are only added when the helper produced a non-empty string
	char tmp[FILENAME_MAX];
	tmp[0]=0;
	get_reading_encoding_text(tmp,sizeof(tmp)-1,mask_encoding_compatibility_input);
	if (tmp[0] != '\0') {
		add_argument(invoker,"-k");
		add_argument(invoker,tmp);
	}

	tmp[0]=0;
	get_writing_encoding_text(tmp,sizeof(tmp)-1,encoding_output,bom_output);
	if (tmp[0] != '\0') {
		add_argument(invoker,"-q");
		add_argument(invoker,tmp);
	}

	// add the alphabet
	add_argument(invoker, alphabet_argument);

	// Tokenize word by word
	add_argument(invoker, "--word_by_word");

	// add the target text file
	add_argument(invoker,text_name);

	// if a token.txt file already exists, use it
	if (token_txt_name != NULL) {
		add_argument(invoker,token_argument);
	}

	// Echo the command line before running it
	char line_command[4096];
	build_command_line(invoker, line_command);
	fprintf(stdout, "%s\n", line_command);

	int result = invoke(invoker);
	free_ProgramInvoker(invoker);
	return result;
}
Пример #2
0
/**
 * Builds an argument list for main_SortTxt and runs it through a ProgramInvoker.
 *
 * Arguments are added in this order:
 *   -k <text>  when get_reading_encoding_text yields a non-empty string
 *   -q <text>  when get_writing_encoding_text yields a non-empty string
 *   -d         when duplicates is non-zero, -n otherwise
 *   -r         when reverse is non-zero
 *   -o <sort_alphabet>  when sort_alphabet is not NULL
 *   -l <line_info>      when line_info is not NULL
 *   -t         when thai is non-zero
 *   -f         when factorize is non-zero
 *   <text>     always, as the last argument
 *
 * Returns the value returned by invoke().
 */
int pseudo_main_SortTxt(const VersatileEncodingConfig* vec, int duplicates,
    int reverse, char* sort_alphabet, char* line_info, int thai,
    char* text, int factorize) {
  ProgramInvoker* sorter = new_ProgramInvoker(main_SortTxt, "main_SortTxt");
  char encoding_text[200];

  /* Reading-encoding option, only if the helper filled the buffer. */
  encoding_text[0] = '\0';
  get_reading_encoding_text(encoding_text, sizeof(encoding_text) - 1,
      vec->mask_encoding_compatibility_input);
  if (encoding_text[0] != '\0') {
    add_argument(sorter, "-k");
    add_argument(sorter, encoding_text);
  }

  /* Writing-encoding option, same rule. */
  encoding_text[0] = '\0';
  get_writing_encoding_text(encoding_text, sizeof(encoding_text) - 1,
      vec->encoding_output, vec->bom_output);
  if (encoding_text[0] != '\0') {
    add_argument(sorter, "-q");
    add_argument(sorter, encoding_text);
  }

  /* Exactly one of -d / -n is always passed. */
  add_argument(sorter, duplicates ? "-d" : "-n");

  if (reverse) {
    add_argument(sorter, "-r");
  }

  if (sort_alphabet != NULL) {
    add_argument(sorter, "-o");
    add_argument(sorter, sort_alphabet);
  }

  if (line_info != NULL) {
    add_argument(sorter, "-l");
    add_argument(sorter, line_info);
  }

  if (thai) {
    add_argument(sorter, "-t");
  }

  if (factorize) {
    add_argument(sorter, "-f");
  }

  /* The file to sort comes last. */
  add_argument(sorter, text);

  int status = invoke(sorter);
  free_ProgramInvoker(sorter);
  return status;
}
Пример #3
0
/**
 * Builds an argument list for main_Fst2Check and runs it through a ProgramInvoker.
 *
 * Arguments are added in this order:
 *   <fst2name>  always, first
 *   -y          when yes_or_no is non-zero, -n otherwise
 *   -k <text>   when get_reading_encoding_text yields a non-empty string
 *   -q <text>   when get_writing_encoding_text yields a non-empty string
 *   -o <output_name>  when output_name is not NULL
 *   -p          when append is non-zero
 *   -s          when display_statistics is non-zero
 *   -e          when no_empty_graph_warning is non-zero
 *   -t          when tfst_check is non-zero
 *
 * Returns the value returned by invoke().
 */
int pseudo_main_Fst2Check(Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input,
                          const char* fst2name,const char* output_name,int append,int display_statistics,
                          int yes_or_no,int no_empty_graph_warning,int tfst_check) {
    ProgramInvoker* checker = new_ProgramInvoker(main_Fst2Check, "main_Fst2Check");

    add_argument(checker, fst2name);
    if (yes_or_no) {
        add_argument(checker, "-y");
    } else {
        add_argument(checker, "-n");
    }

    char encoding_text[FILENAME_MAX];

    /* Reading-encoding option, only if the helper filled the buffer. */
    encoding_text[0] = '\0';
    get_reading_encoding_text(encoding_text, sizeof(encoding_text) - 1,
                              mask_encoding_compatibility_input);
    if (encoding_text[0] != '\0') {
        add_argument(checker, "-k");
        add_argument(checker, encoding_text);
    }

    /* Writing-encoding option, same rule. */
    encoding_text[0] = '\0';
    get_writing_encoding_text(encoding_text, sizeof(encoding_text) - 1,
                              encoding_output, bom_output);
    if (encoding_text[0] != '\0') {
        add_argument(checker, "-q");
        add_argument(checker, encoding_text);
    }

    if (output_name != NULL) {
        add_argument(checker, "-o");
        add_argument(checker, output_name);
    }

    /* Plain on/off switches. */
    if (append) {
        add_argument(checker, "-p");
    }
    if (display_statistics) {
        add_argument(checker, "-s");
    }
    if (no_empty_graph_warning) {
        add_argument(checker, "-e");
    }
    if (tfst_check) {
        add_argument(checker, "-t");
    }

    int status = invoke(checker);
    free_ProgramInvoker(checker);
    return status;
}
Пример #4
0
/**
 * \brief Calls the Concord program in Cassys
 *
 *	Concord is called with target index_file and options
 *  --merge=text_name, --alphabet=alphabet_name.
 *  When the encoding helpers return a non-empty string, it is forwarded with
 *  -k (reading encoding) and/or -q (writing encoding). The command line that
 *  was built is echoed on stdout before the program is invoked.
 *
 *  For more information about Concord, see the Unitex manual.
 *
 * \param [in] text_name the name of the text, passed with --merge
 * \param [in] index_file file containing all the matches found by locate
 * \param [in] alphabet_name the name of the alphabet; the --alphabet option is
 *             omitted when this is NULL
 * \param [in] encoding_output forwarded to get_writing_encoding_text
 * \param [in] bom_output forwarded to get_writing_encoding_text
 * \param [in] mask_encoding_compatibility_input forwarded to get_reading_encoding_text
 *
 * \return the value returned by invoke(), or 1 when an option string does not
 *         fit in its buffer (in that case Concord is not invoked)
 */
int launch_concord_in_Cassys(const char *text_name, const char *index_file, const char *alphabet_name,
    Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input){

	// Format the option strings first, with bounded writes, so that an
	// over-long name is rejected before any resource has been acquired.
	// "--merge=" is 8 characters long, hence FILENAME_MAX+8.
	char text_argument[FILENAME_MAX+8];
	int written = snprintf(text_argument,sizeof(text_argument),"--merge=%s",text_name);
	if (written < 0 || (size_t)written >= sizeof(text_argument)) {
		fprintf(stderr, "launch_concord_in_Cassys: text name is too long\n");
		return 1;
	}

	char alphabet_argument[FILENAME_MAX+11];
	alphabet_argument[0] = '\0';
	if (alphabet_name != NULL) {
		written = snprintf(alphabet_argument,sizeof(alphabet_argument),"--alphabet=%s",alphabet_name);
		if (written < 0 || (size_t)written >= sizeof(alphabet_argument)) {
			fprintf(stderr, "launch_concord_in_Cassys: alphabet name is too long\n");
			return 1;
		}
	}

	ProgramInvoker *invoker = new_ProgramInvoker(main_Concord, "main_Concord");

	add_argument(invoker,index_file);

	// Encoding options are only added when the helper produced a non-empty string
	char tmp[FILENAME_MAX];
	tmp[0]=0;
	get_reading_encoding_text(tmp,sizeof(tmp)-1,mask_encoding_compatibility_input);
	if (tmp[0] != '\0') {
		add_argument(invoker,"-k");
		add_argument(invoker,tmp);
	}

	tmp[0]=0;
	get_writing_encoding_text(tmp,sizeof(tmp)-1,encoding_output,bom_output);
	if (tmp[0] != '\0') {
		add_argument(invoker,"-q");
		add_argument(invoker,tmp);
	}

	add_argument(invoker,text_argument);

	// The alphabet option used to be formatted but never handed to the
	// invoker, so Concord silently ran without it.
	if (alphabet_name != NULL) {
		add_argument(invoker,alphabet_argument);
	}

	// Echo the command line before running it
	char line_command[4096];
	build_command_line(invoker, line_command);
	fprintf(stdout, "%s\n", line_command);

	int result = invoke(invoker);
	free_ProgramInvoker(invoker);
	return result;
}
Пример #5
0
/**
 * \brief Calls the Locate program in Cassys
 *
 *	Locate is called with the transducer file name as target and the options
 *  --text=text_name, --alphabet=alphabet_name, --longest_matches, --all and
 *  --merge, --replace or --ignore depending on the output policy of the
 *  transducer. --negation_operator=<value> is added when negation_operator is
 *  a non-empty string. When the encoding helpers return a non-empty string, it
 *  is forwarded with -k (reading encoding) and/or -q (writing encoding). The
 *  command line that was built is echoed on stdout before the program is invoked.
 *
 *  For more information about Locate, see the Unitex manual.
 *
 * \param [in] text_name the name of the text (must not be NULL)
 * \param [in] transducer structure containing information about the transducer to be applied
 * \param [in] alphabet_name the name of the alphabet (must not be NULL)
 * \param [in] negation_operator value for --negation_operator; NULL or an empty
 *             string means the option is not passed
 * \param [in] encoding_output forwarded to get_writing_encoding_text
 * \param [in] bom_output forwarded to get_writing_encoding_text
 * \param [in] mask_encoding_compatibility_input forwarded to get_reading_encoding_text
 *
 * \return the value returned by invoke(), or 1 when an option string does not
 *         fit in its buffer (in that case Locate is not invoked)
 */
int launch_locate_in_Cassys(const char *text_name, const transducer *transducer, const char* alphabet_name,
    const char*negation_operator,
    Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input){

	// Format the option strings first, with bounded writes, so that an
	// over-long value is rejected before any resource has been acquired.
	char text_argument[FILENAME_MAX+7];
	int written = snprintf(text_argument,sizeof(text_argument),"--text=%s",text_name);
	if (written < 0 || (size_t)written >= sizeof(text_argument)) {
		fprintf(stderr, "launch_locate_in_Cassys: text name is too long\n");
		return 1;
	}

	char alphabet_argument[FILENAME_MAX+11];
	written = snprintf(alphabet_argument,sizeof(alphabet_argument),"--alphabet=%s",alphabet_name);
	if (written < 0 || (size_t)written >= sizeof(alphabet_argument)) {
		fprintf(stderr, "launch_locate_in_Cassys: alphabet name is too long\n");
		return 1;
	}

	// A NULL negation operator is treated like an empty one
	int has_negation_operator = (negation_operator != NULL && (*negation_operator) != 0);
	char negation_operator_argument[0x40];
	negation_operator_argument[0] = '\0';
	if (has_negation_operator) {
		written = snprintf(negation_operator_argument,sizeof(negation_operator_argument),
		                   "--negation_operator=%s",negation_operator);
		if (written < 0 || (size_t)written >= sizeof(negation_operator_argument)) {
			fprintf(stderr, "launch_locate_in_Cassys: negation operator is too long\n");
			return 1;
		}
	}

	ProgramInvoker *invoker = new_ProgramInvoker(main_Locate, "main_Locate");

	// Encoding options are only added when the helper produced a non-empty string
	char tmp[FILENAME_MAX];
	tmp[0]=0;
	get_reading_encoding_text(tmp,sizeof(tmp)-1,mask_encoding_compatibility_input);
	if (tmp[0] != '\0') {
		add_argument(invoker,"-k");
		add_argument(invoker,tmp);
	}

	tmp[0]=0;
	get_writing_encoding_text(tmp,sizeof(tmp)-1,encoding_output,bom_output);
	if (tmp[0] != '\0') {
		add_argument(invoker,"-q");
		add_argument(invoker,tmp);
	}

	add_argument(invoker, transducer->transducer_file_name);

	// add the text
	add_argument(invoker, text_argument);

	// add the merge or replace option
	switch (transducer ->output_policy) {
	   case MERGE_OUTPUTS: add_argument(invoker,"--merge"); break;
	   case REPLACE_OUTPUTS: add_argument(invoker,"--replace"); break;
	   default: add_argument(invoker,"--ignore"); break;
	}

	// add the alphabet
	add_argument(invoker, alphabet_argument);

	// look for the longest match argument
	add_argument(invoker, "--longest_matches");

	// look for all the occurrences
	add_argument(invoker, "--all");

	if (has_negation_operator) {
		add_argument(invoker,negation_operator_argument);
	}

	// Echo the command line before running it
	char line_command[4096];
	build_command_line(invoker,line_command);
	fprintf(stdout, "%s\n",line_command);

	int result = invoke(invoker);
	free_ProgramInvoker(invoker);
	return result;
}
Пример #6
0
int pseudo_main_Concord(const VersatileEncodingConfig* vec,
                        const char* index_file,const char* font,int fontsize,
                        int left_context,int right_context,const char* sort_order,
                        const char* output,const char* directory,const char* alphabet,
                        int thai,int only_ambiguous,int only_matches) {
ProgramInvoker* invoker=new_ProgramInvoker(main_Concord,"main_Concord");
char tmp[256];
{
    tmp[0]=0;
    get_reading_encoding_text(tmp,sizeof(tmp)-1,vec->mask_encoding_compatibility_input);
    if (tmp[0] != '\0') {
        add_argument(invoker,"-k");
        add_argument(invoker,tmp);
    }

    tmp[0]=0;
    get_writing_encoding_text(tmp,sizeof(tmp)-1,vec->encoding_output,vec->bom_output);
    if (tmp[0] != '\0') {
        add_argument(invoker,"-q");
        add_argument(invoker,tmp);
    }
}
if (font!=NULL) {
   add_argument(invoker,"-f");
   add_argument(invoker,font);
   sprintf(tmp,"%d",fontsize);
   add_argument(invoker,"-s");
   add_argument(invoker,tmp);
}
sprintf(tmp,"%d",left_context);
add_argument(invoker,"-l");
add_argument(invoker,tmp);
sprintf(tmp,"%d",right_context);
add_argument(invoker,"-r");
add_argument(invoker,tmp);
if (sort_order==NULL) {
   add_argument(invoker,"--TO");
} else {
   add_argument(invoker,sort_order);
}
add_argument(invoker,output);
if (directory!=NULL) {
   add_argument(invoker,"-d");
   add_argument(invoker,directory);
}
if (alphabet!=NULL) {
   add_argument(invoker,"-a");
   add_argument(invoker,alphabet);
}
if (thai) {
   add_argument(invoker,"-T");
}
if (only_ambiguous) {
   add_argument(invoker,"--only_ambiguous");
}
if (only_matches) {
   add_argument(invoker,"--only_matches");
}
add_argument(invoker,index_file);
int ret=invoke(invoker);
free_ProgramInvoker(invoker);
return ret;
}