コード例 #1
0
void get_csc_wd_path(const char* filename, char* result) {
	char canonical_name[FILENAME_MAX];
	remove_path_and_extension(filename, canonical_name);

	char extension[FILENAME_MAX];
	get_extension(filename, extension);

	get_path(filename, result);
	remove_path_and_extension(filename, result + strlen(result));
	
	strcat(result, CASSYS_DIRECTORY_EXTENSION);

	strcat(result, PATH_SEPARATOR_STRING);
	sprintf(result+strlen(result), "%s_0_0%s",canonical_name,extension);
}
コード例 #2
0
ファイル: Cassys.cpp プロジェクト: adri87/Q-A
int initialize_working_directory(const char *text,int must_create_directory){
	char path[FILENAME_MAX];
	get_path(text,path);

	char canonical_name[FILENAME_MAX];
	remove_path_and_extension(text, canonical_name);

	char extension[FILENAME_MAX];
	get_extension(text,extension);

	char working_directory[FILENAME_MAX];
	sprintf(working_directory, "%s%s%s%c",path, canonical_name, CASSYS_DIRECTORY_EXTENSION, PATH_SEPARATOR_CHAR);

	if (must_create_directory != 0) {
        make_directory(working_directory);
    }

	char text_in_wd[FILENAME_MAX];
	sprintf(text_in_wd, "%s%s_0%s",working_directory,canonical_name,extension );
	copy_file(text_in_wd,text);

	char snt_dir_text_in_wd[FILENAME_MAX];
	get_snt_path(text_in_wd, snt_dir_text_in_wd);
    if (must_create_directory != 0) {
        make_directory(snt_dir_text_in_wd);
    }

	char original_snt_dir[FILENAME_MAX];
	get_snt_path(text,original_snt_dir);
	copy_directory_snt_content(snt_dir_text_in_wd, original_snt_dir);

	return 0;
}
コード例 #3
0
/**
 * Builds the name of a graph file located in the labeled snt directory of
 * the Cassys working directory of 'text':
 *
 *   <get_path(text)><name><CASSYS_DIRECTORY_EXTENSION><SEP><name>_<label>_<iteration>_snt<SEP><graph_name><ext>
 *
 * where <name> is the output of remove_path_and_extension(text) and <SEP>
 * is PATH_SEPARATOR_CHAR.
 *
 * @param text                   path of the text file
 * @param next_transducer_label  transducer label inserted in the directory name
 * @param next_iteration         iteration number inserted in the directory name
 * @param graph_name             graph name appended after the directory (must not be NULL)
 * @param ext                    extension appended after the graph name (must not be NULL)
 * @return a newly malloc'ed string holding the full name; the caller is
 *         responsible for freeing it. On allocation failure
 *         fatal_alloc_error is called and the program exits.
 *
 * NOTE(review): the intermediate buffer is FILENAME_MAX characters long and
 * is not bounds-checked.
 */
char* create_updated_graph_filename(const char *text,
	int next_transducer_label,
	int next_iteration,
	const char* graph_name,
	const char* ext)
{
	char path[FILENAME_MAX];
	get_path(text, path);

	char canonical_text_name[FILENAME_MAX];
	remove_path_and_extension(text, canonical_text_name);

	char working[FILENAME_MAX];
	sprintf(working, "%s%s%s%c%s_%d_%d_snt%c", path, canonical_text_name,
		CASSYS_DIRECTORY_EXTENSION, PATH_SEPARATOR_CHAR,
		canonical_text_name, next_transducer_label, next_iteration, PATH_SEPARATOR_CHAR);

	strcat(working, graph_name);
	strcat(working, ext);

	char* full_graph_name = (char*)malloc(sizeof(char)*(strlen(working) + 1));
	/* Check the pointer that was just allocated. The previous code tested
	 * 'graph_name' here, so a failed malloc went undetected and the strcpy
	 * below would have written through a NULL pointer. */
	if (full_graph_name == NULL) {
		fatal_alloc_error("create_updated_graph_filename");
		exit(1);
	}
	strcpy(full_graph_name, working);
	return full_graph_name;
}
コード例 #4
0
/**
 * Writes into 'result' the concatenation of get_path(filename), the output
 * of remove_path_and_extension(filename), CASSYS_DIRECTORY_EXTENSION and
 * PATH_SEPARATOR_STRING.
 *
 * No bounds checking is performed: the caller must provide a 'result'
 * buffer large enough for the whole string.
 */
void get_csc_path(const char* filename, char* result) {
	get_path(filename, result);

	/* Everything else is appended right after the path part. */
	char* tail = result + strlen(result);
	remove_path_and_extension(filename, tail);

	strcat(tail, CASSYS_DIRECTORY_EXTENSION);
	strcat(tail, PATH_SEPARATOR_STRING);
}
コード例 #5
0
ファイル: Cassys.cpp プロジェクト: adri87/Q-A
char* create_labeled_files_and_directory(const char *text, int next_transducer_label,int must_create_directory,int must_copy_file) {
	char path[FILENAME_MAX];
	get_path(text, path);

	char canonical_text_name[FILENAME_MAX];
	remove_path_and_extension(text, canonical_text_name);

	char extension[FILENAME_MAX];
	get_extension(text, extension);

	char working_directory[FILENAME_MAX];
	sprintf(working_directory, "%s%s%s%c", path, canonical_text_name,
			CASSYS_DIRECTORY_EXTENSION, PATH_SEPARATOR_CHAR);

	// copy the text label i- to i
	char old_labeled_text_name[FILENAME_MAX];
	sprintf(old_labeled_text_name, "%s%s_%d%s", working_directory,
			canonical_text_name, next_transducer_label - 1, extension);

	char new_labeled_text_name[FILENAME_MAX];
	sprintf(new_labeled_text_name, "%s%s_%d%s", working_directory,
			canonical_text_name, next_transducer_label, extension);

	char new_labeled_snt_directory[FILENAME_MAX];
	get_snt_path(new_labeled_text_name, new_labeled_snt_directory);
    if (must_create_directory != 0) {
        make_directory(new_labeled_snt_directory);
    }

    if (must_copy_file != 0)
    {
	    copy_file(new_labeled_text_name, old_labeled_text_name);

	    // create snt directory labeled i
	    char old_labeled_snt_directory[FILENAME_MAX];
	    get_snt_path(old_labeled_text_name, old_labeled_snt_directory);


	    // copy dictionary files in the new snt directory
	    struct snt_files *old_snt_ = new_snt_files(old_labeled_text_name);
	    struct snt_files *new_snt_ = new_snt_files(new_labeled_text_name);

	    if (fexists(old_snt_->dlc)) {
		    copy_file(new_snt_->dlc, old_snt_->dlc);
	    }
	    if (fexists(old_snt_-> dlf)) {
		    copy_file(new_snt_->dlf, old_snt_->dlf);
	    }
	    if (fexists(old_snt_-> err)) {
		    copy_file(new_snt_->err, old_snt_->err);
	    }
	    if (fexists(old_snt_->dlc_n)) {
		    copy_file(new_snt_->dlc_n, old_snt_->dlc_n);
	    }
	    if (fexists(old_snt_->dlf_n)) {
		    copy_file(new_snt_->dlf_n, old_snt_->dlf_n);
	    }
	    if (fexists(old_snt_-> err_n)) {
		    copy_file(new_snt_->err_n, old_snt_->err_n);
	    }
	    if (fexists(old_snt_->stat_dic_n)) {
		    copy_file(new_snt_->stat_dic_n, old_snt_->stat_dic_n);
	    }
	    free_snt_files(old_snt_);
	    free_snt_files(new_snt_);
    }
	char *labeled_text_name;
	labeled_text_name = (char*)malloc(sizeof(char)*(strlen(new_labeled_text_name)+1));
	if(labeled_text_name == NULL){
		perror("malloc\n");
		fprintf(stderr,"Impossible to allocate memory\n");
		exit(1);
	}
	strcpy(labeled_text_name, new_labeled_text_name);
	return labeled_text_name;
}
コード例 #6
0
ファイル: TrainingTagger.cpp プロジェクト: adri87/Q-A
int main_TrainingTagger(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}

int val,index=-1,binaries=1,r_forms=1,i_forms=1;
int semitic=0;
struct OptVars* vars=new_OptVars();
char text[FILENAME_MAX]="";
char raw_forms[FILENAME_MAX]="";
char inflected_forms[FILENAME_MAX]="";
char output[FILENAME_MAX]="";
Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_TrainingTagger,lopts_TrainingTagger,&index,vars))) {
   switch(val) {
   case 'o': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty pattern\n");
             }
             strcpy(output,vars->optarg);
             break;
   case 'b': binaries = 1;
			 break;
   case 'n': binaries = 0;
			 break;
   case 'a': break;
   case 'c': i_forms = 0;
			 break;
   case 'm': r_forms = 0;
   			 break;
   case 'S': semitic=1;
   			 break;
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   case 'h': usage(); return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_TrainingTagger[index].name);
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   }
   index=-1;
}

if (vars->optind!=argc-1) {
   free_OptVars(vars);
   error("Invalid arguments: rerun with --help\n");
   return 1;
}
strcpy(text,argv[vars->optind]);
U_FILE* input_text=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,text,U_READ);
if (input_text==NULL) {
   free_OptVars(vars);
   fatal_error("cannot open file %s\n",text);
   return 1;
}

if(output[0]=='\0'){
	remove_path_and_extension(text,output);
}

char path[FILENAME_MAX],filename[FILENAME_MAX];
get_path(text,path);
if(strlen(path) == 0){
	strcpy(path,".");
}
/* we create files which will contain statistics extracted from the tagged corpus */
U_FILE* rforms_file = NULL, *iforms_file = NULL;
if(r_forms == 1){
	sprintf(filename,"%s_data_cat.dic",output);
	new_file(path,filename,raw_forms);
	rforms_file=u_fopen_creating_versatile_encoding(encoding_output,bom_output,raw_forms,U_WRITE);
}
if(i_forms == 1){
	sprintf(filename,"%s_data_morph.dic",output);
	new_file(path,filename,inflected_forms);
	iforms_file=u_fopen_creating_versatile_encoding(encoding_output,bom_output,inflected_forms,U_WRITE);
}

u_printf("Gathering statistics from tagged corpus...\n");
do_training(input_text,rforms_file,iforms_file);

/* we close all files and then we sort text dictionaries */
u_fclose(input_text);
char disclaimer[FILENAME_MAX];
if(rforms_file != NULL){
	u_fclose(rforms_file);
	pseudo_main_SortTxt(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,0,NULL,NULL,0,raw_forms);
	strcpy(disclaimer,raw_forms);
	remove_extension(disclaimer);
	strcat(disclaimer,".txt");
	create_disclaimer(disclaimer);
}
if(iforms_file != NULL){
	u_fclose(iforms_file);
	pseudo_main_SortTxt(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,0,NULL,NULL,0,inflected_forms);
	strcpy(disclaimer,inflected_forms);
	remove_extension(disclaimer);
	strcat(disclaimer,".txt");
	create_disclaimer(disclaimer);
}

/* we compress dictionaries if option is specified by user (output is ".bin") */
if(binaries == 1){
/* simple forms dictionary */
if(r_forms == 1){
	pseudo_main_Compress(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,semitic,raw_forms);
}
/* compound forms dictionary */
if(i_forms == 1){
	pseudo_main_Compress(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,semitic,inflected_forms);
}
}
free_OptVars(vars);
u_printf("Done.\n");
return 0;
}