Example #1
0
void protect_text(const char *fileName, const VersatileEncodingConfig* vec){

	U_FILE *file_reader = u_fopen(vec, fileName, U_READ);
	if(file_reader == NULL){
		fatal_error("u_fopen");
	}

	unichar *text = read_file(file_reader);

	unichar *protected_text = protect_lexical_tag(text, false);

	free(text);
	u_fclose(file_reader);

	U_FILE *file_write = u_fopen(vec, fileName, U_WRITE);
	if(file_write == NULL){
		fatal_error("u_fopen");
	}

	int written = u_fwrite(protected_text, u_strlen(protected_text),file_write);
	if(written != (int)u_strlen(protected_text)){
		fatal_error("u_fwrite");
	}

	u_fclose(file_write);
	free(protected_text);

}
Example #2
0
/**
 * Looks for the configuration file and opens it for reading.
 *
 * The per-user file ($HOME followed by DOT_CONFIG_NAME) is tried first. If
 * it cannot be opened, each entry of the colon-separated SYSCONFDIR list is
 * tried in order with ETC_CONFIG_NAME appended; the search stops at the
 * first file that opens.
 *
 * @param status in/out status. When it is not OLY_OKAY on entry it is handed
 *               to HANDLE_OLY_STATUS and NULL is returned. It is set to
 *               OLY_ERR_FILE_NOT_FOUND when a candidate path does not fit
 *               in the local buffer.
 * @return the opened file, or NULL when nothing could be opened.
 */
static OFILE *
find_config_file (OlyStatus *status)
{
    const char  *home = getenv("HOME");
    const char  *basename = ETC_CONFIG_NAME,
                *sysconf_path = SYSCONFDIR;
    const char  *the_colon = ":";
    char         result[BUFSIZ], *path = NULL, *token, *watch;
    size_t       name_size = ( strlen(basename) + 1 ),
                 result_len = BUFSIZ;
    OFILE       *return_file = NULL;
    void        *free_me;

    /* Bail out before allocating anything, and only when the status is bad */
    if (*status != OLY_OKAY)
    {
        HANDLE_OLY_STATUS(*status);
        return NULL;
    }

    /* HOME may be unset: in that case only the system locations are tried */
    if ( home != NULL )
    {
        path = omalloc( strlen(home) + strlen(DOT_CONFIG_NAME) + 1 );
    }

    if ( path != NULL )
    {
        strcat( strcpy( path, home ), DOT_CONFIG_NAME );
        return_file = u_fopen( path , "rb", NULL, NULL );
        free_me = (void *)path;
        OFREE(free_me);
    }

    if (return_file == NULL)
    {
        path = omalloc( (strlen(sysconf_path) + 1) );
        strcpy(path, sysconf_path);
        /* Stop at the first directory whose file opens, so that a later
         * entry can neither leak nor replace an already opened file. */
        for ( token = strtok_r(path, the_colon, &watch);
                ( token != NULL ) && ( return_file == NULL );
                token = strtok_r(NULL, the_colon, &watch) )
        {
            if ((strlen(token) + name_size) < result_len)
            {
                strcpy( result, token );
                strcat( result, basename );
                return_file = u_fopen( result, "rb",
                        char_default_locale(), char_default_encoding() );
            }
            else
            {
                /* directory + file name would overflow 'result' */
                *status = OLY_ERR_FILE_NOT_FOUND;
                *result = '\0';
            }
        }
        free_me = (void *)path;
        OFREE(free_me);
    }
    return return_file;
}
Example #3
0
/**
 * This function reads the given char order file.
 */
void read_char_order(const VersatileEncodingConfig* vec, const char* name,
    struct sort_infos* inf) {
  int c;
  int current_line = 1;
  U_FILE* f = u_fopen(vec, name, U_READ);
  if (f == NULL) {
    error("Cannot open file %s\n", name);
    return;
  }
  unichar current_canonical = '\0';
  int current_priority = 0;
  while ((c = u_fgetc(f)) != EOF) {
    if (c != '\n') {
      /* we ignore the \n char */
      if (inf->class_numbers[(unichar) c] != 0) {
        error("Error in %s: char 0x%x appears several times\n", name, c);
      } else {
        inf->class_numbers[(unichar) c] = current_line;
        if (current_canonical == '\0') {
          current_canonical = (unichar) c;
        }
        inf->canonical[(unichar) c] = current_canonical;
        inf->priority[(unichar) c] = ++current_priority;
      }
    } else {
      current_line++;
      current_canonical = '\0';
      current_priority = 0;
    }
  }
  u_fclose(f);
}
/**
 * Loads a compound word file, adding each word to the keywords.
 */
void load_compound_words(char* name,VersatileEncodingConfig* vec,
		struct string_hash_ptr* keywords) {
U_FILE* f=u_fopen(vec,name,U_READ);
if (f==NULL) return;
Ustring* line=new_Ustring(256);
Ustring* lower=new_Ustring(256);
while (EOF!=readline(line,f)) {
	if (line->str[0]=='{') {
		/* We skip tags */
		continue;
	}
	u_strcpy(lower,line->str);
	u_tolower(lower->str);
	int index=get_value_index(lower->str,keywords,INSERT_IF_NEEDED,NULL);
	if (index==-1) {
		fatal_error("Internal error in load_tokens_by_freq\n");
	}
	KeyWord* value=(KeyWord*)keywords->value[index];
	add_keyword(&value,line->str,1);
	keywords->value[index]=value;
}
free_Ustring(line);
free_Ustring(lower);
u_fclose(f);
}
/**
 * Creates the .fst2 file 'fname' for writing and returns a newly allocated
 * Elag_fst_file_out describing it, or NULL if the file cannot be opened.
 * An invalid 'type' or an allocation failure raises a fatal error.
 */
Elag_fst_file_out* fst_file_out_open(const VersatileEncodingConfig* vec,const char* fname,int type) {
Elag_fst_file_out* out=(Elag_fst_file_out*)malloc(sizeof(Elag_fst_file_out));
if (out==NULL) {
   fatal_alloc_error("fst_file_out_open");
}
if (!(type>=0 && type<FST_BAD_TYPE)) {
   fatal_error("fst_file_out_open: bad FST_TYPE\n");
}
out->f=u_fopen(vec,fname,U_WRITE);
if (out->f==NULL) {
   error("fst_out_open: unable to open '%s'\n",fname);
   free(out);
   return NULL;
}
/* We remember where the first line starts before writing ten zeros there
 * (presumably a placeholder that is patched later - TODO confirm) */
out->fstart=ftell(out->f);
u_fprintf(out->f,"0000000000\n");
out->name=strdup(fname);
if (out->name==NULL) {
   fatal_alloc_error("fst_file_out_open");
}
out->type=type;
out->nb_automata=0;
out->labels=new_string_hash(16);
/* <E> is inserted first so that this special tag gets the number 0 */
get_value_index(EPSILON,out->labels);
return out;
}
Example #6
0
/**
 * Opens a UFILE whose name is given as a UChar string.
 *
 * The name is first converted with u_austrcpy() and opened through
 * u_fopen(). On platforms that only use the Win32 API, a failed open is
 * retried with _wfopen() on the original wide name.
 *
 * @param filename NUL-terminated UChar file name.
 * @param perm     fopen-style mode string.
 * @param locale   passed through to u_fopen() / finit_owner().
 * @param codepage passed through to u_fopen() / finit_owner().
 * @return the opened UFILE, or NULL if the name is too long for the local
 *         buffer or if the file could not be opened.
 */
U_CAPI UFILE* U_EXPORT2
u_fopen_u(const UChar   *filename,
        const char    *perm,
        const char    *locale,
        const char    *codepage)
{
    UFILE     *result;
    char buffer[256];

    /* u_austrcpy() does no bounds checking: refuse names that cannot fit.
       NOTE(review): with a multi-byte default codepage one UChar may need
       several bytes - confirm that this bound is sufficient. */
    if (u_strlen(filename) >= (int32_t)sizeof(buffer)) {
        return NULL;
    }

    u_austrcpy(buffer, filename);

    result = u_fopen(buffer, perm, locale, codepage);
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* Try Windows API _wfopen if the above fails. */
    if (!result) {
        // TODO: test this code path, including wperm.
        wchar_t wperm[40] = {};
        size_t  retVal;
        mbstowcs_s(&retVal, wperm, perm, _TRUNCATE);
        FILE *systemFile = _wfopen((const wchar_t *)filename, wperm);
        if (systemFile) {
            result = finit_owner(systemFile, locale, codepage, TRUE);
        }
        if (!result && systemFile) {
            /* Something bad happened.
               Maybe the converter couldn't be opened.
               systemFile is only closed when _wfopen succeeded:
               fclose(NULL) is undefined. */
            fclose(systemFile);
        }
    }
#endif
    return result; /* not a file leak */
}
Example #7
0
/**
 * Opens a UFILE whose name is given as a UChar string.
 *
 * The name is first converted with u_austrcpy() and opened through
 * u_fopen(). On platforms that only use the Win32 API, a failed open is
 * retried with _wfopen() on the original wide name.
 *
 * @param filename NUL-terminated UChar file name.
 * @param perm     fopen-style mode string.
 * @param locale   passed through to u_fopen() / finit_owner().
 * @param codepage passed through to u_fopen() / finit_owner().
 * @return the opened UFILE, or NULL if the name is too long for the local
 *         buffer or if the file could not be opened.
 */
U_CAPI UFILE* U_EXPORT2
u_fopen_u(const UChar   *filename,
        const char    *perm,
        const char    *locale,
        const char    *codepage)
{
    UFILE     *result;
    char buffer[256];

    /* u_austrcpy() does no bounds checking: refuse names that cannot fit.
       NOTE(review): with a multi-byte default codepage one UChar may need
       several bytes - confirm that this bound is sufficient. */
    if (u_strlen(filename) >= (int32_t)sizeof(buffer)) {
        return NULL;
    }

    u_austrcpy(buffer, filename);

    result = u_fopen(buffer, perm, locale, codepage);
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* Try Windows API _wfopen if the above fails. */
    if (!result) {
        wchar_t wperm[40];
        size_t  i;
        FILE   *systemFile;

        /* _wfopen needs a wide mode string: 'perm' is a narrow string and
           cannot simply be cast, so it is widened character by character. */
        for (i = 0; perm[i] != 0 && i < (sizeof(wperm) / sizeof(wperm[0])) - 1; i++) {
            wperm[i] = (wchar_t)(unsigned char)perm[i];
        }
        wperm[i] = 0;

        systemFile = _wfopen((const wchar_t *)filename, wperm);
        if (systemFile) {
            result = finit_owner(systemFile, locale, codepage, TRUE);
        }
        if (!result && systemFile) {
            /* Something bad happened.
               Maybe the converter couldn't be opened.
               systemFile is only closed when _wfopen succeeded:
               fclose(NULL) is undefined. */
            fclose(systemFile);
        }
    }
#endif
    return result; /* not a file leak */
}
Example #8
0
/**
 * Loads the tokens of 'tokens_text_name' into '*tokens', then reads the
 * binary file 'text_cod_name' as a sequence of int token ids and builds a
 * linked list holding the corresponding tokens, in file order.
 * The program exits if 'text_cod_name' cannot be opened.
 * Returns the head of the list, or NULL if no id could be read.
 */
cassys_tokens_list *cassys_load_text(const char *tokens_text_name, const char *text_cod_name, struct text_tokens **tokens){
	int input_mask = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
	*tokens = load_text_tokens(tokens_text_name,input_mask);

	U_FILE *cod = u_fopen(BINARY, text_cod_name,U_READ);
	if (cod == NULL) {
		perror("fopen\n");
		fprintf(stderr,"Cannot open file  %s\n",text_cod_name);
		exit(1);
	}

	cassys_tokens_list *head = NULL;
	cassys_tokens_list *tail = NULL;
	int id;

	/* One int is read per iteration; the loop ends on the first short read */
	while ((int)fread(&id,sizeof(int),1,cod) == 1) {
		cassys_tokens_list *node = new_element((*tokens)->token[id],0);
		if (head == NULL) {
			head = node;
			tail = head;
		} else {
			tail->next_token = node;
			tail = tail->next_token;
		}
	}
	u_fclose(cod);

	return head;
}
/**
 * Loads the initial keyword list from a tok_by_freq.txt file,
 * and turns all those tokens in a list whose primary key is the
 * lower case token:
 * The/20 THE/2 the/50 => the->(The/20 THE/2 the/50)
 */
struct string_hash_ptr* load_tokens_by_freq(char* name,VersatileEncodingConfig* vec) {
U_FILE* f=u_fopen(vec,name,U_READ);
if (f==NULL) return NULL;
Ustring* line=new_Ustring(128);
Ustring* lower=new_Ustring(128);
struct string_hash_ptr* res=new_string_hash_ptr(1024);
int val,pos;
/* We skip the first line of the file, containing the number
 * of tokens
 */
if (EOF==readline(line,f)) {
	fatal_error("Invalid empty file %s\n",name);
}
while (EOF!=readline(line,f)) {
	if (1!=u_sscanf(line->str,"%d%n",&val,&pos)) {
		fatal_error("Invalid line in file %s:\n%S\n",name,line->str);
	}
	u_strcpy(lower,line->str+pos);
	u_tolower(lower->str);
	int index=get_value_index(lower->str,res,INSERT_IF_NEEDED,NULL);
	if (index==-1) {
		fatal_error("Internal error in load_tokens_by_freq\n");
	}
	KeyWord* value=(KeyWord*)res->value[index];
	res->value[index]=new_KeyWord(val,line->str+pos,value);
}
free_Ustring(line);
free_Ustring(lower);
u_fclose(f);
return res;
}
Example #10
0
/**
 * Opens the given file, seeks to its end and returns the position reported
 * by ftell, i.e. the size in bytes. Returns -1 if the file cannot be opened.
 */
long get_file_size(const char* name) {
long size=-1;
U_FILE* f=u_fopen(ASCII,name,U_READ);
if (f!=NULL) {
   fseek(f,0,SEEK_END);
   size=ftell(f);
   u_fclose(f);
}
return size;
}
Example #11
0
/**
 * This function takes two concordance index (in1 and in2) and
 * produces a HTML file (out) that shows the differences between
 * those two concordances.
 */
int diff(const VersatileEncodingConfig* vec,const char* in1,const char* in2,const char* out,
        const char* font,int size,int diff_only) {
char concor1[FILENAME_MAX];
char concor2[FILENAME_MAX];
get_path(in1,concor1);
strcat(concor1,"concord-1.txt");
get_path(in2,concor2);
strcat(concor2,"concord-2.txt");
/* First, we build the two concordances */
create_text_concordances(vec,in1,in2,concor1,concor2);
/* Then, we load the two index */
U_FILE* f1=u_fopen(vec,in1,U_READ);
if (f1==NULL) return 0;
struct match_list* l1=load_match_list(f1,NULL,NULL);
u_fclose(f1);
U_FILE* f2=u_fopen(vec,in2,U_READ);
if (f2==NULL) {
   return 0;
}
struct match_list* l2=load_match_list(f2,NULL,NULL);
u_fclose(f2);
/* We open the output file in UTF8, because the GUI expects this file
 * to be that encoded */
U_FILE* output=u_fopen(UTF8,out,U_WRITE);
if (output==NULL) {
   fatal_error("Cannot open output file %s\n",out);
   return 0;
}
/* We open the two concordance files */
f1=u_fopen(vec,concor1,U_READ);
f2=u_fopen(vec,concor2,U_READ);
/* And then we fill the output file with the differences
 * between the two concordances */
print_diff_HTML_header(output,font,size);
compute_concordance_differences(l1,l2,f1,f2,output,diff_only);
print_diff_HTML_end(output);
free_match_list(l1);
free_match_list(l2);
u_fclose(f1);
u_fclose(f2);
u_fclose(output);
/* We remove the tmp files */
//af_remove(concor1);
//af_remove(concor2);
return 1;
}
/**
 * This function takes a unicode string representing a regular expression and
 * compiles it into a .grf file. It returns 1 in case of success; 0 otherwise.
 */
int reg2grf(const unichar* regexp,const char* name_grf, const VersatileEncodingConfig* vec) {
if (regexp[0]=='\0') {
   error("You must specify a non empty regular expression\n");
   return 0;
}
U_FILE* out=u_fopen(vec,name_grf,U_WRITE);
if (out==NULL) {
   error("Cannot open the output file for the regular expression\n");
   return 0;
}
struct reg2grf_info* INFO=new_reg2grf_info();
/* We create the initial and final states that must have numbers 0 and 1 */
add_state(INFO,u_strdup("<E>"));
add_state(INFO,u_strdup(""));
/* We print the grf header */
u_fprintf(out,"#Unigraph\n");
u_fprintf(out,"SIZE 1313 950\n");
u_fprintf(out,"FONT Times New Roman:  12\n");
u_fprintf(out,"OFONT Times New Roman:B 12\n");
u_fprintf(out,"BCOLOR 16777215\n");
u_fprintf(out,"FCOLOR 0\n");
u_fprintf(out,"ACOLOR 12632256\n");
u_fprintf(out,"SCOLOR 16711680\n");
u_fprintf(out,"CCOLOR 255\n");
u_fprintf(out,"DBOXES y\n");
u_fprintf(out,"DFRAME y\n");
u_fprintf(out,"DDATE y\n");
u_fprintf(out,"DFILE y\n");
u_fprintf(out,"DDIR y\n");
u_fprintf(out,"DRIG n\n");
u_fprintf(out,"DRST n\n");
u_fprintf(out,"FITS 100\n");
u_fprintf(out,"PORIENT L\n");
u_fprintf(out,"#\n");

int input_state;
int output_state;
int result=reg_2_grf(regexp,&input_state,&output_state,INFO);
if (result!=1) {
   u_fclose(out);
   af_remove(name_grf);
   free_reg2grf_info(INFO);
   if (result==0) {
      error("Syntax error in regular expression\n");
   }
   return 0;
}
/* If the compilation has successed, we must link the resulting automaton piece
 * to the grf's initial and final states */
add_transition(0,input_state,INFO);
add_transition(output_state,1,INFO);
save_states(out,INFO);
free_reg2grf_info(INFO);
u_fclose(out);
return 1;
}
Example #13
0
int save_offsets(const VersatileEncodingConfig* vec, const char* filename, const vector_offset* offsets) {
	U_FILE* f_output_offsets = u_fopen(vec, filename, U_WRITE);
	if (f_output_offsets == NULL) {
		error("Cannot create offset file %s\n", filename);
		return 1;
	}
	save_offsets(f_output_offsets, offsets);
	u_fclose(f_output_offsets);
	return 0;
}
Example #14
0
/**
 * Loads an alphabet file and returns the associated 'Alphabet*' structure.
 * 'korean' is forwarded to new_alphabet().
 *
 * If a persistent structure is registered under 'filename', it is returned
 * directly. Otherwise the file is read line by line:
 *   - "#AZ" declares the interval [A..Z]; each character of the interval
 *     is flagged with 1|2 and made equivalent to itself;
 *   - "Aa" flags the first character with 1 and the second with 2;
 *   - "A" alone flags the character with both 1 and 2.
 * Returns NULL if the file cannot be opened or if an interval is reversed.
 */
Alphabet* load_alphabet(const VersatileEncodingConfig* vec,const char* filename,int korean) {
void* cached=get_persistent_structure(filename);
if (cached!=NULL) {
	return (Alphabet*)cached;
}
U_FILE* f=u_fopen(vec,filename,U_READ);
if (f==NULL) {
   return NULL;
}
Alphabet* alphabet=new_alphabet(korean);
int c;
unichar lower,upper;
while ((c=u_fgetc(f))!=EOF) {
   upper=(unichar)c;
   if (upper=='\n') {
      /* Empty lines are ignored */
      continue;
   }
   if (upper!='#') {
      /* Normal line: one or two characters */
      SET_CASE_FLAG_MACRO(upper,alphabet,1);
      lower=(unichar)u_fgetc(f);
      if (lower=='\n') {
         /* Single character (no min/maj distinction like in thai) */
         SET_CASE_FLAG_MACRO(upper,alphabet,2);
         add_letter_equivalence(alphabet,upper,upper);
      } else {
         SET_CASE_FLAG_MACRO(lower,alphabet,2);
         u_fgetc(f); /* reading the \n */
         add_letter_equivalence(alphabet,lower,upper);
      }
      continue;
   }
   /* Interval line: #AZ -> [A..Z] */
   lower=(unichar)u_fgetc(f);
   upper=(unichar)u_fgetc(f);
   if (lower>upper) {
      error("Error in alphabet file: for an interval like #AZ, A must be before Z\n");
      free_alphabet(alphabet);
      u_fclose(f);
      return NULL;
   }
   for (c=lower;c<=upper;c++) {
      SET_CASE_FLAG_MACRO(c,alphabet,1|2);
      add_letter_equivalence(alphabet,(unichar)c,(unichar)c);
   }
   u_fgetc(f); /* reading the \n */
}
u_fclose(f);
return alphabet;
}
Example #15
0
/**
 * Dumps the content of 'snt_offsets' into the binary file 'name', as raw
 * integers. The number of elements must be a multiple of 3; a NULL vector
 * or an invalid element count is a fatal error.
 * Returns 1 if every element was written, 0 otherwise (including when the
 * file cannot be opened).
 */
int save_snt_offsets(vector_int* snt_offsets,const char* name) {
if (snt_offsets==NULL) {
	fatal_error("Unexpected NULL offsets in save_snt_offsets\n");
}
if ((snt_offsets->nbelems%3)!=0) {
	fatal_error("Invalid offsets in save_snt_offsets\n");
}
U_FILE* out=u_fopen(BINARY,name,U_WRITE);
if (out==NULL) {
	return 0;
}
const int written=(int)(fwrite(snt_offsets->tab,sizeof(int),snt_offsets->nbelems,out));
u_fclose(out);
if (written==snt_offsets->nbelems) {
	return 1;
}
return 0;
}
Example #16
0
/**
 * Change this to report differently when a library or commandline tool
 *
 * Formats the message into a fixed-size UChar buffer (longer messages are
 * truncated) and appends it to /tmp/formatter-debug.txt. Nothing is
 * reported if that file cannot be opened.
 */
void jni_report( const char *fmt, ... )
{
    va_list ap;
    /* Zero-filled, and one unit is kept out of reach of u_vsnprintf, so
       the buffer is always terminated even when the message is truncated */
    UChar message[128] = { 0 };
    va_start( ap, fmt );
    u_vsnprintf( message, 127, fmt, ap );
    va_end( ap );
    UFILE *db = u_fopen("/tmp/formatter-debug.txt","a+",NULL,NULL);
    if ( db != NULL )
    {
        /* message is a UChar string: ICU prints those with %S,
           %s is reserved for char strings */
        u_fprintf( db, "%S", message );
        u_fclose( db );
    }
}
Example #17
0
/**
 * Reads the offset file 'name', made of groups of four integers, and
 * returns them as a vector_offset. Returns NULL if the file cannot be
 * opened. A group with fewer than four integers is a fatal error.
 */
vector_offset* load_offsets(const VersatileEncodingConfig* vec,const char* name) {
U_FILE* input=u_fopen(vec,name,U_READ);
if (input==NULL) {
	return NULL;
}
vector_offset* offsets=new_vector_offset();
int v1,v2,v3,v4;
for (;;) {
	const int n_fields=u_fscanf(input,"%d%d%d%d",&v1,&v2,&v3,&v4);
	if (n_fields==EOF) {
		break;
	}
	if (n_fields!=4) {
		fatal_error("Corrupted offset file %s\n",name);
	}
	vector_offset_add(offsets,v1,v2,v3,v4);
}
u_fclose(input);
return offsets;
}
int do_list_file_in_pack_archive_to_file_with_encoding(const char* packFileName, const char* filename_out, Encoding encoding, int filename_only)
{
    U_FILE* fileout = NULL;
    if (filename_out != NULL)
        if (*filename_out != '\0')
        {
            fileout = u_fopen(encoding, filename_out, U_WRITE);
        }

    int result = do_list_file_in_pack_archive_to_filehandle(packFileName, fileout, filename_only);
    if (fileout != NULL)
        u_fclose(fileout);
    return result;
}
Example #19
0
File: global.c Project: nex3/jazz
/* Opens the file named by 'arg' and hands it to jz_load, whose result is
   returned. The process exits with status 1 if the file cannot be opened. */
static jz_val load(JZ_STATE, jz_args* args, jz_val arg) {
  char* path = jz_str_to_chars(jz, jz_to_str(jz, arg));
  UFILE* source = u_fopen(path, "r", NULL, NULL);
  jz_val loaded;

  if (!source) {
    fprintf(stderr, "File does not exist: %s\n", path);
    exit(1);
  }

  /* The name is only needed for the error message above */
  free(path);

  loaded = jz_load(jz, source);
  u_fclose(source);
  return loaded;
}
/**
 * Loads the given DELAF and modifies the given keywords accordingly by
 * replacing any non removed token that appear in a DELAF entry
 * by its lemma. If there are ambiguities, several keywords are
 * generated. Doing that may merge keywords by adding their weights:
 * eats/2 + eaten/3 => eat/5
 */
void filter_keywords_with_dic(struct string_hash_ptr* keywords,char* name,
						VersatileEncodingConfig* vec,Alphabet* alphabet) {
U_FILE* f=u_fopen(vec,name,U_READ);
if (f==NULL) {
	error("Cannot load file %s\n",name);
	return;
}
Ustring* line=new_Ustring(128);
while (EOF!=readline(line,f)) {
	struct dela_entry* e=tokenize_DELAF_line(line->str);
	if (e==NULL) continue;
	lemmatize(e,keywords,alphabet);
	free_dela_entry(e);
}
free_Ustring(line);
u_fclose(f);
}
Example #21
0
/**
 * Reads the start and end positions of each token stored in the file
 * produced by Tokenize's --output_offsets option.
 *
 * Each line is expected to contain three integers; the second and the
 * third ones are appended to the result. A line that does not contain
 * three integers is reported and skipped, so that no undefined or stale
 * value is stored.
 * Returns NULL if the file cannot be opened.
 */
vector_uima_offset* load_uima_offsets(const VersatileEncodingConfig* vec,const char* name) {
U_FILE* f;
f=u_fopen(vec,name,U_READ);
if (f==NULL) {
   return NULL;
}
vector_int* v=new_vector_int();
Ustring* line=new_Ustring();
int a,b,c;
while (EOF!=readline(line,f)) {
	if (3!=u_sscanf(line->str,"%d%d%d",&a,&b,&c)) {
		/* b and c would be uninitialized or left over from the previous line */
		error("Invalid line in offset file %s:\n%S\n",name,line->str);
		continue;
	}
	vector_int_add(v,b);
	vector_int_add(v,c);
}
free_Ustring(line);
u_fclose(f);
return (vector_uima_offset*)v;
}
Example #22
0
struct transducer_name_and_mode_linked_list *load_transducer_list_file(const char *transducer_list_name) {

	U_FILE *file_transducer_list;
    struct transducer_name_and_mode_linked_list * res=NULL;

	file_transducer_list = u_fopen(ASCII, transducer_list_name,U_READ);
	if( file_transducer_list == NULL){
		perror("u_fopen\n");
		fprintf(stderr,"Impossible d'ouvrir le fichier %s\n",transducer_list_name);
		exit(1);
	}

    char line[1024];
    int i=1;
	while (cassys_fgets(line,1024,file_transducer_list) != NULL){
		char *transducer_file_name;
		OutputPolicy transducer_policy;	

		remove_cassys_comments(line);

		transducer_file_name = extract_cassys_transducer_name(line);
		//fprintf(stdout, "transducer name read =%s\n",transducer_file_name);

		transducer_policy = extract_cassys_transducer_policy(line);


		if (transducer_file_name != NULL && transducer_policy != IGNORE_OUTPUTS) {
			res=add_transducer_linked_list_new_name(res,transducer_file_name);
            set_last_transducer_linked_list_mode(res,transducer_policy);
		}
		else {
			if (transducer_file_name == NULL) {
				fprintf(stdout, "Line %d : Empty line\n",i);
			} else if (transducer_policy == IGNORE_OUTPUTS) {
				fprintf(stdout, "Line %d : Transducer policy not recognized\n",i);
			}
		}
        free(transducer_file_name);
		i++;
	}
    u_fclose(file_transducer_list);

	return res;
}
/**
 * Opens the .fst2 file 'fname' and returns a newly allocated
 * Elag_fst_file_in describing it, with its symbols loaded and 'language'
 * attached. The type is always set to FST_GRAMMAR.
 * Returns NULL, after releasing everything acquired so far, if the file
 * cannot be opened, is empty, does not start with a digit or if its
 * symbols cannot be loaded.
 */
Elag_fst_file_in* load_elag_fst2_file(const VersatileEncodingConfig* vec,const char* fname,language_t* language) {
Elag_fst_file_in* in=(Elag_fst_file_in*)malloc(sizeof(Elag_fst_file_in));
if (in==NULL) {
   fatal_alloc_error("load_elag_fst2_file");
}
in->name=strdup(fname);
if (in->name==NULL) {
   fatal_alloc_error("load_elag_fst2_file");
}
in->f=u_fopen(vec,fname,U_READ);
if (in->f==NULL) {
   error("load_fst_file: unable to open '%s' for reading\n",fname);
   free(in->name);
   free(in);
   return NULL;
}
/* The first line must contain the number of automata */
unichar first_line[MAXBUF];
int header_ok=1;
if (u_fgets(first_line,MAXBUF,in->f)==EOF) {
   error("load_fst_file: '%s' is empty\n",fname);
   header_ok=0;
} else if (!u_is_digit(*first_line)) {
   error("load_fst_file: %s: bad file format\n",fname);
   header_ok=0;
}
if (!header_ok) {
   u_fclose(in->f);
   free(in->name);
   free(in);
   return NULL;
}
in->nb_automata=u_parse_int(first_line);
in->language=language;
in->type=FST_GRAMMAR;
/* Position that follows the first line */
in->pos0=(int)ftell(in->f);
in->symbols=new_string_hash_ptr(64);
in->renumber=NULL;
if (load_elag_fst2_tags(in)==-1) {
   error("load_fst_file: %s: cannot load symbols\n",in->name);
   free_string_hash_ptr(in->symbols,(void(*)(void*))free_symbols);
   u_fclose(in->f);
   free(in->name);
   free(in);
   return NULL;
}
in->pos=0;
return in;
}
Example #24
0
/**
 * Loads the given DELA into the given DELA tree.
 */
void load_DELA(const VersatileEncodingConfig* vec,const char* name,struct DELA_tree* tree) {
U_FILE* f=u_fopen(vec,name,U_READ);
if (f==NULL) {
   error("Cannot load dictionary %s\n",name);
   return;
}
u_printf("Loading %s...\n",name);
Ustring* line=new_Ustring(4096);
while (EOF!=readline(line,f)) {
   struct dela_entry* entry=tokenize_DELAF_line(line->str,1);
   if (entry!=NULL) {
      add_entry(tree,entry);
   }
   /* We don't need to free the entry, since it's done (if needed)
    * in the insertion function */
}
free_Ustring(line);
u_fclose(f);
}
Example #25
0
/**
 * Loads snt offsets from the given binary file.
 *
 * The file must contain a whole number of integer triples. Returns the
 * loaded vector (empty for an empty file), or NULL if the file cannot be
 * opened, has an invalid size or cannot be read completely.
 * The file is closed on every path.
 */
vector_int* load_snt_offsets(const char* name) {
U_FILE* f=u_fopen(BINARY,name,U_READ);
if (f==NULL) return NULL;
long size=get_file_size(f);
if (size<0 || size%(3*sizeof(int))!=0) {
	u_fclose(f);
	return NULL;
}
vector_int* v=new_vector_int((int)(size/sizeof(int)));
int n=0;
if (size!=0) {
	n=(int)fread(v->tab,sizeof(int),size/sizeof(int),f);
}
/* Closing here, outside of the test above, so that the handle does not
 * leak when the file is empty */
u_fclose(f);
if (n!=(int)(size/sizeof(int))) {
	free_vector_int(v);
	return NULL;
}
if (size!=0) {
	v->nbelems=v->size;
}
return v;
}
Example #26
0
/*
 * Lua binding: opens a UFILE.
 * Lua arguments: 1 = file name (required), 2 = mode (defaults to "r"),
 * 3 = codepage (optional), 4 = locale (optional).
 * On success pushes a userdata holding the UFILE*, with the value at
 * UFILE_UV_META as metatable, and returns 1. On failure pushes nil and an
 * error message and returns 2.
 */
static int icu_ufile_open(lua_State *L) {
	const char *path = luaL_checkstring(L, 1);
	const char *perm = luaL_optstring(L, 2, "r");
	const char *codepage = luaL_optstring(L, 3, NULL);
	const char *locale = luaL_optstring(L, 4, NULL);
	UFILE *handle = u_fopen(path, perm, locale, codepage);

	if (handle != NULL) {
		UFILE **slot = (UFILE**)lua_newuserdata(L, sizeof(UFILE*));
		*slot = handle;
		lua_pushvalue(L, UFILE_UV_META);
		lua_setmetatable(L, -2);
		return 1;
	}
	lua_pushnil(L);
	lua_pushstring(L, "unable to initialize ufile");
	return 2;
}
/**
 * This function reads a file that contains a list of Elag grammar names,
 * one per line, and compiles them. The automata of the grammars are
 * intersected one after the other; the result is saved into a file named
 * '<outname>-<n>.elg', and the name of each file saved that way is written
 * as a '<name>' line into the file 'outname'. If the result automaton gets
 * too big (more than MAX_GRAM_SIZE states), it is saved and a new one is
 * started, so that several .elg files may be produced.
 *
 * Grammar names that are not absolute paths are taken relative to the
 * directory of 'rulesname'. For each grammar, an existing .elg file is
 * loaded if there is one; otherwise the .fst2 file is compiled.
 *
 * Any failure (file that cannot be opened, grammar that cannot be read,
 * compiled or loaded) is a fatal error. The function always returns 0.
 */
int compile_elag_rules(char* rulesname,char* outname, const VersatileEncodingConfig* vec,language_t* language) {
u_printf("Compilation of %s\n",rulesname);
U_FILE* f=NULL;
U_FILE* frules=u_fopen(ASCII,rulesname,U_READ);
if (frules==NULL) {
   fatal_error("Cannot open file '%s'\n",rulesname);
}
U_FILE* out=u_fopen(ASCII,outname,U_WRITE);
if (out==NULL) {
   fatal_error("cannot open file '%s'\n",outname);
}
/* Name of the file that contains the result automaton */
char fstoutname[FILENAME_MAX];
int nbRules=0;
char buf[FILENAME_MAX];
time_t start_time=time(0);
/* 'res' is the intersection of the grammars read so far; NULL means that
 * there is no pending automaton */
Fst2Automaton* res=NULL;
Fst2Automaton* A;
int fst_number=0;
Ustring* ustr=new_Ustring();

char buf2[FILENAME_MAX];
char directory[FILENAME_MAX];
get_path(rulesname,directory);

while (af_fgets(buf,FILENAME_MAX,frules->f)) {
   /* We read one by one the Elag grammar names in the .lst file */
   chomp(buf);
   if (*buf=='\0') {
      /* If we have an empty line */
      continue;
   }
   if (!is_absolute_path(buf)) {
      /* Relative names are resolved against the directory of the list file */
      strcpy(buf2,buf);
      sprintf(buf,"%s%s",directory,buf2);
   }

   u_printf("\n%s...\n",buf);
   remove_extension(buf);
   strcat(buf,".elg");
   if ((f=u_fopen(ASCII,buf,U_READ))==NULL) {
      /* If the .elg file doesn't exist, we create one */
      remove_extension(buf);
      u_printf("Precompiling %s.fst2\n",buf);
      strcat(buf,".fst2");
      elRule* rule=new_elRule(buf,vec,language);
      if (rule==NULL) {
         fatal_error("Unable to read grammar '%s'\n",buf);
      }
      if ((A=compile_elag_rule(rule,language))==NULL) {
         fatal_error("Unable to compile rule '%s'\n",buf);
      }
      free_elRule(rule);
   } else {
      /* If there is already .elg, we use it */
      /* The file was only opened to test its existence */
      u_fclose(f);
      A=load_elag_grammar_automaton(vec,buf,language);
      if (A==NULL) {
         fatal_error("Unable to load '%s'\n",buf);
      }
   }
   if (A->automaton->number_of_states==0) {
      error("Grammar %s forbids everything!\n",buf);
   }
   if (res!=NULL) {
      /* If there is already an automaton, we intersect it with the new one */
      SingleGraph tmp=res->automaton;
      res->automaton=elag_intersection(language,tmp,A->automaton,GRAMMAR_GRAMMAR);
      /* Both operands of the intersection are not needed anymore */
      free_SingleGraph(tmp,NULL);
      free_Fst2Automaton(A,NULL);
      trim(res->automaton,NULL);
   } else {
      /* First grammar of a new result automaton: 'res' takes ownership of A */
      res=A;
   }
   nbRules++;
   if (res->automaton->number_of_states>MAX_GRAM_SIZE) {
      /* If the automaton is too large, we will split the grammar
       * into several automata */
      elag_minimize(res->automaton,1);
      sprintf(fstoutname,"%s-%d.elg",outname,fst_number++);
      u_fprintf(out,"<%s>\n",fstoutname);
      u_printf("Splitting big grammar in '%s' (%d states)\n",fstoutname,res->automaton->number_of_states);
      u_sprintf(ustr,"%s: compiled elag grammar",fstoutname);
      free(res->name);
      res->name=u_strdup(ustr->str);
      save_automaton(res,fstoutname,vec,FST_GRAMMAR);
      free_Fst2Automaton(res,NULL);
      /* The next grammar will start a new result automaton */
      res=NULL;
   }
}
if (res!=NULL) {
   /* We save the last automaton, if any */
   sprintf(fstoutname,"%s-%d.elg",outname,fst_number++);
   u_fprintf(out,"<%s>\n",fstoutname);
   u_printf("Saving grammar in '%s'(%d states)\n",fstoutname,res->automaton->number_of_states);
   elag_minimize(res->automaton,1);
   u_sprintf(ustr,"%s: compiled elag grammar",fstoutname);
   free(res->name);
   res->name=u_strdup(ustr->str);
   save_automaton(res,fstoutname,vec,FST_GRAMMAR);
   /* NOTE(review): the other calls to free_Fst2Automaton in this function
    * pass NULL where this one passes free_symbol - confirm that the
    * difference is intended */
   free_Fst2Automaton(res,free_symbol);
}
time_t end_time=time(0);
u_fclose(frules);
u_fclose(out);
free_Ustring(ustr);
u_printf("\nDone.\nElapsed time: %.0f s\n",difftime(end_time,start_time));
u_printf("\n%d rule%s from %s compiled in %s (%d automat%s)\n",
         nbRules,(nbRules>1)?"s":"",rulesname,outname,fst_number,
         (fst_number>1)?"a":"on");
return 0;
}
Example #28
0
/**
 * Entry point of the Uncompress program: rebuilds a text dictionary from
 * the compressed dictionary given as last argument.
 *
 * Options handled below:
 *   'o': output file name (must not be empty);
 *   'k': input encoding;
 *   'q': output encoding;
 *   'V': only verify the arguments, then return successfully;
 *   'h': print the usage and return successfully.
 *
 * When no output name is given, it is derived from the input name by
 * replacing its extension with ".dic". The .inf file name is derived from
 * the input name the same way.
 *
 * Returns SUCCESS_RETURN_CODE, USAGE_ERROR_CODE for invalid arguments, or
 * DEFAULT_ERROR_CODE if the output file cannot be opened.
 * NOTE(review): SUCCESS_RETURN_CODE is also returned when the dictionary
 * could not be loaded (d==NULL) - confirm that this is intended.
 */
int main_Uncompress(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char output[FILENAME_MAX]="";
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Uncompress,lopts_Uncompress,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;             
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   /* index is -1 for a short option; otherwise it designates the long
    * option in lopts_Uncompress */
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Uncompress[index].name);
             return USAGE_ERROR_CODE;                         
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

/* Exactly one non-option argument is expected: the input dictionary */
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

if (output[0]=='\0') {
   /* No -o option: the output name is the input name with .dic */
   remove_extension(argv[options.vars()->optind],output);
   strcat(output,".dic");
}

U_FILE* f=u_fopen(&vec,output,U_WRITE);
if (f==NULL) {
   error("Cannot open file %s\n",output);
   return DEFAULT_ERROR_CODE;
}

char inf_file[FILENAME_MAX];
remove_extension(argv[options.vars()->optind],inf_file);
strcat(inf_file,".inf");
u_printf("Uncompressing %s...\n",argv[options.vars()->optind]);
Dictionary* d=new_Dictionary(&vec,argv[options.vars()->optind],inf_file);

if (d!=NULL) {
  rebuild_dictionary(d,f);
}

u_fclose(f);
free_Dictionary(d);
u_printf("Done.\n");

return SUCCESS_RETURN_CODE;
}
Example #29
0
/**
 * Command-line entry point of PolyLex.
 *
 * Parses the options, loads the optional alphabet and the .bin/.inf
 * dictionary, then runs the language-specific analysis on the word list
 * file given as the last argument. The result file is opened in append
 * mode. Once the analysis is done, the word list file is removed and the
 * temporary "<word list>.tmp" file (handed to the analysis as the new
 * unknown word list) is renamed to take its place.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments; the last one is the word list file
 * @return SUCCESS_RETURN_CODE on success, and also when the usage is
 *         printed, when only verifying arguments, or when the word list
 *         file cannot be opened;
 *         USAGE_ERROR_CODE on invalid arguments or when the alphabet
 *         cannot be loaded;
 *         DEFAULT_ERROR_CODE when the dictionary, the temporary file or
 *         the result file cannot be opened
 */
int main_PolyLex(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int language=-1;
char alphabet[FILENAME_MAX]="";
char name_bin[FILENAME_MAX]="";
char output[FILENAME_MAX]="";
char info[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_PolyLex,lopts_PolyLex,&index))) {
   switch(val) {
   case 'D': language=DUTCH; break;
   case 'G': language=GERMAN; break;
   case 'N': language=NORWEGIAN; break;
   case 'R': language=RUSSIAN; break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty dictionary file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(name_bin,options.vars()->optarg);
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'i': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty information file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(info,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_PolyLex[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   /* Reset so that the next iteration can tell short options from long ones */
   index=-1;
}

/* Exactly one positional argument (the word list file) is expected */
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (name_bin[0]=='\0') {
   error("You must specify the .bin dictionary to use\n");
   return USAGE_ERROR_CODE;
}

if (output[0]=='\0') {
   error("You must specify the output dictionary file name\n");
   return USAGE_ERROR_CODE;
}

if (language==-1) {
   error("You must specify the language\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

/* The alphabet is optional: alph stays NULL when none was given */
Alphabet* alph=NULL;
if (alphabet[0]!='\0') {
   u_printf("Loading alphabet...\n");
   alph=load_alphabet(&vec,alphabet);
   if (alph==NULL) {
      error("Cannot load alphabet file %s\n",alphabet);
      return USAGE_ERROR_CODE;
   }
}

/* name_inf is first used as a scratch buffer for the forbidden word list
 * path, then reused below for the .inf file name */
char name_inf[FILENAME_MAX];
struct string_hash* forbiddenWords=NULL;
if (language==DUTCH || language==NORWEGIAN) {
   get_path(name_bin,name_inf);
   strcat(name_inf,"ForbiddenWords.txt");
   forbiddenWords=load_key_list(&vec,name_inf);
   if (forbiddenWords==NULL) {
       /* If there was no file, we don't want to block the process */
       forbiddenWords=new_string_hash(DONT_USE_VALUES);
   }
}

/* The .inf name is built by replacing the last 3 characters of the .bin
 * name with "inf". A name shorter than 3 characters would make the index
 * computation wrap around and write outside name_inf, so reject it. */
size_t name_bin_length=strlen(name_bin);
if (name_bin_length<3) {
   error("Invalid .bin dictionary name %s\n",name_bin);
   free_string_hash(forbiddenWords);
   free_alphabet(alph);
   return USAGE_ERROR_CODE;
}
strcpy(name_inf,name_bin);
name_inf[name_bin_length-3]='\0';
strcat(name_inf,"inf");
Dictionary* d=new_Dictionary(&vec,name_bin,name_inf);
if (d==NULL) {
    error("Cannot load dictionary %s\n",name_bin);
    free_string_hash(forbiddenWords);
    free_alphabet(alph);
    return DEFAULT_ERROR_CODE;
}

/* Temporary file that will replace the word list file at the end */
char tmp[FILENAME_MAX];
strcpy(tmp,argv[options.vars()->optind]);
strcat(tmp,".tmp");

U_FILE* words=u_fopen(&vec,argv[options.vars()->optind],U_READ);
if (words==NULL) {
   error("Cannot open word list file %s\n",argv[options.vars()->optind]);
   free_Dictionary(d);
   free_string_hash(forbiddenWords);
   free_alphabet(alph);
   // here we return 0 in order to do not block the preprocessing
   // in the Unitex/GramLab IDE interface, if no dictionary was applied
   // so that there is no "err" file
   return SUCCESS_RETURN_CODE;
}

U_FILE* new_unknown_words=u_fopen(&vec,tmp,U_WRITE);
if (new_unknown_words==NULL) {
   error("Cannot open temporary word list file %s\n",tmp);
   u_fclose(words);
   free_Dictionary(d);
   free_string_hash(forbiddenWords);
   free_alphabet(alph);
   return DEFAULT_ERROR_CODE;
}

U_FILE* res=u_fopen(&vec,output,U_APPEND);
if (res==NULL) {
   error("Cannot open result file %s\n",output);
   /* Each stream is closed exactly once: 'words' must not be closed twice */
   u_fclose(new_unknown_words);
   u_fclose(words);
   free_Dictionary(d);
   free_string_hash(forbiddenWords);
   free_alphabet(alph);
   return DEFAULT_ERROR_CODE;
}

/* The debug file is optional; failing to open it is reported but does
 * not stop the analysis (debug simply stays NULL) */
U_FILE* debug=NULL;
if ((*info)!='\0') {
   debug=u_fopen(&vec,info,U_WRITE);
   if (debug==NULL) {
      error("Cannot open debug file %s\n",info);
   }
}
struct utags UTAG;

/* language is guaranteed to be one of the four values below, since the
 * -1 default has been rejected during argument validation */
switch(language) {
  case DUTCH:
    analyse_dutch_unknown_words(alph,
                                d,
                                words,
                                res,
                                debug,
                                new_unknown_words,
                                forbiddenWords);
    break;
  case GERMAN:
    analyse_german_compounds(alph,
                             d,
                             words,
                             res,
                             debug,
                             new_unknown_words);
    break;
  case NORWEGIAN:
    analyse_norwegian_unknown_words(alph,
                                    d,
                                    words,
                                    res,
                                    debug,
                                    new_unknown_words,
                                    forbiddenWords);
    break;
  case RUSSIAN:
     init_russian(&UTAG);
     analyse_compounds(alph,
                       d,
                       words,
                       res,
                       debug,
                       new_unknown_words,
                       UTAG);
     break;
}

free_alphabet(alph);
free_Dictionary(d);
/* Both word list streams are closed before the file is replaced */
u_fclose(words);
u_fclose(new_unknown_words);
free_string_hash(forbiddenWords);
af_remove(argv[options.vars()->optind]);
af_rename(tmp,argv[options.vars()->optind]);
u_fclose(res);

if (debug!=NULL) {
   u_fclose(debug);
}

return SUCCESS_RETURN_CODE;
}
Example #30
0
int main_SortTxt(int argc, char* const argv[]) {
  if (argc == 1) {
    usage();
    return SUCCESS_RETURN_CODE;
  }

  struct sort_infos* inf = new_sort_infos();
  if(!inf) {
    return ALLOC_ERROR_CODE;
  }

  int mode = DEFAULT;
  char line_info[FILENAME_MAX] = "";
  char sort_order[FILENAME_MAX] = "";
  VersatileEncodingConfig vec = { DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT,
      DEFAULT_ENCODING_OUTPUT, DEFAULT_BOM_OUTPUT };
  int val, index = -1;
  bool only_verify_arguments = false;
  UnitexGetOpt options;
  while (EOF != (val = options.parse_long(argc, argv, optstring_SortTxt,
      lopts_SortTxt, &index))) {
    switch (val) {
    case 'n':
      inf->REMOVE_DUPLICATES = 1;
      break;
    case 'd':
      inf->REMOVE_DUPLICATES = 0;
      break;
    case 'r':
      inf->REVERSE = -1;
      break;
    case 'o':
      if (options.vars()->optarg[0] == '\0') {
        error("You must specify a non empty sort order file name\n");
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
      }
      strcpy(sort_order, options.vars()->optarg);
      break;
    case 'l':
      if (options.vars()->optarg[0] == '\0') {
        error("You must specify a non empty information file name\n");
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
      }
      strcpy(line_info, options.vars()->optarg);
      break;
    case 't':
      mode = THAI;
      break;
    case 'f':
      inf->factorize_inflectional_codes = 1;
      break;
    case 'V': only_verify_arguments = true;
      break;
    case 'h':
      usage();
      free_sort_infos(inf);
      return SUCCESS_RETURN_CODE;
    case 'k':
      if (options.vars()->optarg[0] == '\0') {
        error("Empty input_encoding argument\n");
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
      }
      decode_reading_encoding_parameter(
          &(vec.mask_encoding_compatibility_input), options.vars()->optarg);
      break;
    case 'q':
      if (options.vars()->optarg[0] == '\0') {
        error("Empty output_encoding argument\n");
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
      }
      decode_writing_encoding_parameter(&(vec.encoding_output),
          &(vec.bom_output), options.vars()->optarg);
      break;
    case ':':
        index == -1 ? error("Missing argument for option -%c\n", options.vars()->optopt) :
                      error("Missing argument for option --%s\n",lopts_SortTxt[index].name);
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
    case '?':
        index == -1 ? error("Invalid option -%c\n", options.vars()->optopt) :
                      error("Invalid option --%s\n", options.vars()->optarg);
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
    }
    index = -1;
  }

  if (options.vars()->optind != argc - 1) {
    error("Invalid arguments: rerun with --help\n");
    free_sort_infos(inf);
    return USAGE_ERROR_CODE;
  }

  if (only_verify_arguments) {
    // freeing all allocated memory
    free_sort_infos(inf);
    return SUCCESS_RETURN_CODE;
  }

  if (sort_order[0] != '\0') {
    read_char_order(&vec, sort_order, inf);
  }

  char new_name[FILENAME_MAX];
  strcpy(new_name, argv[options.vars()->optind]);
  strcat(new_name, ".new");

  inf->f = u_fopen(&vec, argv[options.vars()->optind], U_READ);
  if (inf->f == NULL) {
    error("Cannot open file %s\n", argv[options.vars()->optind]);
    free_sort_infos(inf);
    return DEFAULT_ERROR_CODE;
  }

  inf->f_out = u_fopen(&vec, new_name, U_WRITE);
  if (inf->f_out == NULL) {
    error("Cannot open temporary file %s\n", new_name);
    u_fclose(inf->f);
    free_sort_infos(inf);
    return DEFAULT_ERROR_CODE;
  }

  switch (mode) {
  case DEFAULT:
    sort(inf);
    break;
  case THAI:
    sort_thai(inf);
    break;
  }
  if (line_info[0] != '\0') {
    U_FILE* F = u_fopen(&vec, line_info, U_WRITE);
    if (F == NULL) {
      error("Cannot write %s\n", line_info);
    } else {
      u_fprintf(F, "%d\n", inf->resulting_line_number);
      u_fclose(F);
    }
  }

  u_fclose(inf->f_out);
  u_fclose(inf->f);
  af_remove(argv[options.vars()->optind]);
  af_rename(new_name, argv[options.vars()->optind]);
  free_sort_infos(inf);

  u_printf("Done.\n");
  return SUCCESS_RETURN_CODE;
}