Beispiel #1
0
/**
 * Prepare a global context for indel processing.
 *
 * Opens the first read file, records its size, checks that the index files
 * for the configured prefix and space type exist, counts the index blocks
 * and loads the chromosome offset table.
 *
 * @param context  global context; its config.first_read_file,
 *                 config.index_prefix and config.space_type are read here.
 * @return 0 on success, -1 when the read file or the index cannot be used.
 */
int load_global_context_forindel(global_context_t * context)
{
	char tmp_fname [MAX_FILE_NAME_LENGTH];
	struct stat ginp1_stat;
	char space_char;

	warning_file_limit();

	context -> input_reads.avg_read_length = guess_reads_density_format(context->config.first_read_file , 0, NULL, NULL);
	/* A negative estimate is replaced by a default length, so it can never
	 * be the reason for rejecting the file below. */
	if(context -> input_reads.avg_read_length<0 )
		context -> input_reads.avg_read_length = 250;

	if(geinput_open_sam(context->config.first_read_file, &context->input_reads.first_read_file, context -> input_reads.is_paired_end_reads))
	{
		SUBREADprintf("Unable to open '%s' as a read file. Please check if it exists, you have the permission to read it, and it is in the FASTQ/FASTA/PLAIN format.\n", context->config.first_read_file);
		return -1;
	}

	/* Do not read st_size from an uninitialised struct when stat() fails. */
	if(stat(context->config.first_read_file , &ginp1_stat) != 0)
	{
		SUBREADprintf("Unable to get the size of the read file '%s'.\n", context->config.first_read_file);
		return -1;
	}
	context->input_reads.first_read_file_size = ginp1_stat.st_size;

	/* snprintf keeps an over-long prefix from overflowing tmp_fname; a
	 * truncated name simply fails the existence test. */
	snprintf(tmp_fname, sizeof tmp_fname, "%s.reads", context->config.index_prefix);
	if(!does_file_exist(tmp_fname))
	{
		SUBREADprintf("Unable to open index '%s'. Please make sure that the correct prefix is specified and you have the permission to read these files. For example, if there are files '/opt/my_index.reads', '/opt/my_index.files' and etc, the index prefix should be specified as '/opt/my_index' without any suffix. \n", context->config.index_prefix);
		return -1;
	}

	/* The first index block must exist in the same space as the reads. */
	space_char = (context->config.space_type == GENE_SPACE_COLOR) ? 'c' : 'b';
	snprintf(tmp_fname, sizeof tmp_fname, "%s.00.%c.tab", context->config.index_prefix, space_char);
	if(!does_file_exist(tmp_fname))
	{
		SUBREADprintf("Your reads are in the %s space but the index was not built in the same space. Unable to process the reads.\n", context->config.space_type == GENE_SPACE_COLOR?"color":"base");
		return -1;
	}

	/* Count consecutive index blocks: <prefix>.00.x.tab, <prefix>.01.x.tab, ... */
	context->index_block_number = 0;
	while(1)
	{
		int name_len = snprintf(tmp_fname, sizeof tmp_fname, "%s.%02d.%c.tab", context->config.index_prefix, context->index_block_number, space_char);
		/* Stop on truncation: a truncated name would no longer depend on the block number. */
		if(name_len < 0 || (size_t)name_len >= sizeof tmp_fname) break;
		if(!does_file_exist(tmp_fname)) break;
		context->index_block_number ++;
	}

	context->current_index_block_number = 0;
	load_offsets(&context->chromosome_table, context->config.index_prefix);

	context->all_processed_reads = 0;
	context->all_mapped_reads = 0;

	/* NOTE(review): assumes all_value_indexes holds at least 100 entries - confirm against its declaration. */
	memset( context->all_value_indexes , 0 , 100 * sizeof(gene_value_index_t));

	return 0;
}
/**
 * Create a new flux_cube structure and fill it from a fluxcube file.
 *
 * The file must have at least three extensions, otherwise a fatal
 * message is issued. Extensions 3 .. n_ext are each loaded as one flux
 * image, so the cube holds (n_ext - 2) flux images. The offsets and the
 * segmentation image are loaded through their dedicated helpers.
 *
 * @param  fcube_file - the file name of the fluxcube
 *
 * @return the newly allocated and filled flux_cube structure
 */
flux_cube *
load_fluxcube(const char fcube_file[])
{
  flux_cube *fcube;
  int n_ext;
  int nflux;
  int idx;

  // get the number of extensions
  n_ext = FITSextnum(fcube_file);
  if (n_ext < 3)
    {
      // NOTE(review): presumably aXe_M_FATAL does not return - confirm,
      // since nflux would be <= 0 below otherwise
      aXe_message(aXe_M_FATAL, __FILE__, __LINE__,
                  "aXe_PETCONT: " "the fluxcube file %s has only %i extensions!\n",
                  fcube_file, n_ext);
    }

  // all extensions except the first two are flux images;
  // allocate the structure for that many
  nflux = n_ext - 2;
  fcube = alloc_fluxcube(nflux);

  // load XOFFS and YOFFS
  load_offsets(fcube_file, fcube);

  // load the segmentation image into the structure
  fcube->segmentation = load_segmentation(fcube_file);

  // load the flux images; the first one is in extension 3
  for (idx = 0; idx < nflux; idx++)
    {
      fcube->fluxims[idx] = load_fluximage(fcube_file, idx + 3);
    }

  // store the number of flux images
  fcube->n_fimage = nflux;

  // order the flux images and store the
  // vector with the ordered indices
  fcube->fimage_order = order_fluxims(fcube);

  return fcube;
}
Beispiel #3
0
/**
 * Command-line entry point of Fst2Txt.
 *
 * Parses the options into a fst2txt_parameters structure, optionally loads
 * the input offsets and opens the output offsets file, chooses a temporary
 * output text file when none was given, and then calls main_fst2txt().
 * Exactly one positional argument is expected after the options; it is
 * stored as the fst file name.
 *
 * File-name arguments that do not fit in a FILENAME_MAX buffer are rejected
 * with USAGE_ERROR_CODE instead of overflowing the buffer.
 *
 * @param argc  number of command-line arguments
 * @param argv  command-line arguments
 * @return SUCCESS_RETURN_CODE when only usage was printed or only argument
 *         verification was requested, USAGE_ERROR_CODE / ALLOC_ERROR_CODE /
 *         DEFAULT_ERROR_CODE on the corresponding failures, otherwise the
 *         value returned by main_fst2txt().
 */
int main_Fst2Txt(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

struct fst2txt_parameters* p=new_fst2txt_parameters();
char in_offsets[FILENAME_MAX]="";
char out_offsets[FILENAME_MAX]="";
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;

/* NOTE(review): giving -t, -o or -a more than once overwrites the previously
 * strdup'ed name without releasing it - confirm how the parameters own
 * these strings before freeing here. */
while (EOF!=(val=options.parse_long(argc,argv,optstring_Fst2Txt,lopts_Fst2Txt,&index))) {
   switch(val) {
   case 't': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty text file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             p->input_text_file=strdup(options.vars()->optarg);
             if (p->input_text_file==NULL) {
                alloc_error("main_Fst2Txt");
                free_fst2txt_parameters(p);
                return ALLOC_ERROR_CODE;
             }
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty text output file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             p->output_text_file=strdup(options.vars()->optarg);
             p->output_text_file_is_temp=0;
             if (p->output_text_file==NULL) {
                alloc_error("main_Fst2Txt");
                free_fst2txt_parameters(p);
                return ALLOC_ERROR_CODE;
             }
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             p->alphabet_file=strdup(options.vars()->optarg);
             if (p->alphabet_file==NULL) {
                alloc_error("main_Fst2Txt");
                free_fst2txt_parameters(p);
                return ALLOC_ERROR_CODE;
             }
             break;
   case 'M': p->output_policy=MERGE_OUTPUTS; break;
   case 'R': p->output_policy=REPLACE_OUTPUTS; break;
   case 'c': p->tokenization_policy=CHAR_BY_CHAR_TOKENIZATION; break;
   case 'w': p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; break;
   case 's': p->space_policy=START_WITH_SPACE; break;
   case 'x': p->space_policy=DONT_START_WITH_SPACE; break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             free_fst2txt_parameters(p);
             return SUCCESS_RETURN_CODE;
   case ':': if (index==-1) {
                error("Missing argument for option -%c\n",options.vars()->optopt);
             } else {
                error("Missing argument for option --%s\n",lopts_Fst2Txt[index].name);
             }
             free_fst2txt_parameters(p);
             return USAGE_ERROR_CODE;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(p->vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(p->vec.encoding_output),&(p->vec.bom_output),options.vars()->optarg);
             break;
   case '$': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_offsets argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             /* the name is copied into a fixed-size buffer: refuse what does not fit */
             if (strlen(options.vars()->optarg)>=sizeof(in_offsets)) {
                error("input_offsets argument is too long\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             strcpy(in_offsets,options.vars()->optarg);
             break;
   case '@': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_offsets argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             /* same bound as for the input offsets name */
             if (strlen(options.vars()->optarg)>=sizeof(out_offsets)) {
                error("output_offsets argument is too long\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             strcpy(out_offsets,options.vars()->optarg);
             break;
   case 'l': p->convLFtoCRLF=0; break;
   case 'r': p->keepCR = 1; break;
   case '?': if (index==-1) {
                error("Invalid option -%c\n",options.vars()->optopt);
             } else {
                error("Invalid option --%s\n",options.vars()->optarg);
             }
             free_fst2txt_parameters(p);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

/* exactly one positional argument (the fst file) must remain */
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free_fst2txt_parameters(p);
   return USAGE_ERROR_CODE;
}

if (p->input_text_file==NULL) {
   error("You must specify the text file\n");
   free_fst2txt_parameters(p);
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
   // freeing all allocated memory
   free_fst2txt_parameters(p);
   return SUCCESS_RETURN_CODE;
}

if (out_offsets[0]!='\0') {
   /* We deal with offsets only if the program is expected to produce some */
   if (in_offsets[0]!='\0') {
      p->v_in_offsets=load_offsets(&(p->vec),in_offsets);
      if (p->v_in_offsets==NULL) {
         error("Cannot load offset file %s\n",in_offsets);
         free_fst2txt_parameters(p);
         return DEFAULT_ERROR_CODE;
      }
   } else {
      /* If there is no input offset file, we create an empty offset vector
       * in order to avoid testing whether the vector is NULL or not */
      p->v_in_offsets=new_vector_offset(1);
   }
   p->f_out_offsets=u_fopen(&(p->vec),out_offsets,U_WRITE);
   if (p->f_out_offsets==NULL) {
      error("Cannot create file %s\n",out_offsets);
      free_fst2txt_parameters(p);
      return DEFAULT_ERROR_CODE;
   }
}

if (p->output_text_file == NULL) {
   /* No output file given: write to <input without extension>.tmp */
   char tmp[FILENAME_MAX];
   /* presumably remove_extension() writes at most as many characters as
    * its input holds, so bounding the input bounds the copy */
   if (strlen(p->input_text_file)>=sizeof(tmp)) {
      error("The text file name is too long\n");
      free_fst2txt_parameters(p);
      return USAGE_ERROR_CODE;
   }
   remove_extension(p->input_text_file, tmp);
   /* sizeof(".tmp") counts the terminating NUL as well */
   if (strlen(tmp)+sizeof(".tmp")>sizeof(tmp)) {
      error("The text file name is too long\n");
      free_fst2txt_parameters(p);
      return USAGE_ERROR_CODE;
   }
   strcat(tmp, ".tmp");
   p->output_text_file_is_temp=1;
   p->output_text_file = strdup(tmp);
   if (p->output_text_file == NULL) {
      alloc_error("main_Fst2Txt");
      free_fst2txt_parameters(p);
      return ALLOC_ERROR_CODE;
   }
}
p->fst_file=strdup(argv[options.vars()->optind]);
if (p->fst_file==NULL) {
   alloc_error("main_Fst2Txt");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}

int result=main_fst2txt(p);

free_fst2txt_parameters(p);
return result;
}
int main_Normalize(int argc,char* const argv[]) {
if (argc==1) {
  usage();
  return SUCCESS_RETURN_CODE;
}
int mode=KEEP_CARRIAGE_RETURN;
int separator_normalization=1;
char rules[FILENAME_MAX]="";
char input_offsets[FILENAME_MAX]="";
char output_offsets[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int convLFtoCRLF=1;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Normalize,lopts_Normalize,&index))) {
   switch(val) {
   case 'l': convLFtoCRLF=0; break;
   case 'n': mode=REMOVE_CARRIAGE_RETURN; break;
   case 'r': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty replacement rule file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(rules,options.vars()->optarg);
             break;
   case 1: separator_normalization=0; break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
              error("Empty input_encoding argument\n");
              return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
              error("Empty output_encoding argument\n");
              return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case '$': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty input offset file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(input_offsets,options.vars()->optarg);
             break;
   case '@': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty output offset file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(output_offsets,options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Normalize[index].name);
             return USAGE_ERROR_CODE;
             break;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
             break;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
  error("Invalid arguments: rerun with --help\n");
  return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

vector_offset* v_input_offsets=NULL;
vector_offset* v_output_offsets=NULL;
U_FILE* f_output_offsets=NULL;

if (output_offsets[0]!='\0') {
  /* We deal with offsets only if we have to produce output offsets */
  if (input_offsets[0]!='\0') {
    v_input_offsets=load_offsets(&vec,input_offsets);
  }
  f_output_offsets=u_fopen(&vec, output_offsets, U_WRITE);
  if (f_output_offsets==NULL) {
    error("Cannot create offset file %s\n",output_offsets);
    return DEFAULT_ERROR_CODE;
  }
  v_output_offsets=new_vector_offset();
}
char tmp_file[FILENAME_MAX];
get_extension(argv[options.vars()->optind],tmp_file);
if (!strcmp(tmp_file, ".snt")) {
   /* If the file to process has already the .snt extension, we temporary rename it to
   * .snt.normalizing */
  strcpy(tmp_file,argv[options.vars()->optind]);
  strcat(tmp_file,".normalizing");
  af_rename(argv[options.vars()->optind],tmp_file);
} else {
   strcpy(tmp_file,argv[options.vars()->optind]);
}
/* We set the destination file */
char dest_file[FILENAME_MAX];
remove_extension(argv[options.vars()->optind],dest_file);
strcat(dest_file,".snt");
u_printf("Normalizing %s...\n",argv[options.vars()->optind]);

int return_value = normalize(tmp_file,
                             dest_file,
                             &vec,
                             mode,
                             convLFtoCRLF,
                             rules,
                             v_output_offsets,
                             separator_normalization);
u_printf("\n");
/* If we have used a temporary file, we delete it */
if (strcmp(tmp_file,argv[options.vars()->optind])) {
   af_remove(tmp_file);
}
process_offsets(v_input_offsets,v_output_offsets,f_output_offsets);
u_fclose(f_output_offsets);
free_vector_offset(v_input_offsets);
free_vector_offset(v_output_offsets);
u_printf((return_value==SUCCESS_RETURN_CODE) ? "Done.\n" : "Unsuccessfull.\n");

return return_value;
}