Ejemplo n.º 1
0
CompMat * read_Blast_CompMat(FILE * ifp)
{
  char buffer[MAXLINE];
  int alphabet[MAXLINE];
  char * runner;
  int len;
  int linenum;
  int row;
  CompMat * out;



  /*** 
    Skip over # lines...

    read first line: is alphabet ie
    A R T G .... *
    ***/

  while( fgets(buffer,MAXLINE,ifp) != NULL)
    if( buffer[0] != '#')
      break;

  /** loop over line, getting letters: warn if longer than one, or not a letter **/

  for(len=0,runner=strtok(buffer,spacestr);runner != NULL;runner=strtok(NULL,spacestr)) {
    if( *runner == '*' )
      break; /* end column */

    if( !isalpha((int)*runner) || strlen(runner) > 1 ) {
      warn("In read Blast comp mat, probably an error: trying to interpret [%s] as an amino acid",runner);
      return NULL;
    }
    
    alphabet[len++] = toupper((int)*runner) -'A';
  }


  out = blank_CompMat();
  linenum = 0;

  /** get len lines, each line, get len numbers and put them away **/

  while( fgets(buffer,MAXLINE,ifp) != NULL ) {

    if( linenum >= len )
      break;
    
    for(runner=strtok(buffer,spacestr),row = 0;runner != NULL && row < len;runner=strtok(NULL,spacestr),row++) {
      if( is_integer_string(runner,&out->comp[alphabet[linenum]][alphabet[row]]) == FALSE ) {
	warn("In read Blast comp mat, probably an error: trying to interpret [%s] as a number ... continuing",runner);
      }
    }
    linenum++;
  }

  return out;

}
Ejemplo n.º 2
0
    boolean strip_out_integer_argument(int * argc,char ** argv,char * tag,int * value)
    {
        char * arg;

        if( (arg = strip_out_assigned_argument(argc,argv,tag)) == NULL )
            return FALSE;

        if( is_integer_string(arg,value) == FALSE ) {
            warn("Argument [%s] to [%s] is not an integer. Not changing the value [%d]",arg,tag,value);
            return FALSE;
        }

        return TRUE;
    }
Ejemplo n.º 3
0
DPImplementation * new_DPImplementation_from_argstr(int * argc,char ** argv)
{
  DPImplementation * out;
  char * temp;

  out = DPImplementation_alloc();
  
  if( (strip_out_boolean_argument(argc,argv,"pthreads")) == TRUE ) {
    out->do_threads = TRUE;
  }
  if( (temp=strip_out_assigned_argument(argc,argv,"dbtrace")) != NULL ) {
    if( is_integer_string(temp,&out->db_trace_level) == FALSE ) {
      warn("%s is not an integer argument for dbtrace",temp);
    }
  }

  if( strip_out_boolean_argument(argc,argv,"O") == TRUE ) {
    out->largemem= TRUE;
    /* other optimisations */
  }

  strip_out_boolean_def_argument(argc,argv,"largemem",&out->largemem);

  strip_out_boolean_def_argument(argc,argv,"onemodel",&out->doone);
    
  if( strip_out_boolean_argument(argc,argv,"prob") == TRUE ) {
    out->doprob = TRUE;
  }

  if( strip_out_boolean_argument(argc,argv,"g") == TRUE ) {
    out->dydebug = TRUE;
  }

  if( (temp=strip_out_assigned_argument(argc,argv,"logsum")) != NULL ) {
    out->calcfunc = stringalloc(temp);
  } else {
    out->calcfunc = stringalloc("Probability_logsum");
  }
  out->dycw = new_DycWarning_from_argstr(argc,argv);

  /*  fprintf(stderr,"And %d is extern warning",out->dycw->warn_extern);*/
  return out;
}
Ejemplo n.º 4
0
DPRunImpl * new_DPRunImpl_from_argv(int * argc,char ** argv)
{
  DPRunImpl * out;
  char * temp;

  out = DPRunImpl_alloc();

  if( (temp = strip_out_assigned_argument(argc,argv,"dymem")) != NULL ) {
    if( strcmp(temp,"explicit") == 0) {
      out->memory = DPIM_Explicit;
    } else if( strcmp(temp,"linear") == 0 ) {
      out->memory = DPIM_Linear;
    } else if( strcmp(temp,"default") == 0 ) {
      out->memory = DPIM_Default;
    } else {
      warn("String [%s] for dynamic memory layout is not recognised",temp);
      free_DPRunImpl(out);
      return NULL;
    }
  }

  if( (temp = strip_out_assigned_argument(argc,argv,"kbyte")) != NULL ) {
    if( is_integer_string(temp,&out->kbyte_size) == FALSE ) {
      warn("String [%s] for dynamic memory size is not recognised",temp);
      free_DPRunImpl(out);
      return NULL;
    }
  }


  if(strip_out_boolean_argument(argc,argv,"dydebug") == TRUE ) {
    out->debug  = 1;
    out->memory = DPIM_Explicit;
  }

  return out;
}
Ejemplo n.º 5
0
Gene * read_EMBL_feature_Gene(char * buffer,int maxlen,FILE * ifp)
{
  Gene * gene;
  Transcript * tr;
  Translation * ts;
  Exon * exon;

  char * runner;
  char * base;
  char * next;
  int i;
  int exon_start[MAX_EMBL_EXON_PARSE];
  int exon_end[MAX_EMBL_EXON_PARSE];
  int number;
  int exon_no = 0;
  int isstart = 1;
  int is_complement = 0;
  int is_cds = 0;
  int break_at_end = 0;

  if( strstartcmp(buffer,"FT") != 0 ) {
    warn("passed in a bad line [%s] to be used for feature table parsing",buffer);
    return NULL;
  }

  if( (runner=strtok(buffer+2,spacestr)) == NULL ) {
    warn("Bad embl feature line [%s]",buffer);
    return NULL;
  }

  if( strcmp(runner,"CDS") != 0 && strcmp(runner,"mRNA") != 0 ) {
    warn("passed in a feature line to read_EMBL_feature_Gene with a %s tag. This only handles CDS and mRNA tags",runner);
    return NULL;
  }

  if( strcmp(runner,"CDS") == 0 ) {
    is_cds = TRUE;
  }

  runner = strtok(NULL,spacestr);

  if( runner == NULL ) {
    warn("Bad embl feature line [%s]",buffer);
    return NULL;
  }

  if( strstartcmp(runner,"complement") == 0 ) {
    runner = strchr(runner,'(');
    if( runner == NULL) {
      warn("Could not find bracket on EMBL feature complement line");
      return NULL;
    }
    is_complement = 1;
    runner++;
  }


  if( strstartcmp(runner,"join") == 0 ) {
    runner = strchr(runner,'(');
    runner++;
  } else if( isdigit((int)*runner)  || *runner == '<' ) {
    /** ok - starts with the numbers. We'll cope!**/
  } else {
    warn("Expecting a join statement, got a [%s]",runner);
    return NULL;
  }

  
  /*** ok, now the major number loop ***/

  for(;;) {
    base= runner;
    for(;*runner && *runner != ')' && *runner != '.' && *runner != ',' && *runner != '>' && !isspace((int)*runner);runner++) 
      ;

    /*fprintf(stderr,"Got a runner of %s\n ",runner); */
    if( *runner == '\0' )
      next = runner;
    else next = runner+1;

    if( *runner == ')' ) {
      break_at_end = TRUE; /* out of reading exons */
    }

    
    *runner='\0';
    if( strstartcmp(base,"complement(") == 0 ) {
      is_complement = TRUE;
      for(;*base != '(';base++) 
	;
      base++;
      break_at_end = FALSE; /* we found an bracket too early! */
    }

    if( is_integer_string(base,&number) == FALSE ) {
      warn("Got a non integer [%s] in the middle of a join statement in EMBL parsing",runner);
      return NULL;
    }

    /** put this number away **/

    if( isstart ) {
      exon_start[exon_no] = number;
      isstart = 0;
    } else {
      exon_end[exon_no++] = number;
      isstart = 1;
    }
    if( break_at_end == TRUE)
      break;

    for(runner=next;*runner && (*runner == '.' || isspace((int)*runner));runner++)
      ;

    if( *runner == '\0' ) {
      if( next_feature_tab_line(buffer,maxlen,ifp) == FALSE) {
	warn("In the middle of getting a join statement, got a [%s]. Yuk!",buffer);
	return NULL;
      }

      if( !isdigit((int)buffer[0]) && buffer[0] != '.' && buffer[0] != ',') {
	/*** ok - sometimes people very boring end things in here ***/
	/* warn("In the middle of getting a join statement, got a [%s]. Ugh!",buffer); */
	break;
      }

      runner = buffer;
    }

  }

  if( isstart == 0 ) {
    warn("I have read an uneven number of start-end points in the exon thing. Yuk!");
    return NULL;
  }

  /** runner should now be on bracket **/

  if( is_complement == 1 ) {
    /** ok . should be another bracket. Do we care? **/
  }

  gene = Gene_alloc_len(1);
  tr  = Transcript_alloc_len(exon_no);
  add_Gene(gene,tr);
  tr->parent = gene;

  if( is_complement == 1 ) {
    gene->start = exon_end[exon_no-1]-1;
    gene->end = exon_start[0] -1;

    for(i=exon_no -1;i >= 0;i--) {
      exon = Exon_alloc();
      exon->start = (gene->start+1) - exon_end[i];
      exon->end = (gene->start+1) - exon_start[i] +1;
      add_ex_Transcript(tr,exon);
    }
  } else {
    gene->start = exon_start[0] -1;
    gene->end = exon_end[exon_no-1] -1;

    for(i=0;i<exon_no;i++) {
      exon = Exon_alloc();
      exon->start = exon_start[i] - (gene->start+1);
      exon->end = exon_end[i] - (gene->start+1)+1;
      add_ex_Transcript(tr,exon);
    }
  }

  if( is_cds == TRUE ) {
    ts = Translation_alloc();
    ts->start = 0;
    ts->end = length_Transcript(tr);
    ts->parent = tr;
    add_Transcript(tr,ts);
  }

  /*** read the rest of this feature ***/

  while( next_feature_tab_line(buffer,maxlen,ifp) == TRUE)
    ;

  return gene;

}
Ejemplo n.º 6
0
int main(int argc,char * argv[]) 
{
  FailureType fail = 0 ;
  FailureType should_fail_on = 0;
  int i;
  boolean doinfo = FALSE;
  boolean noaddnumbers = FALSE;
  MethodTypeSet * mts;
  MethodTypeSet * cp;
  boolean no_config_mts = FALSE;
  int prot_level = 0;
  int should_hard_link = 0;
  boolean should_warn_undoc = FALSE;
  char * prot_str;
  char * runner;
  char *config_dir=NULL;
  char buffer[64]; /** really for removing files **/
  char * telegraph;
  
  APIpara api;

  char * pack;
	
  /** we no longer read in configs **/

  mts = standard_dynamite_MethodTypeSet();


  if( strip_out_boolean_argument(&argc,argv,"h") == TRUE 
      || strip_out_boolean_argument(&argc,argv,"u") == TRUE /* arve */
      || argc == 1 ) {
	show_usage(stdout);
	exit(1);
	}

  noaddnumbers   = strip_out_boolean_argument(&argc,argv,"m");
  doinfo        = strip_out_boolean_argument(&argc,argv,"i");
  no_config_mts = strip_out_boolean_argument(&argc,argv,"U");
  should_hard_link = strip_out_boolean_argument(&argc,argv,"l");
  prot_str      = strip_out_assigned_argument(&argc,argv,"P");
  should_warn_undoc = strip_out_boolean_argument(&argc,argv,"D");
  telegraph    = strip_out_assigned_argument(&argc,argv,"tele");

  pack = strip_out_assigned_argument(&argc,argv,"n");

  api.xs_ext = NULL;
  api.typemap_ext = NULL;
  api.pod_ext = NULL;

  api.c_extension_name = strip_out_assigned_argument(&argc,argv,"a");
  api.t_extension_name = strip_out_assigned_argument(&argc,argv,"b");
  api.pfdoc_ext = strip_out_assigned_argument(&argc,argv,"p");
  api.xs_ext = strip_out_assigned_argument(&argc,argv,"x");
  api.typemap_ext = strip_out_assigned_argument(&argc,argv,"tym");
  api.all_callable = strip_out_boolean_argument(&argc,argv,"c");
  api.make_perl = strip_out_boolean_argument(&argc,argv,"perl");
  api.latex_ext = strip_out_assigned_argument(&argc,argv,"exttex");
  api.make_latex = strip_out_boolean_argument(&argc,argv,"latex");


  if( api.make_perl == TRUE) {
   
    if( api.xs_ext == NULL ) {
      api.xs_ext = ".xs";
    }
    if( api.typemap_ext == NULL ) {
      api.typemap_ext = ".typemap";
    }
    if( api.pod_ext == NULL ) {
      api.pod_ext = ".pod";
    }
  }


  if( strip_out_boolean_argument(&argc,argv,"F") == TRUE) {
    should_fail_on = FailureType_dyc_All;
  }

  /* do DPImplementation */

  dpi = new_DPImplementation_from_argstr(&argc,argv);

  if( prot_str != NULL ) {
      if( is_integer_string(prot_str,&prot_level) == FALSE ) {
	  warn("Protection level %s is no integer!");
	  prot_level = 0;
      }
  }

				/* Override/set WISECONFIGDIR on the cmdline. (arve) */
  config_dir = strip_out_assigned_argument(&argc, argv, "I");
  if (config_dir != NULL) {
      set_config_dir(config_dir); 
  }

  if( read_into_MethodTypeSet_filename(mts,"methods") == FALSE){
    warn("You have no config file called 'methods'. This is bad news for dynamite matrices. I will attempt to compile, but you cannot use logical types. 'methods' should be either in the current directory, the $WISECONFIGDIR or your $WISEPERSONALDIR");
  }



  /*** ok,loop over and do it ***/

  if( argc < 1 ) {
    warn("You must have at least one dynamite source file to compile!");
    show_usage(stdout);
    exit(1);
  }

  if( telegraph != NULL ) {
    tele_file= fopen(telegraph,"w");
  }

  
  for(i=1;i<argc;i++) {
    if( mts != NULL) 
      cp = copy_MethodTypeSet(mts); /* actually very cheap */
    if( do_a_file(argv[i],mts,FALSE,prot_level,should_hard_link,should_warn_undoc,noaddnumbers == TRUE ? FALSE : TRUE,pack,&api,&fail) == FALSE ) {
      
      fatal("Terminated dyc one %d argument %s",i,argv[i]);
    }
    if( (should_fail_on == 01 && fail != 0) || (fail & should_fail_on) ) {

      /*** remove files which fail ****/

      /*** ugh this should be done better ***/

      for(runner=argv[i]+strlen(argv[i]) - 1;runner > argv[i] && *runner != '.';runner--)
	;
      if( runner != argv[i] ) {
	*runner = '\0';
	sprintf(buffer,"%s.c",argv[i]);
	if( remove_file(buffer) == FALSE ) {
	  warn("Could not remove file %s from filesystem",buffer);
	}
	sprintf(buffer,"%s.h",argv[i]);
	if( remove_file(buffer) == FALSE ) {
	  warn("Could not remove file %s from filesystem",buffer);
	}
      }
      /* else - well - something bad has happened */
      
      fatal("Failed on file %s due to user defined fails",argv[i]);
    }

    if( mts != NULL ) {
      free_MethodTypeSet(mts);
      mts = cp;
    }
  }

  free_MethodTypeSet(mts);

  return 0;
}  
Ejemplo n.º 7
0
boolean build_objects(void)
{
  boolean ret = TRUE;
  Protein * pro_temp;
  Genomic * gen_temp;
  FILE * ifp;





  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;
  }


  if( tstart_str != NULL ) {
    if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) {
      warn("Could not make %s out as target start",tstart);
      ret = FALSE;
    }
  }

  if( tend_str != NULL ) {
    if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) {
      warn("Could not make %s out as target end",tend);
      ret = FALSE;
    }
  }

  if( is_integer_string(gap_str,&gap) == FALSE ) {
      warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str);
      ret = FALSE;
  }
  

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str);
    ret = FALSE;
  }

  if( is_embl == FALSE ) {
    if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) {
      ret = FALSE;
      warn("Could not read genomic sequence in %s",dna_seq_file);
      gen = NULL;
    } 
  } else {
    embl = read_EMBL_GenomicRegion_file(dna_seq_file);
    if( embl == NULL ) {
      warn("Could not read genomic EMBL file in %s",dna_seq_file);
      gen = NULL;
      ret = FALSE;
    } else {
      gen = hard_link_Genomic(embl->genomic);
      
    }
  }

  if( gen != NULL ) {

    if( tstart != -1 || tend != -1 ) {
      if( tstart == -1 )
	tstart = 0;
      if( tend == -1 ) 
	tend = gen->baseseq->len;
      gen_temp = truncate_Genomic(gen,tstart-1,tend);
      if( gen_temp == NULL ){
	ret = FALSE;
      } else {
	free_Genomic(gen);
	gen = gen_temp;
      }
    } else {
      /* no truncation required */
    }
  

    if( reverse == TRUE ) {
      if( tstart > tend ) {
	warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend);
    }
      
      gen_temp = reverse_complement_Genomic(gen); 
      free_Genomic(gen);
      gen = gen_temp;
    }
  }

  /*
   * Can't truncate on GenomicRegion (for good reasons!).
   * but we want only a section of the EMBL file to be used
   * 
   * So... swap genomic now. Positions in EMBL are still valid,
   * however - some genes will loose their sequence, which will be damaging. ;)
   */

  
  if( is_embl ) {
    free_Genomic(embl->genomic);
    embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */
  }


  if( target_abs == TRUE ) {
    if( is_embl == TRUE ) {
      warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta.");
      ret =  FALSE;
    }

    gen->baseseq->offset = 1;
    gen->baseseq->end  = strlen(gen->baseseq->seq);
  }

  if( alg_str != NULL ) {
    alg = gwrap_alg_type_from_string(alg_str);
  } else {
    if( use_tsm == TRUE ) {
      alg_str = "623L";
    } else {
      alg_str = "623";
    }
    alg = gwrap_alg_type_from_string(alg_str);
  }
      

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;
    }
  }

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;
    }
  }


  if( use_tsm == FALSE ) {
    if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) {
      warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string);
      ret = FALSE;
    }
    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;
	
	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  free_Protein(pro);
	  pro = pro_temp;
	}
      }
    }
  } else {
    /** using a HMM **/
    
    /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/
    /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/
    tsm = HMMer2_read_ThreeStateModel(hmm_file);
    
    
      if( tsm == NULL ) {
	warn("Could not read hmm from %s\n",hmm_file);
	ret = FALSE;
      }  else {
	
	display_char_in_ThreeStateModel(tsm);
	if( hmm_name != NULL ) {
	  if( tsm->name != NULL ) 
	    ckfree(tsm->name);
	  tsm->name = stringalloc(hmm_name);
	}
	
	if( tsm == NULL ) {
	  warn("Could not read %s as a hmm",hmm_file);
	}
	
	/** have to set start/end **/
	set_startend_policy_ThreeStateModel(tsm,startend,30,0.1);
	
      }
  } /* end of else tsm != NULL */
  

  
  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;
    }
  }
   


  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;
  }

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;
  }

  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;
  }

  
  if( strcmp(cfreq_string,"model") == 0 ) {
    model_codon = TRUE;
  } else if ( strcmp(cfreq_string,"flat") == 0 ) {
    model_codon = FALSE;
  } else {
    warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string);
    ret = FALSE;
  }
  

  if( strcmp(splice_string,"model") == 0 ) {
    model_splice = TRUE;
  } else if ( strcmp(splice_string,"flat") == 0 ) {
    model_splice = FALSE;
    gmp->use_gtag_splice = TRUE;
  } else {
    warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string);
    ret = FALSE;
  }

  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;
  }

  if( strcmp(intron_string,"model") == 0 ) {
    use_tied_model = FALSE;
  } else if ( strcmp(intron_string,"tied") == 0 ) {
    use_tied_model = TRUE;
  } else {
    warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string);
    ret = FALSE;
  }



  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;
  }

  if( use_new_stats == 0 ) {
    if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) {
      ret = FALSE;
      warn("Could not read a GeneFrequency file in %s",gene_file);
    }
  } else {
    if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){
      ret=FALSE;
      warn("Could not read gene statistics in %s",new_gene_file);
    }
  } /* end of else using new gene stats */


  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;
    }
  }

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);
  }

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;
  }

  rmd = RandomModelDNA_std();
  return ret;

}
Ejemplo n.º 8
0
GeneStats * read_GeneStats(FILE * ifp)
{
  char buffer[MAXLINE];
  GeneStats * out;
  SeqAlign * temp;
  char **base;
  char **brk;

  out = GeneStats_alloc();
  out->rnd = NULL;

  while( fgets(buffer,MAXLINE,ifp) != NULL ) {
    /*    fprintf(stderr,"Reading (main loop) %s",buffer); */
    if( buffer[0] == '#' )
      continue;
    
    if( buffer[0] == '%' && buffer[1] == '%' )
      break;
    
    if( strstartcmp(buffer,"splice5") == 0 ) {
      base = brk = breakstring(buffer,spacestr);
      if( *brk == NULL || *(brk+1) == NULL || is_integer_string(*(brk+1),&out->splice5_offset) == 0) {
	warn("Cannot read splice5 offset - must be splice5 <number>");
	return NULL;
      }
      ckfree(base);
      temp = read_selex_SeqAlign(ifp);
      if( temp == NULL ) {
	warn("Could not read in selex alignment for splice5");
	continue;
      }

      out->splice5 = temp;
      continue;
    }

    if( strstartcmp(buffer,"splice3") == 0 ) {

      base = brk = breakstring(buffer,spacestr);
      if( *brk == NULL || *(brk+1) == NULL || is_integer_string(*(brk+1),&out->splice3_offset) == 0) {
	warn("Cannot read splice3 offset - must be splice3 <number>");
	return NULL;
      }
      ckfree(base);

      temp = read_selex_SeqAlign(ifp);
      if( temp == NULL ) {
	warn("Could not read in selex alignment for splice5");
	continue;
      }

      out->splice3 = temp;

      continue;
    }
    
    
    if( strstartcmp(buffer,"intron_emission") == 0 ) {
      if( fgets(buffer,MAXLINE,ifp) == NULL ) {
	warn("Could not read in intron emission line");
	break;
      }
      out->intron = get_genestat_emission(buffer);
      if( fgets(buffer,MAXLINE,ifp) != NULL ) {
	continue;
      } else {
	break;
      }
    }

    if( strstartcmp(buffer,"polyp_emission") == 0 ) {
      if( fgets(buffer,MAXLINE,ifp) == NULL ) {
	warn("Could not read in polyp emission line");
	break;
      }
      out->polyp = get_genestat_emission(buffer);
      if( fgets(buffer,MAXLINE,ifp) != NULL ) {
	continue;
      } else {
	break;
      }
    }

    if( strstartcmp(buffer,"rnd_emission") == 0 ) {
      if( fgets(buffer,MAXLINE,ifp) == NULL ) {
	warn("Could not read in rnd emission line");
	break;
      }
      out->rnd = get_genestat_emission(buffer);
      if( fgets(buffer,MAXLINE,ifp) != NULL ) {
	continue;
      } else {
	break;
      }
    }

    if( strstartcmp(buffer,"rndcodon") == 0 ) {
      if( read_codon_GeneStats(out->codon,buffer,ifp) == FALSE ) {
	warn("Problem in reading codon line!");
      }
      continue;
    }

    if( isalpha(buffer[0]) ) { 
      warn("Could not read line %s in genestats reading\n",buffer);
    }
  
  }

  
  assert(out);
  assert(out->splice5);
  assert(out->splice3);
	
  return out;
}
Ejemplo n.º 9
0
boolean build_objects(void)
{
  boolean ret = TRUE;
  Protein * pro_temp;
  SequenceDB * psdb;



  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;
  }

  if( use_single_dna == TRUE ) {
    cdna = read_fasta_file_cDNA(dna_seq_file);
    if( cdna == NULL ) {
      warn("Could not open single dna sequence in %s",dna_seq_file);
      ret = FALSE;
    }
  } else {
    sdb = single_fasta_SequenceDB(dna_seq_file);
    
 
    if( sdb == NULL ) {
      warn("Could not build a sequence database on %s",dna_seq_file);
      ret = FALSE;
    }
  }

  rm = default_RandomModel();


  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;
    }
  }
      
  if( is_integer_string(gap_str,&gap) == FALSE ) {
    warn("Could not get gap string number %s",gap_str);
    ret = FALSE;
  }

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not get ext string number %s",ext_str);
    ret = FALSE;
  }

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;
    }
  }

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;
    }
  }


  if( aln_number_str != NULL ) {
    if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) {
      warn("Weird aln number string %s...\n",aln_number_str);
      ret = FALSE;
    }
  }

  if( report_str != NULL ) {
    if( is_integer_string(report_str,&report_stagger) == FALSE ) {
      warn("Weird report stagger asked for %s",report_str);
      ret = FALSE;
    }
  }


  if( use_pfam1 == TRUE ) {
    tsmdb = new_PfamHmmer1DB_ThreeStateDB(protein_file);
    if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) {
      warn("Unable to set global/local switch on threestatedb");
      ret = FALSE;
    }

  } else if ( use_pfam2 == TRUE ) {
    tsmdb = HMMer2_ThreeStateDB(protein_file);
    if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) {
      warn("Unable to set global/local switch on threestatedb");
      ret = FALSE;
    }

  } else if ( use_tsm == TRUE) {
    /** using a HMM **/

    tsm = HMMer2_read_ThreeStateModel(protein_file);

    if( tsm == NULL ) {
      warn("Could not read hmm from %s\n",protein_file);
      ret = FALSE;
    }  else {

      display_char_in_ThreeStateModel(tsm);
      if( hmm_name != NULL ) {
	if( tsm->name != NULL ) 
	  ckfree(tsm->name);
	tsm->name = stringalloc(hmm_name);
      } else {
	if( tsm->name == NULL ) {
	  tsm->name = stringalloc(protein_file);
	}
      }

      
      
      /** have to set start/end **/

      set_startend_policy_ThreeStateModel(tsm,startend,15,0.2);
      tsmdb = new_single_ThreeStateDB(tsm,rm);
      if( tsmdb == NULL ) {
	warn("Could not build a threestatemodel database from a single tsm. Weird!");
	ret = FALSE;
      }
    } /* end of else tsm != NULL */
  } /* end of else is tsm */
  else if( use_single_pro ) {


    if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) {
      warn("Proteins can only have local/global startend policies set, not %s",startend_string);
      ret = FALSE;
    }

    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;

	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  free_Protein(pro);
	  pro = pro_temp;
	}
      }


      if( startend == TSM_global) 
	tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext);
      else
	tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext);

      if( tsm == NULL ) {
	warn("Could not build ThreeStateModel from a single protein sequence...");
	ret = FALSE; 
      } else {
	tsmdb = new_single_ThreeStateDB(tsm,rm);
	if( tsmdb == NULL ) {
	  warn("Could not build a threestatemodel database from a single tsm. Weird!");
	  ret = FALSE;
	}
      } /* end of could build a TSM */
    } /* else is a real protein */  

  } /* end of else is single protein */
  else if (use_db_pro == TRUE ) {
    psdb = single_fasta_SequenceDB(protein_file);
    tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext);
    free_SequenceDB(psdb);
  }
  else {
    warn("No protein input file! Yikes!");
  }

  /***
  if( use_tsm == FALSE ) {
  } else {
  ****/


  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;
    }
  }


  if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) {
    warn("Could not convert %s to a double",evalue_search_str);
    ret = FALSE;
  }
  
  if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) {
    warn("Could not convert %s to a double",search_cutoff_str);
    ret = FALSE;
  }


  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;
  }

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;
  }


  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;
  }
  


  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;
  }

   
  if( alg_str != NULL ) {
    alg = alg_estwrap_from_string(alg_str);
  } else {
    alg_str = "312";
    alg = alg_estwrap_from_string(alg_str);
  }

  if( aln_alg_str != NULL ) {
    aln_alg = alg_estwrap_from_string(aln_alg_str);
  } else {
    /* if it is a protein, don't loop */
    if( use_single_pro == TRUE || use_db_pro == TRUE ) 
      aln_alg_str = "333";
    else 
      aln_alg_str = "333L";
    aln_alg = alg_estwrap_from_string(aln_alg_str);
  }


  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;
  }

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);
  }

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;
  }

  rmd = RandomModelDNA_std();


  cps = flat_cDNAParser(indel_error);
  cm = flat_CodonMapper(ct);
  sprinkle_errors_over_CodonMapper(cm,subs_error);

  return ret;

}
Ejemplo n.º 10
0
boolean user_DebugMatrix(DebugMatrix * de)
{
  char buffer[MAXLINE];
  char ** base;
  char ** brk;
  FILE * in;
  FILE * out;

  DebugBreakPoint * bp;

  assert(de);
  assert(de->in);
  assert(de->out);

  in  = de->in;
  out = de->out;

  /* set reset to FALSE */
  de->reset = 0;

  fprintf(out,"Entering dynamite debugger. Type help for help\n");

  while(1) {
    fprintf(out,"Dy %5d:%5d max: %d >",de->currenti,de->currentj,de->max_score);
    
    fgets(buffer,MAXLINE,in);
    
    if( strstartcmp(buffer,"quit") == 0 ) {
      exit(1);
    }

    if( strstartcmp(buffer,"show") == 0 ) {
      show_DebugMatrix(de,buffer);
      continue;
    }
    
    if( strstartcmp(buffer,"run") == 0 ) {
      /* return to calling function */
      return 0;
    }

    if( strstartcmp(buffer,"break") == 0 ) {
      base = brk = breakstring(buffer,spacestr);
      brk++;
      
      if( *brk == NULL || *(brk+1) == NULL ) {
	fprintf(out,">>>> break must have i j positions\n");
	continue;
      } else {
	bp = DebugBreakPoint_alloc();
	bp->type = MDBP_Cursor;
	/* reset cursor positions */
	if( is_integer_string(*brk,&bp->posi) == FALSE ) {
	  fprintf(out,">>>> i position not an integer.\n");
	} 
	brk++;
	if( is_integer_string(*brk,&bp->posj) == FALSE ) {
	  fprintf(out,">>>> j position not an integer.\n");
	} 	
	fprintf(out,"Adding cursor break point %d,%d\n",bp->posi,bp->posj);
	add_bp_DebugMatrix(de,bp);
      }
      ckfree(base);
      continue;
    }

    if( strstartcmp(buffer,"set") == 0 ) {
      base = brk = breakstring(buffer,spacestr);
      brk++;
      if( *brk == NULL || *(brk+1) == NULL ) {
	fprintf(out,">>>> set must have i j positions\n");
      } else {
	/* reset cursor positions */
	if( is_integer_string(*brk,&de->currenti) == FALSE ) {
	  fprintf(out,">>>> i position not an integer.\n");
	} 
	if( is_integer_string(*brk,&de->currenti) == FALSE ) {
	  fprintf(out,">>>> j position not an integer.\n");
	} 	
      }
      de->reset = 1;
      ckfree(base);
      continue;
    }

  }

}