示例#1
0
GeneModel * GeneModel_from_GeneModelParam(GeneModelParam * p)
{
  GeneStats * st;
  GeneModel * out;

  assert(p);

  st = GeneStats_from_GeneModelParam(p);

  out = GeneModel_from_GeneStats(st,p);

  free_GeneStats(st);

  return out;
}
示例#2
0
int main(int argc,char **argv)
{
  int i;
  AlignGeneModelParam * agmp;
  GeneStats * gs;
  GeneModelParam * gmp = NULL;
  CompProb * comp_prob;
  DnaProbMatrix * dm;
  CodonTable * ct;
  RandomModel * rm;

  Sequence * test;

  ct = read_CodonTable_file("codon.table");

  rm = default_RandomModel();

  comp_prob = read_Blast_file_CompProb("wag85");

  gmp = new_GeneModelParam_from_argv(&argc,argv);

  dm = DnaProbMatrix_from_match(0.8,NMaskType_VARIABLE);
  
  if((gs=GeneStats_from_GeneModelParam(gmp)) == NULL ) {
    fatal("Could not build gene stats");
  }
  
  agmp = std_AlignGeneModelParam(comp_prob,dm,ct,gs);

  test = read_fasta_file_Sequence(argv[1]);
  
  assert(test);
  
  for(i=0;i<test->len;i++) {
    fprintf(stdout,"%c ss5 %.6f ss3 %.6f\n",test->seq[i],prob_SpliceSiteProb(agmp->ss5,test,i),prob_SpliceSiteProb(agmp->ss3,test,i));
  }


}
示例#3
0
boolean build_objects(void)
{
  boolean ret = TRUE;
  Protein * pro_temp;
  Genomic * gen_temp;
  FILE * ifp;





  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;
  }


  if( tstart_str != NULL ) {
    if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) {
      warn("Could not make %s out as target start",tstart);
      ret = FALSE;
    }
  }

  if( tend_str != NULL ) {
    if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) {
      warn("Could not make %s out as target end",tend);
      ret = FALSE;
    }
  }

  if( is_integer_string(gap_str,&gap) == FALSE ) {
      warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str);
      ret = FALSE;
  }
  

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str);
    ret = FALSE;
  }

  if( is_embl == FALSE ) {
    if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) {
      ret = FALSE;
      warn("Could not read genomic sequence in %s",dna_seq_file);
      gen = NULL;
    } 
  } else {
    embl = read_EMBL_GenomicRegion_file(dna_seq_file);
    if( embl == NULL ) {
      warn("Could not read genomic EMBL file in %s",dna_seq_file);
      gen = NULL;
      ret = FALSE;
    } else {
      gen = hard_link_Genomic(embl->genomic);
      
    }
  }

  if( gen != NULL ) {

    if( tstart != -1 || tend != -1 ) {
      if( tstart == -1 )
	tstart = 0;
      if( tend == -1 ) 
	tend = gen->baseseq->len;
      gen_temp = truncate_Genomic(gen,tstart-1,tend);
      if( gen_temp == NULL ){
	ret = FALSE;
      } else {
	free_Genomic(gen);
	gen = gen_temp;
      }
    } else {
      /* no truncation required */
    }
  

    if( reverse == TRUE ) {
      if( tstart > tend ) {
	warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend);
    }
      
      gen_temp = reverse_complement_Genomic(gen); 
      free_Genomic(gen);
      gen = gen_temp;
    }
  }

  /*
   * Can't truncate on GenomicRegion (for good reasons!).
   * but we want only a section of the EMBL file to be used
   * 
   * So... swap genomic now. Positions in EMBL are still valid,
   * however - some genes will loose their sequence, which will be damaging. ;)
   */

  
  if( is_embl ) {
    free_Genomic(embl->genomic);
    embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */
  }


  if( target_abs == TRUE ) {
    if( is_embl == TRUE ) {
      warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta.");
      ret =  FALSE;
    }

    gen->baseseq->offset = 1;
    gen->baseseq->end  = strlen(gen->baseseq->seq);
  }

  if( alg_str != NULL ) {
    alg = gwrap_alg_type_from_string(alg_str);
  } else {
    if( use_tsm == TRUE ) {
      alg_str = "623L";
    } else {
      alg_str = "623";
    }
    alg = gwrap_alg_type_from_string(alg_str);
  }
      

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;
    }
  }

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;
    }
  }


  if( use_tsm == FALSE ) {
    if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) {
      warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string);
      ret = FALSE;
    }
    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;
	
	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  free_Protein(pro);
	  pro = pro_temp;
	}
      }
    }
  } else {
    /** using a HMM **/
    
    /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/
    /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/
    tsm = HMMer2_read_ThreeStateModel(hmm_file);
    
    
      if( tsm == NULL ) {
	warn("Could not read hmm from %s\n",hmm_file);
	ret = FALSE;
      }  else {
	
	display_char_in_ThreeStateModel(tsm);
	if( hmm_name != NULL ) {
	  if( tsm->name != NULL ) 
	    ckfree(tsm->name);
	  tsm->name = stringalloc(hmm_name);
	}
	
	if( tsm == NULL ) {
	  warn("Could not read %s as a hmm",hmm_file);
	}
	
	/** have to set start/end **/
	set_startend_policy_ThreeStateModel(tsm,startend,30,0.1);
	
      }
  } /* end of else tsm != NULL */
  

  
  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;
    }
  }
   


  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;
  }

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;
  }

  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;
  }

  
  if( strcmp(cfreq_string,"model") == 0 ) {
    model_codon = TRUE;
  } else if ( strcmp(cfreq_string,"flat") == 0 ) {
    model_codon = FALSE;
  } else {
    warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string);
    ret = FALSE;
  }
  

  if( strcmp(splice_string,"model") == 0 ) {
    model_splice = TRUE;
  } else if ( strcmp(splice_string,"flat") == 0 ) {
    model_splice = FALSE;
    gmp->use_gtag_splice = TRUE;
  } else {
    warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string);
    ret = FALSE;
  }

  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;
  }

  if( strcmp(intron_string,"model") == 0 ) {
    use_tied_model = FALSE;
  } else if ( strcmp(intron_string,"tied") == 0 ) {
    use_tied_model = TRUE;
  } else {
    warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string);
    ret = FALSE;
  }



  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;
  }

  if( use_new_stats == 0 ) {
    if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) {
      ret = FALSE;
      warn("Could not read a GeneFrequency file in %s",gene_file);
    }
  } else {
    if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){
      ret=FALSE;
      warn("Could not read gene statistics in %s",new_gene_file);
    }
  } /* end of else using new gene stats */


  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;
    }
  }

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);
  }

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;
  }

  rmd = RandomModelDNA_std();
  return ret;

}