Пример #1
/*
 * reload_ThreeStateDB
 *
 * Reads the next model from mdb and returns it converted into a
 * ThreeStateScore.
 *
 * prev          - not used by this function
 * mdb           - database to read from
 * return_status - receives DB_RETURN_END when mdb is a TSMDB_SINGLE
 *                 database, DB_RETURN_OK on success, and otherwise
 *                 whatever read_TSM_ThreeStateDB() reported
 *
 * Returns the new score object, or NULL when no model was loaded.
 */
ThreeStateScore * reload_ThreeStateDB(ThreeStateScore * prev,ThreeStateDB * mdb,int * return_status)
{
  ThreeStateModel * tsm;
  ThreeStateScore * tss;

  /* a single-model database has no further entry to move on to */
  if( mdb->dbtype == TSMDB_SINGLE ) {
    *return_status = DB_RETURN_END;
    return NULL;
  }

  /* anything other than OK (end of data or error) is passed straight back */
  tsm = read_TSM_ThreeStateDB(mdb,return_status);
  if( *return_status != DB_RETURN_OK ) {
    return NULL;
  }

  tss = ThreeStateScore_from_ThreeStateModel(tsm);

  /*
   * NOTE(review): neither prev nor tsm is released here. Whether the
   * caller or the conversion routine owns them is not visible from this
   * function - confirm before relying on it in a long-running scan.
   */

  *return_status = DB_RETURN_OK;

  return tss;
}
Пример #2
/*
 * init_ThreeStateDB
 *
 * Opens mdb, reads its first model and returns it converted into a
 * ThreeStateScore.
 *
 * mdb           - database to open and read from
 * return_status - receives DB_RETURN_ERROR when the database cannot be
 *                 opened, DB_RETURN_OK when a score object is returned,
 *                 and otherwise whatever read_TSM_ThreeStateDB() reported
 *
 * Returns the score object for the first model, or NULL on failure or
 * when the reader produced no model.
 */
ThreeStateScore * init_ThreeStateDB(ThreeStateDB * mdb,int * return_status)
{
  ThreeStateModel * tsm;
  ThreeStateScore * tss;

  if( open_ThreeStateDB(mdb) == FALSE ) {
    warn("Could not open ThreeStateDB, hence could not init it!");
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }

  /*
   * Only an outright error stops here: the reader may hand back a
   * usable model together with a non-OK status.
   */
  tsm = read_TSM_ThreeStateDB(mdb,return_status);
  if( *return_status == DB_RETURN_ERROR ) {
    return NULL;
  }

  /*
   * The reader can also return NULL without flagging an error (for
   * example at end of data). Do not convert a NULL model and do not
   * overwrite the reader's status with OK in that case.
   */
  if( tsm == NULL ) {
    return NULL;
  }

  tss = ThreeStateScore_from_ThreeStateModel(tsm);

  /*
   * NOTE(review): tsm is not released here. Whether the conversion
   * routine keeps a reference to it is not visible from this function.
   */

  *return_status = DB_RETURN_OK;

  return tss;
}
Пример #3
boolean build_objects(void)
  boolean ret = TRUE;
  Protein * pro_temp;
  Genomic * gen_temp;
  FILE * ifp;

  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;

  if( tstart_str != NULL ) {
    if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) {
      warn("Could not make %s out as target start",tstart);
      ret = FALSE;

  if( tend_str != NULL ) {
    if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) {
      warn("Could not make %s out as target end",tend);
      ret = FALSE;

  if( is_integer_string(gap_str,&gap) == FALSE ) {
      warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str);
      ret = FALSE;

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str);
    ret = FALSE;

  if( is_embl == FALSE ) {
    if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) {
      ret = FALSE;
      warn("Could not read genomic sequence in %s",dna_seq_file);
      gen = NULL;
  } else {
    embl = read_EMBL_GenomicRegion_file(dna_seq_file);
    if( embl == NULL ) {
      warn("Could not read genomic EMBL file in %s",dna_seq_file);
      gen = NULL;
      ret = FALSE;
    } else {
      gen = hard_link_Genomic(embl->genomic);

  if( gen != NULL ) {

    if( tstart != -1 || tend != -1 ) {
      if( tstart == -1 )
	tstart = 0;
      if( tend == -1 ) 
	tend = gen->baseseq->len;
      gen_temp = truncate_Genomic(gen,tstart-1,tend);
      if( gen_temp == NULL ){
	ret = FALSE;
      } else {
	gen = gen_temp;
    } else {
      /* no truncation required */

    if( reverse == TRUE ) {
      if( tstart > tend ) {
	warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend);
      gen_temp = reverse_complement_Genomic(gen); 
      gen = gen_temp;

   * Can't truncate on GenomicRegion (for good reasons!).
   * but we want only a section of the EMBL file to be used
   * So... swap genomic now. Positions in EMBL are still valid,
   * however - some genes will loose their sequence, which will be damaging. ;)

  if( is_embl ) {
    embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */

  if( target_abs == TRUE ) {
    if( is_embl == TRUE ) {
      warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta.");
      ret =  FALSE;

    gen->baseseq->offset = 1;
    gen->baseseq->end  = strlen(gen->baseseq->seq);

  if( alg_str != NULL ) {
    alg = gwrap_alg_type_from_string(alg_str);
  } else {
    if( use_tsm == TRUE ) {
      alg_str = "623L";
    } else {
      alg_str = "623";
    alg = gwrap_alg_type_from_string(alg_str);

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;

  if( use_tsm == FALSE ) {
    if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) {
      warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string);
      ret = FALSE;
    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;
	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  pro = pro_temp;
  } else {
    /** using a HMM **/
    /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/
    /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/
    tsm = HMMer2_read_ThreeStateModel(hmm_file);
      if( tsm == NULL ) {
	warn("Could not read hmm from %s\n",hmm_file);
	ret = FALSE;
      }  else {
	if( hmm_name != NULL ) {
	  if( tsm->name != NULL ) 
	  tsm->name = stringalloc(hmm_name);
	if( tsm == NULL ) {
	  warn("Could not read %s as a hmm",hmm_file);
	/** have to set start/end **/
  } /* end of else tsm != NULL */

  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;

  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;

  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;

  if( strcmp(cfreq_string,"model") == 0 ) {
    model_codon = TRUE;
  } else if ( strcmp(cfreq_string,"flat") == 0 ) {
    model_codon = FALSE;
  } else {
    warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string);
    ret = FALSE;

  if( strcmp(splice_string,"model") == 0 ) {
    model_splice = TRUE;
  } else if ( strcmp(splice_string,"flat") == 0 ) {
    model_splice = FALSE;
    gmp->use_gtag_splice = TRUE;
  } else {
    warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string);
    ret = FALSE;

  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;

  if( strcmp(intron_string,"model") == 0 ) {
    use_tied_model = FALSE;
  } else if ( strcmp(intron_string,"tied") == 0 ) {
    use_tied_model = TRUE;
  } else {
    warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string);
    ret = FALSE;

  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;

  if( use_new_stats == 0 ) {
    if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) {
      ret = FALSE;
      warn("Could not read a GeneFrequency file in %s",gene_file);
  } else {
    if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){
      warn("Could not read gene statistics in %s",new_gene_file);
  } /* end of else using new gene stats */

  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;

  rmd = RandomModelDNA_std();
  return ret;

Пример #4
boolean build_objects(void)
  boolean ret = TRUE;
  Protein * pro_temp;
  SequenceDB * psdb;

  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;

  if( use_single_dna == TRUE ) {
    cdna = read_fasta_file_cDNA(dna_seq_file);
    if( cdna == NULL ) {
      warn("Could not open single dna sequence in %s",dna_seq_file);
      ret = FALSE;
  } else {
    sdb = single_fasta_SequenceDB(dna_seq_file);
    if( sdb == NULL ) {
      warn("Could not build a sequence database on %s",dna_seq_file);
      ret = FALSE;

  rm = default_RandomModel();

  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;
  if( is_integer_string(gap_str,&gap) == FALSE ) {
    warn("Could not get gap string number %s",gap_str);
    ret = FALSE;

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not get ext string number %s",ext_str);
    ret = FALSE;

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;

  if( aln_number_str != NULL ) {
    if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) {
      warn("Weird aln number string %s...\n",aln_number_str);
      ret = FALSE;

  if( report_str != NULL ) {
    if( is_integer_string(report_str,&report_stagger) == FALSE ) {
      warn("Weird report stagger asked for %s",report_str);
      ret = FALSE;

  if( use_pfam1 == TRUE ) {
    tsmdb = new_PfamHmmer1DB_ThreeStateDB(protein_file);
    if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) {
      warn("Unable to set global/local switch on threestatedb");
      ret = FALSE;

  } else if ( use_pfam2 == TRUE ) {
    tsmdb = HMMer2_ThreeStateDB(protein_file);
    if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) {
      warn("Unable to set global/local switch on threestatedb");
      ret = FALSE;

  } else if ( use_tsm == TRUE) {
    /** using a HMM **/

    tsm = HMMer2_read_ThreeStateModel(protein_file);

    if( tsm == NULL ) {
      warn("Could not read hmm from %s\n",protein_file);
      ret = FALSE;
    }  else {

      if( hmm_name != NULL ) {
	if( tsm->name != NULL ) 
	tsm->name = stringalloc(hmm_name);
      } else {
	if( tsm->name == NULL ) {
	  tsm->name = stringalloc(protein_file);

      /** have to set start/end **/

      tsmdb = new_single_ThreeStateDB(tsm,rm);
      if( tsmdb == NULL ) {
	warn("Could not build a threestatemodel database from a single tsm. Weird!");
	ret = FALSE;
    } /* end of else tsm != NULL */
  } /* end of else is tsm */
  else if( use_single_pro ) {

    if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) {
      warn("Proteins can only have local/global startend policies set, not %s",startend_string);
      ret = FALSE;

    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;

	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  pro = pro_temp;

      if( startend == TSM_global) 
	tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext);
	tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext);

      if( tsm == NULL ) {
	warn("Could not build ThreeStateModel from a single protein sequence...");
	ret = FALSE; 
      } else {
	tsmdb = new_single_ThreeStateDB(tsm,rm);
	if( tsmdb == NULL ) {
	  warn("Could not build a threestatemodel database from a single tsm. Weird!");
	  ret = FALSE;
      } /* end of could build a TSM */
    } /* else is a real protein */  

  } /* end of else is single protein */
  else if (use_db_pro == TRUE ) {
    psdb = single_fasta_SequenceDB(protein_file);
    tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext);
  else {
    warn("No protein input file! Yikes!");

  if( use_tsm == FALSE ) {
  } else {

  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;

  if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) {
    warn("Could not convert %s to a double",evalue_search_str);
    ret = FALSE;
  if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) {
    warn("Could not convert %s to a double",search_cutoff_str);
    ret = FALSE;

  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;

  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;

  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;

  if( alg_str != NULL ) {
    alg = alg_estwrap_from_string(alg_str);
  } else {
    alg_str = "312";
    alg = alg_estwrap_from_string(alg_str);

  if( aln_alg_str != NULL ) {
    aln_alg = alg_estwrap_from_string(aln_alg_str);
  } else {
    /* if it is a protein, don't loop */
    if( use_single_pro == TRUE || use_db_pro == TRUE ) 
      aln_alg_str = "333";
      aln_alg_str = "333L";
    aln_alg = alg_estwrap_from_string(aln_alg_str);

  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;

  rmd = RandomModelDNA_std();

  cps = flat_cDNAParser(indel_error);
  cm = flat_CodonMapper(ct);

  return ret;

Пример #5
/*
 * read_TSM_ThreeStateDB
 *
 * Fetches the next ThreeStateModel from mdb, choosing the source
 * according to mdb->dbtype, and reports the outcome through
 * *return_status (DB_RETURN_OK, DB_RETURN_END and DB_RETURN_ERROR are the
 * values written in this function; the helper readers also receive the
 * pointer).
 *
 * Returns the model, or NULL at end of data or on error.
 *
 * NOTE(review): this listing has lost its structural lines - the function
 * braces, the closing braces of the if blocks and the case labels of the
 * switch are absent - so it does not compile as shown. Which dbtype value
 * selects each group of statements below cannot be determined from here;
 * restore the labels from the upstream source before editing the logic.
 */
ThreeStateModel * read_TSM_ThreeStateDB(ThreeStateDB * mdb,int * return_status)
  ThreeStateModel * tsm;
  Protein * pro;
  Sequence * seq;

  /* an hmm_model_end other than -1 acts as a stop index for current_no */
  if( mdb->hmm_model_end != -1 && mdb->current_no == mdb->hmm_model_end ) {
    *return_status = DB_RETURN_END;
    return NULL;


  switch( mdb->dbtype ) {

    /*
     * Group using mdb->single: the status is set to DB_RETURN_END even
     * though a (hard-linked) model is returned.
     * NOTE(review): case label missing above this group.
     */
    *return_status = DB_RETURN_END;
    if( mdb->single->rm == NULL ) {
      warn("Threestate model without an internal random model!");  
      mdb->single->rm = hard_link_RandomModel(mdb->rm);

    return hard_link_ThreeStateModel(mdb->single);
    /* Group using mdb->phdb. NOTE(review): case label missing above this line. */
    tsm= read_next_TSM_PfamHmmer1DB(mdb->phdb,return_status);
    return tsm;

    /*
     * Group building a model from a protein sequence taken from
     * mdb->seq_cache or mdb->sdb.
     * NOTE(review): case label missing above this group.
     */
    if( mdb->seq_cache != NULL ) {
      /* just after an open. Should actually use this sequence, and flush the cache */
      pro = Protein_from_Sequence(hard_link_Sequence(mdb->seq_cache));
      mdb->seq_cache = free_Sequence(mdb->seq_cache);
      *return_status = DB_RETURN_OK;
    } else {

      /* reload a sequence from a database */
      seq = reload_SequenceDB(NULL,mdb->sdb,return_status);

      /* exit now if error */
      if( *return_status == DB_RETURN_ERROR ) {
	return NULL; /* might have leaked memory. Ugh! */

      /* if we get NULL... for the moment, silent flag end */

      if( seq == NULL ) {
	*return_status = DB_RETURN_END;
	return NULL;

      pro = Protein_from_Sequence(seq);
    if( pro == NULL ) {
      warn("Could not convert sequence to a protein. Exiting!");
      *return_status = DB_RETURN_ERROR;
      return NULL;

    /* convert protein to threestatemodel */

    tsm = ThreeStateModel_from_half_bit_Sequence(pro,mdb->comp,mdb->rm,mdb->gap,mdb->ext);

    if( tsm == NULL ) {
      warn("Could not convert protein to threestatemode. Exiting!");
      *return_status = DB_RETURN_ERROR;
      return NULL;

    /* DB status already set by seqdb */
    return tsm;
    /*
     * Group delegating to the reload_generic callback stored in mdb.
     * NOTE(review): case label missing above this line.
     */
    tsm =  ((*mdb->reload_generic)(mdb,return_status));
    if( tsm == NULL ) {
      return NULL; /* means end of database */
    return tsm;

  default :
    /* NOTE(review): *return_status is not written on this path */
    warn("Got an unrecognisable tsm db type in read-load");
    return NULL;

Пример #6
/*
 * indexed_ThreeStateModel_ThreeStateDB
 *
 * Retrieves the ThreeStateModel that the entry en refers to from mdb,
 * choosing the source according to mdb->dbtype.
 *
 * Returns the model, or NULL when it cannot be retrieved or built.
 *
 * NOTE(review): this listing has lost its structural lines - the function
 * braces, the closing braces of the if blocks and the case labels of the
 * switch are absent - so it does not compile as shown. Which dbtype value
 * selects each group of statements below cannot be determined from here;
 * restore the labels from the upstream source before editing the logic.
 */
ThreeStateModel * indexed_ThreeStateModel_ThreeStateDB(ThreeStateDB * mdb,DataEntry * en)
  Sequence * seq;
  Protein * pro;
  ThreeStateModel * tsm;

  switch(mdb->dbtype) {
    /* Group using mdb->single; en is not consulted. NOTE(review): case label missing. */
    return hard_link_ThreeStateModel(mdb->single);
    /* Group looking the model up by en->name in mdb->phdb. NOTE(review): case label missing. */
    tsm = ThreeStateModel_from_name_PfamHmmer1DB(mdb->phdb,en->name);
    return tsm;

    /*
     * Group fetching the sequence for en from mdb->sdb and building a
     * model from it. NOTE(review): case label missing above this group.
     */
    seq = get_Sequence_from_SequenceDB(mdb->sdb,en);
    if( seq == NULL ) {
      warn("could not retrieve %s as a sequence from database",en->name);
      return NULL;

    pro = Protein_from_Sequence(seq);

    if( pro == NULL ) {
      warn("Could not convert sequence to a protein. Exiting!");
      return NULL;

    /* convert protein to threestatemodel */

    tsm = ThreeStateModel_from_half_bit_Sequence(pro,mdb->comp,mdb->rm,mdb->gap,mdb->ext);

    if( tsm == NULL ) {
      warn("Could not convert protein to threestatemode. Exiting!");
      return NULL;

    /* DB status already set by seqdb */
    return tsm;
    /*
     * Group delegating to the index_generic callback stored in mdb.
     * NOTE(review): case label missing above this line.
     */
    tsm = ((*mdb->index_generic)(mdb,en));
    if( tsm == NULL ) {
      return NULL;
    /*   fprintf(stdout,"Setting %d as policy\n",mdb->type); */

    return tsm;
  default : 
    warn("Unknown threestatedb type");
    return NULL;

  /* fallback after the switch: warns and returns NULL */
  warn("Should never get here - in threestatedb reload!");

  return NULL;

Пример #7
int main(int argc,char ** argv)
  int i;
  SequenceSet * in;
  Sequence * trans;
  ThreeStateDB * tsd;

  DPRunImpl * dpri;
  CodonTable * ct;

  int return_status;
  ThreeStateModel * tsm;
  ThreeStateScore * tss;
  Protein * hmmp;
  ComplexSequence * cs;
  ComplexSequenceEvalSet * cses;

  PackAln * pal;
  AlnBlock * alb;

  int show_align = 0;
  int show_alb   = 0;
  int show_verbose = 1;
  int show_trans = 0;

  ct = read_CodonTable_file("codon.table");

  cses = default_aminoacid_ComplexSequenceEvalSet();
  dpri = new_DPRunImpl_from_argv(&argc,argv);




  if( argc != 3 ) {

  in = read_fasta_SequenceSet_file(argv[1]);

  tsd = HMMer2_ThreeStateDB(argv[2]);

  assert(in->len == 2);

  trans = translate_Sequence(in->set[0],ct);

  if( show_trans ) {

  cs = new_ComplexSequence(trans,cses);


  while( (tsm = read_TSM_ThreeStateDB(tsd,&return_status)) != NULL ) {

    tss = ThreeStateScore_from_ThreeStateModel(tsm);
    hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

    pal = PackAln_bestmemory_ThreeStateLoop(tss,cs,NULL,dpri);
    alb = convert_PackAln_to_AlnBlock_ThreeStateLoop(pal);

    if( show_alb ) {

    if( show_align ) {
    if( show_verbose ) {
