ThreeStateModel * read_TSM_ThreeStateDB(ThreeStateDB * mdb,int * return_status)
{
  ThreeStateModel * tsm;
  Protein * pro;
  Sequence * seq;

  if( mdb->hmm_model_end != -1 && mdb->current_no == mdb->hmm_model_end ) {
    *return_status = DB_RETURN_END;
    return NULL;
  }

  mdb->current_no++;

  switch( mdb->dbtype ) {

  case TSMDB_SINGLE :
    *return_status = DB_RETURN_END;
    if( mdb->single->rm == NULL ) {
      warn("Threestate model without an internal random model!");  
      mdb->single->rm = hard_link_RandomModel(mdb->rm);
    }

    return hard_link_ThreeStateModel(mdb->single);
  case TSMDB_HMMER1PFAM :
    tsm= read_next_TSM_PfamHmmer1DB(mdb->phdb,return_status);
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  case TSMDB_PROTEIN :
    if( mdb->seq_cache != NULL ) {
      /* just after an open. Should actually use this sequence, and flush the cache */
      pro = Protein_from_Sequence(hard_link_Sequence(mdb->seq_cache));
      mdb->seq_cache = free_Sequence(mdb->seq_cache);
      *return_status = DB_RETURN_OK;
    } else {

      /* reload a sequence from a database */
      seq = reload_SequenceDB(NULL,mdb->sdb,return_status);

      /* exit now if error */
      if( *return_status == DB_RETURN_ERROR ) {
	return NULL; /* might have leaked memory. Ugh! */
      } 

      /* if we get NULL... for the moment, silent flag end */

      if( seq == NULL ) {
	*return_status = DB_RETURN_END;
	return NULL;
      }

      pro = Protein_from_Sequence(seq);
    }
    if( pro == NULL ) {
      warn("Could not convert sequence to a protein. Exiting!");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    /* convert protein to threestatemodel */

    tsm = ThreeStateModel_from_half_bit_Sequence(pro,mdb->comp,mdb->rm,mdb->gap,mdb->ext);

    if( tsm == NULL ) {
      warn("Could not convert protein to threestatemode. Exiting!");
      free_Protein(pro);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    /* DB status already set by seqdb */
    return tsm;
  case TSMDB_GENERIC :
    tsm =  ((*mdb->reload_generic)(mdb,return_status));
    if( tsm == NULL ) {
      return NULL; /* means end of database */
    }
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  default :
    warn("Got an unrecognisable tsm db type in read-load");
    return NULL;
  }


}
Example #2
0
boolean build_objects(void)
{
  boolean ret = TRUE;
  Protein * pro_temp;
  SequenceDB * psdb;



  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;
  }

  if( use_single_dna == TRUE ) {
    cdna = read_fasta_file_cDNA(dna_seq_file);
    if( cdna == NULL ) {
      warn("Could not open single dna sequence in %s",dna_seq_file);
      ret = FALSE;
    }
  } else {
    sdb = single_fasta_SequenceDB(dna_seq_file);
    
 
    if( sdb == NULL ) {
      warn("Could not build a sequence database on %s",dna_seq_file);
      ret = FALSE;
    }
  }

  rm = default_RandomModel();


  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;
    }
  }
      
  if( is_integer_string(gap_str,&gap) == FALSE ) {
    warn("Could not get gap string number %s",gap_str);
    ret = FALSE;
  }

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not get ext string number %s",ext_str);
    ret = FALSE;
  }

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;
    }
  }

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;
    }
  }


  if( aln_number_str != NULL ) {
    if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) {
      warn("Weird aln number string %s...\n",aln_number_str);
      ret = FALSE;
    }
  }

  if( report_str != NULL ) {
    if( is_integer_string(report_str,&report_stagger) == FALSE ) {
      warn("Weird report stagger asked for %s",report_str);
      ret = FALSE;
    }
  }


  if( use_pfam1 == TRUE ) {
    tsmdb = new_PfamHmmer1DB_ThreeStateDB(protein_file);
    if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) {
      warn("Unable to set global/local switch on threestatedb");
      ret = FALSE;
    }

  } else if ( use_pfam2 == TRUE ) {
    tsmdb = HMMer2_ThreeStateDB(protein_file);
    if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) {
      warn("Unable to set global/local switch on threestatedb");
      ret = FALSE;
    }

  } else if ( use_tsm == TRUE) {
    /** using a HMM **/

    tsm = HMMer2_read_ThreeStateModel(protein_file);

    if( tsm == NULL ) {
      warn("Could not read hmm from %s\n",protein_file);
      ret = FALSE;
    }  else {

      display_char_in_ThreeStateModel(tsm);
      if( hmm_name != NULL ) {
	if( tsm->name != NULL ) 
	  ckfree(tsm->name);
	tsm->name = stringalloc(hmm_name);
      } else {
	if( tsm->name == NULL ) {
	  tsm->name = stringalloc(protein_file);
	}
      }

      
      
      /** have to set start/end **/

      set_startend_policy_ThreeStateModel(tsm,startend,15,0.2);
      tsmdb = new_single_ThreeStateDB(tsm,rm);
      if( tsmdb == NULL ) {
	warn("Could not build a threestatemodel database from a single tsm. Weird!");
	ret = FALSE;
      }
    } /* end of else tsm != NULL */
  } /* end of else is tsm */
  else if( use_single_pro ) {


    if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) {
      warn("Proteins can only have local/global startend policies set, not %s",startend_string);
      ret = FALSE;
    }

    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;

	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  free_Protein(pro);
	  pro = pro_temp;
	}
      }


      if( startend == TSM_global) 
	tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext);
      else
	tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext);

      if( tsm == NULL ) {
	warn("Could not build ThreeStateModel from a single protein sequence...");
	ret = FALSE; 
      } else {
	tsmdb = new_single_ThreeStateDB(tsm,rm);
	if( tsmdb == NULL ) {
	  warn("Could not build a threestatemodel database from a single tsm. Weird!");
	  ret = FALSE;
	}
      } /* end of could build a TSM */
    } /* else is a real protein */  

  } /* end of else is single protein */
  else if (use_db_pro == TRUE ) {
    psdb = single_fasta_SequenceDB(protein_file);
    tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext);
    free_SequenceDB(psdb);
  }
  else {
    warn("No protein input file! Yikes!");
  }

  /***
  if( use_tsm == FALSE ) {
  } else {
  ****/


  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;
    }
  }


  if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) {
    warn("Could not convert %s to a double",evalue_search_str);
    ret = FALSE;
  }
  
  if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) {
    warn("Could not convert %s to a double",search_cutoff_str);
    ret = FALSE;
  }


  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;
  }

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;
  }


  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;
  }
  


  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;
  }

   
  if( alg_str != NULL ) {
    alg = alg_estwrap_from_string(alg_str);
  } else {
    alg_str = "312";
    alg = alg_estwrap_from_string(alg_str);
  }

  if( aln_alg_str != NULL ) {
    aln_alg = alg_estwrap_from_string(aln_alg_str);
  } else {
    /* if it is a protein, don't loop */
    if( use_single_pro == TRUE || use_db_pro == TRUE ) 
      aln_alg_str = "333";
    else 
      aln_alg_str = "333L";
    aln_alg = alg_estwrap_from_string(aln_alg_str);
  }


  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;
  }

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);
  }

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;
  }

  rmd = RandomModelDNA_std();


  cps = flat_cDNAParser(indel_error);
  cm = flat_CodonMapper(ct);
  sprinkle_errors_over_CodonMapper(cm,subs_error);

  return ret;

}
ThreeStateModel * indexed_ThreeStateModel_ThreeStateDB(ThreeStateDB * mdb,DataEntry * en)
{
  Sequence * seq;
  Protein * pro;
  ThreeStateModel * tsm;


  switch(mdb->dbtype) {
  case TSMDB_SINGLE :
    return hard_link_ThreeStateModel(mdb->single);
  case TSMDB_HMMER1PFAM :
    tsm = ThreeStateModel_from_name_PfamHmmer1DB(mdb->phdb,en->name);
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  case TSMDB_PROTEIN :
    seq = get_Sequence_from_SequenceDB(mdb->sdb,en);
    if( seq == NULL ) {
      warn("could not retrieve %s as a sequence from database",en->name);
      return NULL;
    }

    pro = Protein_from_Sequence(seq);

    if( pro == NULL ) {
      warn("Could not convert sequence to a protein. Exiting!");
      return NULL;
    }

    /* convert protein to threestatemodel */

    tsm = ThreeStateModel_from_half_bit_Sequence(pro,mdb->comp,mdb->rm,mdb->gap,mdb->ext);

    if( tsm == NULL ) {
      warn("Could not convert protein to threestatemode. Exiting!");
      free_Protein(pro);
      return NULL;
    }

    free_Protein(pro);
    /* DB status already set by seqdb */
    return tsm;
  case TSMDB_GENERIC :
    tsm = ((*mdb->index_generic)(mdb,en));
    if( tsm == NULL ) {
      return NULL;
    }
    /*   fprintf(stdout,"Setting %d as policy\n",mdb->type); */
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);

    return tsm;
    
  default : 
    warn("Unknown threestatedb type");
    return NULL;
  }

  warn("Should never get here - in threestatedb reload!");

  return NULL;

}