Ejemplo n.º 1
0
boolean show_pretty_aln(void)
{
  Protein * ps;

  fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment\n",program_name,Score2Bits(pal->score));
  
  
  if( alg == GWWRAP_2193L || alg == GWWRAP_2193) {
    fprintf(ofp,"Entrie alignment score contains unseen 'random' score segments\nYou should only use the per-alignments score printed below\nfor the bits score of the alignment\n\n");
  }

  if( use_syn == FALSE ) {
    fprintf(ofp,"Scores as bits over a flat simple random model\n\n");
  } else {
    fprintf(ofp,"Scores as bits over a synchronous coding model\n\n");
  }

  
  
  if( use_tsm == FALSE ) {
    fprintf(ofp,"Warning: The bits scores is not probablistically correct for single seqs\nSee WWW help for more info\n\n");

    protgene_ascii_display(alb,pro->baseseq->seq,pro->baseseq->name,pro->baseseq->offset,gen,ct,15,main_block,(alg == GWWRAP_623L || alg == GWWRAP_2193L || alg == GWWRAP_2193) ? TRUE : FALSE, ofp);
  } else {
    ps = pseudo_Protein_from_ThreeStateModel(tsm);
    protgene_ascii_display(alb,ps->baseseq->seq,ps->baseseq->name,ps->baseseq->offset,gen,ct,15,main_block,(alg == GWWRAP_623L || alg == GWWRAP_2193L || alg == GWWRAP_2193) ? TRUE : FALSE,ofp);
    free_Protein(ps);
  }
  fprintf(ofp,"%s\n",divide_str);

  return TRUE;
}
Ejemplo n.º 2
0
void free_objects(void)
{
  if( gwdb != NULL ) 
    gwdb = free_GeneWiseDB(gwdb);
  if( cdb != NULL ) 
    cdb = free_cDNADB(cdb);
  if( sdb != NULL )
    sdb = free_SequenceDB(sdb);
  if( cdna != NULL ) 
    cdna = free_cDNA(cdna);
  if( pro != NULL )
    pro = free_Protein(pro);
  if( tsm != NULL )
    tsm = free_ThreeStateModel(tsm);
  if( tsmdb != NULL )
    tsmdb = free_ThreeStateDB(tsmdb);
  if( gws != NULL )
    gws = free_GeneWiseScore(gws);
  if( hs != NULL )
    hs = free_Hscore(hs);
  if( cm != NULL )
    cm = free_CodonMapper(cm);
  if( cps != NULL )
    cps = free_cDNAParser(cps);

}
Ejemplo n.º 3
0
/* Function:  free_Translation(obj)
 *
 * Descrip:    Free Function: removes the memory held by obj
 *             Will chain up to owned members and clear all lists
 *
 *
 * Arg:        obj [UNKN ] Object that is free'd [Translation *]
 *
 * Return [UNKN ]  Undocumented return value [Translation *]
 *
 */
Translation * free_Translation(Translation * obj) 
{
    int return_early = 0;    


    if( obj == NULL) {  
      warn("Attempting to free a NULL pointer to a Translation obj. Should be trappable");   
      return NULL;   
      }  


#ifdef PTHREAD   
    assert(pthread_mutex_lock(&(obj->dynamite_mutex)) == 0); 
#endif   
    if( obj->dynamite_hard_link > 1)     {  
      return_early = 1;  
      obj->dynamite_hard_link--; 
      }  
#ifdef PTHREAD   
    assert(pthread_mutex_unlock(&(obj->dynamite_mutex)) == 0);   
#endif   
    if( return_early == 1)   
      return NULL;   
    /* obj->parent is linked in */ 
    if( obj->protein != NULL)    
      free_Protein(obj->protein);    


    ckfree(obj); 
    return NULL; 
}    
Ejemplo n.º 4
0
boolean free_io_objects(void)
{
  if( use_tsm == TRUE) {
    free_ThreeStateModel(tsm);
  } else {
    free_Protein(pro);
  }

  free_CodonTable(ct);
  if( gf != NULL ) {
    free_GeneFrequency21(gf);
  }
  free_RandomModelDNA(rmd);
  if( is_embl ) {
    free_GenomicRegion(embl);
  } 
  free_Genomic(gen);
  

  return TRUE;
}
Ejemplo n.º 5
0
boolean build_objects(void)
{
  boolean ret = TRUE;
  Protein * pro_temp;
  Genomic * gen_temp;
  FILE * ifp;





  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;
  }


  if( tstart_str != NULL ) {
    if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) {
      warn("Could not make %s out as target start",tstart);
      ret = FALSE;
    }
  }

  if( tend_str != NULL ) {
    if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) {
      warn("Could not make %s out as target end",tend);
      ret = FALSE;
    }
  }

  if( is_integer_string(gap_str,&gap) == FALSE ) {
      warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str);
      ret = FALSE;
  }
  

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str);
    ret = FALSE;
  }

  if( is_embl == FALSE ) {
    if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) {
      ret = FALSE;
      warn("Could not read genomic sequence in %s",dna_seq_file);
      gen = NULL;
    } 
  } else {
    embl = read_EMBL_GenomicRegion_file(dna_seq_file);
    if( embl == NULL ) {
      warn("Could not read genomic EMBL file in %s",dna_seq_file);
      gen = NULL;
      ret = FALSE;
    } else {
      gen = hard_link_Genomic(embl->genomic);
      
    }
  }

  if( gen != NULL ) {

    if( tstart != -1 || tend != -1 ) {
      if( tstart == -1 )
	tstart = 0;
      if( tend == -1 ) 
	tend = gen->baseseq->len;
      gen_temp = truncate_Genomic(gen,tstart-1,tend);
      if( gen_temp == NULL ){
	ret = FALSE;
      } else {
	free_Genomic(gen);
	gen = gen_temp;
      }
    } else {
      /* no truncation required */
    }
  

    if( reverse == TRUE ) {
      if( tstart > tend ) {
	warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend);
    }
      
      gen_temp = reverse_complement_Genomic(gen); 
      free_Genomic(gen);
      gen = gen_temp;
    }
  }

  /*
   * Can't truncate on GenomicRegion (for good reasons!).
   * but we want only a section of the EMBL file to be used
   * 
   * So... swap genomic now. Positions in EMBL are still valid,
   * however - some genes will loose their sequence, which will be damaging. ;)
   */

  
  if( is_embl ) {
    free_Genomic(embl->genomic);
    embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */
  }


  if( target_abs == TRUE ) {
    if( is_embl == TRUE ) {
      warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta.");
      ret =  FALSE;
    }

    gen->baseseq->offset = 1;
    gen->baseseq->end  = strlen(gen->baseseq->seq);
  }

  if( alg_str != NULL ) {
    alg = gwrap_alg_type_from_string(alg_str);
  } else {
    if( use_tsm == TRUE ) {
      alg_str = "623L";
    } else {
      alg_str = "623";
    }
    alg = gwrap_alg_type_from_string(alg_str);
  }
      

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;
    }
  }

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;
    }
  }


  if( use_tsm == FALSE ) {
    if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) {
      warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string);
      ret = FALSE;
    }
    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;
	
	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  free_Protein(pro);
	  pro = pro_temp;
	}
      }
    }
  } else {
    /** using a HMM **/
    
    /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/
    /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/
    tsm = HMMer2_read_ThreeStateModel(hmm_file);
    
    
      if( tsm == NULL ) {
	warn("Could not read hmm from %s\n",hmm_file);
	ret = FALSE;
      }  else {
	
	display_char_in_ThreeStateModel(tsm);
	if( hmm_name != NULL ) {
	  if( tsm->name != NULL ) 
	    ckfree(tsm->name);
	  tsm->name = stringalloc(hmm_name);
	}
	
	if( tsm == NULL ) {
	  warn("Could not read %s as a hmm",hmm_file);
	}
	
	/** have to set start/end **/
	set_startend_policy_ThreeStateModel(tsm,startend,30,0.1);
	
      }
  } /* end of else tsm != NULL */
  

  
  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;
    }
  }
   


  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;
  }

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;
  }

  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;
  }

  
  if( strcmp(cfreq_string,"model") == 0 ) {
    model_codon = TRUE;
  } else if ( strcmp(cfreq_string,"flat") == 0 ) {
    model_codon = FALSE;
  } else {
    warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string);
    ret = FALSE;
  }
  

  if( strcmp(splice_string,"model") == 0 ) {
    model_splice = TRUE;
  } else if ( strcmp(splice_string,"flat") == 0 ) {
    model_splice = FALSE;
    gmp->use_gtag_splice = TRUE;
  } else {
    warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string);
    ret = FALSE;
  }

  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;
  }

  if( strcmp(intron_string,"model") == 0 ) {
    use_tied_model = FALSE;
  } else if ( strcmp(intron_string,"tied") == 0 ) {
    use_tied_model = TRUE;
  } else {
    warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string);
    ret = FALSE;
  }



  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;
  }

  if( use_new_stats == 0 ) {
    if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) {
      ret = FALSE;
      warn("Could not read a GeneFrequency file in %s",gene_file);
    }
  } else {
    if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){
      ret=FALSE;
      warn("Could not read gene statistics in %s",new_gene_file);
    }
  } /* end of else using new gene stats */


  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;
    }
  }

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);
  }

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;
  }

  rmd = RandomModelDNA_std();
  return ret;

}
Ejemplo n.º 6
0
boolean show_output(void)
{
  int i;

  cDNA * cdna;
  Protein * trans;
  GenomicOverlapResults * gor;
  AlnColumn * alt;
  

 

  if( show_pretty == TRUE ) {
    show_pretty_aln();
  }

  if( show_match_sum == TRUE ) {
    show_MatchSummary_genewise_header(ofp);
    show_MatchSummarySet_genewise(mss,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_pretty_gene == TRUE ) {
    show_pretty_GenomicRegion(gr,0,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_supp_gene == TRUE ) {
    show_pretty_GenomicRegion(gr,1,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_embl == TRUE ) {
    write_Embl_FT_GenomicRegion(gr,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_diana == TRUE ) {
    write_Diana_FT_GenomicRegion(gr,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_overlap == TRUE ) {
    gor = Genomic_overlap(gr,embl);
    show_GenomicOverlapResults(gor,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }


  if( show_trans == TRUE ) {
    for(i=0;i<gr->len;i++) {
      if( gr->gene[i]->ispseudo == TRUE ) {
	fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i);
      } else {
	trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
	write_fasta_Sequence(trans->baseseq,ofp);
      }
    } 
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_pep == TRUE ) {
    alt = alb->start;
    for(;alt != NULL;) {
      trans = Protein_from_GeneWise_AlnColumn(gen->baseseq,alt,1,&alt,ct,is_random_AlnColumn_genewise);
      if ( trans == NULL ) 
	break;
      write_fasta_Sequence(trans->baseseq,ofp);
      free_Protein(trans);
    }
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_cdna == TRUE ) {
    for(i=0;i<gr->len;i++) {
      cdna = get_cDNA_from_Transcript(gr->gene[i]->transcript[0]);
      write_fasta_Sequence(cdna->baseseq,ofp);
    } 
    fprintf(ofp,"%s\n",divide_str);
  }


  if( show_ace == TRUE ) {
    show_ace_GenomicRegion(gr,gen->baseseq->name,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_gff == TRUE ) {
    show_GFF_GenomicRegion(gr,gen->baseseq->name,"GeneWise",ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_gene_plain == TRUE ) {
    show_GenomicRegion(gr,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_AlnBlock == TRUE ) {
    mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_cumlative_PackAln == TRUE ) {
    show_bits_and_cumlative_PackAln(pal,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_PackAln == TRUE ) {
    show_simple_PackAln(pal,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }
  


  return TRUE;
}
Ejemplo n.º 7
0
boolean build_objects(void)
{
  boolean ret = TRUE;
  Protein * pro_temp;
  SequenceDB * psdb;



  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;
  }

  if( use_single_dna == TRUE ) {
    cdna = read_fasta_file_cDNA(dna_seq_file);
    if( cdna == NULL ) {
      warn("Could not open single dna sequence in %s",dna_seq_file);
      ret = FALSE;
    }
  } else {
    sdb = single_fasta_SequenceDB(dna_seq_file);
    
 
    if( sdb == NULL ) {
      warn("Could not build a sequence database on %s",dna_seq_file);
      ret = FALSE;
    }
  }

  rm = default_RandomModel();


  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;
    }
  }
      
  if( is_integer_string(gap_str,&gap) == FALSE ) {
    warn("Could not get gap string number %s",gap_str);
    ret = FALSE;
  }

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not get ext string number %s",ext_str);
    ret = FALSE;
  }

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;
    }
  }

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;
    }
  }


  if( aln_number_str != NULL ) {
    if( is_integer_string(aln_number_str,&aln_number) == FALSE || aln_number < 0) {
      warn("Weird aln number string %s...\n",aln_number_str);
      ret = FALSE;
    }
  }

  if( report_str != NULL ) {
    if( is_integer_string(report_str,&report_stagger) == FALSE ) {
      warn("Weird report stagger asked for %s",report_str);
      ret = FALSE;
    }
  }


  if( use_pfam1 == TRUE ) {
    tsmdb = new_PfamHmmer1DB_ThreeStateDB(protein_file);
    if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) {
      warn("Unable to set global/local switch on threestatedb");
      ret = FALSE;
    }

  } else if ( use_pfam2 == TRUE ) {
    tsmdb = HMMer2_ThreeStateDB(protein_file);
    if( set_search_type_ThreeStateDB(tsmdb,startend_string) == FALSE) {
      warn("Unable to set global/local switch on threestatedb");
      ret = FALSE;
    }

  } else if ( use_tsm == TRUE) {
    /** using a HMM **/

    tsm = HMMer2_read_ThreeStateModel(protein_file);

    if( tsm == NULL ) {
      warn("Could not read hmm from %s\n",protein_file);
      ret = FALSE;
    }  else {

      display_char_in_ThreeStateModel(tsm);
      if( hmm_name != NULL ) {
	if( tsm->name != NULL ) 
	  ckfree(tsm->name);
	tsm->name = stringalloc(hmm_name);
      } else {
	if( tsm->name == NULL ) {
	  tsm->name = stringalloc(protein_file);
	}
      }

      
      
      /** have to set start/end **/

      set_startend_policy_ThreeStateModel(tsm,startend,15,0.2);
      tsmdb = new_single_ThreeStateDB(tsm,rm);
      if( tsmdb == NULL ) {
	warn("Could not build a threestatemodel database from a single tsm. Weird!");
	ret = FALSE;
      }
    } /* end of else tsm != NULL */
  } /* end of else is tsm */
  else if( use_single_pro ) {


    if( startend != TSM_default && startend != TSM_global && startend != TSM_local ) {
      warn("Proteins can only have local/global startend policies set, not %s",startend_string);
      ret = FALSE;
    }

    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;

	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  free_Protein(pro);
	  pro = pro_temp;
	}
      }


      if( startend == TSM_global) 
	tsm = global_ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext);
      else
	tsm = ThreeStateModel_from_half_bit_Sequence(pro,mat,rm,-gap,-ext);

      if( tsm == NULL ) {
	warn("Could not build ThreeStateModel from a single protein sequence...");
	ret = FALSE; 
      } else {
	tsmdb = new_single_ThreeStateDB(tsm,rm);
	if( tsmdb == NULL ) {
	  warn("Could not build a threestatemodel database from a single tsm. Weird!");
	  ret = FALSE;
	}
      } /* end of could build a TSM */
    } /* else is a real protein */  

  } /* end of else is single protein */
  else if (use_db_pro == TRUE ) {
    psdb = single_fasta_SequenceDB(protein_file);
    tsmdb = new_proteindb_ThreeStateDB(psdb,mat,-gap,-ext);
    free_SequenceDB(psdb);
  }
  else {
    warn("No protein input file! Yikes!");
  }

  /***
  if( use_tsm == FALSE ) {
  } else {
  ****/


  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;
    }
  }


  if( evalue_search_str != NULL && is_double_string(evalue_search_str,&evalue_search_cutoff) == FALSE ) {
    warn("Could not convert %s to a double",evalue_search_str);
    ret = FALSE;
  }
  
  if( is_double_string(search_cutoff_str,&search_cutoff) == FALSE ) {
    warn("Could not convert %s to a double",search_cutoff_str);
    ret = FALSE;
  }


  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;
  }

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;
  }


  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;
  }
  


  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;
  }

   
  if( alg_str != NULL ) {
    alg = alg_estwrap_from_string(alg_str);
  } else {
    alg_str = "312";
    alg = alg_estwrap_from_string(alg_str);
  }

  if( aln_alg_str != NULL ) {
    aln_alg = alg_estwrap_from_string(aln_alg_str);
  } else {
    /* if it is a protein, don't loop */
    if( use_single_pro == TRUE || use_db_pro == TRUE ) 
      aln_alg_str = "333";
    else 
      aln_alg_str = "333L";
    aln_alg = alg_estwrap_from_string(aln_alg_str);
  }


  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;
  }

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);
  }

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;
  }

  rmd = RandomModelDNA_std();


  cps = flat_cDNAParser(indel_error);
  cm = flat_CodonMapper(ct);
  sprinkle_errors_over_CodonMapper(cm,subs_error);

  return ret;

}
Ejemplo n.º 8
0
boolean show_output(void)
{
  int i,k;
  ThreeStateModel * temptsm;
  AlnBlock * alb;
  PackAln * pal;
  MatchSummarySet * mss;
  Protein * ps;
  cDNA * cdna;
  double bits;
  boolean fitted_res = FALSE;
  AlnBlockList * alist;
  AlnBlock * anchored;
  SequenceSet * set;
  AlnColumn * alt;
  Protein * trans;

  /* sort by bit score first */

  sort_Hscore_by_score(hs);

  if( search_mode == PC_SEARCH_S2DB ) {
    if( hs->his == NULL || hs->his->total < 1000 ) {
	info("Cannot fit histogram to a db smaller than 1,000");
	fprintf(ofp,"[Warning: Can't fit histogram to a db smaller than 1,000]\n\n");
	show_histogram = FALSE;
    } else {
      fitted_res = TRUE;
      fit_Hscore_to_EVD(hs,20);
    }
  }

  /* deal with initialising anchored alignment.
   * Could be done for either single HMMs or single proteins,
   * but we will only do it for HMMs at the moment
   */

  if( make_anchored_aln == TRUE ) {
    if( tsm == NULL ) {
      warn("Attempting to make an achored alignment without a HMM. impossible!");
      make_anchored_aln = FALSE;
    } else {
      anchored = single_unit_AlnBlock(tsm->len,"MATCH_STATE");
      set = SequenceSet_alloc_std();
   }
  }

  /* dofus catcher */
  if( aln_alg != alg ) {
    fprintf(ofp,"\n#\n#WARNING!\n#\n# Your alignment algorithm is different from your search algorithm.\n# This is probably quite sensible but will lead to differing scores.\n# Use the search score as an indicator of the significance of the match\n# Read the docs for more information\n#\n");
  }

  fprintf(ofp,"\n\n#High Score list\n");
  fprintf(ofp,"#Protein ID                 DNA Str  ID                        Bits Evalue\n");  
  fprintf(ofp,"--------------------------------------------------------------------------\n");

  for(i=0;i<hs->len;i++) {
    bits = Score2Bits(hs->ds[i]->score);
    if( bits < search_cutoff ) {
      break;
    }

    if( fitted_res == TRUE && evalue_search_str != NULL ) {
      if( hs->ds[i]->evalue > evalue_search_cutoff ) 
	break;
    }

    if( fitted_res == TRUE) 
      fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f %.2g\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits,hs->ds[i]->evalue);
    else
      fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits);

  }

  if( search_mode == PC_SEARCH_S2DB && show_histogram == TRUE ) {
    fprintf(ofp,"\n\n#Histogram\n");
    fprintf(ofp,"-----------------------------------------------------------------------\n");
    PrintASCIIHistogram(hs->his,ofp);
  }

  fprintf(ofp,"\n\n#Alignments\n");
  fprintf(ofp,"-----------------------------------------------------------------------\n");

  for(i=0;i<hs->len;i++) {
    bits = Score2Bits(hs->ds[i]->score);
    if( bits < search_cutoff ) {
      break;
    }
    if( i >= aln_number ) {
      break;
    }

    if( fitted_res == TRUE && evalue_search_str != NULL ) {
      if( hs->ds[i]->evalue > evalue_search_cutoff ) 
	break;
    }

    
    fprintf(ofp,"\n\n>Results for %s vs %s (%s) [%d]\n",hs->ds[i]->query->name,hs->ds[i]->target->name,hs->ds[i]->target->is_reversed == TRUE ? "reverse" : "forward",i+1 );

    cdna = get_cDNA_from_cDNADB(cdb,hs->ds[i]->target);
    temptsm = indexed_ThreeStateModel_ThreeStateDB(tsmdb,hs->ds[i]->query);


    alb = AlnBlock_from_TSM_estwise_wrap(temptsm,cdna,cps,cm,ct,rmd,aln_alg,use_syn,allN,flat_insert,dpri,&pal);

    if( alb == NULL ) {
      warn("Got a NULL alignment. Exiting now due to presumed problems");
      fprintf(ofp,"\n\n*Got a NULL alignment. Exiting now due to presumed problems*\n\n");
      return FALSE;
    }


 
    if( use_single_pro == FALSE) 
      mss = MatchSummarySet_from_AlnBlock_genewise(alb,temptsm->name,1,cdna->baseseq);
    else
      mss = MatchSummarySet_from_AlnBlock_genewise(alb,pro->baseseq->name,pro->baseseq->offset,cdna->baseseq);

    
    if( show_pretty == TRUE ) {

      fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment.\nThis will be different from per-alignment scores. See manual for details\nFor computer parsable output, try %s -help or read the manual\n",program_name,Score2Bits(pal->score),program_name);
      
      if( use_syn == FALSE ) {
	fprintf(ofp,"Scores as bits over a flat simple random model\n\n");
      } else {
	fprintf(ofp,"Scores as bits over a synchronous coding model\n\n");
      }
      
      ps = pseudo_Protein_from_ThreeStateModel(temptsm);
      protcdna_ascii_display(alb,ps->baseseq->seq,ps->baseseq->name,ps->baseseq->offset,cdna,ct,15,main_block,TRUE,ofp);

      
      free_Protein(ps);

      fprintf(ofp,"%s\n",divide_str);
      
    }

    if( show_match_sum == TRUE ) {
      show_MatchSummary_genewise_header(ofp);
      show_MatchSummarySet_genewise(mss,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
    

    if( show_pep == TRUE ) {
      alt = alb->start;
      for(;alt != NULL;) {
	trans = Protein_from_GeneWise_AlnColumn(cdna->baseseq,alt,1,&alt,ct,is_random_AlnColumn_genewise);
	if ( trans == NULL ) 
	  break;
	write_fasta_Sequence(trans->baseseq,ofp);
	free_Protein(trans);
      }
      fprintf(ofp,"%s\n",divide_str);
    }

    if( show_AlnBlock == TRUE ) {
      mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
    
    if( show_PackAln == TRUE ) {
      show_simple_PackAln(pal,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }

    /*
     * This goes at the end because it destroys the alb structure
     */

    if( make_anchored_aln == TRUE ) {
      /* attach sequence to als in alb, so we have it for later use */
      alb->seq[1]->data = (void *) cdna->baseseq;
      /* add to SequenceSet so we can destroy the memory */
      add_SequenceSet(set,hard_link_Sequence(cdna->baseseq));

      alist = split_AlnBlock(alb,is_random_AlnColumn_genewise);

      for(k=0;k<alist->len;k++) {
	/* actually produce the anchored alignment */
	/*mapped_ascii_AlnBlock(alist->alb[k],Score2Bits,stderr);*/
	add_to_anchored_AlnBlock(anchored,alist->alb[k]);

	/*	dump_ascii_AlnBlock(anchored,stderr);*/
      }
    }

    alb = free_AlnBlock(alb);
    pal = free_PackAln(pal);
    mss = free_MatchSummarySet(mss);
    cdna = free_cDNA(cdna);
    temptsm = free_ThreeStateModel(temptsm);

  }

  if( do_complete_analysis == TRUE ) {
    fprintf(ofp,"\n\n#Complete Analysis\n");
    fprintf(ofp,"-------------------------------------------------------------\n\n");
    
    /* ok - end of loop over relevant hits. If we have an
     * anchored alignment, print it out!
     */
    if( make_anchored_aln == TRUE ) {
      /*dump_ascii_AlnBlock(anchored,stderr);*/
      write_mul_estwise_AlnBlock(anchored,ct,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
  }


  return TRUE;
}
Ejemplo n.º 9
0
ThreeStateModel * read_TSM_ThreeStateDB(ThreeStateDB * mdb,int * return_status)
{
  ThreeStateModel * tsm;
  Protein * pro;
  Sequence * seq;

  if( mdb->hmm_model_end != -1 && mdb->current_no == mdb->hmm_model_end ) {
    *return_status = DB_RETURN_END;
    return NULL;
  }

  mdb->current_no++;

  switch( mdb->dbtype ) {

  case TSMDB_SINGLE :
    *return_status = DB_RETURN_END;
    if( mdb->single->rm == NULL ) {
      warn("Threestate model without an internal random model!");  
      mdb->single->rm = hard_link_RandomModel(mdb->rm);
    }

    return hard_link_ThreeStateModel(mdb->single);
  case TSMDB_HMMER1PFAM :
    tsm= read_next_TSM_PfamHmmer1DB(mdb->phdb,return_status);
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  case TSMDB_PROTEIN :
    if( mdb->seq_cache != NULL ) {
      /* just after an open. Should actually use this sequence, and flush the cache */
      pro = Protein_from_Sequence(hard_link_Sequence(mdb->seq_cache));
      mdb->seq_cache = free_Sequence(mdb->seq_cache);
      *return_status = DB_RETURN_OK;
    } else {

      /* reload a sequence from a database */
      seq = reload_SequenceDB(NULL,mdb->sdb,return_status);

      /* exit now if error */
      if( *return_status == DB_RETURN_ERROR ) {
	return NULL; /* might have leaked memory. Ugh! */
      } 

      /* if we get NULL... for the moment, silent flag end */

      if( seq == NULL ) {
	*return_status = DB_RETURN_END;
	return NULL;
      }

      pro = Protein_from_Sequence(seq);
    }
    if( pro == NULL ) {
      warn("Could not convert sequence to a protein. Exiting!");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    /* convert protein to threestatemodel */

    tsm = ThreeStateModel_from_half_bit_Sequence(pro,mdb->comp,mdb->rm,mdb->gap,mdb->ext);

    if( tsm == NULL ) {
      warn("Could not convert protein to threestatemode. Exiting!");
      free_Protein(pro);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    /* DB status already set by seqdb */
    return tsm;
  case TSMDB_GENERIC :
    tsm =  ((*mdb->reload_generic)(mdb,return_status));
    if( tsm == NULL ) {
      return NULL; /* means end of database */
    }
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  default :
    warn("Got an unrecognisable tsm db type in read-load");
    return NULL;
  }


}
Ejemplo n.º 10
0
ThreeStateModel * indexed_ThreeStateModel_ThreeStateDB(ThreeStateDB * mdb,DataEntry * en)
{
  Sequence * seq;
  Protein * pro;
  ThreeStateModel * tsm;


  switch(mdb->dbtype) {
  case TSMDB_SINGLE :
    return hard_link_ThreeStateModel(mdb->single);
  case TSMDB_HMMER1PFAM :
    tsm = ThreeStateModel_from_name_PfamHmmer1DB(mdb->phdb,en->name);
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);
    return tsm;

  case TSMDB_PROTEIN :
    seq = get_Sequence_from_SequenceDB(mdb->sdb,en);
    if( seq == NULL ) {
      warn("could not retrieve %s as a sequence from database",en->name);
      return NULL;
    }

    pro = Protein_from_Sequence(seq);

    if( pro == NULL ) {
      warn("Could not convert sequence to a protein. Exiting!");
      return NULL;
    }

    /* convert protein to threestatemodel */

    tsm = ThreeStateModel_from_half_bit_Sequence(pro,mdb->comp,mdb->rm,mdb->gap,mdb->ext);

    if( tsm == NULL ) {
      warn("Could not convert protein to threestatemode. Exiting!");
      free_Protein(pro);
      return NULL;
    }

    free_Protein(pro);
    /* DB status already set by seqdb */
    return tsm;
  case TSMDB_GENERIC :
    tsm = ((*mdb->index_generic)(mdb,en));
    if( tsm == NULL ) {
      return NULL;
    }
    /*   fprintf(stdout,"Setting %d as policy\n",mdb->type); */
    set_startend_policy_ThreeStateModel(tsm,mdb->type,30,0.2);

    return tsm;
    
  default : 
    warn("Unknown threestatedb type");
    return NULL;
  }

  warn("Should never get here - in threestatedb reload!");

  return NULL;

}
Ejemplo n.º 11
0
void show_verbose_evo(AlnBlock * alb,ThreeStateModel * tsm,Sequence * ref,Sequence * diff,CodonTable * ct,FILE * ofp)
{
  AlnColumn * alc;
  Protein * hmmp;

  Sequence * ref_trans;
  Sequence * diff_trans;

  DnaProbMatrix * negative_dm;
  DnaProbMatrix * pseudo_dm;
  
  int i;
  int count = 0;
  double est_mutation = 0.0;

  int dna_offset;

  Score total_pseudo = 0;
  Score total_neg = 0;
  Score pseudo = 0;
  Score neg = 0;

  int count_ref_positive = 0;
  int count_ref_negative = 0; 

  int count_ref_negative_0_5   = 0;
  int count_ref_negative_5_10  = 0;
  int count_ref_negative_10_15 = 0;

  int syn_sites = 0;
  int nonsyn_sites = 0;

  int syn_changes = 0;
  int nonsyn_changes = 0;

  int diff_score;

  char diff_aa;
  char ref_aa;

  int score_ratio = 0;
  Score score_neg_5  = Probability2Score(Bits2Probability(-5.0));
  Score score_neg_10 = Probability2Score(Bits2Probability(-10.0));


  int k;

  for(i=0;i<ref->len;i+=3) {

    /* if this has changed, then it is definitely non syn */
    if( aminoacid_from_seq(ct,ref->seq+i) != aminoacid_from_seq(ct,diff->seq+i)) {
      for(k=0;k<3;k++) {
	if( ref->seq[i+k] != diff->seq[i+k] ) {
	  nonsyn_changes++;
	}
      }
    } else {
      /* could still be syn change */
      for(k=0;k<3;k++) {
	if( ref->seq[i+k] != diff->seq[i+k] ) {
	  syn_changes++;
	}
      }
    }

    /* calculate the sites. There is always 2 non syn sites */

    nonsyn_sites += 2;

    if( four_fold_sites_CodonTable(ct,ref->seq+i) > 0 ) {
      syn_sites++;
    } else {
      nonsyn_sites += 1;
    } 
  }

  for(i=0;i<ref->len;i++) {
    if( ref->seq[i] != diff->seq[i] ) {
      count++;
    }
  }


  est_mutation = (double)count / (double)ref->len;


  pseudo_dm = DnaProbMatrix_from_match(1.0 - est_mutation,NMaskType_BANNED);
  negative_dm = DnaProbMatrix_from_match(1.0 - (est_mutation*2),NMaskType_BANNED);


  ref_trans = translate_Sequence(ref,ct);
  diff_trans = translate_Sequence(diff,ct);
  
  hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

  for(alc=alb->start;alc != NULL;alc = alc->next) {
    /*    fprintf(stdout,"In position %s\n",alc->alu[0]->text_label); */
    if( strcmp(alc->alu[0]->text_label,"SEQUENCE") == 0 &&
	strcmp(alc->alu[1]->text_label,"SEQUENCE") == 0 ) {
      dna_offset = alc->alu[1]->end*3;

      pseudo = 	      logl_pseudogene(ref->seq+dna_offset,diff->seq+dna_offset,pseudo_dm);
      neg = 	      logl_negative_selection(ref->seq+dna_offset,diff->seq+dna_offset,tsm->unit[alc->alu[0]->end],ct,
					      pseudo_dm);

      /*
      fprintf(ofp,"Position %d [%c], vs %d [%c,%c] Scores Negative %d, Pseudo %d\n",
	      alc->alu[0]->end,hmmp->baseseq->seq[alc->alu[0]->end],
	      alc->alu[1]->end,ref_trans->seq[alc->alu[1]->end],diff_trans->seq[alc->alu[1]->end],
	      neg,
	      pseudo
	      );
      */

      ref_aa = ref_trans->seq[alc->alu[1]->end];
      diff_aa = diff_trans->seq[alc->alu[1]->end]; 
      if( ref_aa != diff_aa  ) {
	score_ratio += Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[ref_aa-'A']) - Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[diff_aa-'A']);

	diff_score = Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[ref_aa-'A']) - Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[diff_aa-'A']);
 
	if( diff_score < 0) {
	  count_ref_negative++;
	  if( diff_score > score_neg_5 ) {
	    count_ref_negative_0_5++;
	  } else if ( diff_score > score_neg_10 ) {
	    count_ref_negative_5_10++;
	  } else {
	    count_ref_negative_10_15++;
	  }
	} else {
	  count_ref_positive++;
	}

      }

      total_pseudo += pseudo;
      total_neg += neg;
    }
  }

  fprintf(ofp,"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\n",ref->name,hmmp->baseseq->name,Score2Bits(score_ratio),
	  count_ref_positive,count_ref_negative,
	  count_ref_negative_0_5,
	  count_ref_negative_5_10,
	  count_ref_negative_10_15);


  /*
  fprintf(ofp,"%s,%s Total Pseudo %d vs Negative %d, Ratio %.4f  Positive %d Negative %d Score %.2f Syn %d Changes %d NonSyn %d Changes %d\n",ref->name,hmmp->baseseq->name,total_pseudo,total_neg,Score2Bits(total_neg-total_pseudo),count_ref_positive,count_ref_negative,Score2Bits(score_ratio),syn_sites,syn_changes,nonsyn_sites,nonsyn_changes);
  */

  free_Protein(hmmp);
	      
}