Ejemplo n.º 1
0
/*
 * show_pretty_aln
 *
 * Writes the human-readable alignment report to the global stream ofp:
 * a header with the overall bit score of the global pal, notes about
 * how the score should be interpreted, the ASCII protein/gene display
 * of the global alb, and finally the divider string.
 *
 * The query shown is either the global protein (pro) or, when use_tsm
 * is set, a temporary pseudo-protein built from the global tsm.
 *
 * Always returns TRUE.
 */
boolean show_pretty_aln(void)
{
  Protein * pseudo_query;
  boolean has_random_segments;
  boolean display_flag;

  /* algorithm classification, computed once instead of per call site */
  has_random_segments = (alg == GWWRAP_2193L || alg == GWWRAP_2193) ? TRUE : FALSE;
  display_flag = (alg == GWWRAP_623L || has_random_segments == TRUE) ? TRUE : FALSE;

  fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment\n",program_name,Score2Bits(pal->score));

  if( has_random_segments == TRUE ) {
    fputs("Entrie alignment score contains unseen 'random' score segments\nYou should only use the per-alignments score printed below\nfor the bits score of the alignment\n\n",ofp);
  }

  /* which null model the bit scores are relative to */
  fputs(use_syn == FALSE
	? "Scores as bits over a flat simple random model\n\n"
	: "Scores as bits over a synchronous coding model\n\n",
	ofp);

  if( use_tsm != FALSE ) {
    /* HMM query: display it through a throw-away pseudo protein */
    pseudo_query = pseudo_Protein_from_ThreeStateModel(tsm);
    protgene_ascii_display(alb,
			   pseudo_query->baseseq->seq,
			   pseudo_query->baseseq->name,
			   pseudo_query->baseseq->offset,
			   gen,ct,15,main_block,display_flag,ofp);
    free_Protein(pseudo_query);
  } else {
    /* single protein query: warn about the score interpretation first */
    fputs("Warning: The bits scores is not probablistically correct for single seqs\nSee WWW help for more info\n\n",ofp);
    protgene_ascii_display(alb,
			   pro->baseseq->seq,
			   pro->baseseq->name,
			   pro->baseseq->offset,
			   gen,ct,15,main_block,display_flag,ofp);
  }

  fprintf(ofp,"%s\n",divide_str);

  return TRUE;
}
Ejemplo n.º 2
0
int main(int argc,char ** argv)
{
  int i;
  SequenceSet * in;
  Sequence * trans;
  ThreeStateDB * tsd;

  DPRunImpl * dpri;
  CodonTable * ct;

  int return_status;
  ThreeStateModel * tsm;
  ThreeStateScore * tss;
  Protein * hmmp;
  ComplexSequence * cs;
  ComplexSequenceEvalSet * cses;

  PackAln * pal;
  AlnBlock * alb;

  int show_align = 0;
  int show_alb   = 0;
  int show_verbose = 1;
  int show_trans = 0;

  ct = read_CodonTable_file("codon.table");

  cses = default_aminoacid_ComplexSequenceEvalSet();
  
  dpri = new_DPRunImpl_from_argv(&argc,argv);

  strip_out_boolean_def_argument(&argc,argv,"pretty",&show_align);

  strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb);

  strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans);

  if( argc != 3 ) {
    show_help(stdout);
    exit(63);
  }


  in = read_fasta_SequenceSet_file(argv[1]);

  tsd = HMMer2_ThreeStateDB(argv[2]);

  assert(in);
  assert(tsd);
  assert(in->len == 2);

  trans = translate_Sequence(in->set[0],ct);

  if( show_trans ) {
    write_fasta_Sequence(trans,stdout);
  }

  cs = new_ComplexSequence(trans,cses);

  open_ThreeStateDB(tsd);

  while( (tsm = read_TSM_ThreeStateDB(tsd,&return_status)) != NULL ) {
    fold_RandomModel_into_ThreeStateModel(tsm,tsm->rm);
    set_startend_policy_ThreeStateModel(tsm,TSM_local,10,1.0);

    tss = ThreeStateScore_from_ThreeStateModel(tsm);
    hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

    pal = PackAln_bestmemory_ThreeStateLoop(tss,cs,NULL,dpri);
    alb = convert_PackAln_to_AlnBlock_ThreeStateLoop(pal);

    if( show_alb ) {
      show_flat_AlnBlock(alb,stdout);
    }

    if( show_align ) {
      write_pretty_seq_align(alb,hmmp->baseseq,trans,15,50,stdout);      
    }
    if( show_verbose ) {
      show_verbose_evo(alb,tsm,in->set[0],in->set[1],ct,stdout);
    }
      
  }
  

}
Ejemplo n.º 3
0
boolean show_output(void)
{
  int i,k;
  ThreeStateModel * temptsm;
  AlnBlock * alb;
  PackAln * pal;
  MatchSummarySet * mss;
  Protein * ps;
  cDNA * cdna;
  double bits;
  boolean fitted_res = FALSE;
  AlnBlockList * alist;
  AlnBlock * anchored;
  SequenceSet * set;
  AlnColumn * alt;
  Protein * trans;

  /* sort by bit score first */

  sort_Hscore_by_score(hs);

  if( search_mode == PC_SEARCH_S2DB ) {
    if( hs->his == NULL || hs->his->total < 1000 ) {
	info("Cannot fit histogram to a db smaller than 1,000");
	fprintf(ofp,"[Warning: Can't fit histogram to a db smaller than 1,000]\n\n");
	show_histogram = FALSE;
    } else {
      fitted_res = TRUE;
      fit_Hscore_to_EVD(hs,20);
    }
  }

  /* deal with initialising anchored alignment.
   * Could be done for either single HMMs or single proteins,
   * but we will only do it for HMMs at the moment
   */

  if( make_anchored_aln == TRUE ) {
    if( tsm == NULL ) {
      warn("Attempting to make an achored alignment without a HMM. impossible!");
      make_anchored_aln = FALSE;
    } else {
      anchored = single_unit_AlnBlock(tsm->len,"MATCH_STATE");
      set = SequenceSet_alloc_std();
   }
  }

  /* dofus catcher */
  if( aln_alg != alg ) {
    fprintf(ofp,"\n#\n#WARNING!\n#\n# Your alignment algorithm is different from your search algorithm.\n# This is probably quite sensible but will lead to differing scores.\n# Use the search score as an indicator of the significance of the match\n# Read the docs for more information\n#\n");
  }

  fprintf(ofp,"\n\n#High Score list\n");
  fprintf(ofp,"#Protein ID                 DNA Str  ID                        Bits Evalue\n");  
  fprintf(ofp,"--------------------------------------------------------------------------\n");

  for(i=0;i<hs->len;i++) {
    bits = Score2Bits(hs->ds[i]->score);
    if( bits < search_cutoff ) {
      break;
    }

    if( fitted_res == TRUE && evalue_search_str != NULL ) {
      if( hs->ds[i]->evalue > evalue_search_cutoff ) 
	break;
    }

    if( fitted_res == TRUE) 
      fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f %.2g\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits,hs->ds[i]->evalue);
    else
      fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits);

  }

  if( search_mode == PC_SEARCH_S2DB && show_histogram == TRUE ) {
    fprintf(ofp,"\n\n#Histogram\n");
    fprintf(ofp,"-----------------------------------------------------------------------\n");
    PrintASCIIHistogram(hs->his,ofp);
  }

  fprintf(ofp,"\n\n#Alignments\n");
  fprintf(ofp,"-----------------------------------------------------------------------\n");

  for(i=0;i<hs->len;i++) {
    bits = Score2Bits(hs->ds[i]->score);
    if( bits < search_cutoff ) {
      break;
    }
    if( i >= aln_number ) {
      break;
    }

    if( fitted_res == TRUE && evalue_search_str != NULL ) {
      if( hs->ds[i]->evalue > evalue_search_cutoff ) 
	break;
    }

    
    fprintf(ofp,"\n\n>Results for %s vs %s (%s) [%d]\n",hs->ds[i]->query->name,hs->ds[i]->target->name,hs->ds[i]->target->is_reversed == TRUE ? "reverse" : "forward",i+1 );

    cdna = get_cDNA_from_cDNADB(cdb,hs->ds[i]->target);
    temptsm = indexed_ThreeStateModel_ThreeStateDB(tsmdb,hs->ds[i]->query);


    alb = AlnBlock_from_TSM_estwise_wrap(temptsm,cdna,cps,cm,ct,rmd,aln_alg,use_syn,allN,flat_insert,dpri,&pal);

    if( alb == NULL ) {
      warn("Got a NULL alignment. Exiting now due to presumed problems");
      fprintf(ofp,"\n\n*Got a NULL alignment. Exiting now due to presumed problems*\n\n");
      return FALSE;
    }


 
    if( use_single_pro == FALSE) 
      mss = MatchSummarySet_from_AlnBlock_genewise(alb,temptsm->name,1,cdna->baseseq);
    else
      mss = MatchSummarySet_from_AlnBlock_genewise(alb,pro->baseseq->name,pro->baseseq->offset,cdna->baseseq);

    
    if( show_pretty == TRUE ) {

      fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment.\nThis will be different from per-alignment scores. See manual for details\nFor computer parsable output, try %s -help or read the manual\n",program_name,Score2Bits(pal->score),program_name);
      
      if( use_syn == FALSE ) {
	fprintf(ofp,"Scores as bits over a flat simple random model\n\n");
      } else {
	fprintf(ofp,"Scores as bits over a synchronous coding model\n\n");
      }
      
      ps = pseudo_Protein_from_ThreeStateModel(temptsm);
      protcdna_ascii_display(alb,ps->baseseq->seq,ps->baseseq->name,ps->baseseq->offset,cdna,ct,15,main_block,TRUE,ofp);

      
      free_Protein(ps);

      fprintf(ofp,"%s\n",divide_str);
      
    }

    if( show_match_sum == TRUE ) {
      show_MatchSummary_genewise_header(ofp);
      show_MatchSummarySet_genewise(mss,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
    

    if( show_pep == TRUE ) {
      alt = alb->start;
      for(;alt != NULL;) {
	trans = Protein_from_GeneWise_AlnColumn(cdna->baseseq,alt,1,&alt,ct,is_random_AlnColumn_genewise);
	if ( trans == NULL ) 
	  break;
	write_fasta_Sequence(trans->baseseq,ofp);
	free_Protein(trans);
      }
      fprintf(ofp,"%s\n",divide_str);
    }

    if( show_AlnBlock == TRUE ) {
      mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
    
    if( show_PackAln == TRUE ) {
      show_simple_PackAln(pal,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }

    /*
     * This goes at the end because it destroys the alb structure
     */

    if( make_anchored_aln == TRUE ) {
      /* attach sequence to als in alb, so we have it for later use */
      alb->seq[1]->data = (void *) cdna->baseseq;
      /* add to SequenceSet so we can destroy the memory */
      add_SequenceSet(set,hard_link_Sequence(cdna->baseseq));

      alist = split_AlnBlock(alb,is_random_AlnColumn_genewise);

      for(k=0;k<alist->len;k++) {
	/* actually produce the anchored alignment */
	/*mapped_ascii_AlnBlock(alist->alb[k],Score2Bits,stderr);*/
	add_to_anchored_AlnBlock(anchored,alist->alb[k]);

	/*	dump_ascii_AlnBlock(anchored,stderr);*/
      }
    }

    alb = free_AlnBlock(alb);
    pal = free_PackAln(pal);
    mss = free_MatchSummarySet(mss);
    cdna = free_cDNA(cdna);
    temptsm = free_ThreeStateModel(temptsm);

  }

  if( do_complete_analysis == TRUE ) {
    fprintf(ofp,"\n\n#Complete Analysis\n");
    fprintf(ofp,"-------------------------------------------------------------\n\n");
    
    /* ok - end of loop over relevant hits. If we have an
     * anchored alignment, print it out!
     */
    if( make_anchored_aln == TRUE ) {
      /*dump_ascii_AlnBlock(anchored,stderr);*/
      write_mul_estwise_AlnBlock(anchored,ct,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
  }


  return TRUE;
}
Ejemplo n.º 4
0
/*
 * show_verbose_evo
 *
 * Compares two DNA sequences (ref and diff) position by position
 * along an alignment of their translation to the model tsm, and
 * writes one tab-separated summary line to ofp:
 *
 *   ref name, model name, summed emission score difference in bits,
 *   count of substitutions scoring >= 0, count scoring < 0, and the
 *   negative count split into three bands at -5 and -10 bits.
 *
 * A substitution's score is the model's match emission score for the
 * ref residue minus that for the diff residue, at alignment columns
 * where both units carry the "SEQUENCE" label and the two
 * translations disagree.
 *
 * alb   alignment whose columns are walked from alb->start
 * tsm   model supplying the per-position match emissions
 * ref   reference DNA sequence
 * diff  second DNA sequence, compared against ref index by index
 * ct    codon table used for translation
 * ofp   output stream
 *
 * The synonymous / non-synonymous counters and the two log-likelihood
 * totals are still accumulated, but only the disabled extended report
 * at the end of the function would print them.
 */
void show_verbose_evo(AlnBlock * alb,ThreeStateModel * tsm,Sequence * ref,Sequence * diff,CodonTable * ct,FILE * ofp)
{
  AlnColumn * alc;
  Protein * hmmp;

  Sequence * ref_trans;
  Sequence * diff_trans;

  DnaProbMatrix * negative_dm;
  DnaProbMatrix * pseudo_dm;

  int i;
  int k;
  int common_len;
  int count = 0;
  double est_mutation = 0.0;

  int hmm_pos;
  int seq_pos;
  int dna_offset;

  Score total_pseudo = 0;
  Score total_neg = 0;
  Score pseudo = 0;
  Score neg = 0;

  int count_ref_positive = 0;
  int count_ref_negative = 0;

  int count_ref_negative_0_5   = 0;
  int count_ref_negative_5_10  = 0;
  int count_ref_negative_10_15 = 0;

  int syn_sites = 0;
  int nonsyn_sites = 0;

  int syn_changes = 0;
  int nonsyn_changes = 0;

  int diff_score;

  char diff_aa;
  char ref_aa;

  int score_ratio = 0;
  Score score_neg_5  = Probability2Score(Bits2Probability(-5.0));
  Score score_neg_10 = Probability2Score(Bits2Probability(-10.0));

  /* never index either sequence beyond the shorter of the two */
  common_len = ref->len < diff->len ? ref->len : diff->len;

  /* walk complete codons only; a trailing partial codon is skipped */
  for(i=0;i+2<common_len;i+=3) {

    /* if this has changed, then it is definitely non syn */
    if( aminoacid_from_seq(ct,ref->seq+i) != aminoacid_from_seq(ct,diff->seq+i)) {
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          nonsyn_changes++;
        }
      }
    } else {
      /* could still be syn change */
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          syn_changes++;
        }
      }
    }

    /* calculate the sites. There is always 2 non syn sites */

    nonsyn_sites += 2;

    if( four_fold_sites_CodonTable(ct,ref->seq+i) > 0 ) {
      syn_sites++;
    } else {
      nonsyn_sites += 1;
    }
  }

  /* raw per-base mismatch count over the shared length */
  for(i=0;i<common_len;i++) {
    if( ref->seq[i] != diff->seq[i] ) {
      count++;
    }
  }

  /* an empty reference would otherwise divide by zero */
  if( ref->len > 0 ) {
    est_mutation = (double)count / (double)ref->len;
  }

  /*
   * NOTE(review): negative_dm is built but never used below;
   * logl_negative_selection is handed pseudo_dm. Confirm whether
   * that is intended.
   */
  pseudo_dm = DnaProbMatrix_from_match(1.0 - est_mutation,NMaskType_BANNED);
  negative_dm = DnaProbMatrix_from_match(1.0 - (est_mutation*2),NMaskType_BANNED);

  ref_trans = translate_Sequence(ref,ct);
  diff_trans = translate_Sequence(diff,ct);

  hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

  for(alc=alb->start;alc != NULL;alc = alc->next) {
    if( strcmp(alc->alu[0]->text_label,"SEQUENCE") != 0 ||
        strcmp(alc->alu[1]->text_label,"SEQUENCE") != 0 ) {
      continue;
    }

    hmm_pos = alc->alu[0]->end;
    seq_pos = alc->alu[1]->end;
    /* three bases per residue: DNA offset of the aligned codon */
    dna_offset = seq_pos*3;

    pseudo = logl_pseudogene(ref->seq+dna_offset,diff->seq+dna_offset,pseudo_dm);
    neg = logl_negative_selection(ref->seq+dna_offset,diff->seq+dna_offset,tsm->unit[hmm_pos],ct,pseudo_dm);

    ref_aa = ref_trans->seq[seq_pos];
    diff_aa = diff_trans->seq[seq_pos];

    /*
     * Residues are used as (letter - 'A') indices into match_emission,
     * so anything outside 'A'..'Z' is skipped to avoid a wild index.
     * Assumes the emission table has one entry per letter - TODO confirm.
     */
    if( ref_aa != diff_aa &&
        ref_aa >= 'A' && ref_aa <= 'Z' &&
        diff_aa >= 'A' && diff_aa <= 'Z' ) {

      diff_score = Probability2Score(tsm->unit[hmm_pos]->match_emission[ref_aa-'A']) - Probability2Score(tsm->unit[hmm_pos]->match_emission[diff_aa-'A']);

      score_ratio += diff_score;

      if( diff_score < 0 ) {
        count_ref_negative++;
        if( diff_score > score_neg_5 ) {
          count_ref_negative_0_5++;
        } else if ( diff_score > score_neg_10 ) {
          count_ref_negative_5_10++;
        } else {
          count_ref_negative_10_15++;
        }
      } else {
        count_ref_positive++;
      }
    }

    total_pseudo += pseudo;
    total_neg += neg;
  }

  fprintf(ofp,"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\n",ref->name,hmmp->baseseq->name,Score2Bits(score_ratio),
          count_ref_positive,count_ref_negative,
          count_ref_negative_0_5,
          count_ref_negative_5_10,
          count_ref_negative_10_15);

  /* disabled extended report; the counters above are kept for it
  fprintf(ofp,"%s,%s Total Pseudo %d vs Negative %d, Ratio %.4f  Positive %d Negative %d Score %.2f Syn %d Changes %d NonSyn %d Changes %d\n",ref->name,hmmp->baseseq->name,total_pseudo,total_neg,Score2Bits(total_neg-total_pseudo),count_ref_positive,count_ref_negative,Score2Bits(score_ratio),syn_sites,syn_changes,nonsyn_sites,nonsyn_changes);
  */

  /*
   * NOTE(review): ref_trans, diff_trans, pseudo_dm and negative_dm
   * are not released here; free them once ownership is confirmed.
   */
  free_Protein(hmmp);

}