示例#1
0
/*
 * Scores the change from codon `ref` to codon `diff` for the
 * positive-selection case.
 *
 * The three per-base substitution probabilities taken from `dm` are always
 * scored. When the two codons translate (through `ct`) to different amino
 * acids, the `rm` probability of the new amino acid is added as well.
 *
 * Both `ref` and `diff` are read at offsets 0..2.
 */
Score logl_positive_selection(char * ref,char * diff,RandomModel * rm,CodonTable *ct,DnaProbMatrix * dm)
{
  Score total = 0;
  char aa_before;
  char aa_after;
  int pos = 0;

  /* base-by-base substitution cost across the codon */
  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  aa_before = aminoacid_from_seq(ct,ref);
  aa_after  = aminoacid_from_seq(ct,diff);

  /* only a changed amino acid contributes the extra residue term */
  if( aa_before != aa_after ) {
    total += Probability2Score(rm->aminoacid[aa_after-'A']);
  }

  return total;
}
示例#2
0
/*
 * Allocates a SyExonScoreUnit and fills its exit and stay scores with the
 * Probability2Score conversions of the corresponding SyExon probabilities.
 * The allocation result is not checked.
 */
SyExonScoreUnit * SyExonScoreUnit_from_SyExon(SyExon * sye)
{
  SyExonScoreUnit * unit = SyExonScoreUnit_alloc();

  unit->exit_score = Probability2Score(sye->exit_prob);
  unit->stay_score = Probability2Score(sye->stay_prob);

  return unit;
}
示例#3
0
/*
 * Converts the first `len` probabilities of `from` into scores, writing
 * them to the same positions of `to`. Returns `to`.
 * Nothing is written when `len` is zero or negative.
 */
Score * Probability2Score_move(Probability * from,Score * to,int len)
{
  Probability * src = from;
  Score * dst = to;
  int remaining;

  for(remaining = len; remaining > 0; remaining--) {
    *dst = Probability2Score(*src);
    dst++;
    src++;
  }

  return to;
}
示例#4
0
/*
 * Scores the change from codon `ref` to codon `diff` using only the
 * per-base substitution probabilities in `dm`; the three base scores are
 * summed and returned.
 */
Score logl_pseudogene(char * ref,char * diff,DnaProbMatrix * dm)
{
  Score total = 0;
  int pos = 0;

  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  return total;
}
/*
 * Allocates a ComplexConsensusWord holding a stringalloc() copy of
 * `string`, the probability `p`, and the score form of `p`.
 * The allocation result is not checked.
 */
ComplexConsensusWord * ComplexConsensusWord_from_string_and_prob(char * string,Probability p)
{
  ComplexConsensusWord * word = ComplexConsensusWord_alloc();

  word->pattern = stringalloc(string);
  word->p       = p;
  word->score   = Probability2Score(p);

  return word;
}
示例#6
0
/*
 * Builds a RandomModelDNAScore in which each of the five base slots holds
 * the score of the ratio dis->base[i] / rnd->base[i].
 *
 * Returns NULL when the score object cannot be allocated, matching
 * RandomModelDNAScore_from_RandomModelDNA.
 *
 * NOTE(review): a zero entry in rnd->base is not guarded against; the
 * division result is handed to Probability2Score as-is.
 */
RandomModelDNAScore * folded_RandomModelDNAScore_from_2RMD(RandomModelDNA * dis,RandomModelDNA * rnd)
{
  int i;
  RandomModelDNAScore * out;

  out = RandomModelDNAScore_alloc();
  /* never write through a failed allocation */
  if( out == NULL )
    return NULL;

  for(i=0;i<5;i++)
    out->base[i]= Probability2Score(dis->base[i]/rnd->base[i]);

  return out;
}
示例#7
0
/*
 * Allocates a CompMat whose 26x26 entries are the values of `cm` passed
 * through halfbit2Probability and then Probability2Score.
 * The allocation result is not checked.
 */
CompMat * CompMat_from_halfbit(CompMat * cm)
{
  CompMat * converted = CompMat_alloc();
  int row;
  int col;

  for(row = 0; row < 26; row++) {
    for(col = 0; col < 26; col++) {
      converted->comp[row][col] = Probability2Score(halfbit2Probability(cm->comp[row][col]));
    }
  }

  return converted;
}
示例#8
0
/*
 * Converts a GenePhaseModel into its score form.
 *
 * Asserts that `gpm` and its GeneWise member exist and that their lengths
 * agree. The GeneWise part is converted with GeneWiseScore_from_GeneWise;
 * then one segment is appended per model position and its three intron
 * phase probabilities are converted to scores.
 */
GenePhaseScore * GenePhaseScore_from_GenePhaseModel(GenePhaseModel * gpm)
{
  GenePhaseScore * scored;
  int seg;

  assert(gpm != NULL);
  assert(gpm->gw != NULL);
  assert(gpm->gw->len == gpm->len);

  scored = GenePhaseScore_alloc_len(gpm->len);
  scored->gws = GeneWiseScore_from_GeneWise(gpm->gw);

  seg = 0;
  while( seg < gpm->len ) {
    /* append the segment first so that phase[seg] exists to be filled */
    add_GenePhaseScore(scored,GenePhaseSegScore_alloc());

    scored->phase[seg]->intron_0 = Probability2Score(gpm->phase[seg]->intron_0);
    scored->phase[seg]->intron_1 = Probability2Score(gpm->phase[seg]->intron_1);
    scored->phase[seg]->intron_2 = Probability2Score(gpm->phase[seg]->intron_2);

    seg++;
  }

  return scored;
}
示例#9
0
    /*
     * Allocates a CodonMatrixScore and fills its 125x125 score table with
     * the Probability2Score conversion of the matching `cm->prob` entry.
     * The allocation result is not checked.
     */
    CodonMatrixScore * CodonMatrixScore_from_CodonMatrix(CodonMatrix * cm)
    {
        CodonMatrixScore * scored = CodonMatrixScore_alloc();
        int row;
        int col;

        for(row = 0; row < 125; row++) {
            for(col = 0; col < 125; col++) {
                scored->score[row][col] = Probability2Score(cm->prob[row][col]);
            }
        }

        return scored;
    }
示例#10
0
    /*
     * Aligns two DNA profiles and returns the result as a newly allocated
     * DnaProfileMatchPair.
     *
     * The pair keeps hard links to `query` and `target`. Score forms of
     * both profiles and an ALLR match score are built, the alignment is
     * run with the open/ext/gap "unmatched" probabilities from `dpep`
     * converted to scores, and the resulting alignment block and score are
     * stored on the pair. All temporaries built here are freed before
     * returning. Progress messages are written to stderr.
     *
     * `query`, `target` and `dpep` must be non-NULL; the alignment result
     * is asserted to be non-NULL before it is used.
     */
    DnaProfileMatchPair * DnaProfileMatchPair_from_DnaProfile(DnaProfile * query,DnaProfile * target,DnaProfileEnginePara * dpep)
    {
        DnaProfileMatchPair * out;
        DnaProfileScore * query_s;
        DnaProfileScore * target_s;
        DnaProfileMatchScore * match;

        PackAln * pal;


        assert(query != NULL);
        assert(target != NULL);
        /* dpep is dereferenced below for every gap parameter */
        assert(dpep != NULL);

        out = DnaProfileMatchPair_alloc();
        out->query = hard_link_DnaProfile(query);
        out->target = hard_link_DnaProfile(target);


        query_s  = DnaProfileScore_from_DnaProfile(query);
        target_s = DnaProfileScore_from_DnaProfile(target);

        fprintf(stderr,"Matching %d to %d\n",query->len,target->len);

        match= new_ALLR_DnaProfileMatchScore(query,target);

        pal = PackAln_bestmemory_DnaProfileMat(query_s,target_s,match,Probability2Score(dpep->open_unmatched),Probability2Score(dpep->ext_unmatched),Probability2Score(dpep->gap_unmatched),NULL,dpep->dpri);

        /* a pointer must be printed with %p and passed as void *;
           the previous %d conversion was undefined behaviour */
        fprintf(stderr,"...Made pal %p\n",(void *)pal);

        /* pal->score is read below, so a failed alignment must stop here */
        assert(pal != NULL);

        out->alb = convert_PackAln_to_AlnBlock_DnaProfileMat(pal);
        out->score = pal->score;

        fprintf(stderr,"...freeing pal\n");
        free_PackAln(pal);


        fprintf(stderr,"...freeing match\n");
        free_DnaProfileMatchScore(match);
        fprintf(stderr,"...freeing query\n");
        free_DnaProfileScore(query_s);
        fprintf(stderr,"...freeing target\n");
        free_DnaProfileScore(target_s);


        return out;
    }
示例#11
0
/*
 * Converts motif-matrix parameters into their score form.
 *
 * For each of the three composition settings (in-match, out-match and
 * spacer) a temporary DnaProbMatrix is built with NMaskType_BANNED,
 * passed through flat_null_DnaProbMatrix, converted to a DnaMatrix and
 * then freed. The nine scalar probabilities are converted with
 * Probability2Score. The allocation of the result is not checked.
 */
MotifMatrixScore * MotifMatrixScore_from_MotifMatrixPara(MotifMatrixPara * mmp)
{
  MotifMatrixScore * scored = MotifMatrixScore_alloc();
  DnaProbMatrix * dmp;

  /* composition inside a matched motif column */
  dmp = DnaProbMatrix_from_match(mmp->comp_in_match,NMaskType_BANNED);
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);
  scored->comp_in_motif = DnaMatrix_from_DnaProbMatrix(dmp);
  free_DnaProbMatrix(dmp);

  /* composition outside the motif */
  dmp = DnaProbMatrix_from_match(mmp->comp_out_match,NMaskType_BANNED);
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);
  scored->comp_out_motif = DnaMatrix_from_DnaProbMatrix(dmp);
  free_DnaProbMatrix(dmp);

  /* composition in the spacer */
  dmp = DnaProbMatrix_from_match(mmp->comp_spacer,NMaskType_BANNED);
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);
  scored->comp_spacer = DnaMatrix_from_DnaProbMatrix(dmp);
  free_DnaProbMatrix(dmp);

  /* scalar transition and duration probabilities */
  scored->region_in       = Probability2Score(mmp->region_in);
  scored->motif_indel     = Probability2Score(mmp->motif_indel);
  scored->cons_indel      = Probability2Score(mmp->cons_indel);
  scored->spacer_indel    = Probability2Score(mmp->spacer_indel);
  scored->spacer_to_cons  = Probability2Score(mmp->spacer_to_cons);
  scored->spacer_to_motif = Probability2Score(mmp->spacer_to_motif);
  scored->spacer_duration = Probability2Score(mmp->spacer_duration);
  scored->motif_duration  = Probability2Score(mmp->motif_duration);
  scored->cons_duration   = Probability2Score(mmp->cons_duration);

  return scored;
}
示例#12
0
/*
 * Allocates a RandomModelScoreaa and fills its 26 amino acid slots with
 * the score form of the matching `rm` probabilities.
 * Returns NULL when the allocation fails.
 */
RandomModelScoreaa * RandomModelScoreaa_from_RandomModel(RandomModel * rm)
{
  RandomModelScoreaa * scored = RandomModelScoreaa_alloc();
  int letter;

  if( scored != NULL ) {
    for(letter = 0; letter < 26; letter++) {
      scored->aminoacid[letter] = Probability2Score(rm->aminoacid[letter]);
    }
  }

  return scored;
}
示例#13
0
/*
 * Scores the change from codon `ref` to codon `diff` for the
 * negative-selection case.
 *
 * The three per-base substitution probabilities taken from `dm` are always
 * scored. When the codons translate (through `ct`) to different amino
 * acids, the difference between the match-emission score of the new amino
 * acid and that of the old one, both read from `unit`, is added.
 */
Score logl_negative_selection(char * ref,char * diff,ThreeStateUnit * unit,CodonTable * ct,DnaProbMatrix * dm)
{
  Score total = 0;
  char aa_before;
  char aa_after;
  int pos = 0;

  /* base-by-base substitution cost across the codon */
  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  aa_before = aminoacid_from_seq(ct,ref);
  aa_after  = aminoacid_from_seq(ct,diff);

  /* an unchanged amino acid contributes nothing beyond the base terms */
  if( aa_before != aa_after ) {
    total += Probability2Score(unit->match_emission[aa_after-'A'])  - Probability2Score(unit->match_emission[aa_before-'A']);
  }

  return total;
}
示例#14
0
/*
 * Allocates a CompMat whose 26x26 entries are the Probability2Score
 * conversions of the matching entries of `cp`.
 * The allocation result is not checked.
 */
CompMat * CompMat_from_CompProb(CompProb * cp)
{
  CompMat * scored = CompMat_alloc();
  int row;
  int col;

  for(row = 0; row < 26; row++)
    for(col = 0; col < 26; col++)
      scored->comp[row][col] = Probability2Score(cp->comp[row][col]);

  return scored;
}
示例#15
0
/*
 * Allocates a RandomModelDNAScore and fills its five base slots with the
 * score form of the matching `rmd` probabilities.
 * Returns NULL when the allocation fails.
 */
RandomModelDNAScore * RandomModelDNAScore_from_RandomModelDNA(RandomModelDNA * rmd)
{
  RandomModelDNAScore * scored = RandomModelDNAScore_alloc();
  int slot;

  if( scored != NULL ) {
    for(slot = 0; slot < 5; slot++)
      scored->base[slot] = Probability2Score(rmd->base[slot]);
  }

  return scored;
}
示例#16
0
/*
 * Allocates and fills a SpliceSiteModel.
 *
 * The integer layout parameters are copied as given, `cc` and `rmds` are
 * stored through their hard_link functions, and `error` is stored in
 * score form. Returns NULL when the allocation fails.
 */
SpliceSiteModel * new_SpliceSiteModel(int offset,int pre_length,int post_length,int start,int stop,ComplexConsensi * cc,RandomModelDNAScore * rmds,Probability error)
{
  SpliceSiteModel * model = SpliceSiteModel_alloc();

  if( model != NULL ) {
    /* window geometry */
    model->offset           = offset;
    model->pre_splice_site  = pre_length;
    model->post_splice_site = post_length;
    model->start_random     = start;
    model->stop_random      = stop;

    /* shared components and the error term */
    model->cc        = hard_link_ComplexConsensi(cc);
    model->rmds      = hard_link_RandomModelDNAScore(rmds);
    model->error_pos = Probability2Score(error);
  }

  return model;
}
示例#17
0
/*
 * Driver for the StatWise10 alignment: reads one FASTA sequence named on
 * the command line, aligns it, prints the alignment, and writes a FASTA
 * translation for every predicted gene that is not a pseudogene.
 *
 * `ct` and `id` are not declared in this function; they are expected to be
 * provided at file scope.
 *
 * Exits with status 12 after printing help when exactly one positional
 * argument is not left once the options have been stripped.
 */
int main(int argc,char ** argv)
{
  DPRunImpl * run_impl = NULL;
  GeneModelParam * model_param = NULL;
  GeneModel * gene_model = NULL;

  Sequence * seq;

  RandomCodon * codon_model;
  RandomModelDNA * dna_model;
  RandomCodonScore * codon_scores;

  ComplexSequenceEvalSet * evalset;
  ComplexSequence * complex_seq;

  SyExonScore * exon_scores;

  PackAln * packed;
  AlnBlock * block;

  Genomic * genomic_seq;
  GenomicRegion * region;
  Protein * protein;

  int gene_no;

  /* option parsing: each call removes what it recognises from argv */
  run_impl = new_DPRunImpl_from_argv(&argc,argv);
  if( run_impl == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  model_param = new_GeneModelParam_from_argv(&argc,argv);

  ct= read_CodonTable_file("codon.table");

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 2 ) {
    show_help(stdout);
    exit(12);
  }

  gene_model = GeneModel_from_GeneModelParam(model_param);
  if( gene_model == NULL ) {
    fatal("Could not build gene model");
  }

  /* sequence and its per-position evaluations */
  seq = read_fasta_file_Sequence(argv[1]);

  assert(seq);

  evalset     = new_ComplexSequenceEvalSet_from_GeneModel(gene_model);
  complex_seq = new_ComplexSequence(seq,evalset);

  /* flat codon model with the standard DNA random model folded in */
  codon_model = flat_RandomCodon(ct);
  dna_model   = RandomModelDNA_std();

  fold_in_RandomModelDNA_into_RandomCodon(codon_model,dna_model);
  codon_scores = RandomCodonScore_from_RandomCodon(codon_model);

  exon_scores = SyExonScore_flat_model(200,250,0.1,0.1);

  /* alignment and its printed form */
  packed = PackAln_bestmemory_StatWise10(exon_scores,complex_seq,codon_scores,Probability2Score(1.0/10.0),Probability2Score(1.0/10.0),NULL,run_impl);
  block  = convert_PackAln_to_AlnBlock_StatWise10(packed);

  mapped_ascii_AlnBlock(block,id,1,stdout);

  /* turn the alignment into gene predictions on the input sequence */
  genomic_seq = Genomic_from_Sequence(seq);
  region      = new_GenomicRegion(genomic_seq);

  add_Genes_to_GenomicRegion_GeneWise(region,1,seq->len,block,"bollocks",0,NULL);

  for(gene_no = 0; gene_no < region->len; gene_no++) {
    if( region->gene[gene_no]->ispseudo == TRUE ) {
      fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",gene_no);
      continue;
    }

    protein = get_Protein_from_Translation(region->gene[gene_no]->transcript[0]->translation[0],ct);
    write_fasta_Sequence(protein->baseseq,stdout);
  }

  return 0;
}
示例#18
0
/*
 * Writes one tab-separated summary line comparing `ref` with `diff`
 * against the model `tsm` to `ofp`.
 *
 * The line holds: the name of `ref`, the name of the pseudo protein built
 * from `tsm`, the accumulated emission-score difference in bits, and the
 * counts of amino acid differences where that score difference was
 * non-negative, negative, and negative split into three bands by the
 * -5 bit and -10 bit thresholds.
 *
 * Only alignment columns whose two labels are both "SEQUENCE" are
 * examined. The synonymous/non-synonymous tallies and the pseudo/negative
 * totals are computed but not part of the line that is written.
 *
 * NOTE(review): `diff` is indexed up to ref->len, so it is presumably
 * expected to be at least as long as `ref` - confirm with callers.
 * NOTE(review): negative_dm is built but pseudo_dm is the matrix passed
 * to logl_negative_selection - confirm this is intended.
 * NOTE(review): pseudo_dm, negative_dm, ref_trans and diff_trans are not
 * released here - confirm ownership.
 */
void show_verbose_evo(AlnBlock * alb,ThreeStateModel * tsm,Sequence * ref,Sequence * diff,CodonTable * ct,FILE * ofp)
{
  AlnColumn * alc;
  Protein * hmmp;

  Sequence * ref_trans;
  Sequence * diff_trans;

  DnaProbMatrix * negative_dm;
  DnaProbMatrix * pseudo_dm;

  int i;
  int k;
  int count = 0;
  double est_mutation = 0.0;

  int dna_offset;

  Score total_pseudo = 0;
  Score total_neg = 0;
  Score pseudo = 0;
  Score neg = 0;

  int count_ref_positive = 0;
  int count_ref_negative = 0;

  int count_ref_negative_0_5   = 0;
  int count_ref_negative_5_10  = 0;
  int count_ref_negative_10_15 = 0;

  int syn_sites = 0;
  int nonsyn_sites = 0;

  int syn_changes = 0;
  int nonsyn_changes = 0;

  int diff_score;

  char diff_aa;
  char ref_aa;

  int score_ratio = 0;
  Score score_neg_5  = Probability2Score(Bits2Probability(-5.0));
  Score score_neg_10 = Probability2Score(Bits2Probability(-10.0));

  /* Walk whole codons only: a trailing partial codon is skipped so that
     the three-base reads below never run past ref->len. */
  for(i=0;i+2<ref->len;i+=3) {

    /* if the amino acid has changed, every base change is non syn */
    if( aminoacid_from_seq(ct,ref->seq+i) != aminoacid_from_seq(ct,diff->seq+i)) {
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          nonsyn_changes++;
        }
      }
    } else {
      /* could still be syn change */
      for(k=0;k<3;k++) {
        if( ref->seq[i+k] != diff->seq[i+k] ) {
          syn_changes++;
        }
      }
    }

    /* calculate the sites. There are always 2 non syn sites */
    nonsyn_sites += 2;

    if( four_fold_sites_CodonTable(ct,ref->seq+i) > 0 ) {
      syn_sites++;
    } else {
      nonsyn_sites += 1;
    }
  }

  /* per-base mismatch count, used as the mutation estimate */
  for(i=0;i<ref->len;i++) {
    if( ref->seq[i] != diff->seq[i] ) {
      count++;
    }
  }

  /* an empty reference leaves the estimate at 0.0 instead of dividing by zero */
  if( ref->len > 0 ) {
    est_mutation = (double)count / (double)ref->len;
  }

  pseudo_dm = DnaProbMatrix_from_match(1.0 - est_mutation,NMaskType_BANNED);
  negative_dm = DnaProbMatrix_from_match(1.0 - (est_mutation*2),NMaskType_BANNED);

  ref_trans = translate_Sequence(ref,ct);
  diff_trans = translate_Sequence(diff,ct);

  hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

  for(alc=alb->start;alc != NULL;alc = alc->next) {
    if( strcmp(alc->alu[0]->text_label,"SEQUENCE") != 0 ||
        strcmp(alc->alu[1]->text_label,"SEQUENCE") != 0 ) {
      continue;
    }

    /* alu[1]->end indexes the translated sequence; the codon starts at 3x that */
    dna_offset = alc->alu[1]->end*3;

    pseudo = logl_pseudogene(ref->seq+dna_offset,diff->seq+dna_offset,pseudo_dm);
    neg    = logl_negative_selection(ref->seq+dna_offset,diff->seq+dna_offset,tsm->unit[alc->alu[0]->end],ct,
                                     pseudo_dm);

    ref_aa = ref_trans->seq[alc->alu[1]->end];
    diff_aa = diff_trans->seq[alc->alu[1]->end];
    if( ref_aa != diff_aa  ) {
      /* emission score of the reference residue minus that of the changed
         residue; computed once and reused for the total and the banding */
      diff_score = Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[ref_aa-'A']) - Probability2Score(tsm->unit[alc->alu[0]->end]->match_emission[diff_aa-'A']);

      score_ratio += diff_score;

      if( diff_score < 0) {
        count_ref_negative++;
        if( diff_score > score_neg_5 ) {
          count_ref_negative_0_5++;
        } else if ( diff_score > score_neg_10 ) {
          count_ref_negative_5_10++;
        } else {
          count_ref_negative_10_15++;
        }
      } else {
        count_ref_positive++;
      }
    }

    total_pseudo += pseudo;
    total_neg += neg;
  }

  fprintf(ofp,"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\n",ref->name,hmmp->baseseq->name,Score2Bits(score_ratio),
          count_ref_positive,count_ref_negative,
          count_ref_negative_0_5,
          count_ref_negative_5_10,
          count_ref_negative_10_15);

  free_Protein(hmmp);

}
示例#19
0
/*
 * Adds two scores in probability space: both are converted with
 * Score2Probability, summed, and the sum is converted back to a score.
 */
Score Score_Probability_sum(Score one,Score two)
{
  Score summed;

  summed = Probability2Score(Score2Probability(one) + Score2Probability(two));

  return summed;
}
示例#20
0
/*
 * Driver for the SyWise20 alignment over a two-sequence FASTA alignment.
 *
 * Reads the alignment file named on the command line, builds the codon
 * and non-coding pair-base scores, then either computes the alignment or,
 * when the "recover" option names a file, reads a previously dumped one.
 * The alignment can be dumped with the "dump" option. The score sequence
 * and the predicted genomic region are written to stdout.
 *
 * `ct` and `codon_table` are not declared in this function; they are
 * expected to be provided at file scope.
 *
 * Exits with status 12 after printing help when exactly one positional
 * argument is not left once the options have been stripped.
 */
int main(int argc,char ** argv)
{
    DPRunImpl * dpri = NULL;
    GeneModelParam * gmp = NULL;
    GeneModel * gm = NULL;

    FILE * ifp;
    SeqAlign   * al;
    PairBaseSeq * pbs;

    ComplexSequenceEval * splice5;
    ComplexSequenceEval * splice3;
    /* only built when the alignment is computed rather than recovered */
    ComplexSequence * cseq = NULL;

    CompProb * comp_prob;
    RandomModel * rm;

    PairBaseCodonModelScore * codon_score;
    PairBaseModelScore* nonc_score;

    PairBaseCodonModelScore * start;
    PairBaseCodonModelScore * stop;

    SyExonScore * exonscore;

    PackAln * pal = NULL;
    AlnBlock * alb;

    Genomic * genomic;
    GenomicRegion * gr;
    GenomicRegion * gr2;

    StandardOutputOptions * std_opt;
    ShowGenomicRegionOptions * sgro;

    char * dump_packaln = NULL;
    char * read_packaln = NULL;
    FILE * packifp = NULL;

    ct = read_CodonTable_file(codon_table);

    rm = default_RandomModel();

    comp_prob = read_Blast_file_CompProb("wag85");

    fold_column_RandomModel_CompProb(comp_prob,rm);

    /* option parsing: each call removes what it recognises from argv */
    dpri = new_DPRunImpl_from_argv(&argc,argv);
    if( dpri == NULL ) {
        fatal("Unable to build DPRun implementation. Bad arguments");
    }

    gmp = new_GeneModelParam_from_argv(&argc,argv);

    std_opt = new_StandardOutputOptions_from_argv(&argc,argv);
    sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv);

    dump_packaln = strip_out_assigned_argument(&argc,argv,"dump");
    read_packaln = strip_out_assigned_argument(&argc,argv,"recover");

    strip_out_standard_options(&argc,argv,show_help,show_version);
    if( argc != 2 ) {
        show_help(stdout);
        exit(12);
    }

    if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
        fatal("Could not build gene model");
    }

    codon_score = make_PairBaseCodonModelScore(comp_prob);
    nonc_score  = make_PairBaseModelScore();

    splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score);
    splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score);

    if((ifp = openfile(argv[1],"r")) == NULL ) {
        fatal("Could not open file %s",argv[1]);
    }

    al = read_fasta_SeqAlign(ifp);

    assert(al);
    assert(al->len == 2);
    assert(al->seq[0]->len > 0);
    assert(al->seq[1]->len > 0);

    pbs = new_PairBaseSeq_SeqAlign(al);

    /* the complex sequence is only needed when the alignment is computed */
    if( read_packaln == NULL ) {
        cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3);
    }

    start = make_start_PairBaseCodonModelScore(ct);
    stop  = make_stop_PairBaseCodonModelScore(ct);

    exonscore = SyExonScore_flat_model(100,150,0.1,1.0);

    if( read_packaln != NULL ) {
        packifp = openfile(read_packaln,"r");
        if( packifp == NULL ) {
            /* Report the recover file itself: the message used to print
               dump_packaln, which is a different option and may be NULL. */
            fatal("File %s is unopenable - cannot recover alignment",read_packaln);
        } else {
            pal = read_simple_PackAln(packifp);
        }
    } else {
        pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri);
    }

    alb = convert_PackAln_to_AlnBlock_SyWise20(pal);

    if( dump_packaln != NULL ) {
        packifp = openfile(dump_packaln,"w");
        if( packifp == NULL ) {
            warn("File %s is unopenable - ignoring dump command",dump_packaln);
        } else {
            show_simple_PackAln(pal,packifp);
        }
    }

    show_score_sequence(alb,pbs,nonc_score,stdout);

    genomic = Genomic_from_Sequence(al->seq[0]);
    gr = new_GenomicRegion(genomic);
    /* NOTE(review): gr2 is created but not used afterwards; the call is
       kept in case new_GenomicRegion has effects on `genomic`. */
    gr2 = new_GenomicRegion(genomic);

    add_Genes_to_GenomicRegion_new(gr,alb);

    show_GenomicRegionOptions(sgro,gr,ct,"//",stdout);

    return 0;
}
示例#21
0
/*
 * Converts a probability to bits by composing the two existing
 * conversions: probability -> score -> bits.
 */
Bits Probability2Bits(Probability p)
{
  return Score2Bits(
      Probability2Score(p)
  );
}
示例#22
0
/*
 * Returns the score of the sum of the probabilities represented by the
 * two given scores (each converted with Score2Probability).
 */
Score Probability_logsum(Score one,Score two)
{
  Score combined;

  combined = Probability2Score(Score2Probability(one) + Score2Probability(two));

  return combined;
}
示例#23
0
/*
 * Builds a GeneModel from gene statistics `gs` and parameters `p`.
 *
 * - The 64 codon entries are copied from `gs`.
 * - The 5' and 3' splice site matrices are built from the `gs` alignments
 *   with the pseudo counts in `p`, have the `gs` random model folded in,
 *   and are converted to score form together with the offset and the
 *   collar / score-offset settings (given in bits in `p`).
 * - Intron base emissions (and poly-pyrimidine emissions when `gs` has
 *   them) are the `gs` values plus a pseudo count, normalised over the
 *   four bases; the fifth slot is set to 1.0.
 * - The random model is hard-linked from `gs`.
 *
 * `gs`, its splice5/splice3/intron/rnd members and `p` must be non-NULL.
 */
GeneModel * GeneModel_from_GeneStats(GeneStats * gs,GeneModelParam * p)
{
  GeneModel * out;
  int i;
  double total;

  out = GeneModel_alloc();

  assert(gs);
  assert(gs->splice5);
  assert(gs->splice3);
  assert(gs->intron);
  assert(gs->rnd);
  /* p is dereferenced throughout, so check it like the gs members */
  assert(p);


  for(i=0;i<64;i++) {
     out->codon[i] = gs->codon[i];
  }

  /* 5' splice site */
  out->splice5 = pwmDNA_from_SeqAlign(gs->splice5,p->splice5_pseudo);

  fold_randommodel_pwmDNA(out->splice5,gs->rnd);

  out->splice5score = SpliceSiteScore_alloc();
  out->splice5score->score = pwmDNAScore_from_pwmDNA(out->splice5);
  out->splice5score->offset = gs->splice5_offset;
  out->splice5score->min_collar   = Probability2Score(Bits2Probability(p->min_collar));
  out->splice5score->max_collar   = Probability2Score(Bits2Probability(p->max_collar));
  out->splice5score->score_offset = Probability2Score(Bits2Probability(p->score_offset));


  /* 3' splice site */
  out->splice3 = pwmDNA_from_SeqAlign(gs->splice3,p->splice3_pseudo);
  fold_randommodel_pwmDNA(out->splice3,gs->rnd);

  out->splice3score = SpliceSiteScore_alloc();
  out->splice3score->score = pwmDNAScore_from_pwmDNA(out->splice3);
  out->splice3score->offset = gs->splice3_offset;
  out->splice3score->min_collar   = Probability2Score(Bits2Probability(p->min_collar));
  out->splice3score->max_collar   = Probability2Score(Bits2Probability(p->max_collar));
  out->splice3score->score_offset = Probability2Score(Bits2Probability(p->score_offset));

  out->use_gtag_splice = p->use_gtag_splice;
  out->score_for_gtag  = Probability2Score(p->prob_for_gtag);

  /* intron emissions: pseudo-counted and normalised over the four bases */
  out->intron  = RandomModelDNA_alloc();
  for(total = 0.0,i=0;i<4;i++)
    total += gs->intron->base[i] + p->intron_emission_pseudo;

  for(i=0;i<4;i++)
    out->intron->base[i] = (gs->intron->base[i] + p->intron_emission_pseudo)/total;

  out->intron->base[4] = 1.0;

  if( gs->polyp != NULL ) {
    out->polyp  = RandomModelDNA_alloc();
    for(total = 0.0,i=0;i<4;i++)
     total += gs->polyp->base[i] + p->polyp_emission_pseudo;

    for(i=0;i<4;i++)
       out->polyp->base[i] = (gs->polyp->base[i] + p->polyp_emission_pseudo)/total;

    /* fill the fifth slot exactly as for the intron model above, so that
       it is never left unset */
    out->polyp->base[4] = 1.0;
  }

  out->rnd = hard_link_RandomModelDNA(gs->rnd);
  return out;
}
示例#24
0
/*
 * Driver: searches a protein FASTA database (second argument) with the
 * five-state model built from the "block.str" frame set found in the
 * context named by the first argument, then prints the hits whose bit
 * score is at least the "ga" gathering cutoff (default 0.0).
 *
 * Exits with status 12 after printing help when exactly two positional
 * arguments are not left once the options have been stripped.
 */
int main(int argc,char **argv)
{
  DBSearchImpl * search_impl;
  FiveStateFrameSet * frameset;
  FiveStateModel * model;
  FiveStateScore * model_score;
  RandomModel * background;
  ProteinDB * db;
  Hscore * hits;

  double gathering_cutoff = 0.0;
  double hit_bits;
  int idx;

  /* option parsing: each call removes what it recognises from argv */
  search_impl = new_DBSearchImpl_from_argv(&argc,argv);
  strip_out_float_argument(&argc,argv,"ga",&gathering_cutoff);
  strip_out_standard_options(&argc,argv,show_help,show_version);

  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

  background = default_RandomModel();

  /* build the model and fold the background model into it */
  frameset = read_FiveStateFrameSet_file(argv[1],"block.str");
  if( frameset == NULL ) {
    fatal("Unable to make FiveStateModel from context %s, block.str file",argv[1]);
  }

  model = FiveStateModel_from_FiveStateFrameSet(frameset);
  model->name = stringalloc(argv[1]);

  fold_RandomModel_into_FiveStateModel(model,background);

  /* score form of the model, used by the search */
  model_score = FiveStateScore_from_FiveStateModel(model);

  db = single_fasta_ProteinDB(argv[2]);
  if( db == NULL ) {
    fatal("Unable to make proteindb from %s",argv[2]);
  }

  /* NOTE(review): gathering_cutoff is compared with Score2Bits() values
     below but is handed to Probability2Score() here - confirm the
     intended units of the cutoff. */
  hits = std_score_Hscore(Probability2Score(gathering_cutoff)-10,-1);

  search_FiveStateProtein(search_impl,hits,model_score,db);

  fprintf(stdout,"\n\n#High Score list\n");
  fprintf(stdout,"#Protein ID                 DNA Str  ID                        Bits Evalue\n");
  fprintf(stdout,"--------------------------------------------------------------------------\n");

  /* reporting stops at the first hit that falls below the cutoff */
  idx = 0;
  while( idx < hits->len ) {
    hit_bits = Score2Bits(hits->ds[idx]->score);
    if( hit_bits < gathering_cutoff ) {
      break;
    }

    fprintf(stdout,"Protein %-20sDNA [%c] %-24s %.2f\n",hits->ds[idx]->query->name,hits->ds[idx]->target->is_reversed == TRUE ? '-' : '+',hits->ds[idx]->target->name,hit_bits);
    idx++;
  }


}
示例#25
0
/*
 * Driver for the CdnaWise10 alignment: aligns a cDNA sequence (first
 * argument) against a genomic sequence (second argument), prints the
 * alignment and the predicted genomic region, and writes a FASTA
 * translation for every predicted gene that is not a pseudogene.
 *
 * Options "u"/"v" restrict the genomic sequence to a start/end range and
 * "s"/"t" do the same for the cDNA sequence; a missing start defaults to
 * 1 and a missing end to the length of the respective sequence.
 *
 * `codon_file` is not declared in this function; it is expected to be
 * provided at file scope.
 *
 * Returns 12 after printing help when fewer than two positional
 * arguments are left once the options have been stripped.
 */
int main(int argc,char ** argv)
{
  Sequence * cdna;
  Sequence * gen;
  Sequence * active_gen;
  Sequence * active_cdna;

  int i;
  int dstart = -1;
  int dend   = -1;

  int cstart = -1;
  int cend   = -1;

  CodonTable * ct = NULL;
  CodonMatrixScore * cm = NULL;
  RandomCodon * rndcodon = NULL;
  RandomCodonScore * rndcodonscore = NULL;
  DnaMatrix * dm   = NULL;

  DPRunImpl * dpri = NULL;

  GeneModel * gm;
  GeneModelParam * gmp;
  GeneParser21 * gp21;
  GeneParser21Score * gp21s;
  GeneParser4Score * gp;


  ComplexSequenceEvalSet * cdna_cses;
  ComplexSequenceEvalSet * gen_cses;

  ComplexSequence * cs_cdna;
  ComplexSequence * cs_gen;

  Genomic * gent;
  GenomicRegion * gr;

  CompMat  * cmat;
  CompProb * cprob;
  char * matfile = "blosum62.bla";
  Protein * trans;

  PackAln * pal;
  AlnBlock * alb;

  FILE * ofp = stdout;

  /* option parsing: each call removes what it recognises from argv */
  dpri = new_DPRunImpl_from_argv(&argc,argv);
  gmp  = new_GeneModelParam_from_argv(&argc,argv);

  strip_out_integer_argument(&argc,argv,"u",&dstart);
  strip_out_integer_argument(&argc,argv,"v",&dend);

  strip_out_integer_argument(&argc,argv,"s",&cstart);
  strip_out_integer_argument(&argc,argv,"t",&cend);


  strip_out_standard_options(&argc,argv,show_help,show_version);

  /* argv[1] and argv[2] are read below, so both must be present */
  if( argc < 3 ) {
    show_help(stdout);
    return 12;
  }


  ct = read_CodonTable_file(codon_file);

  cmat = read_Blast_file_CompMat(matfile);
  cprob = CompProb_from_halfbit(cmat);
  cm = naive_CodonMatrixScore_from_prob(ct,cprob);

  gm = GeneModel_from_GeneModelParam(gmp);

  cdna = read_fasta_file_Sequence(argv[1]);
  gen = read_fasta_file_Sequence(argv[2]);

  /* both sequences are dereferenced below */
  assert(cdna);
  assert(gen);

  if( dstart != -1 || dend != -1 ) {
    if( dstart == -1 ) {
      dstart = 1;
    }
    if( dend == -1 ) {
      dend = gen->len;
    }
    active_gen = magic_trunc_Sequence(gen,dstart,dend);
  } else {
    active_gen = hard_link_Sequence(gen);
  }

  /* The cDNA side must be derived from `cdna`. It used to be built from
     `gen`, which aligned the genomic sequence against itself. */
  if( cstart != -1 || cend != -1 ) {
    if( cstart == -1 ) {
      cstart = 1;
    }
    if( cend == -1 ) {
      cend = cdna->len;
    }
    active_cdna = magic_trunc_Sequence(cdna,cstart,cend);
  } else {
    active_cdna = hard_link_Sequence(cdna);
  }



  rndcodon = RandomCodon_from_raw_CodonFrequency(gm->codon,ct);
  fold_in_RandomModelDNA_into_RandomCodon(rndcodon,gm->rnd);

  rndcodonscore = RandomCodonScore_from_RandomCodon(rndcodon);

  assert(active_cdna);
  assert(active_gen);

  cdna_cses = default_cDNA_ComplexSequenceEvalSet();
  gen_cses  = new_ComplexSequenceEvalSet_from_GeneModel(gm);

  cs_cdna = new_ComplexSequence(active_cdna,cdna_cses);
  cs_gen  = new_ComplexSequence(active_gen,gen_cses);

  gp21 = std_GeneParser21();
  GeneParser21_fold_in_RandomModelDNA(gp21,gm->rnd);
  gp21s = GeneParser21Score_from_GeneParser21(gp21);
  gp = GeneParser4Score_from_GeneParser21Score(gp21s);

  /* match / mismatch DNA scores given in half-bits */
  dm = identity_DnaMatrix(Probability2Score(halfbit2Probability(1)),Probability2Score(halfbit2Probability(-1)));

  assert(cs_cdna);
  assert(cs_gen);
  assert(gp);
  assert(rndcodonscore);
  assert(dm);
  assert(dpri);

  pal = PackAln_bestmemory_CdnaWise10(cs_cdna,cs_gen,gp,cm,rndcodonscore,dm,
                                      Probability2Score(halfbit2Probability(-12)),
                                      Probability2Score(halfbit2Probability(-2)),
                                      Probability2Score(halfbit2Probability(-5)),
                                      Probability2Score(halfbit2Probability(0)),
                                      NULL,
                                      dpri);


  alb = convert_PackAln_to_AlnBlock_CdnaWise10(pal);

  gent = Genomic_from_Sequence(gen);
  assert(gent);

  gr = new_GenomicRegion(gent);
  assert(gr);


  add_Genes_to_GenomicRegion_GeneWise(gr,active_gen->offset,active_gen->end,alb,cdna->name,0,NULL);

  mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);

  show_pretty_GenomicRegion(gr,0,ofp);

  for(i=0;i<gr->len;i++) {
    if( gr->gene[i]->ispseudo == TRUE ) {
      fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i);
    } else {
      trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
      write_fasta_Sequence(trans->baseseq,ofp);
    }
  }

  return 0;
}