boolean build_alignment(void)
{
  GeneParameter21 * gpara;
  GeneModel * gm;

  if( pal_file != NULL ) {
    pal = read_simple_PackAln_file(pal_file);
  }


  if( use_new_stats == 0 ) {
    gpara = GeneParameter21_wrap(gf,subs_error,indel_error,rmd,model_codon,model_splice,use_tied_model,ct,rnd_loop,cds_loop,rnd_to_model,link_loop,link_to_model);
  } else {
    gm = GeneModel_from_GeneStats(gs,gmp);
    
    gpara= GeneParameter21_from_GeneModel(gm,ct,rnd_loop,cds_loop,rnd_to_model,link_loop,link_to_model,subs_error,indel_error);
  }
  

  if( gpara == NULL ) {
    warn("Sorry - could not build gene parameters. Must be a bug of some sort");
    return FALSE;
  }

  
  /* phased based proteins use a different source */

  if( alg == GWWRAP_623P ) {
    alb = AlnBlock_from_phased_protein_wrap(pro,tsm,gen,gpara->cm,rm,mat,ppp,gpara,dpri,&pal);
  } else {
    if( use_tsm == FALSE) {
      alb =  AlnBlock_from_protein_genewise_wrap(pro,gen,mat,-gap,-ext,gpara,rmd,rmd,alg,use_syn,rm,allN,startend,dpri,&pal,gwrp);
    } else {
      alb =  AlnBlock_from_TSM_genewise_wrap(tsm,gen,gpara,rmd,rmd,use_syn,alg,allN,flat_insert,dpri,&pal,NULL);
    }
  }

  free_GeneParameter21(gpara);



  if( alb == NULL )
    return FALSE;


  gr = new_GenomicRegion(gen);


  add_Genes_to_GenomicRegion_GeneWise(gr,gen->baseseq->offset,gen->baseseq->end,alb,use_tsm == TRUE ? tsm->name : pro->baseseq->name,pseudo,NULL);

  if( use_tsm == TRUE) 
    mss = MatchSummarySet_from_AlnBlock_genewise(alb,tsm->name,1,gen->baseseq);
  else
    mss = MatchSummarySet_from_AlnBlock_genewise(alb,pro->baseseq->name,pro->baseseq->offset,gen->baseseq);

  return TRUE;
}
Exemple #2
0
int main(int argc,char ** argv)
{
  Sequence   * gen;
  Genomic    * genomic;
  CodonTable * ct = NULL;
  GenomeEvidenceSet * ges = NULL;
  RandomCodonScore * rcs;
  FILE * ifp = NULL;
  ComplexSequence * cs = NULL;
  ComplexSequenceEvalSet * cses = NULL;
  AlnBlock * alb;
  PackAln * pal;
  GenomicRegion * gr;
  int i;
  Protein * trans;
  cDNA    * cdna;
  int kbyte                = 10000;
  int stop_codon_pen  = 200;
  int start_codon_pen = 30;
  int new_gene        = 5000;
  int switch_cost     = 100;
  int smell           = 8;
  DPRunImpl * dpri = NULL;
    
  EstEvidence * est;

  boolean show_trans = TRUE;
  boolean show_cdna  = FALSE;
  boolean show_genes = TRUE;
  boolean show_alb   = FALSE;
  boolean show_pal   = FALSE;
  boolean show_gff   = TRUE;
  boolean show_debug = FALSE;
  boolean show_geneu = TRUE;
  char * divide_string = "//";

  strip_out_boolean_def_argument(&argc,argv,"geneutr",&show_geneu);
  strip_out_boolean_def_argument(&argc,argv,"genes",&show_genes);
  strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans);
  strip_out_boolean_def_argument(&argc,argv,"gff",&show_gff);
  strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb);
  strip_out_boolean_def_argument(&argc,argv,"pal",&show_pal);
  strip_out_boolean_def_argument(&argc,argv,"debug",&show_debug);
  strip_out_boolean_def_argument(&argc,argv,"cdna",&show_cdna);
  strip_out_integer_argument(&argc,argv,"stop",&stop_codon_pen);
  strip_out_integer_argument(&argc,argv,"start",&start_codon_pen);
  strip_out_integer_argument(&argc,argv,"gene",&new_gene);
  strip_out_integer_argument(&argc,argv,"switch",&switch_cost);
  strip_out_integer_argument(&argc,argv,"smell",&smell);
  
  dpri = new_DPRunImpl_from_argv(&argc,argv);
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }


  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

    

  ct  = read_CodonTable_file("codon.table");
  gen = read_fasta_file_Sequence(argv[1]);
  ifp = openfile(argv[2],"r");
  ges = read_est_evidence(ifp,ct);

  for(i=0;i<ges->len;i++) {
    est = (EstEvidence *) ges->geu[i]->data;
    est->in_smell = smell;
  }


  rcs= RandomCodonScore_alloc();
  for(i=0;i<125;i++) {
    if( is_stop_codon(i,ct) ) {
      rcs->codon[i] = -1000000;
    } else {
      rcs->codon[i] = 0;
    }
    /*    fprintf(stderr,"Got %d for %d\n",rcs->codon[i],i); */
  }

 

  cses = default_genomic_ComplexSequenceEvalSet();
  cs   = new_ComplexSequence(gen,cses);

 
  pal  = PackAln_bestmemory_GenomeWise9(ges,cs,-switch_cost,-new_gene,-start_codon_pen,-stop_codon_pen,rcs,NULL,dpri);
  alb  = convert_PackAln_to_AlnBlock_GenomeWise9(pal);


  genomic = Genomic_from_Sequence(gen);
  gr = new_GenomicRegion(genomic);

  add_Genes_to_GenomicRegion_GeneWise(gr,1,gen->len,alb,gen->name,0,NULL);

  if( show_genes ) {
    show_pretty_GenomicRegion(gr,0,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_gff ) {
    show_GFF_GenomicRegion(gr,gen->name,"genomwise",stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_trans ) {
    for(i=0;i<gr->len;i++) {
      if( gr->gene[i]->ispseudo == TRUE ) {
	fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i);
      } else {
	trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
	write_fasta_Sequence(trans->baseseq,stdout);
      }
    } 
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_cdna ) {
    for(i=0;i<gr->len;i++) {
      cdna = get_cDNA_from_Transcript(gr->gene[i]->transcript[0]);
      write_fasta_Sequence(cdna->baseseq,stdout);
    } 
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_geneu ) {
    show_utr_exon_genomewise(alb,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_alb ) {
    mapped_ascii_AlnBlock(alb,id,1,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_debug ) {
    debug_genomewise(alb,ges,ct,gen,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }
    
  if( show_pal ) {
    show_simple_PackAln(pal,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  return 0;
}
Exemple #3
0
int main(int argc,char ** argv)
{
    int i;

    DPRunImpl * dpri = NULL;
    GeneModelParam * gmp = NULL;
    GeneModel * gm = NULL;

    FILE * ifp;
    SeqAlign   * al;
    PairBaseSeq * pbs;

    ComplexSequenceEval * splice5;
    ComplexSequenceEval * splice3;
    ComplexSequence * cseq;


    CompMat * score_mat;
    CompProb * comp_prob;
    RandomModel * rm;

    PairBaseCodonModelScore * codon_score;
    PairBaseModelScore* nonc_score;

    PairBaseCodonModelScore * start;
    PairBaseCodonModelScore * stop;


    SyExonScore * exonscore;

    PackAln * pal;
    AlnBlock * alb;

    Genomic * genomic;
    GenomicRegion * gr;
    GenomicRegion * gr2;
    Protein * trans;

    StandardOutputOptions * std_opt;
    ShowGenomicRegionOptions * sgro;

    char * dump_packaln = NULL;
    char * read_packaln = NULL;
    FILE * packifp = NULL;

    boolean show_trans    = 1;
    boolean show_gene_raw = 0;



    ct = read_CodonTable_file(codon_table);
    /*
      score_mat = read_Blast_file_CompMat("blosum62.bla");
      comp_prob = CompProb_from_halfbit(score_mat);
    */
    rm = default_RandomModel();

    comp_prob = read_Blast_file_CompProb("wag85");

    fold_column_RandomModel_CompProb(comp_prob,rm);

    dpri = new_DPRunImpl_from_argv(&argc,argv);
    if( dpri == NULL ) {
        fatal("Unable to build DPRun implementation. Bad arguments");
    }

    gmp = new_GeneModelParam_from_argv(&argc,argv);

    std_opt = new_StandardOutputOptions_from_argv(&argc,argv);
    sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv);


    dump_packaln = strip_out_assigned_argument(&argc,argv,"dump");
    read_packaln = strip_out_assigned_argument(&argc,argv,"recover");

    strip_out_standard_options(&argc,argv,show_help,show_version);
    if( argc != 2 ) {
        show_help(stdout);
        exit(12);
    }


    if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
        fatal("Could not build gene model");
    }

    codon_score = make_PairBaseCodonModelScore(comp_prob);
    nonc_score  = make_PairBaseModelScore();

    splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score);
    splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score);

    if((ifp = openfile(argv[1],"r")) == NULL ) {
        fatal("Could not open file %s",argv[1]);
    }

    al = read_fasta_SeqAlign(ifp);

    assert(al);
    assert(al->len == 2);
    assert(al->seq[0]->len > 0);
    assert(al->seq[1]->len > 0);

    /*  write_fasta_SeqAlign(al,stdout);*/


    pbs = new_PairBaseSeq_SeqAlign(al);

    if( read_packaln == NULL ) {
        cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3);
    }

    start = make_start_PairBaseCodonModelScore(ct);
    stop  = make_stop_PairBaseCodonModelScore(ct);


    /*  show_PairBaseCodonModelScore(stop,ct,stdout); */

    /*
      for(i=0;i<pbs->anchor->len;i++) {
        printf("%3d  %c For %-6d %-6d %c Rev %-6d %-6d\n",i,pbs->anchor->seq[i],
    	   CSEQ_PAIR_5SS(cseq,i),CSEQ_PAIR_3SS(cseq,i),
    	   char_complement_base(pbs->anchor->seq[i]),
    	   CSEQ_REV_PAIR_5SS(cseq,i),CSEQ_REV_PAIR_3SS(cseq,i));
      }
    */


    /*  show_ComplexSequence(cseq,stdout);

    */


    exonscore = SyExonScore_flat_model(100,150,0.1,1.0);
    /*
    for(i=0;i<cseq->length;i++) {
      fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]);
    }
    exit(0);
    */

    if( read_packaln != NULL ) {
        packifp = openfile(read_packaln,"r");
        if( packifp == NULL ) {
            fatal("File %s is unopenable - ignoring dump command",dump_packaln);
        } else {
            pal = read_simple_PackAln(packifp);
        }
    } else {
        pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri);
    }

    alb = convert_PackAln_to_AlnBlock_SyWise20(pal);


    if( dump_packaln != NULL ) {
        packifp = openfile(dump_packaln,"w");
        if( packifp == NULL ) {
            warn("File %s is unopenable - ignoring dump command",dump_packaln);
        } else {
            show_simple_PackAln(pal,packifp);
        }
    }

    show_score_sequence(alb,pbs,nonc_score,stdout);
    /*
      show_StandardOutputOptions(std_opt,alb,pal,"//",stdout);
    */
    genomic = Genomic_from_Sequence(al->seq[0]);
    gr = new_GenomicRegion(genomic);
    gr2 = new_GenomicRegion(genomic);

    add_Genes_to_GenomicRegion_new(gr,alb);


    show_GenomicRegionOptions(sgro,gr,ct,"//",stdout);

    return 0;
}
int main(int argc,char ** argv)
{
  int i;

  DPRunImpl * dpri = NULL;
  GeneModelParam * gmp = NULL;
  GeneModel * gm = NULL;

  Sequence * seq;

  RandomCodon * rc;
  RandomModelDNA * rmd;
  RandomCodonScore * rcs;


  ComplexSequenceEval * splice5;
  ComplexSequenceEval * splice3;
  ComplexSequenceEvalSet * cses;
  ComplexSequence * cseq;


  SyExonScore * exonscore;

  PackAln * pal;
  AlnBlock * alb;

  Genomic * genomic;
  GenomicRegion * gr;
  Protein * trans;

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  gmp = new_GeneModelParam_from_argv(&argc,argv);

  ct= read_CodonTable_file("codon.table");

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 2 ) {
    show_help(stdout);
    exit(12);
  }

  
  if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
    fatal("Could not build gene model");
  }


  seq = read_fasta_file_Sequence(argv[1]);
  
  assert(seq);

  cses = new_ComplexSequenceEvalSet_from_GeneModel(gm);

  cseq = new_ComplexSequence(seq,cses);

  rc = flat_RandomCodon(ct);
  rmd = RandomModelDNA_std();

  fold_in_RandomModelDNA_into_RandomCodon(rc,rmd);
  rcs = RandomCodonScore_from_RandomCodon(rc);

  exonscore = SyExonScore_flat_model(200,250,0.1,0.1);
  /*
  for(i=0;i<cseq->length;i++) {
    fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]);
  }
  exit(0);
  */
/*
  show_RandomCodonScore(rcs,stdout);


  for(i=3;i<seq->len;i++) {
    fprintf(stdout,"seq %d is %c with score %d\n",i,aminoacid_from_seq(ct,seq->seq+i-2),rcs->codon[CSEQ_GENOMIC_CODON(cseq,i)]);
  }

  exit(0);
*/

  pal = PackAln_bestmemory_StatWise10(exonscore,cseq,rcs,Probability2Score(1.0/10.0),Probability2Score(1.0/10.0),NULL,dpri);
  alb = convert_PackAln_to_AlnBlock_StatWise10(pal);

  mapped_ascii_AlnBlock(alb,id,1,stdout);

  genomic = Genomic_from_Sequence(seq);
  gr = new_GenomicRegion(genomic);

  add_Genes_to_GenomicRegion_GeneWise(gr,1,seq->len,alb,"bollocks",0,NULL);


  for(i=0;i<gr->len;i++) {
    if( gr->gene[i]->ispseudo == TRUE ) {
      fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i);
    } else {
      trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
      write_fasta_Sequence(trans->baseseq,stdout);
    }
  } 


  
  return 0;
}
Exemple #5
0
int main(int argc,char ** argv)
{
  Sequence * cdna;
  Sequence * gen;
  Sequence * active_gen;
  Sequence * active_cdna;

  int i;
  int dstart = -1;
  int dend   = -1;

  int cstart = -1;
  int cend   = -1;

  CodonTable * ct = NULL;
  CodonMatrixScore * cm = NULL;
  RandomCodon * rndcodon = NULL;
  RandomCodonScore * rndcodonscore = NULL;
  DnaMatrix * dm   = NULL;

  DPRunImpl * dpri = NULL;
 
  GeneModel * gm;
  GeneModelParam * gmp;
  GeneStats * gs;
  GeneParser21 * gp21;
  GeneParser21Score * gp21s;
  GeneParser4Score * gp;


  ComplexSequenceEvalSet * cdna_cses;
  ComplexSequenceEvalSet * gen_cses;

  ComplexSequence * cs_cdna;
  ComplexSequence * cs_gen;
  
  Genomic * gent;
  GenomicRegion * gr;

  CompMat  * cmat;
  CompProb * cprob;
  char * matfile = "blosum62.bla";
  Protein * trans;

  PackAln * pal;
  AlnBlock * alb;

  FILE * ofp = stdout;

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  gmp  = new_GeneModelParam_from_argv(&argc,argv);

  strip_out_integer_argument(&argc,argv,"u",&dstart);
  strip_out_integer_argument(&argc,argv,"v",&dend);

  strip_out_integer_argument(&argc,argv,"s",&cstart);
  strip_out_integer_argument(&argc,argv,"t",&cend);


  strip_out_standard_options(&argc,argv,show_help,show_version);


  ct = read_CodonTable_file(codon_file);

  cmat = read_Blast_file_CompMat(matfile);
  cprob = CompProb_from_halfbit(cmat);
  cm = naive_CodonMatrixScore_from_prob(ct,cprob);
  
  gm = GeneModel_from_GeneModelParam(gmp);

  cdna = read_fasta_file_Sequence(argv[1]);
  gen = read_fasta_file_Sequence(argv[2]);

  if( dstart != -1 || dend != -1 ) {
    if( dstart == -1 ) {
      dstart = 1;
    }
    if( dend == -1 ) {
      dend = gen->len;
    }
    active_gen = magic_trunc_Sequence(gen,dstart,dend);
  } else {
    active_gen = hard_link_Sequence(gen);
  }

  if( cstart != -1 || cend != -1 ) {
    if( cstart == -1 ) {
      cstart = 1;
    }
    if( cend == -1 ) {
      cend = gen->len;
    }
    active_cdna = magic_trunc_Sequence(gen,cstart,cend);
  } else {
    active_cdna = hard_link_Sequence(gen);
  }

  

  rndcodon = RandomCodon_from_raw_CodonFrequency(gm->codon,ct);
  fold_in_RandomModelDNA_into_RandomCodon(rndcodon,gm->rnd);

  rndcodonscore = RandomCodonScore_from_RandomCodon(rndcodon);

  assert(active_cdna);
  assert(active_gen);

  cdna_cses = default_cDNA_ComplexSequenceEvalSet();
  gen_cses  = new_ComplexSequenceEvalSet_from_GeneModel(gm);

  cs_cdna = new_ComplexSequence(active_cdna,cdna_cses);
  cs_gen  = new_ComplexSequence(active_gen,gen_cses);

  gp21 = std_GeneParser21();
  GeneParser21_fold_in_RandomModelDNA(gp21,gm->rnd);
  gp21s = GeneParser21Score_from_GeneParser21(gp21);
  gp = GeneParser4Score_from_GeneParser21Score(gp21s);
 
  dm = identity_DnaMatrix(Probability2Score(halfbit2Probability(1)),Probability2Score(halfbit2Probability(-1)));

  assert(cs_cdna);
  assert(cs_gen);
  assert(gp);
  assert(rndcodonscore);
  assert(dm);
  assert(dpri);
  
  /*  show_CodonMatrixScore(cm,ct,ofp);*/

  pal = PackAln_bestmemory_CdnaWise10(cs_cdna,cs_gen,gp,cm,rndcodonscore,dm,
				      Probability2Score(halfbit2Probability(-12)),
				      Probability2Score(halfbit2Probability(-2)),
				      Probability2Score(halfbit2Probability(-5)),
				      Probability2Score(halfbit2Probability(0)),
				      NULL,
				      dpri);


  alb = convert_PackAln_to_AlnBlock_CdnaWise10(pal);

  gent = Genomic_from_Sequence(gen);
  assert(gent);

  gr = new_GenomicRegion(gent);
  assert(gr);


  add_Genes_to_GenomicRegion_GeneWise(gr,active_gen->offset,active_gen->end,alb,cdna->name,0,NULL);
				      
  mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);

  show_pretty_GenomicRegion(gr,0,ofp);

  for(i=0;i<gr->len;i++) {
    if( gr->gene[i]->ispseudo == TRUE ) {
	fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i);
    } else {
      trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
      write_fasta_Sequence(trans->baseseq,ofp);
    }
  } 
 
}