Beispiel #1
0
AlnBlock * Align_Sequences_ProteinBlockAligner(Sequence * one,Sequence * two,CompMat * comp,int bentry,int bexit,int b1exit,int b_self,int b_on,DPRunImpl * dpri)
{
  ComplexSequence * cone=NULL;
  ComplexSequence * ctwo=NULL;
  AlnBlock * alb= NULL;
  PackAln * pal=NULL;
  ComplexSequenceEvalSet * evalfunc = NULL;

  if( one == NULL || two == NULL || comp == NULL ) {
    warn("Passed in NULL objects into Align_Sequences_ProteinSmithWaterman!");
    return NULL;
  }

  if( one->type != SEQUENCE_PROTEIN ) {
    warn("Sequence %s is not typed as protein... ignoring!\n",one->name);
  }

  if( two->type != SEQUENCE_PROTEIN ) {
    warn("Sequence %s is not typed as protein... ignoring!\n",two->name);
  }


  evalfunc = default_aminoacid_ComplexSequenceEvalSet();
  
  cone = new_ComplexSequence(one,evalfunc);
  if( cone == NULL )
    goto cleanup;
  ctwo = new_ComplexSequence(two,evalfunc);
  if( ctwo == NULL )
    goto cleanup;

  pal = PackAln_bestmemory_ProteinBlockAligner(cone,ctwo,comp,bentry,b1exit,b_on,b_self,bexit,NULL,dpri);

  if( pal == NULL ) 
    goto cleanup;

  alb = convert_PackAln_to_AlnBlock_ProteinSW(pal);
  
  goto cleanup;

  cleanup :

  if( cone != NULL )
      free_ComplexSequence(cone);

  if( ctwo != NULL )
    free_ComplexSequence(ctwo);

  if( pal != NULL )
    free_PackAln(pal);

  if( evalfunc != NULL )
    free_ComplexSequenceEvalSet(evalfunc);

  return alb;

}
Beispiel #2
0
cDNADB * new_cDNADB_from_single_seq(cDNA * seq)
{
  ComplexSequence * cs,*cs_rev;
  Sequence * temp;
  ComplexSequenceEvalSet * cses;
  
  cses = default_cDNA_ComplexSequenceEvalSet();

  cs = new_ComplexSequence(seq->baseseq,cses);
  temp = reverse_complement_Sequence(seq->baseseq);
  cs_rev = new_ComplexSequence(temp,cses);
  free_Sequence(temp);
  free_ComplexSequenceEvalSet(cses);

  return new_cDNADB_from_forrev_cseq(cs,cs_rev);
}
Beispiel #3
0
ComplexSequence * init_cDNADB(cDNADB * cdnadb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq;

  if( cdnadb->is_single_seq == TRUE) {
    *return_status = DB_RETURN_OK;
    cdnadb->done_forward = TRUE;
    return hard_link_ComplexSequence(cdnadb->forw);
    
  }

  /* is a seq db */

  seq = init_SequenceDB(cdnadb->sdb,return_status);

  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    return NULL; /** error already reported **/
  }

  if( force_to_dna_Sequence(seq,cdnadb->error_tol,NULL) == FALSE ) {
    warn("first sequence below error level, have to fail at the moment. Ooops...");
    free_Sequence(seq);
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }

  cdnadb->current = seq;
  cdnadb->done_forward = TRUE;
  cs = new_ComplexSequence(seq,cdnadb->cses);
  if( cs == NULL ) {
    warn("Cannot make initial ComplexSequence. Unable to error catch this. Failing!");
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }



  return cs;
}
int main(int argc,char ** argv)
{
  Sequence * seq;

  ComplexSequence * cs;
  ComplexSequenceEvalSet * cset;


  GeneStats * st;
  GeneModel * gm;
  GeneModelParam * gp;
  FILE * ifp;

  gp = std_GeneModelParam();


  seq = read_fasta_file_Sequence("../../test_data/human.genomic");
  
  ifp = openfile("gene.stat","r");
  
  st = read_GeneStats(ifp); 
  
  /*  dump_GeneStats(st,stdout); */

  fflush(stdout);

  gm = GeneModel_from_GeneStats(st,gp);

  show_GeneModel(gm,stdout);

  fflush(stdout);

  cset = new_ComplexSequenceEvalSet_from_GeneModel(gm);

  cs = new_ComplexSequence(seq,cset);

  show_ComplexSequence(cs,stdout);

}
ComplexSequence * evaluate_ComplexSequence_Genomic(Genomic * gen,ComplexSequenceEvalSet * cses,int score_for_repeat,int score_for_cds_in_repeat)
{
  int i,j;
  ComplexSequence * out;

  if( cses == NULL || cses->type != SEQUENCE_GENOMIC ) {
    warn("ComplexSequenceEvalSet is not valid for genomic!");
    return NULL;
  }

  assert(gen);
  assert(gen->baseseq);
  assert(gen->baseseq->seq);

  out = new_ComplexSequence(gen->baseseq,cses);


  if( out == NULL ) {
    return NULL;
  }
  
  for(i=0;i<gen->len;i++) {
    for(j=gen->repeat[i]->start;j<gen->repeat[i]->end;j++) {
      if( j >= gen->baseseq->len ) {
	warn("Overran sequence - j position %d, sequence length %d on repeat %d",j,gen->baseseq->len,i);
	break;
      }

      CSEQ_GENOMIC_REPEAT(out,j) = score_for_repeat;
      CSEQ_GENOMIC_CDSPOT(out,j) = score_for_cds_in_repeat;
    }
  }
  /*
  for(i=0;i<gen->baseseq->len;i++) 
    fprintf(stderr,"At position %6d %d\n",CSEQ_GENOMIC_CDSPOT(out,j));
  */

  return out;
}
Beispiel #6
0
ComplexSequence * reload_cDNADB(ComplexSequence * last,cDNADB * cdnadb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq,*temp;
  

  /** free Complex Sequence **/

  if ( last != NULL ) {
    free_ComplexSequence(last);
  }

  if( cdnadb->forward_only == TRUE) {
     temp = reload_SequenceDB(NULL,cdnadb->sdb,return_status);
     if ( *return_status  != DB_RETURN_OK ) {
         return NULL;
     }
    cs = new_ComplexSequence(temp,cdnadb->cses);
    return cs;
  }

  if( cdnadb->is_single_seq == TRUE ) {
    if( cdnadb->done_forward == TRUE ) {
      *return_status = DB_RETURN_OK;
      cdnadb->done_forward = FALSE;
      return hard_link_ComplexSequence(cdnadb->rev);
    } else {
      *return_status = DB_RETURN_END;
      return NULL;
    }
  }

  
  /** standard database **/


  if( cdnadb->done_forward == TRUE ) {
    if( cdnadb->current == NULL ) {
      warn("A bad internal cDNA db error - unable to find current sequence in db reload");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    temp = reverse_complement_Sequence(cdnadb->current);


    if( temp == NULL ) {
      warn("A bad internal cDNA db error - unable to reverse complements current");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    cs = new_ComplexSequence(temp,cdnadb->cses);

    if( cs == NULL ) {
      warn("A bad internal cDNA db error - unable to make complex sequence in db reload");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    free_Sequence(temp);
    cdnadb->done_forward = FALSE;
    return cs;
  }


  /* otherwise we have to get a new sequence */

  seq = reload_SequenceDB(NULL,cdnadb->sdb,return_status);

  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    return NULL; /** error already reported **/
  }

  uppercase_Sequence(seq);

  if( force_to_dna_Sequence(seq,cdnadb->error_tol,NULL) == FALSE ) {
    if( cdnadb->error_handling == CDNADB_READ_THROUGH ) {
      warn("Unable to map %s sequence to a cDNA sequence, but ignoring that for the moment...",seq->name);
      free_Sequence(seq);
      return reload_cDNADB(NULL,cdnadb,return_status);
    } else {
      warn("Unable to map %s sequence to a cDNA sequence. Failing",seq->name);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }
  }


  cs = new_ComplexSequence(seq,cdnadb->cses);
  if( cs == NULL ) {
    if( cdnadb->error_handling == CDNADB_READ_THROUGH ) {
      warn("Unable to map %s sequence to a cDNA sequence, but ignoring that for the moment...",seq->name);
      free_Sequence(seq);
      return reload_cDNADB(NULL,cdnadb,return_status);
    } else {
      warn("Unable to map %s sequence to a cDNA sequence. Failing",seq->name);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }
  }

  cdnadb->current = free_Sequence(cdnadb->current);
  cdnadb->current = seq;
  cdnadb->done_forward= TRUE;

  return cs;
}
Beispiel #7
0
AlnBlock * Align_Sequences_ProteinSmithWaterman(Sequence * one,Sequence * two,CompMat * comp,int gap,int ext,DPRunImpl * dpri)
{
  AlnBlock * out = NULL;
  ComplexSequenceEvalSet * evalfunc = NULL;
  ComplexSequence * query_cs = NULL;
  ComplexSequence * target_cs = NULL;
  PackAln * pal = NULL;

  if( one == NULL || two == NULL || comp == NULL ) {
    warn("Passed in NULL objects into Align_Sequences_ProteinSmithWaterman!");
    return NULL;
  }

  if( one->type != SEQUENCE_PROTEIN ) {
    warn("Sequence %s is not typed as protein... ignoring!\n",one->name);
  }

  if( two->type != SEQUENCE_PROTEIN ) {
    warn("Sequence %s is not typed as protein... ignoring!\n",two->name);
  }


  if( gap > 0 || ext > 0 ) {
    warn("Gap penalties %d,%d only make sense if they are negative",gap,ext);
    return NULL;
  }

  evalfunc = default_aminoacid_ComplexSequenceEvalSet();
  
  query_cs = new_ComplexSequence(one,evalfunc);
  if( query_cs == NULL )
    goto cleanup;
  target_cs = new_ComplexSequence(two,evalfunc);
  if( target_cs == NULL )
    goto cleanup;

  pal = PackAln_bestmemory_ProteinSW(query_cs,target_cs,comp,gap,ext,NULL,dpri);
  if( pal == NULL ) 
    goto cleanup;

  out = convert_PackAln_to_AlnBlock_ProteinSW(pal);
  
  goto cleanup;

  cleanup :

    if( query_cs != NULL )
      free_ComplexSequence(query_cs);

  if( target_cs != NULL )
    free_ComplexSequence(target_cs);

  if( pal != NULL )
    free_PackAln(pal);

  if( evalfunc != NULL )
    free_ComplexSequenceEvalSet(evalfunc);

  return out;

}
Beispiel #8
0
int main(int argc,char ** argv)
{
  Sequence   * gen;
  Genomic    * genomic;
  CodonTable * ct = NULL;
  GenomeEvidenceSet * ges = NULL;
  RandomCodonScore * rcs;
  FILE * ifp = NULL;
  ComplexSequence * cs = NULL;
  ComplexSequenceEvalSet * cses = NULL;
  AlnBlock * alb;
  PackAln * pal;
  GenomicRegion * gr;
  int i;
  Protein * trans;
  cDNA    * cdna;
  int kbyte                = 10000;
  int stop_codon_pen  = 200;
  int start_codon_pen = 30;
  int new_gene        = 5000;
  int switch_cost     = 100;
  int smell           = 8;
  DPRunImpl * dpri = NULL;
    
  EstEvidence * est;

  boolean show_trans = TRUE;
  boolean show_cdna  = FALSE;
  boolean show_genes = TRUE;
  boolean show_alb   = FALSE;
  boolean show_pal   = FALSE;
  boolean show_gff   = TRUE;
  boolean show_debug = FALSE;
  boolean show_geneu = TRUE;
  char * divide_string = "//";

  strip_out_boolean_def_argument(&argc,argv,"geneutr",&show_geneu);
  strip_out_boolean_def_argument(&argc,argv,"genes",&show_genes);
  strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans);
  strip_out_boolean_def_argument(&argc,argv,"gff",&show_gff);
  strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb);
  strip_out_boolean_def_argument(&argc,argv,"pal",&show_pal);
  strip_out_boolean_def_argument(&argc,argv,"debug",&show_debug);
  strip_out_boolean_def_argument(&argc,argv,"cdna",&show_cdna);
  strip_out_integer_argument(&argc,argv,"stop",&stop_codon_pen);
  strip_out_integer_argument(&argc,argv,"start",&start_codon_pen);
  strip_out_integer_argument(&argc,argv,"gene",&new_gene);
  strip_out_integer_argument(&argc,argv,"switch",&switch_cost);
  strip_out_integer_argument(&argc,argv,"smell",&smell);
  
  dpri = new_DPRunImpl_from_argv(&argc,argv);
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }


  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

    

  ct  = read_CodonTable_file("codon.table");
  gen = read_fasta_file_Sequence(argv[1]);
  ifp = openfile(argv[2],"r");
  ges = read_est_evidence(ifp,ct);

  for(i=0;i<ges->len;i++) {
    est = (EstEvidence *) ges->geu[i]->data;
    est->in_smell = smell;
  }


  rcs= RandomCodonScore_alloc();
  for(i=0;i<125;i++) {
    if( is_stop_codon(i,ct) ) {
      rcs->codon[i] = -1000000;
    } else {
      rcs->codon[i] = 0;
    }
    /*    fprintf(stderr,"Got %d for %d\n",rcs->codon[i],i); */
  }

 

  cses = default_genomic_ComplexSequenceEvalSet();
  cs   = new_ComplexSequence(gen,cses);

 
  pal  = PackAln_bestmemory_GenomeWise9(ges,cs,-switch_cost,-new_gene,-start_codon_pen,-stop_codon_pen,rcs,NULL,dpri);
  alb  = convert_PackAln_to_AlnBlock_GenomeWise9(pal);


  genomic = Genomic_from_Sequence(gen);
  gr = new_GenomicRegion(genomic);

  add_Genes_to_GenomicRegion_GeneWise(gr,1,gen->len,alb,gen->name,0,NULL);

  if( show_genes ) {
    show_pretty_GenomicRegion(gr,0,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_gff ) {
    show_GFF_GenomicRegion(gr,gen->name,"genomwise",stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_trans ) {
    for(i=0;i<gr->len;i++) {
      if( gr->gene[i]->ispseudo == TRUE ) {
	fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i);
      } else {
	trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
	write_fasta_Sequence(trans->baseseq,stdout);
      }
    } 
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_cdna ) {
    for(i=0;i<gr->len;i++) {
      cdna = get_cDNA_from_Transcript(gr->gene[i]->transcript[0]);
      write_fasta_Sequence(cdna->baseseq,stdout);
    } 
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_geneu ) {
    show_utr_exon_genomewise(alb,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_alb ) {
    mapped_ascii_AlnBlock(alb,id,1,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_debug ) {
    debug_genomewise(alb,ges,ct,gen,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }
    
  if( show_pal ) {
    show_simple_PackAln(pal,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  return 0;
}
Beispiel #9
0
int main(int argc,char ** argv)
{
  Sequence * query;
  Sequence * target;
  CompMat * comp;
  char * comp_file;
  int gap = (12);
  int ext = (2);
  int a = 120;
  int b = 10;
  int c = 3;
  ComplexSequence * query_cs;
  ComplexSequence * target_cs;
  ComplexSequenceEvalSet * evalfunc;

  boolean show_label_output = FALSE;
  boolean show_fancy_output = FALSE;
  boolean use_abc = FALSE;

  PackAln * pal;
  AlnBlock * alb;

  DPRunImpl * dpri = NULL;

  /*
   * Process command line options
   * -h or -help gives us help
   * -g for gap value (an int) - rely on commandline error processing
   * -e for ext value (an int) - rely on commandline error processing
   * -m for matrix (a char)
   * -l - label output
   * -f - fancy output
   *
   *
   * Use calls to commandline.h functions
   *
   */
  
  if( strip_out_boolean_argument(&argc,argv,"h") == TRUE || strip_out_boolean_argument(&argc,argv,"-help") == TRUE) {
    show_help(stdout);
    exit(1);
  }

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  show_label_output = strip_out_boolean_argument(&argc,argv,"l");
  show_fancy_output = strip_out_boolean_argument(&argc,argv,"f");


  /** if all FALSE, set fancy to TRUE **/

  if( show_label_output == FALSE ) 
    show_fancy_output = TRUE;


  (void) strip_out_integer_argument(&argc,argv,"g",&gap);
  (void) strip_out_integer_argument(&argc,argv,"e",&ext);
  (void) strip_out_integer_argument(&argc,argv,"a",&a);
  (void) strip_out_integer_argument(&argc,argv,"b",&b);
  (void) strip_out_integer_argument(&argc,argv,"c",&c);

  use_abc = strip_out_boolean_argument(&argc,argv,"abc"); 
  
  comp_file = strip_out_assigned_argument(&argc,argv,"m");
  if( comp_file == NULL)
    comp_file = "blosum62.bla";

  
  
  if( argc != 3 ) {
    warn("Must have two arguments for sequence 1 and sequence 2 %d",argc);
    show_help(stdout);
    exit(1);
  }
  
  /*
   * Read in two sequences
   */
  
  if( (query=read_fasta_file_Sequence(argv[1])) == NULL ) {
    fatal("Unable to read the sequence in file %s",argv[1]);
  }
  
  if( (target=read_fasta_file_Sequence(argv[2])) == NULL ) {
    fatal("Unable to read the sequence in file %s",argv[2]);
  }
  
  
  /*
   * Open a blosum matrix. This will be opened from WISECONFIGDIR
   * or WISEPERSONALDIR if it is not present in the current directory.
   */
  
  comp = read_Blast_file_CompMat(comp_file);
  
  if( comp == NULL ) {
    fatal("unable to read file %s",comp_file);
  }
  
  /* if abc - factor up matrix! */

  if( use_abc == TRUE ) {
    factor_CompMat(comp,10);
  }


  /*
   * Make an alignment. I don't care about the implementation:
   * hand it over to sw_wrap function to do it
   *
   */		 

  if( use_abc ) {
    evalfunc = default_aminoacid_ComplexSequenceEvalSet();
  
    query_cs = new_ComplexSequence(query,evalfunc);
    if( query_cs == NULL )
      fatal("Cannot build cs objects!");
    target_cs = new_ComplexSequence(target,evalfunc);
    if( target_cs == NULL )
      fatal("Cannot build cs objects!");

    pal = PackAln_bestmemory_abc(query_cs,target_cs,comp,-a,-b,-c,NULL,dpri);
    alb = convert_PackAln_to_AlnBlock_abc(pal);
    free_ComplexSequence(query_cs);
    free_ComplexSequence(target_cs);
  } else {
    alb = Align_Sequences_ProteinSmithWaterman(query,target,comp,-gap,-ext,dpri);
  }


  /*
   * show output. If multiple outputs, divide using //
   */


  if( show_label_output == TRUE ) {
    show_flat_AlnBlock(alb,stdout);
    puts("//\n");
  }

  if( show_fancy_output == TRUE ) {
    write_pretty_seq_align(alb,query,target,15,50,stdout);
    puts("//\n");
  }

  /*
   * Destroy the memory.
   */	

  free_Sequence(query);
  free_Sequence(target);
  free_CompMat(comp);
  free_AlnBlock(alb);

  return 0;
}
Beispiel #10
0
int main(int argc,char ** argv)
{
  int i;

  DPRunImpl * dpri = NULL;
  GeneModelParam * gmp = NULL;
  GeneModel * gm = NULL;

  Sequence * seq;

  RandomCodon * rc;
  RandomModelDNA * rmd;
  RandomCodonScore * rcs;


  ComplexSequenceEval * splice5;
  ComplexSequenceEval * splice3;
  ComplexSequenceEvalSet * cses;
  ComplexSequence * cseq;


  SyExonScore * exonscore;

  PackAln * pal;
  AlnBlock * alb;

  Genomic * genomic;
  GenomicRegion * gr;
  Protein * trans;

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  gmp = new_GeneModelParam_from_argv(&argc,argv);

  ct= read_CodonTable_file("codon.table");

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 2 ) {
    show_help(stdout);
    exit(12);
  }

  
  if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
    fatal("Could not build gene model");
  }


  seq = read_fasta_file_Sequence(argv[1]);
  
  assert(seq);

  cses = new_ComplexSequenceEvalSet_from_GeneModel(gm);

  cseq = new_ComplexSequence(seq,cses);

  rc = flat_RandomCodon(ct);
  rmd = RandomModelDNA_std();

  fold_in_RandomModelDNA_into_RandomCodon(rc,rmd);
  rcs = RandomCodonScore_from_RandomCodon(rc);

  exonscore = SyExonScore_flat_model(200,250,0.1,0.1);
  /*
  for(i=0;i<cseq->length;i++) {
    fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]);
  }
  exit(0);
  */
/*
  show_RandomCodonScore(rcs,stdout);


  for(i=3;i<seq->len;i++) {
    fprintf(stdout,"seq %d is %c with score %d\n",i,aminoacid_from_seq(ct,seq->seq+i-2),rcs->codon[CSEQ_GENOMIC_CODON(cseq,i)]);
  }

  exit(0);
*/

  pal = PackAln_bestmemory_StatWise10(exonscore,cseq,rcs,Probability2Score(1.0/10.0),Probability2Score(1.0/10.0),NULL,dpri);
  alb = convert_PackAln_to_AlnBlock_StatWise10(pal);

  mapped_ascii_AlnBlock(alb,id,1,stdout);

  genomic = Genomic_from_Sequence(seq);
  gr = new_GenomicRegion(genomic);

  add_Genes_to_GenomicRegion_GeneWise(gr,1,seq->len,alb,"bollocks",0,NULL);


  for(i=0;i<gr->len;i++) {
    if( gr->gene[i]->ispseudo == TRUE ) {
      fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i);
    } else {
      trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
      write_fasta_Sequence(trans->baseseq,stdout);
    }
  } 


  
  return 0;
}
int main(int argc,char ** argv)
{
  int i;
  SequenceSet * in;
  Sequence * trans;
  ThreeStateDB * tsd;

  DPRunImpl * dpri;
  CodonTable * ct;

  int return_status;
  ThreeStateModel * tsm;
  ThreeStateScore * tss;
  Protein * hmmp;
  ComplexSequence * cs;
  ComplexSequenceEvalSet * cses;

  PackAln * pal;
  AlnBlock * alb;

  int show_align = 0;
  int show_alb   = 0;
  int show_verbose = 1;
  int show_trans = 0;

  ct = read_CodonTable_file("codon.table");

  cses = default_aminoacid_ComplexSequenceEvalSet();
  
  dpri = new_DPRunImpl_from_argv(&argc,argv);

  strip_out_boolean_def_argument(&argc,argv,"pretty",&show_align);

  strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb);

  strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans);

  if( argc != 3 ) {
    show_help(stdout);
    exit(63);
  }


  in = read_fasta_SequenceSet_file(argv[1]);

  tsd = HMMer2_ThreeStateDB(argv[2]);

  assert(in);
  assert(tsd);
  assert(in->len == 2);

  trans = translate_Sequence(in->set[0],ct);

  if( show_trans ) {
    write_fasta_Sequence(trans,stdout);
  }

  cs = new_ComplexSequence(trans,cses);

  open_ThreeStateDB(tsd);

  while( (tsm = read_TSM_ThreeStateDB(tsd,&return_status)) != NULL ) {
    fold_RandomModel_into_ThreeStateModel(tsm,tsm->rm);
    set_startend_policy_ThreeStateModel(tsm,TSM_local,10,1.0);

    tss = ThreeStateScore_from_ThreeStateModel(tsm);
    hmmp = pseudo_Protein_from_ThreeStateModel(tsm);

    pal = PackAln_bestmemory_ThreeStateLoop(tss,cs,NULL,dpri);
    alb = convert_PackAln_to_AlnBlock_ThreeStateLoop(pal);

    if( show_alb ) {
      show_flat_AlnBlock(alb,stdout);
    }

    if( show_align ) {
      write_pretty_seq_align(alb,hmmp->baseseq,trans,15,50,stdout);      
    }
    if( show_verbose ) {
      show_verbose_evo(alb,tsm,in->set[0],in->set[1],ct,stdout);
    }
      
  }
  

}
Beispiel #12
0
int main(int argc,char ** argv)
{
  Sequence * cdna;
  Sequence * gen;
  Sequence * active_gen;
  Sequence * active_cdna;

  int i;
  int dstart = -1;
  int dend   = -1;

  int cstart = -1;
  int cend   = -1;

  CodonTable * ct = NULL;
  CodonMatrixScore * cm = NULL;
  RandomCodon * rndcodon = NULL;
  RandomCodonScore * rndcodonscore = NULL;
  DnaMatrix * dm   = NULL;

  DPRunImpl * dpri = NULL;
 
  GeneModel * gm;
  GeneModelParam * gmp;
  GeneStats * gs;
  GeneParser21 * gp21;
  GeneParser21Score * gp21s;
  GeneParser4Score * gp;


  ComplexSequenceEvalSet * cdna_cses;
  ComplexSequenceEvalSet * gen_cses;

  ComplexSequence * cs_cdna;
  ComplexSequence * cs_gen;
  
  Genomic * gent;
  GenomicRegion * gr;

  CompMat  * cmat;
  CompProb * cprob;
  char * matfile = "blosum62.bla";
  Protein * trans;

  PackAln * pal;
  AlnBlock * alb;

  FILE * ofp = stdout;

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  gmp  = new_GeneModelParam_from_argv(&argc,argv);

  strip_out_integer_argument(&argc,argv,"u",&dstart);
  strip_out_integer_argument(&argc,argv,"v",&dend);

  strip_out_integer_argument(&argc,argv,"s",&cstart);
  strip_out_integer_argument(&argc,argv,"t",&cend);


  strip_out_standard_options(&argc,argv,show_help,show_version);


  ct = read_CodonTable_file(codon_file);

  cmat = read_Blast_file_CompMat(matfile);
  cprob = CompProb_from_halfbit(cmat);
  cm = naive_CodonMatrixScore_from_prob(ct,cprob);
  
  gm = GeneModel_from_GeneModelParam(gmp);

  cdna = read_fasta_file_Sequence(argv[1]);
  gen = read_fasta_file_Sequence(argv[2]);

  if( dstart != -1 || dend != -1 ) {
    if( dstart == -1 ) {
      dstart = 1;
    }
    if( dend == -1 ) {
      dend = gen->len;
    }
    active_gen = magic_trunc_Sequence(gen,dstart,dend);
  } else {
    active_gen = hard_link_Sequence(gen);
  }

  if( cstart != -1 || cend != -1 ) {
    if( cstart == -1 ) {
      cstart = 1;
    }
    if( cend == -1 ) {
      cend = gen->len;
    }
    active_cdna = magic_trunc_Sequence(gen,cstart,cend);
  } else {
    active_cdna = hard_link_Sequence(gen);
  }

  

  rndcodon = RandomCodon_from_raw_CodonFrequency(gm->codon,ct);
  fold_in_RandomModelDNA_into_RandomCodon(rndcodon,gm->rnd);

  rndcodonscore = RandomCodonScore_from_RandomCodon(rndcodon);

  assert(active_cdna);
  assert(active_gen);

  cdna_cses = default_cDNA_ComplexSequenceEvalSet();
  gen_cses  = new_ComplexSequenceEvalSet_from_GeneModel(gm);

  cs_cdna = new_ComplexSequence(active_cdna,cdna_cses);
  cs_gen  = new_ComplexSequence(active_gen,gen_cses);

  gp21 = std_GeneParser21();
  GeneParser21_fold_in_RandomModelDNA(gp21,gm->rnd);
  gp21s = GeneParser21Score_from_GeneParser21(gp21);
  gp = GeneParser4Score_from_GeneParser21Score(gp21s);
 
  dm = identity_DnaMatrix(Probability2Score(halfbit2Probability(1)),Probability2Score(halfbit2Probability(-1)));

  assert(cs_cdna);
  assert(cs_gen);
  assert(gp);
  assert(rndcodonscore);
  assert(dm);
  assert(dpri);
  
  /*  show_CodonMatrixScore(cm,ct,ofp);*/

  pal = PackAln_bestmemory_CdnaWise10(cs_cdna,cs_gen,gp,cm,rndcodonscore,dm,
				      Probability2Score(halfbit2Probability(-12)),
				      Probability2Score(halfbit2Probability(-2)),
				      Probability2Score(halfbit2Probability(-5)),
				      Probability2Score(halfbit2Probability(0)),
				      NULL,
				      dpri);


  alb = convert_PackAln_to_AlnBlock_CdnaWise10(pal);

  gent = Genomic_from_Sequence(gen);
  assert(gent);

  gr = new_GenomicRegion(gent);
  assert(gr);


  add_Genes_to_GenomicRegion_GeneWise(gr,active_gen->offset,active_gen->end,alb,cdna->name,0,NULL);
				      
  mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);

  show_pretty_GenomicRegion(gr,0,ofp);

  for(i=0;i<gr->len;i++) {
    if( gr->gene[i]->ispseudo == TRUE ) {
	fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i);
    } else {
      trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
      write_fasta_Sequence(trans->baseseq,ofp);
    }
  } 
 
}
Beispiel #13
0
int main(int argc,char ** argv)
{
  Sequence * query;
  Sequence * target;
  ComplexSequence * query_cs;
  ComplexSequence * target_cs;
  ComplexSequenceEvalSet  * evalfunc;
  CompMat * comp;
  char * comp_file;
  int gap = (12);
  int ext = (2);

  boolean show_raw_output = FALSE;
  boolean show_label_output = FALSE;
  boolean show_fancy_output = FALSE;
  boolean has_outputted = FALSE;

  PackAln * pal;
  AlnBlock * alb;
  
  /*
   * Process command line options
   * -h or -help gives us help
   * -g for gap value (an int) - rely on commandline error processing
   * -e for ext value (an int) - rely on commandline error processing
   * -m for matrix (a char)
   * -r - raw matrix output
   * -l - label output
   * -f - fancy output
   *
   *
   * Use calls to commandline.h functions
   *
   */
  
  if( strip_out_boolean_argument(&argc,argv,"h") == TRUE || strip_out_boolean_argument(&argc,argv,"-help") == TRUE) {
    show_help(stdout);
    exit(1);
  }

  show_raw_output = strip_out_boolean_argument(&argc,argv,"r");
  show_label_output = strip_out_boolean_argument(&argc,argv,"l");
  show_fancy_output = strip_out_boolean_argument(&argc,argv,"f");


  /** if all FALSE, set fancy to TRUE **/

  if( show_raw_output == FALSE && show_label_output == FALSE ) 
    show_fancy_output = TRUE;


  (void) strip_out_integer_argument(&argc,argv,"g",&gap);
  (void) strip_out_integer_argument(&argc,argv,"e",&ext);

  comp_file = strip_out_assigned_argument(&argc,argv,"m");
  if( comp_file == NULL)
    comp_file = "blosum62.bla";

  
  
  if( argc != 3 ) {
    warn("Must have two arguments for sequence 1 and sequence 2 %d",argc);
    show_help(stdout);
    exit(1);
  }
  
  /*
   * Read in two sequences
   */
  
  if( (query=read_fasta_file_Sequence(argv[1])) == NULL ) {
    fatal("Unable to read the sequence in file %s",argv[1]);
  }
  
  if( (target=read_fasta_file_Sequence(argv[2])) == NULL ) {
    fatal("Unable to read the sequence in file %s",argv[2]);
  }
  
  
  /*
   * Open a blosum matrix. This will be opened from WISECONFIGDIR
   * or WISEPERSONALDIR if it is not present in the current directory.
   */
  
  comp = read_Blast_file_CompMat(comp_file);
  
  if( comp == NULL ) {
    fatal("unable to read file %s",comp_file);
  }
  
  /*
   * Convert sequences to ComplexSequences: 
   * To do this we need an protein ComplexSequenceEvalSet
   *
   */
  
  evalfunc = default_aminoacid_ComplexSequenceEvalSet();
  
  query_cs = new_ComplexSequence(query,evalfunc);
  if( query_cs == NULL ) {
    fatal("Unable to make a protein complex sequence from %s",query->name);
  }
  
  target_cs = new_ComplexSequence(target,evalfunc);
  if( target_cs == NULL ) {
    fatal("Unable to make a protein complex sequence from %s",target->name);
  }
  
  /*
   * Make an alignment. I don't care about the implementation:
   * If the sequences are small enough then it should use explicit memory.
   * Long sequences should use divide and conquor methods.
   *
   * Calling PackAln_bestmemory_ProteinSW is the answer
   * This function decides on the best method considering the
   * memory and changes accordingly. It frees the matrix memory 
   * at the end as well.
   *
   */		 

  pal = PackAln_bestmemory_ProteinSW(query_cs,target_cs,comp,-gap,-ext,NULL);

  if( pal == NULL ) {
    fatal("Unable to make an alignment from %s and %s",query->name,target->name);
  }

  /*
   * ok, make other alignment forms, and be ready to show
   */



  alb = convert_PackAln_to_AlnBlock_ProteinSW(pal);


  /*
   * show output. If multiple outputs, divide using //
   */

  if( show_raw_output == TRUE ) {
    show_simple_PackAln(pal,stdout);
    puts("//\n");
  }

  if( show_label_output == TRUE ) {
    show_flat_AlnBlock(alb,stdout);
  }

  if( show_fancy_output == TRUE ) {
    write_pretty_seq_align(alb,query,target,15,50,stdout);
    puts("//\n");
  }

  /*
   * Destroy the memory.
   */	

  free_Sequence(query);
  free_Sequence(target);
  free_CompMat(comp);
  free_ComplexSequence(query_cs);
  free_ComplexSequence(target_cs);
  free_PackAln(pal);
  free_AlnBlock(alb);

  return 0;
}