示例#1
0
/*
 * Write the alignment views selected in `out` to `ofp`.
 *
 * Up to four views are written, always in this order, each one only when
 * its flag compares equal to TRUE:
 *   1. out->show_alb            - mapped_ascii_AlnBlock with third argument 0
 *   2. out->show_cumlative_alb  - mapped_ascii_AlnBlock with third argument 1
 *   3. out->show_cumlative_pal  - show_bits_and_cumlative_PackAln
 *   4. out->show_pal            - show_simple_PackAln
 * Every view that is written is followed by `divide_str` and a newline.
 *
 * All five arguments must be non-NULL; this is enforced with assert().
 */
void show_StandardOutputOptions(StandardOutputOptions * out,AlnBlock * alb,PackAln * pal,char * divide_str,FILE * ofp)
{
  assert(out);
  assert(alb);
  assert(pal);
  assert(ofp);
  assert(divide_str);

  /* AlnBlock view */
  if( TRUE == out->show_alb ) {
    mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);
    fputs(divide_str,ofp);
    fputc('\n',ofp);
  }

  /* AlnBlock view, "cumlative" variant (third argument 1) */
  if( TRUE == out->show_cumlative_alb ) {
    mapped_ascii_AlnBlock(alb,Score2Bits,1,ofp);
    fputs(divide_str,ofp);
    fputc('\n',ofp);
  }

  /* PackAln view with bits and cumulative columns */
  if( TRUE == out->show_cumlative_pal ) {
    show_bits_and_cumlative_PackAln(pal,ofp);
    fputs(divide_str,ofp);
    fputc('\n',ofp);
  }

  /* plain PackAln view */
  if( TRUE == out->show_pal ) {
    show_simple_PackAln(pal,ofp);
    fputs(divide_str,ofp);
    fputc('\n',ofp);
  }
}
示例#2
0
boolean show_output(void)
{
  int i;

  cDNA * cdna;
  Protein * trans;
  GenomicOverlapResults * gor;
  AlnColumn * alt;
  

 

  if( show_pretty == TRUE ) {
    show_pretty_aln();
  }

  if( show_match_sum == TRUE ) {
    show_MatchSummary_genewise_header(ofp);
    show_MatchSummarySet_genewise(mss,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_pretty_gene == TRUE ) {
    show_pretty_GenomicRegion(gr,0,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_supp_gene == TRUE ) {
    show_pretty_GenomicRegion(gr,1,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_embl == TRUE ) {
    write_Embl_FT_GenomicRegion(gr,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_diana == TRUE ) {
    write_Diana_FT_GenomicRegion(gr,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_overlap == TRUE ) {
    gor = Genomic_overlap(gr,embl);
    show_GenomicOverlapResults(gor,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }


  if( show_trans == TRUE ) {
    for(i=0;i<gr->len;i++) {
      if( gr->gene[i]->ispseudo == TRUE ) {
	fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i);
      } else {
	trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
	write_fasta_Sequence(trans->baseseq,ofp);
      }
    } 
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_pep == TRUE ) {
    alt = alb->start;
    for(;alt != NULL;) {
      trans = Protein_from_GeneWise_AlnColumn(gen->baseseq,alt,1,&alt,ct,is_random_AlnColumn_genewise);
      if ( trans == NULL ) 
	break;
      write_fasta_Sequence(trans->baseseq,ofp);
      free_Protein(trans);
    }
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_cdna == TRUE ) {
    for(i=0;i<gr->len;i++) {
      cdna = get_cDNA_from_Transcript(gr->gene[i]->transcript[0]);
      write_fasta_Sequence(cdna->baseseq,ofp);
    } 
    fprintf(ofp,"%s\n",divide_str);
  }


  if( show_ace == TRUE ) {
    show_ace_GenomicRegion(gr,gen->baseseq->name,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_gff == TRUE ) {
    show_GFF_GenomicRegion(gr,gen->baseseq->name,"GeneWise",ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_gene_plain == TRUE ) {
    show_GenomicRegion(gr,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_AlnBlock == TRUE ) {
    mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_cumlative_PackAln == TRUE ) {
    show_bits_and_cumlative_PackAln(pal,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }

  if( show_PackAln == TRUE ) {
    show_simple_PackAln(pal,ofp);
    fprintf(ofp,"%s\n",divide_str);
  }
  


  return TRUE;
}
示例#3
0
/*
 * Command-line driver: reads a genomic sequence (argv[1]) and an evidence
 * file (argv[2]), runs the GenomeWise9 alignment and prints the reports
 * that are switched on.
 *
 * Boolean switches stripped from argv: geneutr, genes, trans, gff, alb, pal,
 * debug, cdna. Integer options: stop, start, gene, switch, smell.
 *
 * Every object that is dereferenced later is checked right after it is
 * built; a failure goes through fatal(), which this function (like the
 * existing dpri check) relies on not to return.
 *
 * Exits with status 12 (after printing help) when the two positional
 * arguments are not present; returns 0 on success.
 */
int main(int argc,char ** argv)
{
  Sequence   * gen;
  Genomic    * genomic;
  CodonTable * ct = NULL;
  GenomeEvidenceSet * ges = NULL;
  RandomCodonScore * rcs;
  FILE * ifp = NULL;
  ComplexSequence * cs = NULL;
  ComplexSequenceEvalSet * cses = NULL;
  AlnBlock * alb;
  PackAln * pal;
  GenomicRegion * gr;
  int i;
  Protein * trans;
  cDNA    * cdna;
  int stop_codon_pen  = 200;
  int start_codon_pen = 30;
  int new_gene        = 5000;
  int switch_cost     = 100;
  int smell           = 8;
  DPRunImpl * dpri = NULL;

  EstEvidence * est;

  boolean show_trans = TRUE;
  boolean show_cdna  = FALSE;
  boolean show_genes = TRUE;
  boolean show_alb   = FALSE;
  boolean show_pal   = FALSE;
  boolean show_gff   = TRUE;
  boolean show_debug = FALSE;
  boolean show_geneu = TRUE;
  char * divide_string = "//";

  strip_out_boolean_def_argument(&argc,argv,"geneutr",&show_geneu);
  strip_out_boolean_def_argument(&argc,argv,"genes",&show_genes);
  strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans);
  strip_out_boolean_def_argument(&argc,argv,"gff",&show_gff);
  strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb);
  strip_out_boolean_def_argument(&argc,argv,"pal",&show_pal);
  strip_out_boolean_def_argument(&argc,argv,"debug",&show_debug);
  strip_out_boolean_def_argument(&argc,argv,"cdna",&show_cdna);
  strip_out_integer_argument(&argc,argv,"stop",&stop_codon_pen);
  strip_out_integer_argument(&argc,argv,"start",&start_codon_pen);
  strip_out_integer_argument(&argc,argv,"gene",&new_gene);
  strip_out_integer_argument(&argc,argv,"switch",&switch_cost);
  strip_out_integer_argument(&argc,argv,"smell",&smell);

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

  /* load the inputs, refusing to go on with anything that failed to load */
  ct  = read_CodonTable_file("codon.table");
  if( ct == NULL ) {
    fatal("Unable to read codon table file %s","codon.table");
  }

  gen = read_fasta_file_Sequence(argv[1]);
  if( gen == NULL ) {
    fatal("Unable to read the sequence in file %s",argv[1]);
  }

  ifp = openfile(argv[2],"r");
  if( ifp == NULL ) {
    fatal("Unable to open %s",argv[2]);
  }

  ges = read_est_evidence(ifp,ct);
  /* the stream is not referenced again in this function */
  fclose(ifp);
  ifp = NULL;
  if( ges == NULL ) {
    fatal("Unable to read evidence from file %s",argv[2]);
  }

  /* push the command-line "smell" value into every evidence unit */
  for(i=0;i<ges->len;i++) {
    est = (EstEvidence *) ges->geu[i]->data;
    est->in_smell = smell;
  }

  /* random codon scores: stop codons get a large negative score, all
   * other codon indices score 0 */
  rcs = RandomCodonScore_alloc();
  if( rcs == NULL ) {
    fatal("Unable to allocate the random codon score table");
  }
  for(i=0;i<125;i++) {
    if( is_stop_codon(i,ct) ) {
      rcs->codon[i] = -1000000;
    } else {
      rcs->codon[i] = 0;
    }
  }

  cses = default_genomic_ComplexSequenceEvalSet();
  cs   = new_ComplexSequence(gen,cses);
  if( cs == NULL ) {
    fatal("Unable to make a genomic complex sequence from %s",gen->name);
  }

  /* the penalties are given as positive numbers on the command line and
   * passed on negated */
  pal  = PackAln_bestmemory_GenomeWise9(ges,cs,-switch_cost,-new_gene,-start_codon_pen,-stop_codon_pen,rcs,NULL,dpri);
  if( pal == NULL ) {
    fatal("Unable to make an alignment for %s",gen->name);
  }

  alb  = convert_PackAln_to_AlnBlock_GenomeWise9(pal);
  if( alb == NULL ) {
    fatal("Unable to convert the alignment for %s into an AlnBlock",gen->name);
  }

  genomic = Genomic_from_Sequence(gen);
  gr = new_GenomicRegion(genomic);

  add_Genes_to_GenomicRegion_GeneWise(gr,1,gen->len,alb,gen->name,0,NULL);

  /* reports: each one is followed by the divider line */
  if( show_genes ) {
    show_pretty_GenomicRegion(gr,0,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_gff ) {
    show_GFF_GenomicRegion(gr,gen->name,"genomwise",stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_trans ) {
    for(i=0;i<gr->len;i++) {
      if( gr->gene[i]->ispseudo == TRUE ) {
        fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i);
      } else {
        trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
        write_fasta_Sequence(trans->baseseq,stdout);
      }
    }
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_cdna ) {
    for(i=0;i<gr->len;i++) {
      cdna = get_cDNA_from_Transcript(gr->gene[i]->transcript[0]);
      write_fasta_Sequence(cdna->baseseq,stdout);
    }
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_geneu ) {
    show_utr_exon_genomewise(alb,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_alb ) {
    mapped_ascii_AlnBlock(alb,id,1,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_debug ) {
    debug_genomewise(alb,ges,ct,gen,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_pal ) {
    show_simple_PackAln(pal,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  return 0;
}
示例#4
0
boolean show_output(void)
{
  int i,k;
  ThreeStateModel * temptsm;
  AlnBlock * alb;
  PackAln * pal;
  MatchSummarySet * mss;
  Protein * ps;
  cDNA * cdna;
  double bits;
  boolean fitted_res = FALSE;
  AlnBlockList * alist;
  AlnBlock * anchored;
  SequenceSet * set;
  AlnColumn * alt;
  Protein * trans;

  /* sort by bit score first */

  sort_Hscore_by_score(hs);

  if( search_mode == PC_SEARCH_S2DB ) {
    if( hs->his == NULL || hs->his->total < 1000 ) {
	info("Cannot fit histogram to a db smaller than 1,000");
	fprintf(ofp,"[Warning: Can't fit histogram to a db smaller than 1,000]\n\n");
	show_histogram = FALSE;
    } else {
      fitted_res = TRUE;
      fit_Hscore_to_EVD(hs,20);
    }
  }

  /* deal with initialising anchored alignment.
   * Could be done for either single HMMs or single proteins,
   * but we will only do it for HMMs at the moment
   */

  if( make_anchored_aln == TRUE ) {
    if( tsm == NULL ) {
      warn("Attempting to make an achored alignment without a HMM. impossible!");
      make_anchored_aln = FALSE;
    } else {
      anchored = single_unit_AlnBlock(tsm->len,"MATCH_STATE");
      set = SequenceSet_alloc_std();
   }
  }

  /* dofus catcher */
  if( aln_alg != alg ) {
    fprintf(ofp,"\n#\n#WARNING!\n#\n# Your alignment algorithm is different from your search algorithm.\n# This is probably quite sensible but will lead to differing scores.\n# Use the search score as an indicator of the significance of the match\n# Read the docs for more information\n#\n");
  }

  fprintf(ofp,"\n\n#High Score list\n");
  fprintf(ofp,"#Protein ID                 DNA Str  ID                        Bits Evalue\n");  
  fprintf(ofp,"--------------------------------------------------------------------------\n");

  for(i=0;i<hs->len;i++) {
    bits = Score2Bits(hs->ds[i]->score);
    if( bits < search_cutoff ) {
      break;
    }

    if( fitted_res == TRUE && evalue_search_str != NULL ) {
      if( hs->ds[i]->evalue > evalue_search_cutoff ) 
	break;
    }

    if( fitted_res == TRUE) 
      fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f %.2g\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits,hs->ds[i]->evalue);
    else
      fprintf(ofp,"Protein %-20sDNA [%c] %-24s %.2f\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits);

  }

  if( search_mode == PC_SEARCH_S2DB && show_histogram == TRUE ) {
    fprintf(ofp,"\n\n#Histogram\n");
    fprintf(ofp,"-----------------------------------------------------------------------\n");
    PrintASCIIHistogram(hs->his,ofp);
  }

  fprintf(ofp,"\n\n#Alignments\n");
  fprintf(ofp,"-----------------------------------------------------------------------\n");

  for(i=0;i<hs->len;i++) {
    bits = Score2Bits(hs->ds[i]->score);
    if( bits < search_cutoff ) {
      break;
    }
    if( i >= aln_number ) {
      break;
    }

    if( fitted_res == TRUE && evalue_search_str != NULL ) {
      if( hs->ds[i]->evalue > evalue_search_cutoff ) 
	break;
    }

    
    fprintf(ofp,"\n\n>Results for %s vs %s (%s) [%d]\n",hs->ds[i]->query->name,hs->ds[i]->target->name,hs->ds[i]->target->is_reversed == TRUE ? "reverse" : "forward",i+1 );

    cdna = get_cDNA_from_cDNADB(cdb,hs->ds[i]->target);
    temptsm = indexed_ThreeStateModel_ThreeStateDB(tsmdb,hs->ds[i]->query);


    alb = AlnBlock_from_TSM_estwise_wrap(temptsm,cdna,cps,cm,ct,rmd,aln_alg,use_syn,allN,flat_insert,dpri,&pal);

    if( alb == NULL ) {
      warn("Got a NULL alignment. Exiting now due to presumed problems");
      fprintf(ofp,"\n\n*Got a NULL alignment. Exiting now due to presumed problems*\n\n");
      return FALSE;
    }


 
    if( use_single_pro == FALSE) 
      mss = MatchSummarySet_from_AlnBlock_genewise(alb,temptsm->name,1,cdna->baseseq);
    else
      mss = MatchSummarySet_from_AlnBlock_genewise(alb,pro->baseseq->name,pro->baseseq->offset,cdna->baseseq);

    
    if( show_pretty == TRUE ) {

      fprintf(ofp,"\n%s output\nScore %4.2f bits over entire alignment.\nThis will be different from per-alignment scores. See manual for details\nFor computer parsable output, try %s -help or read the manual\n",program_name,Score2Bits(pal->score),program_name);
      
      if( use_syn == FALSE ) {
	fprintf(ofp,"Scores as bits over a flat simple random model\n\n");
      } else {
	fprintf(ofp,"Scores as bits over a synchronous coding model\n\n");
      }
      
      ps = pseudo_Protein_from_ThreeStateModel(temptsm);
      protcdna_ascii_display(alb,ps->baseseq->seq,ps->baseseq->name,ps->baseseq->offset,cdna,ct,15,main_block,TRUE,ofp);

      
      free_Protein(ps);

      fprintf(ofp,"%s\n",divide_str);
      
    }

    if( show_match_sum == TRUE ) {
      show_MatchSummary_genewise_header(ofp);
      show_MatchSummarySet_genewise(mss,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
    

    if( show_pep == TRUE ) {
      alt = alb->start;
      for(;alt != NULL;) {
	trans = Protein_from_GeneWise_AlnColumn(cdna->baseseq,alt,1,&alt,ct,is_random_AlnColumn_genewise);
	if ( trans == NULL ) 
	  break;
	write_fasta_Sequence(trans->baseseq,ofp);
	free_Protein(trans);
      }
      fprintf(ofp,"%s\n",divide_str);
    }

    if( show_AlnBlock == TRUE ) {
      mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
    
    if( show_PackAln == TRUE ) {
      show_simple_PackAln(pal,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }

    /*
     * This goes at the end because it destroys the alb structure
     */

    if( make_anchored_aln == TRUE ) {
      /* attach sequence to als in alb, so we have it for later use */
      alb->seq[1]->data = (void *) cdna->baseseq;
      /* add to SequenceSet so we can destroy the memory */
      add_SequenceSet(set,hard_link_Sequence(cdna->baseseq));

      alist = split_AlnBlock(alb,is_random_AlnColumn_genewise);

      for(k=0;k<alist->len;k++) {
	/* actually produce the anchored alignment */
	/*mapped_ascii_AlnBlock(alist->alb[k],Score2Bits,stderr);*/
	add_to_anchored_AlnBlock(anchored,alist->alb[k]);

	/*	dump_ascii_AlnBlock(anchored,stderr);*/
      }
    }

    alb = free_AlnBlock(alb);
    pal = free_PackAln(pal);
    mss = free_MatchSummarySet(mss);
    cdna = free_cDNA(cdna);
    temptsm = free_ThreeStateModel(temptsm);

  }

  if( do_complete_analysis == TRUE ) {
    fprintf(ofp,"\n\n#Complete Analysis\n");
    fprintf(ofp,"-------------------------------------------------------------\n\n");
    
    /* ok - end of loop over relevant hits. If we have an
     * anchored alignment, print it out!
     */
    if( make_anchored_aln == TRUE ) {
      /*dump_ascii_AlnBlock(anchored,stderr);*/
      write_mul_estwise_AlnBlock(anchored,ct,ofp);
      fprintf(ofp,"%s\n",divide_str);
    }
  }


  return TRUE;
}
示例#5
0
/*
 * Command-line driver: reads a "trusted" clone set (argv[1]) and a "weak"
 * clone set (argv[2]), aligns them with either the global or the local
 * CloneWise algorithm and prints the reports that are switched on.
 *
 * Boolean switches: alb, pal, zip, path. Integer options: wgap, wext,
 * wswitch, tgap, text, match, mismatch (all stored in variables defined
 * outside this function), plus spread and kbyte. "alg" selects the
 * algorithm string ("local" by default, or "global").
 *
 * Each input stream is closed as soon as its clone set has been read, and
 * every object is checked before it is used; failures go through fatal(),
 * which this function relies on not to return.
 *
 * Exits with status 12 (after printing help) when the two positional
 * arguments are not present; returns 0 on success.
 */
int main (int argc,char ** argv)
{
  MappedCloneSet * trusted;
  MappedCloneSet * weak;
  MappedCloneMatch * match;


  FILE * in;
  int kbyte = 10000;
  PackAln  * pal;
  AlnBlock * alb;

  int spread = 30;

  boolean show_alb = 0;
  boolean show_pal = 0;
  boolean show_zip = 1;
  boolean show_path = 0;

  char * alg_string = "local";
  char * temp;
  char * divide_string = "//";

  strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb);
  strip_out_boolean_def_argument(&argc,argv,"pal",&show_pal);
  strip_out_boolean_def_argument(&argc,argv,"zip",&show_zip);
  strip_out_boolean_def_argument(&argc,argv,"path",&show_path);

  strip_out_integer_argument(&argc,argv,"wgap",&query_gap_start);
  strip_out_integer_argument(&argc,argv,"wext",&query_gap_extend);
  strip_out_integer_argument(&argc,argv,"wswitch",&query_switch_cost);
  strip_out_integer_argument(&argc,argv,"tgap",&target_gap_start);
  strip_out_integer_argument(&argc,argv,"text",&target_gap_extend);
  strip_out_integer_argument(&argc,argv,"match",&match_score);
  strip_out_integer_argument(&argc,argv,"mismatch",&mismatch_score);
  temp =strip_out_assigned_argument(&argc,argv,"alg");
  if( temp != NULL ) {
    alg_string = temp;
  }

  strip_out_integer_argument(&argc,argv,"spread",&spread);
  strip_out_integer_argument(&argc,argv,"kbyte",&kbyte);

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout); 
    exit(12);
  }

  /* first input: the trusted clone set */
  in = fopen(argv[1],"r");
  if( in == NULL ) {
    fatal("Unable to open %s",argv[1]);
  }
  trusted = read_MappedCloneSet(in);
  /* close before the handle is reused for the second file */
  fclose(in);
  if( trusted == NULL ) {
    fatal("Unable to read a clone set from %s",argv[1]);
  }

  /* second input: the weak clone set */
  in = fopen(argv[2],"r");
  if( in == NULL ) {
    fatal("Unable to open %s",argv[2]);
  }

  change_max_BaseMatrix_kbytes(kbyte);

  weak = read_MappedCloneSet(in);
  fclose(in);
  in = NULL;
  if( weak == NULL ) {
    fatal("Unable to read a clone set from %s",argv[2]);
  }

  synchronise_MappedCloneSets(trusted,weak);

  match = new_MappedCloneMatch(weak,trusted,match_score,mismatch_score);
  if( match == NULL ) {
    fatal("Unable to build the match matrix");
  }

  fprintf(stderr,"Match matrix calculated\n");

  /* the gap and switch costs are passed on negated */
  if( strcmp(alg_string,"global") == 0 ) {
    pal = PackAln_bestmemory_CloneWise(weak,trusted,match,-query_gap_start,-query_gap_extend,-target_gap_start,-target_gap_extend,spread,-query_switch_cost,NULL);
    alb = ( pal == NULL ) ? NULL : convert_PackAln_to_AlnBlock_CloneWise(pal);
  } else if ( strcmp(alg_string,"local") == 0 ) {
    pal = PackAln_bestmemory_LocalCloneWise(weak,trusted,match,-query_gap_start,-query_gap_extend,-target_gap_start,-target_gap_extend,spread,-query_switch_cost,NULL);
    alb = ( pal == NULL ) ? NULL : convert_PackAln_to_AlnBlock_LocalCloneWise(pal);
  } else {
    /* keep gcc happy */
    pal = NULL;
    alb = NULL;
    fatal("Not a proper algorithm string %s",alg_string);
  }

  if( pal == NULL || alb == NULL ) {
    fatal("Unable to make an alignment with algorithm %s",alg_string);
  }

  /* reports: each one is followed by the divider line */
  if( show_path ) {
    extended_path(alb,weak,trusted,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_zip ) {
    debug_zip(alb,weak,trusted,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_alb ) {
    mapped_ascii_AlnBlock(alb,id,1,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_pal ) {
    show_simple_PackAln(pal,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  return 0;
}
示例#6
0
/*
 * Command-line driver: reads a two-sequence fasta alignment (argv[1]),
 * builds or recovers a SyWise20 alignment over it and prints the score
 * sequence and the gene predictions.
 *
 * Options stripped from argv, besides those consumed by the DPRunImpl,
 * GeneModelParam, StandardOutputOptions and ShowGenomicRegionOptions
 * constructors:
 *   dump    <file>  write the PackAln to <file>
 *   recover <file>  read the PackAln from <file> instead of computing it
 *
 * `ct` and `codon_table` are names defined outside this function.
 *
 * Failures go through fatal(), which this function relies on not to
 * return. Exits with status 12 (after printing help) when the positional
 * argument is not present; returns 0 on success.
 */
int main(int argc,char ** argv)
{
    int i;

    DPRunImpl * dpri = NULL;
    GeneModelParam * gmp = NULL;
    GeneModel * gm = NULL;

    FILE * ifp;
    SeqAlign   * al;
    PairBaseSeq * pbs;

    ComplexSequenceEval * splice5;
    ComplexSequenceEval * splice3;
    /* only built when the alignment is computed rather than recovered */
    ComplexSequence * cseq = NULL;


    CompMat * score_mat;
    CompProb * comp_prob;
    RandomModel * rm;

    PairBaseCodonModelScore * codon_score;
    PairBaseModelScore* nonc_score;

    PairBaseCodonModelScore * start;
    PairBaseCodonModelScore * stop;


    SyExonScore * exonscore;

    PackAln * pal = NULL;
    AlnBlock * alb;

    Genomic * genomic;
    GenomicRegion * gr;
    GenomicRegion * gr2;

    StandardOutputOptions * std_opt;
    ShowGenomicRegionOptions * sgro;

    char * dump_packaln = NULL;
    char * read_packaln = NULL;
    FILE * packifp = NULL;



    ct = read_CodonTable_file(codon_table);
    /*
      score_mat = read_Blast_file_CompMat("blosum62.bla");
      comp_prob = CompProb_from_halfbit(score_mat);
    */
    rm = default_RandomModel();

    comp_prob = read_Blast_file_CompProb("wag85");

    fold_column_RandomModel_CompProb(comp_prob,rm);

    dpri = new_DPRunImpl_from_argv(&argc,argv);
    if( dpri == NULL ) {
        fatal("Unable to build DPRun implementation. Bad arguments");
    }

    gmp = new_GeneModelParam_from_argv(&argc,argv);

    /* std_opt is only used by the commented-out call further down, but
     * the constructor is still given the chance to process argv */
    std_opt = new_StandardOutputOptions_from_argv(&argc,argv);
    sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv);


    dump_packaln = strip_out_assigned_argument(&argc,argv,"dump");
    read_packaln = strip_out_assigned_argument(&argc,argv,"recover");

    strip_out_standard_options(&argc,argv,show_help,show_version);
    if( argc != 2 ) {
        show_help(stdout);
        exit(12);
    }


    if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
        fatal("Could not build gene model");
    }

    codon_score = make_PairBaseCodonModelScore(comp_prob);
    nonc_score  = make_PairBaseModelScore();

    splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score);
    splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score);

    if((ifp = openfile(argv[1],"r")) == NULL ) {
        fatal("Could not open file %s",argv[1]);
    }

    al = read_fasta_SeqAlign(ifp);

    /* validate the user's input explicitly; assert() would vanish in an
     * NDEBUG build and let bad input through */
    if( al == NULL ) {
        fatal("Could not read a sequence alignment from file %s",argv[1]);
    }
    if( al->len != 2 ) {
        fatal("File %s must hold exactly 2 aligned sequences, found %d",argv[1],al->len);
    }
    if( al->seq[0]->len <= 0 || al->seq[1]->len <= 0 ) {
        fatal("File %s holds an empty sequence",argv[1]);
    }

    /*  write_fasta_SeqAlign(al,stdout);*/


    pbs = new_PairBaseSeq_SeqAlign(al);

    if( read_packaln == NULL ) {
        cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3);
    }

    start = make_start_PairBaseCodonModelScore(ct);
    stop  = make_stop_PairBaseCodonModelScore(ct);


    /*  show_PairBaseCodonModelScore(stop,ct,stdout); */

    /*
      for(i=0;i<pbs->anchor->len;i++) {
        printf("%3d  %c For %-6d %-6d %c Rev %-6d %-6d\n",i,pbs->anchor->seq[i],
    	   CSEQ_PAIR_5SS(cseq,i),CSEQ_PAIR_3SS(cseq,i),
    	   char_complement_base(pbs->anchor->seq[i]),
    	   CSEQ_REV_PAIR_5SS(cseq,i),CSEQ_REV_PAIR_3SS(cseq,i));
      }
    */


    /*  show_ComplexSequence(cseq,stdout);

    */


    exonscore = SyExonScore_flat_model(100,150,0.1,1.0);
    /*
    for(i=0;i<cseq->length;i++) {
      fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]);
    }
    exit(0);
    */

    if( read_packaln != NULL ) {
        /* recover a previously dumped alignment */
        packifp = openfile(read_packaln,"r");
        if( packifp == NULL ) {
            /* report the recover file: dump_packaln may be NULL here */
            fatal("File %s is unopenable - unable to recover alignment",read_packaln);
        } else {
            pal = read_simple_PackAln(packifp);
            fclose(packifp);
            packifp = NULL;
        }
    } else {
        pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri);
    }

    if( pal == NULL ) {
        fatal("Unable to obtain an alignment for file %s",argv[1]);
    }

    alb = convert_PackAln_to_AlnBlock_SyWise20(pal);


    if( dump_packaln != NULL ) {
        /* a dump failure is reported but does not stop the run */
        packifp = openfile(dump_packaln,"w");
        if( packifp == NULL ) {
            warn("File %s is unopenable - ignoring dump command",dump_packaln);
        } else {
            show_simple_PackAln(pal,packifp);
            fclose(packifp);
            packifp = NULL;
        }
    }

    show_score_sequence(alb,pbs,nonc_score,stdout);
    /*
      show_StandardOutputOptions(std_opt,alb,pal,"//",stdout);
    */
    genomic = Genomic_from_Sequence(al->seq[0]);
    gr = new_GenomicRegion(genomic);
    /* NOTE(review): gr2 is built but never used afterwards - confirm it can go */
    gr2 = new_GenomicRegion(genomic);

    add_Genes_to_GenomicRegion_new(gr,alb);


    show_GenomicRegionOptions(sgro,gr,ct,"//",stdout);

    return 0;
}
示例#7
0
/*
 * Command-line driver: aligns two protein sequences (fasta files argv[1]
 * and argv[2]) with ProteinSW and prints the alignment.
 *
 * Options, handled through the strip_out_* helpers:
 *   h / -help  show help and exit with status 1
 *   g <int>    gap value (default 12)
 *   e <int>    ext value (default 2)
 *   m <file>   comparison matrix file (default "blosum62.bla")
 *   r          raw output (simple PackAln)
 *   l          label output (flat AlnBlock)
 *   f          fancy output (pretty alignment)
 * When neither r nor l is given, fancy output is switched on.
 *
 * Every object is checked right after it is built; failures go through
 * fatal(), which this function relies on not to return. Exits with status 1
 * when the two positional arguments are not present; returns 0 on success.
 */
int main(int argc,char ** argv)
{
  Sequence * query;
  Sequence * target;
  ComplexSequence * query_cs;
  ComplexSequence * target_cs;
  ComplexSequenceEvalSet  * evalfunc;
  CompMat * comp;
  char * comp_file;
  int gap = (12);
  int ext = (2);

  boolean show_raw_output = FALSE;
  boolean show_label_output = FALSE;
  boolean show_fancy_output = FALSE;

  PackAln * pal;
  AlnBlock * alb;
  
  /*
   * Process command line options using the commandline.h helpers.
   */
  
  if( strip_out_boolean_argument(&argc,argv,"h") == TRUE || strip_out_boolean_argument(&argc,argv,"-help") == TRUE) {
    show_help(stdout);
    exit(1);
  }

  show_raw_output = strip_out_boolean_argument(&argc,argv,"r");
  show_label_output = strip_out_boolean_argument(&argc,argv,"l");
  show_fancy_output = strip_out_boolean_argument(&argc,argv,"f");


  /* with neither raw nor label output requested, fall back to fancy */

  if( show_raw_output == FALSE && show_label_output == FALSE ) 
    show_fancy_output = TRUE;


  (void) strip_out_integer_argument(&argc,argv,"g",&gap);
  (void) strip_out_integer_argument(&argc,argv,"e",&ext);

  comp_file = strip_out_assigned_argument(&argc,argv,"m");
  if( comp_file == NULL)
    comp_file = "blosum62.bla";

  
  
  if( argc != 3 ) {
    warn("Must have two arguments for sequence 1 and sequence 2 %d",argc);
    show_help(stdout);
    exit(1);
  }
  
  /*
   * Read in two sequences
   */
  
  if( (query=read_fasta_file_Sequence(argv[1])) == NULL ) {
    fatal("Unable to read the sequence in file %s",argv[1]);
  }
  
  if( (target=read_fasta_file_Sequence(argv[2])) == NULL ) {
    fatal("Unable to read the sequence in file %s",argv[2]);
  }
  
  
  /*
   * Open a blosum matrix. This will be opened from WISECONFIGDIR
   * or WISEPERSONALDIR if it is not present in the current directory.
   */
  
  comp = read_Blast_file_CompMat(comp_file);
  
  if( comp == NULL ) {
    fatal("unable to read file %s",comp_file);
  }
  
  /*
   * Convert sequences to ComplexSequences: 
   * To do this we need a protein ComplexSequenceEvalSet
   */
  
  evalfunc = default_aminoacid_ComplexSequenceEvalSet();
  
  query_cs = new_ComplexSequence(query,evalfunc);
  if( query_cs == NULL ) {
    fatal("Unable to make a protein complex sequence from %s",query->name);
  }
  
  target_cs = new_ComplexSequence(target,evalfunc);
  if( target_cs == NULL ) {
    fatal("Unable to make a protein complex sequence from %s",target->name);
  }
  
  /*
   * Make an alignment. I don't care about the implementation:
   * If the sequences are small enough then it should use explicit memory.
   * Long sequences should use divide and conquer methods.
   *
   * Calling PackAln_bestmemory_ProteinSW is the answer
   * This function decides on the best method considering the
   * memory and changes accordingly. It frees the matrix memory 
   * at the end as well.
   *
   * The gap and ext values are passed on negated.
   */		 

  pal = PackAln_bestmemory_ProteinSW(query_cs,target_cs,comp,-gap,-ext,NULL);

  if( pal == NULL ) {
    fatal("Unable to make an alignment from %s and %s",query->name,target->name);
  }

  /*
   * ok, make other alignment forms, and be ready to show
   */

  alb = convert_PackAln_to_AlnBlock_ProteinSW(pal);

  /* checked like every other object above, before it is displayed or freed */
  if( alb == NULL ) {
    fatal("Unable to convert the alignment from %s and %s",query->name,target->name);
  }


  /*
   * show output. The raw and fancy outputs are followed by a // line
   * (puts adds one more newline after the "//\n" it is given).
   */

  if( show_raw_output == TRUE ) {
    show_simple_PackAln(pal,stdout);
    puts("//\n");
  }

  if( show_label_output == TRUE ) {
    show_flat_AlnBlock(alb,stdout);
  }

  if( show_fancy_output == TRUE ) {
    write_pretty_seq_align(alb,query,target,15,50,stdout);
    puts("//\n");
  }

  /*
   * Destroy the memory.
   */	

  free_Sequence(query);
  free_Sequence(target);
  free_CompMat(comp);
  free_ComplexSequence(query_cs);
  free_ComplexSequence(target_cs);
  free_PackAln(pal);
  free_AlnBlock(alb);

  return 0;
}