Esempio n. 1
0
/*
 * Driver: reads a Sanger genotype file named by argv[1] and, for every
 * entry of the resulting set's chr[] array, builds a SnpMatch against the
 * strains "DBA", "PWK", "CAST" and "WSB" and prints the forward and
 * backward log-sum scores of the MouseSNPMatch model.
 *
 * Returns 0 on success, 12 when the file argument is missing (mirroring the
 * usage exit code used by sibling drivers) and 1 when the input cannot be
 * opened or parsed.
 */
int main(int argc,char ** argv)
{
  GenoVarSet * s;
  DPRunImpl * dpri;
  SnpMatch * snpm;
  GenomePara * p;
  MouseSNPMatch * forward;
  MouseSNPMatch * backward;
  FILE * ifp;
  int i;

  dpri =  new_DPRunImpl_from_argv(&argc,argv);

  strip_out_standard_options(&argc,argv,show_help,show_version);

  /* argv[1] is read below, so it has to exist once options are stripped */
  if( argc < 2 ) {
    show_help(stdout);
    return 12;
  }

  /* explicit check rather than assert(): assert vanishes under NDEBUG */
  ifp = openfile(argv[1],"r");
  if( ifp == NULL ) {
    fprintf(stderr,"Unable to open genotype file %s\n",argv[1]);
    return 1;
  }

  s = read_sanger_genotype_file(ifp);
  if( s == NULL ) {
    fprintf(stderr,"Unable to read genotypes from %s\n",argv[1]);
    return 1;
  }

  p = new_GenomePara(0.95,0.01);

  fprintf(stdout,"Score for match %d, score for mismatch %d\n",p->match,p->mismatch);

  for(i=0;i < s->len ;i++) {
    snpm = new_SnpMatch(s->chr[i],s,"DBA","PWK","CAST","WSB");

    /* backward matrix is computed first, then forward, as in the original */
    backward = backward_logsum_MouseSNPMatch(p,snpm,dpri);
    forward = forward_logsum_MouseSNPMatch(p,snpm,dpri);

    fprintf(stdout,"Backward score is %d\n",
	    MouseSNPMatch_EXPL_SPECIAL(backward,0,-1,0));

    /* forward score is read from the last column (lenj-1) of special row 1 */
    fprintf(stdout,"Forward score is %d Backward score is %d\n",MouseSNPMatch_EXPL_SPECIAL(forward,0,forward->lenj-1,1),
	    MouseSNPMatch_EXPL_SPECIAL(backward,0,-1,0));
  }

  return 0;
}
/*
 * Driver: matches a set of amplimer sequences (argv[2]) against a
 * reference sequence (argv[1]), builds the "swapped" version of the
 * reference from the resulting hit list, translates it, and writes the
 * requested outputs to stdout.
 *
 * Options handled here:
 *   hitlist  - show the hit list (default off)
 *   swapped  - show the swapped sequence and its translation (default on)
 *
 * Exits with status 12 when the two positional arguments are not supplied;
 * returns 0 otherwise.
 */
int main(int argc,char ** argv)
{
  DnaMatchPara * para;
  HitListOutputImpl * hitoutput;
  HitList * hitlist;

  Sequence * reference;
  Sequence * swap;
  Sequence * trans;
  SequenceSet * amplimers;
  int show_hitlist = 0;
  int show_swapped = 1;

  hitoutput = new_HitListOutputImpl_from_argv(&argc,argv);
  para = new_DnaMatchPara_from_argv(&argc,argv);

  strip_out_boolean_def_argument(&argc,argv,"hitlist",&show_hitlist);
  /* fix: the "swapped" switch used to be stored into show_hitlist */
  strip_out_boolean_def_argument(&argc,argv,"swapped",&show_swapped);

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

  reference = read_fasta_file_Sequence(argv[1]);
  amplimers = read_fasta_SequenceSet_file(argv[2]);

  hitlist = HitList_from_Sequence_SequenceSet_DNA(reference,amplimers,para);

  if( show_hitlist ) {
    show_HitList_HitListOutputImpl(hitoutput,hitlist,stdout);
  }

  swap  = swapped_Sequence(reference,hitlist);
  trans = translate_swapped(swap);

  if( show_swapped ) {
    write_fasta_Sequence(swap,stdout);
    write_fasta_Sequence(trans,stdout);
  }

  return 0;
}
/*
 * Driver: builds a DnaProfile tree from the fasta stream named by argv[1],
 * populates it using the engine parameters taken from the command line and
 * prints the root node's profile set to stdout.
 *
 * The "bfour" switch selects the balanced-4 tree builder instead of the
 * simple cascade builder.
 */
int main(int argc,char ** argv)
{
  DnaProfileEnginePara * engine_para = new_DnaProfileEnginePara_from_argv(&argc,argv);
  boolean balanced_four = strip_out_boolean_argument(&argc,argv,"bfour");
  DnaProfileNode * tree_root;
  FILE * fasta_in;

  strip_out_standard_options(&argc,argv,show_help,show_version);

  /* exactly one positional argument (the fasta file) must remain */
  if( argc != 2 ) {
    show_help(stdout);
    exit(12);
  }

  fasta_in = openfile(argv[1],"r");
  if( fasta_in == NULL ) {
    fatal("could not open file %s",argv[1]);
  }

  tree_root = is_true_four(balanced_four)
    ? balanced_4_Sequence_fasta_stream(fasta_in)
    : simple_cascade_Sequence_fasta_stream(fasta_in);

  populate_DnaProfileNode_from_root(tree_root,engine_para);

  /* the root's profile set is shown as-is, without filtering */
  show_DnaProfileSet(tree_root->set,engine_para->rm,stdout);

  return 0;
}
Esempio n. 4
0
/*
 * Program entry point.
 *
 * Calls build_defaults(), then strips every recognised option out of
 * argv, storing the values in variables declared outside this function
 * (gap_str, matrix_file, show_pretty, ...). After option processing
 * exactly two positional arguments are expected: argv[1] is taken as the
 * protein file (or the HMM file when use_tsm is set) and argv[2] as the
 * DNA sequence file. It then builds the objects, builds and shows the
 * alignment, optionally repeats on the reversed target (do_both), and
 * frees the working objects.
 *
 * Returns 0 when it reaches the end; failures in the build/show steps are
 * reported through fatal().
 */
int main(int argc,char ** argv) 
{
  int i;
  char * temp;


  build_defaults();
  
  strip_out_standard_options(&argc,argv,show_help,show_version);

  potential_file = strip_out_assigned_argument(&argc,argv,"pg");

  pal_file = strip_out_assigned_argument(&argc,argv,"pal_file");

  /* Options with a long and a short spelling: whichever is stripped last
     and is non-NULL wins, so the short form overrides the long one. */
  if( (temp = strip_out_assigned_argument(&argc,argv,"gap")) != NULL )
    gap_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"g")) != NULL )
    gap_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"ext")) != NULL )
    ext_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"e")) != NULL )
    ext_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"matrix")) != NULL )
    matrix_file = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"m")) != NULL )
    matrix_file = temp;

  /* query (s/t) and target (u/v) start/end strings */
  if( (temp = strip_out_assigned_argument(&argc,argv,"s")) != NULL )
    qstart_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"t")) != NULL )
    qend_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"u")) != NULL )
    tstart_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"v")) != NULL )
    tend_str = temp;

  if( (strip_out_boolean_argument(&argc,argv,"trev")) == TRUE )
    reverse = TRUE;

  /* NOTE(review): the tag is the literal string "[no]newgene", which looks
     like help-text notation rather than an option name - confirm. */
  if( (strip_out_boolean_argument(&argc,argv,"[no]newgene")) == TRUE )
    use_new_stats = TRUE;

  /* tfor together with trev is treated as a request for both strands */
  if( (strip_out_boolean_argument(&argc,argv,"tfor")) == TRUE ){
    if( reverse == TRUE ) {
      warn("You have specified both trev and tfor. Treating as both");
      do_both = TRUE;
      reverse = FALSE;
    } else {
      reverse = FALSE;
    }
  }

  /* any insert value other than "flat" switches flat_insert off */
  if( (temp = strip_out_assigned_argument(&argc,argv,"insert")) != NULL ) {
    if( strcmp(temp,"flat") == 0 ) {
      flat_insert = TRUE;
    } else {
      flat_insert = FALSE;
    }
  }

  if( (strip_out_boolean_argument(&argc,argv,"both")) == TRUE )
    do_both = TRUE;

      
  if( (strip_out_boolean_argument(&argc,argv,"fembl")) == TRUE )
    is_embl = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"tabs")) == TRUE )
    target_abs = TRUE;

  pseudo = strip_out_boolean_argument(&argc,argv,"pseudo");

  /* model / parameter file and string options */
  if( (temp = strip_out_assigned_argument(&argc,argv,"codon")) != NULL )
    codon_file = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"gene")) != NULL )
    gene_file = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"alg")) != NULL )
    alg_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"kbyte")) != NULL )
    kbyte_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"subs")) != NULL )
    subs_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"indel")) != NULL )
    indel_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"cfreq")) != NULL )
    cfreq_string = temp;

  /* -splice is still honoured but warns that it is deprecated */
  if( (temp = strip_out_assigned_argument(&argc,argv,"splice")) != NULL ) {
    warn("deprecated command line option -splice. use -splice_gtag now");
    splice_string = temp;
  }

  if( (temp = strip_out_assigned_argument(&argc,argv,"init")) != NULL )
    startend_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"null")) != NULL )
    null_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"intron")) != NULL )
    intron_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"alln")) != NULL )
    allN_string = temp;

  /* use_tsm later decides whether argv[1] is a protein or an HMM file */
  if( (strip_out_boolean_argument(&argc,argv,"hmmer")) == TRUE )
    use_tsm = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"intie")) == TRUE )
    use_tied_model = TRUE;

  if( (temp = strip_out_assigned_argument(&argc,argv,"hname")) != NULL )
    hmm_name = temp;


  /* output selection switches */
  if( (strip_out_boolean_argument(&argc,argv,"pretty")) != FALSE )
    show_pretty = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"gff")) != FALSE )
    show_gff = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"diana")) != FALSE )
    show_diana = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"embl")) != FALSE )
    show_embl = TRUE;


  if( (strip_out_boolean_argument(&argc,argv,"genes")) != FALSE )
    show_pretty_gene = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"genesf")) != FALSE )
    show_supp_gene = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"para")) != FALSE )
    show_para = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"trans")) != FALSE )
    show_trans = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"pep")) != FALSE )
    show_pep = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"cdna")) != FALSE )
    show_cdna = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"sum")) != FALSE )
    show_match_sum = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"alb")) != FALSE )
    show_AlnBlock = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"ace")) != FALSE )
    show_ace = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"pal")) != FALSE )
    show_PackAln = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"gener")) != FALSE )
    show_gene_plain = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"over")) != FALSE )
    show_overlap = TRUE;

  if( (temp = strip_out_assigned_argument(&argc,argv,"divide")) != NULL )
    divide_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"block")) != NULL )
    main_block_str = temp;

  /* parameter objects that parse their own options out of argv */
  dpri = new_DPRunImpl_from_argv(&argc,argv);

  gmp  = new_GeneModelParam_from_argv(&argc,argv);

  ppp = new_PhasedProteinPara_from_argv(&argc,argv);

  gwrp = new_GeneWiseRunPara_from_argv(&argc,argv);

  strip_out_remaining_options_with_warning(&argc,argv);

  

  /* NOTE(review): unless show_short_help() terminates the process,
     execution continues and argv[1]/argv[2] are read below even when
     they are absent - confirm. */
  if( argc !=  3 ) {
    warn("Wrong number of arguments (expect 2)!\n");
    if( argc > 1 ){
      warn("Arg line looked like (after option processing)");
      for(i=1;i<argc;i++) {
	fprintf(stderr,"   %s\n",argv[i]);
      }
    }

    show_short_help();
  }

  /* no output format requested: default to pretty output plus parameters */
  if( show_embl == FALSE && show_diana == FALSE && show_gff == FALSE && show_overlap == FALSE && show_pretty_gene == FALSE && show_match_sum == FALSE && show_ace == FALSE && show_gene_plain == FALSE && show_pretty == FALSE && show_AlnBlock == FALSE && show_PackAln == FALSE && show_pep == FALSE ) {
    show_pretty = TRUE;
    show_para = TRUE;
  }
 
  /* positional arguments: argv[1] = protein or HMM, argv[2] = DNA */
  dna_seq_file = argv[2];
  if( use_tsm == FALSE) 
    protein_file = argv[1];
  else 
    hmm_file  = argv[1];


  if( build_objects() == FALSE) 
    fatal("Could not build objects!");

  if( show_para == TRUE) {
    show_parameters();
  }

  if( build_alignment() == FALSE)
    fatal("Could not build alignment!");

  if( show_output() == FALSE)
    fatal("Could not show alignment. Sorry!");

  /* second pass on the reversed target when both strands were requested */
  if( do_both == TRUE) {
    reverse_target();

    if( build_alignment() == FALSE)
      fatal("Could not build alignment!");

    if( show_output() == FALSE)
      fatal("Could not show alignment. Sorry!");
  }

  free_temporary_objects();
  free_io_objects();
  return 0;
}
Esempio n. 5
0
/*
 * Driver: predicts genes on a genomic sequence (argv[1], fasta) from EST
 * evidence (argv[2]) using the GenomeWise9 alignment, then writes the
 * requested reports to stdout, each followed by the "//" divider.
 *
 * Report switches (defaults in the declarations below): geneutr, genes,
 * trans, gff, alb, pal, debug, cdna. Integer options: stop, start, gene,
 * switch, smell. The penalties are passed negated to the alignment call.
 *
 * The codon table is always read from the file "codon.table".
 *
 * Exits with status 12 on a wrong argument count; every input that cannot
 * be read is reported through fatal(), which is assumed not to return (the
 * pre-existing dpri check already relies on that). Returns 0 on success.
 */
int main(int argc,char ** argv)
{
  Sequence   * gen;
  Genomic    * genomic;
  CodonTable * ct = NULL;
  GenomeEvidenceSet * ges = NULL;
  RandomCodonScore * rcs;
  FILE * ifp = NULL;
  ComplexSequence * cs = NULL;
  ComplexSequenceEvalSet * cses = NULL;
  AlnBlock * alb;
  PackAln * pal;
  GenomicRegion * gr;
  int i;
  Protein * trans;
  cDNA    * cdna;
  int stop_codon_pen  = 200;
  int start_codon_pen = 30;
  int new_gene        = 5000;
  int switch_cost     = 100;
  int smell           = 8;
  DPRunImpl * dpri = NULL;

  EstEvidence * est;

  boolean show_trans = TRUE;
  boolean show_cdna  = FALSE;
  boolean show_genes = TRUE;
  boolean show_alb   = FALSE;
  boolean show_pal   = FALSE;
  boolean show_gff   = TRUE;
  boolean show_debug = FALSE;
  boolean show_geneu = TRUE;
  char * divide_string = "//";

  strip_out_boolean_def_argument(&argc,argv,"geneutr",&show_geneu);
  strip_out_boolean_def_argument(&argc,argv,"genes",&show_genes);
  strip_out_boolean_def_argument(&argc,argv,"trans",&show_trans);
  strip_out_boolean_def_argument(&argc,argv,"gff",&show_gff);
  strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb);
  strip_out_boolean_def_argument(&argc,argv,"pal",&show_pal);
  strip_out_boolean_def_argument(&argc,argv,"debug",&show_debug);
  strip_out_boolean_def_argument(&argc,argv,"cdna",&show_cdna);
  strip_out_integer_argument(&argc,argv,"stop",&stop_codon_pen);
  strip_out_integer_argument(&argc,argv,"start",&start_codon_pen);
  strip_out_integer_argument(&argc,argv,"gene",&new_gene);
  strip_out_integer_argument(&argc,argv,"switch",&switch_cost);
  strip_out_integer_argument(&argc,argv,"smell",&smell);

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

  /* Every input is checked before use: the original dereferenced these
     results unconditionally. */
  ct  = read_CodonTable_file("codon.table");
  if( ct == NULL ) {
    fatal("Unable to read codon table from codon.table");
  }

  gen = read_fasta_file_Sequence(argv[1]);
  if( gen == NULL ) {
    fatal("Unable to read genomic sequence from %s",argv[1]);
  }

  ifp = openfile(argv[2],"r");
  if( ifp == NULL ) {
    fatal("Unable to open evidence file %s",argv[2]);
  }

  ges = read_est_evidence(ifp,ct);
  if( ges == NULL ) {
    fatal("Unable to read EST evidence from %s",argv[2]);
  }

  /* apply the command-line smell value to every evidence unit */
  for(i=0;i<ges->len;i++) {
    est = (EstEvidence *) ges->geu[i]->data;
    est->in_smell = smell;
  }

  /* Stop codons get a prohibitive score, everything else scores 0.
     NOTE(review): 125 is presumably 5^3 codon indices (A,C,G,T,N) - confirm
     against RandomCodonScore. */
  rcs= RandomCodonScore_alloc();
  for(i=0;i<125;i++) {
    if( is_stop_codon(i,ct) ) {
      rcs->codon[i] = -1000000;
    } else {
      rcs->codon[i] = 0;
    }
  }

  cses = default_genomic_ComplexSequenceEvalSet();
  cs   = new_ComplexSequence(gen,cses);

  pal  = PackAln_bestmemory_GenomeWise9(ges,cs,-switch_cost,-new_gene,-start_codon_pen,-stop_codon_pen,rcs,NULL,dpri);
  alb  = convert_PackAln_to_AlnBlock_GenomeWise9(pal);

  genomic = Genomic_from_Sequence(gen);
  gr = new_GenomicRegion(genomic);

  add_Genes_to_GenomicRegion_GeneWise(gr,1,gen->len,alb,gen->name,0,NULL);

  if( show_genes ) {
    show_pretty_GenomicRegion(gr,0,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_gff ) {
    show_GFF_GenomicRegion(gr,gen->name,"genomwise",stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_trans ) {
    for(i=0;i<gr->len;i++) {
      if( gr->gene[i]->ispseudo == TRUE ) {
	fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i);
      } else {
	/* only the first transcript/translation of each gene is reported */
	trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
	write_fasta_Sequence(trans->baseseq,stdout);
      }
    }
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_cdna ) {
    for(i=0;i<gr->len;i++) {
      cdna = get_cDNA_from_Transcript(gr->gene[i]->transcript[0]);
      write_fasta_Sequence(cdna->baseseq,stdout);
    }
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_geneu ) {
    show_utr_exon_genomewise(alb,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_alb ) {
    mapped_ascii_AlnBlock(alb,id,1,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_debug ) {
    debug_genomewise(alb,ges,ct,gen,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_pal ) {
    show_simple_PackAln(pal,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  return 0;
}
Esempio n. 6
0
/*
 * Program entry point for a database search driver.
 *
 * Calls build_defaults() and bootstrap_HMMer2(), strips every recognised
 * option out of argv into variables declared outside this function,
 * derives search_mode from the query/target switches, and then expects
 * two positional arguments: argv[1] is stored as protein_file and argv[2]
 * as dna_seq_file. It then builds the objects, prints the header, runs the
 * search, shows the output and frees the objects.
 *
 * Returns 0 when it reaches the end; unusable option combinations and
 * build failures are reported through fatal().
 */
int main(int argc,char ** argv) 
{
  int i;
  char * temp;

  build_defaults();

  bootstrap_HMMer2();
  
  strip_out_standard_options(&argc,argv,show_help,show_version);

  /* Options with a long and a short spelling: whichever is stripped last
     and is non-NULL wins, so the short form overrides the long one. */
  if( (temp = strip_out_assigned_argument(&argc,argv,"gap")) != NULL )
    gap_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"g")) != NULL )
    gap_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"ext")) != NULL )
    ext_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"e")) != NULL )
    ext_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"matrix")) != NULL )
    matrix_file = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"m")) != NULL )
    matrix_file = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"s")) != NULL )
    qstart_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"t")) != NULL )
    qend_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"aln")) != NULL )
    aln_number_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"codon")) != NULL )
    codon_file = temp;


  /* alg = search algorithm string, aalg = alignment algorithm string */
  if( (temp = strip_out_assigned_argument(&argc,argv,"alg")) != NULL )
    alg_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"aalg")) != NULL )
    aln_alg_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"cut")) != NULL )
    search_cutoff_str = temp;


  if( (temp = strip_out_assigned_argument(&argc,argv,"ecut")) != NULL )
    evalue_search_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"subs")) != NULL )
    subs_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"indel")) != NULL )
    indel_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"init")) != NULL )
    startend_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"alln")) != NULL )
    allN_string = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"null")) != NULL )
    null_string = temp;

  /* dnas and dnadb set the same flag; dnadb is processed last and wins */
  if( (strip_out_boolean_argument(&argc,argv,"dnas")) == TRUE )
    use_single_dna = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"dnadb")) == TRUE )
    use_single_dna = FALSE;

  if( (strip_out_boolean_argument(&argc,argv,"tfor")) == TRUE )
    do_forward_only = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"flati")) == TRUE )
    flat_insert = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"hmmer")) == TRUE )
    use_tsm = TRUE;

  /* NOTE(review): option "pfam2" sets use_pfam1 and "pfam" sets use_pfam2;
     the names look crossed but may be intentional - confirm before
     changing. */
  if( (strip_out_boolean_argument(&argc,argv,"pfam2")) == TRUE )
    use_pfam1 = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"pfam")) == TRUE )
    use_pfam2 = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"protein")) == TRUE )
    use_single_pro = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"prodb")) == TRUE )
    use_db_pro = TRUE;


  if( (temp = strip_out_assigned_argument(&argc,argv,"hname")) != NULL )
    hmm_name = temp;

  /* output selection switches; nohis turns the histogram off */
  if( (strip_out_boolean_argument(&argc,argv,"nohis")) != FALSE )
    show_histogram = FALSE;

  if( (strip_out_boolean_argument(&argc,argv,"pretty")) != FALSE )
    show_pretty = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"pep")) != FALSE )
    show_pep = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"mul")) != FALSE )
    make_anchored_aln = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"para")) != FALSE )
    show_para = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"sum")) != FALSE )
    show_match_sum = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"alb")) != FALSE )
    show_AlnBlock = TRUE;

  if( (strip_out_boolean_argument(&argc,argv,"pal")) != FALSE )
    show_PackAln = TRUE;

  if( (temp = strip_out_assigned_argument(&argc,argv,"divide")) != NULL )
    divide_str = temp;

  if( (temp = strip_out_assigned_argument(&argc,argv,"block")) != NULL )
    main_block_str = temp;


  if( (temp = strip_out_assigned_argument(&argc,argv,"report")) != NULL )
    report_str = temp;

  /* implementation objects that parse their own options out of argv */
  dbsi = new_DBSearchImpl_from_argv(&argc,argv);
  
  dpri = new_DPRunImpl_from_argv(&argc,argv);


  strip_out_remaining_options_with_warning(&argc,argv);
  

  /* NOTE(review): unless show_short_help() terminates the process,
     execution continues and argv[1]/argv[2] are read below even when
     they are absent - confirm. */
  if( argc !=  3 ) {
    warn("Wrong number of arguments (expect 2)!\n");
    if( argc > 1 ){
      warn("Arg line looked like (after option processing)");
      for(i=1;i<argc;i++) {
	fprintf(stderr,"   %s\n",argv[i]);
      }
    }

    show_short_help();
  }

  /* no alignment output requested: default to pretty output plus parameters */
  if( show_pretty == FALSE && show_AlnBlock == FALSE && show_PackAln == FALSE && show_pep == FALSE ) {
    show_pretty = TRUE;
    show_para = TRUE;
  }

  /* no query type requested: default to a single protein query */
  if( use_db_pro == FALSE && use_single_pro == FALSE && use_tsm == FALSE && use_pfam1 == FALSE && use_pfam2 == FALSE ) {
    use_single_pro = TRUE;
  }

  /* Derive the search mode: single query vs DNA database (S2DB), query
     database vs single DNA (DB2S), or database vs database (DB2DB).
     Single query against single DNA is rejected. */
  if( use_single_pro == TRUE || use_tsm == TRUE ) {
    if( use_single_dna == TRUE ) 
      fatal("one on one search. Shouldn't you use pcwise?");
    search_mode = PC_SEARCH_S2DB;
  } else {
    if( use_single_dna == TRUE ) 
      search_mode = PC_SEARCH_DB2S;
    else 
      search_mode = PC_SEARCH_DB2DB;
  }

  /* evalue cutoffs and anchored alignments are only allowed in S2DB mode */
  if( evalue_search_str != NULL && search_mode != PC_SEARCH_S2DB ) {
    fatal("Trying to set a evalue cutoff on a non evalue based search. you can only use evalues in a protein HMM vs DNA database search (sorry!)");
  }

  if( make_anchored_aln == TRUE && search_mode != PC_SEARCH_S2DB ) {
    fatal("Trying to make an anchored alignment and not in single search mode");
  }

  if( make_anchored_aln == TRUE) {
    do_complete_analysis = TRUE;
  }

  /* pick up remaining args and do it */

    
  dna_seq_file = argv[2];
  protein_file = argv[1];

  if( build_objects() == FALSE) 
    fatal("Could not build objects!");

  if( build_db_objects() == FALSE) 
    fatal("Could not build database-ready objects!");


  show_header(stdout);

  /* a failed search only warns; output and cleanup still run */
  if( search_db() == FALSE) 
    warn("Could not search database");


  show_output();


  free_objects();


  return 0;
}
Esempio n. 7
0
/*
 * Driver: aligns a "weak" mapped clone set (argv[2]) against a "trusted"
 * mapped clone set (argv[1]) with either the global (CloneWise) or local
 * (LocalCloneWise) algorithm and writes the requested reports to stdout,
 * each followed by the "//" divider.
 *
 * Report switches: alb, pal, zip (default on), path.
 * Integer options: wgap, wext, wswitch, tgap, text, match, mismatch (all
 * stored in variables declared outside this function), spread, kbyte.
 * String option: alg ("local" by default, or "global").
 *
 * Exits with status 12 on a wrong argument count; unreadable files and
 * unknown algorithm names are reported through fatal(). Returns 0 on
 * success.
 */
int main (int argc,char ** argv)
{
  MappedCloneSet * trusted;
  MappedCloneSet * weak;
  MappedCloneMatch * match;


  FILE * in;
  int kbyte = 10000;
  PackAln  * pal;
  AlnBlock * alb;

  int spread = 30;

  boolean show_alb = 0;
  boolean show_pal = 0;
  boolean show_zip = 1;
  boolean show_path = 0;

  char * alg_string = "local";
  char * temp;
  char * divide_string = "//";

  strip_out_boolean_def_argument(&argc,argv,"alb",&show_alb);
  strip_out_boolean_def_argument(&argc,argv,"pal",&show_pal);
  strip_out_boolean_def_argument(&argc,argv,"zip",&show_zip);
  strip_out_boolean_def_argument(&argc,argv,"path",&show_path);

  strip_out_integer_argument(&argc,argv,"wgap",&query_gap_start);
  strip_out_integer_argument(&argc,argv,"wext",&query_gap_extend);
  strip_out_integer_argument(&argc,argv,"wswitch",&query_switch_cost);
  strip_out_integer_argument(&argc,argv,"tgap",&target_gap_start);
  strip_out_integer_argument(&argc,argv,"text",&target_gap_extend);
  strip_out_integer_argument(&argc,argv,"match",&match_score);
  strip_out_integer_argument(&argc,argv,"mismatch",&mismatch_score);
  temp =strip_out_assigned_argument(&argc,argv,"alg");
  if( temp != NULL ) {
    alg_string = temp;
  }

  strip_out_integer_argument(&argc,argv,"spread",&spread);
  strip_out_integer_argument(&argc,argv,"kbyte",&kbyte);

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

  in = fopen(argv[1],"r");
  if( in == NULL ) {
    fatal("Unable to open %s",argv[1]);
  }
  trusted = read_MappedCloneSet(in);
  /* close before the handle is reused; the original leaked this stream */
  fclose(in);

  in = fopen(argv[2],"r");
  if( in == NULL ) {
    fatal("Unable to open %s",argv[2]);
  }

  change_max_BaseMatrix_kbytes(kbyte);

  weak = read_MappedCloneSet(in);
  fclose(in);

  synchronise_MappedCloneSets(trusted,weak);

  match = new_MappedCloneMatch(weak,trusted,match_score,mismatch_score);

  fprintf(stderr,"Match matrix calculated\n");

  /* gap and switch costs are given as positive numbers and passed negated */
  if( strcmp(alg_string,"global") == 0 ) {
    pal = PackAln_bestmemory_CloneWise(weak,trusted,match,-query_gap_start,-query_gap_extend,-target_gap_start,-target_gap_extend,spread,-query_switch_cost,NULL);
    alb = convert_PackAln_to_AlnBlock_CloneWise(pal);
  } else if ( strcmp(alg_string,"local") == 0 ) {
    pal = PackAln_bestmemory_LocalCloneWise(weak,trusted,match,-query_gap_start,-query_gap_extend,-target_gap_start,-target_gap_extend,spread,-query_switch_cost,NULL);
    alb = convert_PackAln_to_AlnBlock_LocalCloneWise(pal);
  } else {
    /* keep gcc happy */
    pal = NULL;
    alb = NULL;
    fatal("Not a proper algorithm string %s",alg_string);
  }

  if( show_path ) {
    extended_path(alb,weak,trusted,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_zip ) {
    debug_zip(alb,weak,trusted,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_alb ) {
    mapped_ascii_AlnBlock(alb,id,1,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  if( show_pal ) {
    show_simple_PackAln(pal,stdout);
    fprintf(stdout,"%s\n",divide_string);
  }

  return 0;
}
/*
 * Driver: builds a five-state model from the frame set found under
 * argv[1] (file name "block.str"), searches it against the single-fasta
 * protein database argv[2] and prints the high-score list to stdout.
 *
 * Option "ga" sets the gathering cutoff (default 0.0); the listing stops
 * at the first hit whose bit score falls below it.
 *
 * Exits with status 12 on a wrong argument count; unusable inputs are
 * reported through fatal(). Returns 0 on success (the original fell off
 * the end of main without a return value).
 */
int main(int argc,char **argv)
{
  FiveStateFrameSet * frame;

  FiveStateModel * fsm;
  FiveStateScore * fss;

  RandomModel * rm;

  ProteinDB * proteindb;
  DBSearchImpl * dbsi;
  Hscore * hs;

  double gathering_cutoff = 0.0;
  double bits;
  int i;

  dbsi = new_DBSearchImpl_from_argv(&argc,argv);

  strip_out_float_argument(&argc,argv,"ga",&gathering_cutoff);

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

  rm = default_RandomModel();

  frame = read_FiveStateFrameSet_file(argv[1],"block.str");
  if( frame == NULL )
    fatal("Unable to make FiveStateModel from context %s, block.str file",argv[1]);
  fsm   = FiveStateModel_from_FiveStateFrameSet(frame);
  fsm->name = stringalloc(argv[1]);

  fold_RandomModel_into_FiveStateModel(fsm,rm);

  /* converts probabilities to integers for calculation */
  fss = FiveStateScore_from_FiveStateModel(fsm);

  proteindb = single_fasta_ProteinDB(argv[2]);

  if( proteindb== NULL )
    fatal("Unable to make proteindb from %s",argv[2]);

  /* score cutoff for the high-score list is derived from the gathering
     cutoff, lowered by 10 */
  hs = std_score_Hscore(Probability2Score(gathering_cutoff)-10,-1);

  search_FiveStateProtein(dbsi,hs,fss,proteindb);

  fprintf(stdout,"\n\n#High Score list\n");
  fprintf(stdout,"#Protein ID                 DNA Str  ID                        Bits Evalue\n");  
  fprintf(stdout,"--------------------------------------------------------------------------\n");

  for(i=0;i<hs->len;i++) {
    bits = Score2Bits(hs->ds[i]->score);
    /* NOTE(review): breaking here assumes hs->ds is ordered by descending
       score - confirm. */
    if( bits < gathering_cutoff ) {
      break;
    }

    fprintf(stdout,"Protein %-20sDNA [%c] %-24s %.2f\n",hs->ds[i]->query->name,hs->ds[i]->target->is_reversed == TRUE ? '-' : '+',hs->ds[i]->target->name,bits);
  }

  return 0;
}
Esempio n. 9
0
/*
 * Driver: finds local cis-regulatory style hits between a query sequence
 * (argv[1], fasta) and a target sequence (argv[2], fasta), on both the
 * forward target and its reverse complement, then prints the greedily
 * weeded hit list to stdout.
 *
 * Options handled here:
 *   s / t / u / v  query start/end and target start/end (-1 = whole sequence)
 *   motiflib       motif library file, required when align is "motif"
 *   lr / ben       select the alternative motif library readers
 *   align          "normal" (default) or "motif"
 *
 * Exits with status 12 on a wrong argument count; unreadable sequences and
 * invalid option values are reported through fatal(). Returns 0 on success.
 */
int main(int argc,char ** argv)
{
  int type = ALIGN_NORMAL;
  DPRunImpl * dpri = NULL;
  HitList * hl;
  HitListOutputImpl * hloi;

  Sequence * query;
  Sequence * target;
  Sequence * target_rev;
  PairwiseShortDna * two;


  LocalCisHitSet * set;
  LocalCisHitSet * greedy_set;

  LocalCisHitScore * lchs;
  LocalCisHitSetPara * setpara;

  MotifMatrixPara  * mmp;
  MotifMatrixScore * mms;

  TransFactorMatchSet * tfms_query = NULL;
  TransFactorMatchSet * tfms_target = NULL;
  TransFactorMatchSet * tfms_target_rev = NULL;

  int qstart = -1;
  int qend   = -1;

  int tstart = -1;
  int tend   = -1;
  int i;

  char * temp;

  DnaMatrix * dm;
  DnaProbMatrix * dmp;

  TransFactorBuildPara * tfbp;
  TransFactorMatchPara * tfmp;

  TransFactorSet * tfs;

  char * motif_library = NULL;
  int use_laurence     = FALSE;
  int use_ben          = FALSE;

  dmp = DnaProbMatrix_from_match(0.75,NMaskType_BANNED);
  assert(dmp);
  flat_null_DnaProbMatrix(dmp);

  dm = DnaMatrix_from_DnaProbMatrix(dmp);

  dpri      = new_DPRunImpl_from_argv(&argc,argv);
  hloi      = new_HitListOutputImpl_from_argv(&argc,argv);
  setpara   = new_LocalCisHitSetPara_from_argv(&argc,argv);
  mmp       = new_MotifMatrixPara_from_argv(&argc,argv);
  tfbp      = new_TransFactorBuildPara_from_argv(&argc,argv);
  tfmp      = new_TransFactorMatchPara_from_argv(&argc,argv);

  strip_out_integer_argument(&argc,argv,"s",&qstart);
  strip_out_integer_argument(&argc,argv,"t",&qend);
  strip_out_integer_argument(&argc,argv,"u",&tstart);
  strip_out_integer_argument(&argc,argv,"v",&tend);

  temp = strip_out_assigned_argument(&argc,argv,"motiflib");
  if( temp != NULL ) {
    motif_library = stringalloc(temp);
  }

  use_laurence = strip_out_boolean_argument(&argc,argv,"lr");
  use_ben      = strip_out_boolean_argument(&argc,argv,"ben");


  temp = strip_out_assigned_argument(&argc,argv,"align");
  if( temp != NULL ) {
    if( strcmp(temp,"motif") == 0 ) {
      type = ALIGN_MOTIF;
    } else if ( strcmp(temp,"normal") == 0 ) {
      type = ALIGN_NORMAL;
    } else {
      fatal("cannot recognise string %s as align type",temp);
    }
  }

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

  lchs = standard_LocalCisHitScore(NMaskType_VARIABLE);

  /* Validate both sequences BEFORE touching them: the original upper-cased
     query->seq first and only asserted non-NULL afterwards. */
  query = read_fasta_file_Sequence(argv[1]);
  if( query == NULL ) {
    fatal("Unable to read query sequence from %s",argv[1]);
  }
  target = read_fasta_file_Sequence(argv[2]);
  if( target == NULL ) {
    fatal("Unable to read target sequence from %s",argv[2]);
  }

  /* only the query is upper-cased, as in the original; the cast keeps
     toupper() defined for bytes with the high bit set */
  for(i=0;i<query->len;i++) {
    query->seq[i] = toupper((unsigned char) query->seq[i]);
  }

  target_rev = reverse_complement_Sequence(target);

  mms = MotifMatrixScore_from_MotifMatrixPara(mmp);

  if( type == ALIGN_MOTIF ) {
    if( motif_library == NULL ) {
      fatal("Wanted to align with motif but not motif library. Must use -motiflib");
    }

    if( use_laurence == TRUE ) {
      tfs = read_laurence_TransFactorSet_file(motif_library);
    } else if( use_ben == TRUE ) {
      tfs = read_ben_IUPAC_TransFactorSet_file(motif_library);
    } else {
      tfs = read_TransFactorSet_file(motif_library);
    }

    build_TransFactorSet(tfs,tfbp);

    tfms_query = calculate_TransFactorMatchSet(query,tfs,tfmp);
    sort_by_start_TransFactorMatchSet(tfms_query);

    tfms_target = calculate_TransFactorMatchSet(target,tfs,tfmp);
    sort_by_start_TransFactorMatchSet(tfms_target);

    tfms_target_rev = calculate_TransFactorMatchSet(target_rev,tfs,tfmp);
    /* fix: the reverse-strand set was never sorted (tfms_target was sorted
       a second time instead) */
    sort_by_start_TransFactorMatchSet(tfms_target_rev);

    fprintf(stdout,"Motif Set: %d in query and %d in target\n",tfms_query->len,tfms_target->len);
  }

  /* -1 means "not given": default to the full extent of each sequence */
  if( qstart == -1 ) {
    qstart = 0;
  }
  if( qend == -1 ) {
    qend = query->len;
  }
  if( tstart == -1 ) {
    tstart = 0;
  }
  if( tend == -1 ) {
    tend = target->len;
  }

  two = query_to_reverse_target(query,target,dm,qstart,qend,tstart,tend);

  set = make_LocalCisHitSet(query,target,target_rev,two->forward,two->reverse,setpara,lchs,tfms_query,tfms_target,tfms_target_rev,mms,type == ALIGN_MOTIF ? 1 : 0,dpri);

  greedy_set = greedy_weed_LocalCisHitSet(set,setpara);

  hl = HitList_from_LocalCisHitSet(greedy_set);

  show_HitList_HitListOutputImpl(hloi,hl,stdout);

  return 0;
}
Esempio n. 10
0
/*
 * HSP scan client.
 *
 * Builds the scan, conversion and output implementations from the
 * command line, opens the sequence database named by the single
 * remaining positional argument, and for every sequence in it:
 *   - upper-cases the residues (aborting on any non-alphabetic one),
 *   - runs the scan through the HSPScanInterface,
 *   - sorts the resulting HSP sets, optionally keeping only the first,
 *   - converts them to a HitList, scores it and writes it to stdout.
 *
 * Local option tags stripped here: "verbose", "mott", "besthsp",
 * "dbsize" (the *_from_argv constructors strip their own).
 *
 * Returns 0 after all queries were processed; exits with status 12
 * (after show_help) when the positional argument count is not 1.
 */
int main(int argc,char ** argv)
{
  DPRunImpl * dpri = NULL;
  ScanWiseHSPImpl * scani = NULL;
  HSP2HitListImpl * hsp2hiti = NULL;
  HitListOutputImpl * hloi = NULL;
  ProteinIndexConstructor * pic = NULL;

  HSPScanInterface * hsi;
  HSPScanInterfacePara * para;
  SearchStatInterface * ssi;
  SearchStatInterface * ssl;
  SeqLookupLoadPara * slp;

  HSPset2HitPairPara * hsp2hit;
  CompMat * mat;
  SequenceDB * db;
  Sequence * seq;
  int ret;
  int i;
  int effective_db_size = 300000;
  int kk;

  int count = 0;

  LinearHSPmanager * lm;
  HitList * hl;

  boolean use_mott = 1;
  boolean trunc_best_hsp = 0;
  boolean verbose = 0;

  struct timeval t0, t1;

  gettimeofday(&t0, NULL);

  dpri = new_DPRunImpl_from_argv(&argc,argv);

  /* Must be checked before the first dereference of dpri below. */
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  dpri->memory = DPIM_Explicit;

  /* Each constructor consumes its own options from argv, so the order
     of these calls is kept exactly as it was. */
  scani    = new_ScanWiseHSPImpl_from_argv(&argc,argv);
  hsp2hiti = new_HSP2HitListImpl_from_argv(&argc,argv);
  hloi     = new_HitListOutputImpl_from_argv(&argc,argv);
  slp      = new_SeqLookupLoadPara_from_argv(&argc,argv);
  pic      = new_ProteinIndexConstructor_from_argv(&argc,argv);
  hsp2hit  = new_HSPset2HitPairPara_from_argv(&argc,argv);
  para     = new_HSPScanInterfacePara_from_argv(&argc,argv);

  verbose = strip_out_boolean_argument(&argc,argv,"verbose") ;

  strip_out_boolean_def_argument(&argc,argv,"mott",&use_mott);
  strip_out_boolean_def_argument(&argc,argv,"besthsp",&trunc_best_hsp);
  strip_out_integer_argument(&argc,argv,"dbsize",&effective_db_size);

#ifdef SCAN_CORBA
  sorb = get_Wise2Corba_Singleton(&argc,argv,"orbit-local-orb");
#endif

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 2 ) {
    show_help(stdout);
    exit(12);
  }

  /* ugly, but we don't want to bounce matrices around the network... */
  mat = read_Blast_file_CompMat("BLOSUM62.bla");

  erroroff(REPORT);

  hsi = new_HSPScanInterface_from_ScanWiseHSPImpl(scani,pic,slp);

  ssi = new_Mott_SearchStatInterface();

  /* NOTE(review): ssl is never read below; the call is kept in case the
     constructor has side effects - confirm and drop if it has none. */
  ssl = new_lookup_SearchStatInterface(40,2.3);

  if( verbose ) {
    info("contacted database");
  }

  db = single_fasta_SequenceDB(argv[1]);

  if( db == NULL ) {
    fatal("Could not open sequence db...\n");
  }

  for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) {

    count++;

    for(i=0;i<seq->len;i++) {
      /* <ctype.h> functions require a value representable as
         unsigned char; a negative plain char is undefined behaviour. */
      if( !isalpha((unsigned char)seq->seq[i]) ) {
        fatal("Sequence position %d [%c] is not valid",i,seq->seq[i]);
      }
      seq->seq[i] = toupper((unsigned char)seq->seq[i]);
    }

    info("Processing %s",seq->name);

    lm = (*hsi->scan_query)(hsi->data,seq,para);

    if( lm == NULL ) {
      fatal("Scan of %s returned no HSP manager",seq->name);
    }

    fprintf(stderr,"Got linear manager is %d entries\n",lm->len);

    /* The manager may arrive without a matrix; share the local one. */
    if( lm->mat == NULL ) {
      lm->mat = hard_link_CompMat(mat);
    }

    sort_LinearHSPmanager(lm,compare_HSPset_score);

    /* Keep only the first HSP set after sorting.  Guarded on len > 1 so
       that an empty manager is not given a bogus length of 1. */
    if( trunc_best_hsp == 1 && lm->len > 1 ) {
      for(kk=1;kk<lm->len;kk++) {
        free_HSPset(lm->set[kk]);
        lm->set[kk] = NULL;
      }
      lm->len = 1;
    }

    hl = HitList_from_HSP_HSP2HitListImpl(hsp2hiti,lm,dpri,hsp2hit);

    free_LinearHSPmanager(lm);

    if( use_mott == 1 ) {
      apply_SearchStat_to_HitList(hl,ssi,effective_db_size);
    } else {
      /* Without the Mott statistics the bit score is half the raw score. */
      for(kk=0;kk<hl->len;kk++) {
        hl->pair[kk]->bit_score = hl->pair[kk]->raw_score / 2.0;
      }
    }

    sort_HitList_by_score(hl);

    show_HitList_HitListOutputImpl(hloi,hl,stdout);

    free_HitList(hl);
    free_Sequence(seq);
  }

  free_DPRunImpl(dpri);
  free_HSPScanInterface(hsi);

  /* Wall-clock time for the whole run, reported alongside query count. */
  gettimeofday(&t1, NULL);
  fprintf(stderr, "[client stats] queries, time (s): %d %f\n",
          count,
          (t1.tv_sec - t0.tv_sec) +
          (t1.tv_usec - t0.tv_usec) * 1e-6);

  return 0;

}
Esempio n. 11
0
/*
 * StatWise10 gene prediction driver.
 *
 * Reads one sequence from the file named by the single positional
 * argument, aligns it with PackAln_bestmemory_StatWise10 using a flat
 * random-codon model and a flat exon score, prints the alignment, and
 * then writes the translation of every non-pseudo gene found to stdout.
 *
 * Uses the file-level codon table `ct` and the `id` mapping function,
 * both defined outside this block.
 *
 * Returns 0 on success; exits with status 12 (after show_help) when the
 * positional argument count is not 1.
 */
int main(int argc,char ** argv)
{
  int i;

  DPRunImpl * dpri = NULL;
  GeneModelParam * gmp = NULL;
  GeneModel * gm = NULL;

  Sequence * seq;

  RandomCodon * rc;
  RandomModelDNA * rmd;
  RandomCodonScore * rcs;

  ComplexSequenceEvalSet * cses;
  ComplexSequence * cseq;

  SyExonScore * exonscore;

  PackAln * pal;
  AlnBlock * alb;

  Genomic * genomic;
  GenomicRegion * gr;
  Protein * trans;

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  gmp = new_GeneModelParam_from_argv(&argc,argv);

  ct = read_CodonTable_file("codon.table");

  strip_out_standard_options(&argc,argv,show_help,show_version);
  if( argc != 2 ) {
    show_help(stdout);
    exit(12);
  }

  /* Checked only after the help/usage handling so that asking for help
     still works when the table file is missing. */
  if( ct == NULL ) {
    fatal("Could not read codon table from codon.table");
  }

  if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
    fatal("Could not build gene model");
  }

  seq = read_fasta_file_Sequence(argv[1]);

  /* A missing or unreadable input file is a user error, not a broken
     invariant: report it instead of relying on assert(), which is
     compiled out under NDEBUG. */
  if( seq == NULL ) {
    fatal("Could not read sequence from %s",argv[1]);
  }

  cses = new_ComplexSequenceEvalSet_from_GeneModel(gm);

  cseq = new_ComplexSequence(seq,cses);

  rc = flat_RandomCodon(ct);
  rmd = RandomModelDNA_std();

  fold_in_RandomModelDNA_into_RandomCodon(rc,rmd);
  rcs = RandomCodonScore_from_RandomCodon(rc);

  exonscore = SyExonScore_flat_model(200,250,0.1,0.1);

  pal = PackAln_bestmemory_StatWise10(exonscore,cseq,rcs,Probability2Score(1.0/10.0),Probability2Score(1.0/10.0),NULL,dpri);
  if( pal == NULL ) {
    fatal("Unable to build alignment for %s",argv[1]);
  }

  alb = convert_PackAln_to_AlnBlock_StatWise10(pal);

  mapped_ascii_AlnBlock(alb,id,1,stdout);

  genomic = Genomic_from_Sequence(seq);
  gr = new_GenomicRegion(genomic);

  add_Genes_to_GenomicRegion_GeneWise(gr,1,seq->len,alb,"bollocks",0,NULL);

  for(i=0;i<gr->len;i++) {
    if( gr->gene[i]->ispseudo == TRUE ) {
      fprintf(stdout,"#Gene %d is a pseudo gene - no translation possible\n",i);
    } else {
      /* Only the first translation of the first transcript is shown. */
      trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
      /* NOTE(review): assumes a NULL return signals a failed translation - confirm. */
      if( trans == NULL ) {
        fprintf(stdout,"#Gene %d could not be translated\n",i);
      } else {
        write_fasta_Sequence(trans->baseseq,stdout);
      }
    }
  }

  return 0;
}
Esempio n. 12
0
/*
 * SyWise20 pairwise gene prediction driver.
 *
 * Reads a two-sequence alignment from the file named by the single
 * positional argument, then either computes a SyWise20 alignment or,
 * when the "recover" option names a file, reads a previously dumped
 * PackAln from it.  The "dump" option names a file to which the PackAln
 * is written.  The score sequence and the predicted genomic region are
 * printed to stdout.
 *
 * Uses the file-level `ct` and `codon_table`, defined outside this block.
 *
 * Returns 0 on success; exits with status 12 (after show_help) when the
 * positional argument count is not 1.
 */
int main(int argc,char ** argv)
{
    DPRunImpl * dpri = NULL;
    GeneModelParam * gmp = NULL;
    GeneModel * gm = NULL;

    FILE * ifp;
    SeqAlign   * al;
    PairBaseSeq * pbs;

    ComplexSequenceEval * splice5;
    ComplexSequenceEval * splice3;
    ComplexSequence * cseq = NULL;  /* only built when not recovering */

    CompProb * comp_prob;
    RandomModel * rm;

    PairBaseCodonModelScore * codon_score;
    PairBaseModelScore* nonc_score;

    PairBaseCodonModelScore * start;
    PairBaseCodonModelScore * stop;

    SyExonScore * exonscore;

    PackAln * pal = NULL;
    AlnBlock * alb;

    Genomic * genomic;
    GenomicRegion * gr;

    StandardOutputOptions * std_opt;
    ShowGenomicRegionOptions * sgro;

    char * dump_packaln = NULL;
    char * read_packaln = NULL;
    FILE * packifp = NULL;

    ct = read_CodonTable_file(codon_table);

    rm = default_RandomModel();

    comp_prob = read_Blast_file_CompProb("wag85");
    if( comp_prob == NULL ) {
        fatal("Could not read comparison probabilities from wag85");
    }

    fold_column_RandomModel_CompProb(comp_prob,rm);

    dpri = new_DPRunImpl_from_argv(&argc,argv);
    if( dpri == NULL ) {
        fatal("Unable to build DPRun implementation. Bad arguments");
    }

    gmp = new_GeneModelParam_from_argv(&argc,argv);

    /* std_opt is not read below, but the call is kept: it is one of the
       argv-stripping constructors that run before the argc check. */
    std_opt = new_StandardOutputOptions_from_argv(&argc,argv);
    sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv);

    dump_packaln = strip_out_assigned_argument(&argc,argv,"dump");
    read_packaln = strip_out_assigned_argument(&argc,argv,"recover");

    strip_out_standard_options(&argc,argv,show_help,show_version);
    if( argc != 2 ) {
        show_help(stdout);
        exit(12);
    }

    if( ct == NULL ) {
        fatal("Could not read codon table");
    }

    if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
        fatal("Could not build gene model");
    }

    codon_score = make_PairBaseCodonModelScore(comp_prob);
    nonc_score  = make_PairBaseModelScore();

    splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score);
    splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score);

    if((ifp = openfile(argv[1],"r")) == NULL ) {
        fatal("Could not open file %s",argv[1]);
    }

    al = read_fasta_SeqAlign(ifp);
    fclose(ifp);

    /* Input validation is done with explicit errors rather than assert(),
       which is compiled out under NDEBUG. */
    if( al == NULL ) {
        fatal("Could not read an alignment from %s",argv[1]);
    }
    if( al->len != 2 ) {
        fatal("Alignment in %s has %d sequences, need exactly 2",argv[1],al->len);
    }
    if( al->seq[0]->len <= 0 || al->seq[1]->len <= 0 ) {
        fatal("Alignment in %s contains an empty sequence",argv[1]);
    }

    pbs = new_PairBaseSeq_SeqAlign(al);

    /* The complex sequence is only needed to compute a fresh alignment. */
    if( read_packaln == NULL ) {
        cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3);
    }

    start = make_start_PairBaseCodonModelScore(ct);
    stop  = make_stop_PairBaseCodonModelScore(ct);

    exonscore = SyExonScore_flat_model(100,150,0.1,1.0);

    if( read_packaln != NULL ) {
        packifp = openfile(read_packaln,"r");
        if( packifp == NULL ) {
            /* Report the recover file; dump_packaln may well be NULL here. */
            fatal("File %s is unopenable - cannot recover alignment",read_packaln);
        } else {
            pal = read_simple_PackAln(packifp);
            fclose(packifp);
            packifp = NULL;
        }
    } else {
        pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri);
    }

    if( pal == NULL ) {
        fatal("Unable to obtain an alignment for %s",argv[1]);
    }

    alb = convert_PackAln_to_AlnBlock_SyWise20(pal);

    if( dump_packaln != NULL ) {
        packifp = openfile(dump_packaln,"w");
        if( packifp == NULL ) {
            warn("File %s is unopenable - ignoring dump command",dump_packaln);
        } else {
            show_simple_PackAln(pal,packifp);
            fclose(packifp);
            packifp = NULL;
        }
    }

    show_score_sequence(alb,pbs,nonc_score,stdout);

    genomic = Genomic_from_Sequence(al->seq[0]);
    gr = new_GenomicRegion(genomic);

    add_Genes_to_GenomicRegion_new(gr,alb);

    show_GenomicRegionOptions(sgro,gr,ct,"//",stdout);

    return 0;
}
Esempio n. 13
0
/*
 * CdnaWise10 driver: aligns a cDNA sequence against a genomic sequence.
 *
 * Positional arguments: <cdna-file> <genomic-file>.
 * Option tags "u"/"v" restrict the genomic sequence to a start/end
 * range and "s"/"t" do the same for the cDNA; -1 (the default) means
 * "not given", and a single missing bound defaults to 1 or to the
 * sequence length respectively.
 *
 * Prints the alignment, the predicted genomic region and the
 * translation of every non-pseudo gene to stdout.
 *
 * Uses the file-level `codon_file`, defined outside this block.
 *
 * Returns 0 on success; exits with status 12 (after show_help) when the
 * positional argument count is not 2, and with status 1 when either
 * input sequence cannot be read.
 */
int main(int argc,char ** argv)
{
  Sequence * cdna;
  Sequence * gen;
  Sequence * active_gen;
  Sequence * active_cdna;

  int i;
  int dstart = -1;
  int dend   = -1;

  int cstart = -1;
  int cend   = -1;

  CodonTable * ct = NULL;
  CodonMatrixScore * cm = NULL;
  RandomCodon * rndcodon = NULL;
  RandomCodonScore * rndcodonscore = NULL;
  DnaMatrix * dm   = NULL;

  DPRunImpl * dpri = NULL;

  GeneModel * gm;
  GeneModelParam * gmp;
  GeneParser21 * gp21;
  GeneParser21Score * gp21s;
  GeneParser4Score * gp;

  ComplexSequenceEvalSet * cdna_cses;
  ComplexSequenceEvalSet * gen_cses;

  ComplexSequence * cs_cdna;
  ComplexSequence * cs_gen;

  Genomic * gent;
  GenomicRegion * gr;

  CompMat  * cmat;
  CompProb * cprob;
  char * matfile = "blosum62.bla";
  Protein * trans;

  PackAln * pal;
  AlnBlock * alb;

  FILE * ofp = stdout;

  dpri = new_DPRunImpl_from_argv(&argc,argv);
  gmp  = new_GeneModelParam_from_argv(&argc,argv);

  strip_out_integer_argument(&argc,argv,"u",&dstart);
  strip_out_integer_argument(&argc,argv,"v",&dend);

  strip_out_integer_argument(&argc,argv,"s",&cstart);
  strip_out_integer_argument(&argc,argv,"t",&cend);

  strip_out_standard_options(&argc,argv,show_help,show_version);

  /* argv[1] and argv[2] are read below, so both must be present. */
  if( argc != 3 ) {
    show_help(stdout);
    exit(12);
  }

  ct = read_CodonTable_file(codon_file);

  cmat = read_Blast_file_CompMat(matfile);
  cprob = CompProb_from_halfbit(cmat);
  cm = naive_CodonMatrixScore_from_prob(ct,cprob);

  gm = GeneModel_from_GeneModelParam(gmp);

  cdna = read_fasta_file_Sequence(argv[1]);
  gen = read_fasta_file_Sequence(argv[2]);

  /* Both sequences are dereferenced below (->len, ->name), so a failed
     read has to be caught here rather than by the later asserts. */
  if( cdna == NULL ) {
    fprintf(stderr,"Could not read cDNA sequence from %s\n",argv[1]);
    exit(1);
  }
  if( gen == NULL ) {
    fprintf(stderr,"Could not read genomic sequence from %s\n",argv[2]);
    exit(1);
  }

  if( dstart != -1 || dend != -1 ) {
    if( dstart == -1 ) {
      dstart = 1;
    }
    if( dend == -1 ) {
      dend = gen->len;
    }
    active_gen = magic_trunc_Sequence(gen,dstart,dend);
  } else {
    active_gen = hard_link_Sequence(gen);
  }

  /* The cDNA window must be taken from the cDNA sequence; taking it
     from `gen` would align the genomic sequence against itself. */
  if( cstart != -1 || cend != -1 ) {
    if( cstart == -1 ) {
      cstart = 1;
    }
    if( cend == -1 ) {
      cend = cdna->len;
    }
    active_cdna = magic_trunc_Sequence(cdna,cstart,cend);
  } else {
    active_cdna = hard_link_Sequence(cdna);
  }

  rndcodon = RandomCodon_from_raw_CodonFrequency(gm->codon,ct);
  fold_in_RandomModelDNA_into_RandomCodon(rndcodon,gm->rnd);

  rndcodonscore = RandomCodonScore_from_RandomCodon(rndcodon);

  assert(active_cdna);
  assert(active_gen);

  cdna_cses = default_cDNA_ComplexSequenceEvalSet();
  gen_cses  = new_ComplexSequenceEvalSet_from_GeneModel(gm);

  cs_cdna = new_ComplexSequence(active_cdna,cdna_cses);
  cs_gen  = new_ComplexSequence(active_gen,gen_cses);

  gp21 = std_GeneParser21();
  GeneParser21_fold_in_RandomModelDNA(gp21,gm->rnd);
  gp21s = GeneParser21Score_from_GeneParser21(gp21);
  gp = GeneParser4Score_from_GeneParser21Score(gp21s);

  dm = identity_DnaMatrix(Probability2Score(halfbit2Probability(1)),Probability2Score(halfbit2Probability(-1)));

  assert(cs_cdna);
  assert(cs_gen);
  assert(gp);
  assert(rndcodonscore);
  assert(dm);
  assert(dpri);

  pal = PackAln_bestmemory_CdnaWise10(cs_cdna,cs_gen,gp,cm,rndcodonscore,dm,
                                      Probability2Score(halfbit2Probability(-12)),
                                      Probability2Score(halfbit2Probability(-2)),
                                      Probability2Score(halfbit2Probability(-5)),
                                      Probability2Score(halfbit2Probability(0)),
                                      NULL,
                                      dpri);

  alb = convert_PackAln_to_AlnBlock_CdnaWise10(pal);

  gent = Genomic_from_Sequence(gen);
  assert(gent);

  gr = new_GenomicRegion(gent);
  assert(gr);

  add_Genes_to_GenomicRegion_GeneWise(gr,active_gen->offset,active_gen->end,alb,cdna->name,0,NULL);

  mapped_ascii_AlnBlock(alb,Score2Bits,0,ofp);

  show_pretty_GenomicRegion(gr,0,ofp);

  for(i=0;i<gr->len;i++) {
    if( gr->gene[i]->ispseudo == TRUE ) {
      fprintf(ofp,"#Gene %d is a pseudo gene - no translation possible\n",i);
    } else {
      /* Only the first translation of the first transcript is shown. */
      trans = get_Protein_from_Translation(gr->gene[i]->transcript[0]->translation[0],ct);
      /* NOTE(review): assumes a NULL return signals a failed translation - confirm. */
      if( trans == NULL ) {
        fprintf(ofp,"#Gene %d could not be translated\n",i);
      } else {
        write_fasta_Sequence(trans->baseseq,ofp);
      }
    }
  }

  return 0;
}