Пример #1
0
/*
 * Example driver.
 *
 * Loads every sequence of the FASTA database named by argv[1] into a
 * ghash SeqLookupInterface, reads one query sequence from the FASTA file
 * named by argv[2], runs the one-off HSP scan on it and writes the
 * resulting hit list to stdout via Wise2_write_pseudoblast_HitList.
 *
 * Usage: <program> <database.fasta> <query.fasta>
 *
 * Returns 0 on success, 1 when arguments are missing or any of the
 * inputs / intermediate objects could not be built.
 */
int main(int argc,char ** argv)
{
  SequenceDB * db;
  Sequence * seq;
  SeqLookupInterface * sli;
  HSPScanInterface * hsi;
  LinearHSPmanager * lm;
  HitList * hl;
  CompMat * mat;
  int ret;
  /*
   * Zero the whole parameter block first: only two fields are set
   * explicitly below and the struct is handed to the scan by address,
   * so the remaining fields must not be left indeterminate.
   */
  HSPScanInterfacePara p = {0};

  /* both file names are mandatory; do not touch argv[1]/argv[2] without them */
  if( argc < 3 ) {
    fprintf(stderr,"Usage: %s <database.fasta> <query.fasta>\n",
            argc > 0 ? argv[0] : "program");
    return 1;
  }

  p.min_score   = 30;
  p.max_results = 200;

  db = single_fasta_SequenceDB(argv[1]);
  if( db == NULL ) {
    fprintf(stderr,"Could not open sequence database [%s]\n",argv[1]);
    return 1;
  }

  mat = read_Blast_file_CompMat("blosum62.bla");
  if( mat == NULL ) {
    fprintf(stderr,"Could not read comparison matrix [%s]\n","blosum62.bla");
    return 1;
  }

  sli = new_ghash_SeqLookupInterface();
  if( sli == NULL ) {
    fprintf(stderr,"Could not build sequence lookup interface\n");
    return 1;
  }

  /* ret receives the status from init_SequenceDB; it is not inspected here */
  for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) {
    load_aa_flat_Sequence_SeqLookupInterface(sli,hard_link_Sequence(seq));
  }

  /* from here on seq is the query, not a database entry */
  seq = read_fasta_file_Sequence(argv[2]);
  if( seq == NULL ) {
    /* explicit check rather than assert(): assert vanishes under NDEBUG */
    fprintf(stderr,"Could not read query sequence from [%s]\n",argv[2]);
    return 1;
  }

  hsi = Wise2_new_one_off_HSPScanInterface(sli,mat,20,10);
  if( hsi == NULL ) {
    fprintf(stderr,"Could not build HSP scan interface\n");
    return 1;
  }

  /* the scan is dispatched through the interface's function pointer */
  lm = (*hsi->scan_query)(hsi->data,seq,&p);
  if( lm == NULL ) {
    fprintf(stderr,"HSP scan returned no result manager\n");
    return 1;
  }

  hl = Wise2_HitList_from_LinearHSPmanager(lm);
  if( hl == NULL ) {
    fprintf(stderr,"Could not convert HSPs into a hit list\n");
    return 1;
  }

  Wise2_write_pseudoblast_HitList(hl,stdout);

  return 0;
}
Пример #2
0
/*
 * Releases the previously returned Sequence (if any) and fetches the
 * next one from the database, moving on to further file sources when
 * the current one yields nothing.
 *
 * last          - sequence handed out by the previous call, or NULL;
 *                 freed here when non-NULL
 * sdb           - database being iterated
 * return_status - always written before returning:
 *                   DB_RETURN_OK    a sequence is returned
 *                   DB_RETURN_END   the database reports it is at its end
 *                   DB_RETURN_ERROR a file source could not be closed or
 *                                   opened, or more than 10 successive
 *                                   file sources produced no sequence
 *
 * Returns the next Sequence, or NULL for both END and ERROR.
 */
Sequence * reload_SequenceDB(Sequence * last,SequenceDB * sdb,int * return_status)
{
  Sequence * next;
  int empty_loads;

  /*
   * The caller is finished with the previous sequence; any smarter
   * memory reuse would belong here.
   */
  if( last != NULL ) {
    free_Sequence(last);
  }

  /** easy case: the current source still has a sequence for us **/
  next = get_next_SequenceDB(sdb);
  if( next != NULL ) {
    *return_status = DB_RETURN_OK;
    return next;
  }

  /** nothing read and nothing left: close up and report the end **/
  if( SequenceDB_at_end(sdb) == TRUE ) {
    if( close_last_fs_SequenceDB(sdb) != FALSE ) {
      *return_status = DB_RETURN_END;
      return NULL;
    }
    warn("On file source [%d] [%s] could not close",sdb->current_source,sdb->fs[sdb->current_source]->filename);
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }

  /*
   * Otherwise keep swapping file sources until one gives a sequence.
   * empty_loads counts the sources that loaded but produced nothing,
   * and bounds the loop.
   */
  for(empty_loads = 1; ; empty_loads++) {
    if( close_last_fs_SequenceDB(sdb) == FALSE ) {
      warn("On file source [%d] [%s] could not close",sdb->current_source,sdb->fs[sdb->current_source]->filename);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    if( load_next_fs_SequenceDB(sdb) == FALSE ) {
      warn("On file source [%d] [%s] could not open the file",sdb->current_source+1,sdb->fs[sdb->current_source+1]->filename);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    next = get_next_SequenceDB(sdb);
    if( next != NULL ) {
      *return_status = DB_RETURN_OK;
      return next;
    }

    /** a freshly loaded source that yields nothing is suspicious **/
    warn("Ok, don't like this, just loaded the next Filesource, and got no sequence. Nope!");

    if( SequenceDB_at_end(sdb) == TRUE ) {
      *return_status = DB_RETURN_END;
      return NULL;
    }

    if( empty_loads > 10 ) {
      /*** give up rather than spin forever ***/
      warn("Too many failed reloads of databases, going to fail");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }
  }
}
Пример #3
0
/*
 * Command-line driver.
 *
 * Builds the scan, conversion and output implementations from the
 * command-line options, then treats every sequence in the FASTA file
 * named by the one remaining argument as a query: it is scanned through
 * the HSPScanInterface, the HSP sets are sorted and converted to a
 * HitList, scores are assigned, and the list is written to stdout.
 * A one-line query count / wall-clock summary goes to stderr at the end.
 *
 * Options handled directly here: -verbose, -mott, -besthsp, -dbsize.
 *
 * Returns 0 on success; exits with status 12 after printing help when
 * the argument count is wrong. Unrecoverable problems go through
 * fatal().
 */
int main(int argc,char ** argv)
{
  DPRunImpl * dpri = NULL;
  ScanWiseHSPImpl * scani = NULL;
  HSP2HitListImpl * hsp2hiti = NULL;
  HitListOutputImpl * hloi = NULL;
  ProteinIndexConstructor * pic = NULL;

  HSPScanInterface * hsi;
  HSPScanInterfacePara * para;
  SearchStatInterface * ssi;
  SearchStatInterface * ssl;
  SeqLookupLoadPara * slp;

  HSPset2HitPairPara * hsp2hit;
  CompMat * mat;
  SequenceDB * db;
  Sequence * seq;
  int ret;
  int i;
  int effective_db_size = 300000;
  int kk;

  int count = 0;

  LinearHSPmanager * lm;
  HitList * hl;

  boolean use_mott = 1;

  boolean trunc_best_hsp = 0;
  boolean verbose = 0;
  static struct rusage use;

  struct timeval t0, t1;

  gettimeofday(&t0, NULL);

  dpri = new_DPRunImpl_from_argv(&argc,argv);

  /* must be checked before the first dereference just below */
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  dpri->memory = DPIM_Explicit;

  scani     = new_ScanWiseHSPImpl_from_argv(&argc,argv);

  hsp2hiti  = new_HSP2HitListImpl_from_argv(&argc,argv);

  hloi      = new_HitListOutputImpl_from_argv(&argc,argv);

  slp       = new_SeqLookupLoadPara_from_argv(&argc,argv);

  pic       = new_ProteinIndexConstructor_from_argv(&argc,argv);

  hsp2hit   = new_HSPset2HitPairPara_from_argv(&argc,argv);

  para      = new_HSPScanInterfacePara_from_argv(&argc,argv);

  verbose = strip_out_boolean_argument(&argc,argv,"verbose") ;

  strip_out_boolean_def_argument(&argc,argv,"mott",&use_mott);

  strip_out_boolean_def_argument(&argc,argv,"besthsp",&trunc_best_hsp);

  strip_out_integer_argument(&argc,argv,"dbsize",&effective_db_size);

#ifdef SCAN_CORBA
  sorb = get_Wise2Corba_Singleton(&argc,argv,"orbit-local-orb");
#endif

  strip_out_standard_options(&argc,argv,show_help,show_version);

  /* once every option has been stripped only the query file may remain */
  if( argc != 2 ) {
    show_help(stdout);
    exit(12);
  }

  /* ugly, but we don't want to bounce matrices around the network... */

  mat = read_Blast_file_CompMat("BLOSUM62.bla");

  erroroff(REPORT);

  hsi = new_HSPScanInterface_from_ScanWiseHSPImpl(scani,pic,slp);

  if( hsi == NULL ) {
    fatal("Unable to build HSP scan interface");
  }

  ssi = new_Mott_SearchStatInterface();

  /* NOTE(review): ssl is created but never used afterwards in this function */
  ssl = new_lookup_SearchStatInterface(40,2.3);

  if( verbose ) {
    info("contacted database");
  }

  db = single_fasta_SequenceDB(argv[1]);

  if( db == NULL ) {
    fatal("Could not open sequence db...\n");
  }

  /* ret receives the status from init_SequenceDB; it is not inspected here */
  for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) {

    count++;

    /*
     * Reject non-letters and upper-case the query in place.
     * The <ctype.h> functions need an unsigned char value; passing a
     * negative plain char is undefined behaviour.
     */
    for(i=0;i<seq->len;i++) {
      if( !isalpha((unsigned char)seq->seq[i]) ) {
        fatal("Sequence position %d [%c] is not valid",i,seq->seq[i]);
      }
      seq->seq[i] = (char)toupper((unsigned char)seq->seq[i]);
    }

    info("Processing %s",seq->name);

    /*
     * Resource-usage snapshots are taken between the stages below; the
     * info() reports that used to print them are disabled.
     */
    getrusage(RUSAGE_SELF,&use);

    /* the scan is dispatched through the interface's function pointer */
    lm = (*hsi->scan_query)(hsi->data,seq,para);

    if( lm == NULL ) {
      fatal("Scan of %s returned no HSP manager",seq->name);
    }

    fprintf(stderr,"Got linear manager is %d entries\n",lm->len);

    /* attach the locally read matrix when the manager came back without one */
    if( lm->mat == NULL ) {
      if( mat == NULL ) {
        fatal("Could not read comparison matrix BLOSUM62.bla");
      }
      lm->mat = hard_link_CompMat(mat);
    }

    getrusage(RUSAGE_SELF,&use);

    sort_LinearHSPmanager(lm,compare_HSPset_score);

    /*
     * -besthsp: keep only the first set after sorting. Guarded on
     * len > 1 so that an empty result is not turned into a one-entry
     * list whose slot was never filled.
     */
    if( trunc_best_hsp == 1 && lm->len > 1 ) {
      for(kk=1;kk<lm->len;kk++) {
        free_HSPset(lm->set[kk]);
        lm->set[kk] = NULL;
      }
      lm->len = 1;
    }

    getrusage(RUSAGE_SELF,&use);

    hl = HitList_from_HSP_HSP2HitListImpl(hsp2hiti,lm,dpri,hsp2hit);

    getrusage(RUSAGE_SELF,&use);

    free_LinearHSPmanager(lm);

    if( hl == NULL ) {
      fatal("Could not build hit list for %s",seq->name);
    }

    if( use_mott == 1 ) {
      apply_SearchStat_to_HitList(hl,ssi,effective_db_size);
    } else {
      /* without the Mott statistics the bit score is half the raw score */
      for(kk=0;kk<hl->len;kk++) {
        hl->pair[kk]->bit_score = hl->pair[kk]->raw_score / 2.0;
      }
    }

    sort_HitList_by_score(hl);

    show_HitList_HitListOutputImpl(hloi,hl,stdout);

    getrusage(RUSAGE_SELF,&use);

    free_HitList(hl);
    free_Sequence(seq);
  }

  free_DPRunImpl(dpri);
  free_HSPScanInterface(hsi);

  gettimeofday(&t1, NULL);
  fprintf(stderr, "[client stats] queries, time (s): %d %f\n",
          count,
          (t1.tv_sec - t0.tv_sec) +
          (t1.tv_usec - t0.tv_usec) * 1e-6);

  return 0;

}