Esempio n. 1
0
/*
 * open_ThreeStateDB
 *
 * Prepares a ThreeStateDB for reading: resets the current model counter,
 * performs the per-dbtype opening step and, when both hmm_model_start and
 * hmm_model_end are set (ie, neither is -1), reads and discards the first
 * hmm_model_start models so the next read lands on the requested start.
 *
 * Returns TRUE when the database is ready to be read, FALSE when the
 * database cannot be opened or cannot be positioned on the start model.
 */
boolean open_ThreeStateDB(ThreeStateDB * mdb)
{
  int ret;
  int return_status;
  ThreeStateModel * temp;
  int count;

  if( mdb == NULL ) {
    warn("Attempting to open a NULL threestatedb!");
    return FALSE;
  }

  mdb->current_no = 0;

  switch( mdb->dbtype ) {

  case TSMDB_SINGLE :
    /* returns straight away: no skipping is done for this type */
    return TRUE; /* should be fine! */
  case TSMDB_HMMER1PFAM :
    if( mdb->phdb == NULL ) {
      warn("No hmmer1 db to open for threestatedb!");
      return FALSE;
    }
    mdb->phdb->cur = 0;
    break;
  case TSMDB_PROTEIN :
    if( mdb->sdb == NULL ) {
      warn("Attempting to open a protein tsm with no sequence db!");
      return FALSE;
    }
    mdb->seq_cache = init_SequenceDB(mdb->sdb,&ret);
    if( ret == DB_RETURN_ERROR ) {
      return FALSE;
    }
    if( ret == DB_RETURN_END ) {
      /* only warned about: opening still carries on */
      warn("Due to some bad coding, can't cope with single protein databases in tsmdbs. oooops!");
    }
    break;
  case TSMDB_GENERIC :
    /* NOTE(review): any result from the generic opener is ignored here -
       confirm whether it reports failure */
    ((*mdb->open_generic)(mdb));
    break;
  default :
    warn("Got an unrecognisable tsm db type in opening tsm %d",mdb->dbtype);
    return FALSE;
  }

  if( mdb->hmm_model_start != -1 && mdb->hmm_model_end != -1 ) {
    for(count=0;count<mdb->hmm_model_start;count++) {
      temp = read_TSM_ThreeStateDB(mdb,&return_status);
      if( temp == NULL ) {
        /* nothing to free and no point in reading further: the requested
           start position cannot be reached */
        warn("Unable to skip to start model %d in threestatedb, no model read at position %d",mdb->hmm_model_start,count);
        return FALSE;
      }
      free_ThreeStateModel(temp);
    }
  }


  return TRUE;
}
Esempio n. 2
0
/*
 * Small driver: indexes every sequence of the fasta database named by
 * argv[1] into a ghash SeqLookupInterface, scans the fasta query named by
 * argv[2] against that index and writes the resulting hit list to stdout
 * in pseudo-blast format.
 *
 * Returns 0 on success and 1 when the arguments or any input is unusable.
 */
int main(int argc,char ** argv)
{
  SequenceDB * db;
  Sequence * seq;
  SeqLookupInterface * sli;
  HSPScanInterface * hsi;
  LinearHSPmanager * lm;
  HitList * hl;
  CompMat * mat;
  int ret;
  /* zeroed first so that any field not set below is at least deterministic */
  HSPScanInterfacePara p = {0};

  /* both file names are mandatory; argv[] must not be read beyond argc */
  if( argc < 3 ) {
    fprintf(stderr,"Usage: %s <database fasta file> <query fasta file>\n",
            argc > 0 ? argv[0] : "program");
    return 1;
  }

  p.min_score= 30;
  p.max_results = 200;

  db = single_fasta_SequenceDB(argv[1]);
  if( db == NULL ) {
    fprintf(stderr,"Could not open sequence database %s\n",argv[1]);
    return 1;
  }

  mat = read_Blast_file_CompMat("blosum62.bla");
  if( mat == NULL ) {
    fprintf(stderr,"Could not read comparison matrix blosum62.bla\n");
    return 1;
  }

  sli = new_ghash_SeqLookupInterface();

  /* load every database sequence into the lookup index */
  for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) {
    load_aa_flat_Sequence_SeqLookupInterface(sli,hard_link_Sequence(seq));
  }


  seq = read_fasta_file_Sequence(argv[2]);

  /* explicit check rather than assert(): this is user input and must
     still be validated when NDEBUG is defined */
  if( seq == NULL ) {
    fprintf(stderr,"Could not read query sequence from %s\n",argv[2]);
    return 1;
  }

  hsi = Wise2_new_one_off_HSPScanInterface(sli,mat,20,10);

  lm = (*hsi->scan_query)(hsi->data,seq,&p);
  if( lm == NULL ) {
    fprintf(stderr,"Scan of query returned no HSP manager\n");
    return 1;
  }

  hl = Wise2_HitList_from_LinearHSPmanager(lm);

  Wise2_write_pseudoblast_HitList(hl,stdout);

  return 0;
}
Esempio n. 3
0
/*
 * init_cDNADB
 *
 * Starts iteration over a cDNADB and returns the first ComplexSequence.
 *
 * For a single-sequence database a hard link to the stored forward
 * ComplexSequence is returned with *return_status set to DB_RETURN_OK.
 * Otherwise the first Sequence is pulled from the underlying SequenceDB,
 * forced to DNA using the database's error tolerance, stored as the
 * current sequence and converted into a ComplexSequence.
 *
 * Returns NULL on failure. *return_status is whatever init_SequenceDB
 * reported when that call is the one that failed, and DB_RETURN_ERROR
 * when the DNA forcing or the ComplexSequence construction fails.
 */
ComplexSequence * init_cDNADB(cDNADB * cdnadb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq;

  if( cdnadb->is_single_seq == TRUE) {
    *return_status = DB_RETURN_OK;
    cdnadb->done_forward = TRUE;
    return hard_link_ComplexSequence(cdnadb->forw);
  }

  /* is a seq db */

  seq = init_SequenceDB(cdnadb->sdb,return_status);

  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    /* a sequence handed back alongside an error/end status is not going
       to be stored anywhere, so release it here rather than leak it */
    if( seq != NULL ) {
      free_Sequence(seq);
    }
    return NULL; /** error already reported **/
  }

  if( force_to_dna_Sequence(seq,cdnadb->error_tol,NULL) == FALSE ) {
    warn("first sequence below error level, have to fail at the moment. Ooops...");
    free_Sequence(seq);
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }

  /* from here on the database holds the sequence in ->current */
  cdnadb->current = seq;
  cdnadb->done_forward = TRUE;
  cs = new_ComplexSequence(seq,cdnadb->cses);
  if( cs == NULL ) {
    warn("Cannot make initial ComplexSequence. Unable to error catch this. Failing!");
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }

  return cs;
}
Esempio n. 4
0
/*
 * init_GenomicDB
 *
 * Starts iteration over a GenomicDB and returns the first ComplexSequence.
 *
 * For a single-sequence database a hard link to the stored forward
 * ComplexSequence is returned with *return_status set to DB_RETURN_OK.
 * Otherwise the first Sequence is pulled from the underlying SequenceDB,
 * forced to DNA, converted to a Genomic object (stored as the current
 * entry) and evaluated into a ComplexSequence.
 *
 * Returns NULL on failure. *return_status is whatever init_SequenceDB
 * reported when that call is the one that failed, and DB_RETURN_ERROR
 * when the Genomic or the ComplexSequence cannot be built.
 */
ComplexSequence * init_GenomicDB(GenomicDB * gendb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq;

  if( gendb->is_single_seq == TRUE) {
    *return_status = DB_RETURN_OK;
    gendb->done_forward = TRUE;
    return hard_link_ComplexSequence(gendb->forw);
  }

  /* is a seq db */

  seq = init_SequenceDB(gendb->sdb,return_status);

  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    /* a sequence handed back alongside an error/end status is not going
       to be stored anywhere, so release it here rather than leak it */
    if( seq != NULL ) {
      free_Sequence(seq);
    }
    return NULL; /** error already reported **/
  }

  /* check dna status. We assume someone knows what he is doing when he makes a genomic db! */
  if( seq->type != SEQUENCE_DNA) {
    warn("Sequence from %s data entry doesn't look like DNA. Forcing it to",seq->name);
  }

  /* the result is deliberately not checked: the sequence is used regardless */
  force_to_dna_Sequence(seq,1.0,NULL);

  /* map to Genomic on length of N business */

  gendb->current = Genomic_from_Sequence_Nheuristic(seq,gendb->length_of_N);
  gendb->done_forward = TRUE;
  if( gendb->current == NULL ) {
    /* NOTE(review): seq is not freed here because it is not visible from
       this function whether the failed conversion still owns it - confirm */
    warn("Cannot make initial Genomic sequence. Failing!");
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }

  cs = evaluate_ComplexSequence_Genomic(gendb->current,gendb->cses,0,gendb->repeat_in_cds_score);
  if( cs == NULL ) {
    warn("Cannot make initial ComplexSequence. Unable to error catch this. Failing!");
    *return_status = DB_RETURN_ERROR;
    return NULL;
  }


  return cs;
}
Esempio n. 5
0
/*
 * Scan client driver.
 *
 * Builds the implementation/parameter objects from the command line, then
 * for every sequence in the fasta file named by the single remaining
 * argument: upper-cases and validates it, runs the HSP scan, sorts the
 * HSP sets by score, optionally keeps only the best set, converts the
 * result to a HitList, scores it (Mott statistics or raw_score / 2) and
 * prints it to stdout. A query count and wall-clock time are written to
 * stderr at the end.
 *
 * Returns 0 on completion; exits with status 12 on bad usage.
 */
int main(int argc,char ** argv)
{
  DPRunImpl * dpri = NULL;
  ScanWiseHSPImpl * scani = NULL;
  HSP2HitListImpl * hsp2hiti = NULL;
  HitListOutputImpl * hloi = NULL;
  ProteinIndexConstructor * pic = NULL;

  HSPScanInterface * hsi;
  HSPScanInterfacePara * para;
  SearchStatInterface * ssi;
  SearchStatInterface * ssl;
  SeqLookupLoadPara * slp;

  HSPset2HitPairPara * hsp2hit;
  CompMat * mat;
  SequenceDB * db;
  Sequence * seq;
  int ret;
  int i;
  int effective_db_size = 300000;
  int kk;

  int count = 0;

  LinearHSPmanager * lm;
  HitList * hl;

  boolean use_mott = 1;

  boolean trunc_best_hsp = 0;
  boolean verbose = 0;
  static struct rusage use;

  struct timeval t0, t1;

  gettimeofday(&t0, NULL);


  dpri      = new_DPRunImpl_from_argv(&argc,argv);

  /* has to be checked before the first dereference just below */
  if( dpri == NULL ) {
    fatal("Unable to build DPRun implementation. Bad arguments");
  }

  dpri->memory = DPIM_Explicit;

  scani     = new_ScanWiseHSPImpl_from_argv(&argc,argv);

  hsp2hiti  = new_HSP2HitListImpl_from_argv(&argc,argv);

  hloi = new_HitListOutputImpl_from_argv(&argc,argv);

  slp = new_SeqLookupLoadPara_from_argv(&argc,argv);

  pic = new_ProteinIndexConstructor_from_argv(&argc,argv);

  hsp2hit = new_HSPset2HitPairPara_from_argv(&argc,argv);

  para = new_HSPScanInterfacePara_from_argv(&argc,argv);

  verbose = strip_out_boolean_argument(&argc,argv,"verbose") ;


  strip_out_boolean_def_argument(&argc,argv,"mott",&use_mott);

  strip_out_boolean_def_argument(&argc,argv,"besthsp",&trunc_best_hsp);

  strip_out_integer_argument(&argc,argv,"dbsize",&effective_db_size);



#ifdef SCAN_CORBA
  sorb = get_Wise2Corba_Singleton(&argc,argv,"orbit-local-orb");
#endif

  strip_out_standard_options(&argc,argv,show_help,show_version);
  /* once all options are stripped only the query file name may remain */
  if( argc != 2 ) {
    show_help(stdout);
    exit(12);
  }

  /* ugly, but we don't want to bounce matrices around the network... */

  mat = read_Blast_file_CompMat("BLOSUM62.bla");

  erroroff(REPORT);

  hsi = new_HSPScanInterface_from_ScanWiseHSPImpl(scani,pic,slp);

  ssi = new_Mott_SearchStatInterface();

  /* NOTE(review): ssl is built here but never used further down */
  ssl = new_lookup_SearchStatInterface(40,2.3);


  if( verbose ) {
    info("contacted database");
  }

  db = single_fasta_SequenceDB(argv[1]);

  if( db == NULL ) {
    fatal("Could not open sequence db...\n");
  }

  for(seq = init_SequenceDB(db,&ret); seq != NULL;seq = get_next_SequenceDB(db) ) {

    count++;

    /* validate and upper-case the query in place; the ctype functions
       need a value representable as unsigned char */
    for(i=0;i<seq->len;i++) {
      if( !isalpha((unsigned char)seq->seq[i]) ) {
        fatal("Sequence position %d [%c] is not valid",i,seq->seq[i]);
      }
      seq->seq[i] = toupper((unsigned char)seq->seq[i]);
    }

    info("Processing %s",seq->name);

    /* resource usage is sampled between stages; the per-stage timing
       reports that consumed these samples are currently disabled */
    getrusage(RUSAGE_SELF,&use);

    lm = (*hsi->scan_query)(hsi->data,seq,para);

    if( lm == NULL ) {
      fatal("Scan of %s returned no HSP manager",seq->name);
    }

    fprintf(stderr,"Got linear manager is %d entries\n",lm->len);

    /* supply the locally read matrix when the scan did not attach one */
    if( lm->mat == NULL ) {
      if( mat == NULL ) {
        fatal("Unable to read comparison matrix BLOSUM62.bla");
      }
      lm->mat = hard_link_CompMat(mat);
    }

    getrusage(RUSAGE_SELF,&use);

    sort_LinearHSPmanager(lm,compare_HSPset_score);

    /* keep only the top scoring set; an empty manager must stay empty
       rather than have its length forced to 1 */
    if( trunc_best_hsp == 1 && lm->len > 1 ) {
      for(kk=1;kk<lm->len;kk++) {
        free_HSPset(lm->set[kk]);
        lm->set[kk] = NULL;
      }
      lm->len = 1;
    }

    getrusage(RUSAGE_SELF,&use);

    hl   = HitList_from_HSP_HSP2HitListImpl(hsp2hiti,lm,dpri,hsp2hit);

    getrusage(RUSAGE_SELF,&use);

    free_LinearHSPmanager(lm);

    if( use_mott == 1 ) {
      apply_SearchStat_to_HitList(hl,ssi,effective_db_size);
    } else {
      /* without Mott statistics the bit score is half the raw score */
      for(kk=0;kk<hl->len;kk++) {
        hl->pair[kk]->bit_score = hl->pair[kk]->raw_score / 2.0;
      }
    }

    sort_HitList_by_score(hl);

    show_HitList_HitListOutputImpl(hloi,hl,stdout);

    getrusage(RUSAGE_SELF,&use);

    free_HitList(hl);
    free_Sequence(seq);
  }


  free_DPRunImpl(dpri);
  free_HSPScanInterface(hsi);

  gettimeofday(&t1, NULL);
  fprintf(stderr, "[client stats] queries, time (s): %d %f\n",
                count,
                (t1.tv_sec - t0.tv_sec) +
                (t1.tv_usec - t0.tv_usec) * 1e-6);

  return 0;

}