Beispiel #1
0
int
p7_hit_Compare(const P7_HIT *h1, const P7_HIT *h2, float tol)
{
  int d;
  int status;

  if (    strcmp(h1->name, h2->name) != 0) return eslFAIL;
  if (esl_strcmp(h1->acc,  h2->acc)  != 0) return eslFAIL;
  if (esl_strcmp(h1->desc, h2->desc) != 0) return eslFAIL;

  if ( h1->window_length != h2->window_length) return eslFAIL;
  if ( h1->ndom          != h2->ndom)          return eslFAIL;
  if ( h1->noverlaps     != h2->noverlaps)     return eslFAIL;
  if ( h1->flags         != h2->flags)         return eslFAIL;
  if ( h1->nreported     != h2->nreported)     return eslFAIL;
  if ( h1->nincluded     != h2->nincluded)     return eslFAIL;
  if ( h1->best_domain   != h2->best_domain)   return eslFAIL;
  if ( h1->seqidx        != h2->seqidx)        return eslFAIL;
  if ( h1->subseq_start  != h2->subseq_start)  return eslFAIL;
  if ( h1->offset        != h2->offset)        return eslFAIL;
  
  if ( esl_DCompare( h1->sortkey,   h2->sortkey,   tol ) != eslOK) return eslFAIL;
  if ( esl_FCompare( h1->score,     h2->score,     tol ) != eslOK) return eslFAIL;
  if ( esl_FCompare( h1->pre_score, h2->pre_score, tol ) != eslOK) return eslFAIL;
  if ( esl_FCompare( h1->sum_score, h2->sum_score, tol ) != eslOK) return eslFAIL;
  if ( esl_DCompare( h1->lnP,       h2->lnP,       tol ) != eslOK) return eslFAIL;
  if ( esl_DCompare( h1->pre_lnP,   h2->pre_lnP,   tol ) != eslOK) return eslFAIL;
  if ( esl_DCompare( h1->sum_lnP,   h2->sum_lnP,   tol ) != eslOK) return eslFAIL;
  if ( esl_DCompare( h1->nexpected, h2->nexpected, tol ) != eslOK) return eslFAIL;

  for (d = 0; d < h1->ndom; d++)
    if (( status = p7_domain_Compare(&(h1->dcl[d]), &(h2->dcl[d]), tol)) != eslOK) return status;
  return eslOK;
}
Beispiel #2
0
/* Function:  esl_vec_FCompare()
 *
 * Purpose:   Compare the first <n> elements of float vectors <vec1>
 *            and <vec2> pairwise, using esl_DCompare() with
 *            tolerance <tol> on each pair.
 *
 *            NOTE(review): the float elements are handed to
 *            esl_DCompare(); an esl_FCompare() is used elsewhere in
 *            this code base -- confirm the double comparator is
 *            intended here.
 *
 * Returns:   <eslFAIL> as soon as one pair compares as <eslFAIL>;
 *            <eslOK> otherwise (including when <n> is <= 0).
 */
int
esl_vec_FCompare(const float *vec1, const float *vec2, int n, float tol)
{
  int pos = 0;

  while (pos < n)
    {
      if (esl_DCompare(vec1[pos], vec2[pos], tol) == eslFAIL)
	return eslFAIL;
      pos++;
    }
  return eslOK;
}
Beispiel #3
0
/* Function:  esl_vec_DCompare()
 * Synopsis:  Return <eslOK> if two vectors are equal.
 * Incept:    SRE, Mon Nov  6 10:20:28 2006 [Janelia]
 *
 * Purpose:   Test vectors <vec1> and <vec2>, each holding <n>
 *            elements, for equality by comparing them one cognate
 *            element pair at a time. For floating point vectors,
 *            two elements count as equal when they agree within
 *            fractional tolerance <tol>; for integer vectors,
 *            elements must be strictly equal.
 *
 *            <esl_vec_FCompare()> and <esl_vec_ICompare()> are the
 *            float and integer counterparts.
 *
 * Returns:   <eslOK> if the vectors are equal, <eslFAIL> if any
 *            element pair compares as <eslFAIL>.
 */
int
esl_vec_DCompare(const double *vec1, const double *vec2, int n, double tol)
{
  int pos = 0;

  while (pos < n)
    {
      if (esl_DCompare(vec1[pos], vec2[pos], tol) == eslFAIL)
	return eslFAIL;
      pos++;
    }
  return eslOK;
}
Beispiel #4
0
/* The LogGamma() function is rate-limiting in hmmbuild, because it is
 * used so heavily in mixture Dirichlet calculations.
 *    ./configure --with-gsl; [compile test driver]
 *    ./stats_utest -v
 * runs a comparison of time/precision against GSL.
 * SRE, Sat May 23 10:04:41 2009, on home Mac:
 *     LogGamma       = 1.29u  / N=1e8  =  13 nsec/call
 *     gsl_sf_lngamma = 1.43u  / N=1e8  =  14 nsec/call
 *
 * utest_LogGamma():
 *   Draws <N> samples x[i] = esl_random(r) * 100, evaluates
 *   esl_stats_LogGamma() on each of them under a stopwatch, and (only
 *   when built with HAVE_LIBGSL) evaluates gsl_sf_lngamma() on the same
 *   inputs and requires the two results to agree under
 *   esl_DCompare(..., 1e-2).
 *
 *   r          - random number source used to draw the inputs
 *   N          - number of samples; N <= 0 makes the test a no-op
 *   be_verbose - if nonzero, timings are displayed on stdout
 *
 *   Any failure (including failure to allocate the work arrays or the
 *   stopwatch) terminates the program through esl_fatal().
 */
static void
utest_LogGamma(ESL_RANDOMNESS *r, int N, int be_verbose)
{
  char          *msg = "esl_stats_LogGamma() unit test failed";
  ESL_STOPWATCH *w   = esl_stopwatch_Create();
  double        *x   = malloc(sizeof(double) * N);
  double        *lg  = malloc(sizeof(double) * N);
  double        *lg2 = malloc(sizeof(double) * N);
  int            i;

  /* Refuse to continue with a missing stopwatch or work array instead
   * of dereferencing NULL below. The arrays are only required when
   * there is something to store (N > 0); for N <= 0 none of the loops
   * execute, and malloc() is allowed to return NULL for such sizes.
   */
  if (w == NULL) esl_fatal(msg);
  if (N > 0 && (x == NULL || lg == NULL || lg2 == NULL)) esl_fatal(msg);

  for (i = 0; i < N; i++) 
    x[i] = esl_random(r) * 100.;
  
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++) 
    if (esl_stats_LogGamma(x[i], &(lg[i])) != eslOK) esl_fatal(msg);
  esl_stopwatch_Stop(w);

  if (be_verbose) esl_stopwatch_Display(stdout, w, "esl_stats_LogGamma() timing: ");

#ifdef HAVE_LIBGSL
  /* Reference implementation: same inputs, timed with the same stopwatch. */
  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++) lg2[i] = gsl_sf_lngamma(x[i]);
  esl_stopwatch_Stop(w);

  if (be_verbose) esl_stopwatch_Display(stdout, w, "gsl_sf_lngamma() timing:     ");
  
  for (i = 0; i < N; i++)
    if (esl_DCompare(lg[i], lg2[i], 1e-2) != eslOK) esl_fatal(msg);
#endif
  
  free(lg2);
  free(lg);
  free(x);
  esl_stopwatch_Destroy(w);
}
Beispiel #5
0
/* utest_pvectors()
 *
 * Exercises the probability-vector routines of the vectorops module
 * (validate, entropy, relative entropy, log/exp, scale/norm,
 * logsum/lognorm) in both their double (D) and float (F) flavors, on
 * two small fixed 4-element vectors. Any unexpected result terminates
 * the program through esl_fatal().
 */
static void
utest_pvectors(void)
{
  char  *msg      = "pvector unit test failed";
  int    len      = 4;
  double p1[4]    = { 0.25, 0.25, 0.25, 0.25 };
  float  p2f[4]   = { 0.0,   0.5, 0.5,  0.0  };
  double p2[4];
  double p3[4];
  float  p1f[4]; 
  float  p3f[4];
  double got;

  /* Fill in the missing precision of each vector: p1 -> p1f, p2f -> p2. */
  esl_vec_D2F(p1,  len, p1f);
  esl_vec_F2D(p2f, len, p2);  

  /* p1 / p1f must validate as probability vectors. */
  if (esl_vec_DValidate(p1,  len, 1e-12, NULL) != eslOK) esl_fatal(msg);
  if (esl_vec_FValidate(p1f, len, 1e-7,  NULL) != eslOK) esl_fatal(msg);

  /* Entropy: expected 2.0 for p1, 1.0 for p2. */
  got = esl_vec_DEntropy(p1,  len);
  if (esl_DCompare(2.0, got, 1e-9) != eslOK) esl_fatal(msg);

  got = esl_vec_FEntropy(p1f, len);
  if (esl_DCompare(2.0, got, 1e-9) != eslOK) esl_fatal(msg);

  got = esl_vec_DEntropy(p2,  len);
  if (esl_DCompare(1.0, got, 1e-9) != eslOK) esl_fatal(msg);

  got = esl_vec_FEntropy(p2f, len);
  if (esl_DCompare(1.0, got, 1e-9) != eslOK) esl_fatal(msg);

  /* Relative entropy of p2 with respect to p1: expected 1.0. */
  got = esl_vec_DRelEntropy(p2,  p1,  len);
  if (esl_DCompare(1.0, got, 1e-9) != eslOK) esl_fatal(msg);

  got = esl_vec_FRelEntropy(p2f, p1f, len);
  if (esl_DCompare(1.0, got, 1e-9) != eslOK) esl_fatal(msg);

  /* The other direction (p1 with respect to p2, which has zero
   * entries) is expected to be exactly eslINFINITY.
   */
  got = esl_vec_DRelEntropy(p1,  p2,  len);
  if (got != eslINFINITY) esl_fatal(msg);

  got = esl_vec_FRelEntropy(p1f, p2f, len);
  if (got != eslINFINITY) esl_fatal(msg);

  /* Log / Exp round trip: a zero entry must come back as exactly zero. */
  esl_vec_DLog(p2, len);
  if (esl_vec_DLogValidate(p2, len, 1e-12, NULL) != eslOK) esl_fatal(msg);
  esl_vec_DExp(p2, len);
  if (p2[0] != 0.) esl_fatal(msg);

  esl_vec_FLog(p2f, len);
  if (esl_vec_FLogValidate(p2f, len, 1e-7, NULL) != eslOK) esl_fatal(msg);
  esl_vec_FExp(p2f, len);
  if (p2f[0] != 0.) esl_fatal(msg);

  /* Double precision: a scaled copy must normalize back to p2 ... */
  esl_vec_DCopy(p2, len, p3);
  esl_vec_DScale(p3, len, 10.);
  esl_vec_DNorm(p3, len);
  if (esl_vec_DCompare(p2, p3, len, 1e-12) != eslOK) esl_fatal(msg);

  /* ... and so must a shifted log-space copy, after DLogNorm. */
  esl_vec_DLog(p3, len);
  got = esl_vec_DLogSum(p3, len);
  if (esl_DCompare(0.0, got, 1e-12) != eslOK) esl_fatal(msg);
  esl_vec_DIncrement(p3, len, 2.0);
  esl_vec_DLogNorm(p3, len);
  if (esl_vec_DCompare(p2, p3, len, 1e-12) != eslOK) esl_fatal(msg);

  /* Same two checks in single precision. */
  esl_vec_FCopy(p2f, len, p3f);
  esl_vec_FScale(p3f, len, 10.);
  esl_vec_FNorm(p3f, len);
  if (esl_vec_FCompare(p2f, p3f, len, 1e-7) != eslOK) esl_fatal(msg);

  esl_vec_FLog(p3f, len);
  got = esl_vec_FLogSum(p3f, len);
  if (esl_DCompare(0.0, got, 1e-7) != eslOK) esl_fatal(msg);
  esl_vec_FIncrement(p3f, len, 2.0);
  esl_vec_FLogNorm(p3f, len);
  if (esl_vec_FCompare(p2f, p3f, len, 1e-7) != eslOK) esl_fatal(msg);

  return;
}
Beispiel #6
0
/* main()
 *
 * Regression driver: for every alignment in the input file, computes
 * sequence weights twice -- once with the Easel routine
 * (esl_msaweight_GSC/PB/BLOSUM, result in msa->wgt) and once with the
 * corresponding reference routine (GSCWeights / PositionBasedWeights /
 * BlosumWeights, result in sqd) -- and counts the weights that differ
 * under esl_DCompare() with tolerance --tol.
 *
 * Returns 0 after -h, 1 on a wrong number of command line arguments,
 * eslOK after a completed run (whether or not mismatches were found),
 * and the failing status code if an allocation fails.
 *
 * All resources (option object, open file, current alignment, weight
 * buffer) are released on every return path.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = NULL;
  char         *msafile = NULL;
  ESLX_MSAFILE *afp     = NULL;
  ESL_MSA      *msa     = NULL;
  float        *sqd     = NULL;   /* reference weights for the current alignment */
  int          status;
  int          nbad;
  int          nali    = 0;       /* alignments examined                */
  int          nbadali = 0;       /* alignments with >= 1 mismatch      */
  int          nwgt    = 0;       /* weights examined                   */
  int          nbadwgt = 0;       /* weights that mismatched            */
  int i;
  int be_quiet;
  int do_gsc;
  int do_pb;
  int do_blosum;
  double maxid;
  double tol;
  int    maxN;

  /* Process command line
   */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("failed to parse cmd line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) esl_fatal("failed to parse cmd line: %s\n", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h") == TRUE) {
    puts(usage); 
    puts("\n  where options are:");
    esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */
    esl_getopts_Destroy(go);
    return 0;
  }
  be_quiet  = esl_opt_GetBoolean(go, "-q");
  do_blosum = esl_opt_GetBoolean(go, "--blosum");
  do_gsc    = esl_opt_GetBoolean(go, "--gsc");
  do_pb     = esl_opt_GetBoolean(go, "--pb");
  maxid     = esl_opt_GetReal   (go, "--id");
  tol       = esl_opt_GetReal   (go, "--tol");
  maxN      = esl_opt_GetInteger(go, "--maxN");
  if (esl_opt_ArgNumber(go) != 1) {
    puts("Incorrect number of command line arguments.");
    puts(usage);
    esl_getopts_Destroy(go);
    return 1;
  }
  /* <go> is deliberately kept alive until the end of main(): <msafile>
   * was obtained from it and is still used below.
   */
  msafile = esl_opt_GetArg(go, 1);

  /* Weight one or more alignments from input file
   */
  if ((status = eslx_msafile_Open(NULL, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp)) != eslOK)
    eslx_msafile_OpenFailure(afp, status);

  while ( (status = eslx_msafile_Read(afp, &msa)) != eslEOF)
    {
      if (status != eslOK) eslx_msafile_ReadFailure(afp, status);

      /* Optionally skip alignments with more than --maxN sequences. */
      if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); msa = NULL; continue; }

      nali++;
      nwgt += msa->nseq;
      ESL_ALLOC(sqd, sizeof(float) * msa->nseq);

      /* NOTE(review): if none of --gsc / --pb / --blosum is set, <sqd>
       * is printed and compared below without ever being written.
       * Presumably the options table guarantees that one of them is
       * on -- confirm against the <options> definition.
       */
      if (do_gsc) {
        esl_msaweight_GSC(msa);
        GSCWeights(msa->aseq, msa->nseq, msa->alen, sqd);
      } else if (do_pb) {
        esl_msaweight_PB(msa);
        PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, sqd);
      } else if (do_blosum) {
        esl_msaweight_BLOSUM(msa, maxid);
        BlosumWeights(msa->aseq, msa->nseq, msa->alen, maxid, sqd);
        /* workaround SQUID bug: BLOSUM weights weren't renormalized to sum to nseq. */
        esl_vec_FNorm (sqd, msa->nseq);
        esl_vec_FScale(sqd, msa->nseq, (float) msa->nseq);
      }

      if (! be_quiet) {
        for (i = 0; i < msa->nseq; i++)
          fprintf(stdout, "%-20s  %.3f  %.3f\n",
                  msa->sqname[i], msa->wgt[i], sqd[i]);
      }

      /* Count the weights of this alignment that disagree. */
      nbad = 0;
      for (i = 0; i < msa->nseq; i++)
        if (esl_DCompare((double) sqd[i], msa->wgt[i], tol) != eslOK) 
          nbad++;
      if (nbad > 0) nbadali++;
      nbadwgt += nbad;

      if (nbad > 0) printf("%-20s  :: alignment shows %d weights that differ (out of %d) \n", 
                           msa->name, nbad, msa->nseq);

      /* Reset the pointers so that the ERROR path never frees twice. */
      esl_msa_Destroy(msa);  msa = NULL;
      free(sqd);             sqd = NULL;
    } 
  eslx_msafile_Close(afp);
  afp = NULL;

  if (nbadali == 0) 
    printf("OK: all weights identical between squid and Easel in %d alignment(s)\n", nali);
  else {
    printf("%d of %d weights mismatched at (> %f fractional difference)\n",
           nbadwgt, nwgt, tol);
    printf("involving %d of %d total alignments\n", nbadali, nali);
  }
  esl_getopts_Destroy(go);
  return eslOK;

 ERROR:
  /* Reached from ESL_ALLOC on allocation failure: release whatever is
   * still held before reporting the status.
   */
  free(sqd);
  if (msa != NULL) esl_msa_Destroy(msa);
  if (afp != NULL) eslx_msafile_Close(afp);
  if (go  != NULL) esl_getopts_Destroy(go);
  return status;
}
Beispiel #7
0
/* Function: cm_tr_penalties_Validate()
 * Date:     EPN, Fri Jan 27 14:57:04 2012
 *
 * Purpose:  Validate a CM_TR_PENALTIES object by checking that
 *           all possible fragments in local mode sum to 1.0
 *           for the three scenarios: 5' and 3' truncation, 
 *           5' truncation only and 3' truncation only.
 *        
 *           This is an expensive test and was written only to test
 *           the code that determines fragment probability (really
 *           only for local mode) in cm_tr_penalties_Create().  It can
 *           only be run if the <ignore_inserts> flag was set to TRUE
 *           when cm_tr_penalties_Create() was called.  However, in
 *           real life that inserts should not be ignored, so this
 *           test should never actually be run except during testing
 *           (it also is helpful for understanding the logic behind
 *           the derivation of the truncated begin
 *           penalties/probabilities).
 * 
 * Returns:  eslOK if all checks pass within tolerance level.
 *           eslFAIL if any check fails, errbuf is filled.
 */
int
cm_tr_penalties_Validate(CM_TR_PENALTIES *trp, CM_t *cm, double tol, char *errbuf)
{
  if(! trp->ignored_inserts) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), trp->ignored_inserts flag is not TRUE");

  /* This is an expensive test of the trp->l_ptyAA values, the truncation
   * penalties for local mode alignment. We test each of the three arrays
   * in trp->ptyAA, one each for the following three scenarios:
   * 
   * 1. trp->l_ptyAA[TRPENALTY_5P_AND_3P][0..v..M-1]: penalty for state v 
   *    when 5' and 3' truncation are allowed.
   * 2. trp->l_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1]: penalty for state v when
   *    only 5' truncation is allowed.
   * 3. trp->l_ptyAA[TRPENALTY_3P_ONLY][0..v..M-1]: penalty for state v when
   *    only 3' truncation is allowed.
   *
   * The test is to enumerate all possible g,h fragments in the
   * consensus yield 1..clen, for those that can possibly be generated
   * in the scenario (^), determine the state t with the smallest
   * subtree yield that contains g..h. This is the state at which an
   * alignment of a g..h fragment would be rooted. We then add the
   * probability of a truncated parsetree rooted at v (that is,
   * exp_2(trp->l_ptyAA[][t])) to a growing sum. After all fragments
   * are considered the sum should be 1.0.  If it is then our
   * penalties are valid, if not they're invalid and we computed them
   * incorrectly.
   *
   * (^): When 5' and 3' truncation are both allowed, all fragments can be
   * generated, but not all fragments (for most models) can be generated if
   * only 5' or 3' truncation is allowed.
   *
   */
  
  double sump = 0.;  /* the sum, should be 1.0 after all fragments are considered */
  int    lpos, rpos; /* left and right consensus positions of a parsetree */
  int    g, h;       /* fragment start/stop */
  int    keep_going; /* break the loop when this is set to FALSE */
  int    nd, v; 
  /* test 1: trp->l_ptyAA[TRPENALTY_5P_AND_3P]: */
  for(g = 1; g <= cm->clen; g++) { 
    for(h = g; h <= cm->clen; h++) { 
      /* determine which node a truncated parsetree from [a..b] would align to, 
       * this will be lowest node in the model whose subtree spans a..b
       */
      nd = cm->nodes-1;
      keep_going = TRUE;
      while(keep_going) { 
	if(nd == 0) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate: 5' and 3' test, unable to find node that spans %d..%d\n", g, h);
	lpos = cm->emap->lpos[nd];
	rpos = cm->emap->rpos[nd];
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd) lpos++; /* lpos was one less than what we want */
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATR_nd) rpos--; /* rpos was one more than what we want */
	if((cm->ndtype[nd] == BIF_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATR_nd) && 
	   (lpos <= g && rpos >= h)) { 
	  keep_going = FALSE; 
	}
	else { nd--; }
      }
      v = cm->nodemap[nd];
      sump += sreEXP2(trp->l_ptyAA[TRPENALTY_5P_AND_3P][v]);
      /* printf("LRBOTH g: %3d h: %3d nd: %3d adding %10.5f  (%10.5f)\n", g, h, nd, trp->l_ptyAA[TRPENALTY_5P_AND_3P][v], sump); */
    }
  }
  printf("L and R sump:  %.5f\n", sump);
  if(esl_DCompare(1.0, sump, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 5' and 3' truncation test failed (%g != 1.0)", sump);

  /* test 2: trp->l_ptyAA[TRPENALTY_5P_ONLY]: */
  sump = 0.;
  for(g = 1; g <= cm->clen; g++) { 
    for(h = g; h <= cm->clen; h++) { 
      /* determine which node a truncated parsetree from [g..h] would align to, 
       * this will be lowest node in the model whose subtree spans g..h.
       * Since we're only truncating on the left, an alignment from 
       * g..h may be impossible, only those fragments for which a node exists with 
       * lpos <= g and rpos==h will be possible.
       */
      nd = cm->nodes-1;
      keep_going = TRUE;
      while(keep_going && nd > 0) { 
	lpos = cm->emap->lpos[nd];
	rpos = cm->emap->rpos[nd];
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd) lpos++; /* lpos was one less than what we want */
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATR_nd) rpos--; /* rpos was one more than what we want */
	if((cm->ndtype[nd] == BIF_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATR_nd) && 
	   (lpos <= g && rpos == h)) { 
	  keep_going = FALSE; 
	}
	else { nd--; }
      }
      if(keep_going == FALSE) { 
	v = cm->nodemap[nd];
	sump += sreEXP2(trp->l_ptyAA[TRPENALTY_5P_ONLY][v]);
      }
    }
  }
  printf("L only  sump:  %.5f\n", sump);
  if(esl_DCompare(1.0, sump, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 5' only truncation test failed (%g != 1.0)", sump);

  /* test 3: trp->l_ptyAA[TRPENALTY_3P_ONLY]: */
  sump = 0.;
  for(g = 1; g <= cm->clen; g++) { 
    for(h = g; h <= cm->clen; h++) { 
      /* determine which node a truncated parsetree from [g..h] would align to, 
       * this will be lowest node in the model whose subtree spans g..h
       * since we're only truncating on the right, an alignment from 
       * g..h may be impossible, only those for which a node exists with 
       * lpos==g and rpos >= h will be possible.
       */
      nd = cm->nodes-1;
      keep_going = TRUE;
      while(keep_going && nd > 0) { 
	lpos = cm->emap->lpos[nd];
	rpos = cm->emap->rpos[nd];
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd) lpos++; /* lpos was one less than what we want */
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATR_nd) rpos--; /* rpos was one more than what we want */
	if((cm->ndtype[nd] == BIF_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATR_nd) && 
	   (lpos == g && rpos >= h)) { 
	  keep_going = FALSE; 
	}
	else { nd--; }
      }
      if(keep_going == FALSE) { 
	v = cm->nodemap[nd];
	sump += sreEXP2(trp->l_ptyAA[TRPENALTY_3P_ONLY][v]);
      }
    }
  }
  printf("R only  sump:  %.5f\n", sump);
  if(esl_DCompare(1.0, sump, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 3' only truncation test failed (%g != 1.0)", sump);
  
  return eslOK;
}