Exemple #1
0
/* FLogsum()
 *
 * Table-driven log-sum of two float scores. LogSum2 and FLogsum are
 * identical; this name is kept for backwards compatibility.
 *
 * The larger of <s1>,<s2> is returned unchanged when the smaller one is
 * -eslINFINITY or lies at least 23 below the larger. Otherwise the
 * correction term is read from flogsum_lookup[] (filled by FLogsumInit())
 * at index (int)((max-min)*INTSCALE); that entry stands in for the
 * analytical term sreLOG2(1.0 + sreEXP2(min-max)).
 */
float
FLogsum(float s1, float s2)
{
  const float hi = ESL_MAX(s1, s2);
  const float lo = ESL_MIN(s1, s2);

  /* The smaller term contributes nothing measurable: answer is the larger term. */
  if (lo == -eslINFINITY || (hi-lo) >= 23.f)
    return hi;

  return hi + flogsum_lookup[(int)((hi-lo)*INTSCALE)];
}
Exemple #2
0
/* init_ilogsum()
 *
 * Fill ilogsum_lookup[0..LOGSUM_TBL-1], where entry i is
 *   rint(INTSCALE * sreLOG2(1 + sreEXP2(-i / INTSCALE))).
 *
 * Only the first call does any work; a function-local static flag makes
 * every later call return without touching the table.
 */
void
init_ilogsum(void)
{
  static int needs_init = TRUE;  /* TRUE until the table has been filled once */
  int        idx;                /* table index */

  if (needs_init) {
    needs_init = FALSE;
    for (idx = 0; idx < LOGSUM_TBL; idx++) {
      ilogsum_lookup[idx] = rint(INTSCALE * (sreLOG2(1.+sreEXP2((double) -idx/INTSCALE))));
    }
  }
}
Exemple #3
0
/* FLogsumInit()
 *
 * Fill flogsum_lookup[0..LOGSUM_TBL-1], where entry i is
 *   sreLOG2(1 + sreEXP2(-i / INTSCALE)).
 *
 * Only the first call does any work; a function-local static flag makes
 * every later call return without touching the table.
 */
void
FLogsumInit(void)
{
  static int needs_init = TRUE;  /* TRUE until the table has been filled once */
  int        idx;                /* table index */

  if (needs_init) {
    needs_init = FALSE;
    for (idx = 0; idx < LOGSUM_TBL; idx++) {
      flogsum_lookup[idx] = sreLOG2(1. + sreEXP2((double) -idx / INTSCALE));
    }
  }
}
Exemple #4
0
/* Function: cm_tr_penalties_Validate()
 * Date:     EPN, Fri Jan 27 14:57:04 2012
 *
 * Purpose:  Validate a CM_TR_PENALTIES object by checking that
 *           all possible fragments in local mode sum to 1.0
 *           for the three scenarios: 5' and 3' truncation, 
 *           5' truncation only and 3' truncation only.
 *        
 *           This is an expensive test and was written only to test
 *           the code that determines fragment probability (really
 *           only for local mode) in cm_tr_penalties_Create().  It can
 *           only be run if the <ignore_inserts> flag was set to TRUE
 *           when cm_tr_penalties_Create() was called.  However, in
 *           real life that inserts should not be ignored, so this
 *           test should never actually be run except during testing
 *           (it also is helpful for understanding the logic behind
 *           the derivation of the truncated begin
 *           penalties/probabilities).
 * 
 * Returns:  eslOK if all checks pass within tolerance level.
 *           eslFAIL if any check fails, errbuf is filled.
 */
int
cm_tr_penalties_Validate(CM_TR_PENALTIES *trp, CM_t *cm, double tol, char *errbuf)
{
  if(! trp->ignored_inserts) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), trp->ignored_inserts flag is not TRUE");

  /* This is an expensive test of the trp->l_ptyAA values, the truncation
   * penalties for local mode alignment. We test each of the three arrays
   * in trp->ptyAA, one each for the following three scenarios:
   * 
   * 1. trp->l_ptyAA[TRPENALTY_5P_AND_3P][0..v..M-1]: penalty for state v 
   *    when 5' and 3' truncation are allowed.
   * 2. trp->l_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1]: penalty for state v when
   *    only 5' truncation is allowed.
   * 3. trp->l_ptyAA[TRPENALTY_3P_ONLY][0..v..M-1]: penalty for state v when
   *    only 3' truncation is allowed.
   *
   * The test is to enumerate all possible g,h fragments in the
   * consensus yield 1..clen, for those that can possibly be generated
   * in the scenario (^), determine the state t with the smallest
   * subtree yield that contains g..h. This is the state at which an
   * alignment of a g..h fragment would be rooted. We then add the
   * probability of a truncated parsetree rooted at v (that is,
   * exp_2(trp->l_ptyAA[][t])) to a growing sum. After all fragments
   * are considered the sum should be 1.0.  If it is then our
   * penalties are valid, if not they're invalid and we computed them
   * incorrectly.
   *
   * (^): When 5' and 3' truncation are both allowed, all fragments can be
   * generated, but not all fragments (for most models) can be generated if
   * only 5' or 3' truncation is allowed.
   *
   */
  
  double sump = 0.;  /* the sum, should be 1.0 after all fragments are considered */
  int    lpos, rpos; /* left and right consensus positions of a parsetree */
  int    g, h;       /* fragment start/stop */
  int    keep_going; /* break the loop when this is set to FALSE */
  int    nd, v; 
  /* test 1: trp->l_ptyAA[TRPENALTY_5P_AND_3P]: */
  for(g = 1; g <= cm->clen; g++) { 
    for(h = g; h <= cm->clen; h++) { 
      /* determine which node a truncated parsetree from [a..b] would align to, 
       * this will be lowest node in the model whose subtree spans a..b
       */
      nd = cm->nodes-1;
      keep_going = TRUE;
      while(keep_going) { 
	if(nd == 0) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate: 5' and 3' test, unable to find node that spans %d..%d\n", g, h);
	lpos = cm->emap->lpos[nd];
	rpos = cm->emap->rpos[nd];
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd) lpos++; /* lpos was one less than what we want */
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATR_nd) rpos--; /* rpos was one more than what we want */
	if((cm->ndtype[nd] == BIF_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATR_nd) && 
	   (lpos <= g && rpos >= h)) { 
	  keep_going = FALSE; 
	}
	else { nd--; }
      }
      v = cm->nodemap[nd];
      sump += sreEXP2(trp->l_ptyAA[TRPENALTY_5P_AND_3P][v]);
      /* printf("LRBOTH g: %3d h: %3d nd: %3d adding %10.5f  (%10.5f)\n", g, h, nd, trp->l_ptyAA[TRPENALTY_5P_AND_3P][v], sump); */
    }
  }
  printf("L and R sump:  %.5f\n", sump);
  if(esl_DCompare(1.0, sump, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 5' and 3' truncation test failed (%g != 1.0)", sump);

  /* test 2: trp->l_ptyAA[TRPENALTY_5P_ONLY]: */
  sump = 0.;
  for(g = 1; g <= cm->clen; g++) { 
    for(h = g; h <= cm->clen; h++) { 
      /* determine which node a truncated parsetree from [g..h] would align to, 
       * this will be lowest node in the model whose subtree spans g..h.
       * Since we're only truncating on the left, an alignment from 
       * g..h may be impossible, only those fragments for which a node exists with 
       * lpos <= g and rpos==h will be possible.
       */
      nd = cm->nodes-1;
      keep_going = TRUE;
      while(keep_going && nd > 0) { 
	lpos = cm->emap->lpos[nd];
	rpos = cm->emap->rpos[nd];
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd) lpos++; /* lpos was one less than what we want */
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATR_nd) rpos--; /* rpos was one more than what we want */
	if((cm->ndtype[nd] == BIF_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATR_nd) && 
	   (lpos <= g && rpos == h)) { 
	  keep_going = FALSE; 
	}
	else { nd--; }
      }
      if(keep_going == FALSE) { 
	v = cm->nodemap[nd];
	sump += sreEXP2(trp->l_ptyAA[TRPENALTY_5P_ONLY][v]);
      }
    }
  }
  printf("L only  sump:  %.5f\n", sump);
  if(esl_DCompare(1.0, sump, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 5' only truncation test failed (%g != 1.0)", sump);

  /* test 3: trp->l_ptyAA[TRPENALTY_3P_ONLY]: */
  sump = 0.;
  for(g = 1; g <= cm->clen; g++) { 
    for(h = g; h <= cm->clen; h++) { 
      /* determine which node a truncated parsetree from [g..h] would align to, 
       * this will be lowest node in the model whose subtree spans g..h
       * since we're only truncating on the right, an alignment from 
       * g..h may be impossible, only those for which a node exists with 
       * lpos==g and rpos >= h will be possible.
       */
      nd = cm->nodes-1;
      keep_going = TRUE;
      while(keep_going && nd > 0) { 
	lpos = cm->emap->lpos[nd];
	rpos = cm->emap->rpos[nd];
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd) lpos++; /* lpos was one less than what we want */
	if(cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATR_nd) rpos--; /* rpos was one more than what we want */
	if((cm->ndtype[nd] == BIF_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATR_nd) && 
	   (lpos == g && rpos >= h)) { 
	  keep_going = FALSE; 
	}
	else { nd--; }
      }
      if(keep_going == FALSE) { 
	v = cm->nodemap[nd];
	sump += sreEXP2(trp->l_ptyAA[TRPENALTY_3P_ONLY][v]);
      }
    }
  }
  printf("R only  sump:  %.5f\n", sump);
  if(esl_DCompare(1.0, sump, tol) != eslOK) ESL_FAIL(eslFAIL, errbuf, "cm_tr_penalties_Validate(), 3' only truncation test failed (%g != 1.0)", sump);
  
  return eslOK;
}
/* Function: Score2Prob()
 *
 * Purpose:  Convert an integer log_2 odds score back to a probability.
 *           The null model probability <null>, if any, is needed to
 *           undo the odds ratio.
 *
 * Args:     sc   - integer score; it is divided by INTSCALE before
 *                  being exponentiated
 *           null - null model probability the result is multiplied by
 *
 * Returns:  0 when <sc> is -INFTY, otherwise
 *           null * sreEXP2(sc / INTSCALE).
 */
float
Score2Prob(int sc, float null)
{
  return (sc == -INFTY) ? 0. : (null * sreEXP2((float) sc / INTSCALE));
}
/* Function: CPlan9InitEL()
 * Incept:   EPN, Tue Jun 19 13:10:56 2007
 *
 * Purpose:  Prepare a CP9 HMM for possible EL local ends by working
 *           out, from the CM node topology, how the EL states are
 *           connected.
 *
 *           A CM node nd "qualifies" when it is a MATP, MATL, MATR,
 *           BEGL or BEGR node and the node after it is not an END
 *           node. For every qualifying node, HMM node emap->rpos[nd]
 *           can be reached from the EL state of HMM node
 *           emap->lpos[nd]. The function:
 *             - copies the CM's EL self transition into cp9->el_self
 *               and cp9->el_selfsc,
 *             - counts incoming EL transitions per HMM node in
 *               cp9->el_from_ct[] and marks cp9->has_el[],
 *             - allocates cp9->el_from_idx[k] and cp9->el_from_cmnd[k]
 *               only for nodes k with a nonzero count,
 *             - fills those arrays with the source HMM node and the
 *               CM node of each transition.
 *
 * Args:     cm     - the CM
 *           cp9    - the CP9 HMM, built from cm
 *
 * Return:   (void)
 *           Calls cm_Fail() if el_from_idx[k] is already non-NULL or
 *           if an allocation fails.
 */
void
CPlan9InitEL(CM_t *cm, CP9_t *cp9)
{
  int          status;   /* not referenced directly here; presumably set by ESL_ALLOC -- confirm */
  CMEmitMap_t *emap;     /* consensus emit map for the CM */
  int         *nfilled;  /* per HMM node: slots of el_from_idx[k] written so far */
  int          nd;       /* counter over CM nodes */
  int          k;        /* counter over HMM nodes */

  /* The EL self transition comes straight from the CM. */
  cp9->el_self   = sreEXP2(cm->el_selfsc);
  cp9->el_selfsc = Prob2Score(cp9->el_self, 1.0);

  /* Two passes over the CM nodes: the first counts the incoming EL
   * transitions of each HMM node, the second fills arrays sized from
   * those counts. This saves memory because nothing is allocated for
   * nodes without incoming EL transitions.
   */
  emap = CreateEmitMap(cm);

  /* Start all counts/flags at zero. Index M+1 is the E state, which can
   * be reached from EL states and so needs a count (but no has_el flag).
   */
  for (k = 0; k <= cp9->M; k++) {
    cp9->el_from_ct[k] = 0;
    cp9->has_el[k]     = FALSE;
  }
  cp9->el_from_ct[(cp9->M+1)] = 0;

  /* Pass 1: count valid transitions. */
  for (nd = 0; nd < cm->nodes; nd++) {
    if (cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd &&
        cm->ndtype[nd] != MATR_nd && cm->ndtype[nd] != BEGL_nd &&
        cm->ndtype[nd] != BEGR_nd)
      continue;
    /* only look at the next node once this one is known to be a candidate */
    if (cm->ndtype[nd+1] == END_nd)
      continue;

    cp9->el_from_ct[emap->rpos[nd]]++;
    cp9->has_el[emap->lpos[nd]] = TRUE;
  }

  /* Allocate per-node arrays where the count is positive; the others stay NULL. */
  for (k = 0; k <= (cp9->M+1); k++) {
    if (cp9->el_from_idx[k] != NULL) /* already filled, shouldn't happen */
      cm_Fail("ERROR in CPlan9InitEL() el_from_idx has already been initialized\n");
    if (cp9->el_from_ct[k] <= 0)
      continue;
    ESL_ALLOC(cp9->el_from_idx[k], sizeof(int) * cp9->el_from_ct[k]);
    ESL_ALLOC(cp9->el_from_cmnd[k],sizeof(int) * cp9->el_from_ct[k]);
  }

  /* Pass 2: fill the arrays, tracking the next free slot per HMM node. */
  ESL_ALLOC(nfilled, sizeof(int) * (cp9->M+2));
  for (k = 0; k <= (cp9->M+1); k++) {
    nfilled[k] = 0;
  }
  for (nd = 0; nd < cm->nodes; nd++) {
    if (cm->ndtype[nd] != MATP_nd && cm->ndtype[nd] != MATL_nd &&
        cm->ndtype[nd] != MATR_nd && cm->ndtype[nd] != BEGL_nd &&
        cm->ndtype[nd] != BEGR_nd)
      continue;
    if (cm->ndtype[nd+1] == END_nd)
      continue;

    k = emap->rpos[nd];
    cp9->el_from_idx[k][nfilled[k]]  = emap->lpos[nd];
    cp9->el_from_cmnd[k][nfilled[k]] = nd;
    nfilled[k]++;
  }

  /* Release scratch memory and leave. */
  free(nfilled);
  FreeEmitMap(emap);
  return;

 ERROR:
  cm_Fail("Memory allocation error.");
}