Exemplo n.º 1
0
/* Function: StrMarkov0()
 * Date:     SRE, Fri Oct 29 11:08:31 1999 [St. Louis]
 *
 * Purpose:  Returns a random string s1 with the same
 *           length and zero-th order Markov properties
 *           as s2. 
 *           
 *           s1 and s2 may be identical, to randomize s2
 *           in place.
 *
 * Args:     s1 - allocated space for random string
 *           s2 - string to base s1's properties on.
 *
 * Returns:  1 on success; 0 if s2 doesn't look alphabetical.
 */
int 
StrMarkov0(char *s1, char *s2)
{
  int   len;
  int   pos; 
  float p[26];			/* symbol probabilities */

  /* First, verify that the string is entirely alphabetic.
   */
  len = strlen(s2);
  for (pos = 0; pos < len; pos++)
    if (! isalpha(s2[pos])) return 0;

  /* Collect zeroth order counts and convert to frequencies.
   */
  FSet(p, 26, 0.);
  for (pos = 0; pos < len; pos++)
    p[(int)(toupper(s2[pos]) - 'A')] += 1.0;
  FNorm(p, 26);

  /* Generate a random string using those p's.
   */
  for (pos = 0; pos < len; pos++)
    s1[pos] = FChoose(p, 26) + 'A';
  s1[pos] = '\0';

  return 1;
}
Exemplo n.º 2
0
/* Function: StrMarkov1()
 * Date:     SRE, Fri Oct 29 11:22:20 1999 [St. Louis]
 *
 * Purpose:  Returns a random string s1 with the same
 *           length and first order Markov properties
 *           as s2. 
 *           
 *           s1 and s2 may be identical, to randomize s2
 *           in place.
 *
 * Args:     s1 - allocated space for random string
 *           s2 - string to base s1's properties on.
 *
 * Returns:  1 on success; 0 if s2 doesn't look alphabetical.
 */
int 
StrMarkov1(char *s1, char *s2)
{
  int   len;
  int   pos; 
  int   x,y;
  int   i;			/* initial symbol */
  float p[26][26];		/* symbol probabilities */

  /* First, verify that the string is entirely alphabetic.
   */
  len = strlen(s2);
  for (pos = 0; pos < len; pos++)
    if (! isalpha(s2[pos])) return 0;

  /* Collect first order counts and convert to frequencies.
   */
  for (x = 0; x < 26; x++) FSet(p[x], 26, 0.);

  i = x = toupper(s2[0]) - 'A';
  for (pos = 1; pos < len; pos++)
    {
      y = toupper(s2[pos]) - 'A';
      p[x][y] += 1.0; 
      x = y;
    }
  for (x = 0; x < 26; x++) 
    FNorm(p[x], 26);

  /* Generate a random string using those p's.
   */
  x = i;
  s1[0] = x + 'A';
  for (pos = 1; pos < len; pos++)
    {
      y = FChoose(p[x], 26);
      s1[pos] = y + 'A';
      x = y;
    } 
  s1[pos] = '\0';

  return 1;
}
Exemplo n.º 3
0
/* Function: EmitSequence()
 * Date:     SRE, Sun Mar  8 12:28:03 1998 [St. Louis]
 *
 * Purpose:  Given a model, sample a sequence and/or traceback.
 *
 * Args:     hmm     - the model
 *           ret_dsq - RETURN: generated digitized sequence (pass NULL if unwanted)
 *           ret_L   - RETURN: length of generated sequence 
 *           ret_tr  - RETURN: generated trace (pass NULL if unwanted)
 *
 * Returns:  void
 */
void
EmitSequence(struct plan7_s *hmm, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr)
{
  struct p7trace_s *tr;
  enum   p7stype    type;	/* current state type */
  int   k;			/* current node index */
  char *dsq;                    /* generated sequence, digitized */
  int   L;			/* length of sequence */
  int   alloc_tlen;		/* allocated space for traceback */
  int   alloc_L;		/* allocated space for sequence  */
  int   tpos;			/* position in traceback */
  int   sym;			/* a generated symbol index */
  float t[4];			/* little array for choosing M transition from */
  
  /* Initialize; allocations
   */
  P7AllocTrace(64, &tr);
  alloc_tlen = 64;
  dsq = MallocOrDie(sizeof(char) * 64);
  alloc_L = 64;

  TraceSet(tr, 0, STS, 0, 0);
  TraceSet(tr, 1, STN, 0, 0);
  dsq[0] = (char) Alphabet_iupac;
  L      = 1;
  k      = 0;
  type   = STN;
  tpos   = 2;

  while (type != STT) 
    {
      /* Deal with state transition
       */
      switch (type) {
      case STB:	type = STM; k = FChoose(hmm->begin+1, hmm->M) + 1; break;
      case STI:	type = (FChoose(hmm->t[k]+TIM, 2) == 0)    ? STM : STI; if (type == STM) k++; break;
      case STN: type = (FChoose(hmm->xt[XTN], 2)  == LOOP) ? STN : STB; k = 0; break;
      case STE:	type = (FChoose(hmm->xt[XTE], 2)  == LOOP) ? STJ : STC; k = 0; break;
      case STC:	type = (FChoose(hmm->xt[XTC], 2)  == LOOP) ? STC : STT; k = 0; break;
      case STJ:	type = (FChoose(hmm->xt[XTJ], 2)  == LOOP) ? STJ : STB; k = 0; break;

      case STD:	
	if (k < hmm->M) {
	  type = (FChoose(hmm->t[k]+TDM, 2) == 0) ? STM : STD; 
	  k++;   
	} else {
	  type = STE;
	  k = 0;
	}
	break;

      case STM:
	if (k < hmm->M) {
	  FCopy(t, hmm->t[k], 3);
	  t[3] = hmm->end[k];
	  switch (FChoose(t,4)) {
	  case 0: k++;  type = STM; break;
	  case 1:       type = STI; break;
	  case 2: k++;  type = STD; break;
	  case 3: k=0;  type = STE; break;
	  default: Die("never happens");
	  }
	} else {
	  k    = 0;
	  type = STE;
	}
	break;

      case STT:
      case STBOGUS:
      default:
	Die("can't happen.");
      }
  
      /* Choose a symbol emission, if necessary
       */
      sym = -1;
      if      (type == STM) sym = FChoose(hmm->mat[k], Alphabet_size);
      else if (type == STI) sym = FChoose(hmm->ins[k], Alphabet_size); 
      else if ((type == STN && tr->statetype[tpos-1] == STN) ||
	       (type == STC && tr->statetype[tpos-1] == STC) ||
	       (type == STJ && tr->statetype[tpos-1] == STJ))
	sym = FChoose(hmm->null, Alphabet_size);
	
      /* Add to the traceback; deal with realloc if necessary
       */
      TraceSet(tr, tpos, type, k, (sym != -1) ? L : 0);
      tpos++;
      if (tpos == alloc_tlen) {
	alloc_tlen += 64; 
	P7ReallocTrace(tr, alloc_tlen);
      }

      /* Add to the digitized seq; deal with realloc, if necessary
       */
      if (sym != -1) {
	dsq[L] = (char) sym;
	L++;
	if (L+1 == alloc_L) {	/* L+1 leaves room for sentinel byte + \0 */
	  alloc_L += 64;
	  dsq = ReallocOrDie(dsq, sizeof(char) * alloc_L);
	}
      }
    }
  
  /* Finish off the trace
   */ 
  tr->tlen = tpos;

  /* Finish off the dsq with sentinel byte and null terminator.
   * Emitted Sequence length is L-1.
   */
  dsq[L]   = (char) Alphabet_iupac;
  dsq[L+1] = '\0';
  L--;

  /* Return
   */
  if (ret_dsq != NULL) *ret_dsq = dsq; else free(dsq);
  if (ret_L   != NULL) *ret_L   = L;
  if (ret_tr  != NULL) *ret_tr  = tr;  else P7FreeTrace(tr);
  return;
}