Esempio n. 1
0
/* Function: revcomp()
 *
 * Purpose:  Write the reverse complement of seq into comp.
 *           Each character of seq is read from the end towards the
 *           start, upper-cased with sre_toupper(), and looked up in
 *           the iupac[] table; the table's symcomp entry is written
 *           to comp. A character that was lower case in seq produces
 *           a lower-case character in comp. A character not found in
 *           the table triggers a Warn() and is written as 'N'.
 *
 * Args:     comp - destination buffer; strlen(seq)+1 chars are written
 *           seq  - NUL-terminated input sequence
 *
 * Note:     comp and seq must be distinct buffers: comp is filled from
 *           the front while seq is still being read from the back.
 *
 * Return:   comp, or NULL if either argument is NULL.
 */
char *
revcomp(char *comp, char *seq)
{
  long  bases;			/* length of seq */
  long  pos;			/* position being written in comp */
  int   idx;			/* index into iupac[] */
  int   orig;			/* character as it appears in seq */
  int   c;			/* upper-cased copy used for lookup */

  if (comp == NULL) return NULL;
  if (seq == NULL)  return NULL;
  bases = strlen(seq);

  /* Index from the end rather than walking a pointer backwards, so
   * no pointer before the start of seq is ever formed (empty seq).
   */
  for (pos = 0; pos < bases; pos++)
    {
      orig = seq[bases - 1 - pos];
      c    = sre_toupper(orig);

      /* Bound check must come first: otherwise an unknown symbol
       * makes the loop read iupac[IUPACSYMNUM], one past the table.
       */
      for (idx = 0; idx < IUPACSYMNUM && c != iupac[idx].sym; idx++)
	;

      if (idx == IUPACSYMNUM)
	{
	  Warn("Can't reverse complement an %c, pal. Using N.", c);
	  comp[pos] = 'N';
	}
      else
	comp[pos] = iupac[idx].symcomp;

      /* <ctype.h> functions need an unsigned char value (or EOF) */
      if (islower((unsigned char) orig))
	comp[pos] = sre_tolower((int) comp[pos]);
    }
  comp[bases] = '\0';
  return comp;
}
Esempio n. 2
0
/* Function: GCGchecksum()
 *
 * Purpose:  Compute a checksum over the first seqlen characters of
 *           seq. Each character is upper-cased with sre_toupper()
 *           and multiplied by a weight that runs 1..57 and then
 *           restarts at 1; the weighted values are summed modulo
 *           10000.
 *
 * Args:     seq    - characters to be checksummed
 *           seqlen - number of characters of seq to read
 *
 * Return:   the checksum (0 when seqlen <= 0)
 */
int
GCGchecksum(char *seq, int   seqlen)
{
  int  check = 0, count = 0, i;

  for (i = 0; i < seqlen; i++) {
    count++;
    /* Reduce at every step so the running sum stays small; summing
     * everything first overflows a signed int on long sequences.
     * For non-negative character codes the result is the same as
     * reducing once at the end.
     */
    check = (check + count * sre_toupper((int) seq[i])) % 10000;
    if (count == 57) count = 0;
    }
  return check;
}
Esempio n. 3
0
/* Function: GCGMultchecksum()
 * 
 * Purpose:  Simple modification of GCGchecksum(),
 *           to create a checksum for multiple sequences.
 *           Gaps count. Every character of every sequence is
 *           upper-cased with sre_toupper() and multiplied by a
 *           weight cycling 1..57; the weight cycle is not reset
 *           between sequences, so the sequences are treated as
 *           one concatenated string.
 *           
 * Args:     seqs - sequences to be checksummed (each NUL-terminated)
 *           nseq - number of sequences
 *           
 * Return:   the checksum, a number between 0 and 9999
 */                      
int
GCGMultchecksum(char **seqs, int nseq)
{
  int check = 0;
  int count = 0;
  int idx;
  char *sptr;

  for (idx = 0; idx < nseq; idx++)
    for (sptr = seqs[idx]; *sptr; sptr++)
      {
	count++;
	/* Reduce at every step: the total over many long sequences
	 * would otherwise overflow a signed int. For non-negative
	 * character codes this equals reducing once at the end.
	 */
	check = (check + count * sre_toupper((int) *sptr)) % 10000;
	if (count == 57) count = 0;
      }
  return check;
}
Esempio n. 4
0
/* Function: Seqtype()
 * 
 * Purpose:  Returns a (very good) guess about type of sequence:
 *           kDNA, kRNA, kAmino, or kOtherSeq.
 *
 *           Up to the first 300 non-gap characters are upper-cased
 *           and classified, in this order of precedence, by
 *           membership in the protonly, primenuc, and aminos
 *           character sets; any remaining alphabetic character is
 *           counted as "other". Non-alphabetic characters outside
 *           those sets use up one of the 300 slots but are not
 *           counted in any category.
 *
 *           Decision: any "other" character -> kOtherSeq; else any
 *           protein-only character -> kAmino; else, if nucleotide
 *           characters outnumber amino characters, kRNA when U's
 *           outnumber T's and kDNA otherwise; else kAmino. A
 *           sequence with nothing counted therefore yields kAmino.
 *           
 *           Modified from, and replaces, Gilbert getseqtype().
 *
 * Args:     seq - NUL-terminated sequence to examine
 *
 * Return:   kDNA, kRNA, kAmino, or kOtherSeq
 */
int
Seqtype(char *seq)
{
  int  saw;			/* how many non-gap characters I saw */
  char c;
  int  po = 0;			/* count of protein-only */
  int  nt = 0;			/* count of t's */
  int  nu = 0;			/* count of u's */
  int  na = 0;			/* count of nucleotides */
  int  aa = 0;			/* count of amino acids */
  int  no = 0;			/* count of others */
  
  /* Look at the first 300 non-gap characters
   */
  for (saw = 0; *seq != '\0' && saw < 300; seq++)
    {
      c = sre_toupper((int) *seq);
      if (isgap(c)) continue;	/* gaps are skipped and not counted */

      if (strchr(protonly, c)) po++;
      else if (strchr(primenuc,c)) {
	na++;
	if (c == 'T') nt++;
	else if (c == 'U') nu++;
      }
      else if (strchr(aminos,c)) aa++;
      /* <ctype.h> functions need an unsigned char value (or EOF);
       * a plain char may be negative.
       */
      else if (isalpha((unsigned char) c)) no++;
      saw++;
    }

  if (no > 0)   return kOtherSeq;
  if (po > 0)   return kAmino;
  if (na <= aa) return kAmino;
  return (nu > nt) ? kRNA : kDNA;
}