/* Function: revcomp()
 *
 * Purpose:  Write the reverse complement of seq into comp.
 *           Each character is upper-cased, looked up in the iupac[]
 *           table, and replaced by that entry's symcomp. A character
 *           that is not in the table produces a warning and is
 *           emitted as 'N'. If the input character was lower case,
 *           the output character is lower-cased as well.
 *
 * Args:     comp - destination buffer; receives strlen(seq) characters
 *                  plus a terminating '\0'. Output is written
 *                  front-to-back while seq is read back-to-front, so
 *                  comp should be a different buffer than seq.
 *           seq  - NUL-terminated sequence to reverse complement
 *
 * Return:   comp, or NULL if either argument is NULL.
 */
char *
revcomp(char *comp, char *seq)
{
  long  bases;
  long  pos;
  char *fwdp;
  char  orig;
  int   idx;
  int   c;

  if (comp == NULL) return NULL;
  if (seq  == NULL) return NULL;

  bases = strlen(seq);
  fwdp  = comp;

  /* Index from the end rather than keeping a decrementing pointer, so
   * that no pointer before the start of seq is ever formed (including
   * when seq is the empty string).
   */
  for (pos = 0; pos < bases; pos++)
    {
      orig = seq[bases - 1 - pos];
      c    = sre_toupper((int) orig);

      /* Bounds test comes first so iupac[IUPACSYMNUM] is never read. */
      for (idx = 0; idx < IUPACSYMNUM && c != iupac[idx].sym; idx++)
        ;

      if (idx == IUPACSYMNUM)
        {
          Warn("Can't reverse complement an %c, pal. Using N.", c);
          *fwdp = 'N';
        }
      else
        *fwdp = iupac[idx].symcomp;

      /* Preserve the case of the input character. The cast keeps the
       * argument to islower() in the range <ctype.h> requires.
       */
      if (islower((unsigned char) orig))
        *fwdp = sre_tolower((int) *fwdp);

      fwdp++;
    }
  *fwdp = '\0';

  return comp;
}
/* Function: GCGchecksum()
 *
 * Purpose:  Compute a checksum over the first seqlen characters of
 *           seq. Each character is upper-cased and multiplied by a
 *           position weight that cycles 1..57; the weighted values
 *           are summed modulo 10000.
 *
 * Args:     seq    - characters to checksum (need not be NUL-terminated;
 *                    exactly seqlen characters are read)
 *           seqlen - number of characters to include
 *
 * Return:   the checksum. For input whose upper-cased character codes
 *           are non-negative (e.g. ASCII sequence data) this is a
 *           number between 0 and 9999.
 */
int
GCGchecksum(char *seq, int seqlen)
{
  int check = 0;
  int count = 0;
  int i;

  for (i = 0; i < seqlen; i++)
    {
      count++;
      /* Reduce as we go: the running sum stays small, so a long
       * sequence cannot overflow the int accumulator. For non-negative
       * terms this yields the same value as reducing once at the end.
       */
      check = (check + count * sre_toupper((int) seq[i])) % 10000;
      if (count == 57) count = 0;
    }
  return check;
}
/* Function: GCGMultchecksum()
 *
 * Purpose:  Simple modification of GCGchecksum(),
 *           to create a checksum for multiple sequences.
 *           Gaps count. The position weight (cycling 1..57) carries
 *           over from one sequence to the next; it is not reset at
 *           sequence boundaries.
 *
 * Args:     seqs - sequences to be checksummed; each must be a
 *                  NUL-terminated string
 *           nseq - number of sequences
 *
 * Return:   the checksum, a number between 0 and 9999
 *           (for input whose upper-cased character codes are
 *           non-negative, e.g. ASCII sequence data)
 */
int
GCGMultchecksum(char **seqs, int nseq)
{
  int   check = 0;
  int   count = 0;
  int   idx;
  char *sptr;

  for (idx = 0; idx < nseq; idx++)
    for (sptr = seqs[idx]; *sptr; sptr++)
      {
        count++;
        /* Reduce as we go: summed over a whole alignment the raw total
         * can exceed the range of int, so keep the accumulator small.
         * For non-negative terms this equals reducing once at the end.
         */
        check = (check + count * sre_toupper((int) *sptr)) % 10000;
        if (count == 57) count = 0;
      }
  return check;
}
/* Function: Seqtype() * * Purpose: Returns a (very good) guess about type of sequence: * kDNA, kRNA, kAmino, or kOtherSeq. * * Modified from, and replaces, Gilbert getseqtype(). */ int Seqtype(char *seq) { int saw; /* how many non-gap characters I saw */ char c; int po = 0; /* count of protein-only */ int nt = 0; /* count of t's */ int nu = 0; /* count of u's */ int na = 0; /* count of nucleotides */ int aa = 0; /* count of amino acids */ int no = 0; /* count of others */ /* Look at the first 300 non-gap characters */ for (saw = 0; *seq != '\0' && saw < 300; seq++) { c = sre_toupper((int) *seq); if (! isgap(c)) { if (strchr(protonly, c)) po++; else if (strchr(primenuc,c)) { na++; if (c == 'T') nt++; else if (c == 'U') nu++; } else if (strchr(aminos,c)) aa++; else if (isalpha(c)) no++; saw++; } } if (no > 0) return kOtherSeq; else if (po > 0) return kAmino; else if (na > aa) { if (nu > nt) return kRNA; else return kDNA; } else return kAmino; }