boolean faFastReadNext(FILE *f, DNA **retDna, int *retSize, char **retName)
/* Pull the next FA record out of f, one character at a time.
 * The remainder of the current line is treated as the header: its first
 * word (with any '>' characters dropped) becomes the record name, cut off
 * at 255 characters.  Everything after the header up to the next '>' or
 * end of file is the sequence; non-letters are skipped and each letter is
 * translated through ntChars, with a zero lookup result stored as 'n'.
 * On success *retDna points at the zero-terminated sequence in faFastBuf,
 * *retSize is its length without the terminator, *retName points at the
 * name, and TRUE is returned.  If end of file is hit before the header
 * line is complete, all three outputs are cleared and FALSE is returned.
 * Both the sequence buffer and the name are reused by the next call. */
{
static char name[256];
int nameLen = 0;
int seqLen = 0;
boolean nameDone = FALSE;
int ch;

dnaUtilOpen();   /* NOTE(review): presumably sets up ntChars - confirm. */
name[0] = 0;

/* Consume the header line, collecting its first word as the name. */
do
    {
    ch = fgetc(f);
    if (ch == EOF)
        {
        *retDna = NULL;
        *retSize = 0;
        *retName = NULL;
        return FALSE;
        }
    if (!nameDone && nameLen < ArraySize(name)-1)
        {
        if (isspace(ch))
            nameDone = TRUE;
        else if (ch != '>')
            name[nameLen++] = ch;
        }
    }
while (ch != '\n');
name[nameLen] = 0;

/* Collect sequence letters until the next record starts or input ends.
 * The '>' that ends this record is consumed here, which is why the
 * header scan above simply ignores '>' characters. */
for (;;)
    {
    boolean atEnd;

    ch = fgetc(f);
    atEnd = (ch == EOF || ch == '>');
    if (!atEnd)
        {
        if (!isalpha(ch))
            continue;
        ch = ntChars[ch];
        if (ch == 0)
            ch = 'n';
        }
    if (seqLen >= faFastBufSize)
        expandFaFastBuf(seqLen, 0);
    /* At the end of the record the stored byte is the terminator. */
    faFastBuf[seqLen++] = atEnd ? 0 : ch;
    if (atEnd)
        break;
    }

*retDna = faFastBuf;
*retSize = seqLen - 1;   /* Do not count the terminating zero. */
*retName = name;
return TRUE;
}
boolean faMixedSpeedReadNext(struct lineFile *lf, DNA **retDna, int *retSize, char **retName)
/* Read in DNA or Peptide FA record in mixed case.  Allow any upper or lower
 * case letter, or the dash character in; every other character is dropped.
 * The first line read must start with '>' and its first word becomes the
 * record name (truncated to 511 characters); otherwise errAbort is called.
 * On success *retDna points at the zero-terminated sequence in faFastBuf,
 * *retSize is its length, *retName points at the name, and TRUE is
 * returned.  A record with no sequence letters still returns TRUE after
 * issuing a warning.  When lineFileNext yields no line at the start, all
 * three outputs are cleared and FALSE is returned.
 * The sequence buffer and the name are reused by the next call. */
{
char c;
int bufIx = 0;
static char name[512];
int lineSize, i;
char *line;

dnaUtilOpen();

/* Read first line, make sure it starts with '>', and read first word
 * as name of sequence. */
name[0] = 0;
if (!lineFileNext(lf, &line, &lineSize))
    {
    *retDna = NULL;
    *retSize = 0;
    *retName = NULL;   /* Clear the name as well, matching faFastReadNext. */
    return FALSE;
    }
if (line[0] == '>')
    {
    line = firstWordInLine(skipLeadingSpaces(line+1));
    if (line == NULL)
        errAbort("Expecting sequence name after '>' line %d of %s", lf->lineIx, lf->fileName);
    strncpy(name, line, sizeof(name));
    name[sizeof(name)-1] = '\0'; /* strncpy does not terminate on truncation. */
    }
else
    {
    errAbort("Expecting '>' line %d of %s", lf->lineIx, lf->fileName);
    }

/* Read until next '>' */
for (;;)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        break;
    if (line[0] == '>')
        {
        /* NOTE(review): presumably hands the header line back so the
         * next call reads it again - confirm against lineFileReuse. */
        lineFileReuse(lf);
        break;
        }
    /* Make room for the whole line before copying any of it. */
    if (bufIx + lineSize >= faFastBufSize)
        expandFaFastBuf(bufIx, lineSize);
    for (i=0; i<lineSize; ++i)
        {
        c = line[i];
        /* Cast first: passing a negative plain char to isalpha is
         * undefined behavior. */
        if (isalpha((unsigned char)c) || c == '-')
            faFastBuf[bufIx++] = c;
        }
    }
/* Guarantee a slot for the terminating zero. */
if (bufIx >= faFastBufSize)
    expandFaFastBuf(bufIx, 0);
faFastBuf[bufIx] = 0;
*retDna = faFastBuf;
*retSize = bufIx;
*retName = name;
if (bufIx == 0)
    {
    warn("Invalid fasta format: sequence size == 0 for element %s",name);
    }

return TRUE;
}
boolean faSpeedReadNextKeepCase(struct lineFile *lf, DNA **retDna, int *retSize, char **retName)
/* Read in next FA entry as fast as we can, keeping the case of each letter.
 * Faster than that old, pokey faFastReadNext.
 * The first line read must start with '>' and its first word becomes the
 * record name (truncated to 255 characters); otherwise errAbort is called.
 * Only alphabetic characters from the following lines are kept.
 * On success *retDna points at the zero-terminated sequence in faFastBuf,
 * *retSize is its length, *retName points at the name, and TRUE is
 * returned.  When lineFileNext yields no line at the start, all three
 * outputs are cleared and FALSE is returned.
 * The returned DNA and name will be overwritten by the next call
 * to this function. */
{
int c;
int bufIx = 0;
static char name[256];
int lineSize, i;
char *line;

dnaUtilOpen();

/* Read first line, make sure it starts with '>', and read first word
 * as name of sequence. */
name[0] = 0;
if (!lineFileNext(lf, &line, &lineSize))
    {
    *retDna = NULL;
    *retSize = 0;
    *retName = NULL;   /* Clear the name as well, matching faFastReadNext. */
    return FALSE;
    }
if (line[0] == '>')
    {
    line = firstWordInLine(skipLeadingSpaces(line+1));
    if (line == NULL)
        errAbort("Expecting sequence name after '>' line %d of %s", lf->lineIx, lf->fileName);
    strncpy(name, line, sizeof(name));
    /* strncpy leaves the buffer unterminated when the word fills it. */
    name[sizeof(name)-1] = '\0';
    }
else
    {
    errAbort("Expecting '>' line %d of %s", lf->lineIx, lf->fileName);
    }

/* Read until next '>' */
for (;;)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        break;
    if (line[0] == '>')
        {
        /* NOTE(review): presumably hands the header line back so the
         * next call reads it again - confirm against lineFileReuse. */
        lineFileReuse(lf);
        break;
        }
    /* Pass the incoming line size, as the sibling readers in this file
     * do, so the buffer can hold the whole line before it is copied.
     * NOTE(review): second argument is presumably a minimum expansion -
     * confirm against expandFaFastBuf. */
    if (bufIx + lineSize >= faFastBufSize)
        expandFaFastBuf(bufIx, lineSize);
    for (i=0; i<lineSize; ++i)
        {
        /* Go through unsigned char: a negative value from plain char
         * is undefined behavior for isalpha. */
        c = (unsigned char)line[i];
        if (isalpha(c))
            {
            faFastBuf[bufIx++] = c;
            }
        }
    }
/* Guarantee a slot for the terminating zero. */
if (bufIx >= faFastBufSize)
    expandFaFastBuf(bufIx, 0);
faFastBuf[bufIx] = 0;
*retDna = faFastBuf;
*retSize = bufIx;
*retName = name;
return TRUE;
}