Ejemplo n.º 1
0
boolean faFastReadNext(FILE *f, DNA **retDna, int *retSize, char **retName)
/* Fetch the next FA record from f, one character at a time.
 * Returns FALSE (with *retDna and *retName set to NULL and *retSize to 0)
 * if end of file is reached before the header line is complete; otherwise
 * returns TRUE with *retDna pointing at the zero-terminated sequence in
 * faFastBuf, *retSize holding its length and *retName pointing at the
 * record name.  The name lives in a static buffer and the sequence in the
 * shared faFastBuf, so both are overwritten by the next call. */
{
static char name[256];
int nameLen = 0;
int seqLen = 0;
boolean nameDone = FALSE;
int ch;

dnaUtilOpen();
name[0] = 0;

/* Consume the header line through its '\n'.  The first word becomes the
 * name; '>' characters are dropped, and anything that does not fit in
 * the name buffer is silently discarded. */
do
    {
    ch = fgetc(f);
    if (ch == EOF)
        {
        *retDna = NULL;
        *retSize = 0;
        *retName = NULL;
        return FALSE;
        }
    if (!nameDone && nameLen < ArraySize(name)-1)
        {
        if (isspace(ch))
            nameDone = TRUE;
        else if (ch != '>')
            name[nameLen++] = ch;
        }
    }
while (ch != '\n');
name[nameLen] = 0;

/* Collect sequence letters until the '>' of the next record (which is
 * consumed here) or end of file.  Non-letters are skipped. */
for (;;)
    {
    ch = fgetc(f);
    if (ch == EOF || ch == '>')
        break;
    if (!isalpha(ch))
        continue;
    /* Translate through ntChars; letters it maps to 0 are stored as 'n'. */
    ch = ntChars[ch];
    if (ch == 0)
        ch = 'n';
    if (seqLen >= faFastBufSize)
        expandFaFastBuf(seqLen, 0);
    faFastBuf[seqLen++] = ch;
    }

/* Zero-terminate the sequence; the terminator is not counted in the size. */
if (seqLen >= faFastBufSize)
    expandFaFastBuf(seqLen, 0);
faFastBuf[seqLen] = 0;
*retDna = faFastBuf;
*retSize = seqLen;
*retName = name;
return TRUE;
}
Ejemplo n.º 2
0
boolean faMixedSpeedReadNext(struct lineFile *lf, DNA **retDna, int *retSize, char **retName)
/* Read in DNA or Peptide FA record in mixed case.   Allow any upper or lower case
 * letter, or the dash character in.
 * Returns FALSE, with *retDna set to NULL and *retSize to 0 (*retName is
 * left untouched), when lf has no more lines.  Otherwise returns TRUE with
 * *retDna pointing at the zero-terminated sequence in faFastBuf, *retSize
 * holding its length and *retName pointing at the first word of the '>'
 * line.  The name is kept in a static buffer (truncated to fit) and the
 * sequence in the shared faFastBuf, so both are overwritten by the next call.
 * Calls errAbort if the record does not start with a '>' line carrying a
 * name, and warns if the record has no sequence characters. */
{
char c;
int bufIx = 0;
static char name[512];
int lineSize, i;
char *line;

dnaUtilOpen();

/* Read first line, make sure it starts with '>', and read first word
 * as name of sequence. */
name[0] = 0;
if (!lineFileNext(lf, &line, &lineSize))
    {
    *retDna = NULL;
    *retSize = 0;
    return FALSE;
    }
if (line[0] == '>')
    {
    line = firstWordInLine(skipLeadingSpaces(line+1));
    if (line == NULL)
        errAbort("Expecting sequence name after '>' line %d of %s", lf->lineIx, lf->fileName);
    strncpy(name, line, sizeof(name));
    name[sizeof(name)-1] = '\0'; /* strncpy does not terminate when the word fills the buffer. */
    }
else
    {
    errAbort("Expecting '>' line %d of %s", lf->lineIx, lf->fileName);
    }
/* Read until next '>' */
for (;;)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        break;
    if (line[0] == '>')
        {
        /* Header of the following record: push it back for the next call. */
        lineFileReuse(lf);
        break;
        }
    if (bufIx + lineSize >= faFastBufSize)
        expandFaFastBuf(bufIx, lineSize);
    for (i=0; i<lineSize; ++i)
        {
        c = line[i];
        /* Cast before calling isalpha: a plain char holding a byte >= 0x80
         * may be negative, which is undefined behavior for <ctype.h>. */
        if (isalpha((unsigned char)c) || c == '-')
            faFastBuf[bufIx++] = c;
        }
    }
/* Make sure there is room for the terminating zero. */
if (bufIx >= faFastBufSize)
    expandFaFastBuf(bufIx, 0);
faFastBuf[bufIx] = 0;
*retDna = faFastBuf;
*retSize = bufIx;
*retName = name;
if (bufIx == 0)
    {
    warn("Invalid fasta format: sequence size == 0 for element %s",name);
    }

return TRUE;
}
Ejemplo n.º 3
0
boolean faSpeedReadNextKeepCase(struct lineFile *lf, 
	DNA **retDna, int *retSize, char **retName)
/* Read in next FA entry as fast as we can. Faster than that old,
 * pokey faFastReadNext. Letters are stored as they appear in the file
 * (no case conversion); every non-letter is dropped.
 * Returns FALSE, with *retDna set to NULL and *retSize to 0 (*retName is
 * left untouched), when lf has no more lines.  Otherwise returns TRUE with
 * *retDna pointing at the zero-terminated sequence in faFastBuf, *retSize
 * holding its length and *retName pointing at the first word of the '>'
 * line (truncated to fit the static name buffer).
 * Calls errAbort if the record does not start with a '>' line carrying a name.
 * The returned DNA and name will be overwritten by the next call
 * to this function. */
{
int c;
int bufIx = 0;
static char name[256];
int lineSize, i;
char *line;

dnaUtilOpen();

/* Read first line, make sure it starts with '>', and read first word
 * as name of sequence. */
name[0] = 0;
if (!lineFileNext(lf, &line, &lineSize))
    {
    *retDna = NULL;
    *retSize = 0;
    return FALSE;
    }
if (line[0] == '>')
    {
    line = firstWordInLine(skipLeadingSpaces(line+1));
    if (line == NULL)
        errAbort("Expecting sequence name after '>' line %d of %s", lf->lineIx, lf->fileName);
    strncpy(name, line, sizeof(name));
    /* strncpy leaves name unterminated when the word is sizeof(name) or
     * more characters long, so terminate explicitly. */
    name[sizeof(name)-1] = '\0';
    }
else
    {
    errAbort("Expecting '>' line %d of %s", lf->lineIx, lf->fileName);
    }
/* Read until next '>' */
for (;;)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        break;
    if (line[0] == '>')
        {
        /* Header of the following record: push it back for the next call. */
        lineFileReuse(lf);
        break;
        }
    /* Keep growing until the whole line fits: expandFaFastBuf is not told
     * the line length here, so a single call is not guaranteed to make
     * enough room.  NOTE(review): assumes each call enlarges
     * faFastBufSize - confirm against expandFaFastBuf. */
    while (bufIx + lineSize >= faFastBufSize)
        expandFaFastBuf(bufIx);
    for (i=0; i<lineSize; ++i)
        {
        c = line[i];
        /* Cast before calling isalpha: a value loaded from a plain char may
         * be negative for bytes >= 0x80, which is undefined behavior for
         * <ctype.h>. */
        if (isalpha((unsigned char)c))
            {
            faFastBuf[bufIx++] = c;
            }
        }
    }
/* Make sure there is room for the terminating zero. */
if (bufIx >= faFastBufSize)
    expandFaFastBuf(bufIx);
faFastBuf[bufIx] = 0;
*retDna = faFastBuf;
*retSize = bufIx;
*retName = name;
return TRUE;
}