Beispiel #1
0
struct dnaSeq *faReadSeq(char *fileName, boolean isDna)
/* Open fa file and read a single sequence from it.
 *   fileName - path of the fasta file to load; the whole file is read into
 *              one buffer.
 *   isDna    - passed through unchanged to faSeqFromMemText.
 * Returns whatever faSeqFromMemText builds from the file's text.  Aborts via
 * errAbort if the file size can't be determined, the file can't be opened,
 * or a read fails.
 * NOTE(review): the buffer is handed to faSeqFromMemText and not freed here;
 * presumably that function takes ownership of it - confirm.
 * NOTE(review): the size is held in an int, so files larger than INT_MAX
 * bytes are not supported by this function. */
{
int maxSize = fileSize(fileName);
int total = 0;
int fd;
DNA *s;

if (maxSize < 0)
    errAbort("can't open %s", fileName);

/* Open before allocating so that a failed open doesn't leave a huge
 * buffer behind. */
fd = open(fileName, O_RDONLY);
if (fd < 0)
    errAbort("can't open %s", fileName);
s = needHugeMem(maxSize+1);

/* read() may legitimately return fewer bytes than requested, so keep going
 * until the whole file is in memory or end of file is reached. */
while (total < maxSize)
    {
    long got = read(fd, s + total, maxSize - total);
    if (got < 0)
        {
        close(fd);
        errAbort("error reading %s", fileName);
        }
    if (got == 0)
        break;      /* file is shorter than fileSize() reported */
    total += got;
    }
close(fd);

/* Terminate at the number of bytes actually read so that no uninitialized
 * memory is ever parsed as sequence text. */
s[total] = 0;
return faSeqFromMemText(s, isDna);
}
Beispiel #2
0
/* Check a protein sequence, return FALSE if there is some reason it can't be
 * obtained or doesn't match.
 *   protAcc, protVer - accession and version the fasta record is expected
 *                      to carry in its name.
 *   extFile          - external file description; only extFile->path is used.
 *   faOff            - byte offset of the record within the file.
 *   seqSize          - expected number of characters in the sequence.
 *   recSize          - number of bytes the fasta record occupies in the file.
 * The record must start with '>', must be followed only by whitespace up to
 * the next '>' or the end of the data read, and its parsed name and size must
 * agree with the expected values.  Each mismatch is reported with gbVerbMsg
 * before returning FALSE.  Failure to seek, a read error, or reading fewer
 * than recSize bytes aborts. */
static boolean faCheckProtRec(char *protAcc, short protVer, struct extFile* extFile,
                              off_t faOff, unsigned seqSize, unsigned recSize)
{
static const int extraBytes = 8;  /* extra bytes to read to allow checking next record */
int askSize = recSize+extraBytes;
size_t readSize;   /* fread returns size_t; it is never negative */
char *faBuf, *p, gotAcc[GB_ACC_BUFSZ];
short gotVer;
struct dnaSeq *protSeq;
FILE *fh = mustOpen(extFile->path, "r");

/* bounds have already been checked; so error if we can't read the bytes */
if (fseeko(fh, faOff, SEEK_SET) < 0)
    errnoAbort("%s: can't seek to %lld in %s", protAcc, (long long)faOff, extFile->path);
faBuf = needMem(askSize+1);
readSize = fread(faBuf, 1, askSize, fh);
/* A short count alone can't distinguish an I/O error from end of file, so
 * ask the stream whether an error occurred. */
if (ferror(fh))
    errnoAbort("%s: read failed at %lld in %s", protAcc, (long long)faOff, extFile->path);
if (readSize < recSize)
    errAbort("%s: can't read %d bytes at %lld in %s", protAcc, recSize, (long long)faOff, extFile->path);
carefulClose(&fh);
faBuf[readSize] = '\0';

/* check that it starts with a '>' and that there are no extra bases after the
 * end of sequence */
if (faBuf[0] != '>')
    {
    gbVerbMsg(3, "%s: fasta record at %lld does not start with a '>': %s", protAcc, 
              (long long)faOff, extFile->path);
    freeMem(faBuf);
    return FALSE;
    }
/* Whatever follows the record must be the start of the next record or the
 * end of the bytes that were read. */
p = skipLeadingSpaces(faBuf+recSize);
if (!((*p == '>') || (*p == '\0')))
    {
    gbVerbMsg(3, "%s: fasta record at %lld for %d has extra characters following the record: %s", protAcc, 
              (long long)faOff, recSize, extFile->path);
    freeMem(faBuf);
    return FALSE;
    }
/* NOTE(review): faBuf is not freed separately after this point; presumably
 * faSeqFromMemText takes ownership of the buffer and dnaSeqFree releases
 * it - confirm. */
protSeq = faSeqFromMemText(faBuf, FALSE);
gotVer = gbSplitAccVer(protSeq->name, gotAcc);
if (!(sameString(gotAcc, protAcc) && (gotVer == protVer)))
    {
    gbVerbMsg(3, "%s: expected sequence %s.%d, found %s.%d in fasta record at %lld : %s", protAcc,
              protAcc, protVer, gotAcc, gotVer, (long long)faOff, extFile->path);
    dnaSeqFree(&protSeq);
    return FALSE;
    }

if (protSeq->size != seqSize)
    {
    gbVerbMsg(3, "%s: expected sequence of %d chars, got %d from fasta record at %lld : %s", protAcc,
              seqSize, protSeq->size, (long long)faOff, extFile->path);
    dnaSeqFree(&protSeq);
    return FALSE;
    }

dnaSeqFree(&protSeq);
return TRUE;
}