struct dnaSeq *faReadSeq(char *fileName, boolean isDna)
/* Open fa file and read a single sequence from it.
 * The whole file is loaded into one buffer, NUL terminated, and handed to
 * faSeqFromMemText() together with isDna.  Aborts via errAbort() if the file
 * can't be sized, opened, or read.
 * NOTE(review): the buffer is not freed here; presumably faSeqFromMemText()
 * keeps using it for the returned sequence - confirm against its definition. */
{
int maxSize = fileSize(fileName);
int total = 0;
int fd;
DNA *s;

if (maxSize < 0)
    errAbort("can't open %s", fileName);

/* Open before allocating so a failure doesn't need any clean up. */
fd = open(fileName, O_RDONLY);
if (fd < 0)
    errAbort("can't open %s", fileName);
s = needHugeMem(maxSize+1);

/* read() is allowed to return fewer bytes than requested, so keep reading
 * until the expected size is reached or end of file is hit. */
while (total < maxSize)
    {
    ssize_t got = read(fd, s + total, maxSize - total);
    if (got < 0)
        {
        close(fd);
        errAbort("can't read %s", fileName);
        }
    if (got == 0)
        break;   /* file is shorter than its reported size */
    total += (int)got;
    }
close(fd);

/* Terminate at the number of bytes actually read so that no uninitialized
 * memory is ever parsed as sequence text. */
s[total] = 0;
return faSeqFromMemText(s, isDna);
}
/* Check a protein sequence, return FALSE if there is some reason it can't be * obtained or doesn't match */ static boolean faCheckProtRec(char *protAcc, short protVer, struct extFile* extFile, off_t faOff, unsigned seqSize, unsigned recSize) { static const int extraBytes = 8; /* extra bytes to read to allow checking next record */ int askSize = recSize+extraBytes; int readSize; char *faBuf, *p, gotAcc[GB_ACC_BUFSZ]; short gotVer; struct dnaSeq *protSeq; FILE *fh = mustOpen(extFile->path, "r"); /* bounds have already been check; so error if we can read the bytes */ if (fseeko(fh, faOff, SEEK_SET) < 0) errnoAbort("%s: can't seek to %lld in %s", protAcc, (long long)faOff, extFile->path); faBuf = needMem(askSize+1); readSize = fread(faBuf, 1, askSize, fh); if (readSize < 0) errnoAbort("%s: read failed at %lld in %s", protAcc, (long long)faOff, extFile->path); if (readSize < recSize) errAbort("%s: can't read %d bytes at %lld in %s", protAcc, recSize, (long long)faOff, extFile->path); carefulClose(&fh); faBuf[readSize] = '\0'; /* check that it starts with a '>' and that there are no extra bases after the * end of sequence */ if (faBuf[0] != '>') { gbVerbMsg(3, "%s: fasta record at %lld does not start with a '>': %s", protAcc, (long long)faOff, extFile->path); freeMem(faBuf); return FALSE; } p = skipLeadingSpaces(faBuf+recSize); if (!((*p == '>') || (*p == '\0'))) { gbVerbMsg(3, "%s: fasta record at %lld for %d has extra characters following the record: %s", protAcc, (long long)faOff, recSize, extFile->path); freeMem(faBuf); return FALSE; } protSeq = faSeqFromMemText(faBuf, FALSE); gotVer = gbSplitAccVer(protSeq->name, gotAcc); if (!(sameString(gotAcc, protAcc) && (gotVer == protVer))) { gbVerbMsg(3, "%s: expected sequence %s.%d, found %s.%d in fasta record at %lld : %s", protAcc, protAcc, protVer, gotAcc, gotVer, (long long)faOff, extFile->path); dnaSeqFree(&protSeq); return FALSE; } if (protSeq->size != seqSize) { gbVerbMsg(3, "%s: expected sequence of %d chars, got %d 
from fasta record at %lld : %s", protAcc, seqSize, protSeq->size, (long long)faOff, extFile->path); dnaSeqFree(&protSeq); return FALSE; } dnaSeqFree(&protSeq); return TRUE; }