Пример #1
0
boolean anyCdnaSeq(char *name, struct dnaSeq **retDna, struct wormCdnaInfo *retInfo)
/* Get a single cDNA sequence by name.
 *
 * name    - sequence name looked up in the index opened from cdnaName.
 * retDna  - receives the sequence read by faReadNext.
 * retInfo - optional; when non-NULL it is filled in from the FA comment line,
 *           or zeroed if the comment has fewer than 8 '|' characters.
 *
 * The index and the matching ".fa" file are opened on the first call and kept
 * in function-level statics for later calls.
 *
 * Returns TRUE on success. Returns FALSE if the ".fa" file name does not fit
 * the local buffer, the name is not found in the index, the seek fails, or
 * faReadNext reports failure. */
{
static FILE *cdnaFa;
static struct snof *cdnaSnof = NULL;
long offset;
char *faComment = NULL;
char **pFaComment = (retInfo == NULL ? NULL : &faComment);

if (cdnaSnof == NULL)
    {
    char buf[512];
    int len = snprintf(buf, sizeof(buf), "%s%s", cdnaName, ".fa");

    /* Reject a truncated file name before any static state is set, so a
     * later call never sees a half-initialized cache. */
    if (len < 0 || (size_t)len >= sizeof(buf))
        return FALSE;
    cdnaSnof = snofMustOpen(cdnaName);
    cdnaFa = mustOpen(buf, "rb");
    }
if (!snofFindOffset(cdnaSnof, name, &offset))
    return FALSE;
/* A failed seek would make faReadNext read from an arbitrary position. */
if (fseek(cdnaFa, offset, SEEK_SET) != 0)
    return FALSE;
if (!faReadNext(cdnaFa, name, TRUE, pFaComment, retDna))
    return FALSE;
if (retInfo != NULL)
    {
    /* Kludge - only look up info if format is more or less right. */
    if (countChars(faComment, '|') >= 8)
        wormFaCommentIntoInfo(faComment, retInfo);
    else
        zeroBytes(retInfo, sizeof(*retInfo));
    }
return TRUE;
}
Пример #2
0
struct dnaSeq *readSeqFromFaPos(struct seqFilePos *sfp,  FILE *f)
/* Read one sequence from an open FA file, starting at file position sfp->pos.
 *
 * sfp - supplies the position to seek to; sfp->name and sfp->file are used
 *       only in error messages.
 * f   - open FA file to read from.
 *
 * Returns the sequence produced by faReadNext. Calls errAbort if the seek
 * fails or faReadNext reports failure. */
{
struct dnaSeq *seq = NULL;

/* Without this check a failed seek would silently read some other record. */
if (fseek(f, sfp->pos, SEEK_SET) != 0)
    errAbort("Couldn't seek to %s in %s\n", sfp->name, sfp->file);
if (!faReadNext(f, "", TRUE, NULL, &seq))
    errAbort("Couldn't faReadNext on %s in %s\n", sfp->name, sfp->file);
return seq;
}
Пример #3
0
struct dnaSeq *faReadOneDnaSeq(FILE *f, char *defaultName, boolean mustStartWithComment)
/* Read a single sequence from an FA file via faReadNext, discarding the
 * comment line. The stream is expected to be positioned at or before the '>'
 * that starts the sequence. Returns the sequence, or NULL when faReadNext
 * reports failure. */
{
struct dnaSeq *result = NULL;
boolean gotSeq = faReadNext(f, defaultName, mustStartWithComment, NULL, &result);

return gotSeq ? result : NULL;
}
Пример #4
0
boolean nextWormCdnaAndInfo(struct wormCdnaIterator *it, struct dnaSeq **retSeq, 
    struct wormCdnaInfo *retInfo)
/* Advance the iterator: read the next record from it->faFile into retSeq and
 * parse its comment line into retInfo. Returns FALSE when faReadNext yields
 * no further record, TRUE otherwise. */
{
char *commentLine;
boolean gotRecord = faReadNext(it->faFile, "unknown", TRUE, &commentLine, retSeq);

if (gotRecord)
    {
    wormFaCommentIntoInfo(commentLine, retInfo);
    return TRUE;
    }
return FALSE;
}
Пример #5
0
boolean wormCdnaSeq(char *name, struct dnaSeq **retDna, struct wormCdnaInfo *retInfo)
/* Get a single worm cDNA sequence by name.
 *
 * name    - sequence name looked up in cdnaSnof.
 * retDna  - receives the sequence read by faReadNext.
 * retInfo - optional; when non-NULL it is filled in from the FA comment line.
 *
 * wormCdnaCache() is called first; presumably it sets up cdnaSnof and cdnaFa
 * (defined outside this function) - confirm against its definition.
 *
 * Returns TRUE on success, FALSE if the name is not in the index, the seek
 * fails, or faReadNext reports failure. */
{
long offset;
char *faComment = NULL;
char **pFaComment = (retInfo == NULL ? NULL : &faComment);

wormCdnaCache();
if (!snofFindOffset(cdnaSnof, name, &offset))
    return FALSE;
/* A failed seek would make faReadNext read from an arbitrary position. */
if (fseek(cdnaFa, offset, SEEK_SET) != 0)
    return FALSE;
if (!faReadNext(cdnaFa, name, TRUE, pFaComment, retDna))
    return FALSE;
/* When retInfo is NULL no comment was requested, so there is nothing to
 * parse and faComment must not be handed on. */
if (retInfo != NULL)
    wormFaCommentIntoInfo(faComment, retInfo);
return TRUE;
}
Пример #6
0
void chopFaLines(char *inName, char *outName)
/* chopFaLines - Read in FA file with long lines and rewrite it with shorter lines.
 *
 * inName  - FA file to read.
 * outName - FA file to create; every record from inName is written to it
 *           with faWriteNext.
 *
 * Earlier debugging statements aborted after the first record, before
 * anything was written; they have been removed so the whole file is
 * processed. */
{
FILE *in = mustOpen(inName, "r");
FILE *out = mustOpen(outName, "w");
char *commentLine;
struct dnaSeq *seq;

while (faReadNext(in, NULL, TRUE, &commentLine, &seq))
    {
    /* Skip the first character of the comment line (presumably the '>')
     * and trim surrounding white space before writing the header. */
    char *header = trimSpaces(commentLine+1);

    faWriteNext(out, header, seq->dna, seq->size);
    /* NOTE(review): seq and commentLine are not released here; if
     * faReadNext hands ownership to the caller they should be freed per
     * record - confirm. */
    }
/* fclose flushes buffered output, so report a failure on the written file. */
if (fclose(out) != 0)
    perror(outName);
fclose(in);
}
Пример #7
0
static struct traceInfo* parseFastaRecord(FILE* fh, char* fastaName)
/* Read the next fasta record and build a traceInfo object from it. The
 * trace id is parsed from the sequence name and the template id from the
 * comment line; fastaName is passed along to both parsers. The sequence and
 * comment are freed before returning. Returns NULL when faReadNext finds no
 * further record. */
{
struct traceInfo *info = NULL;
struct dnaSeq *seq;
char *header;

if (faReadNext(fh, NULL, 0, &header, &seq))
    {
    AllocVar(info);
    info->ti = parseTraceId(seq->name, fastaName);
    info->size = seq->size;
    info->templateId = parseTemplateId(header, fastaName);

    /* Only the extracted fields are kept; release the raw record. */
    freeMem(header);
    freeDnaSeq(&seq);
    }
return info;   /* still NULL at EOF */
}
Пример #8
0
boolean flyCdnaSeq(char *name, struct dnaSeq **retDna, struct wormCdnaInfo *retInfo)
/* Get a single fly cDNA sequence by name.
 *
 * name    - sequence name looked up in the fly cDNA index.
 * retDna  - receives the sequence read by faReadNext.
 * retInfo - optional; when non-NULL it is filled in from the FA comment line.
 *
 * The index and FA file are opened from hard-coded paths on first use and
 * kept in function-level statics for later calls.
 *
 * Returns TRUE on success, FALSE if the name is not in the index, the seek
 * fails, or faReadNext reports failure. */
{
long offset;
char *faComment = NULL;
char **pFaComment = (retInfo == NULL ? NULL : &faComment);
static struct snof *cdnaSnof = NULL;
static FILE *cdnaFa = NULL;

if (cdnaSnof == NULL)
    cdnaSnof = snofMustOpen("c:/biodata/fly/cDna/allcdna");
if (cdnaFa == NULL)
    cdnaFa = mustOpen("c:/biodata/fly/cDna/allcdna.fa", "rb");
if (!snofFindOffset(cdnaSnof, name, &offset))
    return FALSE;
/* A failed seek would make faReadNext read from an arbitrary position. */
if (fseek(cdnaFa, offset, SEEK_SET) != 0)
    return FALSE;
if (!faReadNext(cdnaFa, name, TRUE, pFaComment, retDna))
    return FALSE;
/* When retInfo is NULL no comment was requested, so there is nothing to
 * parse and faComment must not be handed on. */
if (retInfo != NULL)
    flyFaCommentIntoInfo(faComment, retInfo);
return TRUE;
}
Пример #9
0
int main(int argc, char *argv[])
/* Align every EST in a fasta file against a genomic target.
 *
 * Command line: argv[1] is the EST fasta file, argv[2] the target fasta
 * file and argv[3] a file name passed on to makePatSpace.
 *
 * For each EST, both the sequence as read and its reverse complement are
 * searched with patSpaceFindOne; every clump found is aligned with ffFind
 * and the result is printed to stdout. Progress messages go to stderr. */
{
    char *estName, *targetName, *oocName;
    FILE *estFile;
    struct dnaSeq *target;
    struct dnaSeq *est;
    struct patSpace *ps;
    struct patClump *clumpList, *clump;
    int estIx = 0;

    /* Check command line arguments and assign to local variables. */
    if (argc != 4)
        usage();
    estName = argv[1];
    estFile = mustOpen(estName, "rb");
    targetName = argv[2];
    oocName = argv[3];

    /* Read in target DNA from fasta files and check not too big. */
    fprintf(stderr, "Reading %s\n", targetName);
    target = faReadAllDna(targetName);
    if (totalSequenceSize(target) > 8000000)
    {
        errAbort("Can only handle 8000000 bases of genomic sequence at once, %s has %d.",
                 targetName, totalSequenceSize(target));
    }

    /* Make a pattern space index structure. */
    fprintf(stderr, "Making Pattern Space index\n");
    ps = makePatSpace(&target, 1, oocName, 4, 32000);

    /* Loop through each EST in query list. */
    printf("Searching for hits\n\n");
    while (faReadNext(estFile, NULL, TRUE, NULL, &est))
    {
        boolean isRc;   /* Reverse complemented? */

        if (++estIx % 5000 == 0)
            fprintf(stderr, "Processing EST %d\n", estIx);
        if (est->size > 20000)
        {
            warn("Very large EST sequence %s.\n"
                 "Maybe you mixed up the EST and genomic parameters?", est->name);
            usage();
        }

        for (isRc = 0; isRc <= 1; ++isRc)   /* Search both strands. */
        {
            if (isRc)
                reverseComplement(est->dna, est->size);
            clumpList = patSpaceFindOne(ps, est->dna, est->size);

            /* For each homology clump patSpace finds, do a fuzzyFinder
             * alignment of it and print the results. */
            for (clump = clumpList; clump != NULL; clump = clump->next)
            {
                /* No local isRc here: a shadowing declaration would hide
                 * the strand flag of the enclosing loop. */
                struct ffAli *ali, *a;
                int score;
                struct dnaSeq *t = clump->seq;
                DNA *tStart = t->dna + clump->start;

                ali = ffFind(est->dna, est->dna+est->size, tStart, tStart + clump->size, ffCdna);
                if (ali != NULL)
                {
                    score = ffScoreCdna(ali);
                    /* The reverse-complemented pass reports the '-' strand. */
                    printf("%s hits %s strand %c score %d\n",
                           est->name, t->name, (isRc ? '-' : '+'), score);
                    for (a = ali; a != NULL; a = a->right)
                    {
                        /* Pointer differences are not int; cast so the
                         * arguments match the %d conversions. */
                        printf("  Q %4d - %4d\t T %4d -%4d\n",
                               (int)(a->nStart - est->dna), (int)(a->nEnd - est->dna),
                               (int)(a->hStart - t->dna), (int)(a->hEnd - t->dna));
                    }
                    printf("\n");
                    ffFreeAli(&ali);
                }
                else
                {
                    printf("Couldn't align clump at %s %d-%d\n",
                           t->name, clump->start, clump->start + clump->size);
                }
            }
            slFreeList(&clumpList);
        }
        freeDnaSeq(&est);
    }
    /* Clean up time. */
    fclose(estFile);
    freePatSpace(&ps);
    freeSeqList(&target);
    return 0;
}