void readAllSts(FILE *asf) /* Read in current sequences for sts markers */ { struct dnaSeq *ds; struct sts *s; char *words[8], *acc=NULL, *line; int wordCount; while (faReadMixedNext(asf, 0, "default", TRUE, &line, &ds)) { /* Determine the UCSC id */ wordCount = chopByWhite(line, words, ArraySize(words)); stripString(words[0], ">"); if (wordCount == 3) acc = cloneString(words[2]); else acc = NULL; /* Find the record and attach */ if (hashLookup(stsHash, ds->name)) { s = hashMustFindVal(stsHash, ds->name); s->fa = ds; s->faAcc = acc; s->si->sequence = 1; } else { dnaSeqFree(&ds); freez(&line); if (acc != NULL) freez(&acc); } } }
struct dnaSeq *readSeqFromFaPos(struct seqFilePos *sfp, FILE *f)
/* Read part of FA file.
 * Seeks f to the offset stored in sfp->pos and reads one record from there
 * with faReadMixedNext.  Calls errAbort if the seek fails or if no record
 * can be read; otherwise returns the sequence that was read. */
{
struct dnaSeq *seq;

/* A failed seek would otherwise leave the stream where it was and the read
 * below would return whatever record happens to follow. */
if (fseek(f, sfp->pos, SEEK_SET) != 0)
    errAbort("Couldn't seek to %s in %s\n", sfp->name, sfp->file);
if (!faReadMixedNext(f, TRUE, "", TRUE, NULL, &seq))
    errAbort("Couldn't faReadNext on %s in %s\n", sfp->name, sfp->file);
return seq;
}
boolean faReadNext(FILE *f, char *defaultName, boolean mustStartWithComment,
                   char **retCommentLine, struct dnaSeq **retSeq)
/* Fetch the next sequence from a .fa file and hand it back through retSeq.
 * When retCommentLine is non-null the '>' line is returned through it.
 * The result is FALSE once the end of the file is reached.
 * DNA chars are mapped to lower case.
 * All of the work is delegated to faReadMixedNext, with 0 passed as its
 * second argument. */
{
boolean gotRecord;

gotRecord = faReadMixedNext(f, 0, defaultName, mustStartWithComment,
                            retCommentLine, retSeq);
return gotRecord;
}
void readDbstsFa(FILE *dff) /* Read in sequences from dbSTS.fa and add, if possible */ { struct dnaSeq *ds; struct sts *s; struct gb *gb; char name[256], *line; while (faReadMixedNext(dff, 0, "default", TRUE, &line, &ds)) { /* Determine the UCSC id */ if (hashLookup(gbAccHash, ds->name)) { /* Determine if this is linked to a marker */ gb = hashMustFindVal(gbAccHash, ds->name); if (gb->s != NULL) { /* If no recorded sequence, then add */ s = gb->s; if (s->fa == NULL) { s->faAcc = cloneString(ds->name); safef(name, ArraySize(name), "%d", s->si->identNo); ds->name = cloneString(name); s->fa = ds; s->si->sequence = 1; } /* If no accession recorded, see if sequences are the same */ else if (s->faAcc == NULL) { if (sameString(s->fa->dna, ds->dna)) { s->faAcc = cloneString(ds->name); s->si->sequence = 1; } freeDnaSeq(&ds); } /* If same accession as recorded, the update sequence */ else if (sameString(s->faAcc, ds->name)) { ds->name = cloneString(s->fa->name); freeDnaSeq(&s->fa); s->fa = ds; s->si->sequence = 1; } else freeDnaSeq(&ds); } else freeDnaSeq(&ds); } } }
void axtChain(char *axtIn, char *tNibDir, char *qNibDir, char *chainOut) /* axtChain - Chain together axt alignments.. */ { struct hash *pairHash = newHash(0); /* Hash keyed by qSeq<strand>tSeq */ struct seqPair *spList = NULL, *sp; FILE *f = mustOpen(chainOut, "w"); char *qName = "", *tName = ""; struct dnaSeq *qSeq = NULL, *tSeq = NULL; char qStrand = 0, tStrand = 0; struct chain *chainList = NULL, *chain; FILE *details = NULL; struct dnaSeq *seq, *seqList = NULL; struct hash *faHash = newHash(0); struct hash *tFaHash = newHash(0); FILE *faF; boolean qIsTwoBit = twoBitIsFile(qNibDir); boolean tIsTwoBit = twoBitIsFile(tNibDir); axtScoreSchemeDnaWrite(scoreScheme, f, "axtChain"); if (detailsName != NULL) details = mustOpen(detailsName, "w"); /* Read input file and divide alignments into various parts. */ if (optionExists("psl")) spList = readPslBlocks(axtIn, pairHash, f); else spList = readAxtBlocks(axtIn, pairHash, f); if (optionExists("faQ")) { faF = mustOpen(qNibDir, "r"); while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq)) { hashAdd(faHash, seq->name, seq); slAddHead(&seqList, seq); } fclose(faF); } if (optionExists("faT")) { faF = mustOpen(tNibDir, "r"); while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq)) { hashAdd(tFaHash, seq->name, seq); slAddHead(&seqList, seq); } fclose(faF); } for (sp = spList; sp != NULL; sp = sp->next) { slReverse(&sp->blockList); removeExactOverlaps(&sp->blockList); verbose(1, "%d blocks after duplicate removal\n", slCount(sp->blockList)); if (optionExists("faQ")) { assert (faHash != NULL); loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); } else { loadIfNewSeq(qNibDir, qIsTwoBit, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); } if (optionExists("faT")) { assert (tFaHash != NULL); loadFaSeq(tFaHash, sp->tName, '+', &tName, &tSeq, &tStrand); } else { loadIfNewSeq(tNibDir, tIsTwoBit, sp->tName, '+', &tName, &tSeq, &tStrand); } chainPair(sp, qSeq, tSeq, &chainList, details); } slSort(&chainList, 
chainCmpScore); for (chain = chainList; chain != NULL; chain = chain->next) { assert(chain->qStart == chain->blockList->qStart && chain->tStart == chain->blockList->tStart); chainWrite(chain, f); } carefulClose(&f); }
void doChainScore(char *chainIn, char *tNibDir, char *qNibDir, char *chainOut) { char qStrand = 0, tStrand = 0; struct dnaSeq *qSeq = NULL, *tSeq = NULL; char *qName = "", *tName = ""; FILE *f = mustOpen(chainOut, "w"); struct chain *chainList = NULL, *chain; struct chain *inputChains, *next; FILE *details = NULL; struct lineFile *lf = NULL; struct dnaSeq *seq, *seqList = NULL; struct hash *faHash = newHash(0); struct hash *chainHash = newHash(0); char comment[1024]; FILE *faF; struct seqPair *spList = NULL, *sp; struct dyString *dy = newDyString(512); struct lineFile *chainsLf = lineFileOpen(chainIn, TRUE); while ((chain = chainRead(chainsLf)) != NULL) { dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName); sp = hashFindVal(chainHash, dy->string); if (sp == NULL) { AllocVar(sp); slAddHead(&spList, sp); hashAddSaveName(chainHash, dy->string, sp, &sp->name); sp->qName = cloneString(chain->qName); sp->tName = cloneString(chain->tName); sp->qStrand = chain->qStrand; } slAddHead(&sp->chain, chain); } slSort(&spList, seqPairCmp); lineFileClose(&chainsLf); if (optionExists("faQ")) { faF = mustOpen(qNibDir, "r"); while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq)) { hashAdd(faHash, seq->name, seq); slAddHead(&seqList, seq); } fclose(faF); } for (sp = spList; sp != NULL; sp = sp->next) { if (optionExists("faQ")) { assert (faHash != NULL); loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); } else loadIfNewSeq(qNibDir, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); loadIfNewSeq(tNibDir, sp->tName, '+', &tName, &tSeq, &tStrand); scorePair(sp, qSeq, tSeq, &chainList, sp->chain); } slSort(&chainList, chainCmpScore); for (chain = chainList; chain != NULL; chain = chain->next) { assert(chain->qStart == chain->blockList->qStart && chain->tStart == chain->blockList->tStart); chainWrite(chain, f); } carefulClose(&f); }