boolean sameStickyEnd(struct cutter *enz1, struct cutter *enz2) /* Check to see if two enzymes make the same sticky ends. If either of the enzymes have sticky ends that isn't all ACGT, then this returns false. */ { boolean ret = FALSE; struct dnaSeq *sticky1 = stickyEnd(enz1); struct dnaSeq *sticky2 = stickyEnd(enz2); if (sticky1 && sticky2) if (sticky1 && sticky2 && (sticky1->size == sticky2->size) && (acgtCount(sticky1->dna) == sticky1->size) && (acgtCount(sticky2->dna) == sticky2->size)) { if (sameString(sticky1->dna, sticky2->dna)) ret = TRUE; else { reverseComplement(sticky2->dna, sticky2->size); if (sameString(sticky1->dna, sticky2->dna)) ret = TRUE; } } freeDnaSeq(&sticky1); freeDnaSeq(&sticky2); return ret; }
void trans3Free(struct trans3 **pT3)
/* Free the sequences in trans[0], trans[1] and trans[2] of *pT3 and then
 * hand pT3 to freez().  Does nothing when *pT3 is NULL. */
{
struct trans3 *t3 = *pT3;
int ix;

if (t3 == NULL)
    return;
for (ix = 0; ix < 3; ++ix)
    freeDnaSeq(&t3->trans[ix]);
freez(pT3);
}
void readDbstsFa(FILE *dff) /* Read in sequences from dbSTS.fa and add, if possible */ { struct dnaSeq *ds; struct sts *s; struct gb *gb; char name[256], *line; while (faReadMixedNext(dff, 0, "default", TRUE, &line, &ds)) { /* Determine the UCSC id */ if (hashLookup(gbAccHash, ds->name)) { /* Determine if this is linked to a marker */ gb = hashMustFindVal(gbAccHash, ds->name); if (gb->s != NULL) { /* If no recorded sequence, then add */ s = gb->s; if (s->fa == NULL) { s->faAcc = cloneString(ds->name); safef(name, ArraySize(name), "%d", s->si->identNo); ds->name = cloneString(name); s->fa = ds; s->si->sequence = 1; } /* If no accession recorded, see if sequences are the same */ else if (s->faAcc == NULL) { if (sameString(s->fa->dna, ds->dna)) { s->faAcc = cloneString(ds->name); s->si->sequence = 1; } freeDnaSeq(&ds); } /* If same accession as recorded, the update sequence */ else if (sameString(s->faAcc, ds->name)) { ds->name = cloneString(s->fa->name); freeDnaSeq(&s->fa); s->fa = ds; s->si->sequence = 1; } else freeDnaSeq(&ds); } else freeDnaSeq(&ds); } } }
void gfAlignStrand(int *pConn, char *tSeqDir, struct dnaSeq *seq, boolean isRc, int minMatch, struct hash *tFileCache, struct gfOutput *out)
/* Query the server on *pConn with one strand of seq, then for each bundled
 * hit range load the target sequence locally, align and stitch it, and pass
 * the result to saveAlignments() together with 'out'.  The connection is
 * closed and *pConn set to -1 as soon as the query has returned. */
{
struct gfRange *ranges = gfQuerySeq(*pConn, seq);
struct gfRange *cur;
char targetName[PATH_LEN];

close(*pConn);
*pConn = -1;

slSort(&ranges, gfRangeCmpTarget);
ranges = gfRangesBundle(ranges, ffIntronMax);

cur = ranges;
while (cur != NULL)
    {
    struct dnaSeq *tSeq;
    struct ssBundle *bundle;

    getTargetName(cur->tName, out->includeTargetFile, targetName);
    tSeq = gfiExpandAndLoadCached(cur, tFileCache, tSeqDir, seq->size, &cur->tTotalSize, FALSE, FALSE, usualExpansion);

    AllocVar(bundle);
    bundle->qSeq = seq;
    bundle->genoSeq = tSeq;
    alignComponents(cur, bundle, ffCdna);
    ssStitch(bundle, ffCdna, minMatch, ssAliCount);
    saveAlignments(targetName, cur->tTotalSize, cur->tStart, bundle, NULL, isRc, FALSE, ffCdna, minMatch, out);

    ssBundleFree(&bundle);
    freeDnaSeq(&tSeq);
    cur = cur->next;
    }
gfRangeFreeList(&ranges);
}
static void makeOligoHistogram(char *fileName, struct seqList *seqList, int oligoSize, int **retTable, int *retTotal) /* Make up table of oligo occurences. Either pass in an FA file or a seqList. * (the other should be NULL). */ { FILE *f = NULL; int tableSize = (1<<(oligoSize+oligoSize)); int tableByteSize = tableSize * sizeof(int); int *table = needLargeMem(tableByteSize); struct dnaSeq *seq; struct seqList *seqEl = seqList; int *softMask = NULL; int total = 0; if (seqList == NULL) f = mustOpen(fileName, "rb"); memset(table, 0, tableByteSize); for (;;) { DNA *dna; int size; int endIx; int i; int oliVal; if (seqList != NULL) { if (seqEl == NULL) break; seq = seqEl->seq; softMask = seqEl->softMask; seqEl = seqEl->next; } else { seq = faReadOneDnaSeq(f, "", TRUE); if (seq == NULL) break; } dna = seq->dna; size = seq->size; endIx = size-oligoSize; for (i=0; i<=endIx; ++i) { if (softMask == NULL || !masked(softMask+i, oligoSize) ) { if ((oliVal = oligoVal(dna+i, oligoSize)) >= 0) { table[oliVal] += 1; ++total; } } } if (seqList == NULL) freeDnaSeq(&seq); } carefulClose(&f); *retTable = table; *retTotal = total; }
void foldPslIntoStats(struct psl *psl, struct dnaSeq *tSeq, struct hash *otherHash, struct stats *stats) /* Load sequence corresponding to bed and add alignment stats. */ { struct dnaSeq *qSeq = loadSomeSeq(otherHash, psl->qName, psl->qStart, psl->qEnd); int i, bCount = psl->blockCount; int qOffset; // uglyf("%s:%d-%d %s %s:%d-%d\n", psl->qName, psl->qStart, psl->qEnd, psl->strand, psl->tName, psl->tStart, psl->tEnd); if (qSeq != NULL && tSeq != NULL) { if (psl->strand[0] == '-') { reverseComplement(qSeq->dna, qSeq->size); qOffset = psl->qSize - psl->qEnd; } else qOffset = psl->qStart; if (psl->strand[1] == '-') errAbort("Can't yet handle reverse complemented targets"); for (i=0; i<bCount; ++i) { int bSize = psl->blockSizes[i]; stats->bedBaseAli += bSize; stats->bedBaseMatch += baseMatch(qSeq->dna + psl->qStarts[i] - qOffset, tSeq->dna + psl->tStarts[i], bSize); } } freeDnaSeq(&qSeq); }
static void simpleFillInSequence(char *seqDir, struct agpFrag *agpList, DNA *dna, int dnaSize)
/* Fill in DNA array with sequences from simple clones.
 * For each fragment the clone name is agp->frag cut at the first '_', the
 * clone is read from <seqDir>/<clone>.fa (which must hold exactly one
 * sequence), and the bases from fragStart up to fragEnd are copied to
 * dna + chromStart, reverse-complemented first when the strand is '-'.
 * Aborts if the fragment name or the file path does not fit its buffer. */
{
struct agpFrag *agp;
char underline = '_';

/* NOTE(review): dnaSize is not consulted; the copy below is not checked
 * against the size of 'dna' or of the clone - confirm callers guarantee it. */
for (agp = agpList; agp != NULL; agp = agp->next)
    {
    char clone[128];
    char path[512];
    struct dnaSeq *seq;
    int size;
    int len;

    /* Bounded copies: the previous strcpy/sprintf could overrun. */
    len = snprintf(clone, sizeof(clone), "%s", agp->frag);
    if (len < 0 || len >= (int)sizeof(clone))
        errAbort("Fragment name too long: %s", agp->frag);
    chopSuffixAt(clone,underline);
    len = snprintf(path, sizeof(path), "%s/%s.fa", seqDir, clone);
    if (len < 0 || len >= (int)sizeof(path))
        errAbort("Path too long for clone %s in %s", clone, seqDir);

    seq = faReadAllDna(path);
    if (slCount(seq) != 1)
        errAbort("Can only handle exactly one clone in %s.", path);
    size = agp->fragEnd - agp->fragStart;
    if (agp->strand[0] == '-')
        reverseComplement(seq->dna + agp->fragStart, size);
    memcpy(dna + agp->chromStart, seq->dna + agp->fragStart, size);
    freeDnaSeq(&seq);
    }
}
void musAliAt(char *database, char *chrom, char *humanFa, char *mouseFa) /* musAliAt - Produce .fa files where mouse alignments hit on chr22. */ { char query[256], **row; struct sqlResult *sr; struct sqlConnection *conn; struct dnaSeq *musSeq, *homoSeq; struct psl *psl; struct hash *musHash = newHash(10); FILE *musOut = mustOpen(mouseFa, "w"); hSetDb(database); conn = hAllocConn(); sqlSafef(query, sizeof query, "select * from blatMouse where tName = '%s'", chrom); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { psl = pslLoad(row); if ((musSeq = hashFindVal(musHash, psl->qName)) == NULL) { musSeq = hExtSeq(psl->qName); hashAdd(musHash, psl->qName, NULL); faWriteNext(musOut, musSeq->name, musSeq->dna, musSeq->size); freeDnaSeq(&musSeq); } pslFree(&psl); } }
void loadIfNewSeq(char *nibDir, char *newName, char strand, char **pName, struct dnaSeq **pSeq, char *pStrand)
/* Make *pSeq hold sequence newName on the requested strand.  When newName
 * differs from *pName the old sequence is freed and <nibDir>/<newName>.nib
 * is loaded (reverse-complemented for '-'); when only the strand differs the
 * sequence already held is reverse-complemented in place.  *pName and
 * *pStrand are updated to match; *pName is set to the newName pointer
 * itself, not a copy. */
{
if (!sameString(newName, *pName))
    {
    char fileName[512];
    struct dnaSeq *loaded;

    freeDnaSeq(pSeq);
    snprintf(fileName, sizeof(fileName), "%s/%s.nib", nibDir, newName);
    *pName = newName;
    loaded = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
    *pSeq = loaded;
    *pStrand = strand;
    if (strand == '-')
        reverseComplement(loaded->dna, loaded->size);
    uglyf("Loaded %d bases in %s\n", loaded->size, fileName);
    }
else if (strand != *pStrand)
    {
    struct dnaSeq *held = *pSeq;

    reverseComplement(held->dna, held->size);
    *pStrand = strand;
    }
}
void twoBitDup(char *filename) /* twoBitDup - check to see if a twobit file has any identical sequences in it. */ { struct twoBitFile *tbf; tbf = twoBitOpen(filename); struct twoBitIndex *index; int seqCount = slCount(tbf->indexList); int hashSize = log2(seqCount) + 2; // +2 for luck struct hash *seqHash = newHash(hashSize); verbose(2, "hash size is %d\n", hashSize); for (index = tbf->indexList; index != NULL; index = index->next) { verbose(2,"grabbing seq %s\n", index->name); int size; struct dnaSeq *seq = twoBitReadSeqFragExt(tbf, index->name, 0, 0, FALSE, &size); struct hashEl *hel; if ((hel = hashLookup(seqHash, seq->dna)) != NULL) printf("%s and %s are identical\n", index->name, (char *)hel->val); else hashAdd(seqHash, seq->dna, index->name); freeDnaSeq(&seq); } }
void writeChainPart(struct dnaSeq *tChrom, struct nibTwoCache *qNtc, char *nibDir, struct chain *chain, int tStart, int tEnd, FILE *f, FILE *gapFile) /* write out axt's from subset of chain */ { struct dnaSeq *qSeq; boolean isRev = (chain->qStrand == '-'); struct chain *subChain, *chainToFree; int fullSeqSize; int qStart; chainSubsetOnT(chain, tStart, tEnd, &subChain, &chainToFree); if (subChain == NULL) errAbort("null subchain in chain ID %d\n", chain->id); /* Get query sequence fragment. */ nibTwoCacheSeqPart(qNtc, chain->qName, 1, 1, &fullSeqSize); qStart = (isRev ? fullSeqSize - subChain->qEnd : subChain->qStart); qSeq = nibTwoCacheSeqPart(qNtc, subChain->qName, qStart, subChain->qEnd - subChain->qStart, NULL); if (isRev) reverseComplement(qSeq->dna, qSeq->size); verbose(9, "fill chain id, subchain %d %s %d %d %c qOffset=%d\n", subChain->id, subChain->qName, tStart, tEnd, subChain->qStrand, qStart); writeAxtFromChain(subChain, qSeq, subChain->qStart, tChrom, 0, f, gapFile); chainFree(&chainToFree); freeDnaSeq(&qSeq); }
void netToAxt(char *netName, char *chainName, char *tNibDir, char *qNibDir, char *axtName) /* netToAxt - Convert net (and chain) to axt.. */ { Bits *usedBits = findUsedIds(netName); struct hash *chainHash; struct chainNet *net; struct lineFile *lf = lineFileOpen(netName, TRUE); FILE *f = mustOpen(axtName, "w"); struct dnaSeq *tChrom = NULL; struct nibTwoCache *qNtc = nibTwoCacheNew(qNibDir); char *gapFileName = optionVal("gapOut", NULL); FILE *gapFile = NULL; if (gapFileName) gapFile = mustOpen(gapFileName, "w"); lineFileSetMetaDataOutput(lf, f); chainHash = chainReadUsedSwap(chainName, qChain, usedBits); bitFree(&usedBits); while ((net = chainNetRead(lf)) != NULL) { verbose(1, "Processing %s\n", net->name); tChrom = nibTwoLoadOne(tNibDir, net->name); if (tChrom->size != net->size) errAbort("Size mismatch on %s. Net/nib out of sync or possibly nib dirs swapped?", tChrom->name); rConvert(net->fillList, tChrom, qNtc, qNibDir, chainHash, f, gapFile); freeDnaSeq(&tChrom); chainNetFree(&net); } nibTwoCacheFree(&qNtc); }
void correctOne(struct dnaSeq *est, struct psl *psl, char *nibDir, struct hash *nibHash, FILE *f) /* Write one corrected EST to file. */ { struct dnaSeq *geno = readCachedNib(nibHash, nibDir, psl->tName, psl->tStart, psl->tEnd - psl->tStart); struct dyString *t = newDyString(est->size+20); int qSize = psl->qSize; int tSize = psl->tSize; int qLastEnd = 0; int blockIx; struct mrnaBlock *mbList, *mb; int genoOffset = psl->tStart; boolean isRc = FALSE; /* Load sequence and alignment blocks, coping with reverse * strand as necessary. */ toUpperN(geno->dna, geno->size); /* This helps debug... */ mbList = mrnaBlockFromPsl(psl); if (psl->strand[0] == '-') { reverseComplement(geno->dna, geno->size); genoOffset = tSize - psl->tEnd; for (mb = mbList; mb != NULL; mb = mb->next) { reverseIntRange(&mb->tStart, &mb->tEnd, tSize); reverseIntRange(&mb->qStart, &mb->qEnd, qSize); } slReverse(&mbList); isRc = TRUE; } /* Make t have corrected sequence. */ for (mb = mbList; mb != NULL; mb = mb->next) { int qStart = mb->qStart; int qEnd = mb->qEnd; int uncovSize = qStart - qLastEnd; if (uncovSize > 0) dyStringAppendN(t, est->dna + qLastEnd, uncovSize); dyStringAppendN(t, geno->dna + mb->tStart - genoOffset, mb->tEnd - mb->tStart); qLastEnd = qEnd; } if (qLastEnd != qSize) { int uncovSize = qSize - qLastEnd; dyStringAppendN(t, est->dna + qLastEnd, uncovSize); } /* Output */ faWriteNext(f, est->name, t->string, t->stringSize); /* Clean up time. */ slFreeList(&mbList); freeDyString(&t); freeDnaSeq(&geno); }
void freeSeqList(struct dnaSeq **pSeqList)
/* Free every sequence on the list and set *pSeqList to NULL. */
{
struct dnaSeq *cur = *pSeqList;

while (cur != NULL)
    {
    /* Remember the successor before the node goes away. */
    struct dnaSeq *following = cur->next;

    freeDnaSeq(&cur);
    cur = following;
    }
*pSeqList = NULL;
}
void freeCdnaAliList(struct cdnaAli **pList)
/* Free the alignment and cDNA held by each element, then hand the list
 * itself to slFreeList(). */
{
struct cdnaAli *item = *pList;

while (item != NULL)
    {
    ffFreeAli(&item->ali);
    freeDnaSeq(&item->cdna);
    item = item->next;
    }
slFreeList(pList);
}
void freeDnaSeqList(struct dnaSeq **pSeqList)
/* Release each DNA sequence on the list in turn; *pSeqList is NULL
 * afterwards. */
{
struct dnaSeq *doomed;
struct dnaSeq *rest = *pSeqList;

while ((doomed = rest) != NULL)
    {
    rest = doomed->next;
    freeDnaSeq(&doomed);
    }
*pSeqList = NULL;
}
void hgNibSeq(char *database, char *destDir, int faCount, char *faNames[]) /* hgNibSeq - convert DNA to nibble-a-base and store location in database. */ { char dir[256], name[128], chromName[128], ext[64]; char nibName[512]; struct sqlConnection *conn = sqlConnect(database); char query[512]; int i; char *faName; struct dnaSeq *seq = NULL; unsigned long total = 0; int size; if (!strchr(destDir, '/')) errAbort("Use full path name for nib file dir\n"); makeDir(destDir); if ((!appendTbl) || !sqlTableExists(conn, tableName)) createTable(conn); for (i=0; i<faCount; ++i) { faName = faNames[i]; splitPath(faName, dir, name, ext); sprintf(nibName, "%s/%s.nib", destDir, name); printf("Processing %s to %s\n", faName, nibName); if (preMadeNib) { FILE *nibFile; nibOpenVerify(nibName, &nibFile, &size); carefulClose(&nibFile); } else { seq = faReadDna(faName); if (seq != NULL) { size = seq->size; uglyf("Read DNA\n"); nibWrite(seq, nibName); uglyf("Wrote nib\n"); freeDnaSeq(&seq); } } strcpy(chromName, chromPrefix); strcat(chromName, name); sqlSafef(query, sizeof query, "INSERT into %s VALUES('%s', %d, '%s')", tableName, chromName, size, nibName); sqlUpdate(conn,query); total += size; } sqlDisconnect(&conn); printf("%lu total bases\n", total); }
void freeAllSeq(struct dnaSeq **pList)
/* Free all sequences on list and clear *pList.  An empty list is left
 * untouched. */
{
struct dnaSeq *node = *pList;

if (node == NULL)
    return;
do
    {
    struct dnaSeq *after = node->next;

    freeDnaSeq(&node);
    node = after;
    }
while (node != NULL);
*pList = NULL;
}
static void doAChain(struct chain *chain, struct nibTwoCache *tSeqCache, struct nibTwoCache *qSeqCache, FILE *f) /* Convert one chain to an axt. */ { struct dnaSeq *qSeq = loadSeqStrand(qSeqCache, chain->qName, chain->qStart, chain->qEnd, chain->qStrand); struct dnaSeq *tSeq = loadSeqStrand(tSeqCache, chain->tName, chain->tStart, chain->tEnd, '+'); struct axt *axtList= chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM); struct axt *axt = NULL; for (axt = axtList; axt != NULL; axt = axt->next) { double idRatio = axtIdRatio(axt); if (minIdRatio <= idRatio) { if (bedOut) bedWriteAxt(axt, chain->qSize, chain->tSize, idRatio, f); else axtWrite(axt, f); } } axtFreeList(&axtList); freeDnaSeq(&qSeq); freeDnaSeq(&tSeq); }
struct axt *netFillToAxt(struct cnFill *fill, struct dnaSeq *tChrom , int tSize, struct hash *qChromHash, char *nibDir, struct chain *chain, boolean swap)
/* Convert the part of chain covered by fill into a list of axt records.
 * Nib information for the query is looked up in qChromHash and added there
 * (opening <nibDir>/<qName>.nib) on first use.  When swap is true each
 * resulting axt is passed through axtSwap().  Returns NULL when the chain
 * has nothing inside the fill's target range. */
{
boolean isRev = (chain->qStrand == '-');
struct nibInfo *nib = hashFindVal(qChromHash, fill->qName);
struct chain *subChain, *chainToFree;
struct axt *axtList = NULL;
struct dnaSeq *qSeq;
int qOffset;

/* Get query sequence fragment. */
if (nib == NULL)
    {
    char path[512];

    AllocVar(nib);
    safef(path, sizeof(path), "%s/%s.nib", nibDir, fill->qName);
    nib->fileName = cloneString(path);
    nibOpenVerify(path, &nib->f, &nib->size);
    hashAdd(qChromHash, fill->qName, nib);
    }
qSeq = nibLoadPartMasked(NIB_MASK_MIXED, nib->fileName, fill->qStart, fill->qSize);
if (isRev)
    reverseComplement(qSeq->dna, qSeq->size);
qOffset = isRev ? nib->size - (fill->qStart + fill->qSize) : fill->qStart;

chainSubsetOnT(chain, fill->tStart, fill->tStart + fill->tSize, &subChain, &chainToFree);
if (subChain != NULL)
    {
    axtList = chainToAxt(subChain, qSeq, qOffset, tChrom, fill->tStart, 100, BIGNUM);
    if (swap)
        {
        struct axt *cur = axtList;

        while (cur != NULL)
            {
            axtSwap(cur, tSize, nib->size);
            cur = cur->next;
            }
        }
    }
chainFree(&chainToFree);
freeDnaSeq(&qSeq);
return axtList;
}
void randomEst(char *database, int count, char *output) /* randomEst - Select random ESTs from database. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char **row; int i, elIx, okCount = 0; struct slName *list = NULL, *el; FILE *f = NULL; char **array = NULL; struct dnaSeq *seq; struct hash *uniqHash = newHash(0); hSetDb(database); printf("Scanning database\n"); sr = sqlGetResult(conn, "select acc,type,direction from mrna"); while ((row = sqlNextRow(sr)) != NULL) { if (sameString(row[1], "EST") && sameString(row[2], "3")) { el = newSlName(row[0]); slAddHead(&list, el); ++okCount; } } sqlFreeResult(&sr); printf("Got %d 3' ESTs\n", okCount); AllocArray(array, okCount); for (i=0, el = list; el != NULL; el = el->next, ++i) array[i] = el->name; printf("Selecting %d to put into %s\n", count, output); f = mustOpen(output, "w"); for (i=0; i<count; ++i) { char *name; elIx = rand()%okCount; name = array[elIx]; if (!hashLookup(uniqHash, name)) { hashAdd(uniqHash, name, NULL); seq = hRnaSeq(name); faWriteNext(f, seq->name, seq->dna, seq->size); freeDnaSeq(&seq); } } }
void chromFeatureSeq(struct sqlConnection *conn, char *database, char *chrom, char *trackSpec, FILE *bedFile, FILE *faFile, int *retItemCount, int *retBaseCount) /* Write out sequence file for features from one chromosome. * This separate routine handles the non-merged case. It's * reason for being is so that the feature names get preserved. */ { boolean hasBin; char t[512], *s = NULL; char table[HDB_MAX_TABLE_STRING]; struct featureBits *fbList = NULL, *fb; if (trackSpec[0] == '!') errAbort("Sorry, '!' not available with fa output unless you use faMerge"); isolateTrackPartOfSpec(trackSpec, t); s = strchr(t, '.'); if (s != NULL) errAbort("Sorry, only database (not file) tracks allowed with " "fa output unless you use faMerge"); // ignore isSplit return from hFindSplitTable() (void) hFindSplitTable(database, chrom, t, table, &hasBin); fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom), where, TRUE, TRUE); for (fb = fbList; fb != NULL; fb = fb->next) { int s = fb->start, e = fb->end; if (bedFile != NULL) { fprintf(bedFile, "%s\t%d\t%d\t%s", fb->chrom, fb->start, fb->end, fb->name); if (fb->strand != '?') fprintf(bedFile, "\t0\t%c", fb->strand); fprintf(bedFile, "\n"); } if (faFile != NULL) { struct dnaSeq *seq = hDnaFromSeq(database, chrom, s, e, dnaLower); if (fb->strand == '-') reverseComplement(seq->dna, seq->size); faWriteNext(faFile, fb->name, seq->dna, seq->size); freeDnaSeq(&seq); } } featureBitsFreeList(&fbList); }
void printExons(struct genePred *gene, struct dnaSeq *seq, FILE *f) /* print the sequence from the exons */ { int exonPos = 0; int exonStart = 0; int exonEnd = 0; int size = 0; int total = 0; struct dnaSeq *exonOnlySeq; int offset = 0; verbose(3, "exonCount = %d\n", gene->exonCount); // get length of exons for (exonPos = 0; exonPos < gene->exonCount; exonPos++) { exonStart = gene->exonStarts[exonPos] - gene->txStart; exonEnd = gene->exonEnds[exonPos] - gene->txStart; size = exonEnd - exonStart; assert (size > 0); total += size; } // modeled after hgSeq.c AllocVar(exonOnlySeq); exonOnlySeq->dna = needLargeMem(total+1); exonOnlySeq->size = total; offset = 0; for (exonPos = 0; exonPos < gene->exonCount; exonPos++) { exonStart = gene->exonStarts[exonPos] - gene->txStart; exonEnd = gene->exonEnds[exonPos] - gene->txStart; size = exonEnd - exonStart; verbose(4, "size = %d\n", size); memcpy(exonOnlySeq->dna+offset, seq->dna+exonStart, size); offset += size; } assert(offset == exonOnlySeq->size); exonOnlySeq->dna[offset] = 0; faWriteNext(f, gene->name, exonOnlySeq->dna, exonOnlySeq->size); freeDnaSeq(&exonOnlySeq); }
void runSamples(char *goodFile, char *badFile, char *newDb, char *oldDb, int numToRun) /* run a bunch of tests */ { int i,j,k; FILE *good = mustOpen(goodFile, "w"); FILE *bad = mustOpen(badFile, "w"); char *tmp = NULL; int numGood=0, numBad=0, tooManyNs=0; boolean success = FALSE; struct dnaSeq *seq = NULL; printf("Running Tests\t"); for(i=0;i<numToRun;i++) { struct coordConvRep *ccr = NULL; struct coordConv *cc = NULL; if(!(i%10)) putTic(); cc = getRandomCoord(oldDb); seq = hDnaFromSeq(cc->chrom, cc->chromStart, cc->chromEnd, dnaLower); if(!(strstr(seq->dna, "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn"))) { chrom = cc->chrom; chromStart = cc->chromStart; chromEnd = cc->chromEnd; success = convertCoordinates(good, bad, printReport, printReport); if(success) numGood++; else numBad++; } else { tooManyNs++; } freeDnaSeq(&seq); coordConvFree(&cc); } carefulClose(&good); carefulClose(&bad); printf("\tDone.\n"); printf("Out of %d attempts got %d 'succesfully converted' and %d 'had problems', %d had too many N's\n", (numGood + numBad), numGood, numBad, tooManyNs); printf("After checking got %d of %d correctly called and %d incorrectly called.\n", hgTestCorrect, hgTestCorrect+hgTestWrong, hgTestWrong); }
static struct traceInfo* parseFastaRecord(FILE* fh, char* fastaName)
/* Read the next fasta record and build a traceInfo from it: ti comes from
 * parseTraceId() on the sequence name, templateId from parseTemplateId() on
 * the comment, and size from the sequence.  Returns NULL at end of file. */
{
struct dnaSeq* record;
char* commentLine;
struct traceInfo* info;

if (!faReadNext(fh, NULL, 0, &commentLine, &record))
    return NULL;  /* EOF */

AllocVar(info);
info->ti = parseTraceId(record->name, fastaName);
info->size = record->size;
info->templateId = parseTemplateId(commentLine, fastaName);

/* Only the parsed fields are kept. */
freeMem(commentLine);
freeDnaSeq(&record);
return info;
}
static struct mafAli *getRefAli(char *database, char *chrom, int start, int end) { struct mafAli *ali; struct mafComp *comp; char buffer[1024]; AllocVar(ali); AllocVar(comp); ali->components = comp; ali->textSize = end - start; safef(buffer, sizeof buffer, "%s.%s", database, chrom); comp->src = cloneString(buffer); comp->start = start; comp->strand = '+'; comp->size = end - start; struct dnaSeq *seq = hChromSeqMixed(database, chrom, start , end); comp->text = cloneString(seq->dna); freeDnaSeq(&seq); return ali; }
static void checkExtRecord(struct seqFields *seq, char *extPath) /* Check the external file record for a sequence (slow). Assumes * that bounds have been sanity check for a file. */ { /* read range into buffer */ FILE *fh = mustOpen(extPath, "r"); char *faBuf; char accVer[GB_ACC_BUFSZ]; struct dnaSeq *dnaSeq; if (fseeko(fh, seq->file_offset, SEEK_SET) < 0) { gbError("%s: can't seek %s", seq->acc, extPath); carefulClose(&fh); } faBuf = needMem(seq->file_size+1); mustRead(fh, faBuf, seq->file_size); faBuf[seq->file_size] = '\0'; carefulClose(&fh); /* verify contents */ if (faBuf[0] != '>') { gbError("%s: gbExtFile offset %lld doesn't start a fasta record: %s", seq->acc, (long long)seq->file_offset, extPath); free(faBuf); return; } dnaSeq = faFromMemText(faBuf); safef(accVer, sizeof(accVer), "%s.%d", seq->acc, seq->version); if (!sameString(dnaSeq->name, accVer)) gbError("%s: name in fasta header \"%s\" doesn't match expected \"%s\": %s", seq->acc, dnaSeq->name, accVer, extPath); if (dnaSeq->size != seq->size) gbError("%s: size of fasta sequence (%d) doesn't match expected (%d): %s", seq->acc, dnaSeq->size, seq->size, extPath); freeDnaSeq(&dnaSeq); }
void countCosmids(char *listFileName, FILE *out) /* Read each cosmid in list file and find out how big it is. */ { FILE *listFile = mustOpen(listFileName, "r"); char line[512], *s; int lineCount; struct dnaSeq *seq; char path[512]; while (fgets(line, sizeof(line), listFile)) { ++lineCount; s = trimSpaces(line); sprintf(path, "%s/%s", "C:/biodata/cbriggsae/finish", s); seq = faReadDna(path); ++cosmidCount; cosmidTotalSize += seq->size; freeDnaSeq(&seq); } fclose(listFile); cosmidAverageSize = round((double)cosmidTotalSize/cosmidCount); fprintf(out, "%d cosmids, average length %d\n", cosmidCount, cosmidAverageSize); }
void loadIfNewSeq(char *seqPath, boolean isTwoBit, char *newName, char strand, char **pName, struct dnaSeq **pSeq, char *pStrand)
/* Ensure *pSeq is sequence newName on the given strand.  A different name
 * frees the old sequence and loads the new one, either from the two bit
 * file seqPath or from <seqPath>/<newName>.nib, reverse-complementing it for
 * '-'.  The same name on a different strand just reverse-complements what
 * is already loaded.  *pName (set to the newName pointer itself) and
 * *pStrand are kept in step. */
{
struct dnaSeq *seq;

if (sameString(newName, *pName))
    {
    /* Already loaded; at most the strand needs flipping. */
    if (strand == *pStrand)
        return;
    seq = *pSeq;
    reverseComplement(seq->dna, seq->size);
    *pStrand = strand;
    return;
    }

freeDnaSeq(pSeq);
if (!isTwoBit)
    {
    char fileName[512];

    snprintf(fileName, sizeof(fileName), "%s/%s.nib", seqPath, newName);
    seq = nibLoadAllMasked(NIB_MASK_MIXED, fileName);
    *pSeq = seq;
    verbose(1, "Loaded %d bases in %s\n", seq->size, fileName);
    }
else
    {
    struct twoBitFile *tbf = twoBitOpenCached(seqPath);

    seq = twoBitReadSeqFrag(tbf, newName, 0, 0);
    *pSeq = seq;
    verbose(1, "Loaded %d bases of %s from %s\n", seq->size, newName, seqPath);
    }
*pName = newName;
*pStrand = strand;
if (strand == '-')
    reverseComplement(seq->dna, seq->size);
}
void writeCassetteExon(struct bed *bedList, struct altGraphX *ag, int eIx, boolean *outputted, FILE *bedOutFile, FILE *outfile, FILE *html, float conf )
/* Write out the information for a cassette exon: the bed list to
 * bedOutFile (if given), a browser link to html, the graph itself to stdout
 * the first time *outputted is found FALSE (which then becomes TRUE), and,
 * if outfile is given, the sequence of edge eIx when it is shorter than 200
 * bases.  A NULL 'outputted' skips the stdout step. */
{
int i = eIx;

if(bedOutFile != NULL)
    bedTabOutN(bedList,12, bedOutFile);
writeBrowserLink(html, ag, conf, i);
/* Test the flag, not the pointer: the previous "!outputted" was true only
 * for a NULL pointer, which the assignment below then dereferenced. */
if(outputted != NULL && !*outputted)
    {
    altGraphXTabOut(ag, stdout);
    *outputted = TRUE;
    }
if(outfile != NULL)
    {
    struct dnaSeq *seq = hChromSeq(ag->tName, ag->vPositions[ag->edgeStarts[i]], ag->vPositions[ag->edgeEnds[i]]);

    /* NOTE(review): the sequence is reverse-complemented for the "+" strand
     * rather than "-" - confirm this is intended. */
    if(sameString(ag->strand , "+"))
        reverseComplement(seq->dna, seq->size);
    if(seq->size < 200)
        faWriteNext(outfile, seq->name, seq->dna, seq->size);
    freeDnaSeq(&seq);
    }
}