void splitAbout(char *inName, off_t approxSize, char *outRoot) /* Split into chunks of about approxSize. Don't break up * sequence though. */ { struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); int digits = 2; off_t curPos = approxSize; int fileCount = 0; FILE *f = NULL; char outPath[PATH_LEN]; ZeroVar(&seq); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { if (curPos >= approxSize) { carefulClose(&f); curPos = 0; mkOutPath(outPath, outRoot, digits, fileCount++); verbose(2, "writing %s\n", outPath); f = mustOpen(outPath, "w"); } curPos += seq.size; faWriteNext(f, seq.name, seq.dna, seq.size); } carefulClose(&f); lineFileClose(&lf); }
void musAliAt(char *database, char *chrom, char *humanFa, char *mouseFa) /* musAliAt - Produce .fa files where mouse alignments hit on chr22. */ { char query[256], **row; struct sqlResult *sr; struct sqlConnection *conn; struct dnaSeq *musSeq, *homoSeq; struct psl *psl; struct hash *musHash = newHash(10); FILE *musOut = mustOpen(mouseFa, "w"); hSetDb(database); conn = hAllocConn(); sqlSafef(query, sizeof query, "select * from blatMouse where tName = '%s'", chrom); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { psl = pslLoad(row); if ((musSeq = hashFindVal(musHash, psl->qName)) == NULL) { musSeq = hExtSeq(psl->qName); hashAdd(musHash, psl->qName, NULL); faWriteNext(musOut, musSeq->name, musSeq->dna, musSeq->size); freeDnaSeq(&musSeq); } pslFree(&psl); } }
static void writeOneByGap(boolean oneFile, char *outRoot, int digits, int *pieceIx, FILE *f, char noPath[256], int pos, int thisSize, struct dnaSeq *seq, FILE *lift, int *writeCount, char *outPath)
/* Write thisSize bases of seq starting at pos as one FASTA record whose name
 * is noPath followed by the zero-padded piece index.
 *   oneFile    - when TRUE the record goes to the already open f (outPath is
 *                only used for the verbose message); when FALSE a new file
 *                named by mkOutPath is opened for the record and closed again.
 *   lift       - if non-NULL, receives one tab-separated line describing the
 *                piece: pos, piece name, thisSize, seq->name, seq->size.
 *   pieceIx, writeCount - both incremented by one on return. */
{
/* Large enough for a full 256-byte noPath plus the index; safef aborts
 * rather than writing past the end if the name still does not fit. */
char numOut[512];

if (!oneFile)
    {
    char fileName[512];
    mkOutPath(fileName, outRoot, digits, *pieceIx);
    f = mustOpen(fileName, "w");
    verbose(2, "writing %s\n", fileName);
    }
else
    verbose(2, "writing %s\n", outPath);

safef(numOut, sizeof(numOut), "%s%0*d", noPath, digits, *pieceIx);
verbose(3,"#\twriting piece %s, at pos %d for size %d\n", numOut,pos,thisSize);
faWriteNext(f, numOut, seq->dna + pos, thisSize);
if (lift)
    fprintf(lift, "%d\t%s\t%d\t%s\t%d\n", pos, numOut, thisSize, seq->name, seq->size);
*writeCount += 1;
*pieceIx += 1;

/* Only close what was opened here; in oneFile mode f belongs to the caller. */
if (!oneFile)
    carefulClose(&f);
}
int filterByQual(char *faFileName, FILE *f, int minQual, int minQualRun, struct hash *uniqHash)
/* Read faFileName together with its companion .qual file (same directory and
 * base name), and write the part selected by trimQa to f.  A sequence whose
 * name is already in uniqHash is skipped with a warning; otherwise the name
 * is added to uniqHash.  Returns the size field of the loaded record (the
 * untrimmed size). */
{
char dir[256], name[128], ext[64];
char qaFileName[512];
struct qaSeq *qa;
int untrimmedSize;

splitPath(faFileName, dir, name, ext);
sprintf(qaFileName, "%s%s.qual", dir, name);
qa = qaMustReadBoth(qaFileName, faFileName);

if (!hashLookup(uniqHash, qa->name))
    {
    int keepStart, keepSize;
    hashAdd(uniqHash, qa->name, NULL);
    if (trimQa(qa, minQual, minQualRun, &keepStart, &keepSize))
        faWriteNext(f, qa->name, qa->dna + keepStart, keepSize);
    }
else
    {
    warn("%s duplicated, ignoring all but first occurence", qa->name);
    }

untrimmedSize = qa->size;
qaSeqFree(&qa);
return untrimmedSize;
}
void agpToFaOne(struct agpFrag **pAgpList, char *agpFile, char *agpSeq, char *seqDir, int lastPos, FILE *f)
/* Given one sequence's worth of AGP in pAgpList, process it into FASTA
 * and write to f.
 *   pAgpList - fragment list; it is reversed here before use and freed
 *              before returning.
 *   agpFile  - only used in the error message.
 *   agpSeq   - name written on the FASTA record.
 *   lastPos  - length of the sequence to build; 0 is treated as
 *              "sequence not found" and aborts.
 * The buffer starts out as all 'n' and is then filled in by the routine
 * selected with the simpleMulti / simpleMultiMixed / simple options. */
{
DNA *dna = NULL;

slReverse(pAgpList);
if (lastPos == 0)
    errAbort("%s not found in %s\n", agpSeq, agpFile);
dna = needHugeMem(lastPos+1);
memset(dna, 'n', lastPos);
dna[lastPos] = 0;

if (optionExists("simpleMulti"))
    {
    simpleMultiFillInSequence(0, seqDir, *pAgpList, dna, lastPos);
    }
else if (optionExists("simpleMultiMixed"))
    {
    simpleMultiFillInSequence(1, seqDir, *pAgpList, dna, lastPos);
    }
else if (optionExists("simple"))
    {
    simpleFillInSequence(seqDir, *pAgpList, dna, lastPos);
    }
else
    {
    gsFillInSequence(seqDir, *pAgpList, dna, lastPos);
    }

verbose(2,"Writing %s (%d bases)\n", agpSeq, lastPos);
faWriteNext(f, agpSeq, dna, lastPos);
agpFragFreeList(pAgpList);

/* The sequence buffer is only needed until it has been written; without
 * this every call leaked lastPos+1 bytes. */
freeMem(dna);
}
void splitByNamePrefix(char *inName, char *outRoot, int preFixCount)
/* Split into chunks using prefix of sequence names.
 * Each record of inName is appended to <outDir><prefix>.fa, where outDir is
 * the directory part of outRoot and prefix is the first preFixCount
 * characters of the record name.  Files are opened in append mode, so
 * records sharing a prefix accumulate in the same file. */
{
struct dnaSeq seq;
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = NULL;
char outDir[256], outFile[128], ext[64], preFix[512];
/* Big enough for the longest outDir plus the longest prefix plus ".fa". */
char outPath[1024];

ZeroVar(&seq);
splitPath(outRoot, outDir, outFile, ext);

/* Checked at run time rather than with assert() so that the bound still
 * holds in builds that define NDEBUG. */
if (preFixCount < 0 || preFixCount >= (int)sizeof(preFix))
    errAbort("prefix length %d out of range, must be between 0 and %d",
        preFixCount, (int)sizeof(preFix) - 1);

while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
    {
    carefulClose(&f);
    /* strncpy does not terminate when the name is at least preFixCount
     * long, hence the explicit terminator. */
    strncpy(preFix, seq.name, preFixCount);
    preFix[preFixCount] = '\0';
    safef(outPath, sizeof(outPath), "%s%s.fa", outDir, preFix);
    verbose(2, "writing %s\n", outPath);
    f = mustOpen(outPath, "a");
    faWriteNext(f, seq.name, seq.dna, seq.size);
    }
carefulClose(&f);
lineFileClose(&lf);
}
void splitByRecord(char *inName, int splitCount, char *outRoot, off_t estSize) /* Split into a file base by base. */ { struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); int digits = digitsBaseTen(splitCount); off_t nextEnd = 0; off_t curPos = 0; int fileCount = 0; FILE *f = NULL; char outPath[PATH_LEN]; ZeroVar(&seq); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { curPos += seq.size; if (curPos > nextEnd) { carefulClose(&f); mkOutPath(outPath, outRoot, digits, fileCount++); verbose(2, "writing %s\n", outPath); f = mustOpen(outPath, "w"); nextEnd = calcNextEnd(fileCount, splitCount, estSize); } faWriteNext(f, seq.name, seq.dna, seq.size); } carefulClose(&f); lineFileClose(&lf); }
void faWrite(char *fileName, char *startLine, DNA *dna, int dnaSize)
/* Create (or overwrite) fileName holding the single record described by
 * startLine, dna and dnaSize.  Aborts if the file cannot be closed. */
{
FILE *out = mustOpen(fileName, "w");
int closeStatus;

faWriteNext(out, startLine, dna, dnaSize);
closeStatus = fclose(out);
if (closeStatus != 0)
    {
    errnoAbort("fclose failed");
    }
}
void correctOne(struct dnaSeq *est, struct psl *psl, char *nibDir, struct hash *nibHash, FILE *f) /* Write one corrected EST to file. */ { struct dnaSeq *geno = readCachedNib(nibHash, nibDir, psl->tName, psl->tStart, psl->tEnd - psl->tStart); struct dyString *t = newDyString(est->size+20); int qSize = psl->qSize; int tSize = psl->tSize; int qLastEnd = 0; int blockIx; struct mrnaBlock *mbList, *mb; int genoOffset = psl->tStart; boolean isRc = FALSE; /* Load sequence and alignment blocks, coping with reverse * strand as necessary. */ toUpperN(geno->dna, geno->size); /* This helps debug... */ mbList = mrnaBlockFromPsl(psl); if (psl->strand[0] == '-') { reverseComplement(geno->dna, geno->size); genoOffset = tSize - psl->tEnd; for (mb = mbList; mb != NULL; mb = mb->next) { reverseIntRange(&mb->tStart, &mb->tEnd, tSize); reverseIntRange(&mb->qStart, &mb->qEnd, qSize); } slReverse(&mbList); isRc = TRUE; } /* Make t have corrected sequence. */ for (mb = mbList; mb != NULL; mb = mb->next) { int qStart = mb->qStart; int qEnd = mb->qEnd; int uncovSize = qStart - qLastEnd; if (uncovSize > 0) dyStringAppendN(t, est->dna + qLastEnd, uncovSize); dyStringAppendN(t, geno->dna + mb->tStart - genoOffset, mb->tEnd - mb->tStart); qLastEnd = qEnd; } if (qLastEnd != qSize) { int uncovSize = qSize - qLastEnd; dyStringAppendN(t, est->dna + qLastEnd, uncovSize); } /* Output */ faWriteNext(f, est->name, t->string, t->stringSize); /* Clean up time. */ slFreeList(&mbList); freeDyString(&t); freeDnaSeq(&geno); }
void webOutFasta(struct dnaSeq *seq, char *db)
/* Print seq to stdout as FASTA wrapped in a <pre> element so it can be cut
 * and pasted, followed by a blat link for the sequence and two line breaks. */
{
fputs("<pre>\n", stdout);
faWriteNext(stdout, seq->name, seq->dna, seq->size);
fputs("</pre>\n", stdout);
outputBlatLink("Blat Sequence on new Draft", db, seq);
fputs("<br><br>", stdout);
}
static void processRnaSeq(FILE *fh, struct sqlConnection *conn, struct refSeqVerInfo *rsvi)
/* Fetch the RNA sequence for rsvi->acc (the name already includes the
 * version) and write it to fh as FASTA.  Aborts if it cannot be fetched. */
{
struct dnaSeq *rna = hGenBankGetMrnaC(conn, rsvi->acc, NULL);

if (rna != NULL)
    {
    faWriteNext(fh, rna->name, rna->dna, rna->size);
    dnaSeqFree(&rna);
    return;
    }
errAbort("failed to get %s from database", rsvi->acc);
}
void outputOne(struct twoBitFile *tbf, char *seqSpec, FILE *f, int start, int end)
/* Read the fragment start..end of seqSpec from tbf and write it to f as
 * FASTA.  When noMask is set the bases are upper-cased first. */
{
struct dnaSeq *frag = twoBitReadSeqFrag(tbf, seqSpec, start, end);

if (noMask)
    {
    toUpperN(frag->dna, frag->size);
    }
faWriteNext(f, frag->name, frag->dna, frag->size);
dnaSeqFree(&frag);
}
void outputProtein(struct cdsEvidence *cds, struct dnaSeq *txSeq, FILE *f) /* Translate txSeq to protein guided by cds, and output to file. * The implementation is a little complicated by checking for internal * stop codons and other error conditions. */ { boolean selenocysteine = FALSE; if (selenocysteineHash != NULL) { if (hashLookup(selenocysteineHash, txSeq->name)) selenocysteine = TRUE; } struct dyString *dy = dyStringNew(4*1024); int blockIx; for (blockIx=0; blockIx<cds->cdsCount; ++blockIx) { DNA *dna = txSeq->dna + cds->cdsStarts[blockIx]; int rnaSize = cds->cdsSizes[blockIx]; if (rnaSize%3 != 0) { errAbort("size of block (%d) not multiple of 3 in %s", rnaSize, cds->name); } int aaSize = rnaSize/3; int i; for (i=0; i<aaSize; ++i) { AA aa = lookupCodon(dna); if (aa == 0) { aa = '*'; if (selenocysteine) { if (!isReallyStopCodon(dna, TRUE)) aa = 'U'; } } dyStringAppendC(dy, aa); dna += 3; } } int lastCharIx = dy->stringSize-1; if (dy->string[lastCharIx] == '*') { dy->string[lastCharIx] = 0; dy->stringSize = lastCharIx; } char *prematureStop = strchr(dy->string, '*'); if (prematureStop != NULL) { errAbort("Stop codons in CDS at position %d for %s", (int)(prematureStop - dy->string), cds->name); } faWriteNext(f, cds->name, dy->string, dy->stringSize); dyStringFree(&dy); }
void writePeptide(FILE *outFa, char *acc, struct dnaSeq *dna, struct genbankCds *cds)
/* Translate the region of dna starting at cds->start into a peptide and
 * write it to outFa under the name acc.  The base at cds->end is replaced
 * by a terminator for the duration of the translation and put back
 * afterwards, so dna is unchanged on return. */
{
char *pep = needMem(dna->size);   /* more than needed */
char *endPos = dna->dna + cds->end;
char saved = *endPos;

*endPos = '\0';
dnaTranslateSome(dna->dna + cds->start, pep, dna->size);
*endPos = saved;

faWriteNext(outFa, acc, pep, strlen(pep));
freeMem(pep);
}
static void writeFastas(struct gff3File *g3f, FILE *fh)
/* Write the "##FASTA" marker line followed by every sequence attached to
 * g3f.  Nothing at all is written when g3f has no sequences. */
{
struct dnaSeq *cur = g3f->seqs;

if (cur == NULL)
    return;

fputs("##FASTA\n", fh);
while (cur != NULL)
    {
    faWriteNext(fh, cur->name, cur->dna, cur->size);
    cur = cur->next;
    }
}
void faWriteAll(char *fileName, bioSeq *seqList)
/* Create (or overwrite) fileName and write every sequence of seqList to it
 * in list order.  Aborts if the file cannot be closed. */
{
FILE *out = mustOpen(fileName, "w");
bioSeq *cur = seqList;
int closeStatus;

while (cur != NULL)
    {
    faWriteNext(out, cur->name, cur->dna, cur->size);
    cur = cur->next;
    }
closeStatus = fclose(out);
if (closeStatus != 0)
    {
    errnoAbort("fclose failed");
    }
}
void writeSeg(char *seqName, struct segment *seg, FILE *gtf, FILE *sub, FILE *trans) /* Write out gtf and bed files. */ { struct genScanGene *gene; struct genScanFeature *gsf; for (gene = seg->geneList; gene != NULL; gene = gene->next) { char geneName[128]; boolean someCds = FALSE; sprintf(geneName, "%s.%d", seqName, gene->id); for (gsf = gene->featureList; gsf != NULL; gsf = gsf->next) { if (sameString("Init", gsf->type)) { cdsOut(gtf, gsf, geneName, seqName); someCds = TRUE; } else if (sameString("Intr", gsf->type)) { cdsOut(gtf, gsf, geneName, seqName); someCds = TRUE; } else if (sameString("Term", gsf->type)) { cdsOut(gtf, gsf, geneName, seqName); someCds = TRUE; } else if (sameString("Sngl", gsf->type)) { cdsOut(gtf, gsf, geneName, seqName); someCds = TRUE; } } if ((trans != NULL) && (gene->featureList != NULL)) { if (someCds) faWriteNext(trans, geneName, gene->translation, strlen(gene->translation)); } } if (sub != NULL) { for (gsf = seg->suboptList; gsf != NULL; gsf = gsf->next) { fprintf(sub, "%s\t%d\t%d\t%s.%d\t%d\t%c\n", seqName, gsf->start, gsf->end, seqName, gsf->featId, round(1000*gsf->p), gsf->strand); } } }
void createFastaFilesForBits(char *root, struct genomeBit *gbList, boolean addDummy)
/* load all of the fasta records for the bits in the genome list into one
 * fasta file. Uses .nib files as they are much more compact and allow
 * random access.
 *   root     - passed to nibFileFromChrom to locate the .nib file of each
 *              chromosome.
 *   gbList   - must be non-empty; each bit becomes one record named
 *              chrom:chromStart-chromEnd.
 *   addDummy - when set, a final ten-base all-'n' record named "garbage" is
 *              appended.
 * NOTE(review): the output file name is built from the global outputRoot,
 * not from the root parameter -- confirm that this is intended. */
{
struct dnaSeq *seq = NULL;
struct genomeBit *gb = NULL;
FILE *faOut = NULL;
char *faFile = NULL;
char *nibFile = NULL;

assert(gbList);
faFile = fileNameFromGenomeBit(outputRoot, ".fa", gbList);
faOut = mustOpen(faFile, "w");
for(gb = gbList; gb != NULL; gb = gb->next)
    {
    char buff[256];
    snprintf(buff, sizeof(buff), "%s:%u-%u", gb->chrom, gb->chromStart, gb->chromEnd);
    nibFile = nibFileFromChrom(root, gb->chrom);
    seq = nibLoadPartMasked(NIB_MASK_MIXED, nibFile, gb->chromStart, gb->chromEnd-gb->chromStart);
    /* A running base total used to be accumulated here with strlen(); it
     * was never read, so the extra pass over every sequence is gone. */
    faWriteNext(faOut, buff, seq->dna, seq->size);
    dnaSeqFree(&seq);
    freez(&nibFile);
    }

/* Add a dummy fasta record so that avid will order and orient things for us.. */
if(addDummy)
    faWriteNext(faOut, "garbage", "nnnnnnnnnn", 10);
carefulClose(&faOut);

/** This bit is commented out as we are now using nnnn's as repeat masking */
/*  if(slCount(gbList) > 1) */
/*      repeatMaskFile(outputRoot, gbList); */
/*  else */
/*      fakeRepeatMaskFile(outputRoot, gbList); */
freez(&faFile);
}
void randomEst(char *database, int count, char *output) /* randomEst - Select random ESTs from database. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char **row; int i, elIx, okCount = 0; struct slName *list = NULL, *el; FILE *f = NULL; char **array = NULL; struct dnaSeq *seq; struct hash *uniqHash = newHash(0); hSetDb(database); printf("Scanning database\n"); sr = sqlGetResult(conn, "select acc,type,direction from mrna"); while ((row = sqlNextRow(sr)) != NULL) { if (sameString(row[1], "EST") && sameString(row[2], "3")) { el = newSlName(row[0]); slAddHead(&list, el); ++okCount; } } sqlFreeResult(&sr); printf("Got %d 3' ESTs\n", okCount); AllocArray(array, okCount); for (i=0, el = list; el != NULL; el = el->next, ++i) array[i] = el->name; printf("Selecting %d to put into %s\n", count, output); f = mustOpen(output, "w"); for (i=0; i<count; ++i) { char *name; elIx = rand()%okCount; name = array[elIx]; if (!hashLookup(uniqHash, name)) { hashAdd(uniqHash, name, NULL); seq = hRnaSeq(name); faWriteNext(f, seq->name, seq->dna, seq->size); freeDnaSeq(&seq); } } }
void chromFeatureSeq(struct sqlConnection *conn, char *database, char *chrom, char *trackSpec, FILE *bedFile, FILE *faFile, int *retItemCount, int *retBaseCount) /* Write out sequence file for features from one chromosome. * This separate routine handles the non-merged case. It's * reason for being is so that the feature names get preserved. */ { boolean hasBin; char t[512], *s = NULL; char table[HDB_MAX_TABLE_STRING]; struct featureBits *fbList = NULL, *fb; if (trackSpec[0] == '!') errAbort("Sorry, '!' not available with fa output unless you use faMerge"); isolateTrackPartOfSpec(trackSpec, t); s = strchr(t, '.'); if (s != NULL) errAbort("Sorry, only database (not file) tracks allowed with " "fa output unless you use faMerge"); // ignore isSplit return from hFindSplitTable() (void) hFindSplitTable(database, chrom, t, table, &hasBin); fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom), where, TRUE, TRUE); for (fb = fbList; fb != NULL; fb = fb->next) { int s = fb->start, e = fb->end; if (bedFile != NULL) { fprintf(bedFile, "%s\t%d\t%d\t%s", fb->chrom, fb->start, fb->end, fb->name); if (fb->strand != '?') fprintf(bedFile, "\t0\t%c", fb->strand); fprintf(bedFile, "\n"); } if (faFile != NULL) { struct dnaSeq *seq = hDnaFromSeq(database, chrom, s, e, dnaLower); if (fb->strand == '-') reverseComplement(seq->dna, seq->size); faWriteNext(faFile, fb->name, seq->dna, seq->size); freeDnaSeq(&seq); } } featureBitsFreeList(&fbList); }
static void processProtSeq(FILE *fh, struct sqlConnection *conn, struct refSeqVerInfo *rsvi, struct hash *doneProts) /* get an protein sequence, which already includes version in name. Don't duplicate NPs */ { char query[128]; sqlSafef(query, sizeof(query), "SELECT protAcc FROM refLink WHERE mrnaAcc = \"%s\"", rsvi->acc); char *protAcc = sqlNeedQuickString(conn, query); if (isNotEmpty(protAcc) && hashLookup(doneProts, protAcc) == NULL) { struct dnaSeq *seq = hGenBankGetPepC(conn, protAcc, NULL); if (seq == NULL) errAbort("failed to get %s from database", protAcc); faWriteNext(fh, seq->name, seq->dna, seq->size); dnaSeqFree(&seq); hashAdd(doneProts, protAcc, NULL); } freeMem(protAcc); }
void printExons(struct genePred *gene, struct dnaSeq *seq, FILE *f) /* print the sequence from the exons */ { int exonPos = 0; int exonStart = 0; int exonEnd = 0; int size = 0; int total = 0; struct dnaSeq *exonOnlySeq; int offset = 0; verbose(3, "exonCount = %d\n", gene->exonCount); // get length of exons for (exonPos = 0; exonPos < gene->exonCount; exonPos++) { exonStart = gene->exonStarts[exonPos] - gene->txStart; exonEnd = gene->exonEnds[exonPos] - gene->txStart; size = exonEnd - exonStart; assert (size > 0); total += size; } // modeled after hgSeq.c AllocVar(exonOnlySeq); exonOnlySeq->dna = needLargeMem(total+1); exonOnlySeq->size = total; offset = 0; for (exonPos = 0; exonPos < gene->exonCount; exonPos++) { exonStart = gene->exonStarts[exonPos] - gene->txStart; exonEnd = gene->exonEnds[exonPos] - gene->txStart; size = exonEnd - exonStart; verbose(4, "size = %d\n", size); memcpy(exonOnlySeq->dna+offset, seq->dna+exonStart, size); offset += size; } assert(offset == exonOnlySeq->size); exonOnlySeq->dna[offset] = 0; faWriteNext(f, gene->name, exonOnlySeq->dna, exonOnlySeq->size); freeDnaSeq(&exonOnlySeq); }
void chopFaLines(char *inName, char *outName) /* chopFaLines - Read in FA file with long lines and rewrite it with shorter lines. */ { FILE *in = mustOpen(inName, "r"); FILE *out = mustOpen(outName, "w"); char *commentLine; struct dnaSeq *seq; while (faReadNext(in, NULL, TRUE, &commentLine, &seq)) { commentLine = trimSpaces(commentLine+1); uglyf(">%s\n", commentLine); mustWrite(uglyOut, seq->dna, 100); uglyf("\n"); uglyAbort("All for now"); faWriteNext(out, commentLine, seq->dna, seq->size); } }
void polyTrimSeq(struct dnaSeq *seq, FILE *fh)
/* Trim seq as selected by the trimPolyA and trimPolyT settings and write
 * what is left to fh.  seq is modified in place: its size shrinks by the
 * amounts the mask routines return, a terminator is stored at the new end
 * after tail trimming, and seq->dna is advanced past a trimmed head.
 * NOTE(review): since seq->dna can be advanced, a caller that owns the
 * buffer can no longer free it through seq->dna afterwards -- confirm that
 * callers do not rely on that. */
{
if (trimPolyA)
    {
    int tailLen = maskTailPolyA(seq->dna, seq->size);
    seq->size = seq->size - tailLen;
    seq->dna[seq->size] = '\0';
    }

if (trimPolyT)
    {
    int headLen = maskHeadPolyT(seq->dna, seq->size);
    seq->dna = seq->dna + headLen;
    seq->size = seq->size - headLen;
    }

faWriteNext(fh, seq->name, seq->dna, seq->size);
}
int main(int argc, char *argv[]) /* Process command line. */ { char *inName, *outName, **inNames; FILE *in, *out; int i, inCount; DNA *dna; int inSize, outSize; int dnaOff; char *seqName; struct dyString *subSeqName = newDyString(512); int maxSize = 100000; if (argc < 3) usage(); outName = argv[1]; inNames = &argv[2]; inCount = argc-2; out = mustOpen(outName, "w"); for (i=0; i<inCount; ++i) { inName = inNames[i]; printf("processing %s", inName); in = mustOpen(inName, "r"); while (faFastReadNext(in, &dna, &inSize, &seqName)) { for (dnaOff = 0; dnaOff < inSize; dnaOff += outSize) { printf("."); fflush(stdout); outSize = inSize - dnaOff; if (outSize > maxSize) outSize = maxSize; dyStringClear(subSeqName); dyStringPrintf(subSeqName, "%s.%d", seqName, dnaOff); faWriteNext(out, subSeqName->string, dna+dnaOff, outSize); } } fclose(in); printf("\n"); } }
void scrambleFa(char *inName, char *outName)
/* scrambleFa - scramble the order of records in an fa file.
 * All records of inName are loaded, then repeatedly one of the remaining
 * records is picked with rand(), written to outName and taken off the
 * list, until none are left. */
{
struct dnaSeq *seqList, *seq;
int seqCount;
int seqIx;
FILE *out;

seqList = faReadAllDna(inName);
out = mustOpen(outName, "w");
seqCount = slCount(seqList);
while (seqCount > 0)
    {
    seqIx = rand()%seqCount;
    seq = slElementFromIx(seqList, seqIx);
    faWriteNext(out, seq->name, seq->dna, seq->size);
    slRemoveEl(&seqList, seq);
    /* The record is off the list now and nothing else refers to it, so it
     * has to be freed here or it is leaked. */
    freeDnaSeq(&seq);
    --seqCount;
    }

/* carefulClose reports a failed close, which a bare fclose() ignored. */
carefulClose(&out);
}
void getAccMrna(char *acc, struct sqlConnection *conn, FILE *outFa) /* get mrna for an accession */ { HGID seqId; char *faSeq; struct dnaSeq *dna; boolean cdsOk = TRUE; char accBuf[512]; struct genbankCds cds; faSeq = hGetSeqAndId(conn, acc, &seqId); if (faSeq == NULL) { fprintf(stderr, "%s\tsequence not in database\n", acc); return; } dna = faFromMemText(faSeq); if (cdsUpper || peptides) cdsOk = getCds(conn, acc, dna->size, !cdsUpperAll, &cds); if (cdsOk && cdsUpper) upperCaseCds(dna, &cds); if ((cdsOk || cdsUpperAll) && inclVer) { int ver = getVersion(conn, acc); safef(accBuf, sizeof(accBuf), "%s.%d", acc, ver); acc = accBuf; } if ((cdsOk || cdsUpperAll)) { if (peptides) writePeptide(outFa, acc, dna, &cds); else faWriteNext(outFa, acc, dna->dna, dna->size); } dnaSeqFree(&dna); }
static void processSeqsFromBed(struct twoBitFile *tbf, char *bedFileName, FILE *outFile) /* Get sequences defined by beds. Exclude introns. */ { struct bed *bed, *bedList = bedLoadAll(bedFileName); for (bed = bedList; bed != NULL; bed = bed->next) { struct dnaSeq *seq = twoBitAndBedToSeq(tbf, bed); char* seqName = NULL; if (clBedPos) { char buf[1024]; safef(buf, 1024, "%s:%d-%d", bed->chrom, bed->chromStart, bed->chromEnd); seqName = buf; } else seqName = seq->name; if (noMask) toUpperN(seq->dna, seq->size); faWriteNext(outFile, seqName, seq->dna, seq->size); dnaSeqFree(&seq); } }
void correctEst(char *oldFa, char *pslFile, char *nibDir, char *outFa)
/* correctEst - Correct ESTs by passing them through genome.
 * Each EST of oldFa that has an alignment in pslFile (looked up by name) is
 * written to outFa in corrected form by correctOne; ESTs without an
 * alignment are copied through unchanged. */
{
struct hash *pslHash = hashPsls(pslFile);
struct lineFile *lf = lineFileOpen(oldFa, FALSE);
FILE *f = mustOpen(outFa, "w");
static struct dnaSeq est;
struct psl *psl;
struct hash *nibHash = newHash(8);

while (faSpeedReadNext(lf, &est.dna, &est.size, &est.name))
    {
    if ((psl = hashFindVal(pslHash, est.name)) != NULL)
        {
        correctOne(&est, psl, nibDir, nibHash, f);
        }
    else
        {
        faWriteNext(f, est.name, est.dna, est.size);
        }
    }

/* Close the output so a failed flush is reported, and release the input.
 * The two hashes are left alone: how their contents are owned is not
 * visible from here. */
carefulClose(&f);
lineFileClose(&lf);
}
void seqFromPsl(char *inPsl, char *inTwoBit, char *outFa) /* seqFromPsl - Extract masked sequence from database corresponding to psl file. */ { struct twoBitFile *tbf = twoBitOpen(inTwoBit); struct lineFile *lf = pslFileOpen(inPsl); FILE *f = mustOpen(outFa, "w"); struct psl *psl; while ((psl = pslNext(lf)) != NULL) { char faHead[512]; struct dnaSeq *seq = twoBitReadSeqFrag(tbf, psl->tName, psl->tStart, psl->tEnd); if (psl->strand[0] == '-') reverseComplement(seq->dna, seq->size); safef(faHead, sizeof(faHead), "%s (%s:%d-%d)", psl->qName, psl->tName, psl->tStart+1, psl->tEnd); if (hardMask) lowerToN(seq->dna, seq->size); faWriteNext(f, faHead, seq->dna, seq->size); } carefulClose(&f); }