static int calcDigitCount(struct axt *axt, int tSize, int qSize)
/* Return the number of decimal digits needed for the blast position display
 * of this alignment.  For each side (query and target) the value measured is
 * size - start + 1 when that side is on the '-' strand, and the end
 * coordinate otherwise; the wider of the two digit counts is returned. */
{
    int qLargest = (axt->qStrand == '-') ? (qSize - axt->qStart + 1) : axt->qEnd;
    int qWidth = digitsBaseTen(qLargest);
    int tLargest = (axt->tStrand == '-') ? (tSize - axt->tStart + 1) : axt->tEnd;
    int tWidth = digitsBaseTen(tLargest);

    return Blatmax(tWidth, qWidth);
}
void segWrite(FILE *f, struct segBlock *block) /* Write next segment block to the file. */ { struct segComp *comp; int srcChars = 0, startChars = 0, sizeChars = 0, srcSizeChars = 0; /* Write segment block header. */ fprintf(f, "b"); if (block->name != NULL) fprintf(f, " name=%s", block->name); if (block->val != 0) fprintf(f, " val=%d", block->val); fprintf(f, "\n"); /* Figure out length of each field. */ for (comp = block->components; comp != NULL; comp = comp->next) { int len = 0; /* A name line '.' will break some tools, so replace it with a * generic name */ if (sameString(comp->src, ".")) { freeMem(&comp->src); comp->src = cloneString("defaultName"); } len = strlen(comp->src); if (srcChars < len) srcChars = len; len = digitsBaseTen(comp->start); if (startChars < len) startChars = len; len = digitsBaseTen(comp->size); if (sizeChars < len) sizeChars = len; len = digitsBaseTen(comp->srcSize); if (srcSizeChars < len) srcSizeChars = len; } /* Write out each component. */ for (comp = block->components; comp != NULL; comp = comp->next) { fprintf(f, "s %-*s %*d %*d %c %*d\n", srcChars, comp->src, startChars, comp->start, sizeChars, comp->size, comp->strand, srcSizeChars, comp->srcSize); } /* Write out blank separator line. */ fprintf(f, "\n"); }
void splitByRecord(char *inName, int splitCount, char *outRoot, off_t estSize)
/* Split a fasta file record by record.
 *
 * Records are read from inName and written whole, in order, to output files
 * whose paths are built by mkOutPath() from outRoot and a running file
 * index.  A new output file is started whenever the running total of
 * sequence sizes passes the boundary returned by calcNextEnd(). */
{
    struct dnaSeq seq;
    struct lineFile *lf = lineFileOpen(inName, TRUE);
    int digits = digitsBaseTen(splitCount);
    off_t nextEnd = 0;
    off_t curPos = 0;
    int fileCount = 0;
    FILE *f = NULL;
    char outPath[PATH_LEN];

    ZeroVar(&seq);
    while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
        {
        curPos += seq.size;
        /* Also open a file when none is open yet: a leading zero-length
         * record leaves curPos == nextEnd == 0 and would otherwise be
         * written to a NULL stream. */
        if (f == NULL || curPos > nextEnd)
            {
            carefulClose(&f);
            mkOutPath(outPath, outRoot, digits, fileCount++);
            verbose(2, "writing %s\n", outPath);
            f = mustOpen(outPath, "w");
            nextEnd = calcNextEnd(fileCount, splitCount, estSize);
            }
        faWriteNext(f, seq.name, seq.dna, seq.size);
        }
    carefulClose(&f);
    lineFileClose(&lf);
}
static int maxDigits(int x, int y)
/* Return the larger of the base-ten digit counts of x and y, as reported
 * by digitsBaseTen(). */
{
    int widest = digitsBaseTen(x);
    int other = digitsBaseTen(y);

    if (other > widest)
        widest = other;
    return widest;
}
void splitByGap(char *inName, int pieceSize, char *outRoot, long long estSize) /* Split up file into pieces at most pieceSize bases long, at gap boundaries * if possible. */ { off_t pieces = (estSize + pieceSize-1)/pieceSize; int digits = digitsBaseTen(pieces); int minGapSize = optionInt("minGapSize", 1000); boolean noGapDrops = optionExists("noGapDrops"); int maxN = optionInt("maxN", pieceSize-1); boolean oneFile = optionExists("oneFile"); char fileName[512]; char dirOnly[256], noPath[128]; int pos, pieceIx = 0, writeCount = 0; struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = NULL; Bits *bits = NULL; int seqCount = 0; char *outFile = optionVal("out", NULL); char *liftFile = optionVal("lift", NULL); FILE *lift = NULL; ZeroVar(&seq); if (minGapSize < 1) errAbort("ERROR: minGapSize must be > 0"); splitPath(outRoot, dirOnly, noPath, NULL); if (oneFile) { sprintf(fileName, "%s.fa", outRoot); f = mustOpen(fileName, "w"); } else fileName[0] = '\0'; if (liftFile) lift = mustOpen(liftFile, "w"); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { bits = bitAlloc(seq.size); setBitsN(seq.dna, seq.size, bits); ++seqCount; if (outFile != NULL) { if (seqCount > 1) errAbort("Can only handle in files with one sequence using out option"); bitsForOut(outFile, seq.size, bits); } pos = 0; while (pos < seq.size) { boolean gotGap = FALSE; int gapStart = 0; int gapSize = 0; int endSize = seq.size - pos; int thisSize = min(endSize, pieceSize); int startGapLen = 0; if (seq.dna[pos] == 'n' || seq.dna[pos] == 'N') { startGapLen = bitFindClear(bits, pos, endSize) - pos; verbose(3,"#\tstarting gap at %d for length: %d\n", pos, startGapLen ); } /* if a block is all gap for longer than minGapSize, then * keep it all together in one large piece */ if (startGapLen > minGapSize) { if (noGapDrops) { writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, startGapLen, &seq, lift, &writeCount, fileName); } else verbose(3,"#\tbeginning gap of %d 
size skipped\n", startGapLen); thisSize = startGapLen; } else if (thisSize > 0 && bitCountRange(bits, pos, thisSize) <= maxN) { if (endSize>pieceSize) /* otherwise chops tiny piece at very end */ { gotGap = findLastGap(&(seq.dna[pos]), thisSize, endSize, minGapSize, &gapStart, &gapSize); if (gotGap) thisSize = gapStart; } writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, thisSize, &seq, lift, &writeCount, fileName); } pos += thisSize; if (gotGap) { /* last block is all gap, write it all out */ /*if ((pos + gapSize) >= seq.size)*/ if (noGapDrops) { writeOneByGap(oneFile, outRoot, digits, &pieceIx, f, noPath, pos, gapSize, &seq ,lift, &writeCount, fileName); verbose(3, "#\tadding gapSize %d to pos %d -> %d and writing gap\n", gapSize, pos, pos+gapSize); } else verbose(3,"#\tadding gapSize %d to pos %d -> %d\n", gapSize, pos, pos+gapSize); pos += gapSize; } } bitFree(&bits); } carefulClose(&f); carefulClose(&lift); lineFileClose(&lf); printf("%d pieces of %d written\n", writeCount, pieceIx); }
void splitByCount(char *inName, int pieceSize, char *outRoot, off_t estSize, int extra) /* Split up file into pieces pieceSize long. */ { off_t pieces = (estSize + pieceSize-1)/pieceSize; int digits = digitsBaseTen(pieces); int maxN = optionInt("maxN", pieceSize-1); boolean oneFile = optionExists("oneFile"); char fileName[PATH_LEN]; char dirOnly[PATH_LEN], noPath[128]; int pos, pieceIx = 0, writeCount = 0; struct dnaSeq seq; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = NULL; Bits *bits = NULL; int seqCount = 0; char *outFile = optionVal("out", NULL); char *liftFile = optionVal("lift", NULL); FILE *lift = NULL; ZeroVar(&seq); splitPath(outRoot, dirOnly, noPath, NULL); if (oneFile) { sprintf(fileName, "%s.fa", outRoot); f = mustOpen(fileName, "w"); } if (liftFile) lift = mustOpen(liftFile, "w"); /* Count number of N's from s[0] to s[size-1]. * Treat any parts past end of string as N's. */ while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { bits = bitAlloc(seq.size); setBitsN(seq.dna, seq.size, bits); ++seqCount; if (outFile != NULL) { if (seqCount > 1) errAbort("Can only handle in files with one sequence using out option"); bitsForOut(outFile, seq.size, bits); } for (pos = 0; pos < seq.size; pos += pieceSize) { char numOut[128]; int thisSize = seq.size - pos; if (thisSize > (pieceSize + extra)) thisSize = pieceSize + extra; if ((thisSize <= extra) && (pos > 0)) break; /* nobody wants duplicate smaller than extra overhang */ if (bitCountRange(bits, pos, thisSize) <= maxN) { if (!oneFile) { mkOutPath(fileName, outRoot, digits, pieceIx); f = mustOpen(fileName, "w"); } sprintf(numOut, "%s%0*d", noPath, digits, pieceIx); faWriteNext(f, numOut, seq.dna + pos, thisSize); if (lift) fprintf(lift, "%d\t%s\t%d\t%s\t%d\n", pos, numOut, thisSize, seq.name, seq.size); ++writeCount; if (!oneFile) carefulClose(&f); } pieceIx++; } bitFree(&bits); } carefulClose(&f); carefulClose(&lift); lineFileClose(&lf); printf("%d pieces of %d written\n", writeCount, 
pieceIx); }
void splitByBase(char *inName, int splitCount, char *outRoot, off_t estSize) /* Split into a file base by base. */ { struct lineFile *lf = lineFileOpen(inName, TRUE); int lineSize; char *line; char c; char dir[PATH_LEN], seqName[128], outFile[128], outPathName[PATH_LEN]; int digits = digitsBaseTen(splitCount); boolean warnedMultipleRecords = FALSE; int fileCount = 0; off_t nextEnd = 0; off_t curPos = 0; FILE *f = NULL; int linePos = 0; int outLineSize = 50; if (!lineFileNext(lf, &line, &lineSize)) errAbort("%s is empty", inName); if (line[0] == '>') { line = firstWordInLine(line+1); if (line == NULL) errAbort("Empty initial '>' line in %s", inName); strncpy(seqName, line, sizeof(seqName)); } else { splitPath(inName, dir, seqName, NULL); lineFileReuse(lf); } splitPath(outRoot, NULL, outFile, NULL); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == '>') { if (!warnedMultipleRecords) { warnedMultipleRecords = TRUE; warn("More than one record in FA file line %d of %s", lf->lineIx, lf->fileName); continue; } } while ((c = *line++) != 0) { if (isdigit(c) || isspace(c)) continue; if (!isalpha(c)) errAbort("Weird %c (0x%x) line %d of %s", c, c, lf->lineIx, lf->fileName); if (++curPos >= nextEnd) { if (f != NULL) { if (linePos != 0) fputc('\n', f); fclose(f); } mkOutPath(outPathName, outRoot, digits, fileCount); verbose(2, "writing %s\n", outPathName); f = mustOpen(outPathName, "w"); fprintf(f, ">%s%0*d\n", outFile, digits, fileCount); ++fileCount; linePos = 0; nextEnd = calcNextEnd(fileCount, splitCount, estSize); } fputc(c, f); if (++linePos >= outLineSize) { fputc('\n', f); linePos = 0; } } } if (f != NULL) { if (linePos != 0) fputc('\n', f); fclose(f); } lineFileClose(&lf); }
void mafPrettyOut(FILE *f, struct mafAli *maf, int lineSize, boolean onlyDiff, int blockNo)
/* Write one alignment block as HTML-annotated text, lineSize columns at a
 * time.
 *
 * For each component a label with a TITLE tooltip is written, followed by
 * either its aligned text (via blueCapWrite) or, for components without
 * text whose left and right status match, a run of fill characters chosen
 * from rightStatus.  On the first line of the block, "B" (browser) and "D"
 * (get DNA) links are added for databases that are active / exist.
 * If any component with text has rightStatus MAF_INSERT_STATUS and the
 * first component ends before winEnd, an "Inserts between block" section
 * listing those components follows.
 *
 * When onlyDiff is set, the first component's text is passed to
 * blueCapWrite as the reference for every other component.
 *
 * NOTE(review): the "D" links are emitted with printf (stdout) while the
 * rest goes to f; this is left as found -- confirm that callers always
 * pass stdout. */
{
    int ii, ch;
    int srcChars = 0;
    struct mafComp *mc;
    int lineStart, lineEnd;
    char *summaryLine = needMem(lineSize+1);
    char *referenceText;
    int startChars, sizeChars, srcSizeChars;
    boolean haveInserts = FALSE;
    struct mafComp *masterMc = maf->components;
    /* One scratch buffer for all tooltip strings; it is cleared before each
     * use and released at the end instead of being allocated (and leaked)
     * per component per line. */
    struct dyString *dy = newDyString(512);

    startChars = sizeChars = srcSizeChars = 0;

    for (mc = maf->components; mc != NULL; mc = mc->next)
        {
        /* Figure out length of source (species) field. */
        /*if (mc->size != 0)*/
            {
            char dbOnly[128];
            int len;
            char *org;

            memset(dbOnly, 0, sizeof(dbOnly));
            safef(dbOnly, sizeof(dbOnly), "%s", mc->src);
            chopPrefix(dbOnly);

            if ((org = hOrganism(dbOnly)) == NULL)
                len = strlen(dbOnly);
            else
                len = strlen(org);
            if (srcChars < len)
                srcChars = len;

            len = digitsBaseTen(mc->start);
            if (startChars < len)
                startChars = len;
            len = digitsBaseTen(mc->size);
            if (sizeChars < len)
                sizeChars = len;
            len = digitsBaseTen(mc->srcSize);
            if (srcSizeChars < len)
                srcSizeChars = len;

            if (mc->text && (mc->rightStatus == MAF_INSERT_STATUS) && (masterMc->start + masterMc->size < winEnd))
                haveInserts = TRUE;

#ifdef REVERSESTRAND
            /* complement bases if hgTracks is on reverse strand */
            if (mc->size && cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE))
                complement(mc->text, maf->textSize);
#endif
            }
        }

    /* first sequence in the alignment */
    referenceText = maf->components->text;

    for (lineStart = 0; lineStart < maf->textSize; lineStart = lineEnd)
        {
        int size;

        lineEnd = lineStart + lineSize;
        if (lineEnd >= maf->textSize)
            lineEnd = maf->textSize;

        size = lineEnd - lineStart;
        initSummaryLine(summaryLine, size, '*');
        for (mc = maf->components; mc != NULL; mc = mc->next)
            {
            char dbOnly[128], *chrom;
            int s = mc->start;
            int e = s + mc->size;
            char *org;
            char *revComp = "";
            char strand = mc->strand;

#ifdef REVERSESTRAND
            if (cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE))
                strand = (strand == '+') ? '-' : '+';
#endif
            if (strand == '-')
                revComp = "&hgSeq.revComp=on";

            dyStringClear(dy);

            safef(dbOnly, sizeof(dbOnly), "%s", mc->src);
            chrom = chopPrefix(dbOnly);

            if ((org = hOrganism(dbOnly)) == NULL)
                org = dbOnly;

            if (mc->strand == '-')
                reverseIntRange(&s, &e, mc->srcSize);

            if (mc->text != NULL)
                {
                if (lineStart == 0)
                    {
                    if (hDbIsActive(dbOnly))
                        {
                        dyStringPrintf(dy, "%s Browser %s:%d-%d %c %*dbps",hOrganism(dbOnly),chrom, s+1, e, mc->strand,sizeChars, mc->size);
                        linkToOtherBrowserTitle(dbOnly, chrom, s, e, dy->string);
                        dyStringClear(dy);
                        fprintf(f, "B</A> ");
                        }
                    else
                        fprintf(f, " ");

                    if (hDbExists(dbOnly))
                        {
                        dyStringPrintf(dy, "Get %s DNA %s:%d-%d %c %*dbps",hOrganism(dbOnly),chrom, s+1, e, mc->strand,sizeChars, mc->size);
                        printf("<A TITLE=\"%s\" TARGET=\"_blank\" HREF=\"%s?o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d&db=%s%s\">D</A> ", dy->string,hgcName(), s, cgiEncode(chrom), chrom, s, e, dbOnly, revComp);
                        }
                    else
                        fprintf(f, " ");
                    }
                else
                    {
                    fprintf(f, " ");
                    }

                dyStringClear(dy);
                dyStringPrintf(dy, "%s:%d-%d %c %*dbps",chrom, s+1, e, mc->strand,sizeChars, mc->size);
                fprintf(f, "<A TITLE=\"%s\"> %*s </A> ", dy->string, srcChars, org);

                updateSummaryLine(summaryLine, referenceText + lineStart, mc->text + lineStart, size);
                blueCapWrite(f, mc->text + lineStart, size, (onlyDiff && mc != maf->components) ? referenceText + lineStart : NULL);
                fprintf(f, "\n");
                }
            else
                {
                if (((mc->leftStatus == MAF_CONTIG_STATUS) && (mc->rightStatus == MAF_CONTIG_STATUS) )
                    || ((mc->leftStatus == MAF_TANDEM_STATUS) && (mc->rightStatus == MAF_TANDEM_STATUS) )
                    || ((mc->leftStatus == MAF_INSERT_STATUS) && (mc->rightStatus == MAF_INSERT_STATUS) )
                    || ((mc->leftStatus == MAF_MISSING_STATUS) && (mc->rightStatus == MAF_MISSING_STATUS) ))
                    {
                    if (lineStart == 0)
                        {
                        /* These deliberately shadow the outer s/e: the links
                         * cover start..start+rightLen, while the label below
                         * uses the outer range. */
                        int s = mc->start;
                        int e = s + mc->rightLen;

                        if (mc->strand == '-')
                            reverseIntRange(&s, &e, mc->srcSize);

                        if ( hDbIsActive(dbOnly))
                            {
                            dyStringPrintf(dy, "%s Browser %s:%d-%d %c %d bps Unaligned",hOrganism(dbOnly),chrom, s+1, e, mc->strand, e-s);
                            linkToOtherBrowserTitle(dbOnly, chrom, s, e, dy->string);
                            fprintf(f,"B</A> ");
                            dyStringClear(dy);
                            }
                        else
                            fprintf(f," ");

                        if (hDbExists(dbOnly))
                            {
                            dyStringPrintf(dy, "Get %s DNA %s:%d-%d %c %d bps Unaligned",hOrganism(dbOnly),chrom, s+1, e, mc->strand, e-s);
                            printf("<A TITLE=\"%s\" TARGET=\"_blank\" HREF=\"%s?o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d&db=%s%s\">D</A> ", dy->string, hgcName(), s, cgiEncode(chrom), chrom, s, e, dbOnly,revComp);
                            }
                        else
                            fprintf(f, " ");
                        }
                    else
                        fprintf(f, " ");

                    initSummaryLine(summaryLine, size, ' ');
                    dyStringClear(dy);
                    dyStringPrintf(dy, "%s:%d-%d %c %*dbps",chrom, s+1, e, mc->strand,sizeChars, mc->size);
                    fprintf(f, "<A TITLE=\"%s\">%*s</A> ", dy->string, srcChars, org);

                    /* Fill character for the unaligned stretch. */
                    ch = '-';
                    switch(mc->rightStatus)
                        {
                        case MAF_INSERT_STATUS:
                            ch = '=';
                            break;
                        case MAF_MISSING_STATUS:
                            ch = 'N';
                            break;
                        case MAF_TANDEM_STATUS:
                        case MAF_CONTIG_STATUS:
                            ch = '-';
                            break;
                        }
                    for(ii=lineStart; ii < lineEnd ; ii++)
                        fputc(ch,f);
                    fprintf(f,"\n");
                    }
                }
            }
#ifdef ADDMATCHLINE
        if (lineStart == 0)
            fprintf(f, " %-*s %s\n", srcChars, "", summaryLine);
        else
            fprintf(f, "%-*s %s\n", srcChars, "", summaryLine);
#else
        fprintf(f, "\n");
#endif
        }

    if (haveInserts)
        {
        fprintf(f, "<B>Inserts between block %d and %d in window</B>\n",blockNo, blockNo+1);
        for (mc = maf->components; mc != NULL; mc = mc->next)
            {
            char dbOnly[128], *chrom;
            int s = mc->start + mc->size;
            int e = s + mc->rightLen;
            char *org;

            if (mc->text == NULL)
                continue;

            if (mc->strand == '-')
                reverseIntRange(&s, &e, mc->srcSize);

            safef(dbOnly, sizeof(dbOnly), "%s", mc->src);
            chrom = chopPrefix(dbOnly);

            if ((org = hOrganism(dbOnly)) == NULL)
                org = dbOnly;

            if (mc->rightStatus == MAF_INSERT_STATUS)
                {
                char *revComp = "";

                if (hDbIsActive(dbOnly))
                    {
                    char strand = mc->strand;
#ifdef REVERSESTRAND
                    if (cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE))
                        strand = (strand == '+') ? '-' : '+';
#endif
                    if (strand == '-')
                        revComp = "&hgSeq.revComp=on";

                    linkToOtherBrowser(dbOnly, chrom, s, e);
                    fprintf(f,"B");
                    fprintf(f, "</A>");
                    fprintf(f, " ");
                    }
                else
                    fprintf(f, " ");

                if (hDbExists(dbOnly))
                    {
                    printf("<A TARGET=\"_blank\" HREF=\"%s?o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d"
                           "&db=%s%s\">D</A> ", hgcName(), s, cgiEncode(chrom), chrom, s, e, dbOnly,revComp);
                    }
                else
                    fprintf(f, " ");
                fprintf(f, "%*s %dbp\n", srcChars, org,mc->rightLen);
                }
            }
        fprintf(f, "\n");
        }

    freeDyString(&dy);
    freeMem(summaryLine);
}
void segSplitPairwise(char *inSeg, char *outSeg) /* segSplitPairwise - Split a segment file pairwise. */ { struct segFile *sf = segOpen(inSeg); FILE *of = mustOpen(outSeg, "w"); struct segBlock *sb; struct segBlock newSb; struct segComp *refComp, *sc, *refSc, *newSc; char *splitName = NULL; int sNum, bNum = 1; segWriteStart(of); while ((sb = segNext(sf)) != NULL) { /* Set ref if it wasn't set on the commandline. */ if (ref == NULL && ((ref = segFirstCompSpecies(sb, '.')) == NULL)) errAbort("ref is not set and the first segment block has no " "components."); /* Find and clone the reference species component. */ refComp = segFindCompSpecies(sb, ref, '.'); refSc = cloneSegComp(refComp); sNum = 1; for (sc = sb->components; sc != NULL; sc = sc->next) { if (sc == refComp) continue; /* Clone the current component. */ newSc = cloneSegComp(sc); newSc->next = NULL; /* Point the reference component to the newly cloned component. */ refSc->next = newSc; /* Set up the segment block to hold the two components above. */ if (number) { if (sb->name == NULL) { AllocArray(sb->name, 6 + digitsBaseTen(bNum)); sprintf(sb->name, "Block%d", bNum); } AllocArray(splitName, strlen(sb->name) + digitsBaseTen(sNum) + 2); sprintf(splitName, "%s.%d", sb->name, sNum++); newSb.name = splitName; } else newSb.name = sb->name; newSb.val = sb->val; newSb.components = refSc; /* Output this new block. */ segWrite(of, &newSb); if (number) freeMem(splitName); /* Free the cloned component. */ segCompFree(&newSc); } /* Free the cloned reference component. */ segCompFree(&refSc); segBlockFree(&sb); bNum++; } segWriteEnd(of); carefulClose(&of); segFileFree(&sf); }
void mafWrite(FILE *f, struct mafAli *ali)
/* Write next alignment to file.
 *
 * Output is an "a score=" line, then per component either an "e" line
 * (when size is 0 and leftStatus is set) or an "s" line, optionally
 * followed by a "q" line (when quality is set) and an "i" line (when
 * leftStatus is set), and finally a blank separator line.  Columns are
 * padded to the widest value in the alignment.
 *
 * Side effect: any component whose src is exactly "." has its src replaced
 * by a freshly cloned "defaultName"; the old string is freed. */
{
    struct mafComp *comp;
    int srcChars = 0, startChars = 0, sizeChars = 0, srcSizeChars = 0;

    /* Write out alignment header */
    fprintf(f, "a score=%f\n", ali->score);

    /* Figure out length of each field. */
    for (comp = ali->components; comp != NULL; comp = comp->next)
        {
        int len = 0;

        /* a name like '.' will break some tools, so replace it
         * with a generic name */
        if (sameString(comp->src,"."))
            {
            /* Release the old name first so the replacement does not leak it. */
            freeMem(comp->src);
            comp->src=cloneString("defaultName");
            }

        len = strlen(comp->src);
        if (srcChars < len)
            srcChars = len;

        len = digitsBaseTen(comp->start);
        if (startChars < len)
            startChars = len;

        len = digitsBaseTen(comp->size);
        if (sizeChars < len)
            sizeChars = len;

        len = digitsBaseTen(comp->srcSize);
        if (srcSizeChars < len)
            srcSizeChars = len;
        }

    /* Write out each component. */
    for (comp = ali->components; comp != NULL; comp = comp->next)
        {
        if ((comp->size == 0) && (comp->leftStatus))
            /* No aligned bases: the size column carries leftLen. */
            fprintf(f, "e %-*s %*d %*d %c %*d %c\n",
                    srcChars, comp->src,
                    startChars, comp->start,
                    sizeChars, comp->leftLen,
                    comp->strand,
                    srcSizeChars, comp->srcSize,
                    comp->leftStatus);
        else
            {
            fprintf(f, "s %-*s %*d %*d %c %*d %s\n",
                    srcChars, comp->src,
                    startChars, comp->start,
                    sizeChars, comp->size,
                    comp->strand,
                    srcSizeChars, comp->srcSize,
                    comp->text);

            if (comp->quality)
                /* Pad the name so the quality string starts in the same
                 * column as the text on the "s" line above. */
                fprintf(f, "q %-*s %s\n",
                        srcChars + startChars + sizeChars + srcSizeChars + 5,
                        comp->src,
                        comp->quality);

            if (comp->leftStatus)
                fprintf(f,"i %-*s %c %d %c %d\n",srcChars,comp->src,
                        comp->leftStatus,comp->leftLen,comp->rightStatus,comp->rightLen);
            }
        }

    /* Write out blank separator line. */
    fprintf(f, "\n");
}