static void checkChromNameAndSize(struct lineFile *lf, char *s, unsigned chromEnd)
/* Check that the chrom name s is non-empty and, when the file-level chrHash is
 * set, that s is a key of chrHash and that chromEnd <= the unsigned size the
 * hash value points to.  Any failure is reported through lineFileAbort against
 * lf.  When chrHash is NULL only the non-empty check is made.  A NULL s is
 * treated like an empty name. */
{
unsigned *chromSize;

if (s == NULL || s[0] == '\0')
    {
    lineFileAbort(lf, "chrom column empty");
    return;
    }

if (chrHash == NULL)
    return;	// chrom name not blank, and not validating against chromInfo

chromSize = hashFindVal(chrHash, s);
if (chromSize == NULL)
    lineFileAbort(lf, "chrom %s not found", s);
else if (chromEnd > *chromSize)
    /* Both values are unsigned, and the second one is the chrom size. */
    lineFileAbort(lf, "chromEnd (%u) > chromSize (%u)\n", chromEnd, *chromSize);
}
INLINE void noTabixSupport(struct lineFile *lf, char *where)
/* Abort, naming the caller given in where, if lf has a non-NULL tabix member.
 * The body is empty unless USE_TABIX is defined. */
{
#ifdef USE_TABIX
if (lf->tabix == NULL)
    return;
lineFileAbort(lf, "%s: not implemented for lineFile opened with lineFileTabixMayOpen.", where);
#endif // USE_TABIX
}
void bedIntersect(char *aFile, char *bFile, char *outFile) /* bedIntersect - Intersect two bed files. */ { struct lineFile *lf = lineFileOpen(aFile, TRUE); struct hash *bHash = readBed(bFile); FILE *f = mustOpen(outFile, "w"); char *row[40]; int wordCount; while ((wordCount = (strictTab ? lineFileChopTab(lf, row) : lineFileChop(lf, row))) != 0) { char *chrom = row[0]; int start = lineFileNeedNum(lf, row, 1); int end = lineFileNeedNum(lf, row, 2); if (start > end) errAbort("start after end line %d of %s", lf->lineIx, lf->fileName); if (start == end && !allowStartEqualEnd) lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)"); struct binKeeper *bk = hashFindVal(bHash, chrom); if (bk != NULL) { struct binElement *hitList = NULL, *hit; if (allowStartEqualEnd && start == end) hitList = binKeeperFind(bk, start-1, end+1); else hitList = binKeeperFind(bk, start, end); if (aHitAny) { for (hit = hitList; hit != NULL; hit = hit->next) { float cov = getCov(start, end, hit->val); if (cov >= minCoverage) { outputBed(f, row, wordCount, start, end, hit->val); break; } else { struct bed5 *b = hit->val; verbose(1, "filter out %s %d %d %d %d overlap %d %d %d %.3f\n", chrom, start, end, b->start, b->end, positiveRangeIntersection(start, end, b->start, b->end), end-start, b->end-b->start, cov); } } } else { for (hit = hitList; hit != NULL; hit = hit->next) { if (getCov(start, end, hit->val) >= minCoverage) outputBed(f, row, wordCount, start, end, hit->val); } } slFreeList(&hitList); } } }
struct hash *readBed(char *fileName) /* Read bed and return it as a hash keyed by chromName * with binKeeper values. */ { char *row[5]; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct hash *hash = newHash(0); int expectedCols = bScore ? 5 : 3; while (lineFileNextRow(lf, row, expectedCols)) { struct binKeeper *bk; struct bed5 *bed; struct hashEl *hel = hashLookup(hash, row[0]); if (hel == NULL) { bk = binKeeperNew(0, 1024*1024*1024); hel = hashAdd(hash, row[0], bk); } bk = hel->val; AllocVar(bed); bed->chrom = hel->name; bed->start = lineFileNeedNum(lf, row, 1); bed->end = lineFileNeedNum(lf, row, 2); if (bScore) bed->score = lineFileNeedNum(lf, row, 4); if (bed->start > bed->end) errAbort("start after end line %d of %s", lf->lineIx, lf->fileName); if (bed->start == bed->end) { if (allowStartEqualEnd) // Note we are tweaking binKeeper coords here, so use bed->start and bed->end. binKeeperAdd(bk, max(0, bed->start-1), bed->end+1, bed); else lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)"); } else binKeeperAdd(bk, bed->start, bed->end, bed); } lineFileClose(&lf); return hash; }
struct encodePeak *encodePeakLineFileLoad(char **row, enum encodePeakType pt, struct lineFile *lf) /* From a linefile line, load an encodePeak row. Errors outputted */ /* have line numbers, etc. Does more error checking as well. */ { struct encodePeak *peak; if (!pt) errAbort("Unknown peak type set for track"); AllocVar(peak); peak->chrom = cloneString(row[0]); peak->chromStart = lineFileNeedNum(lf, row, 1); peak->chromEnd = lineFileNeedNum(lf, row, 2); peak->peak = -1; if (peak->chromEnd < 1) lineFileAbort(lf, "chromEnd less than 1 (%d)", peak->chromEnd); if (peak->chromEnd < peak->chromStart) lineFileAbort(lf, "chromStart after chromEnd (%d > %d)", peak->chromStart, peak->chromEnd); peak->name = cloneString(row[3]); peak->score = lineFileNeedNum(lf, row, 4); safecpy(peak->strand, sizeof(peak->strand), row[5]); if (peak->strand[0] != '+' && peak->strand[0] != '-' && peak->strand[0] != '.') lineFileAbort(lf, "Expecting +, -, or . in strand"); if (pt != gappedPeak) /* deal with signalValue, pValue, qValue, and peak */ { peak->signalValue = (float)lineFileNeedDouble(lf, row, 6); peak->pValue = (float)lineFileNeedDouble(lf, row, 7); peak->qValue = (float)lineFileNeedDouble(lf, row, 8); if ((pt == narrowPeak) || (pt == encodePeak)) { peak->peak = lineFileNeedNum(lf, row, 9); if (peak->peak >= (int)peak->chromEnd) lineFileAbort(lf, "peak site past chromEnd (%d > %d)", peak->peak, peak->chromEnd); } } else /* must be gappedPeak */ /* deal with thickStart, thickEnd, itemRgb even though they're not used */ { int thickStart = lineFileNeedNum(lf, row, 6); int thickEnd = lineFileNeedNum(lf, row, 7); int itemRgb = 0; char *comma; /* Allow comma separated list of rgb values here */ comma = strchr(row[8], ','); if (comma) itemRgb = bedParseRgb(row[8]); else itemRgb = lineFileNeedNum(lf, row, 8); if ((thickStart != 0) || (thickEnd != 0) || (itemRgb != 0)) lineFileAbort(lf, "thickStart, thickEnd, and itemRgb columns not used in gappedPeak type, set all to 0"); } /* Deal with 
blocks */ if ((pt == gappedPeak) || (pt == encodePeak)) { int i, count; int lastEnd, lastStart; int blockCountIx, blockSizesIx, blockStartsIx; if (pt == gappedPeak) { blockCountIx = 9; blockSizesIx = 10; blockStartsIx = 11; } else { blockCountIx = 10; blockSizesIx = 11; blockStartsIx = 12; } peak->blockCount = lineFileNeedNum(lf, row, blockCountIx); sqlUnsignedDynamicArray(row[blockSizesIx], &peak->blockSizes, &count); if (count != peak->blockCount) lineFileAbort(lf, "expecting %d elements in array", peak->blockCount); sqlUnsignedDynamicArray(row[blockStartsIx], &peak->blockStarts, &count); if (count != peak->blockCount) lineFileAbort(lf, "expecting %d elements in array", peak->blockCount); // tell the user if they appear to be using absolute starts rather than // relative... easy to forget! Also check block order, coord ranges... lastStart = -1; lastEnd = 0; for (i=0; i < peak->blockCount; i++) { if (peak->blockStarts[i]+peak->chromStart >= peak->chromEnd) { if (peak->blockStarts[i] >= peak->chromStart) lineFileAbort(lf, "BED blockStarts offsets must be relative to chromStart, " "not absolute. Try subtracting chromStart from each offset " "in blockStarts."); else lineFileAbort(lf, "BED blockStarts[i]+chromStart must be less than chromEnd."); } lastStart = peak->blockStarts[i]; lastEnd = peak->chromStart + peak->blockStarts[i] + peak->blockSizes[i]; } if (peak->blockStarts[0] != 0) lineFileAbort(lf, "BED blocks must span chromStart to chromEnd. " "BED blockStarts[0] must be 0 (==%d) so that (chromStart + " "blockStarts[0]) equals chromStart.", peak->blockStarts[0]); i = peak->blockCount-1; if ((peak->chromStart + peak->blockStarts[i] + peak->blockSizes[i]) != peak->chromEnd) { lineFileAbort(lf, "BED blocks must span chromStart to chromEnd. 
(chromStart + " "blockStarts[last] + blockSizes[last]) must equal chromEnd."); } } if (pt == gappedPeak) /* deal with final three columns of a gappedPeak */ { peak->signalValue = (float)lineFileNeedDouble(lf, row, 12); peak->pValue = (float)lineFileNeedDouble(lf, row, 13); peak->qValue = (float)lineFileNeedDouble(lf, row, 14); } return peak; }