Пример #1
0
static void checkChromNameAndSize(struct lineFile *lf, char *s, unsigned chromEnd)
/* Check that the name is non-empty and exists, and that chromEnd <= chromSize. Abort on error. */
{
unsigned *chromSize;
if (strlen(s) > 0)
    {
    if (chrHash)
	{
	if ( (chromSize = hashFindVal(chrHash, s)) != NULL)
	    {
	    if (chromEnd > *chromSize)
		lineFileAbort(lf, "chromEnd (%d) > chromEnd (%d)\n", chromEnd, *chromSize);
	    return; 
	    }
	else
	    {
	    lineFileAbort(lf, "chrom %s not found", s);
	    }
	}
    else
	{
	return; // chrom name not blank, and not validating against chromInfo
	}
    }
lineFileAbort(lf, "chrom column empty");
}
Пример #2
0
INLINE void noTabixSupport(struct lineFile *lf, char *where)
{
#ifdef USE_TABIX
if (lf->tabix != NULL)
    lineFileAbort(lf, "%s: not implemented for lineFile opened with lineFileTabixMayOpen.", where);
#endif // USE_TABIX
}
Пример #3
0
void bedIntersect(char *aFile, char *bFile, char *outFile)
/* bedIntersect - Intersect two bed files. */
{
struct lineFile *lf = lineFileOpen(aFile, TRUE);
struct hash *bHash = readBed(bFile);
FILE *f = mustOpen(outFile, "w");
char *row[40];
int wordCount;

while ((wordCount = (strictTab ? lineFileChopTab(lf, row) : lineFileChop(lf, row))) != 0)
    {
    char *chrom = row[0];
    int start = lineFileNeedNum(lf, row, 1);
    int end = lineFileNeedNum(lf, row, 2);
    if (start > end)
        errAbort("start after end line %d of %s", lf->lineIx, lf->fileName);
    if (start == end && !allowStartEqualEnd)
	lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)");
    struct binKeeper *bk = hashFindVal(bHash, chrom);
    if (bk != NULL)
	{
	struct binElement *hitList = NULL, *hit;
	if (allowStartEqualEnd && start == end)
	    hitList = binKeeperFind(bk, start-1, end+1);
	else
	    hitList = binKeeperFind(bk, start, end);
	if (aHitAny)
	    {
	    for (hit = hitList; hit != NULL; hit = hit->next)
		{
		float cov = getCov(start, end, hit->val);
		if (cov >= minCoverage)
		    {
		    outputBed(f, row, wordCount, start, end, hit->val);
		    break;
		    }
		else
		    {
		    struct bed5 *b = hit->val;
		    verbose(1, "filter out %s %d %d %d %d overlap %d %d %d %.3f\n",
			    chrom, start, end, b->start, b->end,
			    positiveRangeIntersection(start, end, b->start, b->end),
			    end-start, b->end-b->start, cov);
		    }
		}
	    }
	else
	    {
	    for (hit = hitList; hit != NULL; hit = hit->next)
	        {
		if (getCov(start, end, hit->val) >= minCoverage)
		    outputBed(f, row, wordCount, start, end, hit->val);
		}
	    }
	slFreeList(&hitList);
	}
    }
}
Пример #4
0
struct hash *readBed(char *fileName)
/* Read bed and return it as a hash keyed by chromName
 * with binKeeper values. */
{
char *row[5];
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *hash = newHash(0);
int expectedCols = bScore ? 5 : 3;

while (lineFileNextRow(lf, row, expectedCols))
    {
    struct binKeeper *bk;
    struct bed5 *bed;
    struct hashEl *hel = hashLookup(hash, row[0]);
    if (hel == NULL)
       {
       bk = binKeeperNew(0, 1024*1024*1024);
       hel = hashAdd(hash, row[0], bk);
       }
    bk = hel->val;
    AllocVar(bed);
    bed->chrom = hel->name;
    bed->start = lineFileNeedNum(lf, row, 1);
    bed->end = lineFileNeedNum(lf, row, 2);
    if (bScore)
	bed->score = lineFileNeedNum(lf, row, 4);
    if (bed->start > bed->end)
        errAbort("start after end line %d of %s", lf->lineIx, lf->fileName);
    if (bed->start == bed->end)
	{
	if (allowStartEqualEnd)
	    // Note we are tweaking binKeeper coords here, so use bed->start and bed->end.
	    binKeeperAdd(bk, max(0, bed->start-1), bed->end+1, bed);
	else
	    lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)");
	}
    else
	binKeeperAdd(bk, bed->start, bed->end, bed);
    }
lineFileClose(&lf);
return hash;
}
Пример #5
0
struct encodePeak *encodePeakLineFileLoad(char **row, enum encodePeakType pt, struct lineFile *lf)
/* From a linefile line, load an encodePeak row.  Errors outputted */
/* have line numbers, etc. Does more error checking as well. */
{
struct encodePeak *peak;
if (!pt)
    errAbort("Unknown peak type set for track");
AllocVar(peak);
peak->chrom = cloneString(row[0]);
peak->chromStart = lineFileNeedNum(lf, row, 1);
peak->chromEnd = lineFileNeedNum(lf, row, 2);
peak->peak = -1;
if (peak->chromEnd < 1)
    lineFileAbort(lf, "chromEnd less than 1 (%d)", peak->chromEnd);
if (peak->chromEnd < peak->chromStart)
    lineFileAbort(lf, "chromStart after chromEnd (%d > %d)", 
    	peak->chromStart, peak->chromEnd);
peak->name = cloneString(row[3]);
peak->score = lineFileNeedNum(lf, row, 4);
safecpy(peak->strand, sizeof(peak->strand), row[5]);
if (peak->strand[0] != '+' && peak->strand[0] != '-' && peak->strand[0] != '.')
    lineFileAbort(lf, "Expecting +, -, or . in strand");
if (pt != gappedPeak)
/* deal with signalValue, pValue, qValue, and peak */
    {
    peak->signalValue = (float)lineFileNeedDouble(lf, row, 6);
    peak->pValue = (float)lineFileNeedDouble(lf, row, 7);
    peak->qValue = (float)lineFileNeedDouble(lf, row, 8);
    if ((pt == narrowPeak) || (pt == encodePeak))
	{	
	peak->peak = lineFileNeedNum(lf, row, 9);
	if (peak->peak >= (int)peak->chromEnd)
	    lineFileAbort(lf, "peak site past chromEnd (%d > %d)", peak->peak, peak->chromEnd);
	}
    }
else  /* must be gappedPeak */
/* deal with thickStart, thickEnd, itemRgb even though they're not used */
    {
    int thickStart = lineFileNeedNum(lf, row, 6);
    int thickEnd = lineFileNeedNum(lf, row, 7);
    int itemRgb = 0;
    char *comma;
    /*	Allow comma separated list of rgb values here	*/
    comma = strchr(row[8], ',');
    if (comma)
	itemRgb = bedParseRgb(row[8]);
    else
	itemRgb = lineFileNeedNum(lf, row, 8);
    if ((thickStart != 0) || (thickEnd != 0) || (itemRgb != 0))
	lineFileAbort(lf, "thickStart, thickEnd, and itemRgb columns not used in gappedPeak type, set all to 0");
    }
/* Deal with blocks */
if ((pt == gappedPeak) || (pt == encodePeak))
    {
    int i, count;
    int lastEnd, lastStart;
    int blockCountIx, blockSizesIx, blockStartsIx;
    if (pt == gappedPeak)
	{
	blockCountIx = 9;
	blockSizesIx = 10;
	blockStartsIx = 11;
	}
    else
	{
	blockCountIx = 10;
	blockSizesIx = 11;
	blockStartsIx = 12;
	}
    peak->blockCount = lineFileNeedNum(lf, row, blockCountIx);
    sqlUnsignedDynamicArray(row[blockSizesIx], &peak->blockSizes, &count);
    if (count != peak->blockCount)
	lineFileAbort(lf,  "expecting %d elements in array", peak->blockCount);
    sqlUnsignedDynamicArray(row[blockStartsIx], &peak->blockStarts, &count);
    if (count != peak->blockCount)
	lineFileAbort(lf, "expecting %d elements in array", peak->blockCount);
    // tell the user if they appear to be using absolute starts rather than 
    // relative... easy to forget!  Also check block order, coord ranges...
    lastStart = -1;
    lastEnd = 0;
    for (i=0;  i < peak->blockCount;  i++)
	{
	if (peak->blockStarts[i]+peak->chromStart >= peak->chromEnd)
	    {
	    if (peak->blockStarts[i] >= peak->chromStart)
		lineFileAbort(lf, 
		    "BED blockStarts offsets must be relative to chromStart, "
		    "not absolute.  Try subtracting chromStart from each offset "
		    "in blockStarts.");
	    else
		lineFileAbort(lf, 
		    "BED blockStarts[i]+chromStart must be less than chromEnd.");
	    }
	lastStart = peak->blockStarts[i];
	lastEnd = peak->chromStart + peak->blockStarts[i] + peak->blockSizes[i];
	}
    if (peak->blockStarts[0] != 0)
	lineFileAbort(lf, 
	    "BED blocks must span chromStart to chromEnd.  "
	    "BED blockStarts[0] must be 0 (==%d) so that (chromStart + "
	    "blockStarts[0]) equals chromStart.", peak->blockStarts[0]);
    i = peak->blockCount-1;
    if ((peak->chromStart + peak->blockStarts[i] + peak->blockSizes[i]) !=
	peak->chromEnd)
	{
	lineFileAbort(lf, 
	    "BED blocks must span chromStart to chromEnd.  (chromStart + "
	    "blockStarts[last] + blockSizes[last]) must equal chromEnd.");
	}
    }
if (pt == gappedPeak)
    /* deal with final three columns of a gappedPeak */
    {
    peak->signalValue = (float)lineFileNeedDouble(lf, row, 12);
    peak->pValue = (float)lineFileNeedDouble(lf, row, 13);
    peak->qValue = (float)lineFileNeedDouble(lf, row, 14);    
    }
return peak;
}