void wigSort(char *input, char *output)
/* wigSort - Sort a wig file.
 * Works in two passes.  The first pass reads input through a lineFile and
 * builds one pos record per section, remembering the offset reported by
 * lineFileTell and, once the following section is seen, the section's size
 * in bytes.  The list is then sorted with posCmp.  The second pass reopens
 * input as a plain FILE and copies each section's bytes to output in sorted
 * order.  Aborts on a line that is neither a "chrom=" declaration nor a
 * four-word line, and on a failed seek. */
{
struct lineFile *lf = lineFileOpen(input, TRUE);
struct pos *pos, *posList = NULL;
char *line;
while (lineFileNextReal(lf, &line))
    {
    verbose(2, "processing %s\n", line);
    AllocVar(pos);
    pos->fileOffset = lineFileTell(lf);
    /* The head of the list is the previous section; now that the next one
     * begins we know how many bytes the previous one spans. */
    if (posList != NULL)
        posList->fileSize = pos->fileOffset - posList->fileOffset;
    slAddHead(&posList, pos);
    if (stringIn("chrom=", line))
        {
        /* Declaration line: the helper fills in pos and reads on from lf. */
        parseSteppedSection(lf, line, pos);
        }
    else
        {
        /* Check for bed... */
        char *words[5];
        int wordCount = chopLine(line, words);
        if (wordCount != 4)
            errAbort("Unrecognized format line %d of %s:\n", lf->lineIx, lf->fileName);
        pos->chrom = cloneString(words[0]);
        pos->start = lineFileNeedNum(lf, words, 1);
        }
    }
if (posList != NULL)
    {
    /* The last section runs up to wherever the reader stopped. */
    posList->fileSize = lineFileTell(lf) - posList->fileOffset;
    slReverse(&posList);
    slSort(&posList, posCmp);
    }
lineFileClose(&lf);

FILE *in = mustOpen(input, "r");
FILE *out = mustOpen(output, "w");
for (pos = posList; pos != NULL; pos = pos->next)
    {
    /* A failed seek would leave the stream where it was and copy the wrong
     * bytes into the output, so treat it as fatal. */
    if (fseek(in, pos->fileOffset, SEEK_SET) != 0)
        errAbort("Couldn't seek in %s", input);
    copyFileBytes(in, out, pos->fileSize);
    }
carefulClose(&in);
carefulClose(&out);
}
/* Example #2 */
/* 0 */
struct bwgSection *bwgParseWig(
	char *fileName,       /* Name of ascii wig file. */
	boolean clipDontDie,  /* Skip items outside chromosome rather than aborting. */
	struct hash *chromSizeHash,  /* If non-NULL items checked to be inside chromosome. */
	int maxSectionSize,   /* Biggest size of a section.  100 - 100,000 is usual range. */
	struct lm *lm)	      /* Memory pool to allocate from. */
/* Parse out ascii wig file - allocating memory in lm.
 * Lines containing "chrom=" are handed to parseSteppedSection; any other
 * line must chop into exactly four words with numeric fields 1-3 and is then
 * handed to parseBedGraphSection.  Returns the section list sorted with
 * bwgSectionCmp.  Aborts on an unrecognized line, or when two neighboring
 * sections on the same chromosome overlap after sorting. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
struct bwgSection *sectionList = NULL;

/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);

while (lineFileNextReal(lf, &line))
    {
    verbose(2, "processing %s\n", line);
    if (stringIn("chrom=", line))
	parseSteppedSection(lf, clipDontDie, chromSizeHash, line, lm, maxSectionSize, &sectionList);
    else
        {
	/* Check for bed...  Chop a private copy so the line itself stays
	 * intact for the error message and for lineFileReuse below. */
	char *dupe = cloneString(line);
	char *words[5];
	int wordCount = chopLine(dupe, words);
	if (wordCount != 4)
	    errAbort("Unrecognized line %d of %s:\n%s\n", lf->lineIx, lf->fileName, line);

	/* Parse out a bed graph line just to check numerical format. */
	char *chrom = words[0];
	int start = lineFileNeedNum(lf, words, 1);
	int end = lineFileNeedNum(lf, words, 2);
	double val = lineFileNeedDouble(lf, words, 3);
	verbose(2, "bedGraph %s:%d-%d@%g\n", chrom, start, end, val);

	/* chrom points into dupe and is not used past this point, so the
	 * copy can be released instead of leaking once per bedGraph run. */
	freeMem(dupe);

	/* Push back line and call bed parser. */
	lineFileReuse(lf);
	parseBedGraphSection(lf, clipDontDie, chromSizeHash, lm, maxSectionSize, &sectionList);
	}
    }

/* All input has been consumed; close the file so its handle and buffer are
 * not leaked on return. */
lineFileClose(&lf);

slSort(&sectionList, bwgSectionCmp);

/* Check for overlap at section level. */
struct bwgSection *section, *nextSection;
for (section = sectionList; section != NULL; section = nextSection)
    {
    nextSection = section->next;
    if (nextSection != NULL)
        {
	if (sameString(section->chrom, nextSection->chrom))
	    {
	    if (section->end > nextSection->start)
	        {
		errAbort("There's more than one value for %s base %d (in coordinates that start with 1).\n",
		    section->chrom, nextSection->start+1);
		}
	    }
	}
    }

return sectionList;
}