void wigSort(char *input, char *output) /* wigSort - Sort a wig file.. */ { struct lineFile *lf = lineFileOpen(input, TRUE); struct pos *pos, *posList = NULL; char *line; while (lineFileNextReal(lf, &line)) { verbose(2, "processing %s\n", line); AllocVar(pos); pos->fileOffset = lineFileTell(lf); if (posList != NULL) posList->fileSize = pos->fileOffset - posList->fileOffset; slAddHead(&posList, pos); if (stringIn("chrom=", line)) { parseSteppedSection(lf, line, pos); } else { /* Check for bed... */ char *words[5]; int wordCount = chopLine(line, words); if (wordCount != 4) errAbort("Unrecognized format line %d of %s:\n", lf->lineIx, lf->fileName); pos->chrom = cloneString(words[0]); pos->start = lineFileNeedNum(lf, words, 1); } } if (posList != NULL) { posList->fileSize = lineFileTell(lf) - posList->fileOffset; slReverse(&posList); slSort(&posList, posCmp); } lineFileClose(&lf); FILE *in = mustOpen(input, "r"); FILE *out = mustOpen(output, "w"); for (pos = posList; pos != NULL; pos = pos->next) { fseek(in, pos->fileOffset, SEEK_SET); copyFileBytes(in, out, pos->fileSize); } carefulClose(&in); carefulClose(&out); }
struct bwgSection *bwgParseWig(
        char *fileName,             /* Name of ascii wig file. */
        boolean clipDontDie,        /* Skip items outside chromosome rather than aborting. */
        struct hash *chromSizeHash, /* If non-NULL items checked to be inside chromosome. */
        int maxSectionSize,         /* Biggest size of a section.  100 - 100,000 is usual range. */
        struct lm *lm)              /* Memory pool to allocate from. */
/* Parse out ascii wig file - allocating memory in lm.
 *
 * Lines containing "chrom=" are passed to parseSteppedSection(); every other
 * line must be a four-word bedGraph line, which is validated here, pushed back
 * onto the lineFile, and then handed to parseBedGraphSection().  Both parsers
 * receive the address of the section list.
 *
 * Returns the section list sorted with bwgSectionCmp.  Aborts via errAbort on
 * an unrecognized line, or when two adjacent sections on the same chromosome
 * overlap after sorting. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
struct bwgSection *sectionList = NULL;

/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);

while (lineFileNextReal(lf, &line))
    {
    verbose(2, "processing %s\n", line);
    if (stringIn("chrom=", line))
        parseSteppedSection(lf, clipDontDie, chromSizeHash, line, lm, maxSectionSize, &sectionList);
    else
        {
        /* Check for bed...  Chop a copy so that line itself stays intact for
         * the error message below.
         * NOTE(review): dupe is never released in this function - confirm
         * whether it should be freed once the verbose() call is done. */
        char *dupe = cloneString(line);
        char *words[5];
        int wordCount = chopLine(dupe, words);
        if (wordCount != 4)
            errAbort("Unrecognized line %d of %s:\n%s\n", lf->lineIx, lf->fileName, line);

        /* Parse out a bed graph line just to check numerical format. */
        char *chrom = words[0];
        int start = lineFileNeedNum(lf, words, 1);
        int end = lineFileNeedNum(lf, words, 2);
        double val = lineFileNeedDouble(lf, words, 3);
        verbose(2, "bedGraph %s:%d-%d@%g\n", chrom, start, end, val);

        /* Push back line and call bed parser. */
        lineFileReuse(lf);
        parseBedGraphSection(lf, clipDontDie, chromSizeHash, lm, maxSectionSize, &sectionList);
        }
    }
slSort(&sectionList, bwgSectionCmp);

/* Check for overlap at section level: after sorting, a section that ends past
 * the start of its successor on the same chromosome is an error. */
struct bwgSection *section, *nextSection;
for (section = sectionList; section != NULL; section = nextSection)
    {
    nextSection = section->next;
    if (nextSection != NULL
        && sameString(section->chrom, nextSection->chrom)
        && section->end > nextSection->start)
        {
        errAbort("There's more than one value for %s base %d (in coordinates that start with 1).\n",
                 section->chrom, nextSection->start+1);
        }
    }

return sectionList;
}