void chainIndex(char *inChain, char *outIndex) /* chainIndex - Create simple two column file index for chain. */ { struct lineFile *lf = lineFileOpen(inChain, TRUE); FILE *f = mustOpen(outIndex, "w"); struct chain *chain, *lastChain = NULL; long pos = 0; struct hash *uniqHash = hashNew(16); while ((chain = chainRead(lf)) != NULL) { if (lastChain == NULL || !sameString(chain->tName, lastChain->tName)) { if (hashLookup(uniqHash, chain->tName)) { errAbort("%s is not sorted, %s repeated with intervening %s", inChain, chain->tName, lastChain->tName); } hashAddInt(uniqHash, chain->tName, pos); fprintf(f, "%lx\t%s\n", pos, chain->tName); } chainFree(&lastChain); lastChain = chain; pos = lineFileTell(lf); } }
void getOffset(char *directoryName, char *chromName, char *outputFileName) { FILE *outputFileHandle = mustOpen(outputFileName, "w"); struct lineFile *lf = NULL; char *line; off_t offset; char *row[9], *rsId[2]; char inputFileName[64]; safef(inputFileName, sizeof(inputFileName), "%s/%s.fa", directoryName, chromName); lf = lineFileOpen(inputFileName, TRUE); while (lineFileNext(lf, &line, NULL)) { if (line[0] == '>') { chopString(line, "|", row, ArraySize(row)); chopString(row[2], " ", rsId, ArraySize(rsId)); offset = lineFileTell(lf); fprintf(outputFileHandle, "%s\t%s\t%ld\n", rsId[0], chromName, offset); } } carefulClose(&outputFileHandle); lineFileClose(&lf); }
void wigSort(char *input, char *output) /* wigSort - Sort a wig file.. */ { struct lineFile *lf = lineFileOpen(input, TRUE); struct pos *pos, *posList = NULL; char *line; while (lineFileNextReal(lf, &line)) { verbose(2, "processing %s\n", line); AllocVar(pos); pos->fileOffset = lineFileTell(lf); if (posList != NULL) posList->fileSize = pos->fileOffset - posList->fileOffset; slAddHead(&posList, pos); if (stringIn("chrom=", line)) { parseSteppedSection(lf, line, pos); } else { /* Check for bed... */ char *words[5]; int wordCount = chopLine(line, words); if (wordCount != 4) errAbort("Unrecognized format line %d of %s:\n", lf->lineIx, lf->fileName); pos->chrom = cloneString(words[0]); pos->start = lineFileNeedNum(lf, words, 1); } } if (posList != NULL) { posList->fileSize = lineFileTell(lf) - posList->fileOffset; slReverse(&posList); slSort(&posList, posCmp); } lineFileClose(&lf); FILE *in = mustOpen(input, "r"); FILE *out = mustOpen(output, "w"); for (pos = posList; pos != NULL; pos = pos->next) { fseek(in, pos->fileOffset, SEEK_SET); copyFileBytes(in, out, pos->fileSize); } carefulClose(&in); carefulClose(&out); }
struct chromIxRange *chromIxRangeLoadAll(char *fileName, bits64 *retFileSize)
/* Read every three-column row of fileName into a list of chromIxRange, in
 * file order.  The columns are chromIx, start and end.  Each element also
 * records the lineFileTell() value sampled after its row was read, and
 * *retFileSize receives the lineFileTell() value once the rows run out.
 * Dispose of the result with chromIxRangeFreeList(). */
{
char *fields[3];
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chromIxRange *ranges = NULL;

for (;;)
    {
    struct chromIxRange *range;

    if (!lineFileRow(lf, fields))
        break;
    AllocVar(range);
    range->chromIx = sqlUnsigned(fields[0]);
    range->start = sqlUnsigned(fields[1]);
    range->end = sqlUnsigned(fields[2]);
    range->fileOffset = lineFileTell(lf);
    slAddHead(&ranges, range);
    }

/* Must be sampled before the lineFile is closed. */
*retFileSize = lineFileTell(lf);
lineFileClose(&lf);

/* Elements were pushed on the head, so flip to restore file order. */
slReverse(&ranges);
return ranges;
}
void axtIndex(char *in, char *out)
/* axtIndex - Create summary file for axt.
 * For every axt record read from in, writes one line to out holding
 *     <tStart> <tEnd - tStart> <position>
 * where position is the lineFileTell() value sampled just before the
 * record was read. */
{
struct lineFile *lf = lineFileOpen(in, TRUE);
FILE *f = mustOpen(out, "w");
struct axt *axt;

for (;;)
    {
    off_t pos = lineFileTell(lf);   /* Sample before axtRead() advances. */

    axt = axtRead(lf);
    if (axt == NULL)
        break;
    /* Cast to long long so the argument type matches %lld exactly. */
    fprintf(f, "%d %d %lld\n", axt->tStart, axt->tEnd - axt->tStart, (long long)pos);
    axtFree(&axt);
    }
carefulClose(&f);
lineFileClose(&lf);     /* The input was previously left open. */
}
void addFa(char *file, struct hash *fileHash, struct hash *seqHash) /* Add a fa file to hashes. */ { struct lineFile *lf = lineFileOpen(file, TRUE); char *line, *name; char *rFile = hashStoreName(fileHash, file); while (lineFileNext(lf, &line, NULL)) { if (line[0] == '>') { struct seqFilePos *sfp; line += 1; name = nextWord(&line); if (name == NULL) errAbort("bad line %d of %s", lf->lineIx, lf->fileName); AllocVar(sfp); hashAddSaveName(seqHash, name, sfp, &sfp->name); sfp->file = rFile; sfp->pos = lineFileTell(lf); } } lineFileClose(&lf); }
struct segBlock *segNextWithPos(struct segFile *sf, off_t *retOffset) /* Return next segment in segment file or NULL if at end. If retOffset * is not NULL, return start offset of record in file. */ { struct lineFile *lf = sf->lf; struct segBlock *block; struct segComp *comp, *tail = NULL; char *line, *name, *row[6], *val, *word; int wordCount; /* Loop until we get a segment paragraph or reach end of file. */ for (;;) { /* Get segment header line. If it's not there assume end of file. */ if (!nextLine(lf, &line)) { lineFileClose(&sf->lf); return NULL; } /* Parse segment header line. */ word = nextWord(&line); if (word == NULL) continue; /* Ignore blank lines. */ if (sameString(word, "b")) { if (retOffset != NULL) *retOffset = lineFileTell(sf->lf); AllocVar(block); /* Parse name=val. */ while ((word = nextWord(&line)) != NULL) { name = word; val = strchr(word, '='); if (val == NULL) errAbort("Missing = after %s line %d of %s", name, lf->lineIx, lf->fileName); *val++ = 0; if (sameString(name, "name")) block->name = cloneString(val); else if (sameString(name, "val")) block->val = atoi(val); } /* Parse segment components until blank line. */ for (;;) { if (!nextLine(lf, &line)) errAbort("Unexpected end of file %s", lf->fileName); word = nextWord(&line); if (word == NULL) break; if (sameString(word, "s")) { /* Chop line up by white space. This involves a few +=1's * because we have already chopped out the first word. */ row[0] = word; wordCount = chopByWhite(line, row+1, ArraySize(row)-1) +1; lineFileExpectWords(lf, ArraySize(row), wordCount); AllocVar(comp); /* Convert ASCII text representation to segComp structure. */ comp->src = cloneString(row[1]); comp->start = lineFileNeedNum(lf, row, 2); comp->size = lineFileNeedNum(lf, row, 3); comp->strand = row[4][0]; comp->srcSize = lineFileNeedNum(lf, row, 5); /* Do some sanity checking. 
*/ if (comp->size <= 0 || comp->srcSize <= 0) errAbort("Got a negative or zero size line %d of %s", lf->lineIx, lf->fileName); if (comp->start < 0 || comp->start + comp->size > comp->srcSize) errAbort("Coordinates out of range line %d of %s", lf->lineIx, lf->fileName); if (comp->strand != '+' && comp->strand != '-') errAbort("Invalid strand line %d of %s", lf->lineIx, lf->fileName); /* Add the new component to the current list. */ if (block->components == NULL) block->components = comp; else tail->next = comp; tail = comp; } } return block; } else /* Skip over paragraph we don't understand. */ { for (;;) { if (!nextLine(lf, &line)) return NULL; if (nextWord(&line) == NULL) break; } } } }
struct mafAli *mafNextWithPos(struct mafFile *mf, off_t *retOffset) /* Return next alignment in FILE or NULL if at end. If retOffset is * nonNULL, return start offset of record in file. */ { struct lineFile *lf = mf->lf; struct mafAli *ali; char *line, *word; /* Loop until get an alignment paragraph or reach end of file. */ for (;;) { /* Get alignment header line. If it's not there assume end of file. */ if (!nextLine(lf, &line)) { lineFileClose(&mf->lf); return NULL; } /* Parse alignment header line. */ word = nextWord(&line); if (word == NULL) continue; /* Ignore blank lines. */ if (sameString(word, "a")) { if (retOffset != NULL) *retOffset = lineFileTell(mf->lf); AllocVar(ali); while ((word = nextWord(&line)) != NULL) { /* Parse name=val. */ char *name = word; char *val = strchr(word, '='); if (val == NULL) errAbort("Missing = after %s line 1 of %s", name, lf->fileName); *val++ = 0; if (sameString(name, "score")) ali->score = atof(val); } /* Parse alignment components until blank line. */ for (;;) { if (!nextLine(lf, &line)) errAbort("Unexpected end of file %s", lf->fileName); word = nextWord(&line); if (word == NULL) break; if (sameString(word, "s") || sameString(word, "e")) { struct mafComp *comp; int wordCount; char *row[7]; int textSize; /* Chop line up by white space. This involves a few +-1's because * have already chopped out first word. */ row[0] = word; wordCount = chopByWhite(line, row+1, ArraySize(row)-1) + 1; /* +-1 because of "s" */ lineFileExpectWords(lf, ArraySize(row), wordCount); AllocVar(comp); /* Convert ascii text representation to mafComp structure. 
*/ comp->src = cloneString(row[1]); comp->srcSize = lineFileNeedNum(lf, row, 5); comp->strand = row[4][0]; comp->start = lineFileNeedNum(lf, row, 2); if (sameString(word, "e")) { comp->size = 0; comp->rightLen = comp->leftLen = lineFileNeedNum(lf, row, 3); comp->rightStatus = comp->leftStatus = *row[6]; } else { comp->size = lineFileNeedNum(lf, row, 3); comp->text = cloneString(row[6]); textSize = strlen(comp->text); /* Fill in ali->text size. */ if (ali->textSize == 0) ali->textSize = textSize; else if (ali->textSize != textSize) errAbort("Text size inconsistent (%d vs %d) line %d of %s", textSize, ali->textSize, lf->lineIx, lf->fileName); } /* Do some sanity checking. */ if (comp->srcSize < 0 || comp->size < 0) errAbort("Got a negative size line %d of %s", lf->lineIx, lf->fileName); if (comp->start < 0 || comp->start + comp->size > comp->srcSize) errAbort("Coordinates out of range line %d of %s", lf->lineIx, lf->fileName); /* Add component to head of list. */ slAddHead(&ali->components, comp); } if (sameString(word, "i")) { struct mafComp *comp; int wordCount; char *row[6]; /* Chop line up by white space. This involves a few +-1's because * have already chopped out first word. */ row[0] = word; wordCount = chopByWhite(line, row+1, ArraySize(row)-1) + 1; /* +-1 because of "s" */ lineFileExpectWords(lf, ArraySize(row), wordCount); if (!sameString(row[1],ali->components->src)) errAbort("i line src mismatch: i is %s :: s is %s\n", row[1], ali->components->src); comp = ali->components; comp->leftStatus = *row[2]; comp->leftLen = atoi(row[3]); comp->rightStatus = *row[4]; comp->rightLen = atoi(row[5]); } if (sameString(word, "q")) { struct mafComp *comp; int wordCount; char *row[3]; /* Chop line up by white space. This involves a few +-1's because * have already chopped out first word. 
*/ row[0] = word; wordCount = chopByWhite(line, row+1, ArraySize(row)-1) + 1; /* +-1 because of "s" */ lineFileExpectWords(lf, ArraySize(row), wordCount); if (!sameString(row[1],ali->components->src)) errAbort("q line src mismatch: q is %s :: s is %s\n", row[1], ali->components->src); comp = ali->components; comp->quality = cloneString(row[2]); } } slReverse(&ali->components); return ali; } else /* Skip over paragraph we don't understand. */ { for (;;) { if (!nextLine(lf, &line)) return NULL; if (nextWord(&line) == NULL) break; } } } }