Ejemplo n.º 1
0
void chainIndex(char *inChain, char *outIndex)
/* chainIndex - Create simple two column file index for chain.
 *
 * inChain  - chain file to read.
 * outIndex - index file to create.  One line is written each time the target
 *            name (tName) changes: the file position in hex, a tab, and the
 *            target name.
 *
 * Aborts if a target name shows up again after a different target name has
 * been seen, since that means the input is not grouped by target. */
{
struct lineFile *lf = lineFileOpen(inChain, TRUE);
FILE *f = mustOpen(outIndex, "w");
struct chain *chain, *lastChain = NULL;
long pos = 0;	/* Position reported by lineFileTell() after the previous chain; 0 for the first. */
struct hash *uniqHash = hashNew(16);

while ((chain = chainRead(lf)) != NULL)
    {
    if (lastChain == NULL || !sameString(chain->tName, lastChain->tName))
        {
        /* A new run of chains on this target starts here.  lastChain can not
         * be NULL inside the abort below because the hash is still empty when
         * the very first chain is processed. */
        if (hashLookup(uniqHash, chain->tName))
            {
            errAbort("%s is not sorted, %s repeated with intervening %s", 
                inChain, chain->tName, lastChain->tName);
            }
        hashAddInt(uniqHash, chain->tName, pos);
        fprintf(f, "%lx\t%s\n", pos, chain->tName);
        }
    /* Only the most recent chain is kept, for the target name comparison. */
    chainFree(&lastChain);
    lastChain = chain;
    pos = lineFileTell(lf);
    }

/* Release everything acquired above; previously the output file, the input
 * file, the final chain and the hash were all left open/allocated. */
chainFree(&lastChain);
hashFree(&uniqHash);
lineFileClose(&lf);
carefulClose(&f);
}
void getOffset(char *directoryName, char *chromName, char *outputFileName)
{
FILE *outputFileHandle = mustOpen(outputFileName, "w");
struct lineFile *lf = NULL;
char *line;
off_t offset;
char *row[9], *rsId[2];
char inputFileName[64];

safef(inputFileName, sizeof(inputFileName), "%s/%s.fa", directoryName, chromName);
lf = lineFileOpen(inputFileName, TRUE);
while (lineFileNext(lf, &line, NULL))
    {
    if (line[0] == '>')
        {
	chopString(line, "|", row, ArraySize(row));
        chopString(row[2], " ", rsId, ArraySize(rsId));
	offset = lineFileTell(lf);
	fprintf(outputFileHandle, "%s\t%s\t%ld\n", rsId[0], chromName, offset);
	}
    }

carefulClose(&outputFileHandle);
lineFileClose(&lf);
}
void wigSort(char *input, char *output)
/* wigSort - Sort a wig file.
 *
 * Pass 1 reads input through a lineFile and builds a list of sections, each
 * remembering the file position reported by lineFileTell() and its size in
 * bytes.  A line containing "chrom=" is handed to parseSteppedSection();
 * any other line must have exactly four words and is treated as one bed-style
 * record (chrom, start, ...).
 * Pass 2 sorts the sections with posCmp and copies the bytes of each section
 * from input to output in sorted order.
 *
 * Aborts on an unrecognized line or if seeking in the input fails. */
{
struct lineFile *lf = lineFileOpen(input, TRUE);
struct pos *pos, *posList = NULL;
char *line;
while (lineFileNextReal(lf, &line))
    {
    verbose(2, "processing %s\n", line);
    AllocVar(pos);
    pos->fileOffset = lineFileTell(lf);
    /* posList still points at the previous section (list is built head
     * first), so its size is the distance to the section starting here. */
    if (posList != NULL)
        posList->fileSize = pos->fileOffset - posList->fileOffset;
    slAddHead(&posList, pos);
    if (stringIn("chrom=", line))
        {
        parseSteppedSection(lf, line, pos);
        }
    else
        {
        /* Check for bed... */
        char *words[5];
        int wordCount = chopLine(line, words);
        if (wordCount != 4)
            errAbort("Unrecognized format line %d of %s:\n", lf->lineIx, lf->fileName);
        pos->chrom = cloneString(words[0]);
        pos->start = lineFileNeedNum(lf, words, 1);
        }
    }
if (posList != NULL)
    {
    /* The last section runs up to the final position of the lineFile. */
    posList->fileSize = lineFileTell(lf) - posList->fileOffset;
    slReverse(&posList);	/* Back to file order before sorting. */
    slSort(&posList, posCmp);
    }
lineFileClose(&lf);

FILE *in = mustOpen(input, "r");
FILE *out = mustOpen(output, "w");
for (pos = posList; pos != NULL; pos = pos->next)
    {
    /* A failed seek would otherwise silently copy the wrong bytes. */
    if (fseek(in, pos->fileOffset, SEEK_SET) != 0)
        errAbort("Couldn't seek to offset %lld in %s",
            (long long)pos->fileOffset, input);
    copyFileBytes(in, out, pos->fileSize);
    }
carefulClose(&in);
carefulClose(&out);
}
Ejemplo n.º 4
0
struct chromIxRange *chromIxRangeLoadAll(char *fileName, bits64 *retFileSize) 
/* Read every three-column row of fileName into a chromIxRange list, kept in
 * file order.  Each element also records the lineFileTell() position taken
 * after its row was read.  *retFileSize receives the lineFileTell() position
 * once all rows have been consumed; retFileSize must not be NULL.
 * Dispose of the result with chromIxRangeFreeList(). */
{
char *fields[3];
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chromIxRange *rangeList = NULL;

for (;;)
    {
    struct chromIxRange *range;

    if (!lineFileRow(lf, fields))
        break;

    /* Columns are: chromIx, start, end. */
    AllocVar(range);
    range->chromIx = sqlUnsigned(fields[0]);
    range->start = sqlUnsigned(fields[1]);
    range->end = sqlUnsigned(fields[2]);
    range->fileOffset = lineFileTell(lf);

    /* Built head first; put back into file order below. */
    slAddHead(&rangeList, range);
    }

*retFileSize = lineFileTell(lf);
lineFileClose(&lf);

slReverse(&rangeList);
return rangeList;
}
Ejemplo n.º 5
0
void axtIndex(char *in, char *out)
/* axtIndex - Create summary file for axt.
 *
 * in  - axt file to read.
 * out - summary file to create.  One line per alignment:
 *           <tStart> <tEnd - tStart> <file position>
 *       where the file position is the lineFileTell() value sampled just
 *       before the alignment is read. */
{
struct lineFile *lf = lineFileOpen(in, TRUE);
FILE *f = mustOpen(out, "w");
struct axt *axt;

for (;;)
    {
    /* Sample the position first so it belongs to the record about to be read. */
    off_t pos = lineFileTell(lf);
    axt = axtRead(lf);
    if (axt == NULL)
        break;
    /* %lld expects a signed long long; the old cast to unsigned long long
     * did not match the format. */
    fprintf(f, "%d %d %lld\n", axt->tStart, axt->tEnd - axt->tStart, (long long) pos); 
    axtFree(&axt);
    }
carefulClose(&f);
lineFileClose(&lf);	/* Previously the input was never closed. */
}
Ejemplo n.º 6
0
void addFa(char *file, struct hash *fileHash, struct hash *seqHash)
/* Index one fa file.  The file name is stored in fileHash, and for every
 * header line (starting with '>') a seqFilePos is added to seqHash under the
 * first word after the '>'.  Each seqFilePos records the stored file name and
 * the lineFileTell() position taken after its header line was read.
 * Aborts if a header line has no word after the '>'. */
{
struct lineFile *lf = lineFileOpen(file, TRUE);
char *header, *seqName;
char *storedFileName = hashStoreName(fileHash, file);

while (lineFileNext(lf, &header, NULL))
    {
    struct seqFilePos *entry;

    if (header[0] != '>')
        continue;	/* Sequence data; only headers are indexed. */

    header++;	/* Step over the '>'. */
    seqName = nextWord(&header);
    if (seqName == NULL)
        errAbort("bad line %d of %s", lf->lineIx, lf->fileName);

    AllocVar(entry);
    /* The hash keeps its own copy of the name; entry->name is pointed at it. */
    hashAddSaveName(seqHash, seqName, entry, &entry->name);
    entry->file = storedFileName;
    entry->pos = lineFileTell(lf);
    }

lineFileClose(&lf);
}
Ejemplo n.º 7
0
struct segBlock *segNextWithPos(struct segFile *sf, off_t *retOffset)
/* Return next segment in segment file or NULL if at end. If retOffset
 * is not NULL, return start offset of record in file (the lineFileTell()
 * value taken once the block's "b" header line has been read).
 *
 * A block is a "b" header line with optional name=val pairs ("name" and
 * "val" are stored, others are ignored), followed by "s" component lines of
 * six words each, terminated by a blank line.  Paragraphs starting with any
 * other word are skipped.
 *
 * At end of file sf->lf is closed; calling again after that returns NULL.
 * Aborts on a malformed header attribute, a malformed or out-of-range
 * component line, or if the file ends in the middle of a block. */
{
struct lineFile *lf = sf->lf;
struct segBlock *block;
struct segComp *comp, *tail = NULL;
char *line, *name, *row[6], *val, *word;
int wordCount;

/* The file has already been closed by an earlier end-of-file return.
 * NOTE(review): relies on lineFileClose() resetting sf->lf to NULL - confirm. */
if (lf == NULL)
	return NULL;

/* Loop until we get a segment paragraph or reach end of file. */
for (;;)
	{
	/* Get segment header line. If it's not there assume end of file. */
	if (!nextLine(lf, &line))
		{
		lineFileClose(&sf->lf);
		return NULL;
		}

	/* Parse segment header line. */
	word = nextWord(&line);
	if (word == NULL)
		continue;	/* Ignore blank lines. */

	if (sameString(word, "b"))
		{
		if (retOffset != NULL)
			*retOffset = lineFileTell(sf->lf);
		AllocVar(block);
		/* Parse name=val. */
		while ((word = nextWord(&line)) != NULL)
			{
			name = word;
			val = strchr(word, '=');
			if (val == NULL)
				errAbort("Missing = after %s line %d of %s",
					name, lf->lineIx, lf->fileName);
			*val++ = 0;	/* Split "name=val" in place. */

			if (sameString(name, "name"))
				block->name = cloneString(val);
			else if (sameString(name, "val"))
				block->val = atoi(val);
			}

		/* Parse segment components until blank line. */
		for (;;)
			{
			if (!nextLine(lf, &line))
				errAbort("Unexpected end of file %s", lf->fileName);
			word = nextWord(&line);
			if (word == NULL)
				break;
			if (sameString(word, "s"))
				{
				/* Chop line up by white space. This involves a few +=1's
				 * because we have already chopped out the first word. */
				row[0] = word;
				wordCount = chopByWhite(line, row+1, ArraySize(row)-1) +1;
				lineFileExpectWords(lf, ArraySize(row), wordCount);
				AllocVar(comp);

				/* Convert ASCII text representation to segComp structure. */
				comp->src     = cloneString(row[1]);
				comp->start   = lineFileNeedNum(lf, row, 2);
				comp->size    = lineFileNeedNum(lf, row, 3);
				comp->strand  = row[4][0];
				comp->srcSize = lineFileNeedNum(lf, row, 5);

				/* Do some sanity checking. */
				if (comp->size <= 0 || comp->srcSize <= 0)
					errAbort("Got a negative or zero size line %d of %s",
						lf->lineIx, lf->fileName);
				if (comp->start < 0 || comp->start + comp->size > comp->srcSize)
					errAbort("Coordinates out of range line %d of %s",
						lf->lineIx, lf->fileName);
				if (comp->strand != '+' && comp->strand != '-')
					errAbort("Invalid strand line %d of %s",
						lf->lineIx, lf->fileName);

				/* Append the new component so file order is preserved. */
				if (block->components == NULL)
					block->components = comp;
				else
					tail->next = comp;
				tail = comp;
				}
			}
		return block;
		}
	else	/* Skip over paragraph we don't understand. */
		{
		for (;;)
			{
			if (!nextLine(lf, &line))
				{
				/* End of file: close here too, so every end-of-file
				 * return leaves sf in the same state. */
				lineFileClose(&sf->lf);
				return NULL;
				}
			if (nextWord(&line) == NULL)
				break;
			}
		}
	}
}
Ejemplo n.º 8
0
struct mafAli *mafNextWithPos(struct mafFile *mf, off_t *retOffset)
/* Return next alignment in FILE or NULL if at end.  If retOffset is
 * nonNULL, return start offset of record in file (the lineFileTell() value
 * taken once the alignment's "a" header line has been read).
 *
 * An alignment is an "a" header line with optional name=val pairs (only
 * "score" is stored), followed by component lines until a blank line:
 *   "s" - sequence component, 7 words, text stored in comp->text
 *   "e" - empty component, 7 words, size 0, length/status stored on both sides
 *   "i" - 6 words, left/right status and length for the preceding component
 *   "q" - 3 words, quality string for the preceding component
 * Paragraphs starting with any other word are skipped.
 *
 * At end of file mf->lf is closed; calling again after that returns NULL.
 * Aborts on malformed input, including an "i" or "q" line that is not
 * preceded by a component, or if the file ends in the middle of an alignment. */
{
struct lineFile *lf = mf->lf;
struct mafAli *ali;
char *line, *word;

/* The file has already been closed by an earlier end-of-file return.
 * NOTE(review): relies on lineFileClose() resetting mf->lf to NULL - confirm. */
if (lf == NULL)
    return NULL;

/* Loop until get an alignment paragraph or reach end of file. */
for (;;)
    {
    /* Get alignment header line.  If it's not there assume end of file. */
    if (!nextLine(lf, &line))
        {
        lineFileClose(&mf->lf);
        return NULL;
        }

    /* Parse alignment header line. */
    word = nextWord(&line);
    if (word == NULL)
        continue;	/* Ignore blank lines. */

    if (sameString(word, "a"))
        {
        if (retOffset != NULL)
            *retOffset = lineFileTell(mf->lf);
        AllocVar(ali);
        while ((word = nextWord(&line)) != NULL)
            {
            /* Parse name=val. */
            char *name = word;
            char *val = strchr(word, '=');
            if (val == NULL)
                /* Report the real line number; this used to say "line 1". */
                errAbort("Missing = after %s line %d of %s",
                    name, lf->lineIx, lf->fileName);
            *val++ = 0;	/* Split "name=val" in place. */

            if (sameString(name, "score"))
                ali->score = atof(val);
            }

        /* Parse alignment components until blank line. */
        for (;;)
            {
            if (!nextLine(lf, &line))
                errAbort("Unexpected end of file %s", lf->fileName);
            word = nextWord(&line);
            if (word == NULL)
                break;
            if (sameString(word, "s") || sameString(word, "e"))
                {
                struct mafComp *comp;
                int wordCount;
                char *row[7];
                int textSize;

                /* Chop line up by white space.  This involves a few +-1's because
                 * have already chopped out first word. */
                row[0] = word;
                wordCount = chopByWhite(line, row+1, ArraySize(row)-1) + 1; /* +-1 because of "s" */
                lineFileExpectWords(lf, ArraySize(row), wordCount);
                AllocVar(comp);

                /* Convert ascii text representation to mafComp structure. */
                comp->src = cloneString(row[1]);
                comp->srcSize = lineFileNeedNum(lf, row, 5);
                comp->strand = row[4][0];
                comp->start = lineFileNeedNum(lf, row, 2);

                if (sameString(word, "e"))
                    {
                    comp->size = 0;
                    comp->rightLen = comp->leftLen = lineFileNeedNum(lf, row, 3);
                    comp->rightStatus = comp->leftStatus = *row[6];
                    }
                else
                    {
                    comp->size = lineFileNeedNum(lf, row, 3);
                    comp->text = cloneString(row[6]);
                    textSize = strlen(comp->text);

                    /* Fill in ali->text size. */
                    if (ali->textSize == 0)
                        ali->textSize = textSize;
                    else if (ali->textSize != textSize)
                        errAbort("Text size inconsistent (%d vs %d) line %d of %s",
                            textSize, ali->textSize, lf->lineIx, lf->fileName);
                    }

                /* Do some sanity checking. */
                if (comp->srcSize < 0 || comp->size < 0)
                     errAbort("Got a negative size line %d of %s", lf->lineIx, lf->fileName);
                if (comp->start < 0 || comp->start + comp->size > comp->srcSize)
                     errAbort("Coordinates out of range line %d of %s", lf->lineIx, lf->fileName);

                /* Add component to head of list; the list is reversed into
                 * file order before returning. */
                slAddHead(&ali->components, comp);
                }
            if (sameString(word, "i"))
                {
                struct mafComp *comp;
                int wordCount;
                char *row[6];

                /* Chop line up by white space.  This involves a few +-1's because
                 * have already chopped out first word. */
                row[0] = word;
                wordCount = chopByWhite(line, row+1, ArraySize(row)-1) + 1; /* +-1 because of "i" */
                lineFileExpectWords(lf, ArraySize(row), wordCount);

                /* An "i" line annotates the most recently read component
                 * (the list head); without one there is nothing to annotate. */
                if (ali->components == NULL)
                    errAbort("i line without preceding component line %d of %s",
                        lf->lineIx, lf->fileName);
                if (!sameString(row[1],ali->components->src))
                    errAbort("i line src mismatch: i is %s :: s is %s\n", row[1], ali->components->src);

                comp = ali->components;
                comp->leftStatus = *row[2];
                comp->leftLen = atoi(row[3]);
                comp->rightStatus = *row[4];
                comp->rightLen = atoi(row[5]);
                }
            if (sameString(word, "q"))
                {
                struct mafComp *comp;
                int wordCount;
                char *row[3];

                /* Chop line up by white space.  This involves a few +-1's because
                 * have already chopped out first word. */
                row[0] = word;
                wordCount = chopByWhite(line, row+1, ArraySize(row)-1) + 1; /* +-1 because of "q" */
                lineFileExpectWords(lf, ArraySize(row), wordCount);

                /* Same requirement as for "i" lines. */
                if (ali->components == NULL)
                    errAbort("q line without preceding component line %d of %s",
                        lf->lineIx, lf->fileName);
                if (!sameString(row[1],ali->components->src))
                    errAbort("q line src mismatch: q is %s :: s is %s\n", row[1], ali->components->src);

                comp = ali->components;
                comp->quality = cloneString(row[2]);
                }
            }
        slReverse(&ali->components);
        return ali;
        }
    else  /* Skip over paragraph we don't understand. */
        {
        for (;;)
            {
            if (!nextLine(lf, &line))
                {
                /* End of file: close here too, so every end-of-file
                 * return leaves mf in the same state. */
                lineFileClose(&mf->lf);
                return NULL;
                }
            if (nextWord(&line) == NULL)
                break;
            }
        }
    }
}