Example #1
0
void checkAllBeds(struct bed **bedList, int expCount)
/** Walk *bedList and unlink every bed whose expCount differs from
    the expected expCount.  A mismatching bed with zero experiments
    bumps noExpCount; any other mismatch is reported through warn()
    and bumps missingExpsCount.  Unlinked beds are not freed here. */
{
struct bed *cur = *bedList;
while (cur != NULL)
    {
    /* Remember the successor first: cur may be unlinked below. */
    struct bed *following = cur->next;
    if (cur->expCount != expCount)
	{
	if (cur->expCount == 0)
	    noExpCount++;
	else
	    {
	    warn("Bed %s at %d has only %d exps, mark is %d", cur->name, slIxFromElement(*bedList, cur), cur->expCount, expCount);
	    missingExpsCount++;
	    }
	slRemoveEl(bedList, cur);
	}
    cur = following;
    }
}
Example #2
0
struct bbExIndexMaker *bbExIndexMakerNew(struct slName *extraIndexList, struct asObject *as)
/* Build an index maker for the fields named in extraIndexList.  Every name must
 * be a column of the autoSql object 'as' and, for now, of low-level type "string";
 * anything else ends in errAbort.  With an empty list the returned maker just has
 * indexCount of zero and no arrays allocated. */
{
struct bbExIndexMaker *maker;
AllocVar(maker);
maker->indexCount = slCount(extraIndexList);

if (maker->indexCount != 0)
    {
    /* One slot per requested index in each of the parallel arrays. */
    AllocArray(maker->indexFields, maker->indexCount);
    AllocArray(maker->maxFieldSize, maker->indexCount);
    AllocArray(maker->chunkArrayArray, maker->indexCount);
    AllocArray(maker->fileOffsets, maker->indexCount);

    /* Validate each requested field and record its column position. */
    int slot = 0;
    struct slName *field = extraIndexList;
    while (field != NULL)
	{
	struct asColumn *column = asColumnFind(as, field->name);
	if (column == NULL)
	    errAbort("extraIndex field %s not a standard bed field or found in 'as' file.",
		field->name);
	if (!sameString(column->lowType->name, "string"))
	    errAbort("Sorry for now can only index string fields.");
	maker->indexFields[slot] = slIxFromElement(as->columnList, column);
	slot += 1;
	field = field->next;
	}
    }
return maker;
}
Example #3
0
struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName, int *retFieldIx)
/* Find the extra index built on fieldName and return the bptFile attached at its
 * offset.  If retFieldIx is non-NULL it receives the column position of fieldName.
 * Aborts when the field is unknown, when the file has no extra indexes, or when
 * none of the indexes covers the field. */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;

/* Translate the field name into its position in the autoSql column list. */
struct asObject *as = bigBedAsOrDefault(bbi);
struct asColumn *wantedCol = asColumnFind(as, fieldName);
if (wantedCol == NULL)
    errAbort("No field %s in %s", fieldName, bbi->fileName);
int wantedIx = slIxFromElement(as->columnList, wantedCol);
if (retFieldIx != NULL)
    *retFieldIx = wantedIx;
asObjectFree(&as);

/* Position ourselves at the start of the extra index list, if there is one. */
bits64 listOffset = bbi->extraIndexListOffset;
if (listOffset == 0)
    errAbort("%s has no indexes", bbi->fileName);
udcSeek(udc, listOffset);

/* Read index headers one after another until one matches our column. */
int indexIx = 0;
while (indexIx < bbi->extraIndexCount)
    {
    bits16 type = udcReadBits16(udc, isSwapped);
    bits16 fieldCount = udcReadBits16(udc, isSwapped);
    bits64 fileOffset = udcReadBits64(udc, isSwapped);
    udcSeekCur(udc, 4);    // reserved bytes in the header

    if (type != 0)
	{
	warn("Don't understand type %d", type);
	internalErr();
	}
    if (fieldCount != 1)
	{
	warn("Not yet understanding indexes on multiple fields at once.");
	internalErr();
	}
    else
	{
	bits16 fieldId = udcReadBits16(udc, isSwapped);
	udcSeekCur(udc, 2);    // reserved bytes after the field id
	if (fieldId == wantedIx)
	    {
	    udcSeek(udc, fileOffset);
	    return bptFileAttach(bbi->fileName, udc);
	    }
	}
    ++indexIx;
    }

errAbort("%s is not indexed in %s", fieldName, bbi->fileName);
return NULL;
}
Example #4
0
int altGraphXItemHeight(struct track *tg, void *item)
/* Return the pixel height of one item.  In dense mode, or when no customPt is
 * attached to the track, that is a single lineHeight.  In full mode customPt is
 * used as a hash keyed by the item's position in tg->items (as a decimal string);
 * the stored int is multiplied by lineHeight.  Any other mode yields heightPer. */
{
if (tg->limitedVis == tvDense || tg->customPt == NULL)
    return tg->lineHeight;

if (tg->limitedVis != tvFull)
    return tg->heightPer;

/* Full mode: look up the value stored for this item. */
char ixKey[128];
int itemIx = slIxFromElement(tg->items, item);
safef(ixKey, sizeof(ixKey), "%d", itemIx);
int rowCount = hashIntVal((struct hash*)tg->customPt, ixKey);
return rowCount * tg->lineHeight;
}
Example #5
0
int *makeNewToOldArray(struct asObject *as, struct slPair *fieldList)
/* Return an array where we can lookup old index given new index.
 * The array has one entry per element of fieldList; entry i is the position
 * in as->columnList of the column named by the i-th pair's val.  The array is
 * allocated here with AllocArray and handed to the caller
 * (outputUniqueOnSharedKey releases it with freez). */
{
int oldFieldCount = slCount(as->columnList);
int newFieldCount = slCount(fieldList);
int *oldIx;
AllocArray(oldIx, newFieldCount);
int i;
struct slPair *fieldPair;
for (i=0, fieldPair = fieldList; i<newFieldCount; ++i, fieldPair = fieldPair->next)
    {
    char *oldName = fieldPair->val;
    struct asColumn *col = asColumnFind(as, oldName);
    assert(col != NULL);  /* We checked earlier but... */
    int ix = slIxFromElement(as->columnList, col);
    /* A valid zero-based position is strictly less than the column count;
     * ix == oldFieldCount would already be one past the end. */
    assert(ix >= 0 && ix < oldFieldCount);
    oldIx[i] = ix;
    }
return oldIx;
}
Example #6
0
void outputUniqueOnSharedKey(char *inTab, struct asObject *as, struct asColumn *keyCol,
    struct slPair *fieldList, char *outTab, char *outErr)
/* Read tab-separated rows from inTab, project each onto the fields in fieldList,
 * and write the projection to outTab once per distinct value of the keyCol field.
 * A later row whose projection differs from the first one seen for its key is
 * written to outErr (preceded, once per key, by that first projection).  If any
 * such conflicts occurred a warning is issued, and unless mergeOk is set the
 * program aborts. */
{
/* Open input and both outputs before doing anything else. */
struct lineFile *lf = lineFileOpen(inTab, TRUE);
FILE *f = mustOpen(outTab, "w");
FILE *fErr = mustOpen(outErr, "w");

/* Room for a few extra input fields so a too-long row is reported, not truncated. */
int oldFieldCount = slCount(as->columnList);
int newFieldCount = slCount(fieldList);
int allocFields = oldFieldCount+10;
char *words[allocFields];

/* Map from output column position to input column position. */
int *oldIx = makeNewToOldArray(as, fieldList);

/* Input column that holds the shared key. */
int keyIx = slIxFromElement(as->columnList, keyCol);

/* uniqHash: key -> first projected row.  errHash: keys already reported.
 * NOTE(review): neither hash, the cloned rows, nor dy are released below. */
struct hash *uniqHash = hashNew(18);
struct hash *errHash = hashNew(0);
struct dyString *dy = dyStringNew(1024);

for (;;)
    {
    int fieldCount = lineFileChopNextTab(lf, words, allocFields);
    if (fieldCount <= 0)
	break;
    lineFileExpectWords(lf, oldFieldCount, fieldCount);

    /* Assemble the candidate output row in dy. */
    dyStringClear(dy);
    dyStringPrintf(dy, "%s", words[oldIx[0]]);
    int outIx = 1;
    while (outIx < newFieldCount)
	{
	dyStringPrintf(dy,  "\t%s", words[oldIx[outIx]]);
	++outIx;
	}
    dyStringPrintf(dy, "\n");

    char *key = words[keyIx];
    char *firstRow = hashFindVal(uniqHash, key);

    /* First time we see this key: remember the row and emit it. */
    if (firstRow == NULL)
	{
	hashAdd(uniqHash, key, cloneString(dy->string));
	fputs(dy->string, f);
	continue;
	}

    /* Exact repeat of the first row for this key: nothing to do. */
    if (sameString(firstRow, dy->string))
	continue;

    /* Conflict.  Emit the original row once per key, then every conflicting row. */
    if (!hashLookup(errHash, key))
	{
	hashAdd(errHash, key, NULL);
	fputs(firstRow, fErr);
	}
    fputs(dy->string, fErr);
    }

/* Summarize conflicts, aborting unless merging was explicitly allowed. */
if (errHash->elCount > 0)
    {
    warn("Warning: %d shared keys have multiple values in table 2. See %s.\n"
         "Only first row for each key put in %s" , errHash->elCount, outErr, outTab);
    if (!mergeOk)
	noWarnAbort();
    }

/* Release the index map and close all files. */
freez(&oldIx);
carefulClose(&fErr);
carefulClose(&f);
lineFileClose(&lf);
}
Example #7
0
struct bed *pathToBed(struct path *path, struct splice *splice,
		      int source, int sink, boolean spoofEnds)
/* Construct a bed for the path. If spoofEnds is TRUE,
   ensure that there is at least a 1bp exon at splice
   sites.
   path      - vertices to walk; consecutive vertices form edges.
   splice    - supplies chrom, strand, name, type and the vertex
               position/type arrays indexed by vertex number.
   source    - vertex number to skip when it starts an edge.
   sink      - vertex number left out of the generated name.
   Every edge classified as ggExon by pathEdgeType() becomes one
   block.  The bed name is "<splice name>.<path index>.<vertices,>"
   with "-Ex" or "-Inc" appended for the first or second path of
   certain splice types.  Returns NULL if no block was produced. */
{
struct bed *bed = NULL;
int vertIx = 0;
int *verts = path->vertices;
int *vPos = splice->vPositions;
unsigned char *vTypes = splice->vTypes;
int i = 0;
struct dyString *buff = newDyString(256);
/* End spoofing inspects the first and last edge, so it needs at least
 * two vertices; with fewer, verts[1] and verts[vCount-2] are out of bounds. */
boolean canSpoof = (spoofEnds && path->vCount >= 2);
/* Position of this path in the splice's path list; used twice for naming. */
int pathIx = 0;

AllocVar(bed);
bed->chrom = cloneString(splice->tName);
bed->chromStart = BIGNUM;
bed->chromEnd = 0;
safef(bed->strand, sizeof(bed->strand), "%s", splice->strand);
bed->score = splice->type;
AllocArray(bed->chromStarts, path->vCount);
AllocArray(bed->blockSizes, path->vCount);

/* If necessary tack on a fake exon. */
if(canSpoof && verts[vertIx] != source && verts[vertIx+1] <= splice->vCount &&
   pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) != ggExon)
    {
    bed->blockSizes[bed->blockCount] = 1;
    bed->chromStarts[bed->blockCount] = vPos[verts[vertIx]] - 1;
    bed->chromStart = bed->thickStart = min(bed->chromStart, vPos[verts[vertIx]] - 1 );
    bed->chromEnd = bed->thickEnd = max(bed->chromEnd, vPos[verts[vertIx+1]]);
    bed->blockCount++;
    }

/* For each edge that is an exon count up the base pairs. */
for(vertIx = 0; vertIx < path->vCount - 1; vertIx++)
    {
    if(verts[vertIx] != source && verts[vertIx] <= splice->vCount)
	{
	/* If exon add up the base pairs. */
	if(pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) == ggExon)
	    {
	    bed->blockSizes[bed->blockCount] = vPos[verts[vertIx+1]] - vPos[verts[vertIx]];
	    bed->chromStarts[bed->blockCount] = vPos[verts[vertIx]];
	    bed->chromStart = bed->thickStart = min(bed->chromStart, vPos[verts[vertIx]]);
	    bed->chromEnd = bed->thickEnd = max(bed->chromEnd, vPos[verts[vertIx+1]]);
	    bed->blockCount++;
	    }
	}
    }

/* if spoofing ends tack on a 1bp exon as necessary. */
vertIx = path->vCount - 2;
if(canSpoof && verts[vertIx] != source && verts[vertIx+1] <= splice->vCount &&
   pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) != ggExon)
    {
    bed->blockSizes[bed->blockCount] = 1;
    bed->chromStarts[bed->blockCount] = vPos[verts[vertIx+1]];
    bed->chromStart = bed->thickStart = min(bed->chromStart, vPos[verts[vertIx+1]]);
    bed->chromEnd = bed->thickEnd = max(bed->chromEnd, vPos[verts[vertIx+1]]+1);
    bed->blockCount++;
    }

/* Fix up the name and adjust the chromStarts. */
pathIx = slIxFromElement(splice->paths, path);
dyStringPrintf(buff, "%s.%d.", splice->name, pathIx);
for(i = 0; i < path->vCount; i++)
    {
    if(path->vertices[i] != sink && path->vertices[i] <= splice->vCount)
	dyStringPrintf(buff, "%d,", path->vertices[i]);
    }
if(splice->type == alt5Prime || splice->type == alt3Prime ||
   splice->type == altRetInt || splice->type == altCassette)
    {
    if(pathIx == 0)
	dyStringPrintf(buff, "-Ex");
    else if(pathIx == 1)
	dyStringPrintf(buff, "-Inc");
    }
bed->name = cloneString(buff->string);
/* Block starts were recorded as absolute positions; make them
 * relative to chromStart. */
for(i = 0; i < bed->blockCount; i++)
    bed->chromStarts[i] -= bed->chromStart;

/* If we don't have any blocks, quit now. */
if(bed->blockCount == 0)
    bedFree(&bed);
dyStringFree(&buff);
return bed;
}