Example #1
0
struct bed *bigBedGetFilteredBedsOnRegions(struct sqlConnection *conn,
	char *db, char *table, struct region *regionList, struct lm *lm,
	int *retFieldCount)
/* Get list of beds from bigBed, in all regions, that pass filtering. */
{
/* Connect to big bed and get metadata and filter. */
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);
struct asFilter *filter = asFilterFromCart(cart, db, table, as);

/* Get beds a region at a time. */
struct bed *bedList = NULL;
struct region *region;
for (region = regionList; region != NULL; region = region->next)
    addFilteredBedsOnRegion(bbi, region, table, filter, lm, &bedList);
slReverse(&bedList);

/* Clean up and return. */
if (retFieldCount != NULL)
	*retFieldCount = bbi->definedFieldCount;
bbiFileClose(&bbi);
freeMem(fileName);
return bedList;
}
Example #2
0
struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName, int *retFieldIx)
/* Return index associated with fieldName.  Aborts if no such index.  Optionally return
 * index in a row of this field. */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct asObject *as = bigBedAsOrDefault(bbi);
struct asColumn *col = asColumnFind(as, fieldName);
if (col == NULL)
    errAbort("No field %s in %s", fieldName, bbi->fileName);
int colIx = slIxFromElement(as->columnList, col);
if (retFieldIx != NULL)
   *retFieldIx = colIx;
asObjectFree(&as);

/* See if we have any extra indexes, and if so seek to there. */
bits64 offset = bbi->extraIndexListOffset;
if (offset == 0)
   errAbort("%s has no indexes", bbi->fileName);
udcSeek(udc, offset);

/* Go through each extra index and see if it's a match */
int i;
for (i=0; i<bbi->extraIndexCount; ++i)
    {
    bits16 type = udcReadBits16(udc, isSwapped);
    bits16 fieldCount = udcReadBits16(udc, isSwapped);
    bits64 fileOffset = udcReadBits64(udc, isSwapped);
    udcSeekCur(udc, 4);    // skip over reserved bits

    if (type != 0)
        {
	warn("Don't understand type %d", type);
	internalErr();
	}
    if (fieldCount == 1)
        {
	bits16 fieldId = udcReadBits16(udc, isSwapped);
	udcSeekCur(udc, 2);    // skip over reserved bits
	if (fieldId == colIx)
	    {
	    udcSeek(udc, fileOffset);
	    struct bptFile *bpt = bptFileAttach(bbi->fileName, udc);
	    return bpt;
	    }
	}
    else
        {
	warn("Not yet understanding indexes on multiple fields at once.");
	internalErr();
	}
    }

errAbort("%s is not indexed in %s", fieldName, bbi->fileName);
return NULL;
}
Example #3
0
struct slName *bigBedListExtraIndexes(struct bbiFile *bbi)
/* Return list of names of extra indexes beyond primary chrom:start-end one" */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;

/* See if we have any extra indexes, and if so seek to there. */
bits64 offset = bbi->extraIndexListOffset;
if (offset == 0)
   return NULL;
udcSeek(udc, offset);

/* Construct list of field that are being indexed.  List is list of 
 * field numbers within asObj. */
int i;
struct slInt *intList = NULL, *intEl;
for (i=0; i<bbi->extraIndexCount; ++i)
    {
    bits16 type,fieldCount;
    type = udcReadBits16(udc, isSwapped);
    fieldCount = udcReadBits16(udc, isSwapped);
    udcSeekCur(udc, sizeof(bits64));  // skip over fileOffset
    udcSeekCur(udc, 4);    // skip over reserved bits
    if (fieldCount == 1)
        {
	bits16 fieldId = udcReadBits16(udc, isSwapped);
	udcSeekCur(udc, 2);    // skip over reserved bits
	intEl = slIntNew(fieldId);
	slAddHead(&intList, intEl);
	}
    else
        {
	warn("Not yet understanding indexes on multiple fields at once.");
	internalErr();
	}
    }

/* Now have to make an asObject to find out name that corresponds to this field. */
struct asObject *as = bigBedAsOrDefault(bbi);

/* Make list of field names out of list of field numbers */
struct slName *nameList = NULL;
for (intEl = intList; intEl != NULL; intEl = intEl->next)
    {
    struct asColumn *col = slElementFromIx(as->columnList, intEl->val);
    if (col == NULL)
	{
        warn("Inconsistent bigBed file %s", bbi->fileName);
	internalErr();
	}
    slNameAddHead(&nameList, col->name);
    }

asObjectFree(&as);
return nameList;
}
Example #4
0
struct slName *bigBedGetFields(char *table, struct sqlConnection *conn)
/* Get fields of bigBed as simple name list. */
{
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);
struct slName *names = asColNames(as);
freeMem(fileName);
bbiFileClose(&bbi);
return names;
}
Example #5
0
struct sqlFieldType *bigBedListFieldsAndTypes(char *table, struct sqlConnection *conn)
/* Get fields of bigBed as list of sqlFieldType. */
{
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);
struct sqlFieldType *list = sqlFieldTypesFromAs(as);
freeMem(fileName);
bbiFileClose(&bbi);
return list;
}
Example #6
0
struct asObject *bigBedFileAsObjOrDefault(char *fileName)
// Get asObject associated with bigBed file, or the default.
{
struct bbiFile *bbi = bigBedFileOpen(fileName);
if (bbi)
    {
    struct asObject *as = bigBedAsOrDefault(bbi);
    bbiFileClose(&bbi);
    return as;
    }
return NULL;
}
Example #7
0
struct hTableInfo *bigBedToHti(char *table, struct sqlConnection *conn)
/* Get fields of bigBed into hti structure. */
{
/* Get columns in asObject format. */
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);

/* Allocate hTableInfo structure and fill in info about bed fields. */
struct hash *colHash = asColumnHash(as);
struct hTableInfo *hti;
AllocVar(hti);
hti->rootName = cloneString(table);
hti->isPos= TRUE;
fillField(colHash, "chrom", hti->chromField);
fillField(colHash, "chromStart", hti->startField);
fillField(colHash, "chromEnd", hti->endField);
fillField(colHash, "name", hti->nameField);
fillField(colHash, "score", hti->scoreField);
fillField(colHash, "strand", hti->strandField);
fillField(colHash, "thickStart", hti->cdsStartField);
fillField(colHash, "thickEnd", hti->cdsEndField);
fillField(colHash, "blockCount", hti->countField);
fillField(colHash, "chromStarts", hti->startsField);
fillField(colHash, "blockSizes", hti->endsSizesField);
hti->hasCDS = (bbi->definedFieldCount >= 8);
hti->hasBlocks = (bbi->definedFieldCount >= 12);
char type[256];
safef(type, sizeof(type), "bed %d %c", bbi->definedFieldCount,
	(bbi->definedFieldCount == bbi->fieldCount ? '.' : '+'));
hti->type = cloneString(type);

freeMem(fileName);
hashFree(&colHash);
bbiFileClose(&bbi);
return hti;
}
Example #8
0
void showSchemaBigBed(char *table, struct trackDb *tdb)
/* Show schema on bigBed. */
{
/* Figure out bigBed file name and open it.  Get contents for first chromosome as an example. */
struct sqlConnection *conn = NULL;
if (!trackHubDatabase(database))
    conn = hAllocConn(database);
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct lm *lm = lmInit(0);
struct bigBedInterval *ivList = getNElements(bbi, chromList, lm, 10);

/* Get description of columns, making it up from BED records if need be. */
struct asObject *as = bigBedAsOrDefault(bbi);

hPrintf("<B>Database:</B> %s", database);
hPrintf("&nbsp;&nbsp;&nbsp;&nbsp;<B>Primary Table:</B> %s<br>", table);
hPrintf("<B>Big Bed File:</B> %s", fileName);
if (bbi->version >= 2)
    {
    hPrintf("<BR><B>Item Count:</B> ");
    printLongWithCommas(stdout, bigBedItemCount(bbi));
    }
hPrintf("<BR>\n");
hPrintf("<B>Format description:</B> %s<BR>", as->comment);

/* Put up table that describes fields. */
hTableStart();
hPrintf("<TR><TH>field</TH>");
if (ivList != NULL)
    hPrintf("<TH>example</TH>");
hPrintf("<TH>description</TH> ");
puts("</TR>\n");
struct asColumn *col;
int colCount = 0;
char *row[bbi->fieldCount];
char startBuf[16], endBuf[16];
if (ivList != NULL)
    {
    char *dupeRest = lmCloneString(lm, ivList->rest);	/* Manage rest-stomping side-effect */
    bigBedIntervalToRow(ivList, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
    ivList->rest = dupeRest;
    }
for (col = as->columnList; col != NULL; col = col->next)
    {
    hPrintf("<TR><TD><TT>%s</TT></TD>", col->name);
    if (ivList != NULL)
	hPrintf("<TD>%s</TD>", row[colCount]);
    hPrintf("<TD>%s</TD></TR>", col->comment);
    ++colCount;
    }

/* If more fields than descriptions put up minimally helpful info (at least has example). */
for ( ; colCount < bbi->fieldCount; ++colCount)
    {
    hPrintf("<TR><TD><TT>column%d</TT></TD>", colCount+1);
    if (ivList != NULL)
	hPrintf("<TD>%s</TD>", row[colCount]);
    hPrintf("<TD>n/a</TD></TR>\n");
    }
hTableEnd();


if (ivList != NULL)
    {
    /* Put up another section with sample rows. */
    webNewSection("Sample Rows");
    hTableStart();

    /* Print field names as column headers for example */
    hPrintf("<TR>");
    int colIx = 0;
    for (col = as->columnList; col != NULL; col = col->next)
	{
	hPrintf("<TH>%s</TH>", col->name);
	++colIx;
	}
    for (; colIx < colCount; ++colIx)
	hPrintf("<TH>column%d</TH>", colIx+1);
    hPrintf("</TR>\n");

    /* Print sample lines. */
    struct bigBedInterval *iv;
    for (iv=ivList; iv != NULL; iv = iv->next)
	{
	bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
	hPrintf("<TR>");
	for (colIx=0; colIx<colCount; ++colIx)
	    {
	    writeHtmlCell(row[colIx]);
	    }
	hPrintf("</TR>\n");
	}
    hTableEnd();
    }
printTrackHtml(tdb);
/* Clean up and go home. */
lmCleanup(&lm);
bbiFileClose(&bbi);
freeMem(fileName);
hFreeConn(&conn);
}
Example #9
0
void bigBedTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
/* Print out selected fields from Big Bed.  If fields is NULL, then print out all fields. */
{
if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = bigBedGetFields(table, conn);
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    hashAddInt(fieldHash, bb->name, i);

// If bigBed has name column, look up pasted/uploaded identifiers if any:
struct hash *idHash = NULL;
if (slCount(bbList) >= 4)
    idHash = identifierHash(db, table);

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

/* Output row of labels */
fprintf(f, "#%s", fieldArray[0]);
for (i=1; i<fieldCount; ++i)
    fprintf(f, "\t%s", fieldArray[i]);
fprintf(f, "\n");

/* Open up bigBed file. */
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);
struct asFilter *filter = NULL;

if (anyFilter())
    {
    filter = asFilterFromCart(cart, db, table, as);
    if (filter)
        {
	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	}
    }

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
for (region = regionList; region != NULL; region = region->next)
    {
    struct lm *lm = lmInit(0);
    struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, region->chrom,
    	region->start, region->end, 0, lm);
    char *row[bbi->fieldCount];
    char startBuf[16], endBuf[16];
    for (iv = ivList; iv != NULL; iv = iv->next)
        {
	bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount);
	if (asFilterOnRow(filter, row))
	    {
	    if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL))
		continue;
	    int i;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1; i<fieldCount; ++i)
		fprintf(f, "\t%s", row[columnArray[i]]);
	    fprintf(f, "\n");
	    }
	}
    lmCleanup(&lm);
    }

/* Clean up and exit. */
bbiFileClose(&bbi);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}