struct bed *bigBedGetFilteredBedsOnRegions(struct sqlConnection *conn, char *db, char *table, struct region *regionList, struct lm *lm, int *retFieldCount) /* Get list of beds from bigBed, in all regions, that pass filtering. */ { /* Connect to big bed and get metadata and filter. */ char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); struct asFilter *filter = asFilterFromCart(cart, db, table, as); /* Get beds a region at a time. */ struct bed *bedList = NULL; struct region *region; for (region = regionList; region != NULL; region = region->next) addFilteredBedsOnRegion(bbi, region, table, filter, lm, &bedList); slReverse(&bedList); /* Clean up and return. */ if (retFieldCount != NULL) *retFieldCount = bbi->definedFieldCount; bbiFileClose(&bbi); freeMem(fileName); return bedList; }
struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName, int *retFieldIx)
/* Return index associated with fieldName. Aborts if no such index. Optionally return
 * index in a row of this field. */
{
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
/* Translate the field name into a 0-based column index via the autoSql schema. */
struct asObject *as = bigBedAsOrDefault(bbi);
struct asColumn *col = asColumnFind(as, fieldName);
if (col == NULL)
    errAbort("No field %s in %s", fieldName, bbi->fileName);
int colIx = slIxFromElement(as->columnList, col);
if (retFieldIx != NULL)
    *retFieldIx = colIx;
asObjectFree(&as);	/* Schema no longer needed; only colIx is used below. */
/* See if we have any extra indexes, and if so seek to there. */
bits64 offset = bbi->extraIndexListOffset;
if (offset == 0)
    errAbort("%s has no indexes", bbi->fileName);
udcSeek(udc, offset);

/* Go through each extra index and see if it's a match.
 * Each record is read in fixed order: type(16) fieldCount(16) fileOffset(64)
 * reserved(32), then fieldCount * [fieldId(16) reserved(16)] — the seeks below
 * must stay in step with this layout. */
int i;
for (i=0; i<bbi->extraIndexCount; ++i)
    {
    bits16 type = udcReadBits16(udc, isSwapped);
    bits16 fieldCount = udcReadBits16(udc, isSwapped);
    bits64 fileOffset = udcReadBits64(udc, isSwapped);
    udcSeekCur(udc, 4);	// skip over reserved bits
    /* Only index type 0 (bPlusTree on a single field) is understood. */
    if (type != 0)
	{
	warn("Don't understand type %d", type);
	internalErr();
	}
    if (fieldCount == 1)
	{
	bits16 fieldId = udcReadBits16(udc, isSwapped);
	udcSeekCur(udc, 2);	// skip over reserved bits
	if (fieldId == colIx)
	    {
	    /* Found it: jump to the b+ tree and attach to it. */
	    udcSeek(udc, fileOffset);
	    struct bptFile *bpt = bptFileAttach(bbi->fileName, udc);
	    return bpt;
	    }
	}
    else
	{
	warn("Not yet understanding indexes on multiple fields at once.");
	internalErr();
	}
    }
errAbort("%s is not indexed in %s", fieldName, bbi->fileName);
return NULL;	/* Not reached; errAbort does not return. */
}
struct slName *bigBedListExtraIndexes(struct bbiFile *bbi) /* Return list of names of extra indexes beyond primary chrom:start-end one" */ { struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; /* See if we have any extra indexes, and if so seek to there. */ bits64 offset = bbi->extraIndexListOffset; if (offset == 0) return NULL; udcSeek(udc, offset); /* Construct list of field that are being indexed. List is list of * field numbers within asObj. */ int i; struct slInt *intList = NULL, *intEl; for (i=0; i<bbi->extraIndexCount; ++i) { bits16 type,fieldCount; type = udcReadBits16(udc, isSwapped); fieldCount = udcReadBits16(udc, isSwapped); udcSeekCur(udc, sizeof(bits64)); // skip over fileOffset udcSeekCur(udc, 4); // skip over reserved bits if (fieldCount == 1) { bits16 fieldId = udcReadBits16(udc, isSwapped); udcSeekCur(udc, 2); // skip over reserved bits intEl = slIntNew(fieldId); slAddHead(&intList, intEl); } else { warn("Not yet understanding indexes on multiple fields at once."); internalErr(); } } /* Now have to make an asObject to find out name that corresponds to this field. */ struct asObject *as = bigBedAsOrDefault(bbi); /* Make list of field names out of list of field numbers */ struct slName *nameList = NULL; for (intEl = intList; intEl != NULL; intEl = intEl->next) { struct asColumn *col = slElementFromIx(as->columnList, intEl->val); if (col == NULL) { warn("Inconsistent bigBed file %s", bbi->fileName); internalErr(); } slNameAddHead(&nameList, col->name); } asObjectFree(&as); return nameList; }
struct slName *bigBedGetFields(char *table, struct sqlConnection *conn)
/* Get fields of bigBed as simple name list. */
{
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);
struct slName *names = asColNames(as);
/* asColNames builds independent slName elements, so the asObject can be
 * released here (it was leaked before; ownership pattern matches
 * bigBedOpenExtraIndex's asObjectFree). */
asObjectFree(&as);
freeMem(fileName);
bbiFileClose(&bbi);
return names;
}
struct sqlFieldType *bigBedListFieldsAndTypes(char *table, struct sqlConnection *conn)
/* Get fields of bigBed as list of sqlFieldType. */
{
/* Open the file, pull its autoSql schema, and convert that to
 * name/type pairs. */
char *bbFileName = bigBedFileName(table, conn);
struct bbiFile *bigFile = bigBedFileOpen(bbFileName);
struct asObject *schema = bigBedAsOrDefault(bigFile);
struct sqlFieldType *fieldTypes = sqlFieldTypesFromAs(schema);
freeMem(bbFileName);
bbiFileClose(&bigFile);
return fieldTypes;
}
struct asObject *bigBedFileAsObjOrDefault(char *fileName) // Get asObject associated with bigBed file, or the default. { struct bbiFile *bbi = bigBedFileOpen(fileName); if (bbi) { struct asObject *as = bigBedAsOrDefault(bbi); bbiFileClose(&bbi); return as; } return NULL; }
struct hTableInfo *bigBedToHti(char *table, struct sqlConnection *conn) /* Get fields of bigBed into hti structure. */ { /* Get columns in asObject format. */ char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); /* Allocate hTableInfo structure and fill in info about bed fields. */ struct hash *colHash = asColumnHash(as); struct hTableInfo *hti; AllocVar(hti); hti->rootName = cloneString(table); hti->isPos= TRUE; fillField(colHash, "chrom", hti->chromField); fillField(colHash, "chromStart", hti->startField); fillField(colHash, "chromEnd", hti->endField); fillField(colHash, "name", hti->nameField); fillField(colHash, "score", hti->scoreField); fillField(colHash, "strand", hti->strandField); fillField(colHash, "thickStart", hti->cdsStartField); fillField(colHash, "thickEnd", hti->cdsEndField); fillField(colHash, "blockCount", hti->countField); fillField(colHash, "chromStarts", hti->startsField); fillField(colHash, "blockSizes", hti->endsSizesField); hti->hasCDS = (bbi->definedFieldCount >= 8); hti->hasBlocks = (bbi->definedFieldCount >= 12); char type[256]; safef(type, sizeof(type), "bed %d %c", bbi->definedFieldCount, (bbi->definedFieldCount == bbi->fieldCount ? '.' : '+')); hti->type = cloneString(type); freeMem(fileName); hashFree(&colHash); bbiFileClose(&bbi); return hti; }
void showSchemaBigBed(char *table, struct trackDb *tdb)
/* Show schema on bigBed.  Emits an HTML description of the file's columns,
 * with example values and a sample-rows section when data is available.
 * Relies on globals: database (current assembly) and the hPrintf/web output
 * machinery. */
{
/* Figure out bigBed file name and open it. Get contents for first chromosome as an example. */
struct sqlConnection *conn = NULL;
if (!trackHubDatabase(database))
    conn = hAllocConn(database);
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct lm *lm = lmInit(0);
/* NOTE(review): assumes getNElements returns up to 10 example intervals,
 * presumably from the first chromosome(s) — confirm against its definition. */
struct bigBedInterval *ivList = getNElements(bbi, chromList, lm, 10);

/* Get description of columns, making it up from BED records if need be. */
struct asObject *as = bigBedAsOrDefault(bbi);

/* Print header info: database, table, file, and (v2+) item count. */
hPrintf("<B>Database:</B> %s", database);
hPrintf(" <B>Primary Table:</B> %s<br>", table);
hPrintf("<B>Big Bed File:</B> %s", fileName);
if (bbi->version >= 2)
    {
    hPrintf("<BR><B>Item Count:</B> ");
    printLongWithCommas(stdout, bigBedItemCount(bbi));
    }
hPrintf("<BR>\n");
hPrintf("<B>Format description:</B> %s<BR>", as->comment);

/* Put up table that describes fields. */
hTableStart();
hPrintf("<TR><TH>field</TH>");
if (ivList != NULL)
    hPrintf("<TH>example</TH>");
hPrintf("<TH>description</TH> ");
puts("</TR>\n");
struct asColumn *col;
int colCount = 0;
char *row[bbi->fieldCount];
char startBuf[16], endBuf[16];
if (ivList != NULL)
    {
    /* bigBedIntervalToRow chops ivList->rest in place; clone it first and
     * restore after, since ivList is iterated again for the sample rows. */
    char *dupeRest = lmCloneString(lm, ivList->rest);	/* Manage rest-stomping side-effect */
    bigBedIntervalToRow(ivList, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
    ivList->rest = dupeRest;
    }
/* One table row per described column, with an example value if we have one. */
for (col = as->columnList; col != NULL; col = col->next)
    {
    hPrintf("<TR><TD><TT>%s</TT></TD>", col->name);
    if (ivList != NULL)
	hPrintf("<TD>%s</TD>", row[colCount]);
    hPrintf("<TD>%s</TD></TR>", col->comment);
    ++colCount;
    }

/* If more fields than descriptions put up minimally helpful info (at least has example). */
for ( ; colCount < bbi->fieldCount; ++colCount)
    {
    hPrintf("<TR><TD><TT>column%d</TT></TD>", colCount+1);
    if (ivList != NULL)
	hPrintf("<TD>%s</TD>", row[colCount]);
    hPrintf("<TD>n/a</TD></TR>\n");
    }
hTableEnd();

if (ivList != NULL)
    {
    /* Put up another section with sample rows. */
    webNewSection("Sample Rows");
    hTableStart();

    /* Print field names as column headers for example */
    hPrintf("<TR>");
    int colIx = 0;
    for (col = as->columnList; col != NULL; col = col->next)
	{
	hPrintf("<TH>%s</TH>", col->name);
	++colIx;
	}
    /* Fabricated "columnN" headers for fields beyond the described ones. */
    for (; colIx < colCount; ++colIx)
	hPrintf("<TH>column%d</TH>", colIx+1);
    hPrintf("</TR>\n");

    /* Print sample lines.  (Each interval is consumed once here, so no
     * rest-restoration is needed on this pass.) */
    struct bigBedInterval *iv;
    for (iv=ivList; iv != NULL; iv = iv->next)
	{
	bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
	hPrintf("<TR>");
	for (colIx=0; colIx<colCount; ++colIx)
	    {
	    writeHtmlCell(row[colIx]);
	    }
	hPrintf("</TR>\n");
	}
    hTableEnd();
    }
printTrackHtml(tdb);

/* Clean up and go home. */
lmCleanup(&lm);
bbiFileClose(&bbi);
freeMem(fileName);
hFreeConn(&conn);
}
void bigBedTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
/* Print out selected fields from Big Bed. If fields is NULL, then print out all fields. */
/* NOTE(review): despite the comment above, fields is passed straight to
 * chopByChar, which looks like it would not tolerate NULL — confirm callers
 * always pass a concrete comma-separated list. */
{
if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array.
 * (chopByChar presumably modifies fields in place; fieldArray entries point
 * into it — fields must stay live for the duration of this function.) */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = bigBedGetFields(table, conn);
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    hashAddInt(fieldHash, bb->name, i);

// If bigBed has name column, look up pasted/uploaded identifiers if any:
// (>= 4 columns means the standard BED "name" field, row[3], exists.)
struct hash *idHash = NULL;
if (slCount(bbList) >= 4)
    idHash = identifierHash(db, table);

/* Create an array of column indexes corresponding to the selected field list.
 * hashIntVal aborts if a requested field name is not in the bigBed. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

/* Output row of labels */
fprintf(f, "#%s", fieldArray[0]);
for (i=1; i<fieldCount; ++i)
    fprintf(f, "\t%s", fieldArray[i]);
fprintf(f, "\n");

/* Open up bigGed file and, if any filters are set in the cart, build a
 * row filter from them. */
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct asObject *as = bigBedAsOrDefault(bbi);
struct asFilter *filter = NULL;
if (anyFilter())
    {
    filter = asFilterFromCart(cart, db, table, as);
    if (filter)
	{
	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
	}
    }

/* Loop through outputting each region.  A fresh local memory pool per
 * region keeps interval allocations bounded. */
struct region *region, *regionList = getRegions();
for (region = regionList; region != NULL; region = region->next)
    {
    struct lm *lm = lmInit(0);
    struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, region->chrom, region->start, region->end, 0, lm);
    char *row[bbi->fieldCount];
    char startBuf[16], endBuf[16];
    for (iv = ivList; iv != NULL; iv = iv->next)
	{
	bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount);
	if (asFilterOnRow(filter, row))
	    {
	    /* Skip rows whose name (row[3]) isn't among the user's pasted ids. */
	    if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL))
		continue;
	    int i;
	    fprintf(f, "%s", row[columnArray[0]]);
	    for (i=1; i<fieldCount; ++i)
		fprintf(f, "\t%s", row[columnArray[i]]);
	    fprintf(f, "\n");
	    }
	}
    lmCleanup(&lm);
    }

/* Clean up and exit. */
bbiFileClose(&bbi);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}