long metaBigNumItems(struct metaBig* mb, boolean verbose)
/* Return the total number of items in a bigBed or BAM.
 * A bigWig always yields 0, and a bigBed is answered directly by
 * bigBedItemCount().  Any other type is counted the slow way: every
 * section produced by sectionsFromChromSizes() is fetched and the
 * fetched items are tallied.  Glancing at the index would be cheaper,
 * but might count items that would be filtered out upon fetching.
 * When verbose is set, the per-section count is printed to stdout. */
{
    struct bed* allSections;
    struct bed* cur;
    long total = 0;

    if (mb->type == isaBigWig)
        return 0;
    if (mb->type == isaBigBed)
        return (long)bigBedItemCount(mb->big.bbi);

    allSections = sectionsFromChromSizes(mb->chromSizeHash);
    cur = allSections;
    while (cur != NULL)
    {
        /* Each section gets its own local-memory pool, torn down after counting. */
        struct lm* pool = lmInit(0);
        struct bed6* fetched = metaBigBed6Fetch(mb, cur->chrom, cur->chromStart, cur->chromEnd, pool);
        int count = slCount(fetched);

        if (verbose)
            printf("Number of items in %s of %s: %d\n", cur->chrom, mb->fileName, count);
        total += count;
        lmCleanup(&pool);
        cur = cur->next;
    }
    bedFreeList(&allSections);
    return total;
}
struct metaBig* metaBigOpenWithTmpDir(char* fileOrUrlwSections, char* cacheDir, char* sectionsBed) /* load a file or URL with or without sectioning */ /* if it's a bam, load the index. */ { struct metaBig* mb; char* fullFileName = NULL; char* remoteDir = NULL; char* baseFileName = NULL; char* sections = NULL; AllocVar(mb); mb->originalFileName = cloneString(fileOrUrlwSections); /* first deal with filename and separate URL/file/sections */ mb->isRemote = parseMetaBigFileName(fileOrUrlwSections, &remoteDir, &fullFileName, &baseFileName, §ions); mb->fileName = fullFileName; mb->baseFileName = baseFileName; mb->remoteSiteAndDir = remoteDir; /* sniff the file */ mb->type = sniffBigFile(mb->fileName); /* depending on the type, open the files and get the chrom-size hash different ways */ if (mb->type == isaBigBed) { mb->big.bbi = bigBedFileOpen(mb->fileName); mb->chromSizeHash = bbiChromSizes(mb->big.bbi); mb->numReads = bigBedItemCount(mb->big.bbi); } #ifdef USE_HTSLIB else if (mb->type == isaBam) { mb->chromSizeHash = bamChromSizes(mb->fileName); mb->header = bamGetHeaderOnly(mb->fileName); mb->big.bam = sam_open(mb->fileName, "r"); /* Also need to load the index since it's a bam */ mb->idx = bam_index_load(mb->fileName); metaBigBamFlagCountsInit(mb); } #endif else if (mb->type == isaBigWig) { mb->big.bbi = bigWigFileOpenWithDir(mb->fileName, cacheDir); mb->chromSizeHash = bbiChromSizes(mb->big.bbi); } else { /* maybe I should free some stuff up here */ if (fullFileName) freeMem(fullFileName); if (remoteDir) freeMem(remoteDir); if (baseFileName) freeMem(baseFileName); if (sections) freeMem(sections); freez(&mb); return NULL; } if (sectionsBed && sections) { struct bed* regions = (fileExists(sectionsBed)) ? regionsLoad(sectionsBed) : parseSectionString(sectionsBed, mb->chromSizeHash); struct bed* subsets = subset_beds(sections, ®ions, mb->chromSizeHash); mb->sections = subsets; } else if (sectionsBed) { mb->sections = (fileExists(sectionsBed)) ? 
regionsLoad(sectionsBed) : parseSectionString(sectionsBed, mb->chromSizeHash); } else mb->sections = parseSectionString(sections, mb->chromSizeHash); return mb; }
void showSchemaBigBed(char *table, struct trackDb *tdb) /* Show schema on bigBed. */ { /* Figure out bigBed file name and open it. Get contents for first chromosome as an example. */ struct sqlConnection *conn = NULL; if (!trackHubDatabase(database)) conn = hAllocConn(database); char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct bbiChromInfo *chromList = bbiChromList(bbi); struct lm *lm = lmInit(0); struct bigBedInterval *ivList = getNElements(bbi, chromList, lm, 10); /* Get description of columns, making it up from BED records if need be. */ struct asObject *as = bigBedAsOrDefault(bbi); hPrintf("<B>Database:</B> %s", database); hPrintf(" <B>Primary Table:</B> %s<br>", table); hPrintf("<B>Big Bed File:</B> %s", fileName); if (bbi->version >= 2) { hPrintf("<BR><B>Item Count:</B> "); printLongWithCommas(stdout, bigBedItemCount(bbi)); } hPrintf("<BR>\n"); hPrintf("<B>Format description:</B> %s<BR>", as->comment); /* Put up table that describes fields. */ hTableStart(); hPrintf("<TR><TH>field</TH>"); if (ivList != NULL) hPrintf("<TH>example</TH>"); hPrintf("<TH>description</TH> "); puts("</TR>\n"); struct asColumn *col; int colCount = 0; char *row[bbi->fieldCount]; char startBuf[16], endBuf[16]; if (ivList != NULL) { char *dupeRest = lmCloneString(lm, ivList->rest); /* Manage rest-stomping side-effect */ bigBedIntervalToRow(ivList, chromList->name, startBuf, endBuf, row, bbi->fieldCount); ivList->rest = dupeRest; } for (col = as->columnList; col != NULL; col = col->next) { hPrintf("<TR><TD><TT>%s</TT></TD>", col->name); if (ivList != NULL) hPrintf("<TD>%s</TD>", row[colCount]); hPrintf("<TD>%s</TD></TR>", col->comment); ++colCount; } /* If more fields than descriptions put up minimally helpful info (at least has example). 
*/ for ( ; colCount < bbi->fieldCount; ++colCount) { hPrintf("<TR><TD><TT>column%d</TT></TD>", colCount+1); if (ivList != NULL) hPrintf("<TD>%s</TD>", row[colCount]); hPrintf("<TD>n/a</TD></TR>\n"); } hTableEnd(); if (ivList != NULL) { /* Put up another section with sample rows. */ webNewSection("Sample Rows"); hTableStart(); /* Print field names as column headers for example */ hPrintf("<TR>"); int colIx = 0; for (col = as->columnList; col != NULL; col = col->next) { hPrintf("<TH>%s</TH>", col->name); ++colIx; } for (; colIx < colCount; ++colIx) hPrintf("<TH>column%d</TH>", colIx+1); hPrintf("</TR>\n"); /* Print sample lines. */ struct bigBedInterval *iv; for (iv=ivList; iv != NULL; iv = iv->next) { bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount); hPrintf("<TR>"); for (colIx=0; colIx<colCount; ++colIx) { writeHtmlCell(row[colIx]); } hPrintf("</TR>\n"); } hTableEnd(); } printTrackHtml(tdb); /* Clean up and go home. */ lmCleanup(&lm); bbiFileClose(&bbi); freeMem(fileName); hFreeConn(&conn); }