void doItBigWig(struct inInfo *in, struct bed *chiaList, struct bigWigValsOnChrom *chromVals, double *out1, double *out2) { struct bed *chromStart, *chromEnd, *chia; int chiaIx = 0; for (chromStart = chiaList; chromStart != NULL; chromStart = chromEnd) { chromEnd = bedListNextDifferentChrom(chromStart); if (bigWigValsOnChromFetchData(chromVals, chromStart->chrom, in->bbi)) { for (chia = chromStart; chia != chromEnd; chia = chia->next) { int blockStart = chia->chromStart; int blockSize = chia->blockSizes[0]; out1[chiaIx] = averageInRegion(chromVals, blockStart, blockSize); blockStart = chia->chromStart + chia->chromStarts[1]; blockSize = chia->blockSizes[1]; out2[chiaIx] = averageInRegion(chromVals, blockStart, blockSize); ++chiaIx; } } else { /* No data on this chrom, just output zero everywhere. */ for (chia = chromStart; chia != chromEnd; chia = chia->next) { out1[chiaIx] = out2[chiaIx] = 0; ++chiaIx; } } verboseDot(); } }
void doDots(int *pDotMod)
/* Progress indicator.  Each call counts *pDotMod down by one; when the counter reaches
 * zero or below a verbose dot is emitted and the counter is reloaded from the global
 * 'dots'.  When dots <= 0 the call does nothing and leaves *pDotMod untouched. */
{
if (dots <= 0)
    return;

*pDotMod -= 1;
if (*pDotMod > 0)
    return;

verboseDot();
*pDotMod = dots;
}
static void itsaFillInTraverseArray(char *dna, bits32 *suffixArray, bits32 arraySize,
	bits32 *traverseArray, UBYTE *cursorArray)
/* Fill in the bits that will help us traverse the array as if it were a tree.
 *   dna           - sequence that the suffixArray offsets point into
 *   suffixArray   - arraySize offsets into dna, visited in array order
 *   traverseArray - output; for each index, the distance forward to the index at which
 *                   it was popped off the stack below (arraySize for those still on the
 *                   stack at the end)
 *   cursorArray   - output; stack depth at which each index was pushed, truncated to
 *                   a UBYTE
 * A stack of array indexes is kept, one per depth.  For each new suffix the character at
 * position d is compared with the same position in the suffix stacked at depth d; at the
 * first mismatch everything from that depth up is popped and gets its traverse value.
 * Aborts if the stack would grow past its fixed size. */
{
int depth = 0;
int stackSize = 4*1024;
/* Stack elements are suffix array indexes, so hold them as bits32 rather than int:
 * indexes above INT_MAX would otherwise go through an implementation-defined
 * conversion on the way in. */
bits32 *stack;
AllocArray(stack, stackSize);
bits32 i;
for (i=0; i<arraySize; ++i)
    {
    char *curDna = dna + suffixArray[i];
    int d;
    for (d = 0; d<depth; ++d)
        {
        bits32 prevIx = stack[d];
        char *prevDna = dna + suffixArray[prevIx];
        if (curDna[d] != prevDna[d])
            {
            /* Current suffix diverges at depth d: pop everything from d upwards,
             * recording how far ahead of each popped index we are now. */
            int stackIx;
            for (stackIx=d; stackIx<depth; ++stackIx)
                {
                prevIx = stack[stackIx];
                traverseArray[prevIx] = i - prevIx;
                }
            depth = d;
            break;
            }
        }
    if (depth >= stackSize)
        errAbort("Stack overflow, depth >= %d", stackSize);
    stack[depth] = i;
    cursorArray[i] = depth; /* May overflow. That's ok. We just care about indexed ones which are < 13. */
    depth += 1;

    /* Progress: a dot every 0x100000 entries, and a percentage every 0x4000000.
     * The percentage test has to look for all-ones in the low bits, like the dot test
     * it is nested in; testing for zero there could never succeed. */
    if ((i&0xFFFFF)==0xFFFFF)
        {
        verboseDot();
        if ((i&0x3FFFFFF)==0x3FFFFFF)
            verbose(1, "traversed %lld%%\n", 100LL*i/arraySize);
        }
    }
verbose(1, "finished traversal\n");

/* Do final clear out of stack */
int stackIx;
for (stackIx=0; stackIx < depth; ++stackIx)
    {
    bits32 prevIx = stack[stackIx];
    traverseArray[prevIx] = arraySize - prevIx;
    }
/* NOTE(review): stack is not released before returning; free it with the allocator's
 * matching release routine if this is ever called more than once per run. */
}
static void itsaWriteMerged(struct chromInfo *chromList, DNA *allDna,
	bits32 *offsetArray, bits32 *listArray, bits32 *index13, char *output)
/* Write the complete index file to 'output'.  Sections are written in this order:
 * header, chromosome names (zero padded to a multiple of 4), chromosome sizes, DNA (zero
 * padded to a multiple of 4), suffix array, traverse array, 13-mer index, and one cursor
 * byte per 13-mer slot.  The header is written twice: once up front as a place holder
 * and once at the very end with the magic number and final sizes filled in, so the magic
 * number is only assigned once everything else has been written.
 * Side effects on the arguments: offsetArray is overwritten with the suffix array,
 * listArray with the traverse array, and each index13 slot is converted to the
 * one-based position of that slot's first entry in the suffix array (0 if the slot is
 * empty). */
{
FILE *f = mustOpen(output, "w+");

/** Allocate header and fill out easy constant fields. */
struct itsaFileHeader *header;
AllocVar(header);
header->majorVersion = ITSA_MAJOR_VERSION;
header->minorVersion = ITSA_MINOR_VERSION;

/* Figure out sizes of names and sequence for each chromosome. */
struct chromInfo *chrom;
bits32 chromNamesSize = 0;
bits64 dnaDiskSize = 1;	/* For initial zero. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    chromNamesSize += strlen(chrom->name) + 1;
    dnaDiskSize += chrom->size + 1;  /* Include separating zeroes. */
    }

/* Fill in most of rest of header fields */
header->chromCount = slCount(chromList);
header->chromNamesSize = roundUpTo4(chromNamesSize);
header->dnaDiskSize = roundUpTo4(dnaDiskSize);
bits32 chromSizesSize = header->chromCount*sizeof(bits32);

/* Write header. */
mustWrite(f, header, sizeof(*header));

/* Write chromosome names. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    mustWrite(f, chrom->name, strlen(chrom->name)+1);
zeroPad(f, header->chromNamesSize - chromNamesSize);

/* Write chromosome sizes.  The pad computed here is always zero, since chromSizesSize
 * is defined just above as exactly chromCount*sizeof(bits32). */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    mustWrite(f, &chrom->size, sizeof(chrom->size));
int chromSizesSizePad = chromSizesSize - header->chromCount * sizeof(bits32);
zeroPad(f, chromSizesSizePad);

/* Write out chromosome DNA and zeros before, between, and after. */
mustWrite(f, allDna, dnaDiskSize);
zeroPad(f, header->dnaDiskSize - dnaDiskSize);
verboseTime(1, "Wrote %lld bases of DNA including zero padding", header->dnaDiskSize);

/* Calculate and write suffix array. Convert index13 to index of array as opposed to index
 * of sequence. */
bits64 arraySize = 0;
off_t suffixArrayFileOffset = ftello(f);
int slotCount = itsaSlotCount;
int slotIx;
for (slotIx=0; slotIx < slotCount; ++slotIx)
    {
    int slotSize = finishAndWriteOneSlot(offsetArray, listArray, index13[slotIx], allDna, f);
    /* Convert index13 to hold the position in the suffix array where the first thing matching
     * the corresponding 13-base prefix is found. */
    if (slotSize != 0)
        index13[slotIx] = arraySize+1;	/* The +1 is so we can keep 0 for not found. */
    else
        index13[slotIx] = 0;
    arraySize += slotSize;
    if ((slotIx % 200000 == 0) && slotIx != 0)
        {
        verboseDot();
        if (slotIx % 10000000 == 0)
            verbose(1, "fine sort bucket %d of %d\n", slotIx, slotCount);
        }
    }
verbose(1, "fine sort bucket %d of %d\n", slotCount, slotCount);
verboseTime(1, "Wrote %lld suffix array positions", arraySize);

/* Now we're done with the offsetArray and listArray buffers, so use them for the
 * next phase. */
bits32 *suffixArray = offsetArray;
offsetArray = NULL;	/* Help make some errors more obvious */
bits32 *traverseArray = listArray;
listArray = NULL;	/* Help make some errors more obvious */

/* Read the suffix array back from the file. */
fseeko(f, suffixArrayFileOffset, SEEK_SET);
mustRead(f, suffixArray, arraySize*sizeof(bits32));
verboseTime(1, "Read suffix array back in");

/* Calculate traverse array and cursor arrays */
memset(traverseArray, 0, arraySize*sizeof(bits32));
UBYTE *cursorArray = needHugeMem(arraySize);
itsaFillInTraverseArray(allDna, suffixArray, arraySize, traverseArray, cursorArray);
verboseTime(1, "Filled in traverseArray");

/* The stream was opened for update and the last operation on it was a read that did not
 * run into end-of-file.  ISO C requires a file positioning call before switching to
 * output in that case; seeking zero bytes from the current position satisfies the rule
 * without moving the file position. */
fseeko(f, 0, SEEK_CUR);

/* Write out traverse array. */
mustWrite(f, traverseArray, arraySize*sizeof(bits32));
verboseTime(1, "Wrote out traverseArray");

/* Write out 13-mer index. */
mustWrite(f, index13, itsaSlotCount*sizeof(bits32));
verboseTime(1, "Wrote out index13");

/* Write out bits of cursor array corresponding to index. */
for (slotIx=0; slotIx<itsaSlotCount; ++slotIx)
    {
    bits32 indexPos = index13[slotIx];
    if (indexPos == 0)
        fputc(0, f);
    else
        fputc(cursorArray[indexPos-1], f);	/* index13 positions are one-based. */
    }
verboseTime(1, "Wrote out cursors13");

/* Update a few fields in header, and go back and write it out again with
 * the correct magic number to indicate it's complete. */
header->magic = ITSA_MAGIC;
header->arraySize = arraySize;
header->size = sizeof(*header)			/* header */
	+ header->chromNamesSize		/* chromosome names */
	+ header->chromCount * sizeof(bits32)	/* chromosome sizes */
	+ header->dnaDiskSize			/* dna sequence */
	+ sizeof(bits32) * arraySize		/* suffix array */
	+ sizeof(bits32) * arraySize		/* traverse array */
	+ sizeof(bits32) * itsaSlotCount	/* index13 */
	+ sizeof(UBYTE) * itsaSlotCount;	/* cursors13 */
rewind(f);
mustWrite(f, header, sizeof(*header));
carefulClose(&f);
verbose(1, "Completed %s is %lld bytes\n", output, header->size);
/* NOTE(review): header and cursorArray are not released before returning. */
}
void hierSort(char *inputList) /* Do a hierarchical merge sort so we don't run out of system file handles */ { int level = 0; char thisName[256]; char nextName[256]; char sortName[256]; struct lineFile *thisLf = NULL; FILE *nextF = NULL; int sortCount = 0; FILE *sortF = NULL; int fileCount = 0; char *files[MAXFILES]; boolean more = FALSE; int block=0; char *line=NULL; safef(nextName, sizeof(nextName), "%s", inputList); do { block=0; safef(thisName, sizeof(thisName), "%s", nextName); safef(nextName, sizeof(nextName), "%sinputList%d-", tempDir, level+1); makeTempName(&tempName, nextName, ".tmp"); safef(nextName, sizeof(nextName), "%s", tempName.forCgi); thisLf = lineFileOpen(thisName,TRUE); if (!thisLf) errAbort("error lineFileOpen(%s) returned NULL\n",thisName); more = lineFileNext(thisLf, &line, NULL); while (more) { int i=0; fileCount = 0; while (more && fileCount < MAXFILES) { files[fileCount++]=cloneString(line); more = lineFileNext(thisLf, &line, NULL); } if (!more && block==0) { /* last level */ sortF = stdout; } else { if (!nextF) nextF = mustOpen(nextName,"w"); safef(sortName, sizeof(sortName), "%ssort%d-", tempDir, sortCount++); makeTempName(&tempName, sortName, ".tmp"); safef(sortName, sizeof(sortName), "%s", tempName.forCgi); fprintf(nextF, "%s\n", sortName); sortF = mustOpen(sortName,"w"); } chainMergeSort(fileCount, files, sortF, level); if (sortF != stdout) carefulClose(&sortF); for(i=0;i<fileCount;++i) freez(&files[i]); verboseDot(); verbose(2,"block=%d\n",block); ++block; } lineFileClose(&thisLf); if (nextF) carefulClose(&nextF); if (level > 0) { remove(thisName); } verbose(1,"\n"); verbose(2,"level=%d, block=%d\n",level,block); ++level; } while (block > 1); }
void averageFetchingEachChrom(struct bbiFile *bbi, struct bed **pBedList, int fieldCount, FILE *f, FILE *bedF) /* Do the averaging by sorting bedList by chromosome, and then processing each chromosome * at once. Faster for long bedLists. */ { /* Sort by chromosome. */ slSort(pBedList, bedCmpChrom); struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew(); struct bed *bed, *bedList, *nextChrom; verbose(1, "processing chromosomes"); for (bedList = *pBedList; bedList != NULL; bedList = nextChrom) { /* Figure out which chromosome we're working on, and the last bed using it. */ char *chrom = bedList->chrom; nextChrom = nextChromInList(bedList); verbose(2, "Processing %s\n", chrom); if (bigWigValsOnChromFetchData(chromVals, chrom, bbi)) { double *valBuf = chromVals->valBuf; Bits *covBuf = chromVals->covBuf; /* Loop through beds doing sums and outputting. */ for (bed = bedList; bed != nextChrom; bed = bed->next) { int size = 0, coverage = 0; double sum = 0.0; if (sampleAroundCenter > 0) { int center = (bed->chromStart + bed->chromEnd)/2; int left = center - (sampleAroundCenter/2); addBufIntervalInfo(valBuf, covBuf, left, left+sampleAroundCenter, &size, &coverage, &sum); } else { if (fieldCount < 12) { addBufIntervalInfo(valBuf, covBuf, bed->chromStart, bed->chromEnd, &size, &coverage, &sum); } else { int i; for (i=0; i<bed->blockCount; ++i) { int start = bed->chromStart + bed->chromStarts[i]; int end = start + bed->blockSizes[i]; addBufIntervalInfo(valBuf, covBuf, start, end, &size, &coverage, &sum); } } } /* Print out result, fudging mean to 0 if no coverage at all. 
*/ double mean = 0; if (coverage > 0) mean = sum/coverage; fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, sum/size, mean); optionallyPrintBedPlus(bedF, bed, fieldCount, mean); } verboseDot(); } else { /* If no bigWig data on this chromosome, just output as if coverage is 0 */ for (bed = bedList; bed != nextChrom; bed = bed->next) { fprintf(f, "%s\t%d\t0\t0\t0\t0\n", bed->name, bedTotalBlockSize(bed)); optionallyPrintBedPlus(bedF, bed, fieldCount, 0); } } } verbose(1, "\n"); }
/* Core algorithm.
 * Walks the first n entries of the edge array el in order.  Each edge that passes the
 * seed filter starts a new block: the block is grown with block_init(), trimmed back to
 * the best-scoring prefix, extended with rows that pass the intersect/reverse and KL
 * tests, measured with scan_block(), and, if it ended up with at least one column,
 * appended to the block list.  Stops early once po->SCH_BLOCK blocks are collected.
 * The collected blocks are handed to block_enrichment() and then report_blocks(); the
 * return value is whatever report_blocks() returns.
 * Uses and modifies the globals profile, and reads rows, cols, sigma, arr_c, po,
 * sublist and TFindex. */
int cluster (FILE *fw, Edge **el, int n)
{
	int block_id = 0;	/* number of blocks stored in bb so far */
	Block **bb;
	int allocated = po->SCH_BLOCK;
	AllocArray(bb, allocated);

	Edge *e;
	Block *b;
	struct dyStack *genes, *scores, *b_genes, *allincluster;

	int i, j, k, components;

	/* profile is a cols x sigma table that is zeroed again for every seed below */
	AllocArray(profile, cols);
	for (j = 0; j < cols; j++)
		AllocArray(profile[j], sigma);

	genes = dsNew(rows);
	scores = dsNew(rows);
	allincluster = dsNew(rows);	/* every gene placed in any accepted block so far */

	bool *candidates;
	AllocArray(candidates, rows);

	e = *el;
	i = 0;
	while (i++ < n)
	{
		e = *el++;

		/* check if both genes already enumerated in previous blocks */
		bool flag = TRUE;
		/* speed up the program if there are more than 250 rows: use the cheap
		 * "both genes already in some block" test instead of check_seed() */
		if (rows > 250)
		{
			if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
				flag = FALSE;
			/* with IS_TFname set, only edges touching TFindex may seed a block */
			else if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
				flag = FALSE;
			/* with IS_list set, both genes of the edge must be in sublist */
			else if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
				flag = FALSE;
		}
		else
		{
			flag = check_seed(e, bb, block_id);
			/* same two restrictions as above; either can veto the seed */
			if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
				flag = FALSE;
			if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
				flag = FALSE;
		}
		if (!flag) continue;

		/* start this seed from an all-zero profile */
		for (j = 0; j < cols; j++)
			for (k = 0; k < sigma; k++)
				profile[j][k] = 0;

		/* the block needs its own storage before any of its fields are used */
		AllocVar(b);
		/* initial block score is the edge score, capped at 2 */
		b->score = MIN(2, e->score);

		/* initialize the stacks genes and scores: every slot is first filled with -1
		 * and the stacks are then emptied again, so slots past the top hold -1
		 * (presumably so that the dsItem(scores,k+1) look-ahead below reads a known
		 * value -- TODO confirm against dsItem/dsClear) */
		int ii;
		dsClear(genes);
		dsClear(scores);
		for(ii = 0; ii < rows; ii ++)
		{
			dsPush(genes,-1);
			dsPush(scores,-1);
		}
		dsClear(genes);
		dsClear(scores);

		/* the seed itself: the two genes of the edge */
		dsPush(genes, e->gene_one);
		dsPush(genes, e->gene_two);
		dsPush(scores, 1);
		dsPush(scores, b->score);

		/* branch-and-cut condition for seed expansion */
		int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
		if (cand_threshold < 2)
			cand_threshold = 2;

		/* maintain a candidate list to avoid looping through all rows */
		for (j = 0; j < rows; j++)
			candidates[j] = TRUE;
		candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
		components = 2;

		/* expansion step, generate a bicluster without noise */
		block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

		/* track back to find the genes by which we get the best score: stop at the
		 * last position of the first run of entries equal to b->score */
		for(k = 0; k < components; k++)
		{
			if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score))
				break;
		}
		components = k + 1;

		/* rebuild the candidate list for the trimmed block: everything is a candidate
		 * again except the genes kept in positions 0..k */
		int ki;
		for (ki=0; ki < rows; ki++)
			candidates[ki] = TRUE;

		for (ki=0; ki < components - 1 ; ki++)
		{
			seed_update(arr_c[dsItem(genes,ki)]);
			candidates[dsItem(genes,ki)] = FALSE;
		}
		candidates[dsItem(genes,k)] = FALSE;
		/* cut the gene stack back to position k */
		genes->top = k ;

		int cnt = 0;	/* filled in by seed_current_modify together with colcand */
		bool *colcand;
		AllocArray(colcand, cols);
		for(ki = 0; ki < cols; ki++)
			colcand[ki] = FALSE;

		/* add columns satisfy the conservative r */
		seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);

		/* add some new possible genes: a row joins when it is still a candidate, its
		 * intersect count reaches cnt * TOLERANCE, and its KL score reaches
		 * b->significance * TOLERANCE */
		int m_cnt=0;
		continuous KL_score=0;
		discrete *sub_array;
		for ( ki = 0; ki < rows; ki++)
		{
			if (po->IS_list && !sublist[ki]) continue;
			m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
			{
				/* NOTE(review): sub_array is never released in this function; check
				 * who owns the array returned by get_intersect_row */
				sub_array = get_intersect_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
				KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
				if (KL_score>=b->significance * po->TOLERANCE)
				{
					dsPush(genes,ki);
					components++;
					candidates[ki] = FALSE;
				}
			}
		}
		/* row count before the reverse-matched rows are added */
		b->block_rows_pre = components;

		/* add genes that negative regulated to the consensus: same acceptance test as
		 * above, using the reverse_row variants */
		for ( ki = 0; ki < rows; ki++)
		{
			if (po->IS_list && !sublist[ki]) continue;
			m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
			{
				sub_array = get_intersect_reverse_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
				KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
				if (KL_score>=b->significance * po->TOLERANCE)
				{
					dsPush(genes,ki);
					components++;
					candidates[ki] = FALSE;
				}
			}
		}
		free(colcand);

		/* save the current cluster: b_genes holds only the first block_rows_pre genes,
		 * i.e. without the reverse-matched rows */
		b_genes = dsNew(b->block_rows_pre);
		for (ki = 0; ki < b->block_rows_pre; ki++)
			dsPush(b_genes, dsItem(genes,ki));

		/* store gene arrays inside block */
		b->genes = dsNew(components);
		b->conds = dsNew(cols);

		scan_block(b_genes, b);
		/* a block without columns is dropped.
		 * NOTE(review): b, b->genes, b->conds and b_genes are not released on this
		 * path, and b_genes is not released on the accepting path either */
		if (b->block_cols == 0) continue;
		b->block_rows = components;
		/* self-assignment, no effect: the score set earlier is kept as is */
		b->score = b->score;
		/* b->score = b->block_rows * b->block_cols; */

		/* the block's own gene list gets all rows, reverse-matched ones included */
		dsClear(b->genes);
		for ( ki=0; ki < components; ki++)
			dsPush(b->genes,dsItem(genes,ki));
		/* remember every gene of this block for the seed filter above */
		for(ki = 0; ki < components; ki++)
			if(!isInStack(allincluster, dsItem(genes,ki)))
				dsPush(allincluster,dsItem(genes,ki));

		/* save the current block b to the block list bb so that we can sort the blocks by their score */
		bb[block_id++] = b;

		/* reaching the results number limit */
		if (block_id == po->SCH_BLOCK) break;
		verboseDot();
	}
	/* end the line of progress dots on stdout */
	putchar('\n');
	/* free-up the candidate list */
	free(candidates);
	free(allincluster);
	block_enrichment(fw, bb, block_id);
	return report_blocks(fw, bb, block_id);
}
void pslSort2(char *outFile, char *tempDir) /* Do second step of sort - merge all sorted files in tempDir * to final. */ { char fileName[512]; struct slName *tmpList, *tmp; struct midFile *midList = NULL, *mid; int aliCount = 0; FILE *f = mustOpen(outFile, "w"); if (!nohead) pslWriteHead(f); tmpList = listDir(tempDir, "tmp*.psl"); if (tmpList == NULL) errAbort("No tmp*.psl files in %s\n", tempDir); for (tmp = tmpList; tmp != NULL; tmp = tmp->next) { sprintf(fileName, "%s/%s", tempDir, tmp->name); AllocVar(mid); mid->lf = pslFileOpen(fileName); slAddHead(&midList, mid); } verbose(1, "writing %s", outFile); fflush(stdout); /* Write out the lowest sorting line from mid list until done. */ for (;;) { struct midFile *bestMid = NULL; if ( (++aliCount & 0xffff) == 0) { verboseDot(); fflush(stdout); } for (mid = midList; mid != NULL; mid = mid->next) { if (mid->lf != NULL && mid->psl == NULL) { if ((mid->psl = nextPsl(mid->lf)) == NULL) lineFileClose(&mid->lf); } if (mid->psl != NULL) { if (bestMid == NULL || pslCmpQuery(&mid->psl, &bestMid->psl) < 0) bestMid = mid; } } if (bestMid == NULL) break; pslTabOut(bestMid->psl, f); pslFree(&bestMid->psl); } printf("\n"); fclose(f); /* The followint really shouldn't be necessary.... */ for (mid = midList; mid != NULL; mid = mid->next) lineFileClose(&mid->lf); printf("Cleaning up temp files\n"); for (tmp = tmpList; tmp != NULL; tmp = tmp->next) { sprintf(fileName, "%s/%s", tempDir, tmp->name); remove(fileName); } }
/* Core algorithm.
 * Walks the first n entries of the edge array el in order.  Each edge that passes the
 * seed filter starts a new block: the block is grown with block_init(), trimmed back to
 * the best-scoring prefix, extended with rows whose intersect or reverse count is high
 * enough, measured with scan_block(), and, if it ended up with at least one column,
 * appended to the block list.  Stops early once po->SCH_BLOCK blocks are collected.
 * The collected blocks are handed to report_blocks(), whose result is returned.
 * Uses and modifies the global profile, and reads rows, cols, sigma, arr_c and po. */
int cluster (FILE *fw, Edge **el, int n)
{
	int block_id = 0;	/* number of blocks stored in bb so far */
	Block **bb;
	int allocated = po->SCH_BLOCK;
	AllocArray(bb, allocated);

	Edge *e;
	Block *b;
	struct dyStack *genes, *scores, *b_genes, *allincluster;

	int i, j, k, components;

	/* profile is a cols x sigma table that is zeroed again for every seed below */
	AllocArray(profile, cols);
	for (j = 0; j < cols; j++)
		AllocArray(profile[j], sigma);

	genes = dsNew(rows);
	scores = dsNew(rows);
	allincluster = dsNew(rows);	/* every gene placed in any accepted block so far */

	bool *candidates;
	AllocArray(candidates, rows);

	e = *el;
	i = 0;
	while (i++ < n)
	{
		e = *el++;

		/* check if both genes already enumerated in previous blocks */
		bool flag = TRUE;
		/* speed up the program if the rows bigger than 200: use the cheap "both genes
		 * already in some block" test instead of check_seed() */
		if (rows > 200)
		{
			if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
				flag = FALSE;
		}
		else
		{
			flag = check_seed(e, bb, block_id);
		}
		if (!flag) continue;

		/* start this seed from an all-zero profile */
		for (j = 0; j < cols; j++)
			for (k = 0; k < sigma; k++)
				profile[j][k] = 0;

		AllocVar(b);
		/* initial block score is the edge score, capped at 2 */
		b->score = MIN(2, e->score);

		/* initialize the stacks genes and scores: every slot is first filled with -1
		 * and the stacks are then emptied again, so slots past the top hold -1
		 * (presumably so that the dsItem(scores,k+1) look-ahead below reads a known
		 * value -- TODO confirm against dsItem/dsClear) */
		int ii;
		dsClear(genes);
		dsClear(scores);
		for(ii = 0; ii < rows; ii ++)
		{
			dsPush(genes,-1);
			dsPush(scores,-1);
		}
		dsClear(genes);
		dsClear(scores);

		/* the seed itself: the two genes of the edge */
		dsPush(genes, e->gene_one);
		dsPush(genes, e->gene_two);
		dsPush(scores, 1);
		dsPush(scores, b->score);

		/* branch-and-cut condition for seed expansion */
		int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
		if (cand_threshold < 2)
			cand_threshold = 2;

		/* maintain a candidate list to avoid looping through all rows */
		for (j = 0; j < rows; j++)
			candidates[j] = TRUE;
		candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
		components = 2;

		/* expansion step, generate a bicluster without noise */
		block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

		/* track back to find which genes produce the best score: stop at the last
		 * position of the first run of entries equal to b->score */
		for(k = 0; k < components; k++)
			if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score))
				break;
		components = k + 1;

		/* rebuild the candidate list for the trimmed block: everything is a candidate
		 * again except the genes kept in positions 0..k */
		int ki;
		for (ki=0; ki < rows; ki++)
			candidates[ki] = TRUE;

		for (ki=0; ki < components - 1 ; ki++)
		{
			seed_update(arr_c[dsItem(genes,ki)]);
			candidates[dsItem(genes,ki)] = FALSE;
		}
		candidates[dsItem(genes,k)] = FALSE;
		/* cut the gene stack back to position k */
		genes->top = k ;

		int cnt = 0;	/* filled in by seed_current_modify together with colcand */
		bool *colcand;
		AllocArray(colcand, cols);
		for(ki = 0; ki < cols; ki++)
			colcand[ki] = FALSE;

		/* add columns satisfy the conservative r */
		seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);

		/* add some new possible genes: a row joins when it is still a candidate and its
		 * intersect count reaches cnt * TOLERANCE */
		int m_cnt;
		for ( ki = 0; ki < rows; ki++)
		{
			m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
			{
				dsPush(genes,ki);
				components++;
				candidates[ki] = FALSE;
			}
		}
		/* row count before the reverse-matched rows are added */
		b->block_rows_pre = components;

		/* add genes that negative regulated to the consensus: same acceptance test,
		 * using reverse_row */
		for ( ki = 0; ki < rows; ki++)
		{
			m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
			{
				dsPush(genes,ki);
				components++;
				candidates[ki] = FALSE;
			}
		}
		free(colcand);

		/* save the current cluster: b_genes holds only the first block_rows_pre genes,
		 * i.e. without the reverse-matched rows */
		b_genes = dsNew(b->block_rows_pre);
		for (ki = 0; ki < b->block_rows_pre; ki++)
			dsPush(b_genes, dsItem(genes,ki));

		/* store gene arrays inside block */
		b->genes = dsNew(components);
		b->conds = dsNew(cols);

		scan_block(b_genes, b);
		/* a block without columns is dropped.
		 * NOTE(review): b, b->genes, b->conds and b_genes are not released on this
		 * path, and b_genes is not released on the accepting path either */
		if (b->block_cols == 0) continue;
		b->block_rows = components;
		/* final score is the block area: rows times columns */
		b->score = b->block_rows * b->block_cols;

		/* the block's own gene list gets all rows, reverse-matched ones included */
		dsClear(b->genes);
		for ( ki=0; ki < components; ki++)
			dsPush(b->genes,dsItem(genes,ki));
		/* remember every gene of this block for the seed filter above */
		for(ki = 0; ki < components; ki++)
			if(!isInStack(allincluster, dsItem(genes,ki)))
				dsPush(allincluster,dsItem(genes,ki));

		bb[block_id++] = b;

		/* reaching the results number limit */
		if (block_id == po->SCH_BLOCK) break;
		verboseDot();
	}
	/* end the line of progress dots on stdout */
	putchar('\n');
	/* free-up the candidate list */
	free(candidates);
	free(allincluster);
	return report_blocks(fw, bb, block_id);
}
void bigWigMerge(int inCount, char *inFiles[], char *outFile) /* bigWigMerge - Merge together multiple bigWigs into a single one.. */ { /* Make a list of open bigWig files. */ struct bbiFile *inFile, *inFileList = NULL; int i; for (i=0; i<inCount; ++i) { if (clInList) { addWigsInFile(inFiles[i], &inFileList); } else { inFile = bigWigFileOpen(inFiles[i]); slAddTail(&inFileList, inFile); } } FILE *f = mustOpen(outFile, "w"); struct bbiChromInfo *chrom, *chromList = getAllChroms(inFileList); verbose(1, "Got %d chromosomes from %d bigWigs\nProcessing", slCount(chromList), slCount(inFileList)); double *mergeBuf = NULL; int mergeBufSize = 0; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { struct lm *lm = lmInit(0); /* Make sure merge buffer is big enough. */ int chromSize = chrom->size; verboseDot(); verbose(2, "Processing %s (%d bases)\n", chrom->name, chromSize); if (chromSize > mergeBufSize) { mergeBufSize = chromSize; freeMem(mergeBuf); mergeBuf = needHugeMem(mergeBufSize * sizeof(double)); } int i; for (i=0; i<chromSize; ++i) mergeBuf[i] = 0.0; /* Loop through each input file grabbing data and merging it in. */ for (inFile = inFileList; inFile != NULL; inFile = inFile->next) { struct bbiInterval *ivList = bigWigIntervalQuery(inFile, chrom->name, 0, chromSize, lm); verbose(3, "Got %d intervals in %s\n", slCount(ivList), inFile->fileName); struct bbiInterval *iv; for (iv = ivList; iv != NULL; iv = iv->next) { double val = iv->val; if (val > clClip) val = clClip; int end = iv->end; for (i=iv->start; i < end; ++i) mergeBuf[i] += val; } } /* Output each range of same values as a bedGraph item */ int sameCount; for (i=0; i<chromSize; i += sameCount) { sameCount = doublesTheSame(mergeBuf+i, chromSize-i); double val = mergeBuf[i] + clAdjust; if (val > clThreshold) fprintf(f, "%s\t%d\t%d\t%g\n", chrom->name, i, i + sameCount, val); } lmCleanup(&lm); } verbose(1, "\n"); carefulClose(&f); }