Beispiel #1
0
void doItBigWig(struct inInfo *in, struct bed *chiaList, struct bigWigValsOnChrom *chromVals,
	double *out1, double *out2)
/* Walk chiaList one same-chromosome run at a time.  For the i-th item in list order,
 * out1[i] gets averageInRegion() over the item's first block and out2[i] gets it over
 * the second block.  When bigWigValsOnChromFetchData() reports nothing for a run's
 * chromosome, every item in that run gets zero in both outputs.  One progress dot is
 * emitted per run. */
{
int outIx = 0;
struct bed *runHead = chiaList;
while (runHead != NULL)
    {
    /* runEnd is the first item past the current run (or NULL at end of list). */
    struct bed *runEnd = bedListNextDifferentChrom(runHead);
    struct bed *item;
    if (!bigWigValsOnChromFetchData(chromVals, runHead->chrom, in->bbi))
	{
	/* Nothing loaded for this chromosome: report zero for both blocks. */
	for (item = runHead; item != runEnd; item = item->next, ++outIx)
	    {
	    out2[outIx] = 0;
	    out1[outIx] = out2[outIx];
	    }
	}
    else
	{
	for (item = runHead; item != runEnd; item = item->next, ++outIx)
	    {
	    /* First block starts at the item start itself. */
	    int firstStart = item->chromStart;
	    int firstSize = item->blockSizes[0];
	    out1[outIx] = averageInRegion(chromVals, firstStart, firstSize);

	    /* Second block is located via its offset from the item start. */
	    int secondStart = item->chromStart + item->chromStarts[1];
	    int secondSize = item->blockSizes[1];
	    out2[outIx] = averageInRegion(chromVals, secondStart, secondSize);
	    }
	}
    verboseDot();
    runHead = runEnd;
    }
}
void doDots(int *pDotMod)
/* Progress ticker.  Does nothing unless the global dots is positive.  Otherwise
 * decrements *pDotMod, and once it has dropped to zero or below prints a dot and
 * reloads *pDotMod from dots. */
{
if (dots <= 0)
    return;
*pDotMod -= 1;
if (*pDotMod > 0)
    return;
verboseDot();
*pDotMod = dots;
}
static void itsaFillInTraverseArray(char *dna, bits32 *suffixArray, bits32 arraySize, 
	bits32 *traverseArray, UBYTE *cursorArray)
/* Fill in the bits that will help us traverse the array as if it were a tree.
 *
 * Walks suffixArray in order while keeping a stack that holds, for each depth d, the
 * index of the most recent suffix pushed at that depth.  The current suffix is compared
 * to the stacked suffix at base d for each d; at the first mismatch every stack entry
 * from d upward is popped and its traverseArray slot is set to the distance from that
 * entry to the current index.  The current index is then pushed and cursorArray[i]
 * records the depth it was pushed at (truncated to a UBYTE).  Entries still on the
 * stack at the end get the distance to arraySize.
 *
 * Aborts via errAbort if the depth reaches the fixed stack size of 4096 entries. */
{
int depth = 0;
int stackSize = 4*1024;
int *stack;
/* NOTE(review): stack is allocated here and is not released anywhere in this function. */
AllocArray(stack, stackSize);
bits32 i;
for (i=0; i<arraySize; ++i)
    {
    char *curDna = dna + suffixArray[i];
    int d;
    for (d = 0; d<depth; ++d)
        {
	bits32 prevIx = stack[d];
	char *prevDna = dna + suffixArray[prevIx];
	if (curDna[d] != prevDna[d])
	    {
	    /* Current suffix leaves the shared prefix at base d: close out everything
	     * stacked at depth d or deeper. */
	    int stackIx;
	    for (stackIx=d; stackIx<depth; ++stackIx)
	        {
		prevIx = stack[stackIx];
		traverseArray[prevIx] = i - prevIx;
		}
	    depth = d;
	    break;
	    }
	}
    if (depth >= stackSize)
        errAbort("Stack overflow, depth >= %d", stackSize);
    stack[depth] = i;
    cursorArray[i] = depth;  // May overflow. That's ok. We just care about indexed ones which are < 13.
    depth += 1;

    /* Progress: a dot whenever the low 20 bits of i are all ones, and a percentage line
     * whenever the low 26 bits are all ones.  The inner test must compare against the
     * all-ones pattern; a comparison with 0 can never succeed inside the outer test,
     * which is why the percentage line was previously never printed. */
    if ((i&0xFFFFF)==0xFFFFF)
        {
	verboseDot();
	if ((i&0x3FFFFFF)==0x3FFFFFF)
	    verbose(1, "traversed %lld%%\n", 100LL*i/arraySize);
	}
    }
verbose(1, "finished traversal\n");

/* Do final clear out of stack */
int stackIx;
for (stackIx=0; stackIx < depth; ++stackIx)
    {
    bits32 prevIx = stack[stackIx];
    traverseArray[prevIx] = arraySize - prevIx;
    }
}
static void itsaWriteMerged(struct chromInfo *chromList, DNA *allDna,
	bits32 *offsetArray, bits32 *listArray, bits32 *index13, char *output)
/* Write out a file that contains a single splix that is the merger of
 * all of the individual splixes in list.   As a side effect will replace
 * offsetArray with suffix array and listArray with traverse array.
 *
 * Sections are written in this order: header (first without the magic number),
 * chromosome names, chromosome sizes, DNA, suffix array, traverse array, index13,
 * cursors13.  The header is rewritten at the start of the file with the magic number,
 * array size and total size once everything else is on disk.  index13 is converted in
 * place from whatever finishAndWriteOneSlot() consumes to 1-based positions in the
 * suffix array (0 meaning the slot is empty).
 *
 * NOTE(review): header and cursorArray are allocated here and are not released in
 * this function. */
{
FILE *f = mustOpen(output, "w+");

/** Allocate header and fill out easy constant fields. */
struct itsaFileHeader *header;
AllocVar(header);
header->majorVersion = ITSA_MAJOR_VERSION;
header->minorVersion = ITSA_MINOR_VERSION;

/* Figure out sizes of names and sequence for each chromosome. */
struct chromInfo *chrom;
bits32 chromNamesSize = 0;
bits64 dnaDiskSize = 1;	/* For initial zero. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
   {
   chromNamesSize += strlen(chrom->name) + 1;
   dnaDiskSize += chrom->size + 1;  /* Include separating zeroes. */
   }

/* Fill in  most of rest of header fields */
header->chromCount = slCount(chromList);
header->chromNamesSize = roundUpTo4(chromNamesSize);
header->dnaDiskSize = roundUpTo4(dnaDiskSize);
bits32 chromSizesSize = header->chromCount*sizeof(bits32);

/* Write header.  The magic field has not been assigned yet; it is only filled in by
 * the final rewrite at the bottom of this function. */
mustWrite(f, header, sizeof(*header));

/* Write chromosome names. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    mustWrite(f, chrom->name, strlen(chrom->name)+1);
zeroPad(f, header->chromNamesSize - chromNamesSize);

/* Write chromosome sizes.  The pad computed below is the difference of two identical
 * products, so it is always zero. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    mustWrite(f, &chrom->size, sizeof(chrom->size));
int chromSizesSizePad = chromSizesSize - header->chromCount * sizeof(bits32);
zeroPad(f, chromSizesSizePad);

/* Write out chromosome DNA and zeros before, between, and after. */
mustWrite(f, allDna, dnaDiskSize);
zeroPad(f, header->dnaDiskSize - dnaDiskSize);
verboseTime(1, "Wrote %lld bases of DNA including zero padding", header->dnaDiskSize);

/* Calculate and write suffix array. Convert index13 to index of array as opposed to index
 * of sequence. */
bits64 arraySize = 0;
off_t suffixArrayFileOffset = ftello(f);
int slotCount = itsaSlotCount;
int slotIx;
for (slotIx=0; slotIx < slotCount; ++slotIx)
    {
    int slotSize = finishAndWriteOneSlot(offsetArray, listArray, index13[slotIx], allDna, f);
    /* Convert index13 to hold the position in the suffix array where the first thing matching
     * the corresponding 13-base prefix is found.
     * NOTE(review): arraySize is 64 bits wide while index13 entries are bits32, so
     * this store narrows the value. */
    if (slotSize != 0)
        index13[slotIx] = arraySize+1;	/* The +1 is so we can keep 0 for not found. */
    else
        index13[slotIx] = 0;
    arraySize += slotSize;
    if ((slotIx % 200000 == 0) && slotIx != 0)
	{
	verboseDot();
	if (slotIx % 10000000 == 0)
	    verbose(1, "fine sort bucket %d of %d\n", slotIx, slotCount);
	}
    }
verbose(1, "fine sort bucket %d of %d\n", slotCount, slotCount);
verboseTime(1, "Wrote %lld suffix array positions", arraySize);

/* Now we're done with the offsetArray and listArray buffers, so use them for the
 * next phase. */
bits32 *suffixArray = offsetArray;
offsetArray = NULL;	/* Help make some errors more obvious */
bits32 *traverseArray = listArray;
listArray = NULL;	/* Help make some errors more obvious */

/* Read the suffix array back from the file. */
fseeko(f, suffixArrayFileOffset, SEEK_SET);
mustRead(f, suffixArray, arraySize*sizeof(bits32));
/* On a stream opened for update, ISO C requires a file positioning call between an
 * input operation and the next output operation.  This zero-distance seek keeps the
 * position just past the suffix array while making the writes below well defined. */
fseeko(f, 0, SEEK_CUR);
verboseTime(1, "Read suffix array back in");

/* Calculate traverse array and cursor arrays.
 * NOTE(review): itsaFillInTraverseArray takes its size as bits32, so arraySize is
 * narrowed at this call. */
memset(traverseArray, 0, arraySize*sizeof(bits32));
UBYTE *cursorArray = needHugeMem(arraySize);
itsaFillInTraverseArray(allDna, suffixArray, arraySize, traverseArray, cursorArray);
verboseTime(1, "Filled in traverseArray");

/* Write out traverse array. */
mustWrite(f, traverseArray, arraySize*sizeof(bits32));
verboseTime(1, "Wrote out traverseArray");

/* Write out 13-mer index. */
mustWrite(f, index13, itsaSlotCount*sizeof(bits32));
verboseTime(1, "Wrote out index13");

/* Write out bits of cursor array corresponding to index.  index13 values are 1-based,
 * hence the -1; empty slots get a zero byte. */
for (slotIx=0; slotIx<itsaSlotCount; ++slotIx)
    {
    bits32 indexPos = index13[slotIx];
    if (indexPos == 0)
       fputc(0, f);
    else
       fputc(cursorArray[indexPos-1], f);
    }
verboseTime(1, "Wrote out cursors13");

/* Update a few fields in header, and go back and write it out again with
 * the correct magic number to indicate it's complete. */
header->magic = ITSA_MAGIC;
header->arraySize = arraySize;
header->size = sizeof(*header) 			// header
	+ header->chromNamesSize 		// chromosome names
	+ header->chromCount * sizeof(bits32)	// chromosome sizes
	+ header->dnaDiskSize 			// dna sequence
	+ sizeof(bits32) * arraySize	 	// suffix array
	+ sizeof(bits32) * arraySize   		// traverse array
	+ sizeof(bits32) * itsaSlotCount 	// index13
	+ sizeof(UBYTE) * itsaSlotCount;	// cursors13

rewind(f);
mustWrite(f, header, sizeof(*header));
carefulClose(&f);
verbose(1, "Completed %s is %lld bytes\n", output, header->size);
}
Beispiel #5
0
void hierSort(char *inputList)
/* Do a hierarchical merge sort so we don't run out of system file handles */
{
int level = 0;
char thisName[256]; 
char nextName[256]; 
char sortName[256]; 
struct lineFile *thisLf = NULL;
FILE *nextF = NULL;
int sortCount = 0;
FILE *sortF = NULL;
int fileCount = 0; 
char *files[MAXFILES];
boolean more = FALSE;
int block=0;
char *line=NULL;
safef(nextName, sizeof(nextName), "%s", inputList);
do
    {
    block=0;
    safef(thisName, sizeof(thisName), "%s", nextName);
    safef(nextName, sizeof(nextName), "%sinputList%d-", tempDir, level+1);
    makeTempName(&tempName, nextName, ".tmp");
    safef(nextName, sizeof(nextName), "%s", tempName.forCgi);
    thisLf = lineFileOpen(thisName,TRUE);
    if (!thisLf)
	errAbort("error  lineFileOpen(%s) returned NULL\n",thisName);
    more = lineFileNext(thisLf, &line, NULL);
    while (more)
	{
	int i=0;
	fileCount = 0;
	while (more && fileCount < MAXFILES) 
	    {
	    files[fileCount++]=cloneString(line);
	    more = lineFileNext(thisLf, &line, NULL);
    	    }
	if (!more && block==0)
	    { /* last level */
	    sortF = stdout;
	    }
	else
	    {
	    if (!nextF)
		nextF = mustOpen(nextName,"w");
	    safef(sortName, sizeof(sortName), "%ssort%d-", tempDir, sortCount++);
	    makeTempName(&tempName, sortName, ".tmp");
	    safef(sortName, sizeof(sortName), "%s", tempName.forCgi);
	    fprintf(nextF, "%s\n", sortName);
	    sortF = mustOpen(sortName,"w");
	    }
    	chainMergeSort(fileCount, files, sortF, level);
	if (sortF != stdout)
    	    carefulClose(&sortF);
	for(i=0;i<fileCount;++i)	    
    	    freez(&files[i]);
	verboseDot();
	verbose(2,"block=%d\n",block);
	++block;
	}
    lineFileClose(&thisLf);
    if (nextF)
	carefulClose(&nextF);
    if (level > 0)
	{
	remove(thisName);
	}
    verbose(1,"\n");
    verbose(2,"level=%d, block=%d\n",level,block);
    ++level;
    } while (block > 1);
}
Beispiel #6
0
void averageFetchingEachChrom(struct bbiFile *bbi, struct bed **pBedList, int fieldCount, 
	FILE *f, FILE *bedF)
/* Do the averaging by sorting bedList by chromosome, and then processing each chromosome
 * at once. Faster for long bedLists.
 *
 * *pBedList is re-sorted in place with bedCmpChrom.  For every item one tab-separated
 * line goes to f: name, size, covered bases, sum, sum/size and sum/covered.  Both
 * averages are reported as 0 when their divisor is 0.  optionallyPrintBedPlus() is
 * called for every item with the sum/covered mean.
 *
 * Which bases are sampled per item:
 *   - sampleAroundCenter > 0: a window of that width centered on the item;
 *   - fieldCount < 12: the whole chromStart..chromEnd span;
 *   - otherwise: each of the item's blocks.
 *
 * NOTE(review): chromVals is created here and not released in this function; the
 * centered window is not clamped to the chromosome before it is handed to
 * addBufIntervalInfo() - confirm that helper tolerates out-of-range coordinates. */
{
/* Sort by chromosome. */
slSort(pBedList, bedCmpChrom);

struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew();

struct bed *bed, *bedList, *nextChrom;
verbose(1, "processing chromosomes");
for (bedList = *pBedList; bedList != NULL; bedList = nextChrom)
    {
    /* Figure out which chromosome we're working on, and the last bed using it. */
    char *chrom = bedList->chrom;
    nextChrom = nextChromInList(bedList);
    verbose(2, "Processing %s\n", chrom);

    if (bigWigValsOnChromFetchData(chromVals, chrom, bbi))
	{
	double *valBuf = chromVals->valBuf;
	Bits *covBuf = chromVals->covBuf;

	/* Loop through beds doing sums and outputting. */
	for (bed = bedList; bed != nextChrom; bed = bed->next)
	    {
	    int size = 0, coverage = 0;
	    double sum = 0.0;
	    if (sampleAroundCenter > 0)
		{
		int center = (bed->chromStart + bed->chromEnd)/2;
		int left = center - (sampleAroundCenter/2);
		addBufIntervalInfo(valBuf, covBuf, left, left+sampleAroundCenter,
		    &size, &coverage, &sum);
		}
	    else
		{
		if (fieldCount < 12)
		    {
		    addBufIntervalInfo(valBuf, covBuf, bed->chromStart, bed->chromEnd,
			&size, &coverage, &sum);
		    }
		else
		    {
		    int i;
		    for (i=0; i<bed->blockCount; ++i)
			{
			int start = bed->chromStart + bed->chromStarts[i];
			int end = start + bed->blockSizes[i];
			addBufIntervalInfo(valBuf, covBuf, start, end, &size, &coverage, &sum);
			}
		    }
		}

	    /* Print out result, fudging both means to 0 when their divisor is 0.  A
	     * zero-length item would otherwise divide by zero and print NaN for the
	     * over-all-bases mean, unlike the no-data branch below which prints 0. */
	    double mean0 = 0;
	    if (size != 0)
		 mean0 = sum/size;
	    double mean = 0;
	    if (coverage > 0)
		 mean = sum/coverage;
	    fprintf(f, "%s\t%d\t%d\t%g\t%g\t%g\n", bed->name, size, coverage, sum, mean0, mean);
	    optionallyPrintBedPlus(bedF, bed, fieldCount, mean);
	    }
	verboseDot();
	}
    else
        {
	/* If no bigWig data on this chromosome, just output as if coverage is 0 */
	for (bed = bedList; bed != nextChrom; bed = bed->next)
	    {
	    fprintf(f, "%s\t%d\t0\t0\t0\t0\n", bed->name, bedTotalBlockSize(bed));
	    optionallyPrintBedPlus(bedF, bed, fieldCount, 0);
	    }
	}
    }
verbose(1, "\n");
}
Beispiel #7
0
/* Core algorithm */
/* Builds blocks from a list of seed edges and reports them.
 *
 * fw - output stream handed to block_enrichment() and report_blocks().
 * el - array of n edge pointers, consumed front to back.
 * n  - number of edges in el.
 *
 * For every edge that passes the seed filter a new Block is allocated, grown with
 * block_init(), trimmed back to the point where its best score was reached, extended
 * with further rows whose KL score passes the tolerance test, and stored in bb.  The
 * loop stops early once po->SCH_BLOCK blocks have been stored.
 *
 * Returns whatever report_blocks(fw, bb, block_id) returns.
 *
 * Reads and writes file-level state not visible here: po, rows, cols, sigma, profile,
 * arr_c, sublist, TFindex.
 *
 * NOTE(review): profile is (re)allocated on every call; genes, scores, bb and the
 * per-edge b_genes stacks are never released in this function. */
int cluster (FILE *fw, Edge **el, int n)
{
	int block_id = 0;
	Block **bb;
	int allocated = po->SCH_BLOCK;
	AllocArray(bb, allocated);

	Edge *e;
	Block *b;
	struct dyStack *genes, *scores, *b_genes, *allincluster;
	
	int i, j, k, components;
	/* profile is a cols x sigma table; it is zeroed again for each seed below. */
	AllocArray(profile, cols);
	for (j = 0; j < cols; j++) 
		AllocArray(profile[j], sigma);

	genes = dsNew(rows);
	scores = dsNew(rows);
	allincluster = dsNew(rows);

    

	bool *candidates;
	AllocArray(candidates, rows);

	/* This first assignment is overwritten at the top of the loop before use. */
	e = *el; 
	i = 0;
	while (i++ < n)
	{	
		/*printf ("%d\n",i);*/
		e = *el++;
		/* check if both genes already enumerated in previous blocks */
		bool flag = TRUE;
		/* with more than 250 rows use the cheaper allincluster membership test instead of check_seed() */
	        if (rows > 250)
		{ 
			if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
				flag = FALSE;
			else if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
				flag = FALSE;
			else if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
				flag =FALSE;
		}
		else   
		{
			flag = check_seed(e, bb, block_id);
			if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
				flag = FALSE;
			if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
				flag = FALSE;
		}
		if (!flag) continue;

		for (j = 0; j < cols; j++)
			for (k = 0; k < sigma; k++) 
				profile[j][k] = 0;

		/*you must allocate a struct if you want to use the pointers related to it*/
		AllocVar(b);
		/*initial the b->score*/
                b->score = MIN(2, e->score);
	
		/* initialize the stacks genes and scores */		
		/* Pushing -1 into every slot and clearing again presumably leaves the backing
		 * storage filled with -1 so the look-ahead read in the track-back loop below
		 * sees a defined value - TODO confirm against the dyStack implementation. */
		int ii;		
		dsClear(genes);
		dsClear(scores);		
		for(ii = 0; ii < rows; ii ++)
		{
			dsPush(genes,-1);
			dsPush(scores,-1);
		}		
		dsClear(genes);
		dsClear(scores);
		
		/*printf ("%d\t%d\n",e->gene_one,e->gene_two);*/
		dsPush(genes, e->gene_one);
		dsPush(genes, e->gene_two);
		dsPush(scores, 1);
		dsPush(scores, b->score);

		/* branch-and-cut condition for seed expansion */
		int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
                if (cand_threshold < 2) 
			cand_threshold = 2;

		/* maintain a candidate list to avoid looping through all rows */		
		for (j = 0; j < rows; j++) 
			candidates[j] = TRUE;
		candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
		components = 2;

		/* expansion step, generate a bicluster without noise */
		block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

		/* track back to find the genes by which we get the best score*/
		/* Stops at the first k whose score equals b->score while the following score
		 * does not; if no such k exists the loop ends with k == components.
		 * NOTE(review): the k+1 read looks one slot past the last pushed score. */
		for(k = 0; k < components; k++)
		{
/*			printf ("******%d\t%d\n",dsItem(scores,k),b->score);*/
			if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score)) break;
		}
		components = k + 1;
		/*printf ("%d",components);*/
		/* Reset the candidate flags, then mark genes 0..k as taken. */
		int ki;
		for (ki=0; ki < rows; ki++)
			candidates[ki] = TRUE;

		for (ki=0; ki < components - 1 ; ki++)
		{
			seed_update(arr_c[dsItem(genes,ki)]);
			candidates[dsItem(genes,ki)] = FALSE;
		}
		candidates[dsItem(genes,k)] = FALSE;
		/* presumably truncates the gene stack to entries 0..k - confirm against dyStack */
		genes->top = k ;
		int cnt = 0;
		bool *colcand;
		AllocArray(colcand, cols);
		for(ki = 0; ki < cols; ki++) 
			colcand[ki] = FALSE;             
    
		/* add columns satisfy the conservative r */ 
		seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);
		
		/* add some new possible genes */
		/* A row is added when it is still a candidate, matches the first gene on at
		 * least floor(cnt * TOLERANCE) of the selected columns, and its KL score
		 * reaches b->significance * TOLERANCE.
		 * NOTE(review): sub_array is not released here - confirm who owns it. */
		int m_cnt=0;
		continuous KL_score=0;
		discrete *sub_array;
		for ( ki = 0; ki < rows; ki++)
		{
			if (po->IS_list && !sublist[ki]) continue;
			m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
			{
				sub_array = get_intersect_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
				KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
				/*printf ("%d\t%.2f\n",m_cnt,KL_score);*/
				if (KL_score>=b->significance * po->TOLERANCE)
				{
					dsPush(genes,ki);
					components++;
					candidates[ki] = FALSE;
				}
			}
		}

		/* Row count before the reverse-matching rows are appended. */
                b->block_rows_pre = components;
		
		/* add genes that negative regulated to the consensus */
		for ( ki = 0; ki < rows; ki++)
		{
			if (po->IS_list && !sublist[ki]) continue;
			m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
			{
				sub_array = get_intersect_reverse_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
				KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
				if (KL_score>=b->significance * po->TOLERANCE)
				{
					dsPush(genes,ki);
					components++;
					candidates[ki] = FALSE;
				}
			}
		}
		free(colcand);

		/* save the current cluster*/
		/* Only the first block_rows_pre genes (those added before the reverse pass)
		 * are copied into b_genes for scan_block(). */
		b_genes = dsNew(b->block_rows_pre);
		for (ki = 0; ki < b->block_rows_pre; ki++)
			dsPush(b_genes, dsItem(genes,ki));

		/* store gene arrays inside block */
		b->genes = dsNew(components);
		b->conds = dsNew(cols);
	
		scan_block(b_genes, b);
		/* NOTE(review): b, b->genes and b->conds are not released when the block is
		 * dropped here. */
		if (b->block_cols == 0) continue;
		b->block_rows = components;

		/* Self-assignment: the score is left as it is. */
		b->score = b->score;
		/*	b->score = b->block_rows * b->block_cols;		*/

		dsClear(b->genes);
		for ( ki=0; ki < components; ki++)
			dsPush(b->genes,dsItem(genes,ki));
		/* Remember every gene used so far; consulted by the seed filter above. */
		for(ki = 0; ki < components; ki++)
			if(!isInStack(allincluster, dsItem(genes,ki))) 
				dsPush(allincluster,dsItem(genes,ki));	
		/*save the current block b to the block list bb so that we can sort the blocks by their score*/
		bb[block_id++] = b;

		/* reaching the results number limit */
		if (block_id == po->SCH_BLOCK) break;
		verboseDot();	
	}
	/* End the line of progress dots on stdout. */
	putchar('\n');
	/* free-up the candidate list */
	free(candidates);
	free(allincluster);
	block_enrichment(fw, bb, block_id);
	return report_blocks(fw, bb, block_id);
}
Beispiel #8
0
void pslSort2(char *outFile, char *tempDir)
/* Do second step of sort - merge all sorted files in tempDir
 * to final.
 *
 * Opens every tmp*.psl file in tempDir, then repeatedly writes the record that sorts
 * lowest under pslCmpQuery among the current heads of all files until every file is
 * exhausted.  A psl header is written first unless the global nohead is set.  The temp
 * files are removed afterwards.
 *
 * Aborts if tempDir holds no tmp*.psl files, if a temp file path does not fit in the
 * local path buffer, or if closing the output file reports an error.
 *
 * NOTE(review): tmpList and midList are not released in this function. */
{
char fileName[512];
struct slName *tmpList, *tmp;
struct midFile *midList = NULL, *mid;
int aliCount = 0;
FILE *f = mustOpen(outFile, "w");


if (!nohead)
    pslWriteHead(f);
tmpList = listDir(tempDir, "tmp*.psl");
if (tmpList == NULL)
    errAbort("No tmp*.psl files in %s\n", tempDir);
for (tmp = tmpList; tmp != NULL; tmp = tmp->next)
    {
    /* Bounded formatting: refuse an over-long path instead of overrunning fileName
     * or silently opening a truncated name. */
    int pathLen = snprintf(fileName, sizeof(fileName), "%s/%s", tempDir, tmp->name);
    if (pathLen < 0 || pathLen >= (int)sizeof(fileName))
	errAbort("Temp file name %s/%s is too long\n", tempDir, tmp->name);
    AllocVar(mid);
    mid->lf = pslFileOpen(fileName);
    slAddHead(&midList, mid);
    }
verbose(1, "writing %s", outFile);
fflush(stdout);
/* Write out the lowest sorting line from mid list until done. */
for (;;)
    {
    struct midFile *bestMid = NULL;
    if ( (++aliCount & 0xffff) == 0)
	{
	verboseDot();
	fflush(stdout);
	}
    for (mid = midList; mid != NULL; mid = mid->next)
	{
	/* Refill this file's head record if it was consumed; close the file once it
	 * has nothing more to give. */
	if (mid->lf != NULL && mid->psl == NULL)
	    {
	    if ((mid->psl = nextPsl(mid->lf)) == NULL)
		lineFileClose(&mid->lf);
	    }
	if (mid->psl != NULL)
	    {
	    if (bestMid == NULL || pslCmpQuery(&mid->psl, &bestMid->psl) < 0)
		bestMid = mid;
	    }
	}
    if (bestMid == NULL)
	break;
    pslTabOut(bestMid->psl, f);
    pslFree(&bestMid->psl);
    }
printf("\n");
/* fclose flushes buffered output, so a failure here means the merged file may be
 * incomplete; stop before the temp files it was built from are deleted. */
if (fclose(f) != 0)
    errAbort("Error closing %s\n", outFile);

/* The following really shouldn't be necessary.... */
for (mid = midList; mid != NULL; mid = mid->next)
    lineFileClose(&mid->lf);

printf("Cleaning up temp files\n");
for (tmp = tmpList; tmp != NULL; tmp = tmp->next)
    {
    /* Every name was already checked for length when the files were opened above. */
    snprintf(fileName, sizeof(fileName), "%s/%s", tempDir, tmp->name);
    remove(fileName);
    }
}
Beispiel #9
0
/* Core algorithm */
/* Builds blocks from a list of seed edges and reports them.
 *
 * fw - output stream handed to report_blocks().
 * el - array of n edge pointers, consumed front to back.
 * n  - number of edges in el.
 *
 * For every edge that passes the seed filter a new Block is allocated, grown with
 * block_init(), trimmed back to the point where its best score was reached, extended
 * with further rows that match enough of the selected columns, and stored in bb with a
 * score of rows times columns.  The loop stops early once po->SCH_BLOCK blocks have
 * been stored.
 *
 * Returns whatever report_blocks(fw, bb, block_id) returns.
 *
 * Reads and writes file-level state not visible here: po, rows, cols, sigma, profile,
 * arr_c.
 *
 * NOTE(review): profile is (re)allocated on every call; genes, scores, bb and the
 * per-edge b_genes stacks are never released in this function. */
int cluster (FILE *fw, Edge **el, int n)
{
	int block_id = 0;
	Block **bb;
	int allocated = po->SCH_BLOCK;
	AllocArray(bb, allocated);

	Edge *e;
	Block *b;
	struct dyStack *genes, *scores, *b_genes, *allincluster;
	
	int i, j, k, components;

	/* profile is a cols x sigma table; it is zeroed again for each seed below. */
	AllocArray(profile, cols);
	for (j = 0; j < cols; j++) AllocArray(profile[j], sigma);

	genes = dsNew(rows);
	scores = dsNew(rows);
	allincluster = dsNew(rows);
    
	bool *candidates;
	AllocArray(candidates, rows);

	/* The first assignment to e is overwritten at the top of the loop before use. */
	e = *el; i = 0;
	while (i++ < n)
	{	
		e = *el++;
        /*printf("a:%d b:%d score:%d\n",e->gene_one,e->gene_two,e->score);*/

		/* check if both genes already enumerated in previous blocks */
		bool flag = TRUE;
		/* speed up the program if the rows bigger than 200 */
	        if (rows > 200)
		{ 
			if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
			flag = FALSE;
		}
		else   
                    {
		     flag = check_seed(e, bb, block_id);
		    }
		if (!flag) continue;

		for (j = 0; j < cols; j++)
			for (k = 0; k < sigma; k++) profile[j][k] = 0;

		AllocVar(b);
                b->score = MIN(2, e->score);
	
		/* initialize the stacks genes and scores */		
		/* Pushing -1 into every slot and clearing again presumably leaves the backing
		 * storage filled with -1 so the look-ahead read in the track-back loop below
		 * sees a defined value - TODO confirm against the dyStack implementation. */
		int ii;		
		dsClear(genes);
		dsClear(scores);		
		for(ii = 0; ii < rows; ii ++)
		{
			dsPush(genes,-1);
			dsPush(scores,-1);
		}		
		dsClear(genes);
		dsClear(scores);
		
		dsPush(genes, e->gene_one);
		dsPush(genes, e->gene_two);
		dsPush(scores, 1);
		dsPush(scores, b->score);

		/* branch-and-cut condition for seed expansion */
		int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
                if (cand_threshold < 2) cand_threshold = 2;

		/* maintain a candidate list to avoid looping through all rows */		
		for (j = 0; j < rows; j++) candidates[j] = TRUE;
		candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
		
		components = 2;

		/* expansion step, generate a bicluster without noise */
		block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

		/* track back to find the best score that which genes makes it */
		/* Stops at the first k whose score equals b->score while the following score
		 * does not; if no such k exists the loop ends with k == components.
		 * NOTE(review): the k+1 read looks one slot past the last pushed score. */
		for(k = 0; k < components; k++)
			if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score)) break;
		components = k + 1;

		/* Reset the candidate flags, then mark genes 0..k as taken. */
		int ki;
		for (ki=0; ki < rows; ki++)
		candidates[ki] = TRUE;

		for (ki=0; ki < components - 1 ; ki++)
		{
			seed_update(arr_c[dsItem(genes,ki)]);
			candidates[dsItem(genes,ki)] = FALSE;
		}
		candidates[dsItem(genes,k)] = FALSE;
		/* presumably truncates the gene stack to entries 0..k - confirm against dyStack */
		genes->top = k ;
		int cnt = 0;
		bool *colcand;
		AllocArray(colcand, cols);
		for(ki = 0; ki < cols; ki++) colcand[ki] = FALSE;             
    
        /* add columns satisfy the conservative r */ 
		seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);
		
        /* add some new possible genes */
		/* A row is added when it is still a candidate and matches the first gene on
		 * at least floor(cnt * TOLERANCE) of the selected columns. */
		int m_cnt;
		for ( ki = 0; ki < rows; ki++)
		{
			m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
			{
				dsPush(genes,ki);
				components++;
				candidates[ki] = FALSE;
			}
		}
		/* Row count before the reverse-matching rows are appended. */
                b->block_rows_pre = components;
		
        /* add genes that negative regulated to the consensus */
		for ( ki = 0; ki < rows; ki++)
		{
			m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
			{
				dsPush(genes,ki);
				components++;
				candidates[ki] = FALSE;
			}
		}
		free(colcand);

		/* save the current cluster*/
		/* Only the first block_rows_pre genes (those added before the reverse pass)
		 * are copied into b_genes for scan_block(). */
		b_genes = dsNew(b->block_rows_pre);
		for (ki = 0; ki < b->block_rows_pre; ki++)
			dsPush(b_genes, dsItem(genes,ki));

		/* store gene arrays inside block */
		b->genes = dsNew(components);
		b->conds = dsNew(cols);
	
		scan_block(b_genes, b);
		/* NOTE(review): b, b->genes and b->conds are not released when the block is
		 * dropped here. */
		if (b->block_cols == 0) continue;
		b->block_rows = components;
		/* Final score is the block area: rows times columns. */
                b->score = b->block_rows * b->block_cols;		
		dsClear(b->genes);
		for ( ki=0; ki < components; ki++)
			dsPush(b->genes,dsItem(genes,ki));
		/* Remember every gene used so far; consulted by the seed filter above. */
		for(ki = 0; ki < components; ki++)
			if(!isInStack(allincluster, dsItem(genes,ki))) dsPush(allincluster,dsItem(genes,ki));	

		bb[block_id++] = b;

        /* reaching the results number limit */
		if (block_id == po->SCH_BLOCK) break;
		verboseDot();	

	}

	/* End the line of progress dots on stdout. */
	putchar('\n');
    /* free-up the candidate list */
	free(candidates);
	free(allincluster);

	return report_blocks(fw, bb, block_id);
}
Beispiel #10
0
void bigWigMerge(int inCount, char *inFiles[], char *outFile)
/* bigWigMerge - Merge together multiple bigWigs into a single one..
 *
 * inFiles holds inCount names.  When the global clInList is set each name is passed to
 * addWigsInFile(); otherwise each name is opened directly as a bigWig.  For every
 * chromosome returned by getAllChroms() the values of all inputs are summed base by
 * base (each interval value capped at clClip before it is added), and each run of
 * equal sums is written to outFile as one bedGraph line when sum + clAdjust exceeds
 * clThreshold.
 *
 * NOTE(review): the opened input files and chromList are not released in this
 * function. */
{
/* Make a list of open bigWig files. */
struct bbiFile *inFile, *inFileList = NULL;
int i;
for (i=0; i<inCount; ++i)
    {
    if (clInList)
        {
	addWigsInFile(inFiles[i], &inFileList);
	}
    else
	{
	inFile = bigWigFileOpen(inFiles[i]);
	slAddTail(&inFileList, inFile);
	}
    }

FILE *f = mustOpen(outFile, "w");

struct bbiChromInfo *chrom, *chromList = getAllChroms(inFileList);
verbose(1, "Got %d chromosomes from %d bigWigs\nProcessing", 
	slCount(chromList), slCount(inFileList));
double *mergeBuf = NULL;
int mergeBufSize = 0;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    /* Interval lists for this chromosome are allocated from lm and released together
     * by lmCleanup() at the bottom of the loop. */
    struct lm *lm = lmInit(0);

    /* Make sure merge buffer is big enough. */
    int chromSize = chrom->size;
    verboseDot();
    verbose(2, "Processing %s (%d bases)\n", chrom->name, chromSize);
    if (chromSize > mergeBufSize)
        {
	mergeBufSize = chromSize;
	freeMem(mergeBuf);
	mergeBuf = needHugeMem(mergeBufSize * sizeof(double));
	}
    /* The function-level i is reused from here on; the file-opening loop that used
     * it has already finished. */
    for (i=0; i<chromSize; ++i)
        mergeBuf[i] = 0.0;

    /* Loop through each input file grabbing data and merging it in. */
    for (inFile = inFileList; inFile != NULL; inFile = inFile->next)
        {
	struct bbiInterval *ivList = bigWigIntervalQuery(inFile, chrom->name, 0, chromSize, lm);
	verbose(3, "Got %d intervals in %s\n", slCount(ivList), inFile->fileName);
	struct bbiInterval *iv;
	for (iv = ivList; iv != NULL; iv = iv->next)
	    {
	    double val = iv->val;
	    if (val > clClip)
	        val = clClip;
	    int end = iv->end;
	    for (i=iv->start; i < end; ++i)
	         mergeBuf[i] += val;
	    }
	}


    /* Output each range of same values as a bedGraph item */
    int sameCount;
    for (i=0; i<chromSize; i += sameCount)
        {
	sameCount = doublesTheSame(mergeBuf+i, chromSize-i);
	double val = mergeBuf[i] + clAdjust;
	if (val > clThreshold)
	    fprintf(f, "%s\t%d\t%d\t%g\n", chrom->name, i, i + sameCount, val);
	}

    lmCleanup(&lm);
    }
verbose(1, "\n");

/* Release the merge buffer, which was previously left allocated on return.  The same
 * call is already used on this buffer when it is regrown above. */
freeMem(mergeBuf);
carefulClose(&f);
}