Esempio n. 1
0
/* scan through all columns and identify the set within threshold,
 * "fuzziness" of the block is controlled by TOLERANCE (-c)
 */
void scan_block (struct dyStack *gene_set, Block *b_ptr)
{
	int i, j;
	int block_rows, cur_rows;
	block_rows = cur_rows = dsSize(gene_set);
	
	int k;
	for (j = 0; j < cols; j++)
		for (k=0; k<sigma; k++) 
			profile[j][k] = 0;
	for (j = 0; j< cur_rows ; j++)
		seed_update(arr_c[dsItem(gene_set,j)]);

	int btolerance = ceil(po->TOLERANCE* block_rows);
	for (j = 0; j < cols; j++)
	{
		/* See if this column satisfies tolerance */
		/* here i start from 1 because symbols[0]=0 */
		for (i = 1; i < sigma; i++)
		{
			if ((profile[j][i] >= btolerance))
			{
				dsPush(b_ptr->conds, j); break;
			}
		}		
	}
	b_ptr->block_cols = dsSize(b_ptr->conds);
}
Esempio n. 2
0
void seed_intersect (const discrete *s1, const discrete *s2)
{
	seed_update(s1); seed_update(s2);
}
Esempio n. 3
0
/* Core algorithm */
int cluster (FILE *fw, Edge **el, int n)
{
	int block_id = 0;
	Block **bb;
	int allocated = po->SCH_BLOCK;
	AllocArray(bb, allocated);

	Edge *e;
	Block *b;
	struct dyStack *genes, *scores, *b_genes, *allincluster;
	
	int i, j, k, components;
	AllocArray(profile, cols);
	for (j = 0; j < cols; j++) 
		AllocArray(profile[j], sigma);

	genes = dsNew(rows);
	scores = dsNew(rows);
	allincluster = dsNew(rows);

    

	bool *candidates;
	AllocArray(candidates, rows);

	e = *el; 
	i = 0;
	while (i++ < n)
	{	
		/*printf ("%d\n",i);*/
		e = *el++;
		/* check if both genes already enumerated in previous blocks */
		bool flag = TRUE;
		/* speed up the program if the rows bigger than 200 */
	        if (rows > 250)
		{ 
			if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
				flag = FALSE;
			else if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
				flag = FALSE;
			else if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
				flag =FALSE;
		}
		else   
		{
			flag = check_seed(e, bb, block_id);
			if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
				flag = FALSE;
			if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
				flag = FALSE;
		}
		if (!flag) continue;

		for (j = 0; j < cols; j++)
			for (k = 0; k < sigma; k++) 
				profile[j][k] = 0;

		/*you must allocate a struct if you want to use the pointers related to it*/
		AllocVar(b);
		/*initial the b->score*/
                b->score = MIN(2, e->score);
	
		/* initialize the stacks genes and scores */		
		int ii;		
		dsClear(genes);
		dsClear(scores);		
		for(ii = 0; ii < rows; ii ++)
		{
			dsPush(genes,-1);
			dsPush(scores,-1);
		}		
		dsClear(genes);
		dsClear(scores);
		
		/*printf ("%d\t%d\n",e->gene_one,e->gene_two);*/
		dsPush(genes, e->gene_one);
		dsPush(genes, e->gene_two);
		dsPush(scores, 1);
		dsPush(scores, b->score);

		/* branch-and-cut condition for seed expansion */
		int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
                if (cand_threshold < 2) 
			cand_threshold = 2;

		/* maintain a candidate list to avoid looping through all rows */		
		for (j = 0; j < rows; j++) 
			candidates[j] = TRUE;
		candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
		components = 2;

		/* expansion step, generate a bicluster without noise */
		block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

		/* track back to find the genes by which we get the best score*/
		for(k = 0; k < components; k++)
		{
/*			printf ("******%d\t%d\n",dsItem(scores,k),b->score);*/
			if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score)) break;
		}
		components = k + 1;
		/*printf ("%d",components);*/
		int ki;
		for (ki=0; ki < rows; ki++)
			candidates[ki] = TRUE;

		for (ki=0; ki < components - 1 ; ki++)
		{
			seed_update(arr_c[dsItem(genes,ki)]);
			candidates[dsItem(genes,ki)] = FALSE;
		}
		candidates[dsItem(genes,k)] = FALSE;
		genes->top = k ;
		int cnt = 0;
		bool *colcand;
		AllocArray(colcand, cols);
		for(ki = 0; ki < cols; ki++) 
			colcand[ki] = FALSE;             
    
		/* add columns satisfy the conservative r */ 
		seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);
		
		/* add some new possible genes */
		int m_cnt=0;
		continuous KL_score=0;
		discrete *sub_array;
		for ( ki = 0; ki < rows; ki++)
		{
			if (po->IS_list && !sublist[ki]) continue;
			m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
			{
				sub_array = get_intersect_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
				KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
				/*printf ("%d\t%.2f\n",m_cnt,KL_score);*/
				if (KL_score>=b->significance * po->TOLERANCE)
				{
					dsPush(genes,ki);
					components++;
					candidates[ki] = FALSE;
				}
			}
		}

                b->block_rows_pre = components;
		
		/* add genes that negative regulated to the consensus */
		for ( ki = 0; ki < rows; ki++)
		{
			if (po->IS_list && !sublist[ki]) continue;
			m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
			{
				sub_array = get_intersect_reverse_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
				KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
				if (KL_score>=b->significance * po->TOLERANCE)
				{
					dsPush(genes,ki);
					components++;
					candidates[ki] = FALSE;
				}
			}
		}
		free(colcand);

		/* save the current cluster*/
		b_genes = dsNew(b->block_rows_pre);
		for (ki = 0; ki < b->block_rows_pre; ki++)
			dsPush(b_genes, dsItem(genes,ki));

		/* store gene arrays inside block */
		b->genes = dsNew(components);
		b->conds = dsNew(cols);
	
		scan_block(b_genes, b);
		if (b->block_cols == 0) continue;
		b->block_rows = components;

		b->score = b->score;
		/*	b->score = b->block_rows * b->block_cols;		*/

		dsClear(b->genes);
		for ( ki=0; ki < components; ki++)
			dsPush(b->genes,dsItem(genes,ki));
		for(ki = 0; ki < components; ki++)
			if(!isInStack(allincluster, dsItem(genes,ki))) 
				dsPush(allincluster,dsItem(genes,ki));	
		/*save the current block b to the block list bb so that we can sort the blocks by their score*/
		bb[block_id++] = b;

		/* reaching the results number limit */
		if (block_id == po->SCH_BLOCK) break;
		verboseDot();	
	}
	/* writes character to the current position in the standard output (stdout) and advances the internal file position indicator to the next position.
	 * It is equivalent to putc(character,stdout).*/
	putchar('\n');
	/* free-up the candidate list */
	free(candidates);
	free(allincluster);
	block_enrichment(fw, bb, block_id);
	return report_blocks(fw, bb, block_id);
}
Esempio n. 4
0
/* Core algorithm */
int cluster (FILE *fw, Edge **el, int n)
{
	int block_id = 0;
	Block **bb;
	int allocated = po->SCH_BLOCK;
	AllocArray(bb, allocated);

	Edge *e;
	Block *b;
	struct dyStack *genes, *scores, *b_genes, *allincluster;
	
	int i, j, k, components;

	AllocArray(profile, cols);
	for (j = 0; j < cols; j++) AllocArray(profile[j], sigma);

	genes = dsNew(rows);
	scores = dsNew(rows);
	allincluster = dsNew(rows);
    
	bool *candidates;
	AllocArray(candidates, rows);

	e = *el; i = 0;
	while (i++ < n)
	{	
		e = *el++;
        /*printf("a:%d b:%d score:%d\n",e->gene_one,e->gene_two,e->score);*/

		/* check if both genes already enumerated in previous blocks */
		bool flag = TRUE;
		/* speed up the program if the rows bigger than 200 */
	        if (rows > 200)
		{ 
			if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
			flag = FALSE;
		}
		else   
                    {
		     flag = check_seed(e, bb, block_id);
		    }
		if (!flag) continue;

		for (j = 0; j < cols; j++)
			for (k = 0; k < sigma; k++) profile[j][k] = 0;

		AllocVar(b);
                b->score = MIN(2, e->score);
	
		/* initialize the stacks genes and scores */		
		int ii;		
		dsClear(genes);
		dsClear(scores);		
		for(ii = 0; ii < rows; ii ++)
		{
			dsPush(genes,-1);
			dsPush(scores,-1);
		}		
		dsClear(genes);
		dsClear(scores);
		
		dsPush(genes, e->gene_one);
		dsPush(genes, e->gene_two);
		dsPush(scores, 1);
		dsPush(scores, b->score);

		/* branch-and-cut condition for seed expansion */
		int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
                if (cand_threshold < 2) cand_threshold = 2;

		/* maintain a candidate list to avoid looping through all rows */		
		for (j = 0; j < rows; j++) candidates[j] = TRUE;
		candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
		
		components = 2;

		/* expansion step, generate a bicluster without noise */
		block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

		/* track back to find the best score that which genes makes it */
		for(k = 0; k < components; k++)
			if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score)) break;
		components = k + 1;

		int ki;
		for (ki=0; ki < rows; ki++)
		candidates[ki] = TRUE;

		for (ki=0; ki < components - 1 ; ki++)
		{
			seed_update(arr_c[dsItem(genes,ki)]);
			candidates[dsItem(genes,ki)] = FALSE;
		}
		candidates[dsItem(genes,k)] = FALSE;
		genes->top = k ;
		int cnt = 0;
		bool *colcand;
		AllocArray(colcand, cols);
		for(ki = 0; ki < cols; ki++) colcand[ki] = FALSE;             
    
        /* add columns satisfy the conservative r */ 
		seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);
		
        /* add some new possible genes */
		int m_cnt;
		for ( ki = 0; ki < rows; ki++)
		{
			m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
			{
				dsPush(genes,ki);
				components++;
				candidates[ki] = FALSE;
			}
		}
                b->block_rows_pre = components;
		
        /* add genes that negative regulated to the consensus */
		for ( ki = 0; ki < rows; ki++)
		{
			m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
			{
				dsPush(genes,ki);
				components++;
				candidates[ki] = FALSE;
			}
		}
		free(colcand);

		/* save the current cluster*/
		b_genes = dsNew(b->block_rows_pre);
		for (ki = 0; ki < b->block_rows_pre; ki++)
			dsPush(b_genes, dsItem(genes,ki));

		/* store gene arrays inside block */
		b->genes = dsNew(components);
		b->conds = dsNew(cols);
	
		scan_block(b_genes, b);
		if (b->block_cols == 0) continue;
		b->block_rows = components;
                b->score = b->block_rows * b->block_cols;		
		dsClear(b->genes);
		for ( ki=0; ki < components; ki++)
			dsPush(b->genes,dsItem(genes,ki));
		for(ki = 0; ki < components; ki++)
			if(!isInStack(allincluster, dsItem(genes,ki))) dsPush(allincluster,dsItem(genes,ki));	

		bb[block_id++] = b;

        /* reaching the results number limit */
		if (block_id == po->SCH_BLOCK) break;
		verboseDot();	

	}

	putchar('\n');
    /* free-up the candidate list */
	free(candidates);
	free(allincluster);

	return report_blocks(fw, bb, block_id);
}