Пример #1
0
/*
 * Grow a bicluster from a two-row seed.
 *
 * On entry `genes` must already hold the two seed rows (items 0 and 1 are
 * read).  The function:
 *   1. marks as column candidates every column where both seed rows carry the
 *      same value and symbols[] of that value is non-zero, and averages the
 *      per-column get_KL() result into KL_score_c;
 *   2. when there are more than 100 rows, clears candidates[] for rows whose
 *      overlap with the seed is below the 100th largest overlap;
 *   3. stores in b->cond_low_bound the overlap found at the top-5% position
 *      of the sorted overlap counts;
 *   4. repeatedly picks the candidate row with the largest overlap, pushes it
 *      onto `genes` and its score onto `scores`, narrows the column
 *      candidates with update_colcand() and raises b->score /
 *      b->significance when the new score is higher.
 *
 * The expansion stops when no candidate is left, when the best overlap is
 * below po->COL_WIDTH, or (only if po->IS_cond) below b->cond_low_bound.
 * Note that *components is incremented at the top of every iteration,
 * including the one that terminates the loop.
 *
 * e and allincluster are not referenced by this function.
 * candidates[] is modified in place; rows below cand_threshold are dropped.
 */
static void block_init(Edge *e, Block *b, 
                     struct dyStack *genes, struct dyStack *scores,
                     bool *candidates, const int cand_threshold,
                     int *components, struct dyStack *allincluster)
{
	int i, j, score, top;
	int cnt = 0, col_num = 0;
	int max_cnt, max_i;
	int *arr_rows, *arr_rows_b;
	bool *colcand;
	discrete *g1, *g2;
	/* NOTE(review): col_all and KL_score are stack VLAs sized by the global
	 * matrix dimensions; consider heap allocation for very large inputs. */
	discrete *sub_array, col_array[2], col_all[rows];
	continuous KL_score[cols], KL_score_c = 0, KL_score_r = 0;

	AllocArray(arr_rows, rows);
	AllocArray(arr_rows_b, rows);
	AllocArray(colcand, cols);
	for (i = 0; i < cols; i++)
		colcand[i] = FALSE;
	g1 = arr_c[dsItem(genes,0)];
	g2 = arr_c[dsItem(genes,1)];

	/* initial column candidates: columns on which both seed rows agree */
	for (i = 0; i < cols; i++)
	{
		if ((g1[i] == g2[i]) && (symbols[g1[i]] != 0))
		{
			colcand[i] = TRUE;
			for (j = 0; j < rows; j++)
				col_all[j] = arr_c[j][i];
			col_array[0] = col_array[1] = symbols[g1[i]];
			KL_score[i] = get_KL(col_array, col_all, 2, rows);
			KL_score_c += KL_score[i];
			col_num++;
		}
	}
	/* average column score; leave it at 0 when the seeds share no column
	 * instead of dividing by zero */
	if (col_num > 0)
		KL_score_c = KL_score_c / col_num;

	for (i = 0; i < rows; i++)
	{
		arr_rows[i] = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[i]);
		arr_rows_b[i] = arr_rows[i];
	}

	/* Both the top-100 cut and the percentile lookup below index the sorted
	 * copy, so it has to be sorted regardless of the number of rows.
	 * Assumes compare_int orders ascending (the rows-100 index below relies
	 * on that as well). */
	qsort(arr_rows_b, rows, sizeof *arr_rows_b, compare_int);

	/* keep only the 100 rows with the largest overlap as candidates: they are
	 * taken to characterise the bicluster and this bounds the running time */
	if (rows > 100)
	{
		top = arr_rows_b[rows - 100];
		for (i = 0; i < rows; i++)
			if (arr_rows[i] < top)
				candidates[i] = FALSE;
	}

	/* condition low bound for the current seed: overlap at the top-5%
	 * position.  With fewer than 20 rows floor(0.05*rows) is 0, which would
	 * index one past the end of the array, so use the last element then. */
	int cutoff = floor(0.05 * rows);
	if (cutoff < 1)
		cutoff = 1;
	b->cond_low_bound = arr_rows_b[rows - cutoff];

	/* the overlap tables are not needed during the expansion */
	free(arr_rows);
	free(arr_rows_b);

	while (*components < rows)
	{
		max_cnt = -1;
		max_i = -1;
		(*components)++;

		/* find the candidate row sharing the most candidate columns with the
		 * first seed row; rows under the threshold are dropped for later
		 * rounds but may still be selected in this one */
		for (i = 0; i < rows; i++)
		{
			if (!candidates[i]) continue;
			if (po->IS_list && !sublist[i]) continue;
			cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[i]);
			if (cnt < cand_threshold)
				candidates[i] = FALSE;
			if (cnt > max_cnt)
			{
				max_cnt = cnt;
				max_i = i;
			}
		}

		if (max_cnt > 0)
		{
			/* row score: mean get_KL() of the shared pattern against the
			 * new row and the rows indexed 0 .. *components-2 in genes */
			/* NOTE(review): sub_array is never released here; if
			 * get_intersect_row() returns heap memory it leaks once per
			 * iteration - confirm ownership before adding a free(). */
			sub_array = get_intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[max_i], max_cnt);
			KL_score_r = get_KL(sub_array, arr_c[max_i], max_cnt, cols);
			for (j = 0; j < *components - 1; j++)
				KL_score_r += get_KL(sub_array, arr_c[dsItem(genes,j)], max_cnt, cols);
			KL_score_r = KL_score_r / (*components);

			/* take the columns the new row disagrees on out of the running
			 * column average */
			for (j = 0; j < cols; j++)
			{
				if (colcand[j] && (arr_c[max_i][j] != arr_c[dsItem(genes,0)][j]))
				{
					if (col_num > 1)
						KL_score_c += (KL_score_c - KL_score[j]) / (col_num - 1);
					else
						KL_score_c = 0; /* no column left to average over */
					col_num--;
				}
			}
		}

		if (max_i < 0 || max_cnt < po->COL_WIDTH) break;
		if (po->IS_cond && max_cnt < b->cond_low_bound) break;

		if (po->IS_area)
			score = *components * max_cnt;
		else
			score = floor(100 * (KL_score_r + KL_score_c));
		if (score > b->score)
		{
			b->score = score;
			b->significance = KL_score_r;
		}
		dsPush(genes, max_i);
		dsPush(scores, score);
		update_colcand(colcand, arr_c[dsItem(genes,0)], arr_c[max_i]);
		candidates[max_i] = FALSE;
	}
	free(colcand);
}
Пример #2
0
/*
 * Grow a bicluster from a two-row seed.
 *
 * On entry `genes` must already hold the two seed rows (items 0 and 1 are
 * read).  Columns on which both seed rows carry the same non-zero value
 * become the initial column candidates.  When there are more than 100 rows,
 * candidates[] is cleared for every row whose overlap with the seed is below
 * the 100th largest overlap.
 *
 * The loop then repeatedly selects the candidate row with the largest
 * overlap, pushes it onto `genes` and MIN(*components, overlap) onto
 * `scores`, narrows the column candidates with update_colcand() and raises
 * b->score when the new score is higher.  It stops when no candidate is left
 * or the best overlap is below po->COL_WIDTH.  *components is incremented at
 * the top of every iteration, including the one that terminates the loop.
 *
 * e and allincluster are not referenced by this function.
 * candidates[] is modified in place; rows below cand_threshold are dropped.
 */
static void block_init(Edge *e, Block *b, 
                     struct dyStack *genes, struct dyStack *scores,
                     bool *candidates, const int cand_threshold,
                     int *components, struct dyStack *allincluster)
{
	int i, score, top;
	int cnt = 0;
	int max_cnt, max_i;
	int *arr_rows, *arr_rows_b;
	bool *colcand;
	discrete *g1, *g2;

	AllocArray(arr_rows, rows);
	AllocArray(arr_rows_b, rows);
	AllocArray(colcand, cols);
	for (i = 0; i < cols; i++)
		colcand[i] = FALSE;

	/* initial column candidates: seed rows agree on a non-zero value */
	g1 = arr_c[dsItem(genes,0)];
	g2 = arr_c[dsItem(genes,1)];
	for (i = 0; i < cols; i++)
		if ((g1[i] == g2[i]) && (g1[i] != 0))
			colcand[i] = TRUE;

	for (i = 0; i < rows; i++)
	{
		arr_rows[i] = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[i]);
		arr_rows_b[i] = arr_rows[i];
	}

	/* keep only the 100 rows with the largest overlap as candidates.
	 * Assumes compare_int orders ascending, so that index rows-100 holds the
	 * 100th largest value. */
	if (rows > 100)
	{
		qsort(arr_rows_b, rows, sizeof *arr_rows_b, compare_int);
		top = arr_rows_b[rows - 100];
		for (i = 0; i < rows; i++)
			if (arr_rows[i] < top)
				candidates[i] = FALSE;
	}

	/* the overlap tables were only needed for the pruning above; the
	 * original never released them */
	free(arr_rows);
	free(arr_rows_b);

	while (*components < rows)
	{
		max_cnt = -1;
		max_i = -1;
		(*components)++;

		/* rows under the threshold are dropped for later rounds but may
		 * still be selected in this one */
		for (i = 0; i < rows; i++)
		{
			if (!candidates[i]) continue;

			cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[i]);
			if (cnt < cand_threshold)
				candidates[i] = FALSE;
			if (cnt > max_cnt)
			{
				max_cnt = cnt;
				max_i = i;
			}
		}
		if (max_cnt < po->COL_WIDTH || max_i < 0) break;

		score = MIN(*components, max_cnt);
		if (score > b->score)
			b->score = score;
		dsPush(genes, max_i);
		dsPush(scores, score);
		update_colcand(colcand, arr_c[dsItem(genes,0)], arr_c[max_i]);
		candidates[max_i] = FALSE;
	}
	free(colcand);
}