/*
 * Grow a bicluster from a two-gene seed (KL-score variant).
 *
 * genes          stack whose items 0 and 1 are the seed rows; every row
 *                accepted by this function is pushed onto it.
 * scores         stack that receives one score per accepted row.
 * candidates     per-row flags; rows are cleared here once they are pruned
 *                or accepted.
 * cand_threshold rows whose intersect_row() count with the current column
 *                set falls below this are dropped from the candidates.
 * components     in/out row counter; it is incremented at the start of every
 *                loop pass, including the pass that ends with a break.
 * b              b->cond_low_bound is set here (whenever rows > 0), and
 *                b->score / b->significance are raised whenever a better
 *                score is found.
 * e, allincluster are not referenced by this function.
 *
 * Reads the file-level globals rows, cols, arr_c, symbols, sublist and po.
 *
 * NOTE(review): this file also contains a second definition of block_init
 * with the same signature; only one of them can be compiled in.
 */
static void block_init(Edge *e, Block *b, struct dyStack *genes, struct dyStack *scores,
		bool *candidates, const int cand_threshold, int *components, struct dyStack *allincluster)
{
	int i, j, score, top;
	int cnt, col_num = 0;
	int max_cnt, max_i;
	int *arr_rows, *arr_rows_b;
	bool *colcand;
	discrete *g1, *g2;
	discrete *sub_array, col_array[2], col_all[rows];
	continuous KL_score[cols], KL_score_c = 0, KL_score_r = 0;

	AllocArray(arr_rows, rows);
	AllocArray(arr_rows_b, rows);
	AllocArray(colcand, cols);
	for (i = 0; i < cols; i++)
		colcand[i] = FALSE;

	g1 = arr_c[dsItem(genes,0)];
	g2 = arr_c[dsItem(genes,1)];

	/* Initial column set: columns where both seed rows hold the same value
	 * and that value maps to a non-zero symbol. KL_score[] is only filled
	 * in for these columns. */
	for (i = 0; i < cols; i++)
	{
		if ((g1[i] == g2[i]) && (symbols[g1[i]] != 0))
		{
			colcand[i] = TRUE;
			for (j = 0; j < rows; j++)
				col_all[j] = arr_c[j][i];
			col_array[0] = col_array[1] = symbols[g1[i]];
			KL_score[i] = get_KL(col_array, col_all, 2, rows);
			KL_score_c += KL_score[i];
			col_num++;
		}
	}
	/* Mean column score; guarded so an empty column set yields 0 instead of
	 * a division by zero. */
	if (col_num > 0)
		KL_score_c = KL_score_c / col_num;

	for (i = 0; i < rows; i++)
	{
		arr_rows[i] = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[i]);
		arr_rows_b[i] = arr_rows[i];
	}

	/* Sort the copy unconditionally: both the top-100 filter and the
	 * condition low bound below index it as an ordered array. Previously the
	 * sort only ran for rows > 100, so the low bound was read from unsorted
	 * data on smaller inputs.
	 * NOTE(review): assumes compare_int orders ascending - confirm. */
	if (rows > 0)
		qsort(arr_rows_b, rows, sizeof *arr_rows_b, compare_int);

	/* Keep only the 100 rows with the largest overlap when seeding a
	 * bicluster: they are taken to characterise its structure, and this
	 * bounds the work done in the expansion loop. */
	if (rows > 100)
	{
		top = arr_rows_b[rows - 100];
		for (i = 0; i < rows; i++)
			if (arr_rows[i] < top)
				candidates[i] = FALSE;
	}

	/* Condition low bound for the current seed: the overlap count found 5%
	 * from the top of the sorted array. For rows < 20 the 5% cutoff rounds
	 * to 0, which used to index one past the end; clamp to the last element. */
	if (rows > 0)
	{
		int cutoff = (int) floor(0.05 * rows);
		int bound_idx = rows - cutoff;
		if (bound_idx > rows - 1)
			bound_idx = rows - 1;
		b->cond_low_bound = arr_rows_b[bound_idx];
	}

	/* Neither array is needed past this point (they used to be leaked). */
	free(arr_rows_b);
	free(arr_rows);

	while (*components < rows)
	{
		max_cnt = -1;
		max_i = -1;
		(*components)++;

		/* Pick the candidate row sharing the most columns with the seed row;
		 * ties keep the lowest row index because the comparison is strict. */
		for (i = 0; i < rows; i++)
		{
			if (!candidates[i])
				continue;
			if (po->IS_list && !sublist[i])
				continue;
			cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[i]);
			if (cnt < cand_threshold)
				candidates[i] = FALSE;
			if (cnt > max_cnt)
			{
				max_cnt = cnt;
				max_i = i;
			}
		}

		if (max_cnt > 0)
		{
			/* Row score: get_KL of the shared sub-row against the chosen row
			 * plus the first (*components - 1) stacked rows, averaged over
			 * *components. */
			/* NOTE(review): if get_intersect_row() returns freshly allocated
			 * memory, sub_array is leaked on every pass - confirm ownership
			 * and free it here if so. */
			sub_array = get_intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[max_i], max_cnt);
			KL_score_r = get_KL(sub_array, arr_c[max_i], max_cnt, cols);
			for (j = 0; j < *components - 1; j++)
				KL_score_r += get_KL(sub_array, arr_c[dsItem(genes,j)], max_cnt, cols);
			KL_score_r = KL_score_r / (*components);

			/* Column score: take the columns this row disagrees on out of
			 * the running mean, mean' = mean + (mean - x) / (n - 1). */
			for (j = 0; j < cols; j++)
			{
				if (colcand[j] && (arr_c[max_i][j] != arr_c[dsItem(genes,0)][j]))
				{
					if (col_num > 1)
						KL_score_c += (KL_score_c - KL_score[j]) / (col_num - 1);
					else
						KL_score_c = 0; /* last column removed: nothing left to average */
					col_num--;
				}
			}
		}

		/* Stop when no candidate is left or the best overlap is too narrow. */
		if (max_cnt < po->COL_WIDTH || max_i < 0)
			break;
		if (po->IS_cond && max_cnt < b->cond_low_bound)
			break;

		if (po->IS_area)
			score = *components * max_cnt;
		else
			score = floor(100 * (KL_score_r + KL_score_c));

		if (score > b->score)
		{
			b->score = score;
			b->significance = KL_score_r;
		}

		dsPush(genes, max_i);
		dsPush(scores, score);
		update_colcand(colcand, arr_c[dsItem(genes,0)], arr_c[max_i]);
		candidates[max_i] = FALSE;
	}

	free(colcand);
}
/*
 * Grow a bicluster from a two-gene seed (plain MIN(rows, columns) score).
 *
 * genes          stack whose items 0 and 1 are the seed rows; every row
 *                accepted by this function is pushed onto it.
 * scores         stack that receives one score per accepted row.
 * candidates     per-row flags; rows are cleared here once they are pruned
 *                or accepted.
 * cand_threshold rows whose intersect_row() count with the current column
 *                set falls below this are dropped from the candidates.
 * components     in/out row counter; it is incremented at the start of every
 *                loop pass, including the pass that ends with a break.
 * b              b->score is raised whenever a better score is found.
 * e, allincluster are not referenced by this function.
 *
 * Reads the file-level globals rows, cols, arr_c and po.
 *
 * NOTE(review): this file also contains another definition of block_init
 * with the same signature; only one of them can be compiled in.
 */
static void block_init(Edge *e, Block *b, struct dyStack *genes, struct dyStack *scores,
		bool *candidates, const int cand_threshold, int *components, struct dyStack *allincluster)
{
	int i, score, top;
	int cnt = 0;
	int max_cnt, max_i;
	int *arr_rows, *arr_rows_b;
	bool *colcand;
	discrete *g1, *g2;

	AllocArray(arr_rows, rows);
	AllocArray(arr_rows_b, rows);
	AllocArray(colcand, cols);
	for (i = 0; i < cols; i++)
		colcand[i] = FALSE;

	/* Initial column set: columns where both seed rows hold the same
	 * non-zero value. */
	g1 = arr_c[dsItem(genes,0)];
	g2 = arr_c[dsItem(genes,1)];
	for (i = 0; i < cols; i++)
		if ((g1[i] == g2[i]) && (g1[i] != 0))
			colcand[i] = TRUE;

	for (i = 0; i < rows; i++)
	{
		arr_rows[i] = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[i]);
		arr_rows_b[i] = arr_rows[i];
	}

	/* With more than 100 rows, keep only those whose overlap reaches the
	 * value found 100 places from the end of the sorted copy.
	 * NOTE(review): assumes compare_int orders ascending - confirm. */
	if (rows > 100)
	{
		qsort(arr_rows_b, rows, sizeof *arr_rows, compare_int);
		top = arr_rows_b[rows - 100];
		for (i = 0; i < rows; i++)
			if (arr_rows[i] < top)
				candidates[i] = FALSE;
	}

	/* Neither array is needed past this point (they used to be leaked). */
	free(arr_rows_b);
	free(arr_rows);

	while (*components < rows)
	{
		max_cnt = -1;
		max_i = -1;
		(*components)++;

		/* Pick the candidate row sharing the most columns with the seed row;
		 * ties keep the lowest row index because the comparison is strict. */
		for (i = 0; i < rows; i++)
		{
			if (!candidates[i])
				continue;
			cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[i]);
			if (cnt < cand_threshold)
				candidates[i] = FALSE;
			if (cnt > max_cnt)
			{
				max_cnt = cnt;
				max_i = i;
			}
		}

		/* Stop when no candidate is left or the best overlap is too narrow. */
		if (max_cnt < po->COL_WIDTH || max_i < 0)
			break;

		score = MIN(*components, max_cnt);
		if (score > b->score)
			b->score = score;

		dsPush(genes, max_i);
		dsPush(scores, score);
		update_colcand(colcand, arr_c[dsItem(genes,0)], arr_c[max_i]);
		candidates[max_i] = FALSE;
	}

	free(colcand);
}