/*
 * Decide whether edge `e` is acceptable as the seed of a new block, given
 * the `block_id` blocks already collected in `bb`.
 *
 * Returns FALSE when a single existing block already holds both genes of
 * the edge.  Returns TRUE when at least one of the two genes is in no
 * existing block.  Otherwise the first block holding gene_one and the first
 * block holding gene_two are compared: the seed is rejected if any gene
 * index occurs in both of them, or if e->score is below the larger of their
 * block_cols; in every remaining case it is accepted.
 *
 * `rows` is defined outside this function and sizes the per-gene tally.
 */
static bool check_seed(Edge *e, Block **bb, const int block_id)
{
    int profiles[rows];
    int idx, owner_one, owner_two, widest;

    /* A block that already contains both endpoints rules the seed out. */
    for (idx = 0; idx < block_id; idx++) {
        if (!isInStack(bb[idx]->genes, e->gene_one))
            continue;
        if (isInStack(bb[idx]->genes, e->gene_two))
            return FALSE;
    }

    /* First block (lowest index) that contains gene_one, or -1. */
    owner_one = -1;
    for (idx = 0; idx < block_id; idx++) {
        if (isInStack(bb[idx]->genes, e->gene_one)) {
            owner_one = idx;
            break;
        }
    }

    /* First block (lowest index) that contains gene_two, or -1. */
    owner_two = -1;
    for (idx = 0; idx < block_id; idx++) {
        if (isInStack(bb[idx]->genes, e->gene_two)) {
            owner_two = idx;
            break;
        }
    }

    if (owner_one != -1 && owner_two != -1) {
        /* Tally, per gene index, how many of the two blocks list it. */
        for (idx = 0; idx < rows; idx++)
            profiles[idx] = 0;
        for (idx = 0; idx < bb[owner_one]->block_rows; idx++)
            profiles[dsItem(bb[owner_one]->genes, idx)] += 1;
        for (idx = 0; idx < bb[owner_two]->block_rows; idx++)
            profiles[dsItem(bb[owner_two]->genes, idx)] += 1;

        /* Any gene shared by the two blocks rejects the seed. */
        for (idx = 0; idx < rows; idx++) {
            if (profiles[idx] > 1)
                return FALSE;
        }

        /* The edge score must reach the wider block's column count. */
        widest = MAX(bb[owner_one]->block_cols, bb[owner_two]->block_cols);
        if (e->score < widest)
            return FALSE;
        return TRUE;
    } else {
        return TRUE;
    }

    /* Both branches above return, so control never gets here. */
    err("never see this message\n");
    return FALSE;
}
// Node v is the i-the node // Graph is passed to use getOutgoingEdges void StronglyConnectedComponentsVisitor::strongconnect(Graph* g, Node *v, int i) { // Set the depth index for v to the smallest unused index _indexTable[i] = _index; _lowlinkTable[i] = _index; _index = _index +1; S.push_front(v); list<Edge*> outgoing = g->getOutgoingEdges(v); // Consider successors of v // For each (v, w) in E for (list<Edge*>::iterator it = outgoing.begin() ; it != outgoing.end(); ++it) { // current w Node* w = (*it)->getTarget(); // if w's index is undefined if ( _indexTable[g->nodeToIndex(w)] == -1) { // Successor w has not yet been visited; recurse on it strongconnect(g, w, g->nodeToIndex(w)); // v.lowlink := min(v.lowlink, w.lowlink) _lowlinkTable[g->nodeToIndex(v)] = min( _lowlinkTable[g->nodeToIndex(v)], _lowlinkTable[g->nodeToIndex(w)] ); } // else if ( w is in S ) else if ( isInStack(w) ) { // Successor w is in stack S and hence in the current SCC // v.lowlink := min(v.lowlink, w.index) _lowlinkTable[g->nodeToIndex(v)] = min(_lowlinkTable[g->nodeToIndex(v)], _indexTable[g->nodeToIndex(w)] ); } } // If v is a root node, pop the stack and generate a SCC if (_lowlinkTable[g->nodeToIndex(v)] == _indexTable[g->nodeToIndex(v)]) { Node * w; // start a new strongly connected component vector<Node*> scc; do { w = S.front(); S.pop_front(); // add w to current strongly connected component scc.push_back(w); } while (!(w==v)); // add the current strongly connected component to the vector of strongly connected component _SCC.push_back(scc); } }
/*
 * Core algorithm.
 *
 * Walks the first `n` edges of `el` in order, tries each acceptable edge as
 * the seed of a new block, and stores every block that ends up with at
 * least one column in a local block list.  The list is then handed to
 * block_enrichment() and report_blocks() together with `fw`.
 *
 *   fw : output stream, passed through to block_enrichment()/report_blocks()
 *   el : array of at least `n` edge pointers, visited front to back
 *   n  : number of edges to consider
 *
 * Returns whatever report_blocks() returns.
 *
 * The search stops early once po->SCH_BLOCK blocks have been collected.
 * Temporary stacks created here (genes, scores, the per-seed b_genes) and
 * blocks that are rejected for having no columns are released before the
 * function moves on.
 */
int cluster (FILE *fw, Edge **el, int n)
{
    int block_id = 0;
    Block **bb;
    int allocated = po->SCH_BLOCK;
    AllocArray(bb, allocated);

    Edge *e;
    Block *b;
    struct dyStack *genes, *scores, *b_genes, *allincluster;

    int i, j, k, components;

    AllocArray(profile, cols);
    for (j = 0; j < cols; j++)
        AllocArray(profile[j], sigma);

    genes = dsNew(rows);
    scores = dsNew(rows);
    allincluster = dsNew(rows);

    bool *candidates;
    AllocArray(candidates, rows);

    for (i = 0; i < n; i++)
    {
        e = *el++;

        /* check if both genes already enumerated in previous blocks */
        bool flag = TRUE;
        /* speed up the program if the rows bigger than 250 */
        if (rows > 250)
        {
            if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
                flag = FALSE;
            else if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
                flag = FALSE;
            else if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
                flag = FALSE;
        }
        else
        {
            flag = check_seed(e, bb, block_id);
            if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
                flag = FALSE;
            if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
                flag = FALSE;
        }
        if (!flag)
            continue;

        for (j = 0; j < cols; j++)
            for (k = 0; k < sigma; k++)
                profile[j][k] = 0;

        /* the block must exist before any of its fields can be used */
        AllocVar(b);
        /* initial value of b->score */
        b->score = MIN(2, e->score);

        /* initialize the stacks genes and scores: every slot is first
         * filled with -1, then the stacks are emptied again, so slots above
         * the live entries read as -1 when the trace-back below looks at
         * scores[k+1] (assumes dsClear leaves the stored items untouched) */
        int ii;
        dsClear(genes);
        dsClear(scores);
        for (ii = 0; ii < rows; ii++)
        {
            dsPush(genes,-1);
            dsPush(scores,-1);
        }
        dsClear(genes);
        dsClear(scores);

        dsPush(genes, e->gene_one);
        dsPush(genes, e->gene_two);
        dsPush(scores, 1);
        dsPush(scores, b->score);

        /* branch-and-cut condition for seed expansion */
        int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
        if (cand_threshold < 2)
            cand_threshold = 2;

        /* maintain a candidate list to avoid looping through all rows */
        for (j = 0; j < rows; j++)
            candidates[j] = TRUE;
        candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
        components = 2;

        /* expansion step, generate a bicluster without noise */
        block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

        /* track back to find the genes by which we get the best score:
         * stop at the last position of a run of scores equal to b->score.
         * NOTE(review): if no position matches, k ends equal to components
         * and the indexing below goes one slot past the live entries --
         * confirm that block_init always leaves a matching score. */
        for (k = 0; k < components; k++)
        {
            if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score))
                break;
        }
        components = k + 1;

        int ki;
        for (ki=0; ki < rows; ki++)
            candidates[ki] = TRUE;

        for (ki=0; ki < components - 1 ; ki++)
        {
            seed_update(arr_c[dsItem(genes,ki)]);
            candidates[dsItem(genes,ki)] = FALSE;
        }
        candidates[dsItem(genes,k)] = FALSE;
        genes->top = k ;

        int cnt = 0;
        bool *colcand;
        AllocArray(colcand, cols);
        for (ki = 0; ki < cols; ki++)
            colcand[ki] = FALSE;

        /* add columns satisfy the conservative r */
        seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);

        /* add some new possible genes */
        int m_cnt=0;
        continuous KL_score=0;
        discrete *sub_array;
        /* NOTE(review): the buffers returned by get_intersect_row() and
         * get_intersect_reverse_row() are never released in this function;
         * if those helpers heap-allocate, that is one leak per tested row --
         * confirm ownership before adding a free(). */
        for ( ki = 0; ki < rows; ki++)
        {
            if (po->IS_list && !sublist[ki])
                continue;
            m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
            if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
            {
                sub_array = get_intersect_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
                KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
                if (KL_score>=b->significance * po->TOLERANCE)
                {
                    dsPush(genes,ki);
                    components++;
                    candidates[ki] = FALSE;
                }
            }
        }
        b->block_rows_pre = components;

        /* add genes that negative regulated to the consensus */
        for ( ki = 0; ki < rows; ki++)
        {
            if (po->IS_list && !sublist[ki])
                continue;
            m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
            if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
            {
                sub_array = get_intersect_reverse_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
                KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
                if (KL_score>=b->significance * po->TOLERANCE)
                {
                    dsPush(genes,ki);
                    components++;
                    candidates[ki] = FALSE;
                }
            }
        }
        free(colcand);

        /* save the current cluster: only the first block_rows_pre genes
         * (those collected before the negative-regulation pass) */
        b_genes = dsNew(b->block_rows_pre);
        for (ki = 0; ki < b->block_rows_pre; ki++)
            dsPush(b_genes, dsItem(genes,ki));

        /* store gene arrays inside block */
        b->genes = dsNew(components);
        b->conds = dsNew(cols);

        scan_block(b_genes, b);
        /* b_genes is not referenced again in this function; release it so
         * it is not leaked on every seed (assumes scan_block does not keep
         * the pointer) */
        free(b_genes);

        if (b->block_cols == 0)
        {
            /* rejected block: nothing else refers to it, so release it */
            free(b->genes);
            free(b->conds);
            free(b);
            continue;
        }
        b->block_rows = components;
        /* b->score is kept as is (alternative: block_rows * block_cols) */

        dsClear(b->genes);
        for ( ki=0; ki < components; ki++)
            dsPush(b->genes,dsItem(genes,ki));
        for (ki = 0; ki < components; ki++)
            if (!isInStack(allincluster, dsItem(genes,ki)))
                dsPush(allincluster,dsItem(genes,ki));

        /* save the current block b to the block list bb so that the blocks
         * can be sorted by their score later */
        bb[block_id++] = b;

        /* reaching the results number limit */
        if (block_id == po->SCH_BLOCK)
            break;
        verboseDot();
    }
    putchar('\n');

    /* free-up the candidate list and the working stacks */
    free(candidates);
    free(allincluster);
    free(genes);
    free(scores);

    block_enrichment(fw, bb, block_id);
    return report_blocks(fw, bb, block_id);
}
/*
 * Core algorithm.
 *
 * Walks the first `n` edges of `el` in order, tries each acceptable edge as
 * the seed of a new block, and stores every block that ends up with at
 * least one column in a local block list, which is finally handed to
 * report_blocks() together with `fw`.
 *
 *   fw : output stream, passed through to report_blocks()
 *   el : array of at least `n` edge pointers, visited front to back
 *   n  : number of edges to consider
 *
 * Returns whatever report_blocks() returns.
 *
 * The search stops early once po->SCH_BLOCK blocks have been collected.
 * Temporary stacks created here (genes, scores, the per-seed b_genes) and
 * blocks that are rejected for having no columns are released before the
 * function moves on.
 */
int cluster (FILE *fw, Edge **el, int n)
{
    int block_id = 0;
    Block **bb;
    int allocated = po->SCH_BLOCK;
    AllocArray(bb, allocated);

    Edge *e;
    Block *b;
    struct dyStack *genes, *scores, *b_genes, *allincluster;

    int i, j, k, components;

    AllocArray(profile, cols);
    for (j = 0; j < cols; j++)
        AllocArray(profile[j], sigma);

    genes = dsNew(rows);
    scores = dsNew(rows);
    allincluster = dsNew(rows);

    bool *candidates;
    AllocArray(candidates, rows);

    for (i = 0; i < n; i++)
    {
        e = *el++;

        /* check if both genes already enumerated in previous blocks */
        bool flag = TRUE;
        /* speed up the program if the rows bigger than 200 */
        if (rows > 200)
        {
            if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
                flag = FALSE;
        }
        else
        {
            flag = check_seed(e, bb, block_id);
        }
        if (!flag)
            continue;

        for (j = 0; j < cols; j++)
            for (k = 0; k < sigma; k++)
                profile[j][k] = 0;

        AllocVar(b);
        b->score = MIN(2, e->score);

        /* initialize the stacks genes and scores: every slot is first
         * filled with -1, then the stacks are emptied again, so slots above
         * the live entries read as -1 when the trace-back below looks at
         * scores[k+1] (assumes dsClear leaves the stored items untouched) */
        int ii;
        dsClear(genes);
        dsClear(scores);
        for (ii = 0; ii < rows; ii++)
        {
            dsPush(genes,-1);
            dsPush(scores,-1);
        }
        dsClear(genes);
        dsClear(scores);

        dsPush(genes, e->gene_one);
        dsPush(genes, e->gene_two);
        dsPush(scores, 1);
        dsPush(scores, b->score);

        /* branch-and-cut condition for seed expansion */
        int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
        if (cand_threshold < 2)
            cand_threshold = 2;

        /* maintain a candidate list to avoid looping through all rows */
        for (j = 0; j < rows; j++)
            candidates[j] = TRUE;
        candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
        components = 2;

        /* expansion step, generate a bicluster without noise */
        block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

        /* track back to find which genes produce the best score: stop at
         * the last position of a run of scores equal to b->score.
         * NOTE(review): if no position matches, k ends equal to components
         * and the indexing below goes one slot past the live entries --
         * confirm that block_init always leaves a matching score. */
        for (k = 0; k < components; k++)
            if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score))
                break;
        components = k + 1;

        int ki;
        for (ki=0; ki < rows; ki++)
            candidates[ki] = TRUE;

        for (ki=0; ki < components - 1 ; ki++)
        {
            seed_update(arr_c[dsItem(genes,ki)]);
            candidates[dsItem(genes,ki)] = FALSE;
        }
        candidates[dsItem(genes,k)] = FALSE;
        genes->top = k ;

        int cnt = 0;
        bool *colcand;
        AllocArray(colcand, cols);
        for (ki = 0; ki < cols; ki++)
            colcand[ki] = FALSE;

        /* add columns satisfy the conservative r */
        seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);

        /* add some new possible genes */
        int m_cnt;
        for ( ki = 0; ki < rows; ki++)
        {
            m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
            if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
            {
                dsPush(genes,ki);
                components++;
                candidates[ki] = FALSE;
            }
        }
        b->block_rows_pre = components;

        /* add genes that negative regulated to the consensus */
        for ( ki = 0; ki < rows; ki++)
        {
            m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
            if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
            {
                dsPush(genes,ki);
                components++;
                candidates[ki] = FALSE;
            }
        }
        free(colcand);

        /* save the current cluster: only the first block_rows_pre genes
         * (those collected before the negative-regulation pass) */
        b_genes = dsNew(b->block_rows_pre);
        for (ki = 0; ki < b->block_rows_pre; ki++)
            dsPush(b_genes, dsItem(genes,ki));

        /* store gene arrays inside block */
        b->genes = dsNew(components);
        b->conds = dsNew(cols);

        scan_block(b_genes, b);
        /* b_genes is not referenced again in this function; release it so
         * it is not leaked on every seed (assumes scan_block does not keep
         * the pointer) */
        free(b_genes);

        if (b->block_cols == 0)
        {
            /* rejected block: nothing else refers to it, so release it */
            free(b->genes);
            free(b->conds);
            free(b);
            continue;
        }
        b->block_rows = components;
        b->score = b->block_rows * b->block_cols;

        dsClear(b->genes);
        for ( ki=0; ki < components; ki++)
            dsPush(b->genes,dsItem(genes,ki));
        for (ki = 0; ki < components; ki++)
            if (!isInStack(allincluster, dsItem(genes,ki)))
                dsPush(allincluster,dsItem(genes,ki));

        bb[block_id++] = b;

        /* reaching the results number limit */
        if (block_id == po->SCH_BLOCK)
            break;
        verboseDot();
    }
    putchar('\n');

    /* free-up the candidate list and the working stacks */
    free(candidates);
    free(allincluster);
    free(genes);
    free(scores);

    return report_blocks(fw, bb, block_id);
}