/* Prune each column of el_on_cl so that only the strongest cluster
 * projections remain.  Entries are accepted in order of decreasing
 * value until either their summed value reaches pct or max entries
 * have been taken.  The cluster the element is actually assigned to
 * (the first entry of the corresponding el_to_cl column) is always
 * retained: if pruning removed it, it is re-inserted with a small
 * token value.
*/
static void prune_el_on_cl
(  mclMatrix* el_to_cl     /* must be conforming */
,  mclMatrix* el_on_cl     /* this one will be pruned */
,  double pct              /* mass threshold for accepted entries */
,  int max                 /* cap on the number of accepted entries */
)
{  dim i ;
   for (i=0;i<N_COLS(el_on_cl);i++)
   {  mclv* elclvec = el_on_cl->cols+i ;
      /* cluster this element belongs to; assumes el_to_cl columns hold
       * (at least) one entry -- see the conforming contract above
      */
      long clid = el_to_cl->cols[i].ivps[0].idx ;
      double sum = 0.0 ;
      int n_others = 0 ;
      dim k = 0 ;
      mcxbool selfok = FALSE ;
      /* order entries by decreasing value */
      mclvSort(elclvec, mclpValRevCmp) ;
      /* k is post-incremented inside the test, so after the loop
       * k-1 equals the number of accepted entries
      */
      while (k++ < elclvec->n_ivps && sum < pct && n_others < max)
      {  long y = elclvec->ivps[k-1].idx ;
         if (y == clid)
         selfok = TRUE ;
         sum += elclvec->ivps[k-1].val ;
         n_others++ ;
      }
      mclvResize(elclvec, k-1) /* careful recentchange */ ;
      /* restore canonical index order */
      mclvSort(elclvec, mclpIdxCmp) ;
      if (!selfok)   /* own cluster was pruned away; re-add it */
      mclvInsertIdx(elclvec, clid, 0.01) ;
   }
}
int mclDagTest ( const mclMatrix* dag ) { mclv* v_transient = mclvCopy(NULL, dag->dom_cols) ; mclx* m_transient = NULL ; int maxdepth = 0 ; dim d ; mclvMakeCharacteristic(v_transient) ; for (d=0;d<N_COLS(dag);d++) { mclv* col = dag->cols+d ; if (mclvGetIvp(col, col->vid, NULL)) /* deemed attractor */ mclvInsertIdx(v_transient, col->vid, 0.25) ; } mclvSelectGqBar(v_transient, 0.5) ; m_transient = mclxSub(dag, v_transient, v_transient) ;if(0)mclxDebug("-", m_transient, 3, "transient") ; maxdepth = calc_depth(m_transient) ; mclxFree(&m_transient) ; mclvFree(&v_transient) ; return maxdepth ; }
/* Build a DAG from the asymmetry of mx and write it to xfdiff.
 * For every arc v->idx the reciprocal arc idx->v is looked up; the
 * smaller of the two values plays the role of child_diff and the
 * larger of parent_diff.  An edge carrying the absolute difference
 * is entered in the dag, pointing from the child (smaller value) to
 * the parent, only when child_diff <= child_diff_lq and
 * parent_diff >= parent_diff_gq.
 * NOTE(review): tab is unused here -- presumably kept for interface
 * symmetry with related routines; confirm before removing.
*/
void dag_diff_select
(  mclx* mx
,  mclTab* tab
,  mcxIO* xfdiff
,  double child_diff_lq    /* child value must not exceed this */
,  double parent_diff_gq   /* parent value must be at least this */
)
{  dim i ;
   mclx* dag = mclxAllocClone(mx) ;
   for (i=0;i<N_COLS(mx); i++)
   {  mclv* v = mx->cols+i ;
      dim j ;
      for (j=0;j<v->n_ivps;j++)
      {  dim idx = v->ivps[j].idx ;
         double valv = v->ivps[j].val ;
         /* column of the neighbour, to find the reciprocal arc */
         mclv* t = mclxGetVector(mx, idx, EXIT_ON_FAIL, NULL) ;
         mclp* p = mclvGetIvp(t, v->vid, NULL) ;
         double valt = p ? p->val : 0.0 ;   /* absent arc counts as zero */
         double delta = valv - valt ;
         double lg = valv, sm = valt ;
         double child_diff, parent_diff ;
         int v_is_child = 0 ;
         if (delta < 0)      /* reciprocal arc is the larger one */
         delta = -delta , lg=valt, sm=valv , v_is_child = 1 ;
         child_diff = sm ;
         parent_diff = lg ;
         if(0 && i==111)
         fprintf(stderr, "nb %d delta %g\n", (int) idx, delta) ;
         if (child_diff > child_diff_lq || parent_diff < parent_diff_gq)
         NOTHING ;
         else
         {  if (v_is_child)     /* edge from v (child) to idx */
            mclvInsertIdx(dag->cols+i, idx, delta) ;
            else                /* edge from idx (child) to v */
            mclvInsertIdx(dag->cols+(t-mx->cols), v->vid, delta) ;
         }
      }
   } ;
   mclxWrite(dag, xfdiff, MCLXIO_VALUE_GETENV, EXIT_ON_FAIL) ;
   mclxFree(&dag) ;
}
/* Put diagonal entries stored in diag back into mx.  Entry i of diag
 * is inserted into column i of mx; zero-valued entries are skipped.
 * Assumes diag runs parallel to the columns of mx.
*/
static void mx_readd_diagonal
(  mclx* mx
,  mclv* diag
)
{  mclp* entry = diag->ivps;
   mclp* end   = diag->ivps + diag->n_ivps;
   mclv* col   = mx->cols;

   for ( ; entry < end; entry++, col++)
   {  if (entry->val)
         mclvInsertIdx(col, entry->idx, entry->val);
   }
}
/* Interpret a DAG as a clustering.  Attractors (columns that contain
 * a loop/self entry) are extracted, and the undirected components of
 * the attractor subgraph become the cluster cores.  Every remaining
 * node is then attached to the cluster(s) of all attractor systems
 * reachable from it (via get_closure), so overlapping clusters are
 * possible.  Returns a newly allocated cluster matrix (one column
 * per cluster); the caller owns and frees it.
*/
mclMatrix* mclInterpret
(  mclMatrix* dag
)
{  mclv* v_attr = mclvCopy(NULL, dag->dom_cols) ;
   mclx* m_attr = NULL, *m_cls = NULL, *m_clst = NULL ;
   dim d ;

   mclvMakeCharacteristic(v_attr) ;
   /* mark attractors with 2.0 so the 1.5 bar below selects them */
   for (d=0;d<N_COLS(dag);d++)
   {  mclv* col = dag->cols+d ;
      if (mclvGetIvp(col, col->vid, NULL)) /* deemed attractor */
      mclvInsertIdx(v_attr, col->vid, 2.0) ;
   }
   mclvSelectGqBar(v_attr, 1.5) ;

   m_attr = mclxSub(dag, v_attr, v_attr) ;
   mclxAddTranspose(m_attr, 1.0) ;  /* make the attractor graph undirected */

   m_cls = clmUGraphComponents(m_attr, NULL) /* attractor systems as clusters */ ;
   mclvCopy(m_cls->dom_rows, dag->dom_cols) /* add all nodes to this cluster matrix */ ;
   m_clst = mclxTranspose(m_cls) /* nodes(columns) with zero neighbours need to be classified */ ;
   mclgUnionvReset(dag) /* make mx->dom-rows characteristic */ ;
   mclxFree(&m_cls) ;

   for (d=0;d<N_COLS(dag);d++)
   {  mclv* closure, *clsids ;
      if (mclvGetIvp(v_attr, dag->cols[d].vid, NULL))
      continue /* attractor already classified */ ;
      closure = get_closure(dag, dag->cols+d) /* take all [neighbours of [neighbours of [..]]] */ ;
      /* collect the clusters hit by the closure */
      clsids = mclgUnionv(m_clst, closure, NULL, SCRATCH_READY, NULL) ;
      /* NOTE(review): m_clst is indexed by the dag column offset d --
       * assumes node domains of dag and m_clst run in parallel; confirm
      */
      mclvAdd(m_clst->cols+d, clsids, m_clst->cols+d) ;
      mclvFree(&clsids) ;
      mclvFree(&closure) ;
   }

   m_cls = mclxTranspose(m_clst) ;
   mclxFree(&m_attr) ;
   mclxFree(&m_clst) ;
   mclvFree(&v_attr) ;
   return m_cls ;
}
/* Breadth-first expansion starting from column i of mx.  Repeatedly
 * calls fire_node_next until nothing is left to visit or an error
 * status comes back.  Returns the number of expansion levels, or -1
 * on error.  If seenpp is non-NULL the set of visited nodes is handed
 * to the caller (who then owns it); otherwise it is freed here.
*/
ofs fire_node
(  const mclx* mx
,  dim i
,  mclv** seenpp
)
{  mclv* start = mx->cols + i;
   mclv* seen  = mclvInsertIdx(NULL, start->vid, 1.0);
   mclv* todo  = mclvClone(start);
   mcxstatus status = STATUS_OK;
   dim n_levels = 0;

   if (0)
      fprintf(stderr, "node %d\n", (int) i);

   while (todo->n_ivps && !status)
   {  status = fire_node_next(mx, seen, todo, i);
      n_levels++;
   }

   mclvFree(&todo);

   if (seenpp)
      *seenpp = seen;
   else
      mclvFree(&seen);

   return status ? -1 : n_levels;
}
/* Add N_add new random edges to mx (upper-triangle storage: an edge
 * between column offsets xo < yo is stored in column xo).  N_edge is
 * the current edge count; generation stops early once the graph is
 * complete.  Edge values are drawn either from an (optionally skewed)
 * truncated normal distribution around l_mean[0] when l_mean is
 * non-NULL, or uniformly from [e_min, e_max] otherwise.
 * Returns the number of edges actually added.
*/
static dim do_add
(  mclx* mx
,  dim N_add               /* number of edges to add */
,  dim N_edge              /* number of edges already present */
,  double *l_mean          /* if non-NULL: normal mode, the mean */
,  double l_radius         /* normal mode: cut-off radius */
,  double l_sdev           /* normal mode: standard deviation */
,  double l_min            /* normal mode clip; active when l_min < l_max */
,  double l_max
,  double skew             /* exponent used to skew the distribution */
,  double e_min            /* uniform mode: range bounds */
,  double e_max
)
{  dim n_add = 0 ;
   while (n_add < N_add)
   {  unsigned long r = (unsigned long) random() ;
      unsigned long s = (unsigned long) random() ;
      long x, y ;
      double val ;
      mclp* ivp ;
      dim xo = r % N_COLS(mx) /* fixme, modulo is commonly recommended against */ ;
      dim yo = s % N_COLS(mx) ;
      if (xo > yo)         /* normalise to xo < yo */
      {  long zo = xo ;
         xo = yo ;
         yo = zo ;
      }
      else if (xo == yo) /* never add loops */
      continue ;
      x = mx->dom_cols->ivps[xo].idx ;
      y = mx->dom_cols->ivps[yo].idx ;
      /* graph is complete; nothing more can be added */
      if (N_edge >= N_COLS(mx) * (N_COLS(mx)-1) / 2)
      break ;
      ivp = mclvGetIvp(mx->cols+xo, y, NULL) ;
      if (ivp && ivp->val)    /* edge already present; retry */
      continue ;
      if (l_mean)
      {  do
         {  val = mcxNormalCut(l_radius, l_sdev) ;
            if (skew)
            {  val = (l_radius + val) / (2 * l_radius)
               /* ^ map (l_radius + val) to lie within [0,1] */ ;
               val = pow(val, skew) /* skew it */ ;
               val = (val * 2 * l_radius) - l_radius /* map it back */ ;
            }
            val += l_mean[0] ;
         }
         while (l_min < l_max && (val < l_min || val > l_max)) ;
      }
      /* docme: uniform */
      else
      {  val = (((unsigned long) random()) * 1.0) / RAND_MAX
         /* NOTE(review): assumes random() has the same range as rand()
          * (true on glibc, both 2^31-1) -- confirm on other platforms
         */ ;
         if (skew)
         val = pow(val, skew) ;
         val = e_min + val * (e_max - e_min) ;
      }
      if (!val)      /* zero would encode absence of the edge; retry */
      continue ;
      if(DEBUG)fprintf(stderr, "add [%d] %ld %ld value %f\n", (int) n_add, (long) x, (long) y, val) ;
      mclvInsertIdx(mx->cols+xo, y, val) ;
      N_edge++ ;
      n_add++ ;
   }
   return n_add ;
}
/* Assimilate ("clmAssimilate") small clusters into bigger ones.
 * Operates on a copy cl_adj of cl realigned so that columns are
 * ordered by increasing cluster size.  clmCastActors provides the
 * projection matrices; column d of cl_on_cl lists, for cluster d,
 * the mass it projects onto every cluster.  A cluster of size
 * <= prune_sz (prune_sz == 0 disables the size restriction) is merged
 * into the best higher-ranked cluster it projects on, if any.
 * Outputs:
 *    *cl_adjustedpp   adjusted clustering, columns realigned by
 *                     decreasing size; NULL when nothing changed
 *    *n_sink          number of clusters that were absorbed (emptied)
 *    *n_source        number of distinct absorbing clusters
 * Returns the total node count of absorbed clusters (0 = no change).
*/
static dim clm_clm_prune
(  mclx* mx
,  mclx* cl
,  dim prune_sz
,  mclx** cl_adjustedpp
,  dim* n_sink
,  dim* n_source
)
{  dim d, n_adjusted = 0 ;
   mclx* cl_adj = mclxCopy(cl) ;
   mclv* cid_affected = mclvClone(cl->dom_cols) ;
   const char* me = "clmAssimilate" ;
   double bar_affected = 1.5 ;
   mclx *el_to_cl = NULL ;
   mclx *el_on_cl = NULL ;
   mclx *cl_on_cl = NULL ;
   mclx *cl_on_el = NULL ;

   *n_sink = 0 ;
   *n_source = 0 ;

   mclvMakeConstant(cid_affected, 1.0) ;
   /* smallest clusters first; the absorption cascade below relies
    * on this ordering
   */
   mclxColumnsRealign(cl_adj, mclvSizeCmp) ;
   *cl_adjustedpp = NULL ;

   clmCastActors
   (&mx, &cl_adj, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95) ;
   mclxFree(&cl_on_el) ;

   for (d=0;d<N_COLS(cl_on_cl);d++)
   {  mclv* clthis = cl_adj->cols+d ;
      mclv* cllist = cl_on_cl->cols+d ;
      mclp* pself = mclvGetIvp(cllist, clthis->vid, NULL) ;
      double self_val = -1.0 ;
      if (pself)
         self_val = pself->val
      ,  pself->val *= 1.001 /* to push it up in case of equal weights */ ;
      if(0)fprintf(stderr, "test size %d\n", (int) clthis->n_ivps) ;
      if (prune_sz && clthis->n_ivps > prune_sz)   /* too big to be absorbed */
      continue ;
      while (1)
      {  mclv* clthat ;
         dim e ;
         if (cllist->n_ivps < 2)    /* projects on itself only */
         break ;
         mclvSort(cllist, mclpValRevCmp)
         /* now get biggest mass provided that cluster
          * ranks higher (has at least as many entries)
          *
          * fixme/todo: we probably have a slight order
          * dependency for some fringe cases. If provable
          * then either solve or document it.
         */ ;
         for (e=0;e<cllist->n_ivps;e++)
         if (cllist->ivps[e].idx >= clthis->vid)
         break /* found none or itself */ ;
         if (e == cllist->n_ivps || cllist->ivps[e].idx == clthis->vid)
         break ;
         if /* Should Not Happen */
         (  !( clthat
            =  mclxGetVector(cl_adj, cllist->ivps[e].idx, RETURN_ON_FAIL, NULL)
            )
         )
         break
         /* works for special case prune_sz == 0 */
         /* if (clthat->n_ivps + clthis->n_ivps > prune_sz) */
         /* ^iced. inconsistent behaviour as k grows. */ ;
         {  mcxLog
            (  MCX_LOG_LIST
            ,  me
            ,  "source %ld|%lu|%.3f absorbed by %ld|%lu|%.3f"
            ,  clthis->vid, (ulong) clthis->n_ivps, self_val
            ,  clthat->vid, (ulong) clthat->n_ivps, cllist->ivps[0].val
            ) ;
            n_adjusted += clthis->n_ivps ;
            (*n_sink)++
            /* note: we could from our precomputed cl_on_cl
             * obtain that A is absorbed in B, B is absorbed in C.
             * below we see that A will be merged with B,
             * and the result will then be merged with C.
             * This depends on the fact that cl_adj is ordered
             * on increasing cluster size.
            */ ;
            mcldMerge(cl_adj->cols+d, clthat, clthat) ;
            mclvResize(cl_adj->cols+d, 0) ;  /* clthis is now empty (a sink) */
            mclvInsertIdx(cid_affected, clthat->vid, 2.0) ;
         }
         break ;
      }
      mclvSort(cllist, mclpIdxCmp) ;   /* restore index order */
   }

   mclxFree(&cl_on_cl) ;
   mclxFree(&el_on_cl) ;
   mclxFree(&el_to_cl) ;

   mclxMakeCharacteristic(cl) ;
   /* keep only the 2.0 marks: clusters that absorbed another */
   mclvUnary(cid_affected, fltxGT, &bar_affected) ;
   *n_source = cid_affected->n_ivps ;
   mclvFree(&cid_affected) ;

   mclxColumnsRealign(cl_adj, mclvSizeRevCmp) ;
   if (!n_adjusted)
   {  mclxFree(&cl_adj) ;
      return 0 ;
   }
   /* remove the zeroed/emptied entries, then normalise to 1.0 */
   mclxUnary(cl_adj, fltxCopy, NULL) ;
   mclxMakeCharacteristic(cl_adj) ;
   *cl_adjustedpp = cl_adj ;
   return n_adjusted ;
}
/* Move individual nodes to a better-fitting neighbouring cluster
 * ("clmAdjust").  For every node in a small cluster (size <=
 * cls_size_max; 0 disables the bound) the coverage/mass scores of
 * its own cluster are compared against those of the alien clusters
 * it projects on (el_on_cl); when an alien cluster scores
 * sufficiently better the node is moved in the copy cl_adj.
 * Acceptance thresholds are tunable with the environment variables
 * MCL_ADJ_FMAX (default 2) and MCL_ADJ_EMASS (default 3).
 * Outputs (all NULL and return value 0 when nothing changed):
 *    *cl_adjustedpp   adjusted clustering
 *    *cid_affectedpp  clusters that donated or accepted a node
 *    *nid_affectedpp  nodes that were moved
 * Returns the number of nodes moved.  Caller has to take care of
 * mclxColumnsRealign on the result.
*/
static dim clm_clm_adjust
(  mclx* mx
,  mclx* cl
,  dim cls_size_max
,  mclx** cl_adjustedpp
,  mclv** cid_affectedpp
,  mclv** nid_affectedpp
)
{  dim i, j, n_adjusted = 0 ;
   mclx* cl_adj = mclxCopy(cl) ;
   mclv* cid_affected = mclvClone(cl->dom_cols) ;
   mclv* nid_affected = mclvClone(mx->dom_cols) ;
   double bar_affected = 1.5 ;
   const char* e1 = getenv("MCL_ADJ_FMAX") ;
   const char* e2 = getenv("MCL_ADJ_EMASS") ;
   double f1 = e1 ? atof(e1) : 2 ;
   double f2 = e2 ? atof(e2) : 3 ;
   mcxbool loggit = mcxLogGet( MCX_LOG_CELL | MCX_LOG_INFO ) ;
   clmVScore sc ;
   mclx *el_to_cl = NULL ;
   mclx *el_on_cl = NULL ;
   mclx *cl_on_cl = NULL ;
   mclx *cl_on_el = NULL ;

   *cl_adjustedpp = NULL ;
   *cid_affectedpp = NULL ;
   *nid_affectedpp = NULL ;

   clmCastActors
   (&mx, &cl, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95) ;
   mclxFree(&cl_on_cl) ;
   mclxFree(&cl_on_el) ;

   mclvMakeConstant(cid_affected, 1.0) ;
   mclvMakeConstant(nid_affected, 1.0) ;

   /* the column val field doubles as a flag: 0.5 = may still donate,
    * 1.5 = accepted a node and is locked (see the val > 1 test below)
   */
   for (i=0;i<N_COLS(cl_adj);i++)
   cl_adj->cols[i].val = 0.5
   /* Proceed with smallest clusters first.
    * Caller has to take care of mclxColumnsRealign
   */ ;
   for (i=0;i<N_COLS(cl);i++)
   {  mclv* clself = cl->cols+i
      /* Only consider nodes in clusters of
       * size <= cls_size_max
      */ ;
      if (cls_size_max && clself->n_ivps > cls_size_max)
      break
      /* Clusters that have been marked for inclusion
       * cannot play.
      */ ;
      if (cl_adj->cols[i].val > 1)
      continue ;
      for (j=0;j<clself->n_ivps;j++)
      {  long nid = clself->ivps[j].idx ;
         long nos = mclvGetIvpOffset(mx->dom_cols, nid, -1) ;
         mclv* clidvec = mclxGetVector(el_on_cl, nid, RETURN_ON_FAIL, NULL) ;
         double eff_alien_bsf = 0.0, eff_alien_max_bsf = 0.0 /* best so far*/ ;
         double eff_self = 0.0, eff_self_max = 0.0 ;
         long cid_alien = -1, cid_self = -1 ;
         clmVScore sc_self = { 0 }, sc_alien = { 0 } ;
         dim f ;
         if (nos < 0 || !clidvec)
         {  mcxErr
            ("clmDumpNodeScores panic", "node <%ld> does not belong", nid) ;
            continue ;
         }
         /* score the node against its own cluster */
         clmVScanDomain(mx->cols+nos, clself, &sc) ;
         clmVScoreCoverage(&sc, &eff_self, &eff_self_max) ;
         cid_self = clself->vid ;
         sc_self = sc ;
         if (loggit)
         mcxLog2
         (  us
         ,  "node %ld in cluster %ld eff %.3f,%.3f sum %.3f"
         ,  nid
         ,  cid_self
         ,  eff_self
         ,  eff_self_max
         ,  sc.sum_i
         ) ;
         /* score the node against every alien cluster it projects on */
         for (f=0;f<clidvec->n_ivps;f++)
         {  long cid = clidvec->ivps[f].idx ;
            mclv* clvec = mclxGetVector(cl, cid, RETURN_ON_FAIL, NULL)
            /* ^ overdoing: cid == clvec->vid */ ;
            double eff, eff_max ;
            if (!clvec)
            {  mcxErr
               (  "clmAdjust panic"
               ,  "cluster <%ld> node <%ld> mishap"
               ,  cid
               ,  nid
               ) ;
               continue ;
            }
            /* fixme: document or remove first condition
             *
            */
            if ((0 && clvec->n_ivps <= clself->n_ivps) || clvec->vid == cid_self)
            continue ;
            clmVScanDomain(mx->cols+nos, clvec, &sc) ;
            clmVScoreCoverage(&sc, &eff, &eff_max)
#if 0
#  define PIVOT eff > eff_alien_bsf
#else
#  define PIVOT eff_max > eff_alien_max_bsf
#endif
            ;
            if
            (  PIVOT
            || sc.sum_i >= 0.5
            )
               eff_alien_bsf = eff
            ,  eff_alien_max_bsf = eff_max
            ,  cid_alien = clvec->vid
            ,  sc_alien = sc ;
            if (sc.sum_i >= 0.5)    /* majority of the mass; stop looking */
            break ;
         }
         if (loggit)
         mcxLog2
         (  us
         ,  " -> best alien %ld eff %.3f,%.3f sum %.3f"
         ,  cid_alien
         ,  eff_alien_bsf
         ,  eff_alien_max_bsf
         ,  sc_alien.sum_i
         )
         /* below: use sum_i as mass fraction
          * (clmAdjust framework uses stochastic
          * matrix)
         */ ;
         if
         (  cid_alien >= 0
         && cid_self >= 0
         && f1 * sc_alien.max_i >= sc_self.max_i
         && (  (  eff_alien_bsf > eff_self
               && sc_alien.sum_i > sc_self.sum_i
               )
            || (  pow(sc_alien.sum_i, f2) >= sc_self.sum_i
               && pow(eff_self, f2) <= eff_alien_bsf
               )
            )
            /* So, if max is reasonable
             * and efficiency is better and mass is better
             * or if mass is ridiculously better -> move
             * Somewhat intricate and contrived, yes.
            */
         )
         {  mclv* acceptor = mclxGetVector(cl_adj, cid_alien, RETURN_ON_FAIL, NULL) ;
            mclv* donor = mclxGetVector(cl_adj, cid_self, RETURN_ON_FAIL, NULL) ;
            if (!donor || !acceptor || acceptor == donor)
            continue ;
            /* zero the node in the donor, add it to the acceptor;
             * zero-valued entries are removed at the end
            */
            mclvInsertIdx(donor, nid, 0.0) ;
            mclvInsertIdx(acceptor, nid, 1.0) ;
            acceptor->val = 1.5 ;   /* lock the acceptor (flag; see above) */
            if (mcxLogGet(MCX_LOG_LIST))
            {  mclv* nb = mx->cols+nos ;
               double mxv = mclvMaxValue(nb) ;
               double avg = nb->n_ivps ? mclvSum(nb) / nb->n_ivps : -1.0 ;
               mcxLog
               (  MCX_LOG_LIST
               ,  us
               ,  "mov %ld (%ld %.2f %.2f)"
                  " %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)"
                  " to %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)"
               ,  nid
               ,  (long) nb->n_ivps, mxv, avg
               ,  cid_self
               ,  eff_self, eff_self_max, sc_self.sum_i, sc_self.max_i
               ,  (ulong) (sc_self.n_meet + sc_self.n_ddif)
               ,  cid_alien
               ,  eff_alien_bsf, eff_alien_max_bsf, sc_alien.sum_i, sc_alien.max_i
               ,  (ulong) (sc_alien.n_meet + sc_alien.n_ddif)
               ) ;
            }
            n_adjusted++ ;
            mclvInsertIdx(cid_affected, cid_alien, 2.0) ;
            mclvInsertIdx(cid_affected, cid_self, 2.0) ;
            mclvInsertIdx(nid_affected, nid, 2.0) ;
         }
      }
   }

   mclxFree(&el_on_cl) ;
   mclxFree(&el_to_cl) ;

   /* clear the val flags again */
   for (i=0;i<N_COLS(cl_adj);i++)
   cl_adj->cols[i].val = 0.0 ;
   mclxMakeCharacteristic(cl) ;

   if (!n_adjusted)
   {  mclxFree(&cl_adj) ;
      mclvFree(&cid_affected) ;
      mclvFree(&nid_affected) ;
      return 0 ;
   }

   mclxUnary(cl_adj, fltxCopy, NULL) ;
   mclxMakeCharacteristic(cl_adj)
   /* FIRST REMOVE ENTRIES set to zero (sssst now .. */
   /* ...) and THEN make it characteristic again */ ;
   /* keep only the 2.0 marks made above */
   mclvUnary(cid_affected, fltxGT, &bar_affected) ;
   mclvUnary(nid_affected, fltxGT, &bar_affected) ;
   *cl_adjustedpp = cl_adj ;
   *cid_affectedpp = cid_affected ;
   *nid_affectedpp = nid_affected ;
   return n_adjusted ;
}
/* This aids in finding heuristically likely starting points for long
 * shortest paths, by looking at dead ends in the lattice.
 * Performs a BFS from vec->vid over mx and returns the number of
 * levels traversed (a rough eccentricity of the starting node).
 * Dead ends (nodes whose expansion reaches nothing new) that still
 * have a priority entry get their priority raised by the current
 * depth + 1.
 *    rough_scratch   per-node visit marks: 0 unseen, 1 seen,
 *                    2 on the next frontier
 *    rough_priority  per-node priority; -1 removes a node from the list
 * NOTE(review): both arrays are indexed by node id and the memset
 * covers N_COLS(mx) bytes -- assumes a canonical 0..N-1 domain; confirm.
 * experimental, woefully underdocumented.
*/
static dim diameter_rough
(  mclv* vec
,  mclx* mx
,  u8* rough_scratch
,  long* rough_priority
)
{  mclv* curr = mclvInsertIdx(NULL, vec->vid, 1.0) ;
   mclpAR* par = mclpARensure(NULL, 1024) ;
   dim d = 0, n_dead_ends = 0, n_dead_ends_res = 0 ;

   memset(rough_scratch, 0, N_COLS(mx)) ;
   rough_scratch[vec->vid] = 1 /* seen */ ;
   rough_priority[vec->vid] = -1 /* remove from priority list */ ;

   while (1)
   {  mclp* currivp = curr->ivps ;
      dim t ;
      mclpARreset(par) ;
      /* expand every node on the current BFS level */
      while (currivp < curr->ivps + curr->n_ivps)
      {  mclv* ls = mx->cols+currivp->idx ;
         mclp* newivp = ls->ivps ;
         int hit = 0 ;
         while (newivp < ls->ivps + ls->n_ivps)
         {  u8* tst = rough_scratch+newivp->idx ;
            /* unseen, or already on the next frontier */
            if (!*tst || *tst & 2)
            {  if (!*tst)
               mclpARextend(par, newivp->idx, 1.0) ;
               *tst = 2 ;
               hit = 1 ;
            }
            newivp++ ;
         }
         /* nothing new reached: currivp is a dead end at depth d */
         if (!hit && rough_priority[currivp->idx] >= 0)
            rough_priority[currivp->idx] += d+1
         ,  n_dead_ends_res++ ;
         else if (!hit)
         n_dead_ends++
         /* ,fprintf(stderr, "[%ld->%ld]", (long) currivp->idx, (long) rough_priority[currivp->idx]) */ ;
#if 0
if (currivp->idx == 115 || currivp->idx == 128)
fprintf(stdout, "pivot %d node %d d %d dead %d pri %d\n", (int) vec->vid, (int) currivp->idx, d, (int) (1-hit), (int) rough_priority[currivp->idx])
#endif
         ;
         currivp++ ;
      }
      if (!par->n_ivps)    /* next frontier is empty; BFS done */
      break ;
      d++ ;
      mclvFromIvps(curr, par->ivps, par->n_ivps) ;
      for (t=0;t<curr->n_ivps;t++)
      rough_scratch[curr->ivps[t].idx] = 1 ;
   }
   mclvFree(&curr) ;
   mclpARfree(&par) ;
   if(0)fprintf(stdout, "deadends %d / %d\n", (int) n_dead_ends, (int) n_dead_ends_res) ;
   return d ;
}