static mclv* matrix_vector
(  const mclx*  mx
,  const mclv*  vec
)
   {  mclv* res  =  mclvClone(mx->dom_rows)
   ;  dim i, j
   ;  mclvMakeConstant(res, 0.0)

   ;  for (i=0;i<vec->n_ivps;i++)
      {  mclv* c = mx->cols + vec->ivps[i].idx
      ;  for (j=0;j<c->n_ivps;j++)
         res->ivps[c->ivps[j].idx].val += 1.0
   ;  }
      mclvUnary(res, fltxCopy, NULL)
   ;  return res
;  }
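
/* Illustrative note, not part of the original source: matrix_vector counts,
 * for every row index r in mx->dom_rows, how many of the columns listed in
 * vec have an entry at r; edge weights are ignored, so the result is an
 * unweighted count.  This assumes (as the indexing above does) that the
 * domains are canonical so that mx->cols + idx addresses column idx.  The
 * sketch below is disabled with #if 0; the helper name show_member_counts
 * and its variables are invented for illustration.
*/
#if 0
static void show_member_counts
(  const mclx*  mx
,  const mclv*  cls        /* e.g. one cluster from a clustering matrix */
)
   {  mclv* count = matrix_vector(mx, cls)
   ;  dim t
   ;  for (t=0;t<count->n_ivps;t++)
      fprintf
      (  stderr
      ,  "node %ld touches %d members of the cluster\n"
      ,  (long) count->ivps[t].idx
      ,  (int)  count->ivps[t].val
      )
   ;  mclvFree(&count)
;  }
#endif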
static dim clm_clm_prune
(  mclx*  mx
,  mclx*  cl
,  dim    prune_sz
,  mclx** cl_adjustedpp
,  dim*   n_sink
,  dim*   n_source
)
   {  dim d, n_adjusted = 0
   ;  mclx* cl_adj = mclxCopy(cl)
   ;  mclv* cid_affected = mclvClone(cl->dom_cols)
   ;  const char* me = "clmAssimilate"
   ;  double bar_affected = 1.5
   ;  mclx *el_to_cl = NULL
   ;  mclx *el_on_cl = NULL
   ;  mclx *cl_on_cl = NULL
   ;  mclx *cl_on_el = NULL

   ;  *n_sink = 0
   ;  *n_source = 0

   ;  mclvMakeConstant(cid_affected, 1.0)
   ;  mclxColumnsRealign(cl_adj, mclvSizeCmp)
   ;  *cl_adjustedpp = NULL

   ;  clmCastActors
      (&mx, &cl_adj, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95)
   ;  mclxFree(&cl_on_el)

   ;  for (d=0;d<N_COLS(cl_on_cl);d++)
      {  mclv* clthis   =  cl_adj->cols+d
      ;  mclv* cllist   =  cl_on_cl->cols+d
      ;  mclp* pself    =  mclvGetIvp(cllist, clthis->vid, NULL)
      ;  double self_val = -1.0

      ;  if (pself)
            self_val = pself->val
         ,  pself->val *= 1.001     /* to push it up in case of equal weights */

      ;  if (0)
         fprintf(stderr, "test size %d\n", (int) clthis->n_ivps)

      ;  if (prune_sz && clthis->n_ivps > prune_sz)
         continue

      ;  while (1)
         {  mclv* clthat
         ;  dim e

         ;  if (cllist->n_ivps < 2)
            break

         ;  mclvSort(cllist, mclpValRevCmp)

            /* now get biggest mass provided that cluster
             * ranks higher (has at least as many entries)
             *
             * fixme/todo: we probably have a slight order
             * dependency for some fringe cases. If provable
             * then either solve or document it.
            */
         ;  for (e=0;e<cllist->n_ivps;e++)
            if (cllist->ivps[e].idx >= clthis->vid)
            break

                                    /* found none or itself */
         ;  if (e == cllist->n_ivps || cllist->ivps[e].idx == clthis->vid)
            break

         ;  if    /* Should Not Happen */
            (! ( clthat
               = mclxGetVector(cl_adj, cllist->ivps[e].idx, RETURN_ON_FAIL, NULL)
            )  )
            break

               /* works for special case prune_sz == 0 */
               /* if (clthat->n_ivps + clthis->n_ivps > prune_sz) */
               /*    ^iced. inconsistent behaviour as k grows.    */

         ;  {  mcxLog
               (  MCX_LOG_LIST
               ,  me
               ,  "source %ld|%lu|%.3f absorbed by %ld|%lu|%.3f"
               ,  clthis->vid, (ulong) clthis->n_ivps, self_val
               ,  clthat->vid, (ulong) clthat->n_ivps, cllist->ivps[0].val
               )
            ;  n_adjusted += clthis->n_ivps
            ;  (*n_sink)++

               /* note: we could from our precomputed cl_on_cl
                * obtain that A is absorbed in B, B is absorbed in C.
                * below we see that A will be merged with B,
                * and the result will then be merged with C.
                * This depends on the fact that cl_adj is ordered
                * on increasing cluster size.
               */
            ;  mcldMerge(cl_adj->cols+d, clthat, clthat)
            ;  mclvResize(cl_adj->cols+d, 0)
            ;  mclvInsertIdx(cid_affected, clthat->vid, 2.0)
         ;  }
            break
      ;  }
         mclvSort(cllist, mclpIdxCmp)
   ;  }

      mclxFree(&cl_on_cl)
   ;  mclxFree(&el_on_cl)
   ;  mclxFree(&el_to_cl)

   ;  mclxMakeCharacteristic(cl)
   ;  mclvUnary(cid_affected, fltxGT, &bar_affected)
   ;  *n_source = cid_affected->n_ivps
   ;  mclvFree(&cid_affected)

   ;  mclxColumnsRealign(cl_adj, mclvSizeRevCmp)
   ;  if (!n_adjusted)
      {  mclxFree(&cl_adj)
      ;  return 0
   ;  }

      mclxUnary(cl_adj, fltxCopy, NULL)
   ;  mclxMakeCharacteristic(cl_adj)
   ;  *cl_adjustedpp = cl_adj
   ;  return n_adjusted
;  }
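
/* Illustrative sketch, not from the original source: a hypothetical driver
 * for clm_clm_prune.  The routine assimilates clusters of size at most
 * prune_sz into an overlapping cluster that ranks at least as high (via the
 * cl_on_cl projection), and reports how many node memberships moved (return
 * value), how many clusters were absorbed (n_sink) and how many clusters
 * absorbed others (n_source).  The helper name assimilate_small and the
 * threshold value 5 are invented for this sketch.
*/
#if 0
static void assimilate_small
(  mclx*  mx
,  mclx*  cl
)
   {  mclx* cl_new = NULL
   ;  dim n_sink = 0, n_source = 0
   ;  dim n_moved = clm_clm_prune(mx, cl, 5, &cl_new, &n_sink, &n_source)

   ;  if (n_moved)
      fprintf
      (  stderr
      ,  "%d node memberships moved; %d clusters absorbed by %d clusters\n"
      ,  (int) n_moved
      ,  (int) n_sink
      ,  (int) n_source
      )
   ;  if (cl_new)
      mclxFree(&cl_new)
;  }
#endif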
static dim clm_clm_adjust
(  mclx*  mx
,  mclx*  cl
,  dim    cls_size_max
,  mclx** cl_adjustedpp
,  mclv** cid_affectedpp
,  mclv** nid_affectedpp
)
   {  dim i, j, n_adjusted = 0
   ;  mclx* cl_adj = mclxCopy(cl)
   ;  mclv* cid_affected = mclvClone(cl->dom_cols)
   ;  mclv* nid_affected = mclvClone(mx->dom_cols)
   ;  double bar_affected = 1.5
   ;  const char* e1 = getenv("MCL_ADJ_FMAX")
   ;  const char* e2 = getenv("MCL_ADJ_EMASS")
   ;  double f1 = e1 ? atof(e1) : 2
   ;  double f2 = e2 ? atof(e2) : 3
   ;  mcxbool loggit = mcxLogGet( MCX_LOG_CELL | MCX_LOG_INFO )
   ;  clmVScore sc
   ;  mclx *el_to_cl = NULL
   ;  mclx *el_on_cl = NULL
   ;  mclx *cl_on_cl = NULL
   ;  mclx *cl_on_el = NULL

   ;  *cl_adjustedpp  = NULL
   ;  *cid_affectedpp = NULL
   ;  *nid_affectedpp = NULL

   ;  clmCastActors
      (&mx, &cl, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95)
   ;  mclxFree(&cl_on_cl)
   ;  mclxFree(&cl_on_el)

   ;  mclvMakeConstant(cid_affected, 1.0)
   ;  mclvMakeConstant(nid_affected, 1.0)

   ;  for (i=0;i<N_COLS(cl_adj);i++)
      cl_adj->cols[i].val = 0.5

         /* Proceed with smallest clusters first.
          * Caller has to take care of mclxColumnsRealign
         */
   ;  for (i=0;i<N_COLS(cl);i++)
      {  mclv* clself = cl->cols+i

            /* Only consider nodes in clusters of
             * size <= cls_size_max
            */
      ;  if (cls_size_max && clself->n_ivps > cls_size_max)
         break

            /* Clusters that have been marked for inclusion
             * cannot play.
            */
      ;  if (cl_adj->cols[i].val > 1)
         continue

      ;  for (j=0;j<clself->n_ivps;j++)
         {  long nid  =  clself->ivps[j].idx
         ;  long nos  =  mclvGetIvpOffset(mx->dom_cols, nid, -1)
         ;  mclv* clidvec = mclxGetVector(el_on_cl, nid, RETURN_ON_FAIL, NULL)
         ;  double eff_alien_bsf = 0.0, eff_alien_max_bsf = 0.0   /* best so far */
         ;  double eff_self = 0.0, eff_self_max = 0.0
         ;  long cid_alien = -1, cid_self = -1
         ;  clmVScore sc_self = { 0 }, sc_alien = { 0 }
         ;  dim f

         ;  if (nos < 0 || !clidvec)
            {  mcxErr
               ("clmDumpNodeScores panic", "node <%ld> does not belong", nid)
            ;  continue
         ;  }

            clmVScanDomain(mx->cols+nos, clself, &sc)
         ;  clmVScoreCoverage(&sc, &eff_self, &eff_self_max)
         ;  cid_self = clself->vid
         ;  sc_self = sc

         ;  if (loggit)
            mcxLog2
            (  us
            ,  "node %ld in cluster %ld eff %.3f,%.3f sum %.3f"
            ,  nid
            ,  cid_self
            ,  eff_self
            ,  eff_self_max
            ,  sc.sum_i
            )

         ;  for (f=0;f<clidvec->n_ivps;f++)
            {  long cid = clidvec->ivps[f].idx
            ;  mclv* clvec = mclxGetVector(cl, cid, RETURN_ON_FAIL, NULL)
                              /* ^ overdoing: cid == clvec->vid */
            ;  double eff, eff_max

            ;  if (!clvec)
               {  mcxErr
                  (  "clmAdjust panic"
                  ,  "cluster <%ld> node <%ld> mishap"
                  ,  cid
                  ,  nid
                  )
               ;  continue
            ;  }

               /* fixme: document or remove first condition
                *
               */
               if ((0 && clvec->n_ivps <= clself->n_ivps) || clvec->vid == cid_self)
               continue

            ;  clmVScanDomain(mx->cols+nos, clvec, &sc)
            ;  clmVScoreCoverage(&sc, &eff, &eff_max)

#if 0
#  define PIVOT  eff > eff_alien_bsf
#else
#  define PIVOT  eff_max > eff_alien_max_bsf
#endif

            ;  if
               (  PIVOT
               || sc.sum_i >= 0.5
               )
                  eff_alien_bsf = eff
               ,  eff_alien_max_bsf = eff_max
               ,  cid_alien = clvec->vid
               ,  sc_alien = sc

            ;  if (sc.sum_i >= 0.5)
               break
         ;  }

            if (loggit)
            mcxLog2
            (  us
            ,  " -> best alien %ld eff %.3f,%.3f sum %.3f"
            ,  cid_alien
            ,  eff_alien_bsf
            ,  eff_alien_max_bsf
            ,  sc_alien.sum_i
            )

               /* below: use sum_i as mass fraction
                * (clmAdjust framework uses stochastic
                * matrix)
               */
         ;  if
            (  cid_alien >= 0 && cid_self >= 0
            && f1 * sc_alien.max_i >= sc_self.max_i
            && (  (  eff_alien_bsf > eff_self
                  && sc_alien.sum_i > sc_self.sum_i
                  )
               || (  pow(sc_alien.sum_i, f2) >= sc_self.sum_i
                  && pow(eff_self, f2) <= eff_alien_bsf
                  )
               )
                  /* So, if max is reasonable
                   * and efficiency is better and mass is better
                   * or if mass is ridiculously better -> move
                   * Somewhat intricate and contrived, yes.
                  */
            )
            {  mclv* acceptor = mclxGetVector(cl_adj, cid_alien, RETURN_ON_FAIL, NULL)
            ;  mclv* donor    = mclxGetVector(cl_adj, cid_self, RETURN_ON_FAIL, NULL)
            ;  if (!donor || !acceptor || acceptor == donor)
               continue
            ;  mclvInsertIdx(donor, nid, 0.0)
            ;  mclvInsertIdx(acceptor, nid, 1.0)
            ;  acceptor->val = 1.5

            ;  if (mcxLogGet(MCX_LOG_LIST))
               {  mclv* nb = mx->cols+nos
               ;  double mxv = mclvMaxValue(nb)
               ;  double avg = nb->n_ivps ? mclvSum(nb) / nb->n_ivps : -1.0
               ;  mcxLog
                  (  MCX_LOG_LIST
                  ,  us
                  ,  "mov %ld (%ld %.2f %.2f)"
                     " %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)"
                     " to %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)"
                  ,  nid
                  ,  (long) nb->n_ivps, mxv, avg
                  ,  cid_self
                  ,  eff_self, eff_self_max, sc_self.sum_i, sc_self.max_i
                  ,  (ulong) (sc_self.n_meet + sc_self.n_ddif)
                  ,  cid_alien
                  ,  eff_alien_bsf, eff_alien_max_bsf, sc_alien.sum_i, sc_alien.max_i
                  ,  (ulong) (sc_alien.n_meet + sc_alien.n_ddif)
                  )
            ;  }
               n_adjusted++
            ;  mclvInsertIdx(cid_affected, cid_alien, 2.0)
            ;  mclvInsertIdx(cid_affected, cid_self, 2.0)
            ;  mclvInsertIdx(nid_affected, nid, 2.0)
         ;  }
         }
      }

      mclxFree(&el_on_cl)
   ;  mclxFree(&el_to_cl)

   ;  for (i=0;i<N_COLS(cl_adj);i++)
      cl_adj->cols[i].val = 0.0

   ;  mclxMakeCharacteristic(cl)

   ;  if (!n_adjusted)
      {  mclxFree(&cl_adj)
      ;  mclvFree(&cid_affected)
      ;  mclvFree(&nid_affected)
      ;  return 0
   ;  }

      mclxUnary(cl_adj, fltxCopy, NULL)
   ;  mclxMakeCharacteristic(cl_adj)
         /* FIRST REMOVE ENTRIES set to zero (sssst now .. */
         /* ...) and THEN make it characteristic again     */

   ;  mclvUnary(cid_affected, fltxGT, &bar_affected)
   ;  mclvUnary(nid_affected, fltxGT, &bar_affected)

   ;  *cl_adjustedpp  = cl_adj
   ;  *cid_affectedpp = cid_affected
   ;  *nid_affectedpp = nid_affected
   ;  return n_adjusted
;  }
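
/* Illustrative sketch, not from the original source: a hypothetical single
 * round of clm_clm_adjust.  With MCL_ADJ_FMAX and MCL_ADJ_EMASS unset
 * (f1=2, f2=3), a node appears to move to the best alien cluster when the
 * alien max entry is at least half the native one and either both coverage
 * and mass strictly improve, or the cubed comparisons in the condition
 * above hold.  Per the comment in the routine, the caller is expected to
 * order cl with the smallest clusters first.  The helper name adjust_once
 * and all variable names are invented for illustration.
*/
#if 0
static void adjust_once
(  mclx*  mx
,  mclx*  cl
)
   {  mclx* cl_new = NULL
   ;  mclv* cid_hit = NULL
   ;  mclv* nid_hit = NULL
   ;  dim n_moved

   ;  mclxColumnsRealign(cl, mclvSizeCmp)      /* smallest clusters first */
   ;  n_moved = clm_clm_adjust(mx, cl, 0, &cl_new, &cid_hit, &nid_hit)
                                               /* 0: no cluster size cap  */
   ;  if (n_moved)
      fprintf
      (  stderr
      ,  "%d nodes moved, %d clusters affected\n"
      ,  (int) n_moved
      ,  (int) cid_hit->n_ivps
      )
   ;  if (cl_new)
      mclxFree(&cl_new)
   ;  if (cid_hit)
      mclvFree(&cid_hit)
   ;  if (nid_hit)
      mclvFree(&nid_hit)
;  }
#endif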