/* Prune each column of el_on_cl (element projected onto clusters) down to
 * its best cluster matches.  For each element the cluster entries are
 * visited in order of decreasing value and kept until the accumulated
 * value reaches pct, or max entries have been kept, or the column is
 * exhausted; the column is then restored to index order.  If the element's
 * own cluster (read from el_to_cl) did not survive the pruning, it is
 * re-inserted with a token weight of 0.01 so the assignment is never lost.
 *
 *   el_to_cl  must be conforming: column i's first entry holds the
 *             cluster index of element i.
 *   el_on_cl  is pruned in place.
 *   pct       mass threshold at which to stop accepting clusters.
 *   max       maximum number of cluster entries to keep per element.
 */
static void prune_el_on_cl
(  mclMatrix* el_to_cl          /* must be conforming */
,  mclMatrix* el_on_cl          /* this one will be pruned */
,  double pct
,  int max
)
{
   dim i;
   for (i=0;i<N_COLS(el_on_cl);i++)
   {
      mclv* elclvec  = el_on_cl->cols+i;
      long clid      = el_to_cl->cols[i].ivps[0].idx;  /* element's own cluster */
      double sum     = 0.0;
      int n_others   = 0;
      dim k          = 0;
      mcxbool selfok = FALSE;

      mclvSort(elclvec, mclpValRevCmp);      /* heaviest clusters first */

      /* k is incremented on every test, including the final failing one,
       * so on exit k-1 equals the number of entries accepted below.
       */
      while (k++ < elclvec->n_ivps && sum < pct && n_others < max)
      {
         long y = elclvec->ivps[k-1].idx;
         if (y == clid)
            selfok = TRUE;
         sum += elclvec->ivps[k-1].val;
         n_others++;
      }

      mclvResize(elclvec, k-1)               /* careful recentchange */ ;
      mclvSort(elclvec, mclpIdxCmp);         /* back to canonical index order */

      if (!selfok)
         mclvInsertIdx(elclvec, clid, 0.01); /* restore own cluster, token weight */
   }
}
/* Order the entries of vec from smallest to largest value. */
void mclvSortAscVal
(  mclVector* vec
)
{
   mclvSort(vec, mclpValCmp);
}
/* Order the entries of vec from largest to smallest value. */
void mclvSortDescVal
(  mclVector* vec
)
{
   mclvSort(vec, mclpValRevCmp);
}
/* Bring vec into canonical form: sort on index, then collapse entries
 * with repeated indices using the mclpMergeLeft policy (presumably the
 * first-seen entry wins — confirm against the ivp merge implementations).
 */
void mclvSortUniq
(  mclVector* vec
)
{
   mclvSort(vec, mclpIdxCmp);
   mclvUniqIdx(vec, mclpMergeLeft);
}
/* Fill or merge vector dst from the raw ivp array held in par.
 *
 * If dst is NULL a fresh vector is allocated.  If dst already has entries
 * AND fltbinary is given, the par entries are first canonicalized in a
 * scratch vector (sorted and uniqued as needed, duplicates merged with
 * ivpmerge) and then combined entry-wise with dst via fltbinary.
 * Otherwise the current dst content is thrown away and replaced by the
 * par entries (again sorted/uniqued as needed).
 *
 * warnbits can request reporting of repeated entries within the new
 * vector (MCLV_WARN_REPEAT_ENTRIES) and of overlap with the previous dst
 * content (MCLV_WARN_REPEAT_VECTORS).
 *
 * Returns dst, possibly newly allocated.
 */
mclv* mclvFromPAR
(  mclv* dst
,  mclpAR* par
,  mcxbits warnbits
,  void (*ivpmerge)(void* ivp1, const void* ivp2)
,  double (*fltbinary)(pval val1, pval val2)
)
{
   mcxbool warn_re = warnbits & MCLV_WARN_REPEAT_ENTRIES;
   mcxbool warn_rv = warnbits & MCLV_WARN_REPEAT_VECTORS;
   mclp* ivps = par->ivps;
   dim n_ivps = par->n_ivps;
   mcxbits sortbits = par->sorted;        /* caller-declared sorted/unique state */
   dim n_old = dst ? dst->n_ivps : 0;
   const char* me = "mclvFromPAR";
   dim n_re = 0, n_rv = 0;                /* repeated-entry count / vector-overlap count */

   if (!dst)
      dst = mclvInit(NULL);

   if (n_ivps)
   {
      if (dst->n_ivps && fltbinary)
      {
         /* merge mode: canonicalize par in a scratch vector, then combine */
         mclVector* tmpvec = mclvNew(ivps, n_ivps);
         if (!(sortbits & MCLPAR_SORTED))
            mclvSort(tmpvec, NULL);
         if (!(sortbits & MCLPAR_UNIQUE))
            n_re = mclvUniqIdx(tmpvec, ivpmerge);
         n_rv += tmpvec->n_ivps;
         n_rv += dst->n_ivps;
         mclvBinary(dst, tmpvec, dst, fltbinary);
         n_rv -= dst->n_ivps;             /* inputs minus result size; counts shared
                                           * indices assuming fltbinary acts union-like
                                           * — NOTE(review): confirm for other ops */
         mclvFree(&tmpvec);
      }
      else
      {
         /* replace mode: current dst content is thrown away */
         if (dst->ivps == ivps)
            mcxErr(me, "DANGER dst->ivps == ivps (dst vid %d)", (int) dst->vid);
         mclvRenew(dst, ivps, n_ivps);
         if (!(sortbits & MCLPAR_SORTED))
            mclvSort(dst, NULL);
         if (!(sortbits & MCLPAR_UNIQUE))
            n_re += mclvUniqIdx(dst, ivpmerge);
      }
   }

   if (warn_re && n_re)
      mcxErr
      (  me
      ,  "<%ld> found <%ld> repeated entries within %svector"
      ,  (long) dst->vid
      ,  (long) n_re
      ,  n_rv ? "repeated " : ""
      );
   if (warn_rv && n_rv)
      mcxErr
      (  me
      ,  "<%ld> new vector has <%ld> overlap with previous amalgam"
      ,  (long) dst->vid
      ,  (long) n_rv
      );
   if (warnbits && n_re + n_rv)
      mcxErr
      (  me
      ,  "<%ld> vector went from <%ld> to <%ld> entries"
      ,  (long) dst->vid
      ,  (long) n_old
      ,  (long) dst->n_ivps
      );

   return dst;
}
/* Assimilate (absorb) clusters into larger, higher-ranked clusters they
 * project strongly onto, working on a copy of cl.
 *
 *   mx             graph matrix, handed to clmCastActors for the
 *                  projection matrices.
 *   cl             input clustering.  NOTE: it is re-characterized in
 *                  place (mclxMakeCharacteristic) as a side effect.
 *   prune_sz       clusters larger than this are left alone; 0 disables
 *                  the size cutoff.
 *   cl_adjustedpp  out: the adjusted clustering, or NULL when nothing
 *                  was adjusted.
 *   n_sink         out: number of clusters that were absorbed.
 *   n_source       out: number of clusters that absorbed something.
 *
 * Returns the total number of entries (elements) moved; 0 when no
 * adjustment took place.
 *
 * Fix versus previous revision: removed a dead if(0)fprintf debug
 * statement left in the main loop.
 */
static dim clm_clm_prune
(  mclx* mx
,  mclx* cl
,  dim prune_sz
,  mclx** cl_adjustedpp
,  dim* n_sink
,  dim* n_source
)
{
   dim d, n_adjusted = 0;
   mclx* cl_adj = mclxCopy(cl);
   mclv* cid_affected = mclvClone(cl->dom_cols);
   const char* me = "clmAssimilate";
   double bar_affected = 1.5;          /* threshold separating touched (2.0) from untouched (1.0) */
   mclx *el_to_cl = NULL;
   mclx *el_on_cl = NULL;
   mclx *cl_on_cl = NULL;
   mclx *cl_on_el = NULL;

   *n_sink = 0;
   *n_source = 0;
   mclvMakeConstant(cid_affected, 1.0);
   mclxColumnsRealign(cl_adj, mclvSizeCmp);  /* smallest clusters first; absorption order depends on this */
   *cl_adjustedpp = NULL;

   clmCastActors(&mx, &cl_adj, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95);
   mclxFree(&cl_on_el);

   for (d=0;d<N_COLS(cl_on_cl);d++)
   {
      mclv* clthis = cl_adj->cols+d;
      mclv* cllist = cl_on_cl->cols+d;
      mclp* pself  = mclvGetIvp(cllist, clthis->vid, NULL);
      double self_val = -1.0;

      if (pself)
      {
         self_val = pself->val;
         pself->val *= 1.001;          /* to push it up in case of equal weights */
      }

      if (prune_sz && clthis->n_ivps > prune_sz)
         continue;

      while (1)
      {
         mclv* clthat;
         dim e;

         if (cllist->n_ivps < 2)
            break;

         mclvSort(cllist, mclpValRevCmp);

         /* now get biggest mass provided that cluster ranks higher
          * (has at least as many entries)
          *
          * fixme/todo: we probably have a slight order dependency for
          * some fringe cases.  If provable then either solve or
          * document it.
          */
         for (e=0;e<cllist->n_ivps;e++)
            if (cllist->ivps[e].idx >= clthis->vid)
               break;

         /* found none or itself */
         if (e == cllist->n_ivps || cllist->ivps[e].idx == clthis->vid)
            break;

         if /* Should Not Happen */
         (  !(clthat = mclxGetVector(cl_adj, cllist->ivps[e].idx, RETURN_ON_FAIL, NULL))
         )
            break;

         /* works for special case prune_sz == 0 */
         /* if (clthat->n_ivps + clthis->n_ivps > prune_sz) */
         /* ^iced. inconsistent behaviour as k grows. */

         {
            mcxLog
            (  MCX_LOG_LIST
            ,  me
            ,  "source %ld|%lu|%.3f absorbed by %ld|%lu|%.3f"
            ,  clthis->vid, (ulong) clthis->n_ivps, self_val
            ,  clthat->vid, (ulong) clthat->n_ivps, cllist->ivps[0].val
            );
            n_adjusted += clthis->n_ivps;
            (*n_sink)++;

            /* note: we could from our precomputed cl_on_cl obtain that A
             * is absorbed in B, B is absorbed in C.  Below we see that A
             * will be merged with B, and the result will then be merged
             * with C.  This depends on the fact that cl_adj is ordered
             * on increasing cluster size.
             */
            mcldMerge(cl_adj->cols+d, clthat, clthat);
            mclvResize(cl_adj->cols+d, 0);
            mclvInsertIdx(cid_affected, clthat->vid, 2.0);   /* mark absorber */
         }
         break;
      }
      mclvSort(cllist, mclpIdxCmp);    /* restore canonical order */
   }

   mclxFree(&cl_on_cl);
   mclxFree(&el_on_cl);
   mclxFree(&el_to_cl);
   mclxMakeCharacteristic(cl);

   /* keep only entries > 1.5, i.e. the clusters marked 2.0 above */
   mclvUnary(cid_affected, fltxGT, &bar_affected);
   *n_source = cid_affected->n_ivps;
   mclvFree(&cid_affected);

   mclxColumnsRealign(cl_adj, mclvSizeRevCmp);

   if (!n_adjusted)
   {
      mclxFree(&cl_adj);
      return 0;
   }

   mclxUnary(cl_adj, fltxCopy, NULL);  /* drops the columns emptied above */
   mclxMakeCharacteristic(cl_adj);
   *cl_adjustedpp = cl_adj;
   return n_adjusted;
}