/* current dst content is thrown away if fltbinary not used */ mclv* mclvFromPAR ( mclv* dst , mclpAR* par , mcxbits warnbits , void (*ivpmerge)(void* ivp1, const void* ivp2) , double (*fltbinary)(pval val1, pval val2) ) { mcxbool warn_re = warnbits & MCLV_WARN_REPEAT_ENTRIES ; mcxbool warn_rv = warnbits & MCLV_WARN_REPEAT_VECTORS ; mclp* ivps = par->ivps ; dim n_ivps = par->n_ivps ; mcxbits sortbits = par->sorted ; dim n_old = dst ? dst->n_ivps : 0 ; const char* me = "mclvFromPAR" ; dim n_re = 0, n_rv = 0 ; if (!dst) dst = mclvInit(NULL) ; if (n_ivps) { if (dst->n_ivps && fltbinary) { mclVector* tmpvec = mclvNew(ivps, n_ivps) ; if (!(sortbits & MCLPAR_SORTED)) mclvSort(tmpvec, NULL) ; if (!(sortbits & MCLPAR_UNIQUE)) n_re = mclvUniqIdx(tmpvec, ivpmerge) ; n_rv += tmpvec->n_ivps ; n_rv += dst->n_ivps ; mclvBinary(dst, tmpvec, dst, fltbinary) ; n_rv -= dst->n_ivps ; mclvFree(&tmpvec) ; } else { if (dst->ivps == ivps) mcxErr(me, "DANGER dst->ivps == ivps (dst vid %d)", (int) dst->vid) ; mclvRenew(dst, ivps, n_ivps) ; if (!(sortbits & MCLPAR_SORTED)) mclvSort(dst, NULL) ; if (!(sortbits & MCLPAR_UNIQUE)) n_re += mclvUniqIdx(dst, ivpmerge) ; } } if (warn_re && n_re) mcxErr ( me , "<%ld> found <%ld> repeated entries within %svector" , (long) dst->vid , (long) n_re , n_rv ? "repeated " : "" ) ; if (warn_rv && n_rv) mcxErr ( me , "<%ld> new vector has <%ld> overlap with previous amalgam" , (long) dst->vid , (long) n_rv ) ; if (warnbits && n_re + n_rv) mcxErr ( me , "<%ld> vector went from <%ld> to <%ld> entries" , (long) dst->vid , (long) n_old , (long) dst->n_ivps ) ; return dst ; }
void pairwise_setops ( mclx* mx1 , mclx* mx2 , mcxbits modes ) { dim t, u, n_tst = 0 ; mclv* cache = mclvInit(NULL) ; mclv* meet = mclvInit(NULL) ; mclv* join = mclvInit(NULL) ; mclv* diff = mclvInit(NULL) ; mcxbool overwrite = modes & MMM_OVERWRITE ; dim n_zero_meet = 0, n_plus_meet = 0 ; mclv* (*fn_meet)(const mclv* lft, const mclv* rgt, mclv* dst) = mcldMeet ; mclv* (*fn_minus)(const mclv* lft, const mclv* rgt, mclv* dst) = mcldMinus1 ; if (modes & MMM_MEET2) fn_meet = mcldMeet2 , fn_minus = mcldMinus /* the point of overwrite is to have * a lft == dst or rgt == dst pattern. */ ; for (t=0;t<N_COLS(mx1);t++) { for (u=0;u<N_COLS(mx2);u++) { mclv* dst = overwrite ? (modes & MMM_RIGHT ? mx1->cols+u : mx2->cols+t) : diff ; if (overwrite) mclvCopy(cache, dst) /* cache column, reinstate later */ ; if (modes & MMM_BINARY) mclvBinary(mx1->cols+t, mx2->cols+u, dst, fltLaNR) ; else fn_minus(mx1->cols+t, mx2->cols+u, dst) /* compute t / u */ ; if (overwrite) mclvCopy(diff, dst) , mclvCopy(dst, cache) /* reinstate column */ /* diff contains t / u */ ; dst = overwrite ? dst : meet /* cache column, same as above */ ; if (modes & MMM_BINARY) mclvBinary(mx1->cols+t, mx2->cols+u, dst, fltLaR) ; else fn_meet(mx1->cols+t, mx2->cols+u, dst) ; if (overwrite) mclvCopy(meet, dst) , mclvCopy(dst, cache) /* meet contains t /\ u */ ; mcldMerge(diff, meet, join) /* join should be identical to column t */ ; if (meet->n_ivps) n_plus_meet++ ; else n_zero_meet++ ; if (modes & MMM_CHECK) { mclv* dediff = mclvClone(mx1->cols+t) ; mclv* demeet = mclvClone(mx1->cols+t) ; dim nd = mclvUpdateMeet(dediff, diff, fltSubtract) ; dim nm = mclvUpdateMeet(demeet, meet, fltSubtract) ; if ( diff->n_ivps + meet->n_ivps != mx1->cols[t].n_ivps || !mcldEquate(join, mx1->cols+t, MCLD_EQT_EQUAL) || diff->n_ivps != nd || meet->n_ivps != nm ) { mclvaDump(mx1->cols+t, stdout, -1, " ", MCLVA_DUMP_HEADER_ON) ; mclvaDump(mx2->cols+u, stdout, -1, " ", MCLVA_DUMP_HEADER_ON) ; mclvaDump(meet, stdout, -1, " ", MCLVA_DUMP_HEADER_ON) ; mclvaDump(diff, stdout, -1, " ", MCLVA_DUMP_HEADER_ON) ; mcxDie(1, me, "rats") ; } mclvFree(&dediff) ; mclvFree(&demeet) ; } n_tst++ ; } } fprintf ( stdout , "meet was nonempty %.2f\n" , (double) (n_plus_meet * 1.0f / n_tst) ) ; fprintf ( stdout , "%d successful tests in %s%s %s mode (checked: %s)\n" , (int) n_tst , overwrite ? "overwrite" : "create" , overwrite ? ( modes & MMM_RIGHT ? "-right" : "-left" ) : "" , modes & MMM_BINARY ? "generic" : "update" , (modes & MMM_CHECK ? "yes" : "no") ) ; fprintf ( stdout , "meet-can: %10lu\n" "meet-zip: %10lu\n" "meet-s/l: %10lu\n" "diff-can: %10lu\n" "diff-zip: %10lu\n" "diff-s/l: %10lu\n" , (ulong) nu_meet_can , (ulong) nu_meet_zip , (ulong) nu_meet_sl , (ulong) nu_diff_can , (ulong) nu_diff_zip , (ulong) nu_diff_sl ) ; mclvFree(&cache) ; mclvFree(&meet) ; mclvFree(&join) ; mclvFree(&diff) ; }
mclMatrix* mclInterpret ( mclMatrix* dag ) { mclv* v_attr = mclvCopy(NULL, dag->dom_cols) ; mclx* m_attr = NULL, *m_cls = NULL, *m_clst = NULL ; dim d ; mclvMakeCharacteristic(v_attr) ; for (d=0; d<N_COLS(dag); d++) { mclv* col = dag->cols+d ; if (mclvGetIvp(col, col->vid, NULL)) /* deemed attractor */ mclvInsertIdx(v_attr, col->vid, 2.0) ; } mclvSelectGqBar(v_attr, 1.5) ; m_attr = mclxSub(dag, v_attr, v_attr) ; mclxAddTranspose(m_attr, 1.0) ; m_cls = clmUGraphComponents(m_attr, NULL) /* attractor systems as clusters */ ; mclvCopy(m_cls->dom_rows, dag->dom_cols) /* add all nodes to this cluster matrix */ ; m_clst = mclxTranspose(m_cls) /* nodes(columns) with zero neighbours need to be classified */ ; mclgUnionvReset(dag) /* make mx->dom-rows characteristic */ ; mclxFree(&m_cls) ; for (d=0; d<N_COLS(dag); d++) { mclv* closure, *clsids ; if (mclvGetIvp(v_attr, dag->cols[d].vid, NULL)) continue /* attractor already classified */ ; closure = get_closure(dag, dag->cols+d) /* take all [neighbours of [neighbours of [..]]] */ ; clsids = mclgUnionv(m_clst, closure, NULL, SCRATCH_READY, NULL) ; mclvAdd(m_clst->cols+d, clsids, m_clst->cols+d) ; mclvFree(&clsids) ; mclvFree(&closure) ; } m_cls = mclxTranspose(m_clst) ; mclxFree(&m_attr) ; mclxFree(&m_clst) ; mclvFree(&v_attr) ; return m_cls ; }
void mclgTFgraph ( mclx* mx , pnum mode , pval val ) { switch(mode) { case MCLG_TF_MAX: mclxMergeTranspose(mx, fltMax, 1.0) ; break ; case MCLG_TF_MIN: mclxMergeTranspose(mx, fltMin, 1.0) ; break ; case MCLG_TF_ADD: mclxMergeTranspose(mx, fltAdd, 1.0) ; break ; case MCLG_TF_SELFRM: mclxAdjustLoops(mx, mclxLoopCBremove, NULL) ; break ; case MCLG_TF_SELFMAX: mclxAdjustLoops(mx, mclxLoopCBmax, NULL) ; break ; case MCLG_TF_NORMSELF: mclxNormSelf(mx) ; break ; case MCLG_TF_MUL: mclxMergeTranspose(mx, fltMultiply, 1.0) ; break ; case MCLG_TF_ARCMAX: mclxMergeTranspose(mx, fltArcMax, 1.0) ; break ; case MCLG_TF_ARCMAXGQ: mclxMergeTranspose3(mx, fltArcMaxGQ, 1.0, val) ; break ; case MCLG_TF_ARCMAXGT: mclxMergeTranspose3(mx, fltArcMaxGT, 1.0, val) ; break ; case MCLG_TF_ARCMAXLQ: mclxMergeTranspose3(mx, fltArcMaxLQ, 1.0, val) ; break ; case MCLG_TF_ARCMAXLT: mclxMergeTranspose3(mx, fltArcMaxLT, 1.0, val) ; break ; case MCLG_TF_ARCMINGQ: mclxMergeTranspose3(mx, fltArcMinGQ, 1.0, val) ; break ; case MCLG_TF_ARCMINGT: mclxMergeTranspose3(mx, fltArcMinGT, 1.0, val) ; break ; case MCLG_TF_ARCMINLQ: mclxMergeTranspose3(mx, fltArcMinLQ, 1.0, val) ; break ; case MCLG_TF_ARCMINLT: mclxMergeTranspose3(mx, fltArcMinLT, 1.0, val) ; break ; case MCLG_TF_ARCDIFFGQ: mclxMergeTranspose3(mx, fltArcDiffGQ, 1.0, val) ; break ; case MCLG_TF_ARCDIFFGT: mclxMergeTranspose3(mx, fltArcDiffGT, 1.0, val) ; break ; case MCLG_TF_ARCDIFFLQ: mclxMergeTranspose3(mx, fltArcDiffLQ, 1.0, val) ; break ; case MCLG_TF_ARCDIFFLT: mclxMergeTranspose3(mx, fltArcDiffLT, 1.0, val) ; break ; case MCLG_TF_ARCSUB: mclxMergeTranspose(mx, fltSubtract, 1.0) ; break ; case MCLG_TF_TUG: mclxPerturb(mx, val, MCLX_PERTURB_SYMMETRIC) ; break ; case MCLG_TF_TRANSPOSE: { mclx* tp = mclxTranspose(mx); mclxTransplant(mx, &tp); } ; break ; case MCLG_TF_SHRUG: mclxPerturb(mx, val, MCLX_PERTURB_SYMMETRIC | MCLX_PERTURB_RAND) ; break ; case MCLG_TF_ILS: mclxILS(mx) ; break ; case MCLG_TF_TOPN: mclxKNNdispatch(mx, val+0.5, mclx_n_thread_g, 0) ; break ; case MCLG_TF_KNN: mclxKNNdispatch(mx, val+0.5, mclx_n_thread_g, 1) ; break ; case MCLG_TF_MCL: tf_do_mcl(mx, val, FALSE) ; break ; case MCLG_TF_ARC_MCL: tf_do_mcl(mx, val, TRUE) ; break ; case MCLG_TF_THREAD: mclx_n_thread_g = val + 0.5 ; break ; case MCLG_TF_CEILNB: { mclv* cv = mclgCeilNB(mx, val+0.5, NULL, NULL, NULL); mclvFree(&cv); } ; break ; case MCLG_TF_STEP: mclg_tf_step(mx, val+0.5) ; break ; case MCLG_TF_QT: mclxQuantiles(mx, val) ; break ; case MCLG_TF_SSQ: tf_ssq(mx, val) ; break ; case MCLG_TF_SHUFFLE: mcxErr("mclgTFgraph", "shuffle not yet done (lift from mcxrand)") ; break ; default: mcxErr("mclgTFgraph", "unknown mode") ; break ; } }
static void vary_threshold ( mcxIO* xf , FILE* fp , int vary_a , int vary_z , int vary_s , int vary_n , unsigned mode ) { dim cor_i = 0, j ; int step ; mclx* mx ; unsigned long noe ; pval* allvals ; dim n_allvals = 0 ; double sum_vals = 0.0 ; mx = mclxRead(xf, EXIT_ON_FAIL) ; mcxIOclose(xf) ; if (transform) mclgTFexec(mx, transform) ; noe = mclxNrofEntries(mx) ; allvals = mcxAlloc(noe * sizeof allvals[0], EXIT_ON_FAIL) ; if (!weight_scale) { if (mode == 'c') weight_scale = 1.0 ; else weight_scale = vary_n ; } n_allvals = get_n_sort_allvals(mx, allvals, noe, &sum_vals, FALSE) ; if (mode == 'c') { double smallest = n_allvals ? allvals[n_allvals-1] : -DBL_MAX ; if (vary_a * 1.0 / vary_n < smallest) { while (vary_a * 1.0 / vary_n < smallest) vary_a++ ; vary_a-- ; } mcxTell ( me , "smallest correlation is %.2f, using starting point %.2f" , smallest , vary_a * 1.0 / vary_n ) ; } if (output_flags & OUTPUT_TABLE) { ;fprintf(fp, "L\tD\tR\tS\tcce\tEWmean\tEWmed\tEWiqr\tNDmean\tNDmed\tNDiqr\tCCF\t%s\n", mode == 'k' ? "kNN" : mode == 'l' ? "N" : "Cutoff") ;} else { if (output_flags & OUTPUT_KEY) { ;fprintf(fp, "-------------------------------------------------------------------------------\n") ;fprintf(fp, " L Percentage of nodes in the largest component\n") ;fprintf(fp, " D Percentage of nodes in components of size at most %d [-div option]\n", (int) divide_g) ;fprintf(fp, " R Percentage of nodes not in L or D: 100 - L -D\n") ;fprintf(fp, " S Percentage of nodes that are singletons\n") ;fprintf(fp, " cce Expected size of component, nodewise [ sum(sz^2) / sum^2(sz) ]\n") ;fprintf(fp, "*EW Edge weight traits (mean, median and IQR, all scaled!)\n") ;fprintf(fp, " Scaling is used to avoid printing of fractional parts throughout.\n") ;fprintf(fp, " The scaling factor is %.2f [-report-scale option]\n", weight_scale) ;fprintf(fp, " ND Node degree traits [mean, median and IQR]\n") ;fprintf(fp, " CCF Clustering coefficient %s\n", compute_flags & COMPUTE_CLCF ? "(not computed; use --clcf to include this)" : "") ;fprintf(fp, " eff Induced component efficiency %s\n", compute_flags & COMPUTE_EFF ? "(not computed; use --eff to include this)" : "") ;if (mode == 'c') fprintf(fp, "Cutoff The threshold used.\n") ;else if (mode == 't') fprintf(fp, "*Cutoff The threshold with scale factor %.2f and fractional parts removed\n", weight_scale) ;else if (mode == 'k') fprintf(fp, "k-NN The knn parameter\n") ;else if (mode == 'l') fprintf(fp, "N The knn parameter (merge mode)\n") ;else if (mode == 'n') fprintf(fp, "ceil The ceil parameter\n") ;fprintf(fp, "Total number of nodes: %lu\n", (ulong) N_COLS(mx)) ;} fprintf(fp, "-------------------------------------------------------------------------------\n") ;fprintf(fp, " L D R S cce *EWmean *EWmed *EWiqr NDmean NDmed NDiqr CCF eff %6s \n", mode == 'k' ? "k-NN" : mode == 'l' ? "N" : mode == 'n' ? "Ceil" : "Cutoff") ;fprintf(fp, "-------------------------------------------------------------------------------\n") ; } for (step = vary_a; step <= vary_z; step += vary_s) { double cutoff = step * 1.0 / vary_n ; double eff = -1.0 ; mclv* nnodes = mclvCanonical(NULL, N_COLS(mx), 0.0) ; mclv* degree = mclvCanonical(NULL, N_COLS(mx), 0.0) ; dim i, n_sample = 0 ; double cor, y_prev, iqr = 0.0 ; mclx* cc = NULL, *res = NULL ; mclv* sz, *ccsz = NULL ; int step2 = vary_z + vary_a - step ; sum_vals = 0.0 ; if (mode == 't' || mode == 'c') mclxSelectValues(mx, &cutoff, NULL, MCLX_EQT_GQ) , res = mx ; else if (mode == 'k') { res = rebase_g ? mclxCopy(mx) : mx ; mclxKNNdispatch(res, step2, n_thread_l, 1) ; } else if (mode == 'l') { res = mx ; mclxKNNdispatch(res, step2, n_thread_l, 0) ; } else if (mode == 'n') { res = rebase_g ? mclxCopy(mx) : mx ; mclv* cv = mclgCeilNB(res, step2, NULL, NULL, NULL) ; mclvFree(&cv) ; } sz = mclxColSizes(res, MCL_VECTOR_COMPLETE) ; mclvSortDescVal(sz) ; cc = clmUGraphComponents(res, NULL) /* fixme: user has to specify -tf '#max()' if graph is directed */ ; if (cc) { ccsz = mclxColSizes(cc, MCL_VECTOR_COMPLETE) ; if (compute_flags & COMPUTE_EFF) { clmPerformanceTable pftable ; clmPerformance(mx, cc, &pftable) ; eff = pftable.efficiency ; } } if (mode == 't' || mode == 'c') { for ( ; n_allvals > 0 && allvals[n_allvals-1] < cutoff ; n_allvals-- ) ; sum_vals = 0.0 ; for (i=0;i<n_allvals;i++) sum_vals += allvals[i] ; } else if (mode == 'k' || mode == 'n' || mode == 'l') { n_allvals = get_n_sort_allvals(res, allvals, noe, &sum_vals, FALSE) ; } levels[cor_i].sim_median= mcxMedian(allvals, n_allvals, sizeof allvals[0], pval_get_double, &iqr) ; levels[cor_i].sim_iqr = iqr ; levels[cor_i].sim_mean = n_allvals ? sum_vals / n_allvals : 0.0 ; levels[cor_i].nb_median = mcxMedian(sz->ivps, sz->n_ivps, sizeof sz->ivps[0], ivp_get_double, &iqr) ; levels[cor_i].nb_iqr = iqr ; levels[cor_i].nb_mean = mclvSum(sz) / N_COLS(res) ; levels[cor_i].cc_exp = cc ? mclvPowSum(ccsz, 2.0) / N_COLS(res) : 0 ; levels[cor_i].nb_sum = mclxNrofEntries(res) ; if (compute_flags & COMPUTE_CLCF) { mclv* clcf = mclgCLCFdispatch(res, n_thread_l) ; levels[cor_i].clcf = mclvSum(clcf) / N_COLS(mx) ; mclvFree(&clcf) ; } else levels[cor_i].clcf = 0.0 ; levels[cor_i].threshold = mode == 'k' || mode == 'l' || mode == 'n' ? step2 : cutoff ; levels[cor_i].bigsize = cc ? cc->cols[0].n_ivps : 0 ; levels[cor_i].n_single = 0 ; levels[cor_i].n_edge = n_allvals ; levels[cor_i].n_lq = 0 ; if (cc) for (i=0;i<N_COLS(cc);i++) { dim n = cc->cols[N_COLS(cc)-1-i].n_ivps ; if (n == 1) levels[cor_i].n_single++ ; if (n <= divide_g) levels[cor_i].n_lq += n ; else break ; } if (levels[cor_i].bigsize <= divide_g) levels[cor_i].bigsize = 0 ; y_prev = sz->ivps[0].val /* wiki says: A scale-free network is a network whose degree distribution follows a power law, at least asymptotically. That is, the fraction P(k) of nodes in the network having k connections to other nodes goes for large values of k as P(k) ~ k^−g where g is a constant whose value is typically in the range 2<g<3, although occasionally it may lie outside these bounds. */ ; for (i=1;i<sz->n_ivps;i++) { double y = sz->ivps[i].val ; if (y > y_prev - 0.5) continue /* same as node degree seen last */ ; nnodes->ivps[n_sample].val = log( (i*1.0) / (1.0*N_COLS(res))) /* x = #nodes >= k, as fraction */ ; degree->ivps[n_sample].val = log(y_prev ? y_prev : 1) /* y = k = degree of node */ ; n_sample++ ;if(0)fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) y_prev, (int) i, (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val) ; y_prev = y ; } nnodes->ivps[n_sample].val = 0 ; nnodes->ivps[n_sample++].val = log(y_prev ? y_prev : 1) ;if(0){fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) sz->ivps[sz->n_ivps-1].val, (int) N_COLS(res), (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val) ;} ; mclvResize(nnodes, n_sample) ; mclvResize(degree, n_sample) ; cor = pearson(nnodes, degree, n_sample) ; levels[cor_i].degree_cor = cor * cor ;if(0)fprintf(stdout, "cor at cutoff %.2f %.3f\n\n", cutoff, levels[cor_i-1].degree_cor) ; mclvFree(&nnodes) ; mclvFree(°ree) ; mclvFree(&sz) ; mclvFree(&ccsz) ; mclxFree(&cc) ; if(output_flags & OUTPUT_TABLE) { fprintf ( fp , "%lu\t%lu\t%lu\t%lu\t%lu" "\t%6g\t%6g\t%6g" "\t%6g\t%lu\t%6g" , (ulong) levels[cor_i].bigsize , (ulong) levels[cor_i].n_lq , (ulong) N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq , (ulong) levels[cor_i].n_single , (ulong) levels[cor_i].cc_exp , (double) levels[cor_i].sim_mean , (double) levels[cor_i].sim_median , (double) levels[cor_i].sim_iqr , (double) levels[cor_i].nb_mean , (ulong) levels[cor_i].nb_median , (double) levels[cor_i].nb_iqr ) ; if (compute_flags & COMPUTE_CLCF) fprintf(fp, "\t%6g", levels[cor_i].clcf) ; else fputs("\tNA", fp) ; if (eff >= 0.0) fprintf(fp, "\t%4g", eff) ; else fputs("\tNA", fp) ; fprintf(fp, "\t%6g", (double) levels[cor_i].threshold) ; fputc('\n', fp) ; } else { fprintf ( fp , "%3d %3d %3d %3d %7d " "%7.0f %7.0f %6.0f" "%6.1f %6.0f %6.0f" , 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].bigsize) / N_COLS(mx)) , 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_lq) / N_COLS(mx)) , 0 ? 1 : (int) (0.5 + (100.0 * (N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq)) / N_COLS(mx)) , 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_single) / N_COLS(mx)) , 0 ? 1 : (int) (0.5 + levels[cor_i].cc_exp) , 0 ? 1.0 : (double) (levels[cor_i].sim_mean * weight_scale) , 0 ? 1.0 : (double) (levels[cor_i].sim_median * weight_scale) , 0 ? 1.0 : (double) (levels[cor_i].sim_iqr * weight_scale) , 0 ? 1.0 : (double) (levels[cor_i].nb_mean ) , 0 ? 1.0 : (double) (levels[cor_i].nb_median + 0.5 ) , 0 ? 1.0 : (double) (levels[cor_i].nb_iqr + 0.5 ) ) ; if (compute_flags & COMPUTE_CLCF) fprintf(fp, " %3d", 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].clcf))) ; else fputs(" -", fp) ; if (eff >= 0.0) fprintf(fp, " %3d", (int) (0.5 + 1000 * eff)) ; else fputs(" -", fp) ; if (mode == 'c') fprintf(fp, "%8.2f\n", (double) levels[cor_i].threshold) ; else if (mode == 't') fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold * weight_scale) ; else if (mode == 'k' || mode == 'n' || mode == 'l') fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold) ; } ; cor_i++ ; if (res != mx) mclxFree(&res) ; } if (!(output_flags & OUTPUT_TABLE)) { if (weefreemen) { fprintf(fp, "-------------------------------------------------------------------------------\n") ;fprintf(fp, "The graph below plots the R^2 squared value for the fit of a log-log plot of\n") ;fprintf(fp, "<node degree k> versus <#nodes with degree >= k>, for the network resulting\n") ;fprintf(fp, "from applying a particular %s cutoff.\n", mode == 'c' ? "correlation" : "similarity") ;fprintf(fp, "-------------------------------------------------------------------------------\n") ; for (j=0;j<cor_i;j++) { dim jj ; for (jj=30;jj<=100;jj++) { char c = ' ' ; if (jj * 0.01 < levels[j].degree_cor && (jj+1.0) * 0.01 > levels[j].degree_cor) c = 'X' ; else if (jj % 5 == 0) c = '|' ; fputc(c, fp) ; } if (mode == 'c') fprintf(fp, "%8.2f\n", (double) levels[j].threshold) ; else fprintf(fp, "%8.0f\n", (double) levels[j].threshold * weight_scale) ; } fprintf(fp, "|----+----|----+----|----+----|----+----|----+----|----+----|----+----|--------\n") ;fprintf(fp, "| R^2 0.4 0.5 0.6 0.7 0.8 0.9 | 1.0 -o)\n") ;fprintf(fp, "+----+----+----+----+----+---------+----+----+----+----+----+----+----+ /\\\\\n") ;fprintf(fp, "| 2 4 6 8 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | _\\_/\n") ;fprintf(fp, "+----+----|----+----|----+----|----+----|----+----|----+----|----+----+--------\n") ; } else fprintf(fp, "-------------------------------------------------------------------------------\n") ; } mclxFree(&mx) ; mcxFree(allvals) ; }
double get_score ( const mclv* c , const mclv* d , const mclv* c_start , const mclv* d_start , const mclv* c_end , const mclv* d_end ) { mclv* vecc = mclvClone(c) ; mclv* vecd = mclvClone(d) ; mclv* meet_c = mcldMeet(vecc, vecd, NULL) ; mclv* meet_d = mcldMeet(vecd, meet_c, NULL) ; mclv* cwid = mclvBinary(c_end, c_start, NULL, fltSubtract) ; mclv* dwid = mclvBinary(d_end, d_start, NULL, fltSubtract) ; mclv* rmin = mclvBinary(c_end, d_end, NULL, fltMin) ; mclv* lmax = mclvBinary(c_start, d_start, NULL, fltMax) ; mclv* delta = mclvBinary(rmin, lmax, NULL, fltSubtract) ; mclv* weightc, *weightd ; double ip, cd, csn, meanc, meand, mean, euclid, meet_fraction, score, sum_meet_c, sum_meet_d, reduction_c, reduction_d ; int nmeet = meet_c->n_ivps ; int nldif = vecc->n_ivps - nmeet ; int nrdif = vecd->n_ivps - nmeet ; mclvSelectGqBar(delta, 0.0) ; weightc= mclvBinary(delta, cwid, NULL, mydiv) ; weightd= mclvBinary(delta, dwid, NULL, mydiv) #if 0 ;if (c != d)mclvaDump ( cwid , stdout , 5 , "\n" , 0) ,mclvaDump ( dwid , stdout , 5 , "\n" , 0) #endif ; sum_meet_c = 0.01 + mclvSum(meet_c) ; sum_meet_d = 0.01 + mclvSum(meet_d) ; mclvBinary(meet_c, weightc, meet_c, fltMultiply) ; mclvBinary(meet_d, weightd, meet_d, fltMultiply) ; reduction_c = mclvSum(meet_c) / sum_meet_c ; reduction_d = mclvSum(meet_d) / sum_meet_d ; ip = mclvIn(meet_c, meet_d) ; cd = sqrt(mclvPowSum(meet_c, 2.0) * mclvPowSum(meet_d, 2.0)) ; csn = cd ? ip / cd : 0.0 ; meanc = meet_c->n_ivps ? mclvSum(meet_c) / meet_c->n_ivps : 0.0 ; meand = meet_d->n_ivps ? mclvSum(meet_d) / meet_d->n_ivps : 0.0 ; mean = MCX_MIN(meanc, meand) ; euclid = 0 ? 1.0 : ( mean ? sqrt(mclvPowSum(meet_c, 2.0) / mclvPowSum(vecc, 2.0)) : 0.0 ) ; meet_fraction = pow((meet_c->n_ivps * 1.0 / vecc->n_ivps), 1.0) ; score = mean * csn * euclid * meet_fraction * 1.0 ; mclvFree(&meet_c) ; mclvFree(&meet_d) ; fprintf ( stdout , "%10d%10d%10d%10d%10d%10g%10g%10g%10g%10g%10g%10g\n" , (int) c->vid , (int) d->vid , (int) nldif , (int) nrdif , (int) nmeet , score , mean , csn , euclid , meet_fraction , reduction_c , reduction_d ) ; return score ; }
static mclx* make_mx_from_pars ( mclxIOstreamer* streamer , stream_state* iface , void (*ivpmerge)(void* ivp1, const void* ivp2) , mcxbits bits ) { mclpAR* pars = iface->pars ; long dc_max_seen = iface->map_c->max_seen ; long dr_max_seen = iface->map_r->max_seen ; mclx* mx = NULL ; mclv* domc, *domr ; dim i ; if (bits & MCLXIO_STREAM_235ANY) { if (streamer->cmax_235 > 0 && dc_max_seen < streamer->cmax_235 - 1) dc_max_seen = streamer->cmax_235-1 ; } else if (bits & MCLXIO_STREAM_123) { if (streamer->cmax_123 > 0 && dc_max_seen < streamer->cmax_123 - 1) dc_max_seen = streamer->cmax_123-1 ; if (streamer->rmax_123 > 0 && dr_max_seen < streamer->rmax_123 - 1) dr_max_seen = streamer->rmax_123-1 ; } mcxTell("stream", "maxc=%d maxr=%d", (int) dc_max_seen, (int) dr_max_seen) ; if (iface->pars_n_used != iface->map_c->max_seen+1) mcxDie ( 1 , module , "internal discrepancy: n_pars=%lu max_seen+1=%lu" , (ulong) iface->pars_n_used , (ulong) (iface->map_c->max_seen+1) ) ; if (dc_max_seen < 0 || dr_max_seen < 0) { if (dc_max_seen < -1 || dr_max_seen < -1) { mcxErr(module, "bad apples %ld %ld", dc_max_seen, dr_max_seen) ; return NULL ; } else mcxTell(module, "no assignments yield void/empty matrix") ; } /* fixme: with extend and same tab, should still copy. * then, there are still occasions where one would * want to go the sparse route */ domc = iface->map_c->tab && (iface->bits & MCLXIO_STREAM_CTAB_RO) ? mclvClone(iface->map_c->tab->domain) : mclvCanonical(NULL, dc_max_seen+1, 1.0) ; domr = iface->map_r->tab && (iface->bits & MCLXIO_STREAM_RTAB_RO) ? mclvClone(iface->map_r->tab->domain) : mclvCanonical(NULL, dr_max_seen+1, 1.0) ; if (! (mx = mclxAllocZero(domc, domr))) { mclvFree(&domc) ; mclvFree(&domr) ; } else for (i=0;i<iface->pars_n_used;i++) /* careful with signedness */ { long d = domc->ivps[i].idx ;if(DEBUG3)fprintf(stderr, "column %d alloc %d\n", (int) d, (int) iface->pars_n_alloc); ; mclvFromPAR(mx->cols+i, pars+d, 0, ivpmerge, NULL) ; } return mx ; }
void get_attr ( mclx* mx , mclTab* tab , mcxIO* xfattr ) { mclx* tp = mclxTranspose(mx) ; mclx* G = mclxAdd(mx, tp) ; mclv* fwd = mclxColSizes(mx, MCL_VECTOR_COMPLETE) ; mclv* bwd = mclxColSizes(tp, MCL_VECTOR_COMPLETE) ; mclx* cc = clmComponents(G, NULL) ; mclx* node2cc = mclxTranspose(cc) ; dim i, n_cycle = 0 ; fprintf(xfattr->fp, "node\tup\tdown\tnparent\tnchild\tndag\n") ; for (i=0;i<bwd->n_ivps;i++) { mclv* seenpp1 = NULL, *seenpp2 = NULL ; ofs level_up = fire_node(mx, i, &seenpp1) ; ofs level_dn = fire_node(tp, i, &seenpp2) ; ofs ccidx = node2cc->cols[i].ivps[0].idx ; dim ccsize = cc->cols[ccidx].n_ivps ; mclvFree(&seenpp1) ; mclvFree(&seenpp2) ; if ((i+1) % 500 == 0) fputc('.', stderr) ; if (tab) { const char* label = mclTabGet(tab, mx->cols[i].vid, NULL) ; fputs(label, xfattr->fp) ; fputc('\t', xfattr->fp) ; } else fprintf ( xfattr->fp , "%lu\t" , (ulong) mx->cols[i].vid ) ; fprintf ( xfattr->fp , "%ld\t%ld\t%lu\t%lu\t%lu\n" , (long) level_up , (long) level_dn , (ulong) fwd->ivps[i].val , (ulong) bwd->ivps[i].val , (ulong) ccsize ) ; if (level_up < 0 || level_dn < 0) fputc('.', stderr) , n_cycle++ ; } if (n_cycle) fputc('\n', stderr) ; mclvFree(&bwd) ; mclvFree(&fwd) ; mclxFree(&tp) ; }
int main ( int argc , const char* argv[] ) { mcxIO* xfdagreduce = NULL, *xfattr = NULL, *xfdiff = NULL ; double child_diff_lq = 0.2 ; double parent_diff_gq = 0.4 ; mcxIO* xfimx = mcxIOnew("-", "r"), *xfdag = NULL, *xftab = NULL ; mclTab* tab = NULL ; int q = -1 ; mclx* mx ; unsigned char test_mode = 0 ; mcxstatus parseStatus = STATUS_OK ; mcxOption* opts, *opt ; mcxOptAnchorSortById(options, sizeof(options)/sizeof(mcxOptAnchor) -1) ; if (!(opts = mcxOptParse(options, (char**) argv, argc, 1, 0, &parseStatus))) exit(0) ; mcxLogLevel = MCX_LOG_AGGR | MCX_LOG_MODULE | MCX_LOG_IO | MCX_LOG_GAUGE | MCX_LOG_WARN ; mclxIOsetQMode("MCLXIOVERBOSITY", MCL_APP_VB_YES) ; mclx_app_init(stderr) ; for (opt=opts;opt->anch;opt++) { mcxOptAnchor* anch = opt->anch ; switch(anch->id) { case MY_OPT_HELP : mcxOptApropos(stdout, me, syntax, 0, 0, options) ; return 0 ; case MY_OPT_VERSION : app_report_version(me) ; return 0 ; case MY_OPT_TEST_CYCLE : test_mode = 'c' ; break ; case MY_OPT_TEST_CROSS : test_mode = 'x' ; break ; case MY_OPT_DAG_ATTR : xfattr = mcxIOnew(opt->val, "w") ; mcxIOopen(xfattr, EXIT_ON_FAIL) ; break ; case MY_OPT_DAG_DIFF : xfdiff = mcxIOnew(opt->val, "w") ; break ; case MY_OPT_DAG_REDUCE : xfdagreduce = mcxIOnew(opt->val, "w") ; break ; case MY_OPT_CHILD_DIFF_LQ : child_diff_lq = atof(opt->val) ; break ; case MY_OPT_PARENT_DIFF_GQ : parent_diff_gq = atof(opt->val) ; break ; case MY_OPT_QUERY : q = atoi(opt->val) ; break ; case MY_OPT_TAB : xftab = mcxIOnew(opt->val, "r") ; break ; case MY_OPT_IMX : mcxIOnewName(xfimx, opt->val) ; break ; } } ; if (xfimx) mx = mclxRead(xfimx, EXIT_ON_FAIL) ; else mcxDie(1, me, "need -imx") ; if (xftab) tab = mclTabRead(xftab, mx->dom_cols, EXIT_ON_FAIL) ; if (test_mode == 'c') test_for_cycles(mx) ; else if (test_mode == 'x') test_cross_ratio(mx) ; else if (xfattr) get_attr(mx, tab, xfattr) ; else if (xfdagreduce) { mclxComposeHelper* ch = mclxComposePrepare(mx, mx) ; dim i ; for (i=0;i<N_COLS(mx);i++) { mclv* in = mx->cols+i ; mclv* out = mclxVectorCompose(mx, in, NULL, ch) ; mcldMinus(in, out, in) ; mclvFree(&out) ; } mclxWrite(mx, xfdagreduce, MCLXIO_VALUE_GETENV, EXIT_ON_FAIL) ; mclxComposeRelease(&ch) ; } else if (xfdiff) dag_diff_select(mx, tab, xfdiff, child_diff_lq, parent_diff_gq) ; mclxFree(&mx) ; mcxIOfree(&xfimx) ; mcxIOfree(&xfdag) ; mcxIOfree(&xfattr) ; mcxIOfree(&xfdagreduce) ; return 0 ; }
static dim clm_clm_prune ( mclx* mx , mclx* cl , dim prune_sz , mclx** cl_adjustedpp , dim* n_sink , dim* n_source ) { dim d, n_adjusted = 0 ; mclx* cl_adj = mclxCopy(cl) ; mclv* cid_affected = mclvClone(cl->dom_cols) ; const char* me = "clmAssimilate" ; double bar_affected = 1.5 ; mclx *el_to_cl = NULL ; mclx *el_on_cl = NULL ; mclx *cl_on_cl = NULL ; mclx *cl_on_el = NULL ; *n_sink = 0 ; *n_source = 0 ; mclvMakeConstant(cid_affected, 1.0) ; mclxColumnsRealign(cl_adj, mclvSizeCmp) ; *cl_adjustedpp = NULL ; clmCastActors (&mx, &cl_adj, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95) ; mclxFree(&cl_on_el) ; for (d=0;d<N_COLS(cl_on_cl);d++) { mclv* clthis = cl_adj->cols+d ; mclv* cllist = cl_on_cl->cols+d ; mclp* pself = mclvGetIvp(cllist, clthis->vid, NULL) ; double self_val = -1.0 ; if (pself) self_val = pself->val , pself->val *= 1.001 /* to push it up in case of equal weights */ ;if(0)fprintf(stderr, "test size %d\n", (int) clthis->n_ivps) ; if (prune_sz && clthis->n_ivps > prune_sz) continue ; while (1) { mclv* clthat ; dim e ; if (cllist->n_ivps < 2) break ; mclvSort(cllist, mclpValRevCmp) /* now get biggest mass provided that cluster * ranks higher (has at least as many entries) * * fixme/todo: we probably have a slight order * dependency for some fringe cases. If provable * then either solve or document it. */ ; for (e=0;e<cllist->n_ivps;e++) if (cllist->ivps[e].idx >= clthis->vid) break /* found none or itself */ ; if (e == cllist->n_ivps || cllist->ivps[e].idx == clthis->vid) break ; if /* Should Not Happen */ (!(clthat = mclxGetVector(cl_adj, cllist->ivps[e].idx, RETURN_ON_FAIL, NULL) ) ) break /* works for special case prune_sz == 0 */ /* if (clthat->n_ivps + clthis->n_ivps > prune_sz) */ /* ^iced. inconsistent behaviour as k grows. */ ; { mcxLog ( MCX_LOG_LIST , me , "source %ld|%lu|%.3f absorbed by %ld|%lu|%.3f" , clthis->vid, (ulong) clthis->n_ivps, self_val , clthat->vid, (ulong) clthat->n_ivps, cllist->ivps[0].val ) ; n_adjusted += clthis->n_ivps ; (*n_sink)++ /* note: we could from our precomputed cl_on_cl * obtain that A is absorbed in B, B is absorbed in C. * below we see that A will be merged with B, * and the result will then be merged with C. * This depends on the fact that cl_adj is ordered * on increasing cluster size. */ ; mcldMerge(cl_adj->cols+d, clthat, clthat) ; mclvResize(cl_adj->cols+d, 0) ; mclvInsertIdx(cid_affected, clthat->vid, 2.0) ; } break ; } mclvSort(cllist, mclpIdxCmp) ; } mclxFree(&cl_on_cl) ; mclxFree(&el_on_cl) ; mclxFree(&el_to_cl) ; mclxMakeCharacteristic(cl) ; mclvUnary(cid_affected, fltxGT, &bar_affected) ; *n_source = cid_affected->n_ivps ; mclvFree(&cid_affected) ; mclxColumnsRealign(cl_adj, mclvSizeRevCmp) ; if (!n_adjusted) { mclxFree(&cl_adj) ; return 0 ; } mclxUnary(cl_adj, fltxCopy, NULL) ; mclxMakeCharacteristic(cl_adj) ; *cl_adjustedpp = cl_adj ; return n_adjusted ; }
dim clmAdjust ( mclx* mx , mclx* cl , dim cls_size_max , mclx** cl_adjustedpp , mclv** ls_adjustedpp /* nodes that moved around */ , dim* sjd_left , dim* sjd_right ) { dim sum_adjusted = 0, n_ite = 0 ; dim dist_curr_adj = 0, dist_adj_curr = 0 ; mclx* cl_adj = NULL ; mclx* cl_curr = cl ; mclv* ls_adjusted = mclvInit(NULL) ; clmXScore score_curr, score_adj ; const char* me = "clmAdjust" ; *cl_adjustedpp = NULL ; *ls_adjustedpp = NULL ; while (1) { dim n_adjusted ; double cov_curr, cov_adj, frac_curr = 0.0, frac_adj = 0.0 ; mclv* cid_affected = NULL, *nid_affected = NULL ; dim o, m, e ; if (n_ite++ >= 100) break ; mclxColumnsRealign(cl_curr, mclvSizeCmp) ; if ( !(n_adjusted = clm_clm_adjust (mx, cl_curr, cls_size_max, &cl_adj, &cid_affected, &nid_affected) ) ) break ; mcxTell ( me , "assembled %lu nodes with %lu clusters affected" , (ulong) n_adjusted , (ulong) cid_affected->n_ivps ) ; clmXScanInit(&score_curr) ; clmXScanInit(&score_adj) ; clmXScanDomainSet(mx, cl_curr,cid_affected, &score_curr) ; clmXScanDomainSet(mx, cl_adj, cid_affected, &score_adj) ; clmXScoreCoverage(&score_curr, &cov_curr, NULL) ; clmXScoreCoverage(&score_adj , &cov_adj , NULL) ; if (score_curr.n_hits && score_adj.n_hits) frac_curr = score_curr.sum_i / score_curr.n_hits , frac_adj = score_adj.sum_i / score_adj.n_hits ; mcxLog ( MCX_LOG_LIST , me , "consider (%.5f|%.5f|%lu) vs (%.5f|%.5f|%lu)" , cov_adj, frac_adj, (ulong) score_adj.n_hits , cov_curr, frac_curr, (ulong) score_curr.n_hits ) /* experience tells us that mcl's funneling * worsens frac */ ; if (frac_adj <= frac_curr) { mclvFree(&cid_affected) ; mclvFree(&nid_affected) ; break ; } clmEnstrict(cl_adj, &o, &m, &e, 0) ; clmSJDistance(cl_curr, cl_adj, NULL, NULL, &dist_curr_adj, &dist_adj_curr) ; mcxLog ( MCX_LOG_AGGR , me , "distance %lu|%lu" , (ulong) dist_curr_adj, (ulong) dist_adj_curr ) ; mclvAdd(ls_adjusted, nid_affected, ls_adjusted) ; if (cl_curr != cl) mclxFree(&cl_curr) ; cl_curr = cl_adj ; sum_adjusted += n_adjusted ; mclvFree(&cid_affected) ; mclvFree(&nid_affected) ; } if (cl_curr != cl) /* fixme free logic */ { mclxColumnsRealign(cl_curr, mclvSizeRevCmp) ; *cl_adjustedpp = cl_curr ; *ls_adjustedpp = ls_adjusted ; clmSJDistance (cl, cl_curr, NULL, NULL, &dist_curr_adj, &dist_adj_curr) ; if (sjd_left) *sjd_left = dist_curr_adj , *sjd_right = dist_adj_curr ; } else { if (sjd_left) *sjd_left = 0 , *sjd_right = 0 ; mclvFree(&ls_adjusted) ; } mcxLog ( MCX_LOG_AGGR , me , "total adjusted %lu, final distance %lu|%lu" , (ulong) sum_adjusted , (ulong) dist_curr_adj , (ulong) dist_adj_curr ) ; mclxColumnsRealign(cl, mclvSizeRevCmp) ; return sum_adjusted ; }
static dim clm_clm_adjust ( mclx* mx , mclx* cl , dim cls_size_max , mclx** cl_adjustedpp , mclv** cid_affectedpp , mclv** nid_affectedpp ) { dim i, j, n_adjusted = 0 ; mclx* cl_adj = mclxCopy(cl) ; mclv* cid_affected = mclvClone(cl->dom_cols) ; mclv* nid_affected = mclvClone(mx->dom_cols) ; double bar_affected = 1.5 ; const char* e1 = getenv("MCL_ADJ_FMAX") ; const char* e2 = getenv("MCL_ADJ_EMASS") ; double f1 = e1 ? atof(e1) : 2 ; double f2 = e2 ? atof(e2) : 3 ; mcxbool loggit = mcxLogGet( MCX_LOG_CELL | MCX_LOG_INFO ) ; clmVScore sc ; mclx *el_to_cl = NULL ; mclx *el_on_cl = NULL ; mclx *cl_on_cl = NULL ; mclx *cl_on_el = NULL ; *cl_adjustedpp = NULL ; *cid_affectedpp = NULL ; *nid_affectedpp = NULL ; clmCastActors (&mx, &cl, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95) ; mclxFree(&cl_on_cl) ; mclxFree(&cl_on_el) ; mclvMakeConstant(cid_affected, 1.0) ; mclvMakeConstant(nid_affected, 1.0) ; for (i=0;i<N_COLS(cl_adj);i++) cl_adj->cols[i].val = 0.5 /* Proceed with smallest clusters first. * Caller has to take care of mclxColumnsRealign */ ; for (i=0;i<N_COLS(cl);i++) { mclv* clself = cl->cols+i /* Only consider nodes in clusters of * size <= cls_size_max */ ; if (cls_size_max && clself->n_ivps > cls_size_max) break /* Clusters that have been marked for inclusion * cannot play. */ ; if (cl_adj->cols[i].val > 1) continue ; for (j=0;j<clself->n_ivps;j++) { long nid = clself->ivps[j].idx ; long nos = mclvGetIvpOffset(mx->dom_cols, nid, -1) ; mclv* clidvec = mclxGetVector(el_on_cl, nid, RETURN_ON_FAIL, NULL) ; double eff_alien_bsf = 0.0, eff_alien_max_bsf = 0.0 /* best so far*/ ; double eff_self = 0.0, eff_self_max = 0.0 ; long cid_alien = -1, cid_self = -1 ; clmVScore sc_self = { 0 }, sc_alien = { 0 } ; dim f ; if (nos < 0 || !clidvec) { mcxErr ("clmDumpNodeScores panic", "node <%ld> does not belong", nid) ; continue ; } clmVScanDomain(mx->cols+nos, clself, &sc) ; clmVScoreCoverage(&sc, &eff_self, &eff_self_max) ; cid_self = clself->vid ; sc_self = sc ; if (loggit) mcxLog2 ( us , "node %ld in cluster %ld eff %.3f,%.3f sum %.3f" , nid , cid_self , eff_self , eff_self_max , sc.sum_i ) ; for (f=0;f<clidvec->n_ivps;f++) { long cid = clidvec->ivps[f].idx ; mclv* clvec = mclxGetVector(cl, cid, RETURN_ON_FAIL, NULL) /* ^ overdoing: cid == clvec->vid */ ; double eff, eff_max ; if (!clvec) { mcxErr ( "clmAdjust panic" , "cluster <%ld> node <%ld> mishap" , cid , nid ) ; continue ; } /* fixme: document or remove first condition * */ if ((0 && clvec->n_ivps <= clself->n_ivps) || clvec->vid == cid_self) continue ; clmVScanDomain(mx->cols+nos, clvec, &sc) ; clmVScoreCoverage(&sc, &eff, &eff_max) #if 0 # define PIVOT eff > eff_alien_bsf #else # define PIVOT eff_max > eff_alien_max_bsf #endif ; if ( PIVOT || sc.sum_i >= 0.5 ) eff_alien_bsf = eff , eff_alien_max_bsf = eff_max , cid_alien = clvec->vid , sc_alien = sc ; if (sc.sum_i >= 0.5) break ; } if (loggit) mcxLog2 ( us , " -> best alien %ld eff %.3f,%.3f sum %.3f" , cid_alien , eff_alien_bsf , eff_alien_max_bsf , sc_alien.sum_i ) /* below: use sum_i as mass fraction * (clmAdjust framework uses stochastic * matrix) */ ; if ( cid_alien >= 0 && cid_self >= 0 && f1 * sc_alien.max_i >= sc_self.max_i && ( ( eff_alien_bsf > eff_self && sc_alien.sum_i > sc_self.sum_i ) || ( pow(sc_alien.sum_i, f2) >= sc_self.sum_i && pow(eff_self, f2) <= eff_alien_bsf ) ) /* So, if max is reasonable * and efficiency is better and mass is better * or if mass is ridiculously better -> move * Somewhat intricate and contrived, yes. */ ) { mclv* acceptor = mclxGetVector(cl_adj, cid_alien, RETURN_ON_FAIL, NULL) ; mclv* donor = mclxGetVector(cl_adj, cid_self, RETURN_ON_FAIL, NULL) ; if (!donor || !acceptor || acceptor == donor) continue ; mclvInsertIdx(donor, nid, 0.0) ; mclvInsertIdx(acceptor, nid, 1.0) ; acceptor->val = 1.5 ; if (mcxLogGet(MCX_LOG_LIST)) { mclv* nb = mx->cols+nos ; double mxv = mclvMaxValue(nb) ; double avg = nb->n_ivps ? mclvSum(nb) / nb->n_ivps : -1.0 ; mcxLog ( MCX_LOG_LIST , us , "mov %ld (%ld %.2f %.2f)" " %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)" " to %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)" , nid , (long) nb->n_ivps, mxv, avg , cid_self , eff_self, eff_self_max, sc_self.sum_i, sc_self.max_i , (ulong) (sc_self.n_meet + sc_self.n_ddif) , cid_alien , eff_alien_bsf, eff_alien_max_bsf, sc_alien.sum_i, sc_alien.max_i , (ulong) (sc_alien.n_meet + sc_alien.n_ddif) ) ; } n_adjusted++ ; mclvInsertIdx(cid_affected, cid_alien, 2.0) ; mclvInsertIdx(cid_affected, cid_self, 2.0) ; mclvInsertIdx(nid_affected, nid, 2.0) ; } } } mclxFree(&el_on_cl) ; mclxFree(&el_to_cl) ; for (i=0;i<N_COLS(cl_adj);i++) cl_adj->cols[i].val = 0.0 ; mclxMakeCharacteristic(cl) ; if (!n_adjusted) { mclxFree(&cl_adj) ; mclvFree(&cid_affected) ; mclvFree(&nid_affected) ; return 0 ; } mclxUnary(cl_adj, fltxCopy, NULL) ; mclxMakeCharacteristic(cl_adj) /* FIRST REMOVE ENTRIES set to zero (sssst now .. */ /* ...) and THEN make it characteristic again */ ; mclvUnary(cid_affected, fltxGT, &bar_affected) ; mclvUnary(nid_affected, fltxGT, &bar_affected) ; *cl_adjustedpp = cl_adj ; *cid_affectedpp = cid_affected ; *nid_affectedpp = nid_affected ; return n_adjusted ; }
/* this aids in finding heuristically likely starting points * for long shortest paths, by looking at dead ends * in the lattice. * experimental, oefully underdocumented. */ static dim diameter_rough ( mclv* vec , mclx* mx , u8* rough_scratch , long* rough_priority ) { mclv* curr = mclvInsertIdx(NULL, vec->vid, 1.0) ; mclpAR* par = mclpARensure(NULL, 1024) ; dim d = 0, n_dead_ends = 0, n_dead_ends_res = 0 ; memset(rough_scratch, 0, N_COLS(mx)) ; rough_scratch[vec->vid] = 1 /* seen */ ; rough_priority[vec->vid] = -1 /* remove from priority list */ ; while (1) { mclp* currivp = curr->ivps ; dim t ; mclpARreset(par) ; while (currivp < curr->ivps + curr->n_ivps) { mclv* ls = mx->cols+currivp->idx ; mclp* newivp = ls->ivps ; int hit = 0 ; while (newivp < ls->ivps + ls->n_ivps) { u8* tst = rough_scratch+newivp->idx ; if (!*tst || *tst & 2) { if (!*tst) mclpARextend(par, newivp->idx, 1.0) ; *tst = 2 ; hit = 1 ; } newivp++ ; } if (!hit && rough_priority[currivp->idx] >= 0) rough_priority[currivp->idx] += d+1 , n_dead_ends_res++ ; else if (!hit) n_dead_ends++ /* ,fprintf(stderr, "[%ld->%ld]", (long) currivp->idx, (long) rough_priority[currivp->idx]) */ ; #if 0 if (currivp->idx == 115 || currivp->idx == 128) fprintf(stdout, "pivot %d node %d d %d dead %d pri %d\n", (int) vec->vid, (int) currivp->idx, d, (int) (1-hit), (int) rough_priority[currivp->idx]) #endif ; currivp++ ; } if (!par->n_ivps) break ; d++ ; mclvFromIvps(curr, par->ivps, par->n_ivps) ; for (t=0;t<curr->n_ivps;t++) rough_scratch[curr->ivps[t].idx] = 1 ; } mclvFree(&curr) ; mclpARfree(&par) ;if(0)fprintf(stdout, "deadends %d / %d\n", (int) n_dead_ends, (int) n_dead_ends_res) ; return d ; }
int main ( int argc , const char* argv[] ) { mcxIO *xfcl = NULL , *xfctrl = NULL , *xfcoarse= NULL , *xfbase = NULL , *xfcone = NULL , *xfstack = NULL ; mclx* mxbase, *cl, *cl_coarse, *clprev, *clctrl = NULL ; mcxTing* shared = mcxTingNew("-I 4 -overlap split") ; mcxbool root = TRUE ; mcxbool have_bootstrap = FALSE ; const char* plexprefix = NULL ; const char* stem = "mcl" ; mcxbool same = FALSE ; mcxbool plex = TRUE ; mcxbool add_transpose = FALSE ; const char* b2opts = NULL ; const char* b1opts = NULL ; mcxbits write_modes = 0 ; mclAlgParam* mlp = NULL ; mcxstatus status = STATUS_OK ; mcxstatus parse_status = STATUS_OK ; int multiplex_idx = 1 ; int N = 0 ; int n_ite = 0 ; dim n_components = 0, n_cls = 0 ; int a = 1, i= 0 ; int n_arg_read = 0 ; int delta = 0 ; mcxOption* opts, *opt ; mcxTing* cline = mcxOptArgLine(argv+1, argc-1, '\'') ; mclgTF* transform = NULL ; mcxTing* transform_spec = NULL ; double iaf = 0.84 ; mclx_app_init(stderr) ; if (0) mcxLogLevel = MCX_LOG_AGGR | MCX_LOG_MODULE | MCX_LOG_IO | MCX_LOG_GAUGE | MCX_LOG_WARN ; else mcxLogLevelSetByString("xf4g1") ; mcxOptAnchorSortById(options, sizeof(options)/sizeof(mcxOptAnchor) -1) ; if (argc == 2 && argv[1][0] == '-' && mcxOptIsInfo(argv[1], options)) delta = 1 ; else if (argc < 2) { help(options, shared) ; exit(0) ; } opts = mcxOptExhaust (options, (char**) argv, argc, 2-delta, &n_arg_read, &parse_status) ; if (parse_status != STATUS_OK) { mcxErr(me, "initialization failed") ; exit(1) ; } ; for (opt=opts;opt->anch;opt++) { mcxOptAnchor* anch = opt->anch ; switch(anch->id) { case MY_OPT_HELP : help(options, shared) ; exit(0) ; case MY_OPT_APROPOS : help(options, shared) ; exit(0) ; break ; case MY_OPT_NMAX : N = atoi(opt->val) ; break ; case MY_OPT_Z : help(NULL, shared) ; exit(0) ; break ; case MY_OPT_SHARED : mcxTingPrintAfter(shared, " %s", opt->val) ; break ; case MY_OPT_TRANSFORM : transform_spec = mcxTingNew(opt->val) ; break ; case MY_OPT_B1 : b1opts = opt->val ; break ; case MY_OPT_B2 : b2opts = opt->val ; break ; case ALG_OPT_SETENV : mcxSetenv(opt->val) ; break ; case ALG_OPT_QUIET : mcxLogLevelSetByString(opt->val) ; break ; case MY_OPT_HDP : hdp_g = atof(opt->val) ; break ; case MY_OPT_ADDTP : add_transpose = TRUE ; break ; case MY_OPT_ANNOT /* only used in command-line copying */ : break ; case MY_OPT_IAF : iaf = atof(opt->val) / 100 ; break ; case MY_OPT_WRITE : if (strstr(opt->val, "stack")) write_modes |= OUTPUT_STACK ; if (strstr(opt->val, "cone")) write_modes |= OUTPUT_CONE ; if (strstr(opt->val, "levels")) write_modes |= OUTPUT_STEPS ; if (strstr(opt->val, "coarse")) write_modes |= OUTPUT_COARSE ; if (strstr(opt->val, "base")) write_modes |= OUTPUT_BASE ; break ; case MY_OPT_BASENAME : xfbase = mcxIOnew(opt->val, "w") ; break ; case MY_OPT_COARSE : xfcoarse = mcxIOnew(opt->val, "w") ; break ; case MY_OPT_CONE : xfcone = mcxIOnew(opt->val, "w") ; break ; case MY_OPT_ROOT : root = strchr("1yY", (u8) opt->val[0]) ? TRUE : FALSE ; break ; case MY_OPT_STACK : xfstack = mcxIOnew(opt->val, "w") ; break ; case MY_OPT_STEM : stem = opt->val ; break ; case MY_OPT_MULTIPLEX : plex = strchr("yY1", (unsigned char) opt->val[0]) ? TRUE : FALSE ; break ; case MY_OPT_DISPATCH : dispatch_g = TRUE ; break ; case MY_OPT_INTEGRATE : integrate_g = TRUE ; break ; case MY_OPT_CONTRACT : break ; case MY_OPT_SUBCLUSTERX : subclusterx_g = TRUE, subcluster_g = TRUE ; break ; case MY_OPT_SUBCLUSTER : subcluster_g = TRUE ; break ; case MY_OPT_CONTROL : xfctrl = mcxIOnew(opt->val, "r") ; break ; case MY_OPT_CL : xfcl = mcxIOnew(opt->val, "r") ; have_bootstrap = TRUE ; break ; case MY_OPT_VERSION : app_report_version(me) ; exit(0) ; default : mcxExit(1) ; } } mcxOptFree(&opts) ; a = 2 + n_arg_read ; if (a < argc) { if (strcmp(argv[a], "--")) mcxDie ( 1 , me , "trailing %s options require standalone '--' separator (found %s)" , integrate_g ? "integrate" : "mcl" , argv[a] ) ; a++ ; } if (subcluster_g + dispatch_g + integrate_g > 1) mcxDie(1, me, "too many modes!") ; if (N && N < argc-a) mcxErr(me, "-n argument leaves spurious option specifications") ; srandom(mcxSeed(89315)) ; signal(SIGALRM, mclSigCatch) ; if (dispatch_g) plexprefix = "dis" ; else if (!write_modes || (write_modes & OUTPUT_STEPS)) plexprefix = stem ; { mcxTing* tg = mcxTingEmpty(NULL, 30) ; if ((write_modes & OUTPUT_COARSE) && !xfcoarse) mcxTingPrint(tg, "%s.%s", stem, "coarse") , xfcoarse = mcxIOnew(tg->str, "w") ; if ((write_modes & OUTPUT_BASE) && !xfbase) mcxTingPrint(tg, "%s.%s", stem, "base") , xfbase = mcxIOnew(tg->str, "w") ; if ( (!write_modes || (write_modes & OUTPUT_CONE)) && !xfcone ) { mcxTingPrint(tg, "%s.%s", stem, "cone") ; xfcone = mcxIOnew(tg->str, "w") ; mcxIOopen(xfcone, EXIT_ON_FAIL) ; fprintf(xfcone->fp, "# %s %s\n", argv[0], cline->str) ; } if ((write_modes & OUTPUT_STACK) && !xfstack) { mcxTingPrint(tg, "%s.%s", stem, "stack") ; xfstack = mcxIOnew(tg->str, "w") ; mcxIOopen(xfstack, EXIT_ON_FAIL) ; fprintf(xfstack->fp, "# %s %s\n", argv[0], cline->str) ; } mcxTingFree(&tg) ; } if (integrate_g) { for (i=a;i<argc;i++) { mcxIO* xf = mcxIOnew(argv[i], "r") ; mclx* cl = mclxRead(xf, EXIT_ON_FAIL) ; mclxCatPush(&stck_g, cl, NULL, NULL, mclxCBdomStack, NULL, "dummy-integrate", n_cls++) ; } integrate_results(&stck_g) ; if (xfstack) mclxCatWrite(xfstack, &stck_g, MCLXIO_VALUE_NONE, RETURN_ON_FAIL) ; if (xfcone) mclxCatConify(&stck_g) , mclxCatWrite(xfcone, &stck_g, MCLXIO_VALUE_NONE, RETURN_ON_FAIL) ; return 0 ; } for (i=a;i<argc;i++) { if (get_interface(NULL, argv[1], shared->str, argv[i], NULL, 0, RETURN_ON_FAIL)) mcxDie(1, me, "error while testing mcl options viability (%s)", argv[i]) ; } mcxLog(MCX_LOG_APP, me, "pid %ld", (long) getpid()) /* make sure clusters align with this cluster * status: does not seem promising. */ ; if (xfctrl) clctrl = mclxRead(xfctrl, EXIT_ON_FAIL) ; /* * Below: compute cl and mxbase. */ ; if (xfcl) { cl = mclxRead(xfcl, EXIT_ON_FAIL) ; write_clustering (cl, NULL, xfcone, xfstack, plexprefix, multiplex_idx++, NULL) ; if (subcluster_g || dispatch_g) mclxCatPush(&stck_g, cl, NULL, NULL, mclxCBdomStack, NULL, "dummy-mclcm", n_cls++) ; mcxIOfree(&xfcl) ; if (!b1opts && !b2opts) b1opts = "" ; mxbase = get_base(argv[1], NULL, b1opts, b2opts) ; } else { mcxbits CACHE = b1opts || b2opts ? ALG_CACHE_INPUT /* cache, transform later */ : ALG_CACHE_START ; get_interface ( &mlp , argv[1] , shared->str , a < argc ? argv[a] : NULL , NULL , CACHE , EXIT_ON_FAIL ) ; if (a < argc) a++ ; if ((status = mclAlgorithm(mlp)) == STATUS_FAIL) { mcxErr(me, "failed at initial run") ; exit(1) ; } cl_coarse = mclAlgParamRelease(mlp, mlp->cl_result) ; cl_coarse = control_test(cl_coarse, clctrl) ; write_clustering (cl_coarse, NULL, xfcone, xfstack, plexprefix, multiplex_idx++, mlp) ; if (subcluster_g || dispatch_g) mclxCatPush(&stck_g, cl_coarse, NULL, NULL, mclxCBdomStack, NULL, "dummy-mclcm", n_cls++) ; cl = cl_coarse ; n_ite++ ; if (b1opts || b2opts) { mclx* mx_input = mclAlgParamRelease(mlp, mlp->mx_input) ; mxbase = get_base(NULL, mx_input, b1opts, b2opts) /* ^ get_base frees mx_input */ ; } else mxbase = mclAlgParamRelease(mlp, mlp->mx_start) ; } clprev = cl ; mclAlgParamFree(&mlp, TRUE) ; if (xfbase) { dim nre = mclxNrofEntries(mxbase) ; mcxLog(MCX_LOG_APP, me, "base has %lu entries", (ulong) nre) ; mclxaWrite(mxbase, xfbase, MCLXIO_VALUE_GETENV, EXIT_ON_FAIL) ; mcxIOclose(xfbase) ; } if (subcluster_g || dispatch_g) iaf = iaf ? 1/iaf : 1.414 ; while ( (!dispatch_g && (!N || n_ite < N)) || (dispatch_g && a < argc) ) { mclx* mx_coarse = NULL, *clnext = NULL ; dim dist_new_prev = 0, dist_prev_new = 0 ; mclx* clnew = NULL ; mcxbool faith = FALSE ; double inflation = -1.0 ; if (subcluster_g) mx_coarse = subclusterx_g ? mclxBlockPartition(mxbase, clprev, 50) : mclxBlockUnion(mxbase, clprev) /* have to copy mxbase as mx_coarse is freed. * Even if it were not freed, it is probably transformed. */ ; else if (dispatch_g) mx_coarse = mclxCopy(mxbase) ; else { mx_coarse = get_coarse(mxbase, clprev, add_transpose) ; if (n_ite == 1) { mclx* cc = clmUGraphComponents(mx_coarse, NULL) /* fixme; mx_coarse garantueed UD ? */ ; n_components = N_COLS(cc) ; mclxFree(&cc) ; } } if (xfcoarse) write_coarse(xfcoarse, mx_coarse) ; get_interface ( &mlp , NULL , shared->str , a < argc ? argv[a] : NULL , mx_coarse , ALG_CACHE_START , EXIT_ON_FAIL ) ; inflation = mlp->mpp->mainInflation ; BIT_OFF(mlp->modes, ALG_DO_SHOW_PID | ALG_DO_SHOW_JURY) ; if ((status = mclAlgorithm(mlp)) == STATUS_FAIL) { mcxErr(me, "failed") ; mcxExit(1) ; } cl_coarse = mclAlgParamRelease(mlp, mlp->cl_result) ; if (xfcoarse) mclxaWrite(cl_coarse, xfcoarse, MCLXIO_VALUE_NONE, RETURN_ON_FAIL) ; if (dispatch_g || subcluster_g) clnext = cl_coarse ; else clnext = mclxCompose(clprev, cl_coarse, 0) , clnext = control_test(clnext, clctrl) , mclxFree(&cl_coarse) ; clmSJDistance (clprev, clnext, NULL, NULL, &dist_prev_new, &dist_new_prev) ; if (dist_prev_new + dist_new_prev) { write_clustering (clnext, clprev, xfcone, xfstack, plexprefix, multiplex_idx++, mlp) ; clnew = clnext ; if (subcluster_g || dispatch_g) mclxCatPush(&stck_g, clnext, NULL, NULL, mclxCBdomStack, NULL, "dummy-mclcm", n_cls++) ; else mclxFree(&clprev) ; clprev = clnew ; } else if ( N_COLS(clnext) > n_components && inflation * iaf > 1.2 && inflation * iaf < 10 ) { mclxFree(&clnext) ; inflation *= iaf ; mcxTingPrintAfter(shared, " -I %.2f", inflation) ; mcxLog(MCX_LOG_APP, me, "setting inflation to %.2f", inflation) ; faith = TRUE ; } /* i.e. vanilla mode, contraction */ else if (!subcluster_g && !dispatch_g) { mclx* cc ; mclxFree(&clnext) ; mclxAddTranspose(mx_coarse, 1.0) ; cc = clmUGraphComponents(mx_coarse, NULL) ; if (N_COLS(cc) < N_COLS(clprev)) { mclx* ccback = mclxCompose(clprev, cc, 0) ; write_clustering (ccback, clprev, xfcone, xfstack, plexprefix, multiplex_idx++, NULL) ; mclxFree(&clprev) ; clprev = ccback ; mcxTell(me, "connected components added as root clustering") ; } if (root && N_COLS(cc) > 1) { mclx* root = mclxCartesian ( mclvCanonical(NULL, 1, 0) , mclvCopy(NULL, mxbase->dom_cols) , 1.0 ) ; write_clustering (root, clprev, xfcone, xfstack, plexprefix, multiplex_idx++, NULL) ; mclxFree(&clprev) ; mcxTell(me, "universe added as root clustering") ; clprev = root ; clnew = NULL ; } mclxFree(&cc) ; } else if (subcluster_g || dispatch_g) mclxFree(&clnext) ; mclAlgParamFree(&mlp, TRUE) /* frees mx_coarse */ ; if (!clnew && !faith) { same = TRUE ; break ; } a++ ; if (dispatch_g && a == argc) break ; n_ite++ ; } if (same) mcxLog(MCX_LOG_MODULE, me, "no further contraction: halting") ; if (dispatch_g) integrate_results(&stck_g) ; else if (subcluster_g) mclxCatReverse(&stck_g) ; if (dispatch_g || subcluster_g) { dim j ; if (xfstack) mclxCatWrite(xfstack, &stck_g, MCLXIO_VALUE_NONE, RETURN_ON_FAIL) ; if (xfcone && ! mclxCatConify(&stck_g)) mclxCatWrite(xfcone, &stck_g, MCLXIO_VALUE_NONE, RETURN_ON_FAIL) ; for (j=0;j<stck_g.n_level;j++) { mclxAnnot* an = stck_g.level+j ; mclxFree(&an->mx) ; } mcxFree(stck_g.level) ; } mcxIOfree(&xfcoarse) ; mcxIOfree(&xfbase) ; mcxIOfree(&xfcone) ; mcxIOfree(&xfstack) ; mcxTingFree(&shared) ; if (!dispatch_g && !subcluster_g) /* fixme fixme fixme */ mclxFree(&clprev) ; mclxFree(&mxbase) ; mclvFree(&start_col_sums_g) ; mcxTingFree(&cline) ; helpful_reminder() ; return STATUS_OK ; }