void handle_tf ( mclx* mx , mcxTing* sa ) { mclgTF* transform = mclgTFparse(NULL, sa) ; if (!transform) mcxErr(me, "tf spec does not parse") ; else mclgTFexec(mx, transform) ; mclgTFfree(&transform) ; }
static void vary_threshold ( mcxIO* xf , FILE* fp , int vary_a , int vary_z , int vary_s , int vary_n , unsigned mode ) { dim cor_i = 0, j ; int step ; mclx* mx ; unsigned long noe ; pval* allvals ; dim n_allvals = 0 ; double sum_vals = 0.0 ; mx = mclxRead(xf, EXIT_ON_FAIL) ; mcxIOclose(xf) ; if (transform) mclgTFexec(mx, transform) ; noe = mclxNrofEntries(mx) ; allvals = mcxAlloc(noe * sizeof allvals[0], EXIT_ON_FAIL) ; if (!weight_scale) { if (mode == 'c') weight_scale = 1.0 ; else weight_scale = vary_n ; } n_allvals = get_n_sort_allvals(mx, allvals, noe, &sum_vals, FALSE) ; if (mode == 'c') { double smallest = n_allvals ? allvals[n_allvals-1] : -DBL_MAX ; if (vary_a * 1.0 / vary_n < smallest) { while (vary_a * 1.0 / vary_n < smallest) vary_a++ ; vary_a-- ; } mcxTell ( me , "smallest correlation is %.2f, using starting point %.2f" , smallest , vary_a * 1.0 / vary_n ) ; } if (output_flags & OUTPUT_TABLE) { ;fprintf(fp, "L\tD\tR\tS\tcce\tEWmean\tEWmed\tEWiqr\tNDmean\tNDmed\tNDiqr\tCCF\t%s\n", mode == 'k' ? "kNN" : mode == 'l' ? "N" : "Cutoff") ;} else { if (output_flags & OUTPUT_KEY) { ;fprintf(fp, "-------------------------------------------------------------------------------\n") ;fprintf(fp, " L Percentage of nodes in the largest component\n") ;fprintf(fp, " D Percentage of nodes in components of size at most %d [-div option]\n", (int) divide_g) ;fprintf(fp, " R Percentage of nodes not in L or D: 100 - L -D\n") ;fprintf(fp, " S Percentage of nodes that are singletons\n") ;fprintf(fp, " cce Expected size of component, nodewise [ sum(sz^2) / sum^2(sz) ]\n") ;fprintf(fp, "*EW Edge weight traits (mean, median and IQR, all scaled!)\n") ;fprintf(fp, " Scaling is used to avoid printing of fractional parts throughout.\n") ;fprintf(fp, " The scaling factor is %.2f [-report-scale option]\n", weight_scale) ;fprintf(fp, " ND Node degree traits [mean, median and IQR]\n") ;fprintf(fp, " CCF Clustering coefficient %s\n", compute_flags & COMPUTE_CLCF ? "(not computed; use --clcf to include this)" : "") ;fprintf(fp, " eff Induced component efficiency %s\n", compute_flags & COMPUTE_EFF ? "(not computed; use --eff to include this)" : "") ;if (mode == 'c') fprintf(fp, "Cutoff The threshold used.\n") ;else if (mode == 't') fprintf(fp, "*Cutoff The threshold with scale factor %.2f and fractional parts removed\n", weight_scale) ;else if (mode == 'k') fprintf(fp, "k-NN The knn parameter\n") ;else if (mode == 'l') fprintf(fp, "N The knn parameter (merge mode)\n") ;else if (mode == 'n') fprintf(fp, "ceil The ceil parameter\n") ;fprintf(fp, "Total number of nodes: %lu\n", (ulong) N_COLS(mx)) ;} fprintf(fp, "-------------------------------------------------------------------------------\n") ;fprintf(fp, " L D R S cce *EWmean *EWmed *EWiqr NDmean NDmed NDiqr CCF eff %6s \n", mode == 'k' ? "k-NN" : mode == 'l' ? "N" : mode == 'n' ? "Ceil" : "Cutoff") ;fprintf(fp, "-------------------------------------------------------------------------------\n") ; } for (step = vary_a; step <= vary_z; step += vary_s) { double cutoff = step * 1.0 / vary_n ; double eff = -1.0 ; mclv* nnodes = mclvCanonical(NULL, N_COLS(mx), 0.0) ; mclv* degree = mclvCanonical(NULL, N_COLS(mx), 0.0) ; dim i, n_sample = 0 ; double cor, y_prev, iqr = 0.0 ; mclx* cc = NULL, *res = NULL ; mclv* sz, *ccsz = NULL ; int step2 = vary_z + vary_a - step ; sum_vals = 0.0 ; if (mode == 't' || mode == 'c') mclxSelectValues(mx, &cutoff, NULL, MCLX_EQT_GQ) , res = mx ; else if (mode == 'k') { res = rebase_g ? mclxCopy(mx) : mx ; mclxKNNdispatch(res, step2, n_thread_l, 1) ; } else if (mode == 'l') { res = mx ; mclxKNNdispatch(res, step2, n_thread_l, 0) ; } else if (mode == 'n') { res = rebase_g ? mclxCopy(mx) : mx ; mclv* cv = mclgCeilNB(res, step2, NULL, NULL, NULL) ; mclvFree(&cv) ; } sz = mclxColSizes(res, MCL_VECTOR_COMPLETE) ; mclvSortDescVal(sz) ; cc = clmUGraphComponents(res, NULL) /* fixme: user has to specify -tf '#max()' if graph is directed */ ; if (cc) { ccsz = mclxColSizes(cc, MCL_VECTOR_COMPLETE) ; if (compute_flags & COMPUTE_EFF) { clmPerformanceTable pftable ; clmPerformance(mx, cc, &pftable) ; eff = pftable.efficiency ; } } if (mode == 't' || mode == 'c') { for ( ; n_allvals > 0 && allvals[n_allvals-1] < cutoff ; n_allvals-- ) ; sum_vals = 0.0 ; for (i=0;i<n_allvals;i++) sum_vals += allvals[i] ; } else if (mode == 'k' || mode == 'n' || mode == 'l') { n_allvals = get_n_sort_allvals(res, allvals, noe, &sum_vals, FALSE) ; } levels[cor_i].sim_median= mcxMedian(allvals, n_allvals, sizeof allvals[0], pval_get_double, &iqr) ; levels[cor_i].sim_iqr = iqr ; levels[cor_i].sim_mean = n_allvals ? sum_vals / n_allvals : 0.0 ; levels[cor_i].nb_median = mcxMedian(sz->ivps, sz->n_ivps, sizeof sz->ivps[0], ivp_get_double, &iqr) ; levels[cor_i].nb_iqr = iqr ; levels[cor_i].nb_mean = mclvSum(sz) / N_COLS(res) ; levels[cor_i].cc_exp = cc ? mclvPowSum(ccsz, 2.0) / N_COLS(res) : 0 ; levels[cor_i].nb_sum = mclxNrofEntries(res) ; if (compute_flags & COMPUTE_CLCF) { mclv* clcf = mclgCLCFdispatch(res, n_thread_l) ; levels[cor_i].clcf = mclvSum(clcf) / N_COLS(mx) ; mclvFree(&clcf) ; } else levels[cor_i].clcf = 0.0 ; levels[cor_i].threshold = mode == 'k' || mode == 'l' || mode == 'n' ? step2 : cutoff ; levels[cor_i].bigsize = cc ? cc->cols[0].n_ivps : 0 ; levels[cor_i].n_single = 0 ; levels[cor_i].n_edge = n_allvals ; levels[cor_i].n_lq = 0 ; if (cc) for (i=0;i<N_COLS(cc);i++) { dim n = cc->cols[N_COLS(cc)-1-i].n_ivps ; if (n == 1) levels[cor_i].n_single++ ; if (n <= divide_g) levels[cor_i].n_lq += n ; else break ; } if (levels[cor_i].bigsize <= divide_g) levels[cor_i].bigsize = 0 ; y_prev = sz->ivps[0].val /* wiki says: A scale-free network is a network whose degree distribution follows a power law, at least asymptotically. That is, the fraction P(k) of nodes in the network having k connections to other nodes goes for large values of k as P(k) ~ k^−g where g is a constant whose value is typically in the range 2<g<3, although occasionally it may lie outside these bounds. */ ; for (i=1;i<sz->n_ivps;i++) { double y = sz->ivps[i].val ; if (y > y_prev - 0.5) continue /* same as node degree seen last */ ; nnodes->ivps[n_sample].val = log( (i*1.0) / (1.0*N_COLS(res))) /* x = #nodes >= k, as fraction */ ; degree->ivps[n_sample].val = log(y_prev ? y_prev : 1) /* y = k = degree of node */ ; n_sample++ ;if(0)fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) y_prev, (int) i, (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val) ; y_prev = y ; } nnodes->ivps[n_sample].val = 0 ; nnodes->ivps[n_sample++].val = log(y_prev ? y_prev : 1) ;if(0){fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) sz->ivps[sz->n_ivps-1].val, (int) N_COLS(res), (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val) ;} ; mclvResize(nnodes, n_sample) ; mclvResize(degree, n_sample) ; cor = pearson(nnodes, degree, n_sample) ; levels[cor_i].degree_cor = cor * cor ;if(0)fprintf(stdout, "cor at cutoff %.2f %.3f\n\n", cutoff, levels[cor_i-1].degree_cor) ; mclvFree(&nnodes) ; mclvFree(°ree) ; mclvFree(&sz) ; mclvFree(&ccsz) ; mclxFree(&cc) ; if(output_flags & OUTPUT_TABLE) { fprintf ( fp , "%lu\t%lu\t%lu\t%lu\t%lu" "\t%6g\t%6g\t%6g" "\t%6g\t%lu\t%6g" , (ulong) levels[cor_i].bigsize , (ulong) levels[cor_i].n_lq , (ulong) N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq , (ulong) levels[cor_i].n_single , (ulong) levels[cor_i].cc_exp , (double) levels[cor_i].sim_mean , (double) levels[cor_i].sim_median , (double) levels[cor_i].sim_iqr , (double) levels[cor_i].nb_mean , (ulong) levels[cor_i].nb_median , (double) levels[cor_i].nb_iqr ) ; if (compute_flags & COMPUTE_CLCF) fprintf(fp, "\t%6g", levels[cor_i].clcf) ; else fputs("\tNA", fp) ; if (eff >= 0.0) fprintf(fp, "\t%4g", eff) ; else fputs("\tNA", fp) ; fprintf(fp, "\t%6g", (double) levels[cor_i].threshold) ; fputc('\n', fp) ; } else { fprintf ( fp , "%3d %3d %3d %3d %7d " "%7.0f %7.0f %6.0f" "%6.1f %6.0f %6.0f" , 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].bigsize) / N_COLS(mx)) , 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_lq) / N_COLS(mx)) , 0 ? 1 : (int) (0.5 + (100.0 * (N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq)) / N_COLS(mx)) , 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_single) / N_COLS(mx)) , 0 ? 1 : (int) (0.5 + levels[cor_i].cc_exp) , 0 ? 1.0 : (double) (levels[cor_i].sim_mean * weight_scale) , 0 ? 1.0 : (double) (levels[cor_i].sim_median * weight_scale) , 0 ? 1.0 : (double) (levels[cor_i].sim_iqr * weight_scale) , 0 ? 1.0 : (double) (levels[cor_i].nb_mean ) , 0 ? 1.0 : (double) (levels[cor_i].nb_median + 0.5 ) , 0 ? 1.0 : (double) (levels[cor_i].nb_iqr + 0.5 ) ) ; if (compute_flags & COMPUTE_CLCF) fprintf(fp, " %3d", 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].clcf))) ; else fputs(" -", fp) ; if (eff >= 0.0) fprintf(fp, " %3d", (int) (0.5 + 1000 * eff)) ; else fputs(" -", fp) ; if (mode == 'c') fprintf(fp, "%8.2f\n", (double) levels[cor_i].threshold) ; else if (mode == 't') fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold * weight_scale) ; else if (mode == 'k' || mode == 'n' || mode == 'l') fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold) ; } ; cor_i++ ; if (res != mx) mclxFree(&res) ; } if (!(output_flags & OUTPUT_TABLE)) { if (weefreemen) { fprintf(fp, "-------------------------------------------------------------------------------\n") ;fprintf(fp, "The graph below plots the R^2 squared value for the fit of a log-log plot of\n") ;fprintf(fp, "<node degree k> versus <#nodes with degree >= k>, for the network resulting\n") ;fprintf(fp, "from applying a particular %s cutoff.\n", mode == 'c' ? "correlation" : "similarity") ;fprintf(fp, "-------------------------------------------------------------------------------\n") ; for (j=0;j<cor_i;j++) { dim jj ; for (jj=30;jj<=100;jj++) { char c = ' ' ; if (jj * 0.01 < levels[j].degree_cor && (jj+1.0) * 0.01 > levels[j].degree_cor) c = 'X' ; else if (jj % 5 == 0) c = '|' ; fputc(c, fp) ; } if (mode == 'c') fprintf(fp, "%8.2f\n", (double) levels[j].threshold) ; else fprintf(fp, "%8.0f\n", (double) levels[j].threshold * weight_scale) ; } fprintf(fp, "|----+----|----+----|----+----|----+----|----+----|----+----|----+----|--------\n") ;fprintf(fp, "| R^2 0.4 0.5 0.6 0.7 0.8 0.9 | 1.0 -o)\n") ;fprintf(fp, "+----+----+----+----+----+---------+----+----+----+----+----+----+----+ /\\\\\n") ;fprintf(fp, "| 2 4 6 8 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | _\\_/\n") ;fprintf(fp, "+----+----|----+----|----+----|----+----|----+----|----+----|----+----+--------\n") ; } else fprintf(fp, "-------------------------------------------------------------------------------\n") ; } mclxFree(&mx) ; mcxFree(allvals) ; }
static mcxstatus collectMain ( int argc , const char* argv[] ) { aggr* collect = NULL ; int a ; dim i, collect_n = 0 ; mclTab* tab = NULL ; double avg = 0.0 ; mclx* aggr = NULL, *mx = NULL /* mcxHash* map = NULL */ ; mcxIO* xfout = mcxIOnew(out_g, "w") ; mcxIOopen(xfout, EXIT_ON_FAIL) ; if ( transform_spec && (!(transform = mclgTFparse(NULL, transform_spec))) ) mcxDie(1, me, "input -tf spec does not parse") ; if (xftab_g) tab = mclTabRead(xftab_g, NULL, EXIT_ON_FAIL) /* map not used; perhaps someday we want to map labels to indexes? * in that case, we could also simply reverse the tab when reading .. , map = mclTabHash(tab) */ ; if (!collect_g) mcxDie(1, me, "require one of --paste, --add-column, --add-matrix") ; if (argc) { if (collect_g == 'm') { mcxIO* xf = mcxIOnew(argv[0], "r") ; mcxIOopen(xf, EXIT_ON_FAIL) ; aggr = mclxRead(xf, EXIT_ON_FAIL) ; mcxIOfree(&xf) ; } else collect_n = do_a_file(&collect, argv[0], 0) ; } if (tab && collect_n != N_TAB(tab) + (header_g ? 1 : 0)) mcxErr ( me , "tab has differing size (%lu vs %lu), continuing anyway" , (ulong) N_TAB(tab) , (ulong) (collect_n ? collect_n -1 : 0) ) ; for (a=1;a<argc;a++) { if (collect_g == 'm') { mcxIO* xf = mcxIOnew(argv[a], "r") ; mcxIOopen(xf, EXIT_ON_FAIL) ; mx = mclxRead(xf, EXIT_ON_FAIL) ; mclxAugment(aggr, mx, fltop_g) ; mcxIOfree(&xf) ; mclxFree(&mx) ; } else do_a_file(&collect, argv[a], collect_n) ; } if (collect_g == 'm') { if (transform) mclgTFexec(aggr, transform) ; if (mcx_wb_g) mclxbWrite(aggr, xfout, EXIT_ON_FAIL) ; else mclxWrite(aggr, xfout, MCLXIO_VALUE_GETENV, EXIT_ON_FAIL) ; mcxIOclose(xfout) ; exit(0) ; } /* fimxe: dispatch on binary_g */ for (i=0;i<collect_n;i++) { const char* lb = collect[i].label ; if (!i && collect[i].columns && collect_g != 'p') { fprintf(xfout->fp, "%s\t%s\n", lb, collect[i].columns->str) ; continue ; } if (tab && (!header_g || i > 0)) { unsigned u = atoi(lb) ; lb = mclTabGet(tab, u, NULL) ; if (TAB_IS_NA(tab, lb)) mcxDie(1, me, "no label found for index %ld - abort", (long) u) ; } if (summary_g) avg += collect[i].val ; else { if (collect_g == 'p') fprintf(xfout->fp, "%s%s\n", lb, collect[i].columns->str) ; else fprintf(xfout->fp, "%s\t%.8g\n", lb, collect[i].val) ; } } if (summary_g && collect_n) { dim middle1 = (collect_n-1)/2, middle2 = collect_n/2 ; qsort(collect, collect_n, sizeof collect[0], aggr_cmp_val) ; avg /= collect_n ; fprintf /* --summary option is a bit rubbish interface-wise */ ( xfout->fp , "%g %g %g %g\n" , collect[0].val , (collect[middle1].val + collect[middle2].val) / 2 , collect[collect_n-1].val , avg ) ; } return STATUS_OK ; }