/*
 * Set up a heap with room for heapSize elements of elemSize bytes each.
 *
 *    h         heap struct to initialise, or NULL to have one allocated
 *    heapSize  capacity, in elements
 *    elemSize  size of one element, in bytes
 *    cmp       comparison callback stored in the heap
 *
 * Returns the heap, or NULL on failure.  When the element storage cannot be
 * obtained (including when heapSize * elemSize wraps around) the heap is
 * handed to mcxHeapFree() before NULL is returned, as before.
*/
mcxHeap* mcxHeapNew
(  mcxHeap* h
,  dim      heapSize
,  dim      elemSize
,  int      (*cmp) (const void* lft, const void* rgt)
)
{
    mcxHeap *heap = mcxHeapInit(h);
    dim      n_bytes;

    if (!heap)
        return NULL;

    n_bytes = heapSize * elemSize;

    /* a product that does not divide back indicates wrap-around */
    if (elemSize && n_bytes / elemSize != heapSize)
        heap->base = NULL;
    else
        heap->base = mcxAlloc(n_bytes, RETURN_ON_FAIL);

    if (!heap->base) {
        mcxHeapFree(&heap);
        return NULL;
    }

    heap->heapSize   = heapSize;
    heap->elemSize   = elemSize;
    heap->cmp        = cmp;
    heap->n_inserted = 0;

    return heap;
}
/*
 * Prepare the search state in md for the given pattern.
 *
 * Stores the pattern pointer (not a copy) and its length, allocates the
 * circle buffer with one int per pattern byte, and fills the 256-entry
 * shift table: every byte value starts at the pattern length, and each
 * pattern byte except the last gets its distance to the pattern end.
*/
static void mcxio_newpat
(  mcxIOpat*   md
,  const char* pattern
)
{
    int        *shift = md->tbl;
    int         len   = strlen(pattern);     /* truncintok */
    const char *p;
    int         c;

    md->circle = mcxAlloc(len * sizeof(int), EXIT_ON_FAIL);
    md->pat    = pattern;
    md->patlen = len;
    p          = md->pat;

    /* initialize */
    for (c = 0; c < 256; c++)
        shift[c] = len;

    for (c = 0; c < len - 1; c++)
        shift[(uchar) p[c]] = len - c - 1;

#if DEBUG
    for (c = 0; c < len; c++)
        fprintf(stderr, "shift value for %c is %d\n", p[c], shift[(uchar) p[c]]);
#endif

    md->circle_last = len - 1;
}
/*
 * Create a hash table.
 *
 *    n_buckets   requested bucket count; 0 is reported and treated as 2.
 *                The table gets 2^b buckets, where b is the number of
 *                significant bits in n_buckets (so 4 yields 8).
 *    hash        hash callback stored in the table
 *    cmp         key comparison callback stored in the table
 *
 * Returns the new table, or NULL when the bucket count is too large to be
 * represented or when an allocation fails.
*/
mcxHash* mcxHashNew
(  dim   n_buckets
,  u32   (*hash) (const void *a)
,  int   (*cmp)  (const void *a, const void *b)
)
{
    mcxHash        *h;
    unsigned        n_bits   = 0;
    /* highest shift count that is safe for dim whether it is signed or not;
     * assumes 8-bit bytes
    */
    const unsigned  max_bits = sizeof(dim) * 8 - 2;

    if (!n_buckets) {
        mcxErr("mcxHashNew strange", "void alloc request");
        n_buckets = 2;
    }

    while (n_buckets) {
        n_buckets >>= 1;
        n_bits++;
    }

    /* The original computed (1 << n_bits) in int, which is undefined once
     * n_bits reaches the width of int.  Shift in dim and refuse counts
     * that cannot be represented.
    */
    if (n_bits > max_bits) {
        mcxErr("mcxHashNew", "bucket count too large");
        return NULL;
    }

    if (!(h = mcxAlloc(sizeof(mcxHash), RETURN_ON_FAIL)))
        return NULL;

    h->load      = 0.5;
    h->n_entries = 0;
    h->n_buckets = (dim) 1 << n_bits;
    h->cmp       = cmp;
    h->hash      = hash;
    h->options   = MCX_HASH_OPT_DEFAULTS;

    /* fixme 2nd arg below, have better choice? */
    h->src_link  = mcxGrimNew(sizeof(hash_link), h->n_buckets, MCX_GRIM_ARITHMETIC);

    if (h->src_link)
        h->buckets = mcxNAlloc
                     (  h->n_buckets
                     ,  sizeof(mcx_bucket)
                     ,  mcx_bucket_init
                     ,  RETURN_ON_FAIL
                     );

    /* buckets is only inspected when the link source was obtained */
    if (!h->src_link || !h->buckets) {
        mcxGrimFree(&(h->src_link));
        mcxFree(h);
        return NULL;
    }

    return h;
}
/*
 * Reset a vector to the empty state; void* flavour of the initialiser.
 *
 * vecv is the vector to reset, or NULL to have one allocated.  Returns the
 * vector with no ivps, vid -1 and val 0.0, or NULL if allocation failed.
*/
void* mclvInit_v
(  void* vecv
)
{
    mclv *v = vecv;

    if (!v) {
        v = mcxAlloc(sizeof(mclVector), ENQUIRE_ON_FAIL);
        if (!v)
            return NULL;
    }

    v->ivps   = NULL;
    v->n_ivps = 0;
    v->vid    = -1;
    v->val    = 0.0;

    return v;
}
/*
 * Store index and value in an ivp.
 *
 * ivp may be NULL, in which case one is allocated with EXIT_ON_FAIL.
 * Returns the ivp that was written.
*/
mclIvp* mclpInstantiate
(  mclIvp*  ivp
,  long     index
,  double   value
)
{
    mclIvp *target = ivp;

    if (!target)
        target = mcxAlloc(sizeof(mclIvp), EXIT_ON_FAIL);

    target->idx = index;
    target->val = value;

    return target;
}
/*
 * Allocate an interpretation parameter block (EXIT_ON_FAIL) and fill in
 * its defaults: w_selfval 0.999, w_maxval 0.001, delta 0.01.
*/
mclInterpretParam* mclInterpretParamNew
(  void
)
{
    mclInterpretParam *param = mcxAlloc(sizeof(mclInterpretParam), EXIT_ON_FAIL);

    param->w_selfval = 0.999;
    param->w_maxval  = 0.001;
    param->delta     = 0.01;

    return param;
}
/*
 * Resize a vector to new_n_ivps entries and optionally fill it.
 *
 *    dst_vec      vector to change, or NULL to create one via mclvInit
 *    new_n_ivps   number of entries wanted
 *    src_ivps     if non-NULL, new_n_ivps entries are copied from here;
 *                 if NULL, existing entries are kept and any entries
 *                 beyond the old size are initialised with mclpInit
 *
 * Returns dst_vec, or NULL when memory could not be obtained.  On failure
 * an existing vector is left exactly as it was (same array, same n_ivps);
 * previously the array pointer was dropped while n_ivps kept its old value.
 * The vector itself is never freed here: it could be an array element.
*/
mclVector* mclvInstantiate
(  mclVector*     dst_vec
,  dim            new_n_ivps
,  const mclIvp*  src_ivps
)
{
    mclIvp *new_ivps;
    dim     old_n_ivps;
    dim     n_bytes = new_n_ivps * sizeof new_ivps[0];

    if (!dst_vec && !(dst_vec = mclvInit(NULL)))     /* create */
        return NULL;

    old_n_ivps = dst_vec->n_ivps;

    /* I've had a suspicion that some reallocs might be too lazy
     * to reuse shrunk array space.
    */
    if (old_n_ivps / 2 > new_n_ivps) {
        new_ivps = mcxAlloc(n_bytes, ENQUIRE_ON_FAIL);

        /* only let go of the old array once its replacement exists
         * (or when the vector is being emptied)
        */
        if (new_ivps || !new_n_ivps) {
            if (new_ivps && !src_ivps)
                memcpy(new_ivps, dst_vec->ivps, n_bytes);
            mcxFree(dst_vec->ivps);
            dst_vec->ivps = new_ivps;
        }
    }
    else {
        /* NOTE(review): assumes mcxRealloc follows realloc() and leaves the
         * old block valid when it fails -- confirm against its definition.
        */
        new_ivps = mcxRealloc(dst_vec->ivps, n_bytes, ENQUIRE_ON_FAIL);

        if (new_ivps || !new_n_ivps)
            dst_vec->ivps = new_ivps;
    }

    if (!new_ivps && new_n_ivps) {
        mcxMemDenied(stderr, "mclvInstantiate", "mclIvp", new_n_ivps);
        return NULL;
    }

    if (!src_ivps) {                                 /* resize */
        dim k;
        for (k = old_n_ivps; k < new_n_ivps; k++)
            mclpInit(new_ivps + k);
    }
    else if (new_n_ivps)                             /* copy */
        memcpy(new_ivps, src_ivps, new_n_ivps * sizeof(mclIvp));

    dst_vec->n_ivps = new_n_ivps;
    return dst_vec;
}
/*
 * Allocate a buffer of n_units slots and chain the slots into a free list.
 *
 * Each slot is a memnext header followed by sz_unit payload bytes.  Slot i
 * points to slot i+1; the last slot points to NULL.
 *
 * Returns the buffer, or NULL when n_units is zero, when the total size
 * does not fit in a dim, or when an allocation fails.
*/
grim_buf* grim_buf_new
(  dim sz_unit
,  dim n_units
)
{
    dim        sz_load = sizeof(memnext) + sz_unit;
    dim        n_bytes;
    dim        i;
    grim_buf  *buf;
    char      *units;

    /* with zero units the "n_units - 1" below would wrap around */
    if (!n_units)
        return NULL;

    n_bytes = n_units * sz_load;
    if (n_bytes / sz_load != n_units)          /* multiplication wrapped */
        return NULL;

    if (!(buf = mcxAlloc(sizeof(grim_buf), RETURN_ON_FAIL)))
        return NULL;

    if (!(buf->units = units = mcxAlloc(n_bytes, RETURN_ON_FAIL))) {
        mcxFree(buf);
        return NULL;
    }

    buf->prev    = NULL;
    buf->n_units = n_units;

#if DEBUG
    fprintf(stderr, "Extending grim with <%lu> units\n", (ulong) n_units);
#endif

    for (i = 0; i + 1 < n_units; i++)
        ((memnext*) (units + i * sz_load))->next
            = (memnext*) (units + (i + 1) * sz_load);

    ((memnext*) (units + (n_units - 1) * sz_load))->next = NULL;

    return buf;
}
mcxHeap* mcxHeapInit ( void* h ) { mcxHeap* heap = h ; if (!heap && !(heap = mcxAlloc(sizeof(mcxHeap), RETURN_ON_FAIL))) return NULL ; heap->base = NULL ; heap->heapSize = 0 ; heap->elemSize = 0 ; heap->cmp = NULL ; heap->n_inserted = 0 ; return heap ; }
/*
 * Put an ivp array into the empty state: no entries, nothing allocated,
 * flagged as sorted and unique.
 *
 * mclpar is the array to reset, or NULL to have one allocated with
 * EXIT_ON_FAIL.  Returns the array, or NULL if no array is available.
*/
mclpAR* mclpARinit
(  mclpAR* mclpar
)
{
    mclpAR *ar = mclpar;

    if (!ar) {
        ar = mcxAlloc(sizeof(mclpAR), EXIT_ON_FAIL);
        if (!ar)
            return NULL;
    }

    ar->ivps    = NULL;
    ar->n_ivps  = 0;
    ar->n_alloc = 0;
    ar->sorted  = MCLPAR_SORTED | MCLPAR_UNIQUE;

    return ar;
}
/*
 * Run mclvInflateLine over the columns of mx using mpp->n_ithreads threads.
 *
 * The columns are cut into equal slices of N_COLS / n_threads; the last
 * thread additionally takes the remainder.  Every thread gets its own
 * malloc'ed mclvInflateLine_arg holding its id, column range, the matrix
 * and the power.  This function does not free those arguments
 * (NOTE(review): presumably the worker releases its argument -- confirm).
 *
 * A thread count below 1 is treated as 1.  Failure to allocate an argument
 * or to create a thread is fatal, in line with the EXIT_ON_FAIL policy
 * used for the thread table; previously both were ignored, leading to a
 * NULL dereference or to joining a thread that was never started.
*/
void mclxInflateBoss
(  mclMatrix*     mx
,  double         power
,  mclProcParam*  mpp
)
{
    int            n_threads = mpp->n_ithreads > 0 ? mpp->n_ithreads : 1;
    int            workLoad  = N_COLS(mx) / n_threads;
    int            workTail  = N_COLS(mx) % n_threads;
    int            i;
    pthread_attr_t pthread_custom_attr;
    pthread_t     *threads_inflate
        = (pthread_t *) mcxAlloc(n_threads * sizeof(pthread_t), EXIT_ON_FAIL);

    pthread_attr_init(&pthread_custom_attr);

    for (i = 0; i < n_threads; i++) {
        mclvInflateLine_arg *a = malloc(sizeof *a);

        if (!a) {
            fprintf(stderr, "mclxInflateBoss: out of memory\n");
            exit(EXIT_FAILURE);
        }

        a->id    = i;
        a->start = workLoad * i;
        a->end   = workLoad * (i + 1);
        a->mx    = mx;
        a->power = power;

        if (i + 1 == n_threads)            /* last slice takes the tail */
            a->end += workTail;

        if (pthread_create
            (  &threads_inflate[i]
            ,  &pthread_custom_attr
            ,  (void *(*)(void*)) mclvInflateLine
            ,  (void *) a
            )
           ) {
            fprintf(stderr, "mclxInflateBoss: cannot create thread %d\n", i);
            exit(EXIT_FAILURE);
        }
    }

    for (i = 0; i < n_threads; i++)
        pthread_join(threads_inflate[i], NULL);

    pthread_attr_destroy(&pthread_custom_attr);
    mcxFree(threads_inflate);
}
/*
 * Create a grim allocator with one initial buffer of n_units slots of
 * sz_unit bytes each.
 *
 * options is stored as the allocator's flags.  Returns the allocator, or
 * NULL when the allocator or its first buffer could not be allocated.
*/
mcxGrim* mcxGrimNew
(  dim      sz_unit
,  dim      n_units
,  mcxbits  options
)
{
    mcxGrim  *grim = mcxAlloc(sizeof(mcxGrim), RETURN_ON_FAIL);
    grim_buf *first;

    if (!grim)
        return NULL;

    first     = grim_buf_new(sz_unit, n_units);
    grim->buf = first;

    if (!first) {
        mcxFree(grim);
        return NULL;
    }

    first->prev   = NULL;
    grim->flags   = options;
    grim->na      = (void*) first->units;    /* next available slot */
    grim->ct      = 0;
    grim->sz_unit = sz_unit;

    return grim;
}
/*
 * Create an IO handle, or re-target an existing one to a new name/mode.
 *
 *    xf     existing handle, or NULL to create one (name and mode are then
 *           both required)
 *    name   new file name, or NULL to keep the current one
 *    mode   new open mode, or NULL to keep the current one; must contain
 *           one of 'w', 'r', 'a'
 *
 * If the environment variable TINGEA_PLUS_APPEND is set, a name starting
 * with '+' combined with a write mode is turned into append mode on the
 * name without the '+'.  An existing non-stdio handle that mcxIOwarnOpenfp
 * reports as open is closed first.  A handle that ends up referring to a
 * standard stream never keeps append mode; it is switched to "w".
 *
 * Returns the handle, or NULL on bad arguments or allocation failure.
 *
 * NOTE(review): on the create path, a failure after the handle itself was
 * allocated returns NULL without releasing what was allocated so far.
*/
mcxIO* mcxIOrenew
(  mcxIO*       xf
,  const char*  name
,  const char*  mode
)
{
    /* it was one of STD{IN,OUT,ERR} */
    mcxbool was_stdio = xf && xf->stdio;

    if (mode && !(strstr(mode, "w") || strstr(mode, "r") || strstr(mode, "a"))) {
        mcxErr("mcxIOrenew PBD", "unsupported open mode <%s>", mode);
        return NULL;
    }

    /* user can specify -o +foo to append to foo */
    if (  getenv("TINGEA_PLUS_APPEND")
       && name && (uchar) name[0] == '+'
       && mode && strchr(mode, 'w')
       ) {
        name++;
        mode = "a";
    }

    if (!xf) {
        /* case 1) create a new one */
        if (!name || !mode) {
            mcxErr("mcxIOrenew PBD", "too few arguments");
            return NULL;
        }

        xf = (mcxIO*) mcxAlloc(sizeof(mcxIO), RETURN_ON_FAIL);
        if (!xf)
            return NULL;

        xf->fn = mcxTingEmpty(NULL, 20);
        if (!xf->fn)
            return NULL;

        xf->buffer = mcxTingEmpty(NULL, getpagesize());
        if (!xf->buffer)
            return NULL;

        xf->fp              = NULL;
        xf->mode            = NULL;
        xf->usr             = NULL;
        xf->usr_reset       = NULL;
        xf->buffer_consumed = 0;
    }
    else if (!xf->stdio && mcxIOwarnOpenfp(xf, "mcxIOrenew")) {
        /* case 2) a stdio handle is left alone;
         * case 3) any other handle is warned about and closed if open
        */
        mcxIOclose(xf);
    }

    mcxIOreset(xf);

    if (name && !mcxTingWrite(xf->fn, name))
        return NULL;

    if (mode) {
        if (xf->mode)
            mcxFree(xf->mode);
        xf->mode = mcxStrDup(mode);
    }

    xf->stdio = begets_stdio(xf->fn->str, xf->mode);

    /* name changed, no longer stdio */
    if (was_stdio && !xf->stdio)
        xf->fp = NULL;

    /* recently added */
    if (xf->stdio && mode && strchr(mode, 'a')) {
        if (xf->mode)
            mcxFree(xf->mode);
        xf->mode = mcxStrDup("w");
    }

    return xf;
}
mclVector* mclvBinaryx ( const mclVector* vec1 , const mclVector* vec2 , mclVector* dst , double (*op)(pval arg1, pval arg2, pval arg3) , double arg3 ) { mclIvp *ivp1, *ivp2, *ivp1max, *ivp2max, *ivpk, *ivpl ; long n1n2 = vec1->n_ivps+vec2->n_ivps ; if (vec1->n_ivps + vec2->n_ivps == 0) return mclvInstantiate(dst, 0, NULL) ; ivpl = ivpk = mcxAlloc ( n1n2 * sizeof(mclIvp) , RETURN_ON_FAIL ) ; if (!ivpk) { mcxMemDenied(stderr, "mclvBinary", "mclIvp", n1n2) ; return NULL ; } ivp1 = vec1->ivps ; ivp2 = vec2->ivps ; ivp1max = ivp1 + vec1->n_ivps ; ivp2max = ivp2 + vec2->n_ivps ; { double rval ; while (ivp1 < ivp1max && ivp2 < ivp2max) { pval val1 = 0.0 ; pval val2 = 0.0 ; long idx ; if (ivp1->idx < ivp2->idx) { idx = ivp1->idx ; val1 = (ivp1++)->val ; } else if (ivp1->idx > ivp2->idx) { idx = ivp2->idx ; val2 = (ivp2++)->val ; } else { idx = ivp1->idx ; val1 = (ivp1++)->val ; val2 = (ivp2++)->val ; } if ((rval = op(val1, val2, arg3)) != 0.0) { ivpl->idx = idx ; (ivpl++)->val = rval ; } } while (ivp1 < ivp1max) { if ((rval = op(ivp1->val, 0.0, arg3)) != 0.0) { ivpl->idx = ivp1->idx ; (ivpl++)->val = rval ; } ivp1++ ; } while (ivp2 < ivp2max) { if ((rval = op(0.0, ivp2->val, arg3)) != 0.0) { ivpl->idx = ivp2->idx ; (ivpl++)->val = rval ; } ivp2++ ; } } dst = mclvInstantiate(dst, ivpl-ivpk, ivpk) ; mcxFree(ivpk) ; return dst ; }
/*
 * Compute a selection bar for (at most) k entries of vec using a heap of
 * k values.
 *
 *    vec      vector whose values are scanned
 *    k        number of entries to select
 *    ignore   ignore elements relative to this: in KBAR_SELECT_LARGE mode
 *             values >= ignore are skipped, in KBAR_SELECT_SMALL mode
 *             values < ignore are skipped
 *    mode     KBAR_SELECT_LARGE or KBAR_SELECT_SMALL; anything else is
 *             reported and terminates the program
 *
 * In LARGE mode the heap keeps its smallest value at the root and ends up
 * holding the k largest qualifying values; SMALL mode mirrors this.  The
 * return value is the heap root.  When k is even one extra sentinel slot
 * is appended so that the sift-down loop may read the sibling at index k.
 *
 * If k covers the whole vector the result is -FLT_MAX (LARGE) or FLT_MAX
 * (SMALL); if the heap cannot be allocated it is the other way around.
 *
 * NOTE(review): if no value qualifies (and k > 0) the heap root is read
 * without ever having been written.
*/
double mclvKBar
(  mclVector  *vec
,  dim         k
,  double      ignore
,  int         mode
)
{
    int      have_even  = (k + 1) % 2;
    dim      n_inserted = 0;
    double   ans        = 0.0;
    mclIvp  *ivp        = vec->ivps;
    mclIvp  *ivp_end    = ivp + vec->n_ivps;
    pval    *heap;

    /* can select everything */
    if (k >= vec->n_ivps)
        return mode == KBAR_SELECT_LARGE ? -FLT_MAX : FLT_MAX;

    /* let's select nothing, it might even help */
    heap = mcxAlloc((k + have_even) * sizeof(pval), RETURN_ON_FAIL);
    if (!heap)
        return mode == KBAR_SELECT_LARGE ? FLT_MAX : -FLT_MAX;

    if (mode == KBAR_SELECT_LARGE) {
        if (have_even)
            heap[k] = PVAL_MAX;

        for ( ; ivp < ivp_end; ivp++) {
            pval val = ivp->val;

            if (val >= ignore)
                continue;

            if (n_inserted < k) {
                /* sift up from the first free slot */
                dim d = n_inserted;
                while (d != 0 && heap[(d - 1) / 2] > val) {
                    heap[d] = heap[(d - 1) / 2];
                    d = (d - 1) / 2;
                }
                heap[d] = val;
                n_inserted++;
            }
            else if (val > heap[0]) {
                /* replace the root and sift down */
                dim root = 0;
                dim d;
                for (d = 1; d < k; d = 2 * root + 1) {
                    if (heap[d] > heap[d + 1])
                        d++;
                    if (!(val > heap[d]))
                        break;
                    heap[root] = heap[d];
                    root = d;
                }
                heap[root] = val;
            }
        }
    }
    else if (mode == KBAR_SELECT_SMALL) {
        if (have_even)
            heap[k] = -PVAL_MAX;

        for ( ; ivp < ivp_end; ivp++) {
            pval val = ivp->val;

            if (val < ignore)
                continue;

            if (n_inserted < k) {
                dim d = n_inserted;
                while (d != 0 && heap[(d - 1) / 2] < val) {
                    heap[d] = heap[(d - 1) / 2];
                    d = (d - 1) / 2;
                }
                heap[d] = val;
                n_inserted++;
            }
            else if (val < heap[0]) {
                dim root = 0;
                dim d;
                for (d = 1; d < k; d = 2 * root + 1) {
                    if (heap[d] < heap[d + 1])
                        d++;
                    if (!(val < heap[d]))
                        break;
                    heap[root] = heap[d];
                    root = d;
                }
                heap[root] = val;
            }
        }
    }
    else {
        mcxErr("mclvKBar PBD", "invalid mode");
        mcxExit(1);
    }

    ans = heap[0];
    mcxFree(heap);
    return ans;
}
static mcxstatus meetMain ( int argc , const char* argv[] ) { mcxIO **xfmcs = NULL ; mclMatrix *lft = NULL ; mclMatrix *rgt = NULL ; mclMatrix *dst = NULL ; int a = 0 ; int n_mx = 0 ; int j ; dim o, m, e ; mclxIOsetQMode("MCLXIOVERBOSITY", MCL_APP_VB_YES) ; mclx_app_init(stderr) ; xfmcs = (mcxIO**) mcxAlloc ( (argc)*sizeof(mcxIO*) , EXIT_ON_FAIL ) ; mcxIOopen(xfout, EXIT_ON_FAIL) ; for(j=a;j<argc;j++) { xfmcs[n_mx] = mcxIOnew(argv[j], "r") ; n_mx++ ; } if (!n_mx) mcxDie(1, me, "at least one clustering matrix required") /* Fixme: do a decent initialization with lft = clmTop() *before* * this loop (removing the need for ugly tmp assignment), but that requires * we know the correct domain to pass to it. For that, we need to peak into * the first matrix. */ ; for (j=0;j<n_mx;j++) { mclMatrix* tmp = mclxRead (xfmcs[j], EXIT_ON_FAIL) ; if (clmEnstrict(tmp, &o, &m, &e, ENSTRICT_SPLIT_OVERLAP)) report_partition("clmmeet", tmp, xfmcs[j]->fn, o, m, e) , mcxExit(1) ; if (!lft) { lft = tmp ; continue ; } else rgt = tmp ; if (!MCLD_EQUAL(lft->dom_rows, rgt->dom_rows)) mcxDie ( 1 , me , "domains not equal (files %s/%s)" , xfmcs[j-1]->fn->str , xfmcs[j]->fn->str ) ; mcxIOclose(xfmcs[j]) ; dst = clmMeet(lft, rgt) ; lft = dst ; mclxFree(&rgt) ; } mclxColumnsRealign(lft, mclvSizeRevCmp) ; mclxWrite(lft, xfout, MCLXIO_VALUE_NONE, EXIT_ON_FAIL) ; mclxFree(&lft) ; mcxIOfree(&xfout) ; free(xfmcs) ; return STATUS_OK ; }
/*
 * Read a graph from xf and report network traits for a range of settings.
 *
 *    xf        input matrix stream; closed after reading
 *    fp        report destination
 *    vary_a    first step
 *    vary_z    last step (inclusive)
 *    vary_s    step increment
 *    vary_n    divisor turning a step into a cutoff (step / vary_n)
 *    mode      'c' or 't': edge weight cutoff; 'k': kNN; 'l': kNN merge
 *              mode; 'n': neighbour ceiling.  For 'k', 'l' and 'n' the
 *              parameter used is vary_z + vary_a - step, so it runs from
 *              high to low.
 *
 * For every step one entry of the file-level levels array is filled and
 * one report line is written, either tab separated (OUTPUT_TABLE) or in a
 * fixed-width layout, optionally followed by a plot of the squared
 * correlation of the log-log degree distribution.
 *
 * NOTE(review): nothing here bounds the number of steps against the size
 * of the levels array.
 *
 * Fixes relative to the previous version:
 *  - the degree vector is freed through &degree (the argument was garbled);
 *  - the final degree-distribution sample stores x in nnodes and y in
 *    degree; it used to write nnodes twice and leave degree untouched;
 *  - the key text for CCF and eff says "not computed" only when the
 *    corresponding compute flag is absent (the test was inverted).
*/
static void vary_threshold
(  mcxIO*    xf
,  FILE*     fp
,  int       vary_a
,  int       vary_z
,  int       vary_s
,  int       vary_n
,  unsigned  mode
)
{
    dim            cor_i = 0, j;
    int            step;
    mclx          *mx;
    unsigned long  noe;
    pval          *allvals;
    dim            n_allvals = 0;
    double         sum_vals  = 0.0;

    mx = mclxRead(xf, EXIT_ON_FAIL);
    mcxIOclose(xf);

    if (transform)
        mclgTFexec(mx, transform);

    noe     = mclxNrofEntries(mx);
    allvals = mcxAlloc(noe * sizeof allvals[0], EXIT_ON_FAIL);

    if (!weight_scale) {
        if (mode == 'c')
            weight_scale = 1.0;
        else
            weight_scale = vary_n;
    }

    n_allvals = get_n_sort_allvals(mx, allvals, noe, &sum_vals, FALSE);

    if (mode == 'c') {
        double smallest = n_allvals ? allvals[n_allvals-1] : -DBL_MAX;

        /* move the starting point up to just below the smallest value */
        if (vary_a * 1.0 / vary_n < smallest) {
            while (vary_a * 1.0 / vary_n < smallest)
                vary_a++;
            vary_a--;
        }
        mcxTell
        (  me
        ,  "smallest correlation is %.2f, using starting point %.2f"
        ,  smallest
        ,  vary_a * 1.0 / vary_n
        );
    }

    if (output_flags & OUTPUT_TABLE) {
        fprintf(fp, "L\tD\tR\tS\tcce\tEWmean\tEWmed\tEWiqr\tNDmean\tNDmed\tNDiqr\tCCF\t%s\n", mode == 'k' ? "kNN" : mode == 'l' ? "N" : "Cutoff");
    }
    else {
        if (output_flags & OUTPUT_KEY) {
            fprintf(fp, "-------------------------------------------------------------------------------\n");
            fprintf(fp, " L Percentage of nodes in the largest component\n");
            fprintf(fp, " D Percentage of nodes in components of size at most %d [-div option]\n", (int) divide_g);
            fprintf(fp, " R Percentage of nodes not in L or D: 100 - L -D\n");
            fprintf(fp, " S Percentage of nodes that are singletons\n");
            fprintf(fp, " cce Expected size of component, nodewise [ sum(sz^2) / sum^2(sz) ]\n");
            fprintf(fp, "*EW Edge weight traits (mean, median and IQR, all scaled!)\n");
            fprintf(fp, " Scaling is used to avoid printing of fractional parts throughout.\n");
            fprintf(fp, " The scaling factor is %.2f [-report-scale option]\n", weight_scale);
            fprintf(fp, " ND Node degree traits [mean, median and IQR]\n");
            fprintf(fp, " CCF Clustering coefficient %s\n", compute_flags & COMPUTE_CLCF ? "" : "(not computed; use --clcf to include this)");
            fprintf(fp, " eff Induced component efficiency %s\n", compute_flags & COMPUTE_EFF ? "" : "(not computed; use --eff to include this)");

            if (mode == 'c')
                fprintf(fp, "Cutoff The threshold used.\n");
            else if (mode == 't')
                fprintf(fp, "*Cutoff The threshold with scale factor %.2f and fractional parts removed\n", weight_scale);
            else if (mode == 'k')
                fprintf(fp, "k-NN The knn parameter\n");
            else if (mode == 'l')
                fprintf(fp, "N The knn parameter (merge mode)\n");
            else if (mode == 'n')
                fprintf(fp, "ceil The ceil parameter\n");

            fprintf(fp, "Total number of nodes: %lu\n", (ulong) N_COLS(mx));
        }
        fprintf(fp, "-------------------------------------------------------------------------------\n");
        fprintf(fp, " L D R S cce *EWmean *EWmed *EWiqr NDmean NDmed NDiqr CCF eff %6s \n", mode == 'k' ? "k-NN" : mode == 'l' ? "N" : mode == 'n' ? "Ceil" : "Cutoff");
        fprintf(fp, "-------------------------------------------------------------------------------\n");
    }

    for (step = vary_a; step <= vary_z; step += vary_s) {
        double  cutoff = step * 1.0 / vary_n;
        double  eff    = -1.0;
        mclv   *nnodes = mclvCanonical(NULL, N_COLS(mx), 0.0);
        mclv   *degree = mclvCanonical(NULL, N_COLS(mx), 0.0);
        dim     i, n_sample = 0;
        double  cor, y_prev, iqr = 0.0;
        mclx   *cc = NULL, *res = NULL;
        mclv   *sz, *ccsz = NULL;
        int     step2 = vary_z + vary_a - step;

        sum_vals = 0.0;

        /* derive the network for this step */
        if (mode == 't' || mode == 'c') {
            mclxSelectValues(mx, &cutoff, NULL, MCLX_EQT_GQ);
            res = mx;
        }
        else if (mode == 'k') {
            res = rebase_g ? mclxCopy(mx) : mx;
            mclxKNNdispatch(res, step2, n_thread_l, 1);
        }
        else if (mode == 'l') {
            res = mx;
            mclxKNNdispatch(res, step2, n_thread_l, 0);
        }
        else if (mode == 'n') {
            mclv *cv;
            res = rebase_g ? mclxCopy(mx) : mx;
            cv  = mclgCeilNB(res, step2, NULL, NULL, NULL);
            mclvFree(&cv);
        }

        sz = mclxColSizes(res, MCL_VECTOR_COMPLETE);
        mclvSortDescVal(sz);

        /* fixme: user has to specify -tf '#max()' if graph is directed */
        cc = clmUGraphComponents(res, NULL);

        if (cc) {
            ccsz = mclxColSizes(cc, MCL_VECTOR_COMPLETE);
            if (compute_flags & COMPUTE_EFF) {
                clmPerformanceTable pftable;
                clmPerformance(mx, cc, &pftable);
                eff = pftable.efficiency;
            }
        }

        /* refresh the sorted edge weights and their sum */
        if (mode == 't' || mode == 'c') {
            while (n_allvals > 0 && allvals[n_allvals-1] < cutoff)
                n_allvals--;
            sum_vals = 0.0;
            for (i = 0; i < n_allvals; i++)
                sum_vals += allvals[i];
        }
        else if (mode == 'k' || mode == 'n' || mode == 'l') {
            n_allvals = get_n_sort_allvals(res, allvals, noe, &sum_vals, FALSE);
        }

        levels[cor_i].sim_median = mcxMedian(allvals, n_allvals, sizeof allvals[0], pval_get_double, &iqr);
        levels[cor_i].sim_iqr    = iqr;
        levels[cor_i].sim_mean   = n_allvals ? sum_vals / n_allvals : 0.0;

        levels[cor_i].nb_median  = mcxMedian(sz->ivps, sz->n_ivps, sizeof sz->ivps[0], ivp_get_double, &iqr);
        levels[cor_i].nb_iqr     = iqr;
        levels[cor_i].nb_mean    = mclvSum(sz) / N_COLS(res);
        levels[cor_i].cc_exp     = cc ? mclvPowSum(ccsz, 2.0) / N_COLS(res) : 0;
        levels[cor_i].nb_sum     = mclxNrofEntries(res);

        if (compute_flags & COMPUTE_CLCF) {
            mclv *clcf = mclgCLCFdispatch(res, n_thread_l);
            levels[cor_i].clcf = mclvSum(clcf) / N_COLS(mx);
            mclvFree(&clcf);
        }
        else
            levels[cor_i].clcf = 0.0;

        levels[cor_i].threshold = mode == 'k' || mode == 'l' || mode == 'n' ? step2 : cutoff;
        levels[cor_i].bigsize   = cc ? cc->cols[0].n_ivps : 0;
        levels[cor_i].n_single  = 0;
        levels[cor_i].n_edge    = n_allvals;
        levels[cor_i].n_lq      = 0;

        /* walk the components from the last column backwards until one
         * exceeds divide_g
        */
        if (cc) {
            for (i = 0; i < N_COLS(cc); i++) {
                dim n = cc->cols[N_COLS(cc)-1-i].n_ivps;
                if (n == 1)
                    levels[cor_i].n_single++;
                if (n <= divide_g)
                    levels[cor_i].n_lq += n;
                else
                    break;
            }
        }

        if (levels[cor_i].bigsize <= divide_g)
            levels[cor_i].bigsize = 0;

        y_prev = sz->ivps[0].val;

        /* wiki says:
         * A scale-free network is a network whose degree distribution
         * follows a power law, at least asymptotically. That is, the
         * fraction P(k) of nodes in the network having k connections to
         * other nodes goes for large values of k as P(k) ~ k^-g where g is
         * a constant whose value is typically in the range 2<g<3, although
         * occasionally it may lie outside these bounds.
        */
        for (i = 1; i < sz->n_ivps; i++) {
            double y = sz->ivps[i].val;

            if (y > y_prev - 0.5)          /* same as node degree seen last */
                continue;

            /* x = #nodes >= k, as fraction */
            nnodes->ivps[n_sample].val = log((i * 1.0) / (1.0 * N_COLS(res)));
            /* y = k = degree of node */
            degree->ivps[n_sample].val = log(y_prev ? y_prev : 1);
            n_sample++;
            y_prev = y;
        }

        /* last sample: all nodes have at least the smallest degree, so the
         * fraction is 1 and its log is 0
        */
        nnodes->ivps[n_sample].val = 0;
        degree->ivps[n_sample].val = log(y_prev ? y_prev : 1);
        n_sample++;

        mclvResize(nnodes, n_sample);
        mclvResize(degree, n_sample);
        cor = pearson(nnodes, degree, n_sample);

        levels[cor_i].degree_cor = cor * cor;

        mclvFree(&nnodes);
        mclvFree(&degree);
        mclvFree(&sz);
        mclvFree(&ccsz);
        mclxFree(&cc);

        if (output_flags & OUTPUT_TABLE) {
            fprintf
            (  fp
            ,  "%lu\t%lu\t%lu\t%lu\t%lu"
               "\t%6g\t%6g\t%6g"
               "\t%6g\t%lu\t%6g"
            ,  (ulong) levels[cor_i].bigsize
            ,  (ulong) levels[cor_i].n_lq
            ,  (ulong) N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq
            ,  (ulong) levels[cor_i].n_single
            ,  (ulong) levels[cor_i].cc_exp
            ,  (double) levels[cor_i].sim_mean
            ,  (double) levels[cor_i].sim_median
            ,  (double) levels[cor_i].sim_iqr
            ,  (double) levels[cor_i].nb_mean
            ,  (ulong) levels[cor_i].nb_median
            ,  (double) levels[cor_i].nb_iqr
            );

            if (compute_flags & COMPUTE_CLCF)
                fprintf(fp, "\t%6g", levels[cor_i].clcf);
            else
                fputs("\tNA", fp);

            if (eff >= 0.0)
                fprintf(fp, "\t%4g", eff);
            else
                fputs("\tNA", fp);

            fprintf(fp, "\t%6g", (double) levels[cor_i].threshold);
            fputc('\n', fp);
        }
        else {
            fprintf
            (  fp
            ,  "%3d %3d %3d %3d %7d "
               "%7.0f %7.0f %6.0f"
               "%6.1f %6.0f %6.0f"
            ,  (int) (0.5 + (100.0 * levels[cor_i].bigsize) / N_COLS(mx))
            ,  (int) (0.5 + (100.0 * levels[cor_i].n_lq) / N_COLS(mx))
            ,  (int) (0.5 + (100.0 * (N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq)) / N_COLS(mx))
            ,  (int) (0.5 + (100.0 * levels[cor_i].n_single) / N_COLS(mx))
            ,  (int) (0.5 + levels[cor_i].cc_exp)
            ,  (double) (levels[cor_i].sim_mean * weight_scale)
            ,  (double) (levels[cor_i].sim_median * weight_scale)
            ,  (double) (levels[cor_i].sim_iqr * weight_scale)
            ,  (double) (levels[cor_i].nb_mean)
            ,  (double) (levels[cor_i].nb_median + 0.5)
            ,  (double) (levels[cor_i].nb_iqr + 0.5)
            );

            if (compute_flags & COMPUTE_CLCF)
                fprintf(fp, " %3d", (int) (0.5 + (100.0 * levels[cor_i].clcf)));
            else
                fputs(" -", fp);

            if (eff >= 0.0)
                fprintf(fp, " %3d", (int) (0.5 + 1000 * eff));
            else
                fputs(" -", fp);

            if (mode == 'c')
                fprintf(fp, "%8.2f\n", (double) levels[cor_i].threshold);
            else if (mode == 't')
                fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold * weight_scale);
            else if (mode == 'k' || mode == 'n' || mode == 'l')
                fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold);
        }

        cor_i++;

        /* res is a private copy only when rebasing */
        if (res != mx)
            mclxFree(&res);
    }

    if (!(output_flags & OUTPUT_TABLE)) {
        if (weefreemen) {
            fprintf(fp, "-------------------------------------------------------------------------------\n");
            fprintf(fp, "The graph below plots the R^2 squared value for the fit of a log-log plot of\n");
            fprintf(fp, "<node degree k> versus <#nodes with degree >= k>, for the network resulting\n");
            fprintf(fp, "from applying a particular %s cutoff.\n", mode == 'c' ? "correlation" : "similarity");
            fprintf(fp, "-------------------------------------------------------------------------------\n");

            for (j = 0; j < cor_i; j++) {
                dim jj;

                /* one text column per 0.01 of R^2, from 0.30 to 1.00 */
                for (jj = 30; jj <= 100; jj++) {
                    char c = ' ';
                    if (jj * 0.01 < levels[j].degree_cor && (jj + 1.0) * 0.01 > levels[j].degree_cor)
                        c = 'X';
                    else if (jj % 5 == 0)
                        c = '|';
                    fputc(c, fp);
                }

                if (mode == 'c')
                    fprintf(fp, "%8.2f\n", (double) levels[j].threshold);
                else
                    fprintf(fp, "%8.0f\n", (double) levels[j].threshold * weight_scale);
            }

            fprintf(fp, "|----+----|----+----|----+----|----+----|----+----|----+----|----+----|--------\n");
            fprintf(fp, "| R^2 0.4 0.5 0.6 0.7 0.8 0.9 | 1.0 -o)\n");
            fprintf(fp, "+----+----+----+----+----+---------+----+----+----+----+----+----+----+ /\\\\\n");
            fprintf(fp, "| 2 4 6 8 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | _\\_/\n");
            fprintf(fp, "+----+----|----+----|----+----|----+----|----+----|----+----|----+----+--------\n");
        }
        else
            fprintf(fp, "-------------------------------------------------------------------------------\n");
    }

    mclxFree(&mx);
    mcxFree(allvals);
}
; } return clid ; } static mcxstatus qMain ( int argc_unused cpl__unused , const char* argv_unused[] cpl__unused ) { mclx* cl = NULL, *cltp = NULL ; srandom(mcxSeed(135313531)) ; mcxIOopen(xfout_g, EXIT_ON_FAIL) ; levels = mcxAlloc(1001 * sizeof levels[0], EXIT_ON_FAIL) ; if (!mode_vary && !mode_get) mode_get = 'n' ; if ( transform_spec && !(transform = mclgTFparse(NULL, transform_spec)) ) mcxDie(1, me, "input -tf spec does not parse") ; if (xftab_g && mode_get != 'n') tab_g = mclTabRead(xftab_g, NULL, EXIT_ON_FAIL) ; if (xfcl_g) { dim i
int main ( int argc , const char* argv[] ) { mcxIO* xfmx = mcxIOnew("-", "r"), *xfout = mcxIOnew("-", "w") ; mclx* mx = NULL ; mclv* mx_diag = NULL ; mcxstatus parseStatus = STATUS_OK ; mcxOption* opts, *opt ; dim N_edge = 0 ; dim* offsets ; dim template_n_nodes = 0 ; mcxbool plus = FALSE ; double e_min = 1.0 ; double e_max = 0.0 ; double skew = 0.0 ; double radius = 0.0 ; double n_sdev = 0.5 ; double n_range = 2.0 ; double g_radius = 0.0 ; double g_mean = 0.0 ; double g_sdev = 0.0 ; double g_min = 1.0 ; double g_max = 0.0 ; mcxbool do_gaussian = FALSE ; dim i = 0 ; dim N_remove = 0 ; dim N_add = 0 ; dim N_shuffle = 0 ; unsigned long random_ignore = 0 ; srandom(mcxSeed(2308947)) ; mcxOptAnchorSortById(options, sizeof(options)/sizeof(mcxOptAnchor) -1) ; if (!(opts = mcxOptParse(options, (char**) argv, argc, 1, 0, &parseStatus))) exit(0) ; mcxLogLevel = MCX_LOG_AGGR | MCX_LOG_MODULE | MCX_LOG_IO | MCX_LOG_GAUGE | MCX_LOG_WARN ; mclxIOsetQMode("MCLXIOVERBOSITY", MCL_APP_VB_YES) ; mclx_app_init(stderr) ; for (opt=opts;opt->anch;opt++) { mcxOptAnchor* anch = opt->anch ; switch(anch->id) { case MY_OPT_HELP : case MY_OPT_APROPOS : mcxOptApropos(stdout, me, syntax, 20, MCX_OPT_DISPLAY_SKIP, options) ; return 0 ; case MY_OPT_VERSION : app_report_version(me) ; return 0 ; case MY_OPT_SKEW : skew = atof(opt->val) ; break ; case MY_OPT_GEN : template_n_nodes = atoi(opt->val) ; break ; case MY_OPT_IMX : mcxIOrenew(xfmx, opt->val, NULL) ; break ; case MY_OPT_PLUS : case MY_OPT_WB : plus = TRUE ; break ; case MY_OPT_OUT : mcxIOrenew(xfout, opt->val, NULL) ; break ; case MY_OPT_E_MAX : if (!strcmp(opt->val, "copy")) e_max = -DBL_MAX ; else e_max = atof(opt->val) ; break ; case MY_OPT_E_MIN : e_min = atof(opt->val) ; break ; case MY_OPT_G_MIN : g_min = atof(opt->val) ; break ; case MY_OPT_G_MAX : g_max = atof(opt->val) ; break ; case MY_OPT_G_SDEV : g_sdev = atof(opt->val) ; break ; case MY_OPT_G_MEAN : g_mean = atof(opt->val) ; do_gaussian = TRUE ; break ; case MY_OPT_G_RADIUS : 
g_radius = atof(opt->val) ; break ; case MY_OPT_N_RANGE : n_range = atof(opt->val) ; break ; case MY_OPT_N_SDEV : n_sdev = atof(opt->val) ; break ; case MY_OPT_N_RADIUS : radius = atof(opt->val) ; break ; case MY_OPT_SHUFFLE : N_shuffle = atoi(opt->val) ; break ; case MY_OPT_ADD : N_add = atoi(opt->val) ; break ; case MY_OPT_REMOVE : N_remove = atoi(opt->val) ; break ; } } /* hitting y% in vi tells me the size of this block */ { if (template_n_nodes) mx = mclxAllocZero ( mclvCanonical(NULL, template_n_nodes, 1.0) , mclvCanonical(NULL, template_n_nodes, 1.0) ) ; else mx = mclxReadx ( xfmx , EXIT_ON_FAIL , MCLX_REQUIRE_GRAPH ) ; mx_diag = mclxDiagValues(mx, MCL_VECTOR_COMPLETE) ; if (N_shuffle) mclxAdjustLoops(mx, mclxLoopCBremove, NULL) ; else mclxSelectUpper(mx) /* ^ apparently we always work on single arc representation (docme andsoon) */ ; offsets = mcxAlloc(sizeof offsets[0] * N_COLS(mx), EXIT_ON_FAIL) ; N_edge = 0 ; for (i=0;i<N_COLS(mx);i++) { offsets[i] = N_edge ; N_edge += mx->cols[i].n_ivps ; } if (N_edge < N_remove) { mcxErr ( me , "removal count %ld exceeds edge count %ld" , (long) N_remove , (long) N_edge ) ; N_remove = N_edge ; } random_ignore = RAND_MAX - (N_edge ? RAND_MAX % N_edge : 0) ; if (RAND_MAX / 2 < N_edge) mcxDie(1, me, "graph too large!") ; if (N_shuffle) { do_the_shuffle(mx, N_shuffle, offsets, N_edge, random_ignore) ; mx_readd_diagonal(mx, mx_diag) ; mclxWrite(mx, xfout, MCLXIO_VALUE_GETENV, RETURN_ON_FAIL) ; exit(0) ; } ; if (N_remove) { dim n_remove = do_remove(mx, N_remove, offsets, N_edge, random_ignore) /* Need to recompute N_edge and random_ignore. * NOTE we work with *upper* matrix; this counts graph edges. */ ; N_edge = mclxNrofEntries(mx) - n_remove ; random_ignore = RAND_MAX - (RAND_MAX % N_COLS(mx)) ; } if (g_mean) { if (!g_radius) { if (g_sdev) g_radius = 2 * g_sdev ; mcxWarn(me, "set radius to %.5f\n", g_radius) ; } } ; if (N_add) N_edge += do_add ( mx , N_add , N_edge , do_gaussian ? 
&g_mean : NULL, g_radius , g_sdev , g_min , g_max , skew , e_min , e_max ) ; if (radius) { for (i=0;i<N_COLS(mx);i++) { mclp* ivp = mx->cols[i].ivps, *ivpmax = ivp + mx->cols[i].n_ivps ;if(DEBUG)fprintf(stderr, "here %d\n", (int) i) ; while (ivp < ivpmax) { double val = ivp->val ; double r = mcxNormalCut(n_range * n_sdev, n_sdev) ; double newval = val + radius * (r / (n_range * n_sdev)) ; if (e_min < e_max && newval >= e_min && newval <= e_max) ; ivp->val = newval ; ivp++ ; } } } mclxUnary(mx, fltxCopy, NULL) /* remove zeroes */ ; mclxAddTranspose(mx, 0.0) ; mx_readd_diagonal(mx, mx_diag) ; if (plus) mclxbWrite(mx, xfout, RETURN_ON_FAIL) ; else mclxWrite(mx, xfout, MCLXIO_VALUE_GETENV, RETURN_ON_FAIL) ; } return 0 ; }
mclgTF* mclgTFparse ( mcxLink* encoding_link , mcxTing* thestring ) { mclgTF* gtf = mcxAlloc(sizeof gtf[0], EXIT_ON_FAIL) ; const char* me = "mclgTFparse" ; const char* a = thestring->str ; const char* z = thestring->str + thestring->len ; mcxTing* func = mcxTingEmpty(NULL, thestring->len) ; mcxTing* arg = mcxTingEmpty(NULL, thestring->len) ; int n = 0 ; if (!(gtf->par_edge = mclpARensure(NULL, 10))) return NULL /* +memleak gtf */ ; if (!(gtf->par_graph = mclpARensure(NULL, 10))) return NULL /* +memleak gtf, gtf->par_edge */ ; if ( thestring && !mcxStrChrAint(thestring->str, isspace, thestring->len) ) return gtf ; while (a < z) { const char* val, *key ; char* onw = NULL ; int tfe = -1, tfg = -1 ; mcxbool nought = FALSE ; unsigned char k0 ; double d ; int t ; mcxTingEmpty(arg, z-a) ; mcxTingEmpty(func, z-a) ; n = 0 ; if ((t = sscanf(a, " %[a-z_#-] ( )%n", func->str, &n)) >= 1 && n > 0) NOTHING ; else if ((t = sscanf(a, " %[a-z_#-] ( %[^)_ ] )%n", func->str, arg->str, &n)) >= 2 && n > 0) NOTHING ; else break ; a += n ; key= func->str ; val= arg->str ; k0 = key[0] ; d = strtod(val, &onw) ; if (!val || !strlen(val)) nought = TRUE ; else if (val == onw) { mcxErr(me, "failed to parse number <%s>", val) ; break ; } if (k0 == '#') { if (!strcmp(key, "#ceilnb")) tfg = MCLG_TF_CEILNB ; else if (!strcmp(key, "#knn")) tfg = MCLG_TF_KNN ; else if (!strcmp(key, "#n")) tfg = MCLG_TF_TOPN ; else if (!strcmp(key, "#ils")) tfg = MCLG_TF_ILS ; else if (!strcmp(key, "#mcl")) tfg = MCLG_TF_MCL ; else if (!strcmp(key, "#arcmcl")) tfg = MCLG_TF_ARC_MCL ; else if (!strcmp(key, "#arcsub")) tfg = MCLG_TF_ARCSUB ; else if (!strcmp(key, "#arcmax")) tfg = MCLG_TF_ARCMAX ; else if (!strcmp(key, "#arcmingq")) tfg = MCLG_TF_ARCMINGQ ; else if (!strcmp(key, "#arcmingt")) tfg = MCLG_TF_ARCMINGT ; else if (!strcmp(key, "#arcmimlq")) tfg = MCLG_TF_ARCMINLQ ; else if (!strcmp(key, "#arcminlt")) tfg = MCLG_TF_ARCMINLT ; else if (!strcmp(key, "#arcdiffgq")) tfg = MCLG_TF_ARCDIFFGQ ; else if 
(!strcmp(key, "#arcdiffgt")) tfg = MCLG_TF_ARCDIFFGT ; else if (!strcmp(key, "#arcdifflq")) tfg = MCLG_TF_ARCDIFFLQ ; else if (!strcmp(key, "#arcdifflt")) tfg = MCLG_TF_ARCDIFFLT ; else if (!strcmp(key, "#arcmaxgq")) tfg = MCLG_TF_ARCMAXGQ ; else if (!strcmp(key, "#arcmaxgt")) tfg = MCLG_TF_ARCMAXGT ; else if (!strcmp(key, "#arcmaxlq")) tfg = MCLG_TF_ARCMAXLQ ; else if (!strcmp(key, "#arcmaxlt")) tfg = MCLG_TF_ARCMAXLT ; else if (!strcmp(key, "#selfrm")) tfg = MCLG_TF_SELFRM ; else if (!strcmp(key, "#selfmax")) tfg = MCLG_TF_SELFMAX ; else if (!strcmp(key, "#normself")) tfg = MCLG_TF_NORMSELF ; else if (!strcmp(key, "#add")) tfg = MCLG_TF_ADD ; else if (!strcmp(key, "#max")) tfg = MCLG_TF_MAX ; else if (!strcmp(key, "#min")) tfg = MCLG_TF_MIN ; else if (!strcmp(key, "#mul")) tfg = MCLG_TF_MUL ; else if (!strcmp(key, "#tug")) tfg = MCLG_TF_TUG ; else if (!strcmp(key, "#ssq")) tfg = MCLG_TF_SSQ ; else if (!strcmp(key, "#qt")) tfg = MCLG_TF_QT ; else if (!strcmp(key, "#tp") || !strcmp(key, "#rev")) tfg = MCLG_TF_TRANSPOSE ; else if (!strcmp(key, "#step")) tfg = MCLG_TF_STEP ; else if (!strcmp(key, "#thread")) tfg = MCLG_TF_THREAD ; else if (!strcmp(key, "#shrug")) tfg = MCLG_TF_SHRUG ; else if (!strcmp(key, "#shuffle")) tfg = MCLG_TF_SHUFFLE ; } else { if (!strcmp(key, "gq")) tfe = MCLX_UNARY_GQ ; else if (!strcmp(key, "gt")) tfe = MCLX_UNARY_GT ; else if (!strcmp(key, "lt")) tfe = MCLX_UNARY_LT ; else if (!strcmp(key, "lq")) tfe = MCLX_UNARY_LQ ; else if (!strcmp(key, "rand")) tfe = MCLX_UNARY_RAND ; else if (!strcmp(key, "mul")) tfe = MCLX_UNARY_MUL ; else if (!strcmp(key, "scale")) tfe = MCLX_UNARY_SCALE ; else if (!strcmp(key, "add")) tfe = MCLX_UNARY_ADD ; else if (!strcmp(key, "abs")) tfe = MCLX_UNARY_ABS ; else if (!strcmp(key, "ceil")) tfe = MCLX_UNARY_CEIL ; else if (!strcmp(key, "floor")) tfe = MCLX_UNARY_FLOOR ; else if (!strcmp(key, "pow")) tfe = MCLX_UNARY_POW ; else if (!strcmp(key, "exp")) tfe = MCLX_UNARY_EXP ; else if (!strcmp(key, "log")) tfe = 
MCLX_UNARY_LOG ; else if (!strcmp(key, "neglog")) tfe = MCLX_UNARY_NEGLOG ; } if (tfe < 0 && tfg < 0) { mcxErr(me, "unknown value transform <%s>", key) ; break ; } if (tfe >= 0) { if (nought) { if ( tfe == MCLX_UNARY_LOG || tfe == MCLX_UNARY_ABS || tfe == MCLX_UNARY_EXP || tfe == MCLX_UNARY_NEGLOG ) d = 0.0 ; else { mcxErr(me, "transform <%s> needs value", key) ; break ; } ; } mclpARextend(gtf->par_edge, tfe, d) ; } else if (tfg >= 0) { if (nought) { if ( tfg >= MCLG_TF_DUMMY_NOVALUE_START && tfg <= MCLG_TF_DUMMY_NOVALUE_END ) d = 0.0 ; else if (tfg == MCLG_TF_TUG || tfg == MCLG_TF_SHRUG) d = 1000.0 ; else if (tfg == MCLG_TF_STEP) d = 2.0 ; else { mcxErr(me, "transform <%s> needs value", key) ; break ; } ; } mclpARextend(gtf->par_edge, MCLX_UNARY_UNUSED, 0.0) ; mclpARextend(gtf->par_graph, tfg, d) ; } a = mcxStrChrAint(a, isspace, z-a) ; if (!a || a[0] != ',') break ; a++ ; } if (a) { mcxErr(me, "trailing part <%s> not matched", a) ; mclpARfree(&(gtf->par_edge)) ; mcxFree(gtf) ; gtf = NULL ; } return gtf ; }