Пример #1
0
mclVector*  mcxAttractivityScale
(  const mclMatrix*           M
)
{   dim n_cols = N_COLS(M)
                 ;
    dim d

    ;
    mclVector* vec_values = mclvResize(NULL, n_cols)

                            ;
    for (d=0; d<n_cols; d++)
    {   mclVector*  vec   =  M->cols+d
                             ;
        double   selfval  =  mclvIdxVal(vec, d, NULL)
                             ;
        double   maxval   =  mclvMaxValue(vec)
                             ;
        if (maxval <= 0.0)
        {   mcxErr
            (  "mcxAttractivityScale"
               ,  "encountered nonpositive maximum value"
            )
            ;
            maxval = 1.0
                     ;
        }
        (vec_values->ivps+d)->idx = d
                                    ;
        (vec_values->ivps+d)->val = selfval / maxval
                                    ;
    }
    return vec_values
           ;
}
Пример #2
0
long mclvUnaryList
(  mclv*    vec
,  mclpAR*  ar       /* idx: MCLX_UNARY_mode, val: arg */
)
   {  dim      n_ivps
   ;  mclIvp   *src_ivp, *dst_ivp

   ;  n_ivps   =  vec->n_ivps
   ;  src_ivp  =  vec->ivps
   ;  dst_ivp  =  vec->ivps
   
   ;  while (n_ivps-- > 0)    /* careful with unsignedness */
      {  double val =  mclpUnary(src_ivp, ar)

      ;  if (val != 0.0)
         {  dst_ivp->idx =  src_ivp->idx
         ;  dst_ivp->val =  val
         ;  dst_ivp++
      ;  }
         src_ivp++
   ;  }

      mclvResize(vec, dst_ivp - vec->ivps)
   ;  return vec->n_ivps
;  }
Пример #3
0
mclVector* mclvCanonicalExtend
(  mclv*       dst
,  dim         N
,  double      val
)
   {  dim j, N_old
   ;  ofs idx
   ;  if (!dst)
      return mclvCanonical(NULL, N, val)

   ;  N_old = dst->n_ivps
   ;  if (N < N_old)          /* fixme: err? */
      return dst

   ;  if (N_old)
      {  idx = dst->ivps[N_old-1].idx+1
      ;  if ((dim) idx != N_old)
         mcxErr("mclvCanonicalExtend", "argument not canonical (proceeding)")
   ;  }
      else
      idx = 0

   ;  mclvResize(dst, N)
   ;  for (j=N_old; j<N; j++)
         dst->ivps[j].idx = idx++ 
      ,  dst->ivps[j].val = val
   ;  return dst
;  }
Пример #4
0
mclMatrix* mclDiagOrdering
(  const mclMatrix*     M
,  mclVector**          vecp_attr
)
   {  int         n_cols      =  N_COLS(M)
   ;  mclMatrix*  diago       =  mclxAllocZero(NULL, NULL)
   ;  long        col

   ;  if (*vecp_attr != NULL)
      mclvFree(vecp_attr)

   ;  *vecp_attr = mclvResize(NULL, n_cols)

   ;  for (col=0;col<n_cols;col++)
      {  ofs      offset      =  -1
      ;  double   selfval     =  mclvIdxVal(M->cols+col, col, &offset)
      ;  double   center      =  mclvPowSum(M->cols+col, 2.0)
     /*  double   maxval      =  mclvMaxValue(M->cols+col)
      */
      ;  double   bar         =  MCX_MAX(center, selfval) - dpsd_delta
      ;  mclIvp*  ivp         =  (*vecp_attr)->ivps+col

      ;  ivp->idx             =  col
      ;  ivp->val             =  center ? selfval / center : 0

      ;  if (offset >= 0)                 /* take only higher valued entries */
         mclvSelectGqBar(diago->cols+col, bar)
   ;  }
   ;  return diago
;  }
Пример #5
0
static void prune_el_on_cl
(  mclMatrix* el_to_cl  /* must be conforming */
,  mclMatrix* el_on_cl  /* this one will be pruned */
,  double pct
,  int max
)
   {  dim i
   ;  for (i=0;i<N_COLS(el_on_cl);i++)
      {  mclv*  elclvec =  el_on_cl->cols+i
      ;  long   clid    =  el_to_cl->cols[i].ivps[0].idx
      ;  double sum     =  0.0
      ;  int n_others   =  0
      ;  dim k          =  0
      ;  mcxbool selfok =  FALSE
      ;  mclvSort(elclvec, mclpValRevCmp)

      ;  while (k++ < elclvec->n_ivps && sum < pct && n_others < max)
         {  long y = elclvec->ivps[k-1].idx
         ;  if (y == clid)
            selfok = TRUE
         ;  sum += elclvec->ivps[k-1].val
         ;  n_others++
      ;  }

         mclvResize(elclvec, k-1)        /* careful recentchange */
      ;  mclvSort(elclvec, mclpIdxCmp)
      ;  if (!selfok)
         mclvInsertIdx(elclvec, clid, 0.01)
   ;  }
   }
Пример #6
0
CAMLprim value caml_mcl(value inflation, value arr)
{
    CAMLparam2(inflation, arr);
    int i, cols = Wosize_val(arr);
    mclv *domc = mclvCanonical(NULL, cols, 1.0);
    mclv *domr = mclvCanonical(NULL, cols, 1.0);
    mclx *res_mat, *mx = mclxAllocZero(domc, domr);
    mclAlgParam *mlp;
    value res;

    for (i = 0; i < cols; ++i) {
        value col = Field(arr, i);
        int j, rows = Wosize_val(col);
        mclv *col_vec = &mx->cols[i];
        if (!cols)
            continue;

        mclvResize(col_vec, rows);
        for (j = 0; j < rows; ++j) {
            value t = Field(col, j);
            col_vec->ivps[j].idx = Int_val(Field(t, 0));
            col_vec->ivps[j].val = Double_val(Field(t, 1));
        }
    }


    mclAlgInterface(&mlp, NULL, 0, NULL, mx, 0);

    /* Optionally set inflation */
    if (inflation != Val_none) {
        mlp->mpp->mainInflation = Double_val(Some_val(inflation));
    }

    mclAlgorithm(mlp);

    res_mat = mlp->cl_result;
    cols = res_mat->dom_cols->n_ivps;
    res = caml_alloc(cols, 0);
    for (i = 0; i < cols; ++i) {
        mclv *col_vec = &res_mat->cols[i];
        int j, rows = col_vec->n_ivps;
        value row = caml_alloc(rows, 0);
        for (j = 0; j < rows; ++j) {
            Store_field(row, j, Val_int(col_vec->ivps[j].idx));
        }
        Store_field(res, i, row);
    }

    mclAlgParamFree(&mlp, TRUE);

    CAMLreturn(res);
}
Пример #7
0
void mclvRemoveIdx
(  mclVector*  vec
,  long        idx
)  
   {  ofs offset = mclvGetIvpOffset(vec, idx, -1)
                     /* check for nonnull vector is done in mclvIdxVal */
   ;  if (offset >= 0)
      {  memmove
         (  vec->ivps + offset
         ,  vec->ivps + offset + 1
         ,  (vec->n_ivps - offset - 1) * sizeof(mclIvp)
         )
      ;  mclvResize(vec, vec->n_ivps - 1)
   ;  }
   }
Пример #8
0
mclVector* mclvCanonical
(  mclVector* dst
,  dim        nr
,  double     val
)  
   {  mclIvp* ivp
   ;  dim d  =  0
   ;  dst  =  mclvResize(dst, nr) 

   ;  ivp = dst->ivps

   ;  while (ivp < dst->ivps+dst->n_ivps)
      {  ivp->idx =  d++
      ;  (ivp++)->val =  val
   ;  }
      return dst
;  }
Пример #9
0
double mclvInflate
(  mclVector*  vec
,  double      power
)  
   {  mclIvp*  vecivps
   ;  dim      vecsize
   ;  double   powsum   =  0.0

   ;  if (!vec->n_ivps)
      return 0.0

   ;  vecivps  =  vec->ivps
   ;  vecsize  =  vec->n_ivps

   ;  while (vecsize-- > 0)
      {  (vecivps)->val = pow((double) (vecivps)->val, power)
      ;  powsum += (vecivps++)->val
   ;  }

     /* fixme static interface */
      if (powsum <= 0.0)
      {  mcxErr
         (  "mclvInflate"
         ,  "warning: nonpositive sum <%f> for vector %ld"
         ,  (double) powsum
         ,  (long) vec->vid
         )
      ;  mclvResize(vec, 0)
      ;  return 0.0
   ;  }

      vecivps = vec->ivps
   ;  vecsize = vec->n_ivps
   ;  while (vecsize-- > 0)
      (vecivps++)->val /= powsum

   ;  return pow((double) powsum, power > 1.0 ? 1/(power-1) : 1.0)
;  }
Пример #10
0
mclVector* mclvCanonicalEmbed
(  mclv*       dst
,  const mclv* src
,  dim         nr
,  double      val
)  
   {  mclIvp* ivp
   ;  dim d =  0
   ;  mclv* src_clone = NULL

   ;  if (dst == src)
         src_clone = mclvClone(src)
      ,  src = src_clone

   ;  dst = mclvResize(dst, nr) 

                           /* set everything to val */
   ;  ivp = dst->ivps
   ;  while (ivp < dst->ivps+dst->n_ivps)
      {  ivp->idx = d++
      ;  (ivp++)->val =  val
   ;  }

                           /* insert src values */
                           /* fixme: use better implementation,
                            * preferably with a callback
                           */
      ivp = dst->ivps
   ;  for (d=0;d<src->n_ivps;d++)
      {  ivp = mclvGetIvp(dst, src->ivps[d].idx, ivp)
      ;  if (ivp)
         ivp->val = src->ivps[d].val
   ;  }

      if (src_clone)
      mclvFree(&src_clone)
   ;  return dst
;  }
Пример #11
0
double mclvSelectGqBar
(  mclVector* vec
,  double     fbar
)
   {  mclIvp *writeivp, *readivp, *maxivp
   ;  double mass = 0.0      

   ;  writeivp =  vec->ivps
   ;  readivp  =  vec->ivps
   ;  maxivp   =  vec->ivps+vec->n_ivps

   ;  while (readivp < maxivp)
      {  if (readivp->val >= fbar)
         {  mass += readivp->val
         ;  *writeivp = *readivp
         ;  writeivp++
      ;  }
         readivp++
   ;  }

      mclvResize(vec, writeivp - (vec->ivps))
   ;  return mass
;  }
Пример #12
0
void mclvUnary
(  mclVector*  vec
,  double     (*operation)(pval val, void* arg)
,  void*       arg
)  
   {  dim      n_ivps
   ;  mclIvp   *src_ivp, *dst_ivp

   ;  n_ivps   =  vec->n_ivps
   ;  src_ivp  =  vec->ivps
   ;  dst_ivp  =  vec->ivps
   
   ;  while (n_ivps-- > 0)    /* careful with unsignedness */
      {  double val =  operation(src_ivp->val, arg)

      ;  if (val != 0.0)
         {  dst_ivp->idx =  src_ivp->idx
         ;  dst_ivp->val =  val
         ;  dst_ivp++
      ;  }
         src_ivp++
   ;  }
      mclvResize(vec, dst_ivp - vec->ivps)
;  }
Пример #13
0
mclVector* mclvCopyGiven
(  mclVector*     dst
,  mclVector*     src
,  mcxbool        (*operation)(mclIvp* ivp, void* arg)
,  void*          arg
,  dim            sup
)  
   {  dim         n_src
   ;  mclIvp      *src_ivp, *dst_ivp

                        /* dst allowed to be NULL */
   ;  if (dst != src)
      dst = mclvInstantiate(dst, sup ? sup : src->n_ivps, NULL)
   ; /*
      * else we must not destroy src before it is copied
     */

      n_src       =  src->n_ivps
   ;  src_ivp     =  src->ivps
   ;  dst_ivp     =  dst->ivps

                        /* BEWARE: this routine must work if dst==src */
                        /* n_src--: careful with unsignedness */

   ;  while (n_src-- > 0 && dst_ivp < dst->ivps + dst->n_ivps)
      {  if (operation(src_ivp, arg))
         {  dst_ivp->idx =  src_ivp->idx
         ;  dst_ivp->val =  src_ivp->val
         ;  dst_ivp++
      ;  }
         src_ivp++
   ;  }

      mclvResize(dst, dst_ivp - dst->ivps)
   ;  return dst
;  }
Пример #14
0
mclVector* mclvInsertIdx
(  mclVector*  vec
,  long        idx
,  double      val
)  
   {  ofs offset
   
   ;  if (!vec)
      {  vec = mclvInstantiate(NULL, 1, NULL)
      ;  mclpInstantiate(vec->ivps+0, idx, val)
   ;  }
      else if ((offset =  mclvGetIvpOffset(vec, idx, -1)) >= 0)
      vec->ivps[offset].val = val
   ;  else
      {  dim d = vec->n_ivps
      ;  mclvResize(vec, d+1)
      ;  while (d && vec->ivps[d-1].idx > idx)
            vec->ivps[d] = vec->ivps[d-1]
         ,  d--
      ;  vec->ivps[d].val = val
      ;  vec->ivps[d].idx = idx
   ;  }
      return vec
;  }
Пример #15
0
static void vary_threshold
(  mcxIO* xf
,  FILE*  fp
,  int vary_a
,  int vary_z
,  int vary_s
,  int vary_n
,  unsigned mode
)
   {  dim cor_i = 0, j
   ;  int step

   ;  mclx* mx
   ;  unsigned long noe
   ;  pval*  allvals
   ;  dim  n_allvals = 0
   ;  double sum_vals = 0.0

   ;  mx = mclxRead(xf, EXIT_ON_FAIL)
   ;  mcxIOclose(xf)

   ;  if (transform)
      mclgTFexec(mx, transform)

   ;  noe = mclxNrofEntries(mx)
   ;  allvals = mcxAlloc(noe * sizeof allvals[0], EXIT_ON_FAIL)

   ;  if (!weight_scale)
      {  if (mode == 'c')
         weight_scale = 1.0
      ;  else
         weight_scale = vary_n
   ;  }

      n_allvals = get_n_sort_allvals(mx, allvals, noe, &sum_vals, FALSE)

   ;  if (mode == 'c')
      {  double smallest = n_allvals ? allvals[n_allvals-1] : -DBL_MAX
      ;  if (vary_a * 1.0 / vary_n < smallest)
         {  while (vary_a * 1.0 / vary_n < smallest)
            vary_a++
         ;  vary_a--
      ;  }
         mcxTell
         (  me
         ,  "smallest correlation is %.2f, using starting point %.2f"
         ,  smallest
         ,  vary_a * 1.0 / vary_n
         )
   ;  }

      if (output_flags & OUTPUT_TABLE)
      {
;fprintf(fp, "L\tD\tR\tS\tcce\tEWmean\tEWmed\tEWiqr\tNDmean\tNDmed\tNDiqr\tCCF\t%s\n", mode == 'k' ? "kNN" : mode == 'l' ? "N" : "Cutoff")
;}    else
      {  if (output_flags & OUTPUT_KEY)
 {
;fprintf(fp, "-------------------------------------------------------------------------------\n")
;fprintf(fp, " L       Percentage of nodes in the largest component\n")
;fprintf(fp, " D       Percentage of nodes in components of size at most %d [-div option]\n", (int) divide_g)
;fprintf(fp, " R       Percentage of nodes not in L or D: 100 - L -D\n")
;fprintf(fp, " S       Percentage of nodes that are singletons\n")
;fprintf(fp, " cce     Expected size of component, nodewise [ sum(sz^2) / sum^2(sz) ]\n")
;fprintf(fp, "*EW      Edge weight traits (mean, median and IQR, all scaled!)\n")
;fprintf(fp, "            Scaling is used to avoid printing of fractional parts throughout.\n")
;fprintf(fp, "            The scaling factor is %.2f [-report-scale option]\n", weight_scale)
;fprintf(fp, " ND      Node degree traits [mean, median and IQR]\n")
;fprintf(fp, " CCF     Clustering coefficient %s\n", compute_flags & COMPUTE_CLCF ? "(not computed; use --clcf to include this)" : "")
;fprintf(fp, " eff     Induced component efficiency %s\n", compute_flags & COMPUTE_EFF ? "(not computed; use --eff to include this)" : "")

;if (mode == 'c')
 fprintf(fp, "Cutoff   The threshold used.\n")
;else if (mode == 't')
 fprintf(fp, "*Cutoff  The threshold with scale factor %.2f and fractional parts removed\n", weight_scale)
;else if (mode == 'k')
 fprintf(fp, "k-NN     The knn parameter\n")
;else if (mode == 'l')
 fprintf(fp, "N        The knn parameter (merge mode)\n")
;else if (mode == 'n')
 fprintf(fp, "ceil     The ceil parameter\n")
;fprintf(fp, "Total number of nodes: %lu\n", (ulong) N_COLS(mx))
;}
 fprintf(fp, "-------------------------------------------------------------------------------\n")
;fprintf(fp, "  L   D   R   S     cce *EWmean  *EWmed *EWiqr NDmean  NDmed  NDiqr CCF  eff %6s \n", mode == 'k' ? "k-NN" : mode == 'l' ? "N" : mode == 'n' ? "Ceil" : "Cutoff")
;fprintf(fp, "-------------------------------------------------------------------------------\n")
;     }

      for (step = vary_a; step <= vary_z; step += vary_s)
      {  double cutoff = step * 1.0 / vary_n
      ;  double eff = -1.0
      ;  mclv* nnodes = mclvCanonical(NULL, N_COLS(mx), 0.0)
      ;  mclv* degree = mclvCanonical(NULL, N_COLS(mx), 0.0)
      ;  dim i, n_sample = 0
      ;  double cor, y_prev, iqr = 0.0
      ;  mclx* cc = NULL, *res = NULL
      ;  mclv* sz, *ccsz = NULL
      ;  int step2 = vary_z + vary_a - step

      ;  sum_vals = 0.0
      
      ;  if (mode == 't' || mode == 'c')
            mclxSelectValues(mx, &cutoff, NULL, MCLX_EQT_GQ)
         ,  res = mx
      ;  else if (mode == 'k')
         {  res = rebase_g ? mclxCopy(mx) : mx
         ;  mclxKNNdispatch(res, step2, n_thread_l, 1)
      ;  }
         else if (mode == 'l')
         {  res = mx
         ;  mclxKNNdispatch(res, step2, n_thread_l, 0)
      ;  }
         else if (mode == 'n')
         {  res = rebase_g ? mclxCopy(mx) : mx
         ;  mclv* cv = mclgCeilNB(res, step2, NULL, NULL, NULL)
         ;  mclvFree(&cv)
      ;  }

         sz = mclxColSizes(res, MCL_VECTOR_COMPLETE)
      ;  mclvSortDescVal(sz)

      ;  cc = clmUGraphComponents(res, NULL)     /* fixme: user has to specify -tf '#max()' if graph is directed */
      ;  if (cc)
         {  ccsz = mclxColSizes(cc, MCL_VECTOR_COMPLETE)
         ;  if (compute_flags & COMPUTE_EFF)
            {  clmPerformanceTable pftable
            ;  clmPerformance(mx, cc, &pftable)
            ;  eff = pftable.efficiency
         ;  }
         }

         if (mode == 't' || mode == 'c')
         {  for
            (
            ;  n_allvals > 0 && allvals[n_allvals-1] < cutoff
            ;  n_allvals--
            )
         ;  sum_vals = 0.0
         ;  for (i=0;i<n_allvals;i++)
            sum_vals += allvals[i]
      ;  }
         else if (mode == 'k' || mode == 'n' || mode == 'l')
         {  n_allvals = get_n_sort_allvals(res, allvals, noe, &sum_vals, FALSE)
      ;  }

         levels[cor_i].sim_median=  mcxMedian(allvals, n_allvals, sizeof allvals[0], pval_get_double, &iqr)
      ;  levels[cor_i].sim_iqr   =  iqr
      ;  levels[cor_i].sim_mean  =  n_allvals ? sum_vals / n_allvals : 0.0

      ;  levels[cor_i].nb_median =  mcxMedian(sz->ivps, sz->n_ivps, sizeof sz->ivps[0], ivp_get_double, &iqr)
      ;  levels[cor_i].nb_iqr    =  iqr
      ;  levels[cor_i].nb_mean   =  mclvSum(sz) / N_COLS(res)
      ;  levels[cor_i].cc_exp    =  cc ? mclvPowSum(ccsz, 2.0) / N_COLS(res) : 0
      ;  levels[cor_i].nb_sum    =  mclxNrofEntries(res)

      ;  if (compute_flags & COMPUTE_CLCF)
         {  mclv* clcf = mclgCLCFdispatch(res, n_thread_l)
         ;  levels[cor_i].clcf   =  mclvSum(clcf) / N_COLS(mx)
         ;  mclvFree(&clcf)
      ;  }
         else
         levels[cor_i].clcf = 0.0

      ;  levels[cor_i].threshold =  mode == 'k' || mode == 'l' || mode == 'n' ? step2 : cutoff
      ;  levels[cor_i].bigsize   =  cc ? cc->cols[0].n_ivps : 0
      ;  levels[cor_i].n_single  =  0
      ;  levels[cor_i].n_edge    =  n_allvals
      ;  levels[cor_i].n_lq      =  0

      ;  if (cc)
         for (i=0;i<N_COLS(cc);i++)
         {  dim n = cc->cols[N_COLS(cc)-1-i].n_ivps
         ;  if (n == 1)
            levels[cor_i].n_single++
         ;  if (n <= divide_g)
            levels[cor_i].n_lq += n
         ;  else
            break
      ;  }

         if (levels[cor_i].bigsize <= divide_g)
         levels[cor_i].bigsize = 0

      ;  y_prev = sz->ivps[0].val

                  /* wiki says:
                     A scale-free network is a network whose degree distribution follows a power
                     law, at least asymptotically. That is, the fraction P(k) of nodes in the
                     network having k connections to other nodes goes for large values of k as P(k)
                     ~ k^−g where g is a constant whose value is typically in the range 2<g<3,
                     although occasionally it may lie outside these bounds.
                 */
      ;  for (i=1;i<sz->n_ivps;i++)
         {  double y = sz->ivps[i].val
         ;  if (y > y_prev - 0.5)
            continue                                              /* same as node degree seen last */
         ;  nnodes->ivps[n_sample].val = log( (i*1.0) / (1.0*N_COLS(res)))    /* x = #nodes >= k, as fraction   */
         ;  degree->ivps[n_sample].val = log(y_prev ? y_prev : 1)            /* y = k = degree of node         */
         ;  n_sample++
;if(0)fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) y_prev, (int) i, (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val)
         ;  y_prev = y
      ;  }
         nnodes->ivps[n_sample].val = 0
      ;  nnodes->ivps[n_sample++].val = log(y_prev ? y_prev : 1)
;if(0){fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) sz->ivps[sz->n_ivps-1].val, (int) N_COLS(res), (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val)
;}

      ;  mclvResize(nnodes, n_sample)
      ;  mclvResize(degree, n_sample)
      ;  cor = pearson(nnodes, degree, n_sample)

      ;  levels[cor_i].degree_cor =  cor * cor

;if(0)fprintf(stdout, "cor at cutoff %.2f %.3f\n\n", cutoff, levels[cor_i-1].degree_cor)
      ;  mclvFree(&nnodes)
      ;  mclvFree(&degree)
      ;  mclvFree(&sz)
      ;  mclvFree(&ccsz)
      ;  mclxFree(&cc)

;  if(output_flags & OUTPUT_TABLE)
   {  fprintf
      (  fp
      ,  "%lu\t%lu\t%lu\t%lu\t%lu"
         "\t%6g\t%6g\t%6g"
         "\t%6g\t%lu\t%6g"

      ,  (ulong) levels[cor_i].bigsize
      ,  (ulong) levels[cor_i].n_lq
      ,  (ulong) N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq
      ,  (ulong) levels[cor_i].n_single
      ,  (ulong) levels[cor_i].cc_exp

      ,  (double) levels[cor_i].sim_mean
      ,  (double) levels[cor_i].sim_median
      ,  (double) levels[cor_i].sim_iqr

      ,  (double) levels[cor_i].nb_mean
      ,  (ulong) levels[cor_i].nb_median
      ,  (double) levels[cor_i].nb_iqr
      )

   ;  if (compute_flags & COMPUTE_CLCF) fprintf(fp, "\t%6g", levels[cor_i].clcf)   ;  else fputs("\tNA", fp)
   ;  if (eff >= 0.0) fprintf(fp, "\t%4g", eff)              ;  else fputs("\tNA", fp)

   ;  fprintf(fp, "\t%6g", (double) levels[cor_i].threshold)
   ;  fputc('\n', fp)
;  }
   else
   {  fprintf
      (  fp
      ,  "%3d %3d %3d %3d %7d "
         "%7.0f %7.0f %6.0f"
         "%6.1f %6.0f %6.0f"

      ,  0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].bigsize) / N_COLS(mx))
      ,  0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_lq) / N_COLS(mx))
      ,  0 ? 1 : (int) (0.5 + (100.0 * (N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq)) / N_COLS(mx))
      ,  0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_single) / N_COLS(mx))
      ,  0 ? 1 : (int) (0.5 + levels[cor_i].cc_exp)

      ,  0 ? 1.0 : (double) (levels[cor_i].sim_mean   * weight_scale)
      ,  0 ? 1.0 : (double) (levels[cor_i].sim_median * weight_scale)
      ,  0 ? 1.0 : (double) (levels[cor_i].sim_iqr    * weight_scale)

      ,  0 ? 1.0 : (double) (levels[cor_i].nb_mean                 )
      ,  0 ? 1.0 : (double) (levels[cor_i].nb_median + 0.5         )
      ,  0 ? 1.0 : (double) (levels[cor_i].nb_iqr + 0.5            )
      )

   ;  if (compute_flags & COMPUTE_CLCF)
      fprintf(fp, " %3d", 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].clcf)))
   ;  else
      fputs("   -", fp)

   ;  if (eff >= 0.0)
      fprintf(fp, "  %3d", (int) (0.5 + 1000 * eff))
   ;  else
      fputs("    -", fp)

   ;  if (mode == 'c')
      fprintf(fp, "%8.2f\n", (double) levels[cor_i].threshold)
   ;  else if (mode == 't')
      fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold  * weight_scale)
   ;  else if (mode == 'k' || mode == 'n' || mode == 'l')
      fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold)
 ; }

      ;  cor_i++
      ;  if (res != mx)
         mclxFree(&res)
   ;  }

   if (!(output_flags & OUTPUT_TABLE))
   {  if (weefreemen)
      {
fprintf(fp, "-------------------------------------------------------------------------------\n")
;fprintf(fp, "The graph below plots the R^2 squared value for the fit of a log-log plot of\n")
;fprintf(fp, "<node degree k> versus <#nodes with degree >= k>, for the network resulting\n")
;fprintf(fp, "from applying a particular %s cutoff.\n", mode == 'c' ? "correlation" : "similarity")
;fprintf(fp, "-------------------------------------------------------------------------------\n")
   ;  for (j=0;j<cor_i;j++)
      {  dim jj
      ;  for (jj=30;jj<=100;jj++)
         {  char c = ' '
         ;  if (jj * 0.01 < levels[j].degree_cor && (jj+1.0) * 0.01 > levels[j].degree_cor)
            c = 'X'
         ;  else if (jj % 5 == 0)
            c = '|'
         ;  fputc(c, fp)
      ;  }
         if (mode == 'c')
         fprintf(fp, "%8.2f\n", (double) levels[j].threshold)
      ;  else
         fprintf(fp, "%8.0f\n", (double) levels[j].threshold * weight_scale)
   ;  }

 fprintf(fp, "|----+----|----+----|----+----|----+----|----+----|----+----|----+----|--------\n")
;fprintf(fp, "| R^2   0.4       0.5       0.6       0.7       0.8       0.9    |  1.0    -o)\n")
;fprintf(fp, "+----+----+----+----+----+---------+----+----+----+----+----+----+----+    /\\\\\n")
;fprintf(fp, "| 2 4 6 8   2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 |   _\\_/\n")
;fprintf(fp, "+----+----|----+----|----+----|----+----|----+----|----+----|----+----+--------\n")
;     }
      else
      fprintf(fp, "-------------------------------------------------------------------------------\n")
;  }

      mclxFree(&mx)
   ;  mcxFree(allvals)
;  }
Пример #16
0
static dim clm_clm_prune
(  mclx*    mx
,  mclx*    cl
,  dim      prune_sz
,  mclx**   cl_adjustedpp
,  dim*     n_sink
,  dim*     n_source
)
   {  dim d, n_adjusted = 0
   ;  mclx* cl_adj = mclxCopy(cl)
   ;  mclv* cid_affected = mclvClone(cl->dom_cols)
   ;  const char* me = "clmAssimilate"

   ;  double bar_affected = 1.5

   ;  mclx *el_to_cl = NULL
   ;  mclx *el_on_cl = NULL
   ;  mclx *cl_on_cl = NULL
   ;  mclx *cl_on_el = NULL

   ;  *n_sink = 0
   ;  *n_source = 0

   ;  mclvMakeConstant(cid_affected, 1.0)
   ;  mclxColumnsRealign(cl_adj, mclvSizeCmp)

   ;  *cl_adjustedpp = NULL

   ;  clmCastActors
      (&mx, &cl_adj, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95)
   ;  mclxFree(&cl_on_el)

   ;  for (d=0;d<N_COLS(cl_on_cl);d++)
      {  mclv* clthis   =  cl_adj->cols+d
      ;  mclv* cllist   =  cl_on_cl->cols+d
      ;  mclp* pself    =  mclvGetIvp(cllist, clthis->vid, NULL)
      ;  double self_val = -1.0
      
      ;  if (pself)
            self_val = pself->val
         ,  pself->val *= 1.001  /* to push it up in case of equal weights */

;if(0)fprintf(stderr, "test size %d\n", (int) clthis->n_ivps)
      ;  if (prune_sz && clthis->n_ivps > prune_sz)
         continue

      ;  while (1)
         {  mclv* clthat
         ;  dim e
         ;  if (cllist->n_ivps < 2)
            break
         ;  mclvSort(cllist, mclpValRevCmp)

                     /* now get biggest mass provided that cluster
                      * ranks higher (has at least as many entries)
                      *
                      * fixme/todo: we probably have a slight order
                      * dependency for some fringe cases. If provable
                      * then either solve or document it.
                     */
         ;  for (e=0;e<cllist->n_ivps;e++)
            if (cllist->ivps[e].idx >= clthis->vid)
            break

                     /* found none or itself */
         ;  if (e == cllist->n_ivps || cllist->ivps[e].idx == clthis->vid)
            break

         ;  if       /* Should Not Happen */
            (!(clthat
            =  mclxGetVector(cl_adj, cllist->ivps[e].idx, RETURN_ON_FAIL, NULL)
            ) )
            break

                     /*    works for special case prune_sz == 0               */
                     /*    if (clthat->n_ivps + clthis->n_ivps > prune_sz)    */
                     /*    ^iced. inconsistent behaviour as k grows.          */
         ;  {  mcxLog
               (  MCX_LOG_LIST
               ,  me
               ,  "source %ld|%lu|%.3f absorbed by %ld|%lu|%.3f"
               ,  clthis->vid, (ulong) clthis->n_ivps, self_val
               ,  clthat->vid, (ulong) clthat->n_ivps, cllist->ivps[0].val
               )
            ;  n_adjusted += clthis->n_ivps
            ;  (*n_sink)++
                     /* note: we could from our precomputed cl_on_cl
                      * obtain that A is absorbed in B, B is absorbed in C.
                      * below we see that A will be merged with B,
                      * and the result will then be merged with C.
                      * This depends on the fact that cl_adj is ordered
                      * on increasing cluster size.
                     */
            ;  mcldMerge(cl_adj->cols+d, clthat, clthat)
            ;  mclvResize(cl_adj->cols+d, 0)
            ;  mclvInsertIdx(cid_affected, clthat->vid, 2.0)
         ;  }
            break
      ;  }
         mclvSort(cllist, mclpIdxCmp)
   ;  }

      mclxFree(&cl_on_cl)
   ;  mclxFree(&el_on_cl)
   ;  mclxFree(&el_to_cl)

   ;  mclxMakeCharacteristic(cl)

   ;  mclvUnary(cid_affected, fltxGT, &bar_affected)
   ;  *n_source = cid_affected->n_ivps
   ;  mclvFree(&cid_affected)

   ;  mclxColumnsRealign(cl_adj, mclvSizeRevCmp)

   ;  if (!n_adjusted)
      {  mclxFree(&cl_adj)
      ;  return 0
   ;  }

      mclxUnary(cl_adj, fltxCopy, NULL)
   ;  mclxMakeCharacteristic(cl_adj)   

   ;  *cl_adjustedpp  =  cl_adj
   ;  return n_adjusted
;  }