Exemplo n.º 1
0
int mclvSumCmp
(  const void* p1
,  const void* p2
)  
   {  double diff = mclvSum((mclVector*) p1) - mclvSum((mclVector*) p2)
   ;  return MCX_SIGN(diff)
;  }
Exemplo n.º 2
0
double pearson
(  const mclv* a
,  const mclv* b
,  dim n
)
   {  double suma = mclvSum(a)
   ;  double sumb = mclvSum(b)
   ;  double sumasq = mclvPowSum(a, 2.0)
   ;  double sumbsq = mclvPowSum(b, 2.0)

   ;  double nom = sqrt( (n*sumasq - suma*suma) * (n*sumbsq - sumb*sumb) )
   ;  double num = n * mclvIn(a, b) - suma * sumb
   ;  return nom ? num / nom : 0.0
;  }
Exemplo n.º 3
0
void mclvMean
(  const mclv*    vec
,  dim      N           /* vec does/might not store zeroes */
,  double   *meanp
,  double   *stdp
)
   {  dim d
   ;  double sum, mean, std = 0.0

   ;  *meanp = 0.0
   ;  *stdp  = 0.0

   ;  if (!N && !(N = vec->n_ivps))
      return
   ;  else if (N < vec->n_ivps)
      mcxErr("mclvMean PBD", "N < vec->n_ivps (vid %ld)", (long) vec->vid)

   ;  sum  = mclvSum(vec)
   ;  mean = sum/N

   ;  for (d=0;d<vec->n_ivps;d++)
      {  double dif = vec->ivps[d].val - mean
      ;  std += dif * dif
   ;  }

      if (N > vec->n_ivps)
      std += (N-vec->n_ivps) * mean * mean

   ;  *stdp = sqrt(std/N)
   ;  *meanp = mean
;  }
Exemplo n.º 4
0
double mclvNormalize
(  mclVector*  vec
)  
   {  dim      vecsize  = vec->n_ivps
   ;  mclIvp*  vecivps  = vec->ivps
   ;  double   sum      = mclvSum(vec)

   ;  vec->val =  sum

   ;  if (vec->n_ivps && sum == 0.0)
      {  mcxErr
         (  "mclvNormalize"
         ,  "warning: zero sum <%f> for vector <%ld>"
         ,  (double) sum
         ,  (long) vec->vid
         )
      ;  return 0.0
   ;  }
      else if (sum < 0.0)
      mcxErr("mclvNormalize", "warning: negative sum <%f>", (double) sum)

   ;  while (vecsize-- > 0)      /* careful with unsignedness */
      (vecivps++)->val /= sum
   ;  return sum
;  }
Exemplo n.º 5
0
dim get_n_sort_allvals
(  const mclx* mx
,  pval* allvals
,  dim noe
,  double* sum_vals
,  mcxbool asc
)
   {  dim n_allvals = 0
   ;  double s = 0.0
   ;  dim j
   ;  for (j=0;j<N_COLS(mx);j++)
      {  mclv* vec = mx->cols+j
      ;  dim k
      ;  if (n_allvals + vec->n_ivps > noe)
         mcxDie(1, me, "panic/impossible: not enough memory")
      ;  for (k=0;k<vec->n_ivps;k++)
         allvals[n_allvals+k] = vec->ivps[k].val
      ;  n_allvals += vec->n_ivps
      ;  s += mclvSum(vec)
   ;  }
      if (asc)
      qsort(allvals, n_allvals, sizeof(pval), valcmpasc)
   ;  else
      qsort(allvals, n_allvals, sizeof(pval), valcmpdesc)
   ;  sum_vals[0] = s
   ;  return n_allvals
;  }
Exemplo n.º 6
0
mclv* reduce_v
(  const mclv* u
)
   {  mclv* v = mclvClone(u)
   ;  dim n = v->n_ivps
   ;  double s = mclvSum(v)
   ;  double sq = mclvPowSum(v, 2.0)
   ;  if (s)
      mclvSelectGqBar(v, 0.25 * sq / s)
;fprintf(stderr, "from %d to %d entries\n", (int) n, (int) v->n_ivps)
   ;  return v
;  }
Exemplo n.º 7
0
static void tf_ssq
(  mclx* mx
,  double val
)
   {  dim i
   ;  for (i=0;i<N_COLS(mx);i++)
      {  mclv* v = mx->cols+i
      ;  double ssq = mclvPowSum(v, 2.0)
      ;  double sum = mclvSum(v)
      ;  double self = mclvSelf(v)
      ;  if (sum-self)
         mclvSelectGtBar(v, val * (ssq - self*self) / (sum - self))
   ;  }
;  }
Exemplo n.º 8
0
double mclvSelectValues
(  mclv        *src
,  double      *lft
,  double      *rgt
,  mcxbits     equate
,  mclv        *dst
)
   {  mclpVRange range
   ;  range.lft = lft
   ;  range.rgt = rgt
   ;  range.equate = equate

   ;  mclvCopyGiven(src, dst, mclpSelectValues, &range, 0)
   ;  return mclvSum(dst)
;  }
Exemplo n.º 9
0
static void vary_threshold
(  mcxIO* xf
,  FILE*  fp
,  int vary_a
,  int vary_z
,  int vary_s
,  int vary_n
,  unsigned mode
)
   {  dim cor_i = 0, j
   ;  int step

   ;  mclx* mx
   ;  unsigned long noe
   ;  pval*  allvals
   ;  dim  n_allvals = 0
   ;  double sum_vals = 0.0

   ;  mx = mclxRead(xf, EXIT_ON_FAIL)
   ;  mcxIOclose(xf)

   ;  if (transform)
      mclgTFexec(mx, transform)

   ;  noe = mclxNrofEntries(mx)
   ;  allvals = mcxAlloc(noe * sizeof allvals[0], EXIT_ON_FAIL)

   ;  if (!weight_scale)
      {  if (mode == 'c')
         weight_scale = 1.0
      ;  else
         weight_scale = vary_n
   ;  }

      n_allvals = get_n_sort_allvals(mx, allvals, noe, &sum_vals, FALSE)

   ;  if (mode == 'c')
      {  double smallest = n_allvals ? allvals[n_allvals-1] : -DBL_MAX
      ;  if (vary_a * 1.0 / vary_n < smallest)
         {  while (vary_a * 1.0 / vary_n < smallest)
            vary_a++
         ;  vary_a--
      ;  }
         mcxTell
         (  me
         ,  "smallest correlation is %.2f, using starting point %.2f"
         ,  smallest
         ,  vary_a * 1.0 / vary_n
         )
   ;  }

      if (output_flags & OUTPUT_TABLE)
      {
;fprintf(fp, "L\tD\tR\tS\tcce\tEWmean\tEWmed\tEWiqr\tNDmean\tNDmed\tNDiqr\tCCF\t%s\n", mode == 'k' ? "kNN" : mode == 'l' ? "N" : "Cutoff")
;}    else
      {  if (output_flags & OUTPUT_KEY)
 {
;fprintf(fp, "-------------------------------------------------------------------------------\n")
;fprintf(fp, " L       Percentage of nodes in the largest component\n")
;fprintf(fp, " D       Percentage of nodes in components of size at most %d [-div option]\n", (int) divide_g)
;fprintf(fp, " R       Percentage of nodes not in L or D: 100 - L -D\n")
;fprintf(fp, " S       Percentage of nodes that are singletons\n")
;fprintf(fp, " cce     Expected size of component, nodewise [ sum(sz^2) / sum^2(sz) ]\n")
;fprintf(fp, "*EW      Edge weight traits (mean, median and IQR, all scaled!)\n")
;fprintf(fp, "            Scaling is used to avoid printing of fractional parts throughout.\n")
;fprintf(fp, "            The scaling factor is %.2f [-report-scale option]\n", weight_scale)
;fprintf(fp, " ND      Node degree traits [mean, median and IQR]\n")
;fprintf(fp, " CCF     Clustering coefficient %s\n", compute_flags & COMPUTE_CLCF ? "(not computed; use --clcf to include this)" : "")
;fprintf(fp, " eff     Induced component efficiency %s\n", compute_flags & COMPUTE_EFF ? "(not computed; use --eff to include this)" : "")

;if (mode == 'c')
 fprintf(fp, "Cutoff   The threshold used.\n")
;else if (mode == 't')
 fprintf(fp, "*Cutoff  The threshold with scale factor %.2f and fractional parts removed\n", weight_scale)
;else if (mode == 'k')
 fprintf(fp, "k-NN     The knn parameter\n")
;else if (mode == 'l')
 fprintf(fp, "N        The knn parameter (merge mode)\n")
;else if (mode == 'n')
 fprintf(fp, "ceil     The ceil parameter\n")
;fprintf(fp, "Total number of nodes: %lu\n", (ulong) N_COLS(mx))
;}
 fprintf(fp, "-------------------------------------------------------------------------------\n")
;fprintf(fp, "  L   D   R   S     cce *EWmean  *EWmed *EWiqr NDmean  NDmed  NDiqr CCF  eff %6s \n", mode == 'k' ? "k-NN" : mode == 'l' ? "N" : mode == 'n' ? "Ceil" : "Cutoff")
;fprintf(fp, "-------------------------------------------------------------------------------\n")
;     }

      for (step = vary_a; step <= vary_z; step += vary_s)
      {  double cutoff = step * 1.0 / vary_n
      ;  double eff = -1.0
      ;  mclv* nnodes = mclvCanonical(NULL, N_COLS(mx), 0.0)
      ;  mclv* degree = mclvCanonical(NULL, N_COLS(mx), 0.0)
      ;  dim i, n_sample = 0
      ;  double cor, y_prev, iqr = 0.0
      ;  mclx* cc = NULL, *res = NULL
      ;  mclv* sz, *ccsz = NULL
      ;  int step2 = vary_z + vary_a - step

      ;  sum_vals = 0.0
      
      ;  if (mode == 't' || mode == 'c')
            mclxSelectValues(mx, &cutoff, NULL, MCLX_EQT_GQ)
         ,  res = mx
      ;  else if (mode == 'k')
         {  res = rebase_g ? mclxCopy(mx) : mx
         ;  mclxKNNdispatch(res, step2, n_thread_l, 1)
      ;  }
         else if (mode == 'l')
         {  res = mx
         ;  mclxKNNdispatch(res, step2, n_thread_l, 0)
      ;  }
         else if (mode == 'n')
         {  res = rebase_g ? mclxCopy(mx) : mx
         ;  mclv* cv = mclgCeilNB(res, step2, NULL, NULL, NULL)
         ;  mclvFree(&cv)
      ;  }

         sz = mclxColSizes(res, MCL_VECTOR_COMPLETE)
      ;  mclvSortDescVal(sz)

      ;  cc = clmUGraphComponents(res, NULL)     /* fixme: user has to specify -tf '#max()' if graph is directed */
      ;  if (cc)
         {  ccsz = mclxColSizes(cc, MCL_VECTOR_COMPLETE)
         ;  if (compute_flags & COMPUTE_EFF)
            {  clmPerformanceTable pftable
            ;  clmPerformance(mx, cc, &pftable)
            ;  eff = pftable.efficiency
         ;  }
         }

         if (mode == 't' || mode == 'c')
         {  for
            (
            ;  n_allvals > 0 && allvals[n_allvals-1] < cutoff
            ;  n_allvals--
            )
         ;  sum_vals = 0.0
         ;  for (i=0;i<n_allvals;i++)
            sum_vals += allvals[i]
      ;  }
         else if (mode == 'k' || mode == 'n' || mode == 'l')
         {  n_allvals = get_n_sort_allvals(res, allvals, noe, &sum_vals, FALSE)
      ;  }

         levels[cor_i].sim_median=  mcxMedian(allvals, n_allvals, sizeof allvals[0], pval_get_double, &iqr)
      ;  levels[cor_i].sim_iqr   =  iqr
      ;  levels[cor_i].sim_mean  =  n_allvals ? sum_vals / n_allvals : 0.0

      ;  levels[cor_i].nb_median =  mcxMedian(sz->ivps, sz->n_ivps, sizeof sz->ivps[0], ivp_get_double, &iqr)
      ;  levels[cor_i].nb_iqr    =  iqr
      ;  levels[cor_i].nb_mean   =  mclvSum(sz) / N_COLS(res)
      ;  levels[cor_i].cc_exp    =  cc ? mclvPowSum(ccsz, 2.0) / N_COLS(res) : 0
      ;  levels[cor_i].nb_sum    =  mclxNrofEntries(res)

      ;  if (compute_flags & COMPUTE_CLCF)
         {  mclv* clcf = mclgCLCFdispatch(res, n_thread_l)
         ;  levels[cor_i].clcf   =  mclvSum(clcf) / N_COLS(mx)
         ;  mclvFree(&clcf)
      ;  }
         else
         levels[cor_i].clcf = 0.0

      ;  levels[cor_i].threshold =  mode == 'k' || mode == 'l' || mode == 'n' ? step2 : cutoff
      ;  levels[cor_i].bigsize   =  cc ? cc->cols[0].n_ivps : 0
      ;  levels[cor_i].n_single  =  0
      ;  levels[cor_i].n_edge    =  n_allvals
      ;  levels[cor_i].n_lq      =  0

      ;  if (cc)
         for (i=0;i<N_COLS(cc);i++)
         {  dim n = cc->cols[N_COLS(cc)-1-i].n_ivps
         ;  if (n == 1)
            levels[cor_i].n_single++
         ;  if (n <= divide_g)
            levels[cor_i].n_lq += n
         ;  else
            break
      ;  }

         if (levels[cor_i].bigsize <= divide_g)
         levels[cor_i].bigsize = 0

      ;  y_prev = sz->ivps[0].val

                  /* wiki says:
                     A scale-free network is a network whose degree distribution follows a power
                     law, at least asymptotically. That is, the fraction P(k) of nodes in the
                     network having k connections to other nodes goes for large values of k as P(k)
                     ~ k^−g where g is a constant whose value is typically in the range 2<g<3,
                     although occasionally it may lie outside these bounds.
                 */
      ;  for (i=1;i<sz->n_ivps;i++)
         {  double y = sz->ivps[i].val
         ;  if (y > y_prev - 0.5)
            continue                                              /* same as node degree seen last */
         ;  nnodes->ivps[n_sample].val = log( (i*1.0) / (1.0*N_COLS(res)))    /* x = #nodes >= k, as fraction   */
         ;  degree->ivps[n_sample].val = log(y_prev ? y_prev : 1)            /* y = k = degree of node         */
         ;  n_sample++
;if(0)fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) y_prev, (int) i, (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val)
         ;  y_prev = y
      ;  }
         nnodes->ivps[n_sample].val = 0
      ;  nnodes->ivps[n_sample++].val = log(y_prev ? y_prev : 1)
;if(0){fprintf(stderr, "k=%.0f\tn=%d\t%.3f\t%.3f\n", (double) sz->ivps[sz->n_ivps-1].val, (int) N_COLS(res), (double) nnodes->ivps[n_sample-1].val, (double) degree->ivps[n_sample-1].val)
;}

      ;  mclvResize(nnodes, n_sample)
      ;  mclvResize(degree, n_sample)
      ;  cor = pearson(nnodes, degree, n_sample)

      ;  levels[cor_i].degree_cor =  cor * cor

;if(0)fprintf(stdout, "cor at cutoff %.2f %.3f\n\n", cutoff, levels[cor_i-1].degree_cor)
      ;  mclvFree(&nnodes)
      ;  mclvFree(&degree)
      ;  mclvFree(&sz)
      ;  mclvFree(&ccsz)
      ;  mclxFree(&cc)

;  if(output_flags & OUTPUT_TABLE)
   {  fprintf
      (  fp
      ,  "%lu\t%lu\t%lu\t%lu\t%lu"
         "\t%6g\t%6g\t%6g"
         "\t%6g\t%lu\t%6g"

      ,  (ulong) levels[cor_i].bigsize
      ,  (ulong) levels[cor_i].n_lq
      ,  (ulong) N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq
      ,  (ulong) levels[cor_i].n_single
      ,  (ulong) levels[cor_i].cc_exp

      ,  (double) levels[cor_i].sim_mean
      ,  (double) levels[cor_i].sim_median
      ,  (double) levels[cor_i].sim_iqr

      ,  (double) levels[cor_i].nb_mean
      ,  (ulong) levels[cor_i].nb_median
      ,  (double) levels[cor_i].nb_iqr
      )

   ;  if (compute_flags & COMPUTE_CLCF) fprintf(fp, "\t%6g", levels[cor_i].clcf)   ;  else fputs("\tNA", fp)
   ;  if (eff >= 0.0) fprintf(fp, "\t%4g", eff)              ;  else fputs("\tNA", fp)

   ;  fprintf(fp, "\t%6g", (double) levels[cor_i].threshold)
   ;  fputc('\n', fp)
;  }
   else
   {  fprintf
      (  fp
      ,  "%3d %3d %3d %3d %7d "
         "%7.0f %7.0f %6.0f"
         "%6.1f %6.0f %6.0f"

      ,  0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].bigsize) / N_COLS(mx))
      ,  0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_lq) / N_COLS(mx))
      ,  0 ? 1 : (int) (0.5 + (100.0 * (N_COLS(mx) - levels[cor_i].bigsize - levels[cor_i].n_lq)) / N_COLS(mx))
      ,  0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].n_single) / N_COLS(mx))
      ,  0 ? 1 : (int) (0.5 + levels[cor_i].cc_exp)

      ,  0 ? 1.0 : (double) (levels[cor_i].sim_mean   * weight_scale)
      ,  0 ? 1.0 : (double) (levels[cor_i].sim_median * weight_scale)
      ,  0 ? 1.0 : (double) (levels[cor_i].sim_iqr    * weight_scale)

      ,  0 ? 1.0 : (double) (levels[cor_i].nb_mean                 )
      ,  0 ? 1.0 : (double) (levels[cor_i].nb_median + 0.5         )
      ,  0 ? 1.0 : (double) (levels[cor_i].nb_iqr + 0.5            )
      )

   ;  if (compute_flags & COMPUTE_CLCF)
      fprintf(fp, " %3d", 0 ? 1 : (int) (0.5 + (100.0 * levels[cor_i].clcf)))
   ;  else
      fputs("   -", fp)

   ;  if (eff >= 0.0)
      fprintf(fp, "  %3d", (int) (0.5 + 1000 * eff))
   ;  else
      fputs("    -", fp)

   ;  if (mode == 'c')
      fprintf(fp, "%8.2f\n", (double) levels[cor_i].threshold)
   ;  else if (mode == 't')
      fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold  * weight_scale)
   ;  else if (mode == 'k' || mode == 'n' || mode == 'l')
      fprintf(fp, "%8.0f\n", (double) levels[cor_i].threshold)
 ; }

      ;  cor_i++
      ;  if (res != mx)
         mclxFree(&res)
   ;  }

   if (!(output_flags & OUTPUT_TABLE))
   {  if (weefreemen)
      {
fprintf(fp, "-------------------------------------------------------------------------------\n")
;fprintf(fp, "The graph below plots the R^2 squared value for the fit of a log-log plot of\n")
;fprintf(fp, "<node degree k> versus <#nodes with degree >= k>, for the network resulting\n")
;fprintf(fp, "from applying a particular %s cutoff.\n", mode == 'c' ? "correlation" : "similarity")
;fprintf(fp, "-------------------------------------------------------------------------------\n")
   ;  for (j=0;j<cor_i;j++)
      {  dim jj
      ;  for (jj=30;jj<=100;jj++)
         {  char c = ' '
         ;  if (jj * 0.01 < levels[j].degree_cor && (jj+1.0) * 0.01 > levels[j].degree_cor)
            c = 'X'
         ;  else if (jj % 5 == 0)
            c = '|'
         ;  fputc(c, fp)
      ;  }
         if (mode == 'c')
         fprintf(fp, "%8.2f\n", (double) levels[j].threshold)
      ;  else
         fprintf(fp, "%8.0f\n", (double) levels[j].threshold * weight_scale)
   ;  }

 fprintf(fp, "|----+----|----+----|----+----|----+----|----+----|----+----|----+----|--------\n")
;fprintf(fp, "| R^2   0.4       0.5       0.6       0.7       0.8       0.9    |  1.0    -o)\n")
;fprintf(fp, "+----+----+----+----+----+---------+----+----+----+----+----+----+----+    /\\\\\n")
;fprintf(fp, "| 2 4 6 8   2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 | 2 4 6 8 |   _\\_/\n")
;fprintf(fp, "+----+----|----+----|----+----|----+----|----+----|----+----|----+----+--------\n")
;     }
      else
      fprintf(fp, "-------------------------------------------------------------------------------\n")
;  }

      mclxFree(&mx)
   ;  mcxFree(allvals)
;  }
Exemplo n.º 10
0
double get_score
(  const mclv* c
,  const mclv* d
,  const mclv* c_start
,  const mclv* d_start
,  const mclv* c_end
,  const mclv* d_end
)
   {  mclv* vecc   = mclvClone(c)
   ;  mclv* vecd   = mclvClone(d)
   ;  mclv* meet_c = mcldMeet(vecc, vecd, NULL)
   ;  mclv* meet_d = mcldMeet(vecd, meet_c, NULL)

   ;  mclv* cwid   = mclvBinary(c_end, c_start, NULL, fltSubtract)
   ;  mclv* dwid   = mclvBinary(d_end, d_start, NULL, fltSubtract)

   ;  mclv* rmin   = mclvBinary(c_end, d_end, NULL, fltMin)
   ;  mclv* lmax   = mclvBinary(c_start, d_start, NULL, fltMax)
   ;  mclv* delta  = mclvBinary(rmin, lmax, NULL, fltSubtract)

   ;  mclv* weightc, *weightd
   ;  double ip, cd, csn, meanc, meand, mean, euclid, meet_fraction, score, sum_meet_c, sum_meet_d, reduction_c, reduction_d

   ;  int nmeet = meet_c->n_ivps
   ;  int nldif = vecc->n_ivps - nmeet
   ;  int nrdif = vecd->n_ivps - nmeet

   ;  mclvSelectGqBar(delta, 0.0)

   ;  weightc= mclvBinary(delta, cwid, NULL, mydiv)
   ;  weightd= mclvBinary(delta, dwid, NULL, mydiv)

#if 0
;if (c != d)mclvaDump
(  cwid
,  stdout
,  5
,  "\n"
,  0)
,mclvaDump
(  dwid
,  stdout
,  5
,  "\n"
,  0)
#endif

   ;  sum_meet_c  = 0.01 + mclvSum(meet_c)
   ;  sum_meet_d  = 0.01 + mclvSum(meet_d)

   ;  mclvBinary(meet_c, weightc, meet_c, fltMultiply)
   ;  mclvBinary(meet_d, weightd, meet_d, fltMultiply)

   ;  reduction_c = mclvSum(meet_c) / sum_meet_c
   ;  reduction_d = mclvSum(meet_d) / sum_meet_d

   ;  ip    = mclvIn(meet_c, meet_d)
   ;  cd    = sqrt(mclvPowSum(meet_c, 2.0) * mclvPowSum(meet_d, 2.0))
   ;  csn   = cd ? ip / cd : 0.0
   ;  meanc = meet_c->n_ivps ? mclvSum(meet_c) / meet_c->n_ivps : 0.0
   ;  meand = meet_d->n_ivps ? mclvSum(meet_d) / meet_d->n_ivps : 0.0
   ;  mean  = MCX_MIN(meanc, meand)

   ;  euclid =   0
              ?  1.0
              :  (  mean
                 ?  sqrt(mclvPowSum(meet_c, 2.0) / mclvPowSum(vecc, 2.0))
                 :  0.0
                 )
   ;  meet_fraction = pow((meet_c->n_ivps * 1.0 / vecc->n_ivps), 1.0)

   ;  score  =  mean * csn * euclid  * meet_fraction * 1.0

   ;  mclvFree(&meet_c)
   ;  mclvFree(&meet_d)

   ;  fprintf
      (  stdout
      ,  "%10d%10d%10d%10d%10d%10g%10g%10g%10g%10g%10g%10g\n"
      ,  (int) c->vid
      ,  (int) d->vid
      ,  (int) nldif
      ,  (int) nrdif
      ,  (int) nmeet
      ,  score
      ,  mean
      ,  csn
      ,  euclid
      ,  meet_fraction
      ,  reduction_c
      ,  reduction_d
      )

   ;  return score
;  }
Exemplo n.º 11
0
void clmDumpNodeScores
(  const char* name
,  mclx* mx
,  mclx* cl
,  mcxenum  mode
)
   {  dim d, e
   ;  clmVScore sc

   ;  if (mode == CLM_NODE_SELF)
      {  for (d=0;d<N_COLS(cl);d++)
         {  ofs o = -1
         ;  dim clsize = cl->cols[d].n_ivps
         ;  for (e=0;e<clsize;e++)
            {  long idx = cl->cols[d].ivps[e].idx
            ;  o = mclxGetVectorOffset(mx, idx, EXIT_ON_FAIL, o)
            ;  mx->cols[o].val = mclvSum(mx->cols+o)              /* fixme stupid dependency. */
            ;  clmVScanDomain(mx->cols+o, cl->cols+d, &sc)
            ;  clm_dump_line
               (name, &sc, idx, cl->cols[d].vid, mx->cols[o].n_ivps, clsize, 0)
         ;  }
         }
         /* fixme: sum_e not set, pbb due to missing clmCastActors */
      }
      else if (mode == CLM_NODE_INCIDENT)
      {  mclx *el_to_cl = NULL
      ;  mclx *el_on_cl = NULL
      ;  mclx *cl_on_cl = NULL
      ;  mclx *cl_on_el = NULL
      ;  clmCastActors
         (&mx, &cl, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95)
      ;  mclxFree(&cl_on_cl)
      ;  mclxFree(&cl_on_el)

      ;  for (d=0;d<N_COLS(mx);d++)
         {  long nid  = mx->cols[d].vid
         ;  long nsize = mx->cols[d].n_ivps
         ;  mclv* clidvec = mclxGetVector(el_on_cl, nid, RETURN_ON_FAIL, NULL)
         ;  mclv* clself = mclxGetVector(el_to_cl, nid, RETURN_ON_FAIL, NULL)
         ;  dim f

         ;  if (!clself)
            mcxErr
            ("clmDumpNodeScores panic", "node <%ld> does not belong", nid)

         ;  for (f=0;f<clidvec->n_ivps;f++)
            {  long cid = clidvec->ivps[f].idx
            ;  mclv* clvec = mclxGetVector(cl, cid, RETURN_ON_FAIL, NULL)
                          /* ^ overdoing: cid == clvec->vid */
            ;  int alien
            ;  if (!clvec)
               {  mcxErr
                  (  "clmDumpNodeScores panic"
                  ,  "cluster <%ld> node <%ld> mishap"
                  ,  cid
                  ,  nid
                  )
               ;  continue
            ;  }
               clmVScanDomain(mx->cols+d, clvec, &sc)
            ;  alien = clself && clvec->vid == clself->ivps[0].idx ? 0 : 1
            ;  clm_dump_line
               (name, &sc, nid, clvec->vid, nsize, clvec->n_ivps, alien)
         ;  }
         }
         mclxFree(&el_on_cl)
      ;  mclxFree(&el_to_cl)
   ;  }
   }
Exemplo n.º 12
0
static dim clm_clm_adjust
(  mclx* mx
,  mclx* cl
,  dim cls_size_max
,  mclx** cl_adjustedpp
,  mclv** cid_affectedpp
,  mclv** nid_affectedpp
)
   {  dim i, j, n_adjusted = 0
   ;  mclx* cl_adj = mclxCopy(cl)

   ;  mclv* cid_affected = mclvClone(cl->dom_cols)
   ;  mclv* nid_affected = mclvClone(mx->dom_cols)
   ;  double bar_affected = 1.5

   ;  const char* e1 = getenv("MCL_ADJ_FMAX")
   ;  const char* e2 = getenv("MCL_ADJ_EMASS")
   
   ;  double f1 = e1 ? atof(e1) : 2
   ;  double f2 = e2 ? atof(e2) : 3

   ;  mcxbool loggit = mcxLogGet( MCX_LOG_CELL | MCX_LOG_INFO )

   ;  clmVScore sc

   ;  mclx *el_to_cl = NULL
   ;  mclx *el_on_cl = NULL
   ;  mclx *cl_on_cl = NULL
   ;  mclx *cl_on_el = NULL

   ;  *cl_adjustedpp = NULL
   ;  *cid_affectedpp = NULL
   ;  *nid_affectedpp = NULL

   ;  clmCastActors
      (&mx, &cl, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95)

   ;  mclxFree(&cl_on_cl)
   ;  mclxFree(&cl_on_el)

   ;  mclvMakeConstant(cid_affected, 1.0)
   ;  mclvMakeConstant(nid_affected, 1.0)


   ;  for (i=0;i<N_COLS(cl_adj);i++)
      cl_adj->cols[i].val = 0.5

                     /*    Proceed with smallest clusters first.
                      *    Caller has to take care of mclxColumnsRealign
                     */
   ;  for (i=0;i<N_COLS(cl);i++)
      {  mclv* clself = cl->cols+i

                     /*    Only consider nodes in clusters of
                      *    size <= cls_size_max
                     */
      ;  if (cls_size_max && clself->n_ivps > cls_size_max)
         break
                     /*    Clusters that have been marked for inclusion
                      *    cannot play.
                     */
      ;  if (cl_adj->cols[i].val > 1)
         continue

      ;  for (j=0;j<clself->n_ivps;j++)
         {  long nid  = clself->ivps[j].idx
         ;  long nos  = mclvGetIvpOffset(mx->dom_cols, nid, -1)
         ;  mclv* clidvec  =  mclxGetVector(el_on_cl, nid, RETURN_ON_FAIL, NULL)

         ;  double eff_alien_bsf = 0.0, eff_alien_max_bsf = 0.0 /* best so far*/
         ;  double eff_self = 0.0, eff_self_max = 0.0
         ;  long cid_alien  = -1, cid_self = -1
         ;  clmVScore sc_self = { 0 }, sc_alien = { 0 }
         ;  dim f

         ;  if (nos < 0 || !clidvec)
            {  mcxErr
               ("clmDumpNodeScores panic", "node <%ld> does not belong", nid)
            ;  continue
         ;  }

            clmVScanDomain(mx->cols+nos, clself, &sc)
         ;  clmVScoreCoverage(&sc, &eff_self, &eff_self_max)
         ;  cid_self = clself->vid
         ;  sc_self  = sc

         ;  if (loggit)
            mcxLog2
            (  us
            ,  "node %ld in cluster %ld eff %.3f,%.3f sum %.3f"
            ,  nid
            ,  cid_self
            ,  eff_self
            ,  eff_self_max
            ,  sc.sum_i
            )

         ;  for (f=0;f<clidvec->n_ivps;f++)
            {  long cid = clidvec->ivps[f].idx
            ;  mclv* clvec = mclxGetVector(cl, cid, RETURN_ON_FAIL, NULL)
                          /* ^ overdoing: cid == clvec->vid */
            ;  double eff, eff_max
            ;  if (!clvec)
               {  mcxErr
                  (  "clmAdjust panic"
                  ,  "cluster <%ld> node <%ld> mishap"
                  ,  cid
                  ,  nid
                  )
               ;  continue
            ;  }


                        /* fixme: document or remove first condition
                         *
                        */
               if ((0 && clvec->n_ivps <= clself->n_ivps) || clvec->vid == cid_self)
               continue

            ;  clmVScanDomain(mx->cols+nos, clvec, &sc)
            ;  clmVScoreCoverage(&sc, &eff, &eff_max)

#if 0
#  define PIVOT eff > eff_alien_bsf
#else
#  define PIVOT eff_max > eff_alien_max_bsf
#endif

            ;  if
               (  PIVOT
               || sc.sum_i >= 0.5
               )
                  eff_alien_bsf = eff
               ,  eff_alien_max_bsf = eff_max
               ,  cid_alien = clvec->vid
               ,  sc_alien = sc

            ;  if (sc.sum_i >= 0.5)
               break
         ;  }

            if (loggit)
            mcxLog2
            (  us
            ,  " -> best alien %ld eff %.3f,%.3f sum %.3f"
            ,  cid_alien
            ,  eff_alien_bsf
            ,  eff_alien_max_bsf
            ,  sc_alien.sum_i
            )

                  /* below: use sum_i as mass fraction
                   * (clmAdjust framework uses stochastic * matrix)
                  */
         ;  if
            (  cid_alien >= 0
            && cid_self >= 0
            && f1 * sc_alien.max_i >= sc_self.max_i
            && (  (  eff_alien_bsf > eff_self
                  && sc_alien.sum_i > sc_self.sum_i
                  )
               || (  pow(sc_alien.sum_i, f2) >= sc_self.sum_i
                  && pow(eff_self, f2) <= eff_alien_bsf
                  )
               )
                  /* So, if max is reasonable
                   * and efficiency is better and mass is better
                   * or if mass is ridiculously better -> move
                   * Somewhat intricate and contrived, yes.
                  */
            )
            {  mclv* acceptor
               =  mclxGetVector(cl_adj, cid_alien, RETURN_ON_FAIL, NULL)
            ;  mclv* donor
               =  mclxGetVector(cl_adj, cid_self,  RETURN_ON_FAIL, NULL)
            ;  if (!donor || !acceptor || acceptor == donor)
               continue

            ;  mclvInsertIdx(donor, nid, 0.0)
            ;  mclvInsertIdx(acceptor, nid, 1.0)
            ;  acceptor->val = 1.5

            ;  if (mcxLogGet(MCX_LOG_LIST))
               {  mclv* nb = mx->cols+nos
               ;  double mxv = mclvMaxValue(nb)
               ;  double avg = nb->n_ivps ? mclvSum(nb) / nb->n_ivps : -1.0
               ;  mcxLog
                  (  MCX_LOG_LIST
                  ,  us
                  ,  "mov %ld (%ld %.2f %.2f)"
                     " %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)"
                     " to %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)"
                  ,  nid
                  ,     (long) nb->n_ivps, mxv, avg
                  ,  cid_self
                  ,     eff_self, eff_self_max, sc_self.sum_i, sc_self.max_i
                  ,              (ulong) (sc_self.n_meet + sc_self.n_ddif)
                  ,  cid_alien
                  ,     eff_alien_bsf, eff_alien_max_bsf, sc_alien.sum_i, sc_alien.max_i
                  ,              (ulong) (sc_alien.n_meet + sc_alien.n_ddif)
                  )
            ;  }

               n_adjusted++                  
            ;  mclvInsertIdx(cid_affected, cid_alien, 2.0)
            ;  mclvInsertIdx(cid_affected, cid_self, 2.0)
            ;  mclvInsertIdx(nid_affected, nid, 2.0)
         ;  }
         }
      }
      mclxFree(&el_on_cl)
   ;  mclxFree(&el_to_cl)

   ;  for (i=0;i<N_COLS(cl_adj);i++)
      cl_adj->cols[i].val = 0.0

   ;  mclxMakeCharacteristic(cl)

   ;  if (!n_adjusted)
      {  mclxFree(&cl_adj)
      ;  mclvFree(&cid_affected)
      ;  mclvFree(&nid_affected)
      ;  return 0
   ;  }

      mclxUnary(cl_adj, fltxCopy, NULL)
   ;  mclxMakeCharacteristic(cl_adj)   
                     /* FIRST REMOVE ENTRIES set to zero (sssst now .. */
                     /* ...) and THEN make it characteristic again     */

   ;  mclvUnary(cid_affected, fltxGT, &bar_affected)
   ;  mclvUnary(nid_affected, fltxGT, &bar_affected)

   ;  *cl_adjustedpp  =  cl_adj
   ;  *cid_affectedpp =  cid_affected
   ;  *nid_affectedpp =  nid_affected

   ;  return n_adjusted
;  }