Ejemplo n.º 1
0
/* For every column of mx selected by an entry of vec, add 1.0 to the
 * result entry addressed by each entry of that column.
 *
 * The result is a fresh clone of mx->dom_rows, reset to 0.0 before counting.
 * Entry indices are used directly as array offsets: vec->ivps[..].idx into
 * mx->cols, and the column's ivps[..].idx into the result's ivps.
 * NOTE(review): this presumably requires canonical domains for both the
 * columns and the rows of mx -- confirm against the callers.
 *
 * The result is passed through mclvUnary(.., fltxCopy, NULL) before it is
 * returned (presumably to drop the entries that are still zero -- confirm).
 * The caller receives the newly created vector.
*/
static mclv* matrix_vector
(  const mclx* mx
,  const mclv* vec
)
{
   mclv* tally = mclvClone(mx->dom_rows);
   dim sel = 0;

   mclvMakeConstant(tally, 0.0);

   while (sel < vec->n_ivps)
   {
      mclv* col = mx->cols + vec->ivps[sel].idx;
      dim k;

      for (k = 0; k < col->n_ivps; k++)
      {
         tally->ivps[col->ivps[k].idx].val += 1.0;
      }
      sel++;
   }

   mclvUnary(tally, fltxCopy, NULL);
   return tally;
}
Ejemplo n.º 2
0
/* Merge clusters into other clusters that attract them.
 *
 * The work is done on a copy of cl (cl_adj) whose columns are realigned with
 * mclvSizeCmp. For each column d of cl_on_cl (as produced by clmCastActors)
 * the corresponding cluster cl_adj->cols+d is considered, unless prune_sz is
 * nonzero and the cluster has more than prune_sz entries. The cl_on_cl column
 * is sorted on descending value and scanned for the first entry whose index
 * is not below the cluster's own vid. If that entry is the cluster itself, or
 * no such entry exists, nothing happens. Otherwise the cluster is merged into
 * the cluster named by that entry and is resized to zero entries.
 *
 * Arguments:
 *    mx, cl         graph and clustering. Their addresses (for cl: that of
 *                   the copy cl_adj) are handed to clmCastActors.
 *                   mclxMakeCharacteristic is applied to cl before returning.
 *    prune_sz       size bar; 0 disables the size test.
 *    cl_adjustedpp  set to NULL on entry; set to the adjusted clustering
 *                   when at least one merge took place.
 *    n_sink         receives the number of merges performed, that is, the
 *                   number of clusters that were absorbed.
 *    n_source       receives the number of entries of cid_affected that
 *                   exceed bar_affected, that is, the clusters marked with
 *                   2.0 because they absorbed another cluster.
 *                   NOTE(review): relative to the log text ("source ..
 *                   absorbed by ..") these two names look swapped; check
 *                   how the caller interprets them before changing anything.
 *
 * Returns the summed entry count of all absorbed clusters (measured at the
 * moment each was merged), or 0 when nothing was merged; in the latter case
 * cl_adj is freed and *cl_adjustedpp stays NULL.
*/
static dim clm_clm_prune
(  mclx*    mx
,  mclx*    cl
,  dim      prune_sz
,  mclx**   cl_adjustedpp
,  dim*     n_sink
,  dim*     n_source
)
   {  dim d, n_adjusted = 0
   ;  mclx* cl_adj = mclxCopy(cl)
   ;  mclv* cid_affected = mclvClone(cl->dom_cols)
   ;  const char* me = "clmAssimilate"

                     /* cid_affected starts at 1.0 everywhere; absorbing
                      * clusters are raised to 2.0 and selected with this bar.
                     */
   ;  double bar_affected = 1.5

   ;  mclx *el_to_cl = NULL
   ;  mclx *el_on_cl = NULL
   ;  mclx *cl_on_cl = NULL
   ;  mclx *cl_on_el = NULL

   ;  *n_sink = 0
   ;  *n_source = 0

   ;  mclvMakeConstant(cid_affected, 1.0)
   ;  mclxColumnsRealign(cl_adj, mclvSizeCmp)

   ;  *cl_adjustedpp = NULL

   ;  clmCastActors
      (&mx, &cl_adj, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95)
                     /* cl_on_el is not used by this routine */
   ;  mclxFree(&cl_on_el)

   ;  for (d=0;d<N_COLS(cl_on_cl);d++)
      {  mclv* clthis   =  cl_adj->cols+d
      ;  mclv* cllist   =  cl_on_cl->cols+d
      ;  mclp* pself    =  mclvGetIvp(cllist, clthis->vid, NULL)
                     /* stays -1.0 when the cluster has no entry for itself */
      ;  double self_val = -1.0
      
      ;  if (pself)
            self_val = pself->val
         ,  pself->val *= 1.001  /* to push it up in case of equal weights */

      ;  if (prune_sz && clthis->n_ivps > prune_sz)
         continue

                     /* Not a real loop: every path through the body ends in
                      * a break; it serves as a block that can be left early.
                     */
      ;  while (1)
         {  mclv* clthat
         ;  dim e
         ;  if (cllist->n_ivps < 2)
            break
         ;  mclvSort(cllist, mclpValRevCmp)

                     /* now get biggest mass provided that cluster
                      * ranks higher (has at least as many entries)
                      *
                      * fixme/todo: we probably have a slight order
                      * dependency for some fringe cases. If provable
                      * then either solve or document it.
                     */
         ;  for (e=0;e<cllist->n_ivps;e++)
            if (cllist->ivps[e].idx >= clthis->vid)
            break

                     /* found none or itself */
         ;  if (e == cllist->n_ivps || cllist->ivps[e].idx == clthis->vid)
            break

         ;  if       /* Should Not Happen */
            (!(clthat
            =  mclxGetVector(cl_adj, cllist->ivps[e].idx, RETURN_ON_FAIL, NULL)
            ) )
            break

                     /*    works for special case prune_sz == 0               */
                     /*    if (clthat->n_ivps + clthis->n_ivps > prune_sz)    */
                     /*    ^iced. inconsistent behaviour as k grows.          */
         ;  {  mcxLog
               (  MCX_LOG_LIST
               ,  me
               ,  "source %ld|%lu|%.3f absorbed by %ld|%lu|%.3f"
               ,  clthis->vid, (ulong) clthis->n_ivps, self_val
                     /* Report the value of the absorbing cluster itself,
                      * which is entry e. Entry 0 is merely the largest value
                      * in the list and may belong to a cluster with a
                      * smaller index that was skipped by the scan above.
                     */
               ,  clthat->vid, (ulong) clthat->n_ivps, cllist->ivps[e].val
               )
            ;  n_adjusted += clthis->n_ivps
            ;  (*n_sink)++
                     /* note: we could from our precomputed cl_on_cl
                      * obtain that A is absorbed in B, B is absorbed in C.
                      * below we see that A will be merged with B,
                      * and the result will then be merged with C.
                      * This depends on the fact that cl_adj is ordered
                      * on increasing cluster size.
                     */
            ;  mcldMerge(cl_adj->cols+d, clthat, clthat)
            ;  mclvResize(cl_adj->cols+d, 0)
            ;  mclvInsertIdx(cid_affected, clthat->vid, 2.0)
         ;  }
            break
      ;  }
                     /* restore index order after the value sort above */
         mclvSort(cllist, mclpIdxCmp)
   ;  }

      mclxFree(&cl_on_cl)
   ;  mclxFree(&el_on_cl)
   ;  mclxFree(&el_to_cl)

   ;  mclxMakeCharacteristic(cl)

                     /* keep only the clusters that were raised to 2.0 */
   ;  mclvUnary(cid_affected, fltxGT, &bar_affected)
   ;  *n_source = cid_affected->n_ivps
   ;  mclvFree(&cid_affected)

   ;  mclxColumnsRealign(cl_adj, mclvSizeRevCmp)

   ;  if (!n_adjusted)
      {  mclxFree(&cl_adj)
      ;  return 0
   ;  }

      mclxUnary(cl_adj, fltxCopy, NULL)
   ;  mclxMakeCharacteristic(cl_adj)   

   ;  *cl_adjustedpp  =  cl_adj
   ;  return n_adjusted
;  }
Ejemplo n.º 3
0
/* Move individual nodes to another cluster that covers them better.
 *
 * Clusters of cl are visited in column order; as noted in the body, the
 * caller is responsible for ordering the columns so that the smallest
 * clusters come first. The visit stops at the first cluster with more than
 * cls_size_max entries (when cls_size_max is nonzero). Clusters that already
 * accepted a node during this call (their cl_adj column value was raised
 * above 1) are skipped as donors.
 *
 * For each node of a visited cluster, the node's column in mx is scored
 * against its own cluster and against the clusters listed for that node in
 * el_on_cl (clmVScanDomain + clmVScoreCoverage). The best alien cluster is
 * tracked through PIVOT; scanning stops as soon as an alien cluster has
 * sum_i >= 0.5. When the acceptance test below holds, the node is moved:
 * in the working copy cl_adj its entry in the donor is set to 0.0 and an
 * entry with value 1.0 is inserted in the acceptor.
 *
 * Two factors of the acceptance test are read from the environment with
 * atof(): MCL_ADJ_FMAX (default 2) and MCL_ADJ_EMASS (default 3).
 *
 * Arguments:
 *    mx, cl            graph and clustering; their addresses are handed to
 *                      clmCastActors. mclxMakeCharacteristic is applied to
 *                      cl before returning.
 *    cls_size_max      size bar for donor clusters; 0 disables the test.
 *    cl_adjustedpp     receives the adjusted clustering.
 *    cid_affectedpp    receives the vector of clusters that donated or
 *                      accepted a node.
 *    nid_affectedpp    receives the vector of nodes that were moved.
 *
 * All three output pointers are set to NULL on entry and are only assigned
 * when at least one node was moved.
 *
 * Returns the number of node moves; 0 means nothing was moved and all
 * working objects were freed.
*/
static dim clm_clm_adjust
(  mclx* mx
,  mclx* cl
,  dim cls_size_max
,  mclx** cl_adjustedpp
,  mclv** cid_affectedpp
,  mclv** nid_affectedpp
)
   {  dim i, j, n_adjusted = 0
   ;  mclx* cl_adj = mclxCopy(cl)

   ;  mclv* cid_affected = mclvClone(cl->dom_cols)
   ;  mclv* nid_affected = mclvClone(mx->dom_cols)
                     /* both vectors start at 1.0; affected entries are
                      * raised to 2.0 and selected with this bar at the end.
                     */
   ;  double bar_affected = 1.5

   ;  const char* e1 = getenv("MCL_ADJ_FMAX")
   ;  const char* e2 = getenv("MCL_ADJ_EMASS")
   
   ;  double f1 = e1 ? atof(e1) : 2
   ;  double f2 = e2 ? atof(e2) : 3

   ;  mcxbool loggit = mcxLogGet( MCX_LOG_CELL | MCX_LOG_INFO )

   ;  clmVScore sc

   ;  mclx *el_to_cl = NULL
   ;  mclx *el_on_cl = NULL
   ;  mclx *cl_on_cl = NULL
   ;  mclx *cl_on_el = NULL

   ;  *cl_adjustedpp = NULL
   ;  *cid_affectedpp = NULL
   ;  *nid_affectedpp = NULL

   ;  clmCastActors
      (&mx, &cl, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95)

                     /* only el_on_cl is consulted below */
   ;  mclxFree(&cl_on_cl)
   ;  mclxFree(&cl_on_el)

   ;  mclvMakeConstant(cid_affected, 1.0)
   ;  mclvMakeConstant(nid_affected, 1.0)


                     /* the column value doubles as an "accepted a node"
                      * flag: 0.5 initially, 1.5 once a node was moved in.
                     */
   ;  for (i=0;i<N_COLS(cl_adj);i++)
      cl_adj->cols[i].val = 0.5

                     /*    Proceed with smallest clusters first.
                      *    Caller has to take care of mclxColumnsRealign
                     */
   ;  for (i=0;i<N_COLS(cl);i++)
      {  mclv* clself = cl->cols+i

                     /*    Only consider nodes in clusters of
                      *    size <= cls_size_max
                     */
      ;  if (cls_size_max && clself->n_ivps > cls_size_max)
         break
                     /*    Clusters that have been marked for inclusion
                      *    cannot play.
                     */
      ;  if (cl_adj->cols[i].val > 1)
         continue

      ;  for (j=0;j<clself->n_ivps;j++)
         {  long nid  = clself->ivps[j].idx
         ;  long nos  = mclvGetIvpOffset(mx->dom_cols, nid, -1)
         ;  mclv* clidvec  =  mclxGetVector(el_on_cl, nid, RETURN_ON_FAIL, NULL)

         ;  double eff_alien_bsf = 0.0, eff_alien_max_bsf = 0.0 /* best so far*/
         ;  double eff_self = 0.0, eff_self_max = 0.0
         ;  long cid_alien  = -1, cid_self = -1
         ;  clmVScore sc_self = { 0 }, sc_alien = { 0 }
         ;  dim f

                     /* The tag names this adjust routine, matching the
                      * second mcxErr call further down in this function.
                     */
         ;  if (nos < 0 || !clidvec)
            {  mcxErr
               ("clmAdjust panic", "node <%ld> does not belong", nid)
            ;  continue
         ;  }

                     /* score the node against its own cluster */
            clmVScanDomain(mx->cols+nos, clself, &sc)
         ;  clmVScoreCoverage(&sc, &eff_self, &eff_self_max)
         ;  cid_self = clself->vid
         ;  sc_self  = sc

         ;  if (loggit)
            mcxLog2
            (  us
            ,  "node %ld in cluster %ld eff %.3f,%.3f sum %.3f"
            ,  nid
            ,  cid_self
            ,  eff_self
            ,  eff_self_max
            ,  sc.sum_i
            )

                     /* score the node against every other listed cluster */
         ;  for (f=0;f<clidvec->n_ivps;f++)
            {  long cid = clidvec->ivps[f].idx
            ;  mclv* clvec = mclxGetVector(cl, cid, RETURN_ON_FAIL, NULL)
                          /* ^ overdoing: cid == clvec->vid */
            ;  double eff, eff_max
            ;  if (!clvec)
               {  mcxErr
                  (  "clmAdjust panic"
                  ,  "cluster <%ld> node <%ld> mishap"
                  ,  cid
                  ,  nid
                  )
               ;  continue
            ;  }


                        /* fixme: document or remove first condition
                         *
                        */
               if ((0 && clvec->n_ivps <= clself->n_ivps) || clvec->vid == cid_self)
               continue

            ;  clmVScanDomain(mx->cols+nos, clvec, &sc)
            ;  clmVScoreCoverage(&sc, &eff, &eff_max)

#if 0
#  define PIVOT eff > eff_alien_bsf
#else
#  define PIVOT eff_max > eff_alien_max_bsf
#endif

            ;  if
               (  PIVOT
               || sc.sum_i >= 0.5
               )
                  eff_alien_bsf = eff
               ,  eff_alien_max_bsf = eff_max
               ,  cid_alien = clvec->vid
               ,  sc_alien = sc

                        /* an alien cluster with sum_i >= 0.5 ends the scan */
            ;  if (sc.sum_i >= 0.5)
               break
         ;  }

            if (loggit)
            mcxLog2
            (  us
            ,  " -> best alien %ld eff %.3f,%.3f sum %.3f"
            ,  cid_alien
            ,  eff_alien_bsf
            ,  eff_alien_max_bsf
            ,  sc_alien.sum_i
            )

                  /* below: use sum_i as mass fraction
                   * (clmAdjust framework uses stochastic * matrix)
                  */
         ;  if
            (  cid_alien >= 0
            && cid_self >= 0
            && f1 * sc_alien.max_i >= sc_self.max_i
            && (  (  eff_alien_bsf > eff_self
                  && sc_alien.sum_i > sc_self.sum_i
                  )
               || (  pow(sc_alien.sum_i, f2) >= sc_self.sum_i
                  && pow(eff_self, f2) <= eff_alien_bsf
                  )
               )
                  /* So, if max is reasonable
                   * and efficiency is better and mass is better
                   * or if mass is ridiculously better -> move
                   * Somewhat intricate and contrived, yes.
                  */
            )
            {  mclv* acceptor
               =  mclxGetVector(cl_adj, cid_alien, RETURN_ON_FAIL, NULL)
            ;  mclv* donor
               =  mclxGetVector(cl_adj, cid_self,  RETURN_ON_FAIL, NULL)
            ;  if (!donor || !acceptor || acceptor == donor)
               continue

                  /* The donor entry is zeroed rather than deleted here;
                   * zero entries are removed once the loops are done.
                  */
            ;  mclvInsertIdx(donor, nid, 0.0)
            ;  mclvInsertIdx(acceptor, nid, 1.0)
            ;  acceptor->val = 1.5

            ;  if (mcxLogGet(MCX_LOG_LIST))
               {  mclv* nb = mx->cols+nos
               ;  double mxv = mclvMaxValue(nb)
               ;  double avg = nb->n_ivps ? mclvSum(nb) / nb->n_ivps : -1.0
               ;  mcxLog
                  (  MCX_LOG_LIST
                  ,  us
                  ,  "mov %ld (%ld %.2f %.2f)"
                     " %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)"
                     " to %ld (cv=%.2f cm=%.2f s=%.2f m=%.2f #=%lu)"
                  ,  nid
                  ,     (long) nb->n_ivps, mxv, avg
                  ,  cid_self
                  ,     eff_self, eff_self_max, sc_self.sum_i, sc_self.max_i
                  ,              (ulong) (sc_self.n_meet + sc_self.n_ddif)
                  ,  cid_alien
                  ,     eff_alien_bsf, eff_alien_max_bsf, sc_alien.sum_i, sc_alien.max_i
                  ,              (ulong) (sc_alien.n_meet + sc_alien.n_ddif)
                  )
            ;  }

               n_adjusted++                  
            ;  mclvInsertIdx(cid_affected, cid_alien, 2.0)
            ;  mclvInsertIdx(cid_affected, cid_self, 2.0)
            ;  mclvInsertIdx(nid_affected, nid, 2.0)
         ;  }
         }
      }
      mclxFree(&el_on_cl)
   ;  mclxFree(&el_to_cl)

                     /* clear the flag values stored in the column headers */
   ;  for (i=0;i<N_COLS(cl_adj);i++)
      cl_adj->cols[i].val = 0.0

   ;  mclxMakeCharacteristic(cl)

   ;  if (!n_adjusted)
      {  mclxFree(&cl_adj)
      ;  mclvFree(&cid_affected)
      ;  mclvFree(&nid_affected)
      ;  return 0
   ;  }

      mclxUnary(cl_adj, fltxCopy, NULL)
   ;  mclxMakeCharacteristic(cl_adj)   
                     /* FIRST REMOVE ENTRIES set to zero (sssst now .. */
                     /* ...) and THEN make it characteristic again     */

                     /* keep only the entries that were raised to 2.0 */
   ;  mclvUnary(cid_affected, fltxGT, &bar_affected)
   ;  mclvUnary(nid_affected, fltxGT, &bar_affected)

   ;  *cl_adjustedpp  =  cl_adj
   ;  *cid_affectedpp =  cid_affected
   ;  *nid_affectedpp =  nid_affected

   ;  return n_adjusted
;  }