Пример #1
0
/* Truncate every column of el_on_cl to its leading entries.
 *
 * For column i the "home" index is read from the first entry of column i
 * of el_to_cl, so that column must hold at least one entry (the caller
 * guarantees this via the "conforming" precondition).
 *
 * The column is ordered with mclpValRevCmp (presumably largest value
 * first), then entries are accepted from the front while
 *    - entries remain,
 *    - the accumulated value is still below pct, and
 *    - fewer than max entries have been accepted.
 * The column is resized to the accepted count, re-ordered with mclpIdxCmp,
 * and, if the home index was not among the accepted entries, the home
 * index is inserted with value 0.01.
*/
static void prune_el_on_cl
(  mclMatrix* el_to_cl  /* must be conforming */
,  mclMatrix* el_on_cl  /* this one will be pruned */
,  double pct
,  int max
)
{
   dim col;

   for (col = 0; col < N_COLS(el_on_cl); col++) {
      mclv*    proj       = el_on_cl->cols + col;
      long     home       = el_to_cl->cols[col].ivps[0].idx;
      double   mass       = 0.0;
      int      n_taken    = 0;
      dim      n_keep     = 0;
      mcxbool  home_kept  = FALSE;

      mclvSort(proj, mclpValRevCmp);

      /* n_keep counts the accepted entries; it is the new vector size. */
      while (n_keep < proj->n_ivps && mass < pct && n_taken < max) {
         long idx = proj->ivps[n_keep].idx;

         if (idx == home)
            home_kept = TRUE;

         mass += proj->ivps[n_keep].val;
         n_taken++;
         n_keep++;
      }

      mclvResize(proj, n_keep);
      mclvSort(proj, mclpIdxCmp);

      if (!home_kept)
         mclvInsertIdx(proj, home, 0.01);
   }
}
Пример #2
0
/* Sort the entries of vec using the mclpValCmp comparator
 * (presumably ascending by value, as the function name suggests).
*/
void mclvSortAscVal(mclVector* vec)
{
   mclvSort(vec, mclpValCmp);
}
Пример #3
0
/* Sort the entries of vec using the mclpValRevCmp comparator
 * (presumably descending by value, as the function name suggests).
*/
void mclvSortDescVal(mclVector* vec)
{
   mclvSort(vec, mclpValRevCmp);
}
Пример #4
0
/* Order vec with mclpIdxCmp and then collapse entries sharing an index
 * via mclvUniqIdx, using mclpMergeLeft as the merge callback.
 * The count returned by mclvUniqIdx is discarded.
*/
void mclvSortUniq(mclVector* vec)
{
   mclvSort(vec, mclpIdxCmp);
   mclvUniqIdx(vec, mclpMergeLeft);
}
Пример #5
0
/* Fill or update the vector dst from the entries held in par.
 *
 * dst        Target vector; a fresh one is created with mclvInit when NULL.
 * par        Source entry array. Its 'sorted' bits (MCLPAR_SORTED,
 *            MCLPAR_UNIQUE) decide whether sorting and duplicate removal
 *            are still needed.
 * warnbits   MCLV_WARN_REPEAT_ENTRIES / MCLV_WARN_REPEAT_VECTORS select
 *            which diagnostics are emitted through mcxErr.
 * ivpmerge   Callback handed to mclvUniqIdx for entries with equal index.
 * fltbinary  When non-NULL and dst already has entries, the new entries
 *            are combined with dst through mclvBinary.
 *
 * When par holds entries and the combining path is not taken, the current
 * content of dst is replaced via mclvRenew. When par holds no entries,
 * dst is left untouched.
 *
 * Returns dst (possibly newly created).
*/
mclv* mclvFromPAR
(  mclv*      dst
,  mclpAR*    par  
,  mcxbits    warnbits
,  void     (*ivpmerge)(void* ivp1, const void* ivp2)
,  double   (*fltbinary)(pval val1, pval val2)
)
{
   const char* me         = "mclvFromPAR";
   mcxbool     warn_re    = warnbits & MCLV_WARN_REPEAT_ENTRIES;
   mcxbool     warn_rv    = warnbits & MCLV_WARN_REPEAT_VECTORS;
   mclp*       src        = par->ivps;
   dim         n_src      = par->n_ivps;
   mcxbits     state      = par->sorted;
   dim         n_before   = dst ? dst->n_ivps : 0;
   dim         n_dup      = 0;     /* duplicates removed inside the new data */
   dim         n_overlap  = 0;     /* indices shared by new data and old dst */

   if (!dst)
      dst = mclvInit(NULL);

   if (n_src && dst->n_ivps && fltbinary) {
      /* Combine the incoming entries with the existing content. */
      mclVector* incoming = mclvNew(src, n_src);
      dim        n_separate;

      if (!(state & MCLPAR_SORTED))
         mclvSort(incoming, NULL);

      if (!(state & MCLPAR_UNIQUE))
         n_dup = mclvUniqIdx(incoming, ivpmerge);

      /* Entry count before combining minus the count afterwards. */
      n_separate = incoming->n_ivps + dst->n_ivps;
      mclvBinary(dst, incoming, dst, fltbinary);
      n_overlap = n_separate - dst->n_ivps;

      mclvFree(&incoming);
   }
   else if (n_src) {
      /* Replace the content of dst by the incoming entries. */
      if (dst->ivps == src)
         mcxErr(me, "DANGER dst->ivps == ivps (dst vid %d)", (int) dst->vid);

      mclvRenew(dst, src, n_src);

      if (!(state & MCLPAR_SORTED))
         mclvSort(dst, NULL);

      if (!(state & MCLPAR_UNIQUE))
         n_dup += mclvUniqIdx(dst, ivpmerge);
   }

   if (warn_re && n_dup) {
      mcxErr
      (  me
      ,  "<%ld> found <%ld> repeated entries within %svector"
      ,  (long) dst->vid
      ,  (long) n_dup
      ,  n_overlap ? "repeated " : ""
      );
   }

   if (warn_rv && n_overlap) {
      mcxErr
      (  me
      ,  "<%ld> new vector has <%ld> overlap with previous amalgam"
      ,  (long) dst->vid
      ,  (long) n_overlap
      );
   }

   if (warnbits && n_dup + n_overlap) {
      mcxErr
      (  me
      ,  "<%ld> vector went from <%ld> to <%ld> entries"
      ,  (long) dst->vid
      ,  (long) n_before
      ,  (long) dst->n_ivps
      );
   }

   return dst;
}
Пример #6
0
/* Merge clusters of cl into other clusters, working on a copy of cl.
 *
 * mx             Input matrix, handed (by address) to clmCastActors.
 * cl             Input clustering. It is copied into a working clustering;
 *                note that mclxMakeCharacteristic is also applied to cl
 *                itself near the end.
 * prune_sz       When nonzero, clusters with more than prune_sz entries
 *                are never absorbed (they are skipped as merge sources).
 * cl_adjustedpp  Receives the adjusted clustering when at least one merge
 *                happened, NULL otherwise.
 * n_sink         Receives the number of merge operations performed (it is
 *                incremented once per absorbed cluster).
 * n_source       Receives the number of entries left in cid_affected after
 *                the fltxGT filter with threshold 1.5; entries are set to
 *                2.0 for clusters that absorbed another one, so presumably
 *                this is the number of distinct absorbing clusters.
 *
 * Returns the summed entry count of all absorbed clusters (0 if none).
*/
static dim clm_clm_prune
(  mclx*    mx
,  mclx*    cl
,  dim      prune_sz
,  mclx**   cl_adjustedpp
,  dim*     n_sink
,  dim*     n_source
)
   {  dim d, n_adjusted = 0
   ;  mclx* cl_adj = mclxCopy(cl)
   ;  mclv* cid_affected = mclvClone(cl->dom_cols)
                     /* NOTE(review): log label differs from this function's
                      * name; possibly inherited from a sibling routine.
                     */
   ;  const char* me = "clmAssimilate"

                     /* threshold between the initial 1.0 and the 2.0 that
                      * is assigned to absorbing clusters below.
                     */
   ;  double bar_affected = 1.5

   ;  mclx *el_to_cl = NULL
   ;  mclx *el_on_cl = NULL
   ;  mclx *cl_on_cl = NULL
   ;  mclx *cl_on_el = NULL

   ;  *n_sink = 0
   ;  *n_source = 0

   ;  mclvMakeConstant(cid_affected, 1.0)
                     /* the merge loop below relies on cl_adj being ordered
                      * on increasing cluster size (see the note at the
                      * merge step).
                     */
   ;  mclxColumnsRealign(cl_adj, mclvSizeCmp)

   ;  *cl_adjustedpp = NULL

   ;  clmCastActors
      (&mx, &cl_adj, &el_to_cl, &el_on_cl, &cl_on_cl, &cl_on_el, 0.95)
                     /* cl_on_el is not used in this routine */
   ;  mclxFree(&cl_on_el)

   ;  for (d=0;d<N_COLS(cl_on_cl);d++)
      {  mclv* clthis   =  cl_adj->cols+d
      ;  mclv* cllist   =  cl_on_cl->cols+d
      ;  mclp* pself    =  mclvGetIvp(cllist, clthis->vid, NULL)
      ;  double self_val = -1.0
      
                     /* self_val stays -1.0 if the cluster has no entry
                      * for itself in its cl_on_cl column.
                     */
      ;  if (pself)
            self_val = pself->val
         ,  pself->val *= 1.001  /* to push it up in case of equal weights */

                     /* disabled debug output */
;if(0)fprintf(stderr, "test size %d\n", (int) clthis->n_ivps)
                     /* too large to be absorbed. cllist has not been
                      * value-sorted at this point, so skipping the
                      * index re-sort at the end of the loop body is fine.
                     */
      ;  if (prune_sz && clthis->n_ivps > prune_sz)
         continue

                     /* every path through this body ends in a break, so it
                      * runs at most once; the loop is used as a block that
                      * can be left early.
                     */
      ;  while (1)
         {  mclv* clthat
         ;  dim e
         ;  if (cllist->n_ivps < 2)
            break
         ;  mclvSort(cllist, mclpValRevCmp)

                     /* now get biggest mass provided that cluster
                      * ranks higher (has at least as many entries)
                      *
                      * fixme/todo: we probably have a slight order
                      * dependency for some fringe cases. If provable
                      * then either solve or document it.
                     */
         ;  for (e=0;e<cllist->n_ivps;e++)
            if (cllist->ivps[e].idx >= clthis->vid)
            break

                     /* found none or itself */
         ;  if (e == cllist->n_ivps || cllist->ivps[e].idx == clthis->vid)
            break

         ;  if       /* Should Not Happen */
            (!(clthat
            =  mclxGetVector(cl_adj, cllist->ivps[e].idx, RETURN_ON_FAIL, NULL)
            ) )
            break

                     /*    works for special case prune_sz == 0               */
                     /*    if (clthat->n_ivps + clthis->n_ivps > prune_sz)    */
                     /*    ^iced. inconsistent behaviour as k grows.          */
                     /* NOTE(review): the value logged for the absorbing
                      * cluster is cllist->ivps[0].val, while the absorber
                      * was selected as entry e; confirm this is intended.
                     */
         ;  {  mcxLog
               (  MCX_LOG_LIST
               ,  me
               ,  "source %ld|%lu|%.3f absorbed by %ld|%lu|%.3f"
               ,  clthis->vid, (ulong) clthis->n_ivps, self_val
               ,  clthat->vid, (ulong) clthat->n_ivps, cllist->ivps[0].val
               )
            ;  n_adjusted += clthis->n_ivps
            ;  (*n_sink)++
                     /* note: we could from our precomputed cl_on_cl
                      * obtain that A is absorbed in B, B is absorbed in C.
                      * below we see that A will be merged with B,
                      * and the result will then be merged with C.
                      * This depends on the fact that cl_adj is ordered
                      * on increasing cluster size.
                     */
                     /* move the entries of this cluster into clthat, empty
                      * this cluster and mark clthat as having absorbed.
                     */
            ;  mcldMerge(cl_adj->cols+d, clthat, clthat)
            ;  mclvResize(cl_adj->cols+d, 0)
            ;  mclvInsertIdx(cid_affected, clthat->vid, 2.0)
         ;  }
            break
      ;  }
                     /* undo the value ordering applied inside the block */
         mclvSort(cllist, mclpIdxCmp)
   ;  }

      mclxFree(&cl_on_cl)
   ;  mclxFree(&el_on_cl)
   ;  mclxFree(&el_to_cl)

                     /* NOTE(review): this is applied to the caller's cl,
                      * not to the working copy cl_adj.
                     */
   ;  mclxMakeCharacteristic(cl)

                     /* keep only the ids that were set to 2.0 above
                      * (assuming fltxGT tests 'greater than' the bar).
                     */
   ;  mclvUnary(cid_affected, fltxGT, &bar_affected)
   ;  *n_source = cid_affected->n_ivps
   ;  mclvFree(&cid_affected)

   ;  mclxColumnsRealign(cl_adj, mclvSizeRevCmp)

                     /* nothing merged: discard the working copy;
                      * *cl_adjustedpp remains NULL.
                     */
   ;  if (!n_adjusted)
      {  mclxFree(&cl_adj)
      ;  return 0
   ;  }

      mclxUnary(cl_adj, fltxCopy, NULL)
   ;  mclxMakeCharacteristic(cl_adj)   

   ;  *cl_adjustedpp  =  cl_adj
   ;  return n_adjusted
;  }