Пример #1
0
static void
VL_XCAT(_vl_kmeans_quantize_, SFX)
(VlKMeans * self,
 vl_uint32 * assignments,
 TYPE * distances,
 TYPE const * data,
 vl_size numData)
{
  vl_uindex i ;
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif
  TYPE * distanceToCenters = vl_malloc (sizeof(TYPE) * self->numCenters) ;

  for (i = 0 ; i < numData ; ++i) {
    vl_size k ;
    TYPE bestDistance = (TYPE) VL_INFINITY_D ;
    VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)(distanceToCenters,
                                                          self->dimension,
                                                          data + self->dimension * i, 1,
                                                          (TYPE*)self->centers, self->numCenters,
                                                          distFn) ;
    for (k = 0 ; k < self->numCenters ; ++k) {
      if (distanceToCenters[k] < bestDistance) {
        bestDistance = distanceToCenters[k] ;
        assignments[i] = k ;
      }
    }

    if (distances) distances[i] = bestDistance ;
  }
  vl_free(distanceToCenters) ;
}
Пример #2
0
VlKDForest *
load_VlKDForest(const char *fname)
{
    FILE *fp;
    size_t n;
    VlKDForest *self = vl_malloc (sizeof(VlKDForest)) ;

    if((fp = fopen(fname, "rb")) == NULL)
        return -1;

    n = read_VlKDForest(fp, self);
    fclose(fp);

    self -> rand = vl_get_rand ();
    self -> searchHeapArray = 0;
    self -> searchIdBook = 0;

    switch (self->dataType) {
    case VL_TYPE_FLOAT:
        self -> distanceFunction = (void(*)(void))
            vl_get_vector_comparison_function_f (VlDistanceL2) ;
        break ;
    case VL_TYPE_DOUBLE :
        self -> distanceFunction = (void(*)(void))
            vl_get_vector_comparison_function_d (VlDistanceL2) ;
        break ;
    default :
        abort() ;
    }

    return self;
}
Пример #3
0
static void
VL_XCAT(_vl_kmeans_seed_centers_with_rand_data_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size dimension,
 vl_size numData,
 vl_size numCenters)
{
  vl_uindex i, j, k ;
  VlRand * rand = vl_get_rand () ;

  self->dimension = dimension ;
  self->numCenters = numCenters ;
  self->centers = vl_malloc (sizeof(TYPE) * dimension * numCenters) ;

  {
    vl_uindex * perm = vl_malloc (sizeof(vl_uindex) * numData) ;
#if (FLT == VL_TYPE_FLOAT)
    VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
    VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif
    TYPE * distances = vl_malloc (sizeof(TYPE) * numCenters) ;

    /* get a random permutation of the data point */
    for (i = 0 ; i < numData ; ++i) perm[i] = i ;
    _vl_kmeans_shuffle (perm, numData, rand) ;

    for (k = 0, i = 0 ; k < numCenters ; ++ i) {

      /* compare the next data point to all centers collected so far
       to detect duplicates (if there are enough left)
       */
      if (numCenters - k < numData - i) {
        vl_bool duplicateDetected = VL_FALSE ;
        VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)(distances,
                                                              dimension,
                                                              data + dimension * perm[i], 1,
                                                              (TYPE*)self->centers, k,
                                                              distFn) ;
        for (j = 0 ; j < k ; ++j) { duplicateDetected |= (distances[j] == 0) ; }
        if (duplicateDetected) continue ;
      }

      /* ok, it is not a duplicate so we can accept it! */
      memcpy ((TYPE*)self->centers + dimension * k,
              data + dimension * perm[i],
              sizeof(TYPE) * dimension) ;
      k ++ ;
    }
    vl_free(distances) ;
    vl_free(perm) ;
  }
}
Пример #4
0
VL_EXPORT VlKDForest *
vl_kdforest_new (vl_type dataType,
                 vl_size dimension, vl_size numTrees)
{
  VlKDForest * self = vl_malloc (sizeof(VlKDForest)) ;

  assert(dataType == VL_TYPE_FLOAT || dataType == VL_TYPE_DOUBLE) ;
  assert(dimension >= 1) ;
  assert(numTrees >= 1) ;

  self -> rand = vl_get_rand () ;
  self -> dataType = dataType ;
  self -> numData = 0 ;
  self -> data = 0 ;
  self -> dimension = dimension ;
  self -> numTrees = numTrees ;
  self -> trees = 0 ;
  self -> thresholdingMethod = VL_KDTREE_MEDIAN ;
  self -> splitHeapSize = (numTrees == 1) ? 1 : VL_KDTREE_SPLIT_HEALP_SIZE ;
  self -> splitHeapNumNodes = 0 ;

  self -> searchHeapArray = 0 ;
  self -> searchHeapNumNodes = 0 ;

  self -> searchMaxNumComparisons = 0 ;
  self -> searchIdBook = 0 ;
  self -> searchId = 0 ;

  switch (self->dataType) {
    case VL_TYPE_FLOAT:
      self -> distanceFunction = (void(*)(void))
      vl_get_vector_comparison_function_f (VlDistanceL2) ;
      break ;
    case VL_TYPE_DOUBLE :
      self -> distanceFunction = (void(*)(void))
      vl_get_vector_comparison_function_d (VlDistanceL2) ;
      break ;
    default :
      abort() ;
  }

  return self ;
}
Пример #5
0
static double
VL_XCAT(_vl_kmeans_update_center_distances_, SFX)
(VlKMeans * self)
{
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif

  if (! self->centerDistances) {
    self->centerDistances = vl_malloc (sizeof(TYPE) *
                                       self->numCenters *
                                       self->numCenters) ;
  }
  VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)(self->centerDistances,
                                                        self->dimension,
                                                        self->centers, self->numCenters,
                                                        NULL, 0,
                                                        distFn) ;
  return self->numCenters * (self->numCenters - 1) / 2 ;
}
Пример #6
0
VlKDForest *
vl_kdforest_new (vl_type dataType,
                 vl_size dimension, vl_size numTrees, VlVectorComparisonType distance)
{
  VlKDForest * self = vl_calloc (sizeof(VlKDForest), 1) ;

  assert(dataType == VL_TYPE_FLOAT || dataType == VL_TYPE_DOUBLE) ;
  assert(dimension >= 1) ;
  assert(numTrees >= 1) ;

  self -> rand = vl_get_rand () ;
  self -> dataType = dataType ;
  self -> numData = 0 ;
  self -> data = 0 ;
  self -> dimension = dimension ;
  self -> numTrees = numTrees ;
  self -> trees = 0 ;
  self -> thresholdingMethod = VL_KDTREE_MEDIAN ;
  self -> splitHeapSize = VL_MIN(numTrees, VL_KDTREE_SPLIT_HEAP_SIZE) ;
  self -> splitHeapNumNodes = 0 ;
  self -> distance = distance;
  self -> maxNumNodes = 0 ;

  switch (self->dataType) {
    case VL_TYPE_FLOAT:
      self -> distanceFunction = (void(*)(void))
      vl_get_vector_comparison_function_f (distance) ;
      break;
    case VL_TYPE_DOUBLE :
      self -> distanceFunction = (void(*)(void))
      vl_get_vector_comparison_function_d (distance) ;
      break ;
    default :
      abort() ;
  }

  return self ;
}
Пример #7
0
static double
VL_XCAT(_vl_kmeans_refine_centers_elkan_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size numData)
{
  vl_size d, iteration, x ;
  vl_uint32 c, j ;
  vl_bool allDone ;
  TYPE * distances = vl_malloc (sizeof(TYPE) * numData) ;
  vl_uint32 * assignments = vl_malloc (sizeof(vl_uint32) * numData) ;
  vl_size * clusterMasses = vl_malloc (sizeof(vl_size) * numData) ;

#if (FLT == VL_TYPE_FLOAT)
    VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
    VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif

  TYPE * nextCenterDistances = vl_malloc (sizeof(TYPE) * self->numCenters) ;
  TYPE * pointToClosestCenterUB = vl_malloc (sizeof(TYPE) * numData) ;
  vl_bool * pointToClosestCenterUBIsStrict = vl_malloc (sizeof(vl_bool) * numData) ;
  TYPE * pointToCenterLB = vl_malloc (sizeof(TYPE) * numData * self->numCenters) ;
  TYPE * newCenters = vl_malloc(sizeof(TYPE) * self->dimension * self->numCenters) ;
  TYPE * centerToNewCenterDistances = vl_malloc (sizeof(TYPE) * self->numCenters) ;

  vl_uint32 * permutations = NULL ;
  vl_size * numSeenSoFar = NULL ;

  double energy ;

  vl_size totDistanceComputationsToInit = 0 ;
  vl_size totDistanceComputationsToRefreshUB = 0 ;
  vl_size totDistanceComputationsToRefreshLB = 0 ;
  vl_size totDistanceComputationsToRefreshCenterDistances = 0 ;
  vl_size totDistanceComputationsToNewCenters = 0 ;
  vl_size totDistanceComputationsToFinalize = 0 ;

  if (self->distance == VlDistanceL1) {
    permutations = vl_malloc(sizeof(vl_uint32) * numData * self->dimension) ;
    numSeenSoFar = vl_malloc(sizeof(vl_size) * self->numCenters) ;
    VL_XCAT(_vl_kmeans_sort_data_helper_, SFX)(self, permutations, data, numData) ;
  }

  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
  /*                          Initialization                        */
  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

  /* An iteration is: get_new_centers + reassign + get_energy.
     This counts as iteration 0, where get_new_centers is assumed
     to be performed before calling the train function by
     the initialization function */

  /* update distances between centers */
  totDistanceComputationsToInit +=
  VL_XCAT(_vl_kmeans_update_center_distances_, SFX)(self) ;

  /* assigmen points to the initial centers and initialize bounds */
  memset(pointToCenterLB, 0, sizeof(TYPE) * self->numCenters *  numData) ;
  for (x = 0 ; x < numData ; ++x) {
    TYPE distance ;

    /* do the first center */
    assignments[x] = 0 ;
    distance = distFn(self->dimension,
                      data + x * self->dimension,
                      (TYPE*)self->centers + 0) ;
    pointToClosestCenterUB[x] = distance ;
    pointToClosestCenterUBIsStrict[x] = VL_TRUE ;
    pointToCenterLB[0 + x * self->numCenters] = distance ;
    totDistanceComputationsToInit += 1 ;

    /* do other centers */
    for (c = 1 ; c < self->numCenters ; ++c) {

      /* Can skip if the center assigned so far is twice as close
         as its distance to the center under consideration */

      if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
          pointToClosestCenterUB[x] <=
          ((TYPE*)self->centerDistances)
          [c + assignments[x] * self->numCenters]) {
        continue ;
      }

      distance = distFn(self->dimension,
                        data + x * self->dimension,
                        (TYPE*)self->centers + c * self->dimension) ;
      pointToCenterLB[c + x * self->numCenters] = distance ;
      totDistanceComputationsToInit += 1 ;
      if (distance < pointToClosestCenterUB[x]) {
        pointToClosestCenterUB[x] = distance ;
        assignments[x] = c ;
      }
    }
  }

  /* compute UB on energy */
  energy = 0 ;
  for (x = 0 ; x < numData ; ++x) {
    energy += pointToClosestCenterUB[x] ;
  }

  if (self->verbosity) {
    VL_PRINTF("kmeans: Elkan iter 0: energy = %g, dist. calc. = %d\n",
              energy, totDistanceComputationsToInit) ;
  }

/* #define SANITY*/
#ifdef SANITY
  {
    int xx ; int cc ;
    TYPE tol = 1e-5 ;
    VL_PRINTF("inconsistencies after initial assignments:\n");
    for (xx = 0 ; xx < numData ; ++xx) {
      for (cc = 0 ; cc < self->numCenters ; ++cc) {
        TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
        TYPE b = distFn(self->dimension,
                        data + self->dimension * xx,
                        (TYPE*)self->centers + self->dimension * cc) ;
        if (cc == assignments[xx]) {
          TYPE z = pointToClosestCenterUB[xx] ;
          if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                             cc, xx, z, b) ;
        }
        if (a>b+tol) VL_PRINTF("LB %d %d = %f  > %f\n",
                           cc, xx, a, b) ;
      }
    }
  }
#endif

  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
  /*                          Iterations                            */
  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

  for (iteration = 1 ; 1; ++iteration) {

    vl_size numDistanceComputationsToRefreshUB = 0 ;
    vl_size numDistanceComputationsToRefreshLB = 0 ;
    vl_size numDistanceComputationsToRefreshCenterDistances = 0 ;
    vl_size numDistanceComputationsToNewCenters = 0 ;

    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
    /*                         Compute new centers                  */
    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

    memset(clusterMasses, 0, sizeof(vl_size) * numData) ;
    for (x = 0 ; x < numData ; ++x) {
      clusterMasses[assignments[x]] ++ ;
    }

    switch (self->distance) {
      case VlDistanceL2:
        memset(newCenters, 0, sizeof(TYPE) * self->dimension * self->numCenters) ;
        for (x = 0 ; x < numData ; ++x) {
          TYPE * cpt = newCenters + assignments[x] * self->dimension ;
          TYPE const * xpt = data + x * self->dimension ;
          for (d = 0 ; d < self->dimension ; ++d) { cpt[d] += xpt[d] ; }
        }
        for (c = 0 ; c < self->numCenters ; ++c) {
          TYPE mass = clusterMasses[c] ;
          TYPE * cpt = newCenters + c * self->dimension ;
          for (d = 0 ; d < self->dimension ; ++d) { cpt[d] /= mass ; }
        }
        break ;
      case VlDistanceL1:
        for (d = 0 ; d < self->dimension ; ++d) {
          vl_uint32 * perm = permutations + d * numData ;
          memset(numSeenSoFar, 0, sizeof(vl_size) * self->numCenters) ;
          for (x = 0; x < numData ; ++x) {
            c = assignments[perm[x]] ;
            if (2 * numSeenSoFar[c] < clusterMasses[c]) {
              newCenters [d + c * self->dimension] =
              data [d + perm[x] * self->dimension] ;
            }
            numSeenSoFar[c] ++ ;
          }
        }
        break ;
      default:
        abort();
    } /* done compute centers */

    /* compute the distance from the old centers to the new centers */
    for (c = 0 ; c < self->numCenters ; ++c) {
      TYPE distance = distFn(self->dimension,
                             newCenters + c * self->dimension,
                             (TYPE*)self->centers + c * self->dimension) ;
      centerToNewCenterDistances[c] = distance ;
      numDistanceComputationsToNewCenters += 1 ;
    }

    /* make the new centers current */
    {
      TYPE * tmp = self->centers ;
      self->centers = newCenters ;
      newCenters = tmp ;
    }

    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
    /*                Reassign points to a centers                  */
    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

    /*
     Update distances between centers.
     */
    numDistanceComputationsToRefreshCenterDistances
    += VL_XCAT(_vl_kmeans_update_center_distances_, SFX)(self) ;

    for (c = 0 ; c < self->numCenters ; ++c) {
      nextCenterDistances[c] = (TYPE) VL_INFINITY_D ;
      for (j = 0 ; j < self->numCenters ; ++j) {
        if (j == c) continue ;
        nextCenterDistances[c] = VL_MIN(nextCenterDistances[c],
                                        ((TYPE*)self->centerDistances)
                                        [j + c * self->numCenters]) ;
      }
    }

    /*
     Update upper bounds on point-to-closest-center distances
     based on the center variation.
     */
    for (x = 0 ; x < numData ; ++x) {
      TYPE a = pointToClosestCenterUB[x] ;
      TYPE b = centerToNewCenterDistances[assignments[x]] ;
      if (self->distance == VlDistanceL1) {
        pointToClosestCenterUB[x] = a + b ;
      } else {
#if (FLT == VL_TYPE_FLOAT)
        TYPE sqrtab =  sqrtf (a * b) ;
#else
        TYPE sqrtab =  sqrt (a * b) ;
#endif
        pointToClosestCenterUB[x] = a + b + 2.0 * sqrtab ;
      }
      pointToClosestCenterUBIsStrict[x] = VL_FALSE ;
    }

    /*
     Update lower bounds on point-to-center distances
     based on the center variation.
     */
    for (x = 0 ; x < numData ; ++x) {
      for (c = 0 ; c < self->numCenters ; ++c) {
        TYPE a = pointToCenterLB[c + x * self->numCenters] ;
        TYPE b = centerToNewCenterDistances[c] ;
        if (a < b) {
          pointToCenterLB[c + x * self->numCenters] = 0 ;
        } else {
          if (self->distance == VlDistanceL1) {
             pointToCenterLB[c + x * self->numCenters]  = a - b ;
          } else {
#if (FLT == VL_TYPE_FLOAT)
            TYPE sqrtab =  sqrtf (a * b) ;
#else
            TYPE sqrtab =  sqrt (a * b) ;
#endif
             pointToCenterLB[c + x * self->numCenters]  = a + b - 2.0 * sqrtab ;
          }
        }
      }
    }

   #ifdef SANITY
    {
      int xx ; int cc ;
      TYPE tol = 1e-5 ;
      VL_PRINTF("inconsistencies before assignments:\n");
      for (xx = 0 ; xx < numData ; ++xx) {
        for (cc = 0 ; cc < self->numCenters ; ++cc) {
          TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
          TYPE b = distFn(self->dimension,
                          data + self->dimension * xx,
                          (TYPE*)self->centers + self->dimension * cc) ;
          if (cc == assignments[xx]) {
            TYPE z = pointToClosestCenterUB[xx] ;
            if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                                            cc, xx, z, b) ;
          }
          if (a>b+tol) VL_PRINTF("LB %d %d = %f  > %f (assign = %d)\n",
                                          cc, xx, a, b, assignments[xx]) ;
        }
      }
    }
#endif

    /*
     Scan the data and to the reassignments. Use the bounds to
     skip as many point-to-center distance calculations as possible.
     */
    for (allDone = VL_TRUE, x = 0 ; x < numData ; ++x) {
      /*
       A point x sticks with its current center assignmets[x]
       the UB to d(x, c[assigmnets[x]]) is not larger than half
       the distance of c[assigments[x]] to any other center c.
       */
      if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
          pointToClosestCenterUB[x] <= nextCenterDistances[assignments[x]]) {
        continue ;
      }

      for (c = 0 ; c < self->numCenters ; ++c) {
        vl_uint32 cx = assignments[x] ;
        TYPE distance ;

        /* The point is not reassigned to a given center c
         if either:

         0 - c is already the assigned center
         1 - The UB of d(x, c[assignments[x]]) is smaller than half
             the distance of c[assigments[x]] to c, OR
         2 - The UB of d(x, c[assignmets[x]]) is smaller than the
             LB of the distance of x to c.
         */
        if (cx == c) {
          continue ;
        }
        if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
            pointToClosestCenterUB[x] <= ((TYPE*)self->centerDistances)
            [c + cx * self->numCenters]) {
          continue ;
        }
        if (pointToClosestCenterUB[x] <= pointToCenterLB
            [c + x * self->numCenters]) {
          continue ;
        }

        /* If the UB is loose, try recomputing it and test again */
        if (! pointToClosestCenterUBIsStrict[x]) {
          distance = distFn(self->dimension,
                            data + self->dimension * x,
                            (TYPE*)self->centers + self->dimension * cx) ;
          pointToClosestCenterUB[x] = distance ;
          pointToClosestCenterUBIsStrict[x] = VL_TRUE ;
          pointToCenterLB[cx + x * self->numCenters] = distance ;
          numDistanceComputationsToRefreshUB += 1 ;

          if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
              pointToClosestCenterUB[x] <= ((TYPE*)self->centerDistances)
              [c + cx * self->numCenters]) {
            continue ;
          }
          if (pointToClosestCenterUB[x] <= pointToCenterLB
              [c + x * self->numCenters]) {
            continue ;
          }
        }

        /*
         Now the UB is strict (equal to d(x, assignments[x])), but
         we still could not exclude that x should be reassigned to
         c. We therefore compute the distance, update the LB,
         and check if a reassigmnet must be made
         */
        distance = distFn(self->dimension,
                          data + x * self->dimension,
                          (TYPE*)self->centers + c *  self->dimension) ;
        numDistanceComputationsToRefreshLB += 1 ;
        pointToCenterLB[c + x * self->numCenters] = distance ;

        if (distance < pointToClosestCenterUB[x]) {
          assignments[x] = c ;
          pointToClosestCenterUB[x] = distance ;
          allDone = VL_FALSE ;
          /* the UB strict flag is already set here */
        }

      } /* assign center */
    } /* next data point */

    totDistanceComputationsToRefreshUB
    += numDistanceComputationsToRefreshUB ;

    totDistanceComputationsToRefreshLB
    += numDistanceComputationsToRefreshLB ;

    totDistanceComputationsToRefreshCenterDistances
    += numDistanceComputationsToRefreshCenterDistances ;

    totDistanceComputationsToNewCenters
    += numDistanceComputationsToNewCenters ;

#ifdef SANITY
    {
      int xx ; int cc ;
      TYPE tol = 1e-5 ;
      VL_PRINTF("inconsistencies after assignments:\n");
      for (xx = 0 ; xx < numData ; ++xx) {
        for (cc = 0 ; cc < self->numCenters ; ++cc) {
          TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
          TYPE b = distFn(self->dimension,
                          data + self->dimension * xx,
                          (TYPE*)self->centers + self->dimension * cc) ;
          if (cc == assignments[xx]) {
            TYPE z = pointToClosestCenterUB[xx] ;
            if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                               cc, xx, z, b) ;
          }
          if (a>b+tol) VL_PRINTF("LB %d %d = %f  > %f (assign = %d)\n",
                             cc, xx, a, b, assignments[xx]) ;
        }
      }
    }
#endif

    /* compute UB on energy */
    energy = 0 ;
    for (x = 0 ; x < numData ; ++x) {
      energy += pointToClosestCenterUB[x] ;
    }

    if (self->verbosity) {
      vl_size numDistanceComputations =
      numDistanceComputationsToRefreshUB +
      numDistanceComputationsToRefreshLB +
      numDistanceComputationsToRefreshCenterDistances +
      numDistanceComputationsToNewCenters ;
      VL_PRINTF("kmeans: Elkan iter %d: energy <= %g, dist. calc. = %d\n",
                iteration,
                energy,
                numDistanceComputations) ;
      if (self->verbosity > 1) {
        VL_PRINTF("kmeans: Elkan iter %d: total dist. calc. per type: "
                  "UB: %.1f%% (%d), LB: %.1f%% (%d), "
                  "intra_center: %.1f%% (%d), "
                  "new_center: %.1f%% (%d)\n",
                  iteration,
                  100.0 * numDistanceComputationsToRefreshUB / numDistanceComputations,
                  numDistanceComputationsToRefreshUB,
                  100.0 *numDistanceComputationsToRefreshLB / numDistanceComputations,
                  numDistanceComputationsToRefreshLB,
                  100.0 * numDistanceComputationsToRefreshCenterDistances / numDistanceComputations,
                  numDistanceComputationsToRefreshCenterDistances,
                  100.0 * numDistanceComputationsToNewCenters / numDistanceComputations,
                  numDistanceComputationsToNewCenters) ;
      }
    }

    /* check termination conditions */
    if (iteration >= self->maxNumIterations) {
      if (self->verbosity) {
        VL_PRINTF("kmeans: Elkan terminating because maximum number of iterations reached\n") ;
      }
      break ;
    }
    if (allDone) {
      if (self->verbosity) {
        VL_PRINTF("kmeans: Elkan terminating because the algorithm fully converged\n") ;
      }
      break ;
    }

  } /* next Elkan iteration */


  /* compute true energy */
  energy = 0 ;
  for (x = 0 ; x < numData ; ++ x) {
    vl_uindex cx = assignments [x] ;
    energy += distFn(self->dimension,
                     data + self->dimension * x,
                     (TYPE*)self->centers + self->dimension * cx) ;
    totDistanceComputationsToFinalize += 1 ;
  }

  {
    vl_size totDistanceComputations =
    totDistanceComputationsToInit +
    totDistanceComputationsToRefreshUB +
    totDistanceComputationsToRefreshLB +
    totDistanceComputationsToRefreshCenterDistances +
    totDistanceComputationsToNewCenters +
    totDistanceComputationsToFinalize ;

    double saving = (double)totDistanceComputations
    / (iteration * self->numCenters * numData) ;

    if (self->verbosity) {
      VL_PRINTF("kmeans: Elkan: total dist. calc.: %d (%.2f %% of Lloyd)\n",
                totDistanceComputations, saving * 100.0) ;
    }

    if (self->verbosity > 1) {
      VL_PRINTF("kmeans: Elkan: total dist. calc. per type: "
                "init: %.1f%% (%d), UB: %.1f%% (%d), LB: %.1f%% (%d), "
                "intra_center: %.1f%% (%d), "
                "new_center: %.1f%% (%d), "
                "finalize: %.1f%% (%d)\n",
                100.0 * totDistanceComputationsToInit / totDistanceComputations,
                totDistanceComputationsToInit,
                100.0 * totDistanceComputationsToRefreshUB / totDistanceComputations,
                totDistanceComputationsToRefreshUB,
                100.0 *totDistanceComputationsToRefreshLB / totDistanceComputations,
                totDistanceComputationsToRefreshLB,
                100.0 * totDistanceComputationsToRefreshCenterDistances / totDistanceComputations,
                totDistanceComputationsToRefreshCenterDistances,
                100.0 * totDistanceComputationsToNewCenters / totDistanceComputations,
                totDistanceComputationsToNewCenters,
                100.0 * totDistanceComputationsToFinalize / totDistanceComputations,
                totDistanceComputationsToFinalize) ;
    }
  }

  if (permutations) { vl_free(permutations) ; }
  if (numSeenSoFar) { vl_free(numSeenSoFar) ; }

  vl_free(distances) ;
  vl_free(assignments) ;
  vl_free(clusterMasses) ;

  vl_free(nextCenterDistances) ;
  vl_free(pointToClosestCenterUB) ;
  vl_free(pointToClosestCenterUBIsStrict) ;
  vl_free(pointToCenterLB) ;
  vl_free(newCenters) ;
  vl_free(centerToNewCenterDistances) ;

  return energy ;
}
Пример #8
0
static void
VL_XCAT(_vl_kmeans_seed_centers_plus_plus_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size dimension,
 vl_size numData,
 vl_size numCenters)
{
  vl_uindex x, c ;
  VlRand * rand = vl_get_rand () ;
  TYPE * distances = vl_malloc (sizeof(TYPE) * numData) ;
  TYPE * minDistances = vl_malloc (sizeof(TYPE) * numData) ;
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif

  self->dimension = dimension ;
  self->numCenters = numCenters ;
  self->centers = vl_malloc (sizeof(TYPE) * dimension * numCenters) ;

  for (x = 0 ; x < numData ; ++x) {
    minDistances[x] = (TYPE) VL_INFINITY_D ;
  }

  /* select the first point at random */
  x = vl_rand_uindex (rand, numData) ;
  c = 0 ;
  while (1) {
    TYPE energy = 0 ;
    TYPE acc = 0 ;
    TYPE thresh = (TYPE) vl_rand_real1 (rand) ;

    memcpy ((TYPE*)self->centers + c * dimension,
            data + x * dimension,
            sizeof(TYPE) * dimension) ;

    c ++ ;
    if (c == numCenters) break ;

    VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)
    (distances,
     dimension,
     (TYPE*)self->centers + (c - 1) * dimension, 1,
     data, numData,
     distFn) ;

    for (x = 0 ; x < numData ; ++x) {
      minDistances[x] = VL_MIN(minDistances[x], distances[x]) ;
      energy += minDistances[x] ;
    }

    for (x = 0 ; x < numData - 1 ; ++x) {
      acc += minDistances[x] ;
      if (acc >= thresh * energy) break ;
    }
  }

  vl_free(distances) ;
  vl_free(minDistances) ;
}
Пример #9
0
/* driver */
void
mexFunction(int nout, mxArray *out[],
            int nin, const mxArray *in[])
{

  typedef int  unsigned data_t ;

  vl_bool autoComparison = VL_TRUE ;
  VlVectorComparisonType comparisonType = VlDistanceL2 ;

  enum {IN_X = 0, IN_Y} ;
  enum {OUT_D = 0} ;
  mwSize numDataX = 0 ;
  mwSize numDataY = 0 ;
  mwSize dimension ;
  mxClassID classId ;

  /* for option parsing */
  int opt ;
  int next ;
  mxArray const *optarg ;

  VL_USE_MATLAB_ENV ;

  if (nout > 1) {
    vlmxError(vlmxErrTooManyOutputArguments, NULL) ;
  }
  if (nin < 1) {
    vlmxError(vlmxErrNotEnoughInputArguments, NULL) ;
  }
  if (! (vlmxIsMatrix (in[IN_X],-1,-1) && vlmxIsReal(in[IN_X]))) {
    vlmxError(vlmxErrInvalidArgument, "X must be a real matrix.") ;
  }
  next = 1 ;
  classId = mxGetClassID(in[IN_X]) ;
  dimension = mxGetM(in[IN_X]) ;
  numDataX = mxGetN(in[IN_X]) ;

  if (nin > 1 && vlmxIsMatrix (in[IN_Y],-1,-1) && vlmxIsReal(in[IN_Y])) {
    next = 2 ;
    autoComparison = VL_FALSE ;
    numDataY = mxGetN(in[IN_Y]) ;
    if (mxGetClassID(in[IN_Y]) != classId) {
      vlmxError(vlmxErrInvalidArgument, "X and Y must have the same class.") ;
    }
    if (dimension != mxGetM(in[IN_Y])) {
      vlmxError(vlmxErrInvalidArgument, "X and Y must have the same number of rows.") ;
    }
  }

  if (classId != mxSINGLE_CLASS && classId != mxDOUBLE_CLASS) {
    vlmxError(vlmxErrInvalidArgument,
             "X must be either of class SINGLE or DOUBLE.");
  }

  while ((opt = vlmxNextOption (in, nin, options, &next, &optarg)) >= 0) {
    switch (opt) {
      case opt_L2    : comparisonType = VlDistanceL2 ; break ;
      case opt_L1    : comparisonType = VlDistanceL1 ; break ;
      case opt_CHI2  : comparisonType = VlDistanceChi2 ; break ;
      case opt_HELL  : comparisonType = VlDistanceHellinger ; break ;
      case opt_JS    : comparisonType = VlDistanceJS ; break ;
      case opt_KL2   : comparisonType = VlKernelL2 ; break ;
      case opt_KL1   : comparisonType = VlKernelL1 ; break ;
      case opt_KCHI2 : comparisonType = VlKernelChi2 ; break ;
      case opt_KHELL : comparisonType = VlKernelHellinger ; break ;
      case opt_KJS   : comparisonType = VlKernelJS ; break ;
      default:
        abort() ;
    }
  }

  /* allocate output */
  {
    mwSize dims [2] ;
    dims[0] = numDataX ;
    dims[1] = autoComparison ? numDataX : numDataY ;
    out[OUT_D] = mxCreateNumericArray (2, dims, classId, mxREAL) ;
  }

  /* If either numDataX or numDataY are null, their data pointers are
     null as well. This may confuse
     vl_eval_vector_comparison_on_all_pairs_*, so we intercept this as
     a special case. The same is true if dimension is null.
  */

  if (numDataX == 0 || (! autoComparison && numDataY == 0)) {
    return ;
  }
  if (dimension == 0) {
    return ;
  }

  /* make calculation */
  switch (classId) {
  case mxSINGLE_CLASS:
    {
      VlFloatVectorComparisonFunction f = vl_get_vector_comparison_function_f (comparisonType) ;
      if (autoComparison) {
        vl_eval_vector_comparison_on_all_pairs_f ((float*)mxGetData(out[OUT_D]),
                                                  dimension,
                                                  (float*)mxGetData(in[IN_X]), numDataX,
                                                  0, 0,
                                                  f) ;
      } else {
        vl_eval_vector_comparison_on_all_pairs_f ((float*)mxGetData(out[OUT_D]),
                                                  dimension,
                                                  (float*)mxGetData(in[IN_X]), numDataX,
                                                  (float*)mxGetData(in[IN_Y]), numDataY,
                                                  f) ;
      }
    }
    break ;

    case mxDOUBLE_CLASS:
    {
      VlDoubleVectorComparisonFunction f = vl_get_vector_comparison_function_d (comparisonType) ;
      if (autoComparison) {
        vl_eval_vector_comparison_on_all_pairs_d ((double*)mxGetData(out[OUT_D]),
                                                  dimension,
                                                  (double*)mxGetData(in[IN_X]), numDataX,
                                                  0, 0,
                                                  f) ;
      } else {
        vl_eval_vector_comparison_on_all_pairs_d ((double*)mxGetData(out[OUT_D]),
                                                  dimension,
                                                  (double*)mxGetData(in[IN_X]), numDataX,
                                                  (double*)mxGetData(in[IN_Y]), numDataY,
                                                  f) ;
      }
    }
    break ;

  default:
    abort() ;
  }
}