Exemplo n.º 1
0
/**
 * Add a scaled, kernel-expanded data element to a model vector.
 *
 * Every one of the self->dimension components of the selected element is
 * passed to the homogeneous kernel map evaluator, which is given
 * self->homBuffer as its destination (stride 1). The self->homDimension
 * buffer entries are then multiplied by @a multiplier and added to
 * consecutive entries of @a model.
 *
 * @param self       dataset supplying data, hom, homBuffer and homDimension.
 * @param element    index of the element inside self->data.
 * @param model      accumulator; self->dimension * self->homDimension
 *                   consecutive entries are updated in place.
 * @param multiplier factor applied to each expanded value before adding.
 *
 * self->homBuffer is used as scratch space and is overwritten on each
 * component.
 */
void
VL_XCAT(vl_svmdataset_accumulate_hom_,SFX)(VlSvmDataset const *self,
                                           vl_uindex element,
                                           double *model,
                                           const double multiplier)
{
  T* component = ((T*)self->data) + self->dimension * element ;
  T* const lastComponent = component + self->dimension ;
  T* const expansionEnd = ((T*)self->homBuffer) + self->homDimension ;

  /* TODO: zeros in data could be optimized by skipping over them */
  for ( ; component != lastComponent ; ++component) {
    T* expansion ;

    /* expand the current scalar into the scratch buffer */
    VL_XCAT(vl_homogeneouskernelmap_evaluate_,SFX)(self->hom,
                                                   self->homBuffer,
                                                   1,
                                                   *component) ;

    /* model advances once per expanded entry and is never rewound */
    for (expansion = self->homBuffer ;
         expansion != expansionEnd ;
         ++expansion, ++model) {
      *model += (*expansion) * multiplier ;
    }
  }
}
Exemplo n.º 2
0
/**
 * Recompute the table of pairwise comparisons between the current centers.
 *
 * The table self->centerDistances (numCenters * numCenters entries of
 * TYPE) is allocated on first use and then filled by evaluating the
 * comparison function selected by self->distance on all pairs of centers.
 *
 * @param self k-means object; centers, numCenters, dimension and distance
 *             are read, centerDistances is (allocated and) written.
 * @return numCenters * (numCenters - 1) / 2, computed in integer
 *         arithmetic and converted to double.
 */
static double
VL_XCAT(_vl_kmeans_update_center_distances_, SFX)
(VlKMeans * self)
{
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction compare =
    vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction compare =
    vl_get_vector_comparison_function_d(self->distance) ;
#endif

  /* lazily create the square table the first time it is needed */
  if (self->centerDistances == NULL) {
    self->centerDistances =
      vl_malloc (sizeof(TYPE) * self->numCenters * self->numCenters) ;
  }

  /* a NULL second set makes the helper compare the centers with themselves
     (NOTE(review): inferred from the NULL, 0 arguments -- confirm) */
  VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)
    (self->centerDistances,
     self->dimension,
     self->centers, self->numCenters,
     NULL, 0,
     compare) ;

  /* number of unordered pairs of distinct centers */
  return self->numCenters * (self->numCenters - 1) / 2 ;
}
Exemplo n.º 3
0
/**
 * Inner product between a model vector and a kernel-expanded data element.
 *
 * Every one of the self->dimension components of the selected element is
 * passed to the homogeneous kernel map evaluator, which is given
 * self->homBuffer as its destination (stride 1). The self->homDimension
 * buffer entries are multiplied with consecutive entries of @a model and
 * summed.
 *
 * @param self    dataset supplying data, hom, homBuffer and homDimension.
 * @param element index of the element inside self->data.
 * @param model   vector of which self->dimension * self->homDimension
 *                consecutive entries are read.
 * @return the accumulated sum of products.
 *
 * self->homBuffer is used as scratch space and is overwritten on each
 * component.
 */
double
VL_XCAT(_vl_svmdataset_inner_product_hom_,SFX) (VlSvmDataset const *self,
                                                vl_uindex element,
                                                double const *model)
{
  T* component = ((T*)self->data) + self->dimension * element ;
  T* const lastComponent = component + self->dimension ;
  T* const expansionEnd = ((T*)self->homBuffer) + self->homDimension ;
  double sum = 0 ;

  /* TODO: zeros in data could be optimized by skipping over them */
  for ( ; component != lastComponent ; ++component) {
    T* expansion ;

    /* expand the current scalar into the scratch buffer */
    VL_XCAT(vl_homogeneouskernelmap_evaluate_,SFX)(self->hom,
                                                   self->homBuffer,
                                                   1,
                                                   *component) ;

    /* model advances once per expanded entry and is never rewound */
    for (expansion = self->homBuffer ;
         expansion != expansionEnd ;
         ++expansion, ++model) {
      sum += (*expansion) * (*model) ;
    }
  }
  return sum ;
}
Exemplo n.º 4
0
/**
 * Compute the Fisher encoding of a set of vectors with respect to a
 * Gaussian mixture with per-dimension (diagonal) variances.
 *
 * The output has 2 * dimension * numClusters entries. The first
 * dimension * numClusters entries hold, cluster by cluster, the
 * posterior-weighted sum of the whitened deviations
 * (data - mean) / sqrt(covariance); the second half holds the
 * posterior-weighted sum of (whitened deviation)^2 - 1. Both halves are
 * scaled by 1 / (numData * sqrt(prior)) and 1 / (numData * sqrt(2 * prior))
 * respectively.
 *
 * Clusters whose prior is below 1e-6 are skipped and keep a zero encoding.
 * When @a numData is zero the encoding is all zeros (the scaling step,
 * which would otherwise multiply 0 by an infinite factor and produce NaN,
 * is skipped).
 *
 * @param enc         output buffer of 2 * dimension * numClusters entries.
 * @param means       numClusters * dimension cluster means.
 * @param dimension   dimension of the data vectors.
 * @param numClusters number of mixture components.
 * @param covariances numClusters * dimension per-dimension variances.
 * @param priors      numClusters mixture weights.
 * @param data        numData * dimension data entries.
 * @param numData     number of data vectors.
 * @param flags       VL_FISHER_FLAG_SQUARE_ROOT applies a signed square
 *                    root to every entry; VL_FISHER_FLAG_NORMALIZED then
 *                    divides the whole vector by max(L2 norm, 1e-12).
 */
static void
VL_XCAT(_vl_fisher_encode_, SFX)
(TYPE * enc,
 TYPE const * means, vl_size dimension, vl_size numClusters,
 TYPE const * covariances,
 TYPE const * priors,
 TYPE const * data, vl_size numData,
 int flags)
{
  vl_size dim;
  vl_index i_cl, i_d;
  TYPE * posteriors ;
  TYPE * sqrtInvSigma;

  posteriors = vl_malloc(sizeof(TYPE) * numClusters * numData);
  sqrtInvSigma = vl_malloc(sizeof(TYPE) * dimension * numClusters);

  memset(enc, 0, sizeof(TYPE) * 2 * dimension * numClusters) ;

  /* precompute 1 / sigma for every cluster and dimension */
  for (i_cl = 0 ; i_cl < (signed)numClusters ; ++i_cl) {
    for(dim = 0; dim < dimension; dim++) {
      sqrtInvSigma[i_cl*dimension + dim] = sqrt(1.0 / covariances[i_cl*dimension + dim]);
    }
  }

  /* posteriors[i_cl + i_d * numClusters] is read below as the weight of
     cluster i_cl for data point i_d */
  VL_XCAT(vl_get_gmm_data_posteriors_, SFX)(posteriors, numClusters, numData,
                                            priors,
                                            means, dimension,
                                            covariances,
                                            data) ;

  /* each iteration writes only its own uk / vk slices of enc, so the
     clusters can be processed independently */
#if defined(_OPENMP)
#pragma omp parallel for default(shared) private(i_cl, i_d, dim) num_threads(vl_get_max_threads())
#endif
  for(i_cl = 0; i_cl < (signed)numClusters; ++ i_cl) {
    TYPE uprefix;
    TYPE vprefix;

    TYPE * uk = enc + i_cl*dimension ;
    TYPE * vk = enc + i_cl*dimension + numClusters * dimension ;

    /* negligible components are left at zero */
    if (priors[i_cl] < 1e-6) { continue ; }

    for(i_d = 0; i_d < (signed)numData; i_d++) {
      TYPE p = posteriors[i_cl + i_d * numClusters] ;
      if (p == 0) continue ;
      for(dim = 0; dim < dimension; dim++) {
        TYPE diff = data[i_d*dimension + dim] - means[i_cl*dimension + dim] ;
        diff *= sqrtInvSigma[i_cl*dimension + dim] ;
        *(uk + dim) += p * diff ;
        *(vk + dim) += p * (diff * diff - 1);
      }
    }

    /* With no data the accumulators are exactly zero and the factors
       below would be infinite; skip the scaling to avoid 0 * inf = NaN. */
    if (numData > 0) {
      uprefix = 1/(numData*sqrt(priors[i_cl]));
      vprefix = 1/(numData*sqrt(2*priors[i_cl]));

      for(dim = 0; dim < dimension; dim++) {
        *(uk + dim) = *(uk + dim) * uprefix;
        *(vk + dim) = *(vk + dim) * vprefix;
      }
    }
  }

  vl_free(posteriors);
  vl_free(sqrtInvSigma) ;

  /* optional signed square root: sign(z) * sqrt(|z|) */
  if (flags & VL_FISHER_FLAG_SQUARE_ROOT) {
    for(dim = 0; dim < 2 * dimension * numClusters ; dim++) {
      TYPE z = enc [dim] ;
      if (z >= 0) {
        enc[dim] = VL_XCAT(vl_sqrt_, SFX)(z) ;
      } else {
        enc[dim] = - VL_XCAT(vl_sqrt_, SFX)(- z) ;
      }
    }
  }

  /* optional global L2 normalization; the norm is floored at 1e-12 so
     that an all-zero encoding stays zero instead of dividing by zero */
  if (flags & VL_FISHER_FLAG_NORMALIZED) {
    TYPE n = 0 ;
    for(dim = 0 ; dim < 2 * dimension * numClusters ; dim++) {
      TYPE z = enc [dim] ;
      n += z * z ;
    }
    n = VL_XCAT(vl_sqrt_, SFX)(n) ;
    n = VL_MAX(n, 1e-12) ;
    for(dim = 0 ; dim < 2 * dimension * numClusters ; dim++) {
      enc[dim] /= n ;
    }
  }
}
Exemplo n.º 5
0
/**
 * Refine the current centers with the bound-based ("Elkan") k-means
 * iteration.
 *
 * For every data point the function maintains an upper bound on the
 * distance to its assigned center and a lower bound on the distance to
 * every center. After each center update the bounds are loosened by the
 * amount each center moved, and a point-to-center distance is recomputed
 * only when the bounds cannot rule out a reassignment.
 *
 * The bound tests use a factor 2.0 for VlDistanceL1 and 4.0 otherwise, and
 * the non-L1 bound updates use a + b +/- 2 * sqrt(a * b), i.e.
 * (sqrt(a) +/- sqrt(b))^2.
 * NOTE(review): this suggests distFn returns squared distances in the
 * non-L1 case -- confirm against the comparison function definitions.
 *
 * @param self    k-means object. centers, numCenters, dimension, distance,
 *                maxNumIterations and verbosity are read; centers and
 *                centerDistances are updated.
 * @param data    numData * self->dimension data entries.
 * @param numData number of data points.
 * @return the sum over all points of distFn(point, assigned center),
 *         recomputed exactly after the last iteration.
 *
 * Ownership: at every iteration the buffer held by self->centers is
 * swapped with a scratch buffer allocated here, and whichever buffer is
 * the scratch one at exit is released with vl_free(). The memory that
 * self->centers pointed to on entry may therefore be freed by this
 * function.
 *
 * NOTE(review): in the VlDistanceL2 update a center with no assigned point
 * has mass 0 and is divided by zero.
 * NOTE(review): the VL_PRINTF calls pass vl_size values to "%d"
 * conversions -- confirm this matches the width of vl_size.
 */
static double
VL_XCAT(_vl_kmeans_refine_centers_elkan_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size numData)
{
  vl_size d, iteration, x ;
  vl_uint32 c, j ;
  vl_bool allDone ;
  vl_uint32 * assignments = vl_malloc (sizeof(vl_uint32) * numData) ;
  /* one counter per center: it is indexed by assignments[x], which ranges
     over [0, numCenters), so it must be sized by the number of centers */
  vl_size * clusterMasses = vl_malloc (sizeof(vl_size) * self->numCenters) ;

#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn = vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn = vl_get_vector_comparison_function_d(self->distance) ;
#endif

  /* per center: smallest entry of centerDistances towards another center */
  TYPE * nextCenterDistances = vl_malloc (sizeof(TYPE) * self->numCenters) ;
  /* per point: upper bound on the distance to the assigned center, and
     whether that bound is currently the exact distance */
  TYPE * pointToClosestCenterUB = vl_malloc (sizeof(TYPE) * numData) ;
  vl_bool * pointToClosestCenterUBIsStrict = vl_malloc (sizeof(vl_bool) * numData) ;
  /* per point and center: lower bound, stored at [c + x * numCenters] */
  TYPE * pointToCenterLB = vl_malloc (sizeof(TYPE) * numData * self->numCenters) ;
  TYPE * newCenters = vl_malloc(sizeof(TYPE) * self->dimension * self->numCenters) ;
  TYPE * centerToNewCenterDistances = vl_malloc (sizeof(TYPE) * self->numCenters) ;

  /* only used by the VlDistanceL1 (median) center update */
  vl_uint32 * permutations = NULL ;
  vl_size * numSeenSoFar = NULL ;

  double energy ;

  vl_size totDistanceComputationsToInit = 0 ;
  vl_size totDistanceComputationsToRefreshUB = 0 ;
  vl_size totDistanceComputationsToRefreshLB = 0 ;
  vl_size totDistanceComputationsToRefreshCenterDistances = 0 ;
  vl_size totDistanceComputationsToNewCenters = 0 ;
  vl_size totDistanceComputationsToFinalize = 0 ;

  if (self->distance == VlDistanceL1) {
    permutations = vl_malloc(sizeof(vl_uint32) * numData * self->dimension) ;
    numSeenSoFar = vl_malloc(sizeof(vl_size) * self->numCenters) ;
    VL_XCAT(_vl_kmeans_sort_data_helper_, SFX)(self, permutations, data, numData) ;
  }

  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
  /*                          Initialization                        */
  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

  /* An iteration is: get_new_centers + reassign + get_energy.
     This counts as iteration 0, where get_new_centers is assumed
     to be performed before calling the train function by
     the initialization function */

  /* update distances between centers */
  totDistanceComputationsToInit +=
  VL_XCAT(_vl_kmeans_update_center_distances_, SFX)(self) ;

  /* assign points to the initial centers and initialize bounds */
  memset(pointToCenterLB, 0, sizeof(TYPE) * self->numCenters *  numData) ;
  for (x = 0 ; x < numData ; ++x) {
    TYPE distance ;

    /* do the first center */
    assignments[x] = 0 ;
    distance = distFn(self->dimension,
                      data + x * self->dimension,
                      (TYPE*)self->centers + 0) ;
    pointToClosestCenterUB[x] = distance ;
    pointToClosestCenterUBIsStrict[x] = VL_TRUE ;
    pointToCenterLB[0 + x * self->numCenters] = distance ;
    totDistanceComputationsToInit += 1 ;

    /* do other centers */
    for (c = 1 ; c < self->numCenters ; ++c) {

      /* Can skip if the center assigned so far is twice as close
         as its distance to the center under consideration */

      if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
          pointToClosestCenterUB[x] <=
          ((TYPE*)self->centerDistances)
          [c + assignments[x] * self->numCenters]) {
        continue ;
      }

      distance = distFn(self->dimension,
                        data + x * self->dimension,
                        (TYPE*)self->centers + c * self->dimension) ;
      pointToCenterLB[c + x * self->numCenters] = distance ;
      totDistanceComputationsToInit += 1 ;
      if (distance < pointToClosestCenterUB[x]) {
        pointToClosestCenterUB[x] = distance ;
        assignments[x] = c ;
      }
    }
  }

  /* compute UB on energy */
  energy = 0 ;
  for (x = 0 ; x < numData ; ++x) {
    energy += pointToClosestCenterUB[x] ;
  }

  if (self->verbosity) {
    VL_PRINTF("kmeans: Elkan iter 0: energy = %g, dist. calc. = %d\n",
              energy, totDistanceComputationsToInit) ;
  }

/* #define SANITY*/
#ifdef SANITY
  {
    int xx ; int cc ;
    TYPE tol = 1e-5 ;
    VL_PRINTF("inconsistencies after initial assignments:\n");
    for (xx = 0 ; xx < numData ; ++xx) {
      for (cc = 0 ; cc < self->numCenters ; ++cc) {
        TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
        TYPE b = distFn(self->dimension,
                        data + self->dimension * xx,
                        (TYPE*)self->centers + self->dimension * cc) ;
        if (cc == assignments[xx]) {
          TYPE z = pointToClosestCenterUB[xx] ;
          if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                             cc, xx, z, b) ;
        }
        if (a>b+tol) VL_PRINTF("LB %d %d = %f  > %f\n",
                           cc, xx, a, b) ;
      }
    }
  }
#endif

  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
  /*                          Iterations                            */
  /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

  for (iteration = 1 ; 1; ++iteration) {

    vl_size numDistanceComputationsToRefreshUB = 0 ;
    vl_size numDistanceComputationsToRefreshLB = 0 ;
    vl_size numDistanceComputationsToRefreshCenterDistances = 0 ;
    vl_size numDistanceComputationsToNewCenters = 0 ;

    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
    /*                         Compute new centers                  */
    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

    /* count the points currently assigned to each center */
    memset(clusterMasses, 0, sizeof(vl_size) * self->numCenters) ;
    for (x = 0 ; x < numData ; ++x) {
      clusterMasses[assignments[x]] ++ ;
    }

    switch (self->distance) {
      case VlDistanceL2:
        /* new center = mean of the assigned points */
        memset(newCenters, 0, sizeof(TYPE) * self->dimension * self->numCenters) ;
        for (x = 0 ; x < numData ; ++x) {
          TYPE * cpt = newCenters + assignments[x] * self->dimension ;
          TYPE const * xpt = data + x * self->dimension ;
          for (d = 0 ; d < self->dimension ; ++d) { cpt[d] += xpt[d] ; }
        }
        for (c = 0 ; c < self->numCenters ; ++c) {
          TYPE mass = clusterMasses[c] ;
          TYPE * cpt = newCenters + c * self->dimension ;
          for (d = 0 ; d < self->dimension ; ++d) { cpt[d] /= mass ; }
        }
        break ;
      case VlDistanceL1:
        /* Walk the points in the order given by the per-dimension
           permutation and keep overwriting the center coordinate until
           half of the cluster mass has been seen.
           NOTE(review): this yields the per-dimension median provided
           the permutation sorts the data along dimension d -- confirm in
           the sort helper. */
        for (d = 0 ; d < self->dimension ; ++d) {
          vl_uint32 * perm = permutations + d * numData ;
          memset(numSeenSoFar, 0, sizeof(vl_size) * self->numCenters) ;
          for (x = 0; x < numData ; ++x) {
            c = assignments[perm[x]] ;
            if (2 * numSeenSoFar[c] < clusterMasses[c]) {
              newCenters [d + c * self->dimension] =
              data [d + perm[x] * self->dimension] ;
            }
            numSeenSoFar[c] ++ ;
          }
        }
        break ;
      default:
        abort();
    } /* done compute centers */

    /* compute the distance from the old centers to the new centers */
    for (c = 0 ; c < self->numCenters ; ++c) {
      TYPE distance = distFn(self->dimension,
                             newCenters + c * self->dimension,
                             (TYPE*)self->centers + c * self->dimension) ;
      centerToNewCenterDistances[c] = distance ;
      numDistanceComputationsToNewCenters += 1 ;
    }

    /* make the new centers current; the old buffer becomes the scratch
       buffer for the next iteration */
    {
      TYPE * tmp = self->centers ;
      self->centers = newCenters ;
      newCenters = tmp ;
    }

    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
    /*                Reassign points to the centers                */
    /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */

    /*
     Update distances between centers.
     */
    numDistanceComputationsToRefreshCenterDistances
    += VL_XCAT(_vl_kmeans_update_center_distances_, SFX)(self) ;

    /* for each center, the distance to its nearest other center */
    for (c = 0 ; c < self->numCenters ; ++c) {
      nextCenterDistances[c] = (TYPE) VL_INFINITY_D ;
      for (j = 0 ; j < self->numCenters ; ++j) {
        if (j == c) continue ;
        nextCenterDistances[c] = VL_MIN(nextCenterDistances[c],
                                        ((TYPE*)self->centerDistances)
                                        [j + c * self->numCenters]) ;
      }
    }

    /*
     Update upper bounds on point-to-closest-center distances
     based on the center variation.
     */
    for (x = 0 ; x < numData ; ++x) {
      TYPE a = pointToClosestCenterUB[x] ;
      TYPE b = centerToNewCenterDistances[assignments[x]] ;
      if (self->distance == VlDistanceL1) {
        pointToClosestCenterUB[x] = a + b ;
      } else {
#if (FLT == VL_TYPE_FLOAT)
        TYPE sqrtab =  sqrtf (a * b) ;
#else
        TYPE sqrtab =  sqrt (a * b) ;
#endif
        pointToClosestCenterUB[x] = a + b + 2.0 * sqrtab ;
      }
      pointToClosestCenterUBIsStrict[x] = VL_FALSE ;
    }

    /*
     Update lower bounds on point-to-center distances
     based on the center variation.
     */
    for (x = 0 ; x < numData ; ++x) {
      for (c = 0 ; c < self->numCenters ; ++c) {
        TYPE a = pointToCenterLB[c + x * self->numCenters] ;
        TYPE b = centerToNewCenterDistances[c] ;
        if (a < b) {
          /* the center moved further than the bound: nothing is known */
          pointToCenterLB[c + x * self->numCenters] = 0 ;
        } else {
          if (self->distance == VlDistanceL1) {
            pointToCenterLB[c + x * self->numCenters]  = a - b ;
          } else {
#if (FLT == VL_TYPE_FLOAT)
            TYPE sqrtab =  sqrtf (a * b) ;
#else
            TYPE sqrtab =  sqrt (a * b) ;
#endif
            pointToCenterLB[c + x * self->numCenters]  = a + b - 2.0 * sqrtab ;
          }
        }
      }
    }

#ifdef SANITY
    {
      int xx ; int cc ;
      TYPE tol = 1e-5 ;
      VL_PRINTF("inconsistencies before assignments:\n");
      for (xx = 0 ; xx < numData ; ++xx) {
        for (cc = 0 ; cc < self->numCenters ; ++cc) {
          TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
          TYPE b = distFn(self->dimension,
                          data + self->dimension * xx,
                          (TYPE*)self->centers + self->dimension * cc) ;
          if (cc == assignments[xx]) {
            TYPE z = pointToClosestCenterUB[xx] ;
            if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                                            cc, xx, z, b) ;
          }
          if (a>b+tol) VL_PRINTF("LB %d %d = %f  > %f (assign = %d)\n",
                                          cc, xx, a, b, assignments[xx]) ;
        }
      }
    }
#endif

    /*
     Scan the data and do the reassignments. Use the bounds to
     skip as many point-to-center distance calculations as possible.
     */
    for (allDone = VL_TRUE, x = 0 ; x < numData ; ++x) {
      /*
       A point x sticks with its current center assignments[x] if
       the UB to d(x, c[assignments[x]]) is not larger than half
       the distance of c[assignments[x]] to any other center c.
       */
      if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
          pointToClosestCenterUB[x] <= nextCenterDistances[assignments[x]]) {
        continue ;
      }

      for (c = 0 ; c < self->numCenters ; ++c) {
        vl_uint32 cx = assignments[x] ;
        TYPE distance ;

        /* The point is not reassigned to a given center c
         if either:

         0 - c is already the assigned center
         1 - The UB of d(x, c[assignments[x]]) is smaller than half
             the distance of c[assignments[x]] to c, OR
         2 - The UB of d(x, c[assignments[x]]) is smaller than the
             LB of the distance of x to c.
         */
        if (cx == c) {
          continue ;
        }
        if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
            pointToClosestCenterUB[x] <= ((TYPE*)self->centerDistances)
            [c + cx * self->numCenters]) {
          continue ;
        }
        if (pointToClosestCenterUB[x] <= pointToCenterLB
            [c + x * self->numCenters]) {
          continue ;
        }

        /* If the UB is loose, try recomputing it and test again */
        if (! pointToClosestCenterUBIsStrict[x]) {
          distance = distFn(self->dimension,
                            data + self->dimension * x,
                            (TYPE*)self->centers + self->dimension * cx) ;
          pointToClosestCenterUB[x] = distance ;
          pointToClosestCenterUBIsStrict[x] = VL_TRUE ;
          pointToCenterLB[cx + x * self->numCenters] = distance ;
          numDistanceComputationsToRefreshUB += 1 ;

          if (((self->distance == VlDistanceL1) ? 2.0 : 4.0) *
              pointToClosestCenterUB[x] <= ((TYPE*)self->centerDistances)
              [c + cx * self->numCenters]) {
            continue ;
          }
          if (pointToClosestCenterUB[x] <= pointToCenterLB
              [c + x * self->numCenters]) {
            continue ;
          }
        }

        /*
         Now the UB is strict (equal to d(x, assignments[x])), but
         we still could not exclude that x should be reassigned to
         c. We therefore compute the distance, update the LB,
         and check if a reassignment must be made
         */
        distance = distFn(self->dimension,
                          data + x * self->dimension,
                          (TYPE*)self->centers + c *  self->dimension) ;
        numDistanceComputationsToRefreshLB += 1 ;
        pointToCenterLB[c + x * self->numCenters] = distance ;

        if (distance < pointToClosestCenterUB[x]) {
          assignments[x] = c ;
          pointToClosestCenterUB[x] = distance ;
          allDone = VL_FALSE ;
          /* the UB strict flag is already set here */
        }

      } /* assign center */
    } /* next data point */

    totDistanceComputationsToRefreshUB
    += numDistanceComputationsToRefreshUB ;

    totDistanceComputationsToRefreshLB
    += numDistanceComputationsToRefreshLB ;

    totDistanceComputationsToRefreshCenterDistances
    += numDistanceComputationsToRefreshCenterDistances ;

    totDistanceComputationsToNewCenters
    += numDistanceComputationsToNewCenters ;

#ifdef SANITY
    {
      int xx ; int cc ;
      TYPE tol = 1e-5 ;
      VL_PRINTF("inconsistencies after assignments:\n");
      for (xx = 0 ; xx < numData ; ++xx) {
        for (cc = 0 ; cc < self->numCenters ; ++cc) {
          TYPE a = pointToCenterLB[cc + xx * self->numCenters] ;
          TYPE b = distFn(self->dimension,
                          data + self->dimension * xx,
                          (TYPE*)self->centers + self->dimension * cc) ;
          if (cc == assignments[xx]) {
            TYPE z = pointToClosestCenterUB[xx] ;
            if (z+tol<b) VL_PRINTF("UB %d %d = %f < %f\n",
                               cc, xx, z, b) ;
          }
          if (a>b+tol) VL_PRINTF("LB %d %d = %f  > %f (assign = %d)\n",
                             cc, xx, a, b, assignments[xx]) ;
        }
      }
    }
#endif

    /* compute UB on energy */
    energy = 0 ;
    for (x = 0 ; x < numData ; ++x) {
      energy += pointToClosestCenterUB[x] ;
    }

    if (self->verbosity) {
      vl_size numDistanceComputations =
      numDistanceComputationsToRefreshUB +
      numDistanceComputationsToRefreshLB +
      numDistanceComputationsToRefreshCenterDistances +
      numDistanceComputationsToNewCenters ;
      VL_PRINTF("kmeans: Elkan iter %d: energy <= %g, dist. calc. = %d\n",
                iteration,
                energy,
                numDistanceComputations) ;
      if (self->verbosity > 1) {
        VL_PRINTF("kmeans: Elkan iter %d: total dist. calc. per type: "
                  "UB: %.1f%% (%d), LB: %.1f%% (%d), "
                  "intra_center: %.1f%% (%d), "
                  "new_center: %.1f%% (%d)\n",
                  iteration,
                  100.0 * numDistanceComputationsToRefreshUB / numDistanceComputations,
                  numDistanceComputationsToRefreshUB,
                  100.0 *numDistanceComputationsToRefreshLB / numDistanceComputations,
                  numDistanceComputationsToRefreshLB,
                  100.0 * numDistanceComputationsToRefreshCenterDistances / numDistanceComputations,
                  numDistanceComputationsToRefreshCenterDistances,
                  100.0 * numDistanceComputationsToNewCenters / numDistanceComputations,
                  numDistanceComputationsToNewCenters) ;
      }
    }

    /* check termination conditions */
    if (iteration >= self->maxNumIterations) {
      if (self->verbosity) {
        VL_PRINTF("kmeans: Elkan terminating because maximum number of iterations reached\n") ;
      }
      break ;
    }
    if (allDone) {
      if (self->verbosity) {
        VL_PRINTF("kmeans: Elkan terminating because the algorithm fully converged\n") ;
      }
      break ;
    }

  } /* next Elkan iteration */


  /* compute true energy (the per-point upper bounds may be loose) */
  energy = 0 ;
  for (x = 0 ; x < numData ; ++ x) {
    vl_uindex cx = assignments [x] ;
    energy += distFn(self->dimension,
                     data + self->dimension * x,
                     (TYPE*)self->centers + self->dimension * cx) ;
    totDistanceComputationsToFinalize += 1 ;
  }

  {
    vl_size totDistanceComputations =
    totDistanceComputationsToInit +
    totDistanceComputationsToRefreshUB +
    totDistanceComputationsToRefreshLB +
    totDistanceComputationsToRefreshCenterDistances +
    totDistanceComputationsToNewCenters +
    totDistanceComputationsToFinalize ;

    /* ratio to iteration * numCenters * numData distance evaluations */
    double saving = (double)totDistanceComputations
    / (iteration * self->numCenters * numData) ;

    if (self->verbosity) {
      VL_PRINTF("kmeans: Elkan: total dist. calc.: %d (%.2f %% of Lloyd)\n",
                totDistanceComputations, saving * 100.0) ;
    }

    if (self->verbosity > 1) {
      VL_PRINTF("kmeans: Elkan: total dist. calc. per type: "
                "init: %.1f%% (%d), UB: %.1f%% (%d), LB: %.1f%% (%d), "
                "intra_center: %.1f%% (%d), "
                "new_center: %.1f%% (%d), "
                "finalize: %.1f%% (%d)\n",
                100.0 * totDistanceComputationsToInit / totDistanceComputations,
                totDistanceComputationsToInit,
                100.0 * totDistanceComputationsToRefreshUB / totDistanceComputations,
                totDistanceComputationsToRefreshUB,
                100.0 *totDistanceComputationsToRefreshLB / totDistanceComputations,
                totDistanceComputationsToRefreshLB,
                100.0 * totDistanceComputationsToRefreshCenterDistances / totDistanceComputations,
                totDistanceComputationsToRefreshCenterDistances,
                100.0 * totDistanceComputationsToNewCenters / totDistanceComputations,
                totDistanceComputationsToNewCenters,
                100.0 * totDistanceComputationsToFinalize / totDistanceComputations,
                totDistanceComputationsToFinalize) ;
    }
  }

  if (permutations) { vl_free(permutations) ; }
  if (numSeenSoFar) { vl_free(numSeenSoFar) ; }

  vl_free(assignments) ;
  vl_free(clusterMasses) ;

  vl_free(nextCenterDistances) ;
  vl_free(pointToClosestCenterUB) ;
  vl_free(pointToClosestCenterUBIsStrict) ;
  vl_free(pointToCenterLB) ;
  /* after the swaps this is whichever buffer is not self->centers */
  vl_free(newCenters) ;
  vl_free(centerToNewCenterDistances) ;

  return energy ;
}
Exemplo n.º 6
0
/**
 * Refine the current centers with the plain (Lloyd) k-means iteration.
 *
 * Each iteration assigns every data point to a center with the quantize
 * helper, sums the resulting per-point distances into the energy, and
 * then recomputes the centers in place in self->centers: the mean of the
 * assigned points for VlDistanceL2, a per-dimension selection driven by
 * the permutation table for VlDistanceL1. The loop stops when
 * self->maxNumIterations is reached or when the energy is exactly equal
 * to the one of the previous iteration. Any other distance aborts.
 *
 * @param self    k-means object; centers are updated in place.
 * @param data    numData * self->dimension data entries.
 * @param numData number of data points.
 * @return the energy (sum of the distances returned by the quantize
 *         helper) of the last assignment step.
 *
 * NOTE(review): in the VlDistanceL2 update a center with no assigned point
 * has mass 0 and is divided by zero.
 */
static double
VL_XCAT(_vl_kmeans_refine_centers_lloyd_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size numData)
{
  vl_size c, d, x, iteration ;
  double previousEnergy = VL_INFINITY_D ;
  double energy ;
  TYPE * distances = vl_malloc (sizeof(TYPE) * numData) ;
  vl_uint32 * assignments = vl_malloc (sizeof(vl_uint32) * numData) ;
  /* one counter per center: it is indexed by assignments[x], which ranges
     over [0, numCenters), so it must be sized by the number of centers */
  vl_size * clusterMasses = vl_malloc (sizeof(vl_size) * self->numCenters) ;
  /* only used by the VlDistanceL1 center update */
  vl_uint32 * permutations = NULL ;
  vl_size * numSeenSoFar = NULL ;

  if (self->distance == VlDistanceL1) {
    permutations = vl_malloc(sizeof(vl_uint32) * numData * self->dimension) ;
    numSeenSoFar = vl_malloc(sizeof(vl_size) * self->numCenters) ;
    VL_XCAT(_vl_kmeans_sort_data_helper_, SFX)(self, permutations, data, numData) ;
  }

  for (energy = VL_INFINITY_D,
       iteration = 0 ;
       1 ;
       ++ iteration) {

    /* assign data to clusters */
    VL_XCAT(_vl_kmeans_quantize_, SFX)(self, assignments, distances, data, numData) ;

    /* compute energy */
    energy = 0 ;
    for (x = 0 ; x < numData ; ++x) energy += distances[x] ;
    if (self->verbosity) {
      VL_PRINTF("kmeans: Lloyd iter %d: energy = %g\n", iteration,
                energy) ;
    }

    /* check termination conditions */
    if (iteration >= self->maxNumIterations) {
      if (self->verbosity) {
        VL_PRINTF("kmeans: Lloyd terminating because maximum number of iterations reached\n") ;
      }
      break ;
    }
    if (energy == previousEnergy) {
      if (self->verbosity) {
        VL_PRINTF("kmeans: Lloyd terminating because the algorithm fully converged\n") ;
      }
      break ;
    }

    /* begin next iteration */
    previousEnergy = energy ;

    /* update clusters: first count the points assigned to each center */
    memset(clusterMasses, 0, sizeof(vl_size) * self->numCenters) ;
    for (x = 0 ; x < numData ; ++x) {
      clusterMasses[assignments[x]] ++ ;
    }

    switch (self->distance) {
      case VlDistanceL2:
        /* new center = mean of the assigned points */
        memset(self->centers, 0, sizeof(TYPE) * self->dimension * self->numCenters) ;
        for (x = 0 ; x < numData ; ++x) {
          TYPE * cpt = (TYPE*)self->centers + assignments[x] * self->dimension ;
          TYPE const * xpt = data + x * self->dimension ;
          for (d = 0 ; d < self->dimension ; ++d) { cpt[d] += xpt[d] ; }
        }
        for (c = 0 ; c < self->numCenters ; ++c) {
          TYPE mass = clusterMasses[c] ;
          TYPE * cpt = (TYPE*)self->centers + c * self->dimension ;
          for (d = 0 ; d < self->dimension ; ++d) { cpt[d] /= mass ; }
        }
        break ;
      case VlDistanceL1:
        /* Walk the points in the order given by the per-dimension
           permutation and keep overwriting the center coordinate until
           half of the cluster mass has been seen.
           NOTE(review): this yields the per-dimension median provided
           the permutation sorts the data along dimension d -- confirm in
           the sort helper. */
        for (d = 0 ; d < self->dimension ; ++d) {
          vl_uint32 * perm = permutations + d * numData ;
          memset(numSeenSoFar, 0, sizeof(vl_size) * self->numCenters) ;
          for (x = 0; x < numData ; ++x) {
            c = assignments[perm[x]] ;
            if (2 * numSeenSoFar[c] < clusterMasses[c]) {
              ((TYPE*)self->centers) [d + c * self->dimension] =
              data [d + perm[x] * self->dimension] ;
            }
            numSeenSoFar[c] ++ ;
          }
        }
        break ;
      default:
        abort();
    } /* done compute centers */
  } /* next Lloyd iteration */

  if (permutations) { vl_free(permutations) ; }
  if (numSeenSoFar) { vl_free(numSeenSoFar) ; }
  vl_free(distances) ;
  vl_free(assignments) ;
  vl_free(clusterMasses) ;
  return energy ;
}
Exemplo n.º 7
0
/**
 * Choose the initial centers by randomized, distance-weighted sampling of
 * the data points (the scheme named "plus plus" by this function).
 *
 * The first center is a data point drawn with vl_rand_uindex(). Every
 * following center is the first data point at which the running sum of
 * minDistances (the smallest comparison value between each point and the
 * centers chosen so far) reaches a random fraction of the total; if the
 * threshold is never reached the last data point is taken.
 *
 * @param self       k-means object; dimension and numCenters are set and
 *                   centers is replaced by a newly allocated buffer (the
 *                   previous pointer value is overwritten, not freed here).
 * @param data       numData * dimension data entries.
 * @param dimension  dimension of the data points.
 * @param numData    number of data points.
 * @param numCenters number of centers to select.
 */
static void
VL_XCAT(_vl_kmeans_seed_centers_plus_plus_, SFX)
(VlKMeans * self,
 TYPE const * data,
 vl_size dimension,
 vl_size numData,
 vl_size numCenters)
{
  vl_uindex pick, k ;
  VlRand * rand = vl_get_rand () ;
  TYPE * distances = vl_malloc (sizeof(TYPE) * numData) ;
  TYPE * minDistances = vl_malloc (sizeof(TYPE) * numData) ;
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction distFn =
    vl_get_vector_comparison_function_f(self->distance) ;
#else
  VlDoubleVectorComparisonFunction distFn =
    vl_get_vector_comparison_function_d(self->distance) ;
#endif

  self->dimension = dimension ;
  self->numCenters = numCenters ;
  self->centers = vl_malloc (sizeof(TYPE) * dimension * numCenters) ;

  /* no center chosen yet: every point is infinitely far */
  for (pick = 0 ; pick < numData ; ++pick) {
    minDistances[pick] = (TYPE) VL_INFINITY_D ;
  }

  /* the first center is drawn with vl_rand_uindex */
  pick = vl_rand_uindex (rand, numData) ;

  for (k = 0 ; ; ) {
    TYPE total = 0 ;
    TYPE running = 0 ;
    /* drawn on every pass, including the last one where it is unused,
       so that the random stream is consumed identically */
    TYPE thresh = (TYPE) vl_rand_real1 (rand) ;
    TYPE * center = (TYPE*)self->centers + k * dimension ;

    /* store the selected data point as center number k */
    memcpy (center, data + pick * dimension, sizeof(TYPE) * dimension) ;

    ++ k ;
    if (k == numCenters) { break ; }

    /* compare the newly added center with every data point */
    VL_XCAT(vl_eval_vector_comparison_on_all_pairs_, SFX)
      (distances,
       dimension,
       center, 1,
       data, numData,
       distFn) ;

    for (pick = 0 ; pick < numData ; ++pick) {
      minDistances[pick] = VL_MIN(minDistances[pick], distances[pick]) ;
      total += minDistances[pick] ;
    }

    /* select the next point by walking the cumulative sum; the loop
       leaves pick at numData - 1 when the threshold is never reached */
    for (pick = 0 ; pick < numData - 1 ; ++pick) {
      running += minDistances[pick] ;
      if (running >= thresh * total) { break ; }
    }
  }

  vl_free(distances) ;
  vl_free(minDistances) ;
}
Exemplo n.º 8
0
/**
 * Run a number of stochastic sub-gradient (PEGASOS-style) iterations to
 * train a binary linear SVM, updating @a model in place.
 *
 * At each iteration a sample is drawn with vl_rand_uindex(), its score
 * under the current model is computed, and the model is either only
 * shrunk (margin satisfied) or shrunk and moved towards the sample
 * (margin violated) and then rescaled so that its norm does not exceed
 * 1 / sqrt(regularizer). The learning rate at iteration t is
 * 1 / (t * regularizer).
 *
 * @param model             weights, updated in place; entry
 *                          model[dimension] is the bias weight and is
 *                          accessed only when @a biasMultiplier is non-zero.
 * @param data              numSamples * dimension sample entries.
 * @param dimension         dimension of the samples.
 * @param numSamples        number of samples.
 * @param labels            one label per sample.
 * @param regularizer       regularization coefficient (lambda).
 * @param biasMultiplier    value of the constant feature appended to each
 *                          sample; zero disables the bias term.
 * @param startingIteration index of the first iteration; must be >= 1
 *                          (asserted).
 * @param numIterations     number of iterations to run.
 * @param randomGenerator   random number generator; NULL selects the one
 *                          returned by vl_get_rand().
 */
VL_EXPORT void
VL_XCAT(vl_pegasos_train_binary_svm_,SFX)(T *  model,
                                          T const * data,
                                          vl_size dimension,
                                          vl_size numSamples,
                                          vl_int8 const * labels,
                                          double regularizer,
                                          double biasMultiplier,
                                          vl_uindex startingIteration,
                                          vl_size numIterations,
                                          VlRand * randomGenerator)
{
  vl_uindex t ;
  vl_uindex j ;
  T const * sample ;
  T score, rate, label, sqNorm, scale = 1 ;
  double lambda = regularizer ;
  double sqrtLambda = sqrt(lambda) ;

  /* inner product routine (the comparison function for VlKernelL2) */
#if (FLT == VL_TYPE_FLOAT)
  VlFloatVectorComparisonFunction dotFn =
#else
  VlDoubleVectorComparisonFunction dotFn =
#endif
  VL_XCAT(vl_get_vector_comparison_function_,SFX)(VlKernelL2) ;

  if (randomGenerator == NULL) { randomGenerator = vl_get_rand() ; }

  assert(startingIteration >= 1) ;

  /*
     The effective weights are scale * model[]. Iterations that do not
     violate the margin only touch scale; the factor is folded back into
     model[] on violating iterations and once more at the end.
   */

  for (t = startingIteration ;
       t < startingIteration + numIterations ;
       ++ t) {
    /* draw a training sample */
    vl_uindex k = vl_rand_uindex(randomGenerator, numSamples) ;
    sample = data + dimension * k ;
    label = labels[k] ;

    /* score of the sample under the current (scaled) model */
    score = dotFn(dimension, sample, model) ;
    if (biasMultiplier) { score += biasMultiplier * model[dimension] ; }
    score *= scale ;

    /* learning rate */
    rate = 1.0 / (t * lambda) ;

    if (! (label * score < (T) 1.0)) {
      /* margin not violated: shrink only */
      scale *= 1 - rate * lambda ;
      continue ;
    }

    /* margin violated: shrink, step towards the sample, then project */
    {
      T shrink = scale * (1 - rate * lambda)  ;
      T step = label * rate ;

      sqNorm = 0 ;
      for (j = 0 ; j < dimension ; ++j) {
        model[j] = shrink * model[j] + step * sample[j] ;
        sqNorm += model[j] * model[j] ;
      }
      if (biasMultiplier) {
        model[dimension] = shrink * model[dimension] + step * biasMultiplier ;
        sqNorm += model[dimension] * model[dimension] ;
      }
      scale = VL_MIN((T)1.0 / (sqrtLambda * sqrt(sqNorm + VL_EPSILON_D)), (T)1.0) ;
    }
  }

  /* denormalize representation */
  for (j = 0 ; j < dimension + (biasMultiplier ? 1 : 0) ; ++j) {
    model[j] *= scale ;
  }
}
Exemplo n.º 9
0
Arquivo: vlad.c Projeto: 1wy/ULDF
/*
 * Compute a VLAD encoding of `numData` vectors of size `dimension`.
 *
 * enc          output, numClusters * dimension values, one row per cluster;
 *              it is cleared before accumulation.
 * means        cluster centers, one row of `dimension` values per cluster.
 * data         input vectors, one row of `dimension` values per datum.
 * assignments  weights indexed as [datum * numClusters + cluster]; only
 *              strictly positive weights contribute.
 * flags        combination of VL_VLAD_FLAG_* bits selecting the optional
 *              normalization steps below.
 *
 * For every cluster the weighted sum of the assigned data is accumulated
 * and the cluster mean is subtracted (weighted by the total mass, or after
 * dividing by the mass when VL_VLAD_FLAG_NORMALIZE_MASS is set). Optional
 * signed square rooting and per-cluster L2 normalization follow. Unless
 * VL_VLAD_FLAG_UNNORMALIZED is set, the whole encoding is L2 normalized
 * at the end.
 */
static void
VL_XCAT(_vl_vlad_encode_, SFX)
(TYPE * enc,
 TYPE const * means, vl_size dimension, vl_size numClusters,
 TYPE const * data, vl_size numData,
 TYPE const * assignments,
 int flags)
{
  vl_uindex d ;
  vl_index cluster, sample ;

  memset(enc, 0, sizeof(TYPE) * dimension * numClusters) ;

  /* each iteration only writes its own row of enc */
#if defined(_OPENMP)
#pragma omp parallel for default(shared) private(cluster,sample,d) num_threads(vl_get_max_threads())
#endif
  for (cluster = 0; cluster < (signed)numClusters; cluster++) {
    TYPE * encRow = enc + cluster * dimension ;
    TYPE const * meanRow = means + cluster * dimension ;
    double mass = 0 ;

    /* accumulate the weighted sum of the data assigned to this cluster */
    for (sample = 0; sample < (signed)numData; sample++) {
      double weight = assignments[sample * numClusters + cluster] ;
      TYPE const * x ;

      if (! (weight > 0)) continue ;

      mass += weight ;
      x = data + sample * dimension ;
      for (d = 0; d < dimension; d++) {
        encRow[d] += weight * x[d] ;
      }
    }

    /* turn the sum into a difference from the cluster center */
    if (mass > 0) {
      if (flags & VL_VLAD_FLAG_NORMALIZE_MASS) {
        for (d = 0; d < dimension; d++) {
          encRow[d] /= mass ;
          encRow[d] -= meanRow[d] ;
        }
      } else {
        for (d = 0; d < dimension; d++) {
          encRow[d] -= mass * meanRow[d] ;
        }
      }
    }

    /* signed square root of every component */
    if (flags & VL_VLAD_FLAG_SQUARE_ROOT) {
      for (d = 0; d < dimension; d++) {
        TYPE value = encRow[d] ;
        encRow[d] = (value >= 0)
          ?   VL_XCAT(vl_sqrt_, SFX)(value)
          : - VL_XCAT(vl_sqrt_, SFX)(- value) ;
      }
    }

    /* L2 normalization of this cluster's row */
    if (flags & VL_VLAD_FLAG_NORMALIZE_COMPONENTS) {
      TYPE rowNorm = 0 ;
      for (d = 0; d < dimension; d++) {
        TYPE value = encRow[d] ;
        rowNorm += value * value ;
      }
      rowNorm = VL_XCAT(vl_sqrt_, SFX)(rowNorm) ;
      rowNorm = VL_MAX(rowNorm, 1e-12) ;
      for (d = 0; d < dimension; d++) {
        encRow[d] /= rowNorm ;
      }
    }
  }

  if (flags & VL_VLAD_FLAG_UNNORMALIZED) {
    return ;
  }

  /* L2 normalization of the whole encoding */
  {
    vl_size const length = dimension * numClusters ;
    TYPE fullNorm = 0 ;
    for (d = 0 ; d < length ; d++) {
      TYPE value = enc[d] ;
      fullNorm += value * value ;
    }
    fullNorm = VL_XCAT(vl_sqrt_, SFX)(fullNorm) ;
    fullNorm = VL_MAX(fullNorm, 1e-12) ;
    for (d = 0 ; d < length ; d++) {
      enc[d] /= fullNorm ;
    }
  }
}
Exemplo n.º 10
0
/*
 * Return the function implementing the vector comparison `type`.
 *
 * The generic implementation is selected first; an unrecognized `type`
 * aborts the program. Unless VL_DISABLE_SSE2 is defined, the generic
 * choice is then replaced by the SSE2 variant when the CPU reports SSE2,
 * SIMD is enabled, and such a variant exists for `type`.
 */
VL_EXPORT COMPARISONFUNCTION_TYPE
VL_XCAT(vl_get_vector_comparison_function_, SFX)(VlVectorComparisonType type)
{
  COMPARISONFUNCTION_TYPE selected = 0 ;

  switch (type) {
    case VlDistanceL2 :
      selected = VL_XCAT(_vl_distance_l2_, SFX) ;
      break ;
    case VlDistanceL1 :
      selected = VL_XCAT(_vl_distance_l1_, SFX) ;
      break ;
    case VlDistanceChi2 :
      selected = VL_XCAT(_vl_distance_chi2_, SFX) ;
      break ;
    case VlDistanceHellinger :
      selected = VL_XCAT(_vl_distance_hellinger_, SFX) ;
      break ;
    case VlDistanceJS :
      selected = VL_XCAT(_vl_distance_js_, SFX) ;
      break ;
    case VlKernelL2 :
      selected = VL_XCAT(_vl_kernel_l2_, SFX) ;
      break ;
    case VlKernelL1 :
      selected = VL_XCAT(_vl_kernel_l1_, SFX) ;
      break ;
    case VlKernelChi2 :
      selected = VL_XCAT(_vl_kernel_chi2_, SFX) ;
      break ;
    case VlKernelHellinger :
      selected = VL_XCAT(_vl_kernel_hellinger_, SFX) ;
      break ;
    case VlKernelJS :
      selected = VL_XCAT(_vl_kernel_js_, SFX) ;
      break ;
    default :
      abort() ;
  }

#ifndef VL_DISABLE_SSE2
  /* keep the generic implementation when SSE2 cannot be used */
  if (! (vl_cpu_has_sse2() && vl_get_simd_enabled())) {
    return selected ;
  }

  /* only some comparison types have a SSE2 variant */
  switch (type) {
    case VlDistanceL2 :
      selected = VL_XCAT(_vl_distance_l2_sse2_, SFX) ;
      break ;
    case VlDistanceL1 :
      selected = VL_XCAT(_vl_distance_l1_sse2_, SFX) ;
      break ;
    case VlDistanceChi2 :
      selected = VL_XCAT(_vl_distance_chi2_sse2_, SFX) ;
      break ;
    case VlKernelL2 :
      selected = VL_XCAT(_vl_kernel_l2_sse2_, SFX) ;
      break ;
    case VlKernelL1 :
      selected = VL_XCAT(_vl_kernel_l1_sse2_, SFX) ;
      break ;
    case VlKernelChi2 :
      selected = VL_XCAT(_vl_kernel_chi2_sse2_, SFX) ;
      break ;
    default :
      break ;
  }
#endif

  return selected ;
}
Exemplo n.º 11
0
/*
 * Compute a Fisher vector encoding of `numData` vectors of size
 * `dimension` with respect to a Gaussian mixture of `numClusters`
 * components.
 *
 * enc          output, 2 * dimension * numClusters values. The first
 *              numClusters * dimension values hold the first-order terms
 *              (one row per component), the remaining ones the
 *              second-order terms in the same layout. It is cleared
 *              before accumulation.
 * means        component means, one row of `dimension` values each.
 * covariances  per-dimension covariance values, same layout as `means`.
 * priors       one weight per component.
 * data         input vectors, one row of `dimension` values per datum.
 * flags        combination of VL_FISHER_FLAG_* bits:
 *              FAST keeps only the largest posterior of each datum,
 *              SQUARE_ROOT applies a signed square root to the result,
 *              NORMALIZED L2-normalizes the result.
 *
 * Returns the number of (datum, component) pairs whose posterior was at
 * least 1e-6 and therefore contributed to the encoding.
 *
 * Components whose prior is not strictly positive are skipped and their
 * part of `enc` is left at zero; likewise the encoding is all zeros when
 * `numData` is zero. Without these guards the 1/(numData*sqrt(prior))
 * scaling would be Inf or NaN and propagate into `enc`.
 */
static vl_size
VL_XCAT(_vl_fisher_encode_, SFX)
(TYPE * enc,
 TYPE const * means, vl_size dimension, vl_size numClusters,
 TYPE const * covariances,
 TYPE const * priors,
 TYPE const * data, vl_size numData,
 int flags)
{
  vl_size dim;
  vl_index i_cl, i_d;
  vl_size numTerms = 0 ;
  TYPE * posteriors ;
  TYPE * sqrtInvSigma;

  assert(numClusters >= 1) ;
  assert(dimension >= 1) ;

  posteriors = vl_malloc(sizeof(TYPE) * numClusters * numData);
  sqrtInvSigma = vl_malloc(sizeof(TYPE) * dimension * numClusters);

  memset(enc, 0, sizeof(TYPE) * 2 * dimension * numClusters) ;

  /* precompute 1/sqrt(covariance) for every component and dimension */
  for (i_cl = 0 ; i_cl < (signed)numClusters ; ++i_cl) {
    for(dim = 0; dim < dimension; dim++) {
      sqrtInvSigma[i_cl*dimension + dim] = sqrt(1.0 / covariances[i_cl*dimension + dim]);
    }
  }

  /* posteriors are stored as [datum * numClusters + component] */
  VL_XCAT(vl_get_gmm_data_posteriors_, SFX)(posteriors, numClusters, numData,
                                            priors,
                                            means, dimension,
                                            covariances,
                                            data) ;

  /* sparsify posterior assignments with the FAST option */
  if (flags & VL_FISHER_FLAG_FAST) {
    for(i_d = 0; i_d < (signed)numData; i_d++) {
      /* find largest posterior assignment for datum i_d */
      vl_index best = 0 ;
      TYPE bestValue = posteriors[i_d * numClusters] ;
      for (i_cl = 1 ; i_cl < (signed)numClusters; ++ i_cl) {
        TYPE p = posteriors[i_cl + i_d * numClusters] ;
        if (p > bestValue) {
          bestValue = p ;
          best = i_cl ;
        }
      }
      /* make all posterior assignments zero but the best one */
      for (i_cl = 0 ; i_cl < (signed)numClusters; ++ i_cl) {
        posteriors[i_cl + i_d * numClusters] =
        (TYPE)(i_cl == best) ;
      }
    }
  }

  /* each iteration only writes the two rows of enc of its own component */
#if defined(_OPENMP)
#pragma omp parallel for default(shared) private(i_cl, i_d, dim) num_threads(vl_get_max_threads()) reduction(+:numTerms)
#endif
  for(i_cl = 0; i_cl < (signed)numClusters; ++ i_cl) {
    TYPE * uk = enc + i_cl*dimension ;
    TYPE * vk = enc + i_cl*dimension + numClusters * dimension ;

    /*
     A component without a strictly positive prior cannot be rescaled:
     1/sqrt(prior) would be Inf (zero prior) or NaN (negative or NaN
     prior) and would make this part of the encoding non-finite. Skip
     it, leaving its rows at the zeros written by memset; no terms are
     counted for it.
     */
    if (! (priors[i_cl] > 0)) continue ;

    for(i_d = 0; i_d < (signed)numData; i_d++) {
      TYPE p = posteriors[i_cl + i_d * numClusters] ;
      /* negligible posteriors are not accumulated nor counted */
      if (p < 1e-6) continue ;
      numTerms += 1;
      for(dim = 0; dim < dimension; dim++) {
        /* deviation from the mean in units of standard deviation */
        TYPE diff = data[i_d*dimension + dim] - means[i_cl*dimension + dim] ;
        diff *= sqrtInvSigma[i_cl*dimension + dim] ;
        *(uk + dim) += p * diff ;
        *(vk + dim) += p * (diff * diff - 1);
      }
    }

    /*
     With no data the accumulators are zero and the scaling factor would
     be 1/0; multiplying the two gives NaN, so skip the rescaling.
     */
    if (numData > 0) {
      TYPE uprefix = 1/(numData*sqrt(priors[i_cl]));
      TYPE vprefix = 1/(numData*sqrt(2*priors[i_cl]));

      for(dim = 0; dim < dimension; dim++) {
        *(uk + dim) = *(uk + dim) * uprefix;
        *(vk + dim) = *(vk + dim) * vprefix;
      }
    }
  }

  vl_free(posteriors);
  vl_free(sqrtInvSigma) ;

  /* signed square root of every component */
  if (flags & VL_FISHER_FLAG_SQUARE_ROOT) {
    for(dim = 0; dim < 2 * dimension * numClusters ; dim++) {
      TYPE z = enc [dim] ;
      if (z >= 0) {
        enc[dim] = VL_XCAT(vl_sqrt_, SFX)(z) ;
      } else {
        enc[dim] = - VL_XCAT(vl_sqrt_, SFX)(- z) ;
      }
    }
  }

  /* L2 normalization; the norm is clamped from below to avoid dividing by zero */
  if (flags & VL_FISHER_FLAG_NORMALIZED) {
    TYPE n = 0 ;
    for(dim = 0 ; dim < 2 * dimension * numClusters ; dim++) {
      TYPE z = enc [dim] ;
      n += z * z ;
    }
    n = VL_XCAT(vl_sqrt_, SFX)(n) ;
    n = VL_MAX(n, 1e-12) ;
    for(dim = 0 ; dim < 2 * dimension * numClusters ; dim++) {
      enc[dim] /= n ;
    }
  }

  return numTerms ;
}