Beispiel #1
0
/* Build a cluster for the index set idx[0..size-1].
 *
 * cf    cluster geometry handed through to the helper routines
 * size  number of indices, must be positive
 * idx   index array; passed on to new_cluster and build_help_cluster
 * clf   size threshold: index sets with at most clf entries become a
 *       single leaf cluster
 *
 * For size > clf an auxiliary tree is built by build_help_cluster, the
 * number of leaves it reports becomes the number of sons of the returned
 * cluster, and leaves_into_sons is called to fill those sons.
 *
 * Returns the new cluster after update_cluster has been applied to it. */
pcluster
build_simsub_cluster(pclustergeometry cf, uint size, uint * idx, uint clf)
{
  pcluster  root;
  pcluster  helper;
  uint      leaf_count = 0;

  assert(size > 0);

  /* Small index sets: a single leaf with a support bounding box. */
  if (size <= clf) {
    root = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, root);
    update_cluster(root);

    return root;
  }

  /* The helper tree counts its leaves into leaf_count. */
  helper = build_help_cluster(cf, idx, size, clf, 0, &leaf_count);
  root = new_cluster(size, idx, leaf_count, cf->dim);

  /* The counter is reset before being handed to leaves_into_sons. */
  leaf_count = 0;
  leaves_into_sons(cf, clf, helper, root, &leaf_count);

  update_bbox_cluster(root);
  update_cluster(root);

  return root;
}
// Classify a signature against the known clusters and return its phase id.
//
// FindNearestCluster is asked for a cluster matching `signature` under
// `similarity_threshold`; it reports -1 when there is none.
//  * No match: a new cluster is created from the signature, given a phase
//    (a fresh id if its count already reaches `transition_threshold`,
//    otherwise TRANSITION_PHASE_ID), appended to `clusters`, and its phase
//    is returned.
//  * Match: the signature is merged into the cluster, the cluster's count is
//    incremented, and a cluster still in TRANSITION_PHASE_ID is promoted to
//    a fresh phase id once its count reaches `transition_threshold`.
int
UnboundedLeaderFollowerClassifier::ClassifySignature(
    const spi::util::Signature& signature)
{
    const int nearest =
        FindNearestCluster(clusters, signature, similarity_threshold);

    // Nothing similar enough: start a new cluster from this signature.
    if (nearest == -1)
    {
        Cluster fresh(signature);

        if (fresh.count >= transition_threshold)
        {
            // Already stable: hand out the next phase id.
            fresh.phase = next_phase_id;
            next_phase_id++;
        }
        else
        {
            fresh.phase = TRANSITION_PHASE_ID;
        }

        clusters.push_back(fresh);
        return fresh.phase;
    }

    Cluster& matched = clusters[nearest];

    // Fold the signature into the cluster centre; the weight uses the count
    // as it was before this observation is added.
    matched.signature.Merge(signature, 1.0 / matched.count);
    matched.count++;

    // Promote a transitional cluster once it has been seen often enough.
    if (matched.phase == TRANSITION_PHASE_ID &&
        matched.count >= transition_threshold)
    {
        matched.phase = next_phase_id;
        next_phase_id++;
    }

    return matched.phase;
}
Beispiel #3
0
void db::new_term(const string& term)
{
	throw_if_reader();
	cluster * first_cluster = new_cluster();
	if (first_cluster == NULL) //New file.
	{
		first_cluster = new_cluster();
	}
	uint pos = ((char*)first_cluster) - m_work_file;
	uint cur = ((char*)&(first_cluster->records)) - m_work_file;
	first_cluster->term = 0;
	term_desc desc;
	desc.cur = cur;
	desc.pos = pos;
	m_descriptors->insert(pair<string, term_desc>(term, desc));
}
Beispiel #4
0
// Append one (doc_id, freq) record to the posting data of a term.
//
// term_c  the term as supplied by the caller; a copy is passed through
//         fancy_term() before lookup
// doc_id  document id stored in the record
// freq    frequency stored in the record
//
// If the term has no descriptor yet, new_term() creates one. The descriptor's
// `cur` offset is where the record is written (via record()). When the
// current cluster is on its last slot, a follow-up cluster is allocated first,
// linked through `next_cluster`, and the descriptor is moved to the start of
// the new cluster's records after the write. If that allocation returns NULL
// (per the original comment: a new file was started), the whole call is
// retried from scratch.
void db::add(const string & term_c, int doc_id, int freq)
{
	throw_if_reader();
	string term = term_c;
	fancy_term(term);
	auto descriptor = m_descriptors->find(term);
	if (descriptor == m_descriptors->end())
	{
		new_term(term);
		// The first lookup yielded end(); fetch the entry new_term inserted.
		descriptor = m_descriptors->find(term);
	}
	uint cur = descriptor->second.cur;
	uint cur_in_clusters = cur - sizeof(file_header);
	// Stays NULL unless a follow-up cluster is allocated below. The original
	// left this uninitialised and could dereference it further down.
	cluster * added_cluster = NULL;
	// Round the offset down to a multiple of sizeof(cluster) to find the
	// cluster that contains `cur`.
	cluster * cur_cluster = (cluster*)(((char*)m_cluster_space) + cur_in_clusters / sizeof(cluster) * sizeof(cluster));
	if (cur_cluster->cur_pos == CLUSTER_SIZE_RECORDS - 1)
	{
		// This write fills the cluster: chain a new one behind it.
		added_cluster = new_cluster();
		if (added_cluster == NULL)
		{
			add(term_c, doc_id, freq);
			return; //New file - new rules.
		}
		cur_cluster->next_cluster = ((char*)added_cluster) - m_work_file;
	}
	term_record rec;
	rec.doc_id = doc_id;
	rec.freq = freq;
	record(cur, rec);
	descriptor->second.cur += sizeof(term_record);
	// Jump to the new cluster only if one was actually allocated in this call.
	if (added_cluster != NULL && cur_cluster->next_cluster != (uint)(-1))
	{
		descriptor->second.cur = ((char*)&added_cluster->records) - m_work_file;
	}
	cur_cluster->cur_pos++;
}
Beispiel #5
0
/* Extend the cluster tree below t by up to depth additional levels.
 *
 * With depth > 0, a cluster that has sons passes the request on to each son
 * with depth - 1. A cluster without sons receives exactly one son that
 * shares its size, idx and dim and copies its bounding box, and the
 * extension continues in that son with depth - 1.
 * update_cluster(t) is called in every case, including depth == 0. */
void
extend_cluster(pcluster t, uint depth)
{
  uint      k;
  pcluster  copy;

  if (depth > 0) {
    if (t->son) {
      /* Inner cluster: descend into all existing sons. */
      for (k = 0; k < t->sons; k++) {
        extend_cluster(t->son[k], depth - 1);
      }
    }
    else {
      /* Leaf: attach a single son covering the same indices. */
      t->sons = 1;
      t->son = (pcluster *) allocmem(sizeof(pcluster));

      copy = new_cluster(t->size, t->idx, 0, t->dim);
      t->son[0] = copy;

      for (k = 0; k < t->dim; k++) {
        copy->bmin[k] = t->bmin[k];
        copy->bmax[k] = t->bmax[k];
      }

      extend_cluster(copy, depth - 1);
    }
  }

  update_cluster(t);
}
void FoF::find_friends (const Zbin &zbin, Galaxy &gal, double rfriend) {
  //! Function to find galaxies linked to the galaxy in question.
  /**< Loop through kd-tree nodes */
  for(int j = 0; j < tree.node_list.size(); j++) {
    /**< Check if galaxy is compatible with kd-tree node */
    if(node_check(gal, tree.node_list[j], rfriend)) {
      /**< Loop through node members */
      for(int k = 0; k < tree.node_list[j].members.size(); k++) {
	/**< Check if galaxies are friends */
	int gal_now = tree.node_list[j].members[k].num;
	if(friendship(zbin, gal, gal_list[gal_now], rfriend)) {
	  /**< Create new cluster */
	  if(!gal.in_cluster[zbin.num]) 
	    new_cluster(zbin, gal, gal_list[gal_now]);
	  /**< Add new member to existing cluster */
	  else 
	    add_member(zbin, gal_list[gal_now], list_of_clusters[cluster_count]);
	}
      } // end of node member loop
    }
  } // end of node loop
}
Beispiel #7
0
/* Build a cluster tree for idx[0..size-1] by bisection along the direction
 * in which the bounding box stored in cf is widest.
 *
 * cf    cluster geometry; cf->hmin / cf->hmax are read after
 *       update_point_bbox_clustergeometry(cf, size, idx) has been called
 * size  number of indices, must be positive
 * idx   index array; reordered in place so that indices whose coordinate
 *       lies below the midpoint come first
 * clf   size threshold: index sets with at most clf entries become leaves
 *
 * Returns the new cluster after update_cluster has been applied to it. */
pcluster
build_adaptive_cluster(pclustergeometry cf, uint size, uint * idx, uint clf)
{
  pcluster  t;

  uint      axis, d;
  uint      lower, upper;
  uint      i, tmp;
  real      extent, width, mid;

  assert(size > 0);

  /* Small index sets become leaves right away. */
  if (size <= clf) {
    t = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, t);
    update_cluster(t);

    return t;
  }

  update_point_bbox_clustergeometry(cf, size, idx);

  /* Pick the direction with the largest extent (first one wins on ties). */
  axis = 0;
  extent = cf->hmax[0] - cf->hmin[0];
  for (d = 1; d < cf->dim; d++) {
    width = cf->hmax[d] - cf->hmin[d];
    if (extent < width) {
      extent = width;
      axis = d;
    }
  }

  if (extent > 0.0) {
    mid = (cf->hmax[axis] + cf->hmin[axis]) / 2.0;
    lower = 0;
    upper = 0;

    /* Move every index below the midpoint to the front of idx. */
    for (i = 0; i < size; i++) {
      if (cf->x[idx[i]][axis] < mid) {
        tmp = idx[i];
        idx[i] = idx[lower];
        idx[lower] = tmp;
        lower++;
      }
      else {
        upper++;
      }
    }

    if (lower > 0 && upper > 0) {
      /* Both halves are populated: recurse into each. */
      t = new_cluster(size, idx, 2, cf->dim);

      t->son[0] = build_adaptive_cluster(cf, lower, idx, clf);
      t->son[1] = build_adaptive_cluster(cf, upper, idx + lower, clf);

      update_bbox_cluster(t);
    }
    else {
      /* Everything landed on one side: keep the set as a leaf. */
      if (lower > 0) {
        assert(lower == size);
      }
      else {
        assert(upper > 0);
        assert(upper == size);
      }

      /* NOTE(review): this cluster has no sons, yet update_bbox_cluster is
       * called here while the other leaf branches use
       * update_support_bbox_cluster - confirm this is intended. */
      t = new_cluster(size, idx, 0, cf->dim);
      update_bbox_cluster(t);
    }
  }
  else {
    /* Zero extent in every direction: nothing to split. */
    assert(extent == 0.0);
    t = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, t);
  }

  update_cluster(t);

  return t;
}
Beispiel #8
0
/* Build a cluster tree for idx[0..size-1] by splitting along a direction
 * obtained from the weighted covariance matrix of the points.
 *
 * cf    cluster geometry; cf->x (coordinates), cf->w (weights) and cf->dim
 *       are read
 * size  number of indices, must be positive
 * idx   index array; reordered in place so that indices on the non-negative
 *       side of the separation plane come first
 * clf   size threshold: index sets with at most clf entries become leaves
 *
 * For size > clf the weighted centre of mass and covariance matrix are set
 * up, eig_amatrix is called, and the last column of the eigenvector matrix
 * (per the original comment: the eigenvector of the largest eigenvalue) is
 * used as the normal of a separation plane through the centre of mass.
 *
 * If the plane leaves one side empty, the index set is kept as a leaf.
 * Recursing in that situation would call this function again with exactly
 * the same arguments (idx is left in the same order), so it could never
 * terminate.
 *
 * Returns the new cluster after update_cluster has been applied to it. */
pcluster
build_pca_cluster(pclustergeometry cf, uint size, uint * idx, uint clf)
{
  const uint dim = cf->dim;

  pamatrix  C, Q;
  pavector  v;
  prealavector lambda;
  real     *x, *y;
  real      w;
  uint      i, j, k, size0, size1;

  pcluster  t;

  assert(size > 0);

  size0 = 0;
  size1 = 0;

  if (size > clf) {
    x = allocreal(dim);
    y = allocreal(dim);

    /* determine weight of current cluster */
    w = 0.0;
    for (i = 0; i < size; ++i) {
      w += cf->w[idx[i]];
    }
    w = 1.0 / w;

    for (j = 0; j < dim; ++j) {
      x[j] = 0.0;
    }

    /* determine center of mass */
    for (i = 0; i < size; ++i) {
      for (j = 0; j < dim; ++j) {
        x[j] += cf->w[idx[i]] * cf->x[idx[i]][j];
      }
    }
    for (j = 0; j < dim; ++j) {
      x[j] *= w;
    }

    C = new_zero_amatrix(dim, dim);
    Q = new_zero_amatrix(dim, dim);
    lambda = new_realavector(dim);

    /* setup covariance matrix: sum of w_i * (x_i - X) (x_i - X)^T */
    for (i = 0; i < size; ++i) {

      for (j = 0; j < dim; ++j) {
        y[j] = cf->x[idx[i]][j] - x[j];
      }

      for (j = 0; j < dim; ++j) {
        for (k = 0; k < dim; ++k) {
          C->a[j + k * C->ld] += cf->w[idx[i]] * y[j] * y[k];
        }
      }
    }

    /* get eigenvalues and eigenvectors of covariance matrix */
    eig_amatrix(C, lambda, Q);

    /* use the last column of Q as the normal of the separation plane */
    v = new_avector(0);
    init_column_avector(v, Q, dim - 1);

    /* separate cluster with v as separation-plane */
    for (i = 0; i < size; ++i) {
      /* x_i - X */
      for (j = 0; j < dim; ++j) {
        y[j] = cf->x[idx[i]][j] - x[j];
      }

      /* <y,v> */
      w = 0.0;
      for (j = 0; j < dim; ++j) {
        w += y[j] * v->v[j];
      }

      /* non-negative side goes to the front of idx */
      if (w >= 0.0) {
        j = idx[i];
        idx[i] = idx[size0];
        idx[size0] = j;
        size0++;
      }
      else {
        size1++;
      }
    }

    assert(size0 + size1 == size);

    del_amatrix(Q);
    del_amatrix(C);
    del_realavector(lambda);
    del_avector(v);
    freemem(x);
    freemem(y);

    /* recursion */
    if (size0 > 0 && size1 > 0) {
      t = new_cluster(size, idx, 2, cf->dim);

      t->son[0] = build_pca_cluster(cf, size0, idx, clf);
      t->son[1] = build_pca_cluster(cf, size1, idx + size0, clf);

      update_bbox_cluster(t);
    }
    else {
      /* The plane did not separate anything (e.g. all points coincide).
       * Stop here and treat the set as a leaf, exactly like the
       * size <= clf case below. */
      t = new_cluster(size, idx, 0, cf->dim);
      update_support_bbox_cluster(cf, t);
    }

  }
  else {
    t = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, t);
  }

  update_cluster(t);

  return t;
}
Beispiel #9
0
/* Auxiliary routine for build_simsub_cluster.
 *
 * Bisects the box described by cf->hmin / cf->hmax once in every coordinate
 * direction, starting at `direction` and moving on to direction + 1 in each
 * recursive call. idx[0..size-1] is reordered in place so that indices whose
 * coordinate lies below the midpoint come first.
 *
 * While a sub-box is processed, cf->hmin[direction] / cf->hmax[direction]
 * are temporarily narrowed to that sub-box and restored afterwards.
 *
 * Once direction reaches cf->dim, a cluster without sons is created, its
 * bounding box is copied from the current cf->hmin / cf->hmax, and
 * leaves[0] is incremented.
 *
 * Returns the cluster for this level. */
static pcluster
build_help_cluster(pclustergeometry cf, uint * idx, uint size,
                   uint clf, uint direction, uint * leaves)
{
  pcluster  node;

  uint      below, above;
  uint      i, tmp;
  real      lo, hi, extent, mid;

  /* All directions have been bisected: emit a leaf with the current box. */
  if (direction >= cf->dim) {
    node = new_cluster(size, idx, 0, cf->dim);
    for (i = 0; i < cf->dim; i++) {
      node->bmin[i] = cf->hmin[i];
      node->bmax[i] = cf->hmax[i];
    }
    leaves[0]++;

    return node;
  }

  /* Remember the bounds of this direction so they can be restored. */
  lo = cf->hmin[direction];
  hi = cf->hmax[direction];
  extent = hi - lo;

  if (extent > 0.0) {
    mid = (hi + lo) / 2.0;
    below = 0;
    above = 0;

    /* Move every index below the midpoint to the front of idx. */
    for (i = 0; i < size; i++) {
      if (cf->x[idx[i]][direction] < mid) {
        tmp = idx[i];
        idx[i] = idx[below];
        idx[below] = tmp;
        below++;
      }
      else {
        above++;
      }
    }

    if (below > 0 && above > 0) {
      /* Both halves are populated. */
      node = new_cluster(size, idx, 2, cf->dim);

      /* lower half: [lo, mid] */
      cf->hmax[direction] = mid;
      node->son[0] = build_help_cluster(cf, idx, below, clf, direction + 1,
                                        leaves);
      cf->hmax[direction] = hi;

      /* upper half: [mid, hi] */
      cf->hmin[direction] = mid;
      node->son[1] = build_help_cluster(cf, idx + below, above, clf,
                                        direction + 1, leaves);
      cf->hmin[direction] = lo;
    }
    else if (below > 0) {
      /* Only the lower half is populated. */
      node = new_cluster(size, idx, 1, cf->dim);

      cf->hmax[direction] = mid;
      node->son[0] = build_help_cluster(cf, idx, size, clf, direction + 1,
                                        leaves);
      cf->hmax[direction] = hi;
    }
    else {
      /* Only the upper half is populated. */
      assert(above > 0);

      node = new_cluster(size, idx, 1, cf->dim);

      cf->hmin[direction] = mid;
      node->son[0] = build_help_cluster(cf, idx, size, clf, direction + 1,
                                        leaves);
      cf->hmin[direction] = lo;
    }
  }
  else {
    /* The box is flat in this direction: pass everything on unchanged. */
    assert(extent == 0.0);
    node = new_cluster(size, idx, 1, cf->dim);
    node->son[0] =
      build_help_cluster(cf, idx, size, clf, direction + 1, leaves);
  }

  return node;
}
Beispiel #10
0
/* Build a cluster tree for idx[0..size-1] by bisecting at the midpoint of
 * cf->hmin / cf->hmax in the given coordinate direction, cycling through
 * the directions from one level to the next.
 *
 * cf         cluster geometry; hmin / hmax are temporarily modified around
 *            the recursive calls and restored afterwards
 * size       number of indices, must be positive
 * idx        index array; reordered in place so that indices whose
 *            coordinate lies below the midpoint come first
 * clf        size threshold: index sets with at most clf entries become
 *            leaves
 * direction  coordinate direction used for the split on this level
 *
 * Returns the new cluster after update_cluster has been applied to it. */
pcluster
build_regular_cluster(pclustergeometry cf, uint size, uint * idx,
		      uint clf, uint direction)
{
  pcluster  t;

  uint      newd;
  uint      size0, size1;
  uint      i, j;
  real      a, b, m;

  assert(size > 0);

  if (size > clf) {
    size0 = 0;
    size1 = 0;

    /* presumably recomputes cf->hmin / cf->hmax from the points in idx -
     * TODO confirm against update_point_bbox_clustergeometry */
    update_point_bbox_clustergeometry(cf, size, idx);

    /* direction for the next level, wrapping around after the last one */
    if (direction < cf->dim - 1) {
      newd = direction + 1;
    }
    else {
      newd = 0;
    }

    /* extent of the box in the split direction */
    m = cf->hmax[direction] - cf->hmin[direction];

    if (m > 0.0) {
      /* from here on m is the midpoint, not the extent */
      m = (cf->hmax[direction] + cf->hmin[direction]) / 2.0;

      /* move every index below the midpoint to the front of idx */
      for (i = 0; i < size; i++) {
	if (cf->x[idx[i]][direction] < m) {
	  j = idx[i];
	  idx[i] = idx[size0];
	  idx[size0] = j;
	  size0++;
	}
	else {
	  size1++;
	}
      }

      /* build sons */
      if (size0 > 0) {
	if (size1 > 0) {
	  /* both sons are not empty */
	  t = new_cluster(size, idx, 2, cf->dim);

	  /* save the bounds, then narrow the box to the lower half */
	  a = cf->hmin[direction];
	  b = cf->hmax[direction];
	  cf->hmax[direction] = m;

	  t->son[0] = build_regular_cluster(cf, size0, idx, clf, newd);

	  /* restore the upper bound, then narrow to the upper half */
	  cf->hmax[direction] = b;
	  cf->hmin[direction] = m;

	  t->son[1] =
	    build_regular_cluster(cf, size1, idx + size0, clf, newd);

	  /* restore the lower bound */
	  cf->hmin[direction] = a;

	  update_bbox_cluster(t);
	}
	else {
	  /* only the first son is not empty */
	  t = new_cluster(size, idx, 1, cf->dim);

	  b = cf->hmax[direction];
	  cf->hmax[direction] = m;

	  t->son[0] = build_regular_cluster(cf, size, idx, clf, newd);

	  cf->hmax[direction] = b;

	  update_bbox_cluster(t);
	}
      }
      else {
	/* only the second son is not empty */
	assert(size1 > 0);

	t = new_cluster(size, idx, 1, cf->dim);

	a = cf->hmin[direction];
	cf->hmin[direction] = m;

	t->son[0] = build_regular_cluster(cf, size, idx, clf, newd);

	cf->hmin[direction] = a;

	update_bbox_cluster(t);
      }
    }
    else {
      /* zero extent in this direction: pass the whole set on to the next
       * direction as a single son.
       * NOTE(review): size is not reduced here. If the extent is zero in
       * every direction (e.g. more than clf coincident points), this looks
       * like it recurses without end - confirm. */
      assert(m == 0.0);
      t = new_cluster(size, idx, 1, cf->dim);

      t->son[0] = build_regular_cluster(cf, size, idx, clf, newd);

      update_bbox_cluster(t);
    }
  }
  else {
    /* small index set: leaf cluster */
    t = new_cluster(size, idx, 0, cf->dim);
    update_support_bbox_cluster(cf, t);
  }

  update_cluster(t);

  return t;
}
Beispiel #11
0
/* Read one cluster and, recursively, all of its descendants from a NetCDF
 * file.
 *
 * nc_file     NetCDF file id passed to nc_get_vars
 * clusters    passed through to the recursive calls, otherwise unused here
 * coeffs      passed through to the recursive calls, otherwise unused here
 * nc_sons     variable id holding the number of sons per cluster
 * nc_size     variable id holding the size per cluster
 * nc_coeff    variable id holding the bounding box coefficients
 * idx         index array for this cluster; each son receives the part
 *             that follows the indices of its preceding siblings
 * dim         number of coefficients read for each of bmin and bmax
 * clusteridx  in/out: position of the next cluster entry; advanced by one
 *             per cluster read
 * coeffidx    in/out: position of the next coefficient; advanced by
 *             2 * dim per cluster read
 *
 * The sons are read before the bounding box of their father.
 * Every nc_get_vars call is checked with assert(result == NC_NOERR).
 *
 * Returns the cluster after update_cluster has been applied to it. */
static pcluster
read_cdf_part(int nc_file, size_t clusters, size_t coeffs,
	      int nc_sons, int nc_size, int nc_coeff,
	      uint * idx, int dim, size_t * clusteridx, size_t * coeffidx)
{
  pcluster  t, t1;
  uint     *idx1;
  uint      size;
  uint      sons;
  uint      i;
  size_t    start, count;
  ptrdiff_t stride;
  int       val, result;

  /* Get number of sons */
  start = *clusteridx;
  count = 1;
  stride = 1;
  result = nc_get_vars(nc_file, nc_sons, &start, &count, &stride, &val);
  assert(result == NC_NOERR);
  sons = val;

  /* Get size of cluster */
  result = nc_get_vars(nc_file, nc_size, &start, &count, &stride, &val);
  assert(result == NC_NOERR);
  size = val;

  /* Create new cluster */
  t = new_cluster(size, idx, sons, dim);

  /* Increase cluster index */
  (*clusteridx)++;

  /* Handle sons */
  if (sons > 0) {
    idx1 = idx;
    for (i = 0; i < sons; i++) {
      t1 = read_cdf_part(nc_file, clusters, coeffs,
			 nc_sons, nc_size, nc_coeff,
			 idx1, dim, clusteridx, coeffidx);
      t->son[i] = t1;

      /* the next son starts right behind this son's indices */
      idx1 += t1->size;
    }
    /* the sons together must cover exactly this cluster's indices */
    assert(idx1 == idx + size);
  }

  /* Get bounding box: dim values for bmin followed by dim values for bmax */
  start = (*coeffidx);
  count = dim;
  result = nc_get_vars(nc_file, nc_coeff, &start, &count, &stride, t->bmin);
  assert(result == NC_NOERR);
  start += dim;

  result = nc_get_vars(nc_file, nc_coeff, &start, &count, &stride, t->bmax);
  assert(result == NC_NOERR);
  start += dim;
  (*coeffidx) = start;

  /* Finish initialization */
  update_cluster(t);

  return t;
}