DenseSymmetricMatrix compute_distance_matrix(RandomAccessIterator begin, RandomAccessIterator /*end*/, Landmarks& landmarks, PairwiseCallback callback) { timed_context context("Multidimensional scaling distance matrix computation"); const IndexType n_landmarks = landmarks.size(); DenseSymmetricMatrix distance_matrix(n_landmarks,n_landmarks); #pragma omp parallel shared(begin,landmarks,distance_matrix,callback) default(none) { IndexType i_index_iter,j_index_iter; #pragma omp for nowait for (i_index_iter=0; i_index_iter<n_landmarks; ++i_index_iter) { for (j_index_iter=i_index_iter; j_index_iter<n_landmarks; ++j_index_iter) { ScalarType d = callback.distance(begin[landmarks[i_index_iter]],begin[landmarks[j_index_iter]]); d *= d; distance_matrix(i_index_iter,j_index_iter) = d; distance_matrix(j_index_iter,i_index_iter) = d; } } } return distance_matrix; }
/** * Be careful. The number of clusters changes and the number of members in * each cluster can also change. Make sure memory is being managed well. * @param n_in Number of input points to cluster * @param dim_in Dimension of the points to cluster * @param pts_in The list of n_in points of dimension dim_in * @param ep_in Epsilon distance to use to search point density * @param minpts_in Minimum number of points required to form a cluster * @param cs_out Cluster size vector output * @param clst_out Cluster list, a pass-by-reference of a int ** * @return Returns the number of clusters found */ int cluster_dbscan_density( int n_in, int dim_in, double *pts_in, double (*ep_in)(double*), int minpts_in, int **cs_out, int ***clst_out ) { /* Variables nc = number of clusters, cs_out = cluster sizes */ int i,j,k,m,nc,nca,nnb,nba,nnbp,nbap,*nb,*nbp,*csa; /* Variable nb is for storing neighbors at each iteration */ int *vtd; /* Variables vtd[i] = -3 if pts_in[i] not visited, vtd[i] = -2 if visited, vtd[i] = -1 if noise, vtd[i] >= 0 if i belongs to cluster vtd[i] */ double tmp,dist,*mat; /* Allocate space to store if each vertex is visited or not visited */ vtd = (int*) malloc( n_in * sizeof(int) ); for(i=0;i<n_in;i++) vtd[i] = -3; /* Initialize not visited */ /* Allocate space for temporary neighbor storage */ nba = CLUSTER_MEMBER_INCREMENT; nb = (int*) malloc( CLUSTER_MEMBER_INCREMENT * sizeof(int) ); nbap = CLUSTER_MEMBER_INCREMENT; nbp = (int*) malloc( CLUSTER_MEMBER_INCREMENT * sizeof(int) ); /* Allocate base memory for outputting the clusters and cluster sizes */ nca = CLUSTER_INCREMENT; /* Initialize an allocated size for cluster data */ csa = (int*) malloc( CLUSTER_INCREMENT * sizeof(int) ); /* Variable csa stores the amount of space allocated for each cluster individually */ *cs_out = (int*) malloc( CLUSTER_INCREMENT * sizeof(int) ); *clst_out = (int**) malloc( CLUSTER_INCREMENT * sizeof(int*) ); #ifdef CLUSTER_USE_MATRIX distance_matrix( n_in, dim_in, pts_in, &mat ); #endif /* Iterate through all unvisited nodes and their neighbors and mark them visited */ nc = 0; /* Start with zero clusters */ for(i=0;i<n_in;i++) { /* If point i has been visited then continue */ if( vtd[i] != -3 ) continue; vtd[i] = -2; /* Mark as visited but not necessarily noise */ /* Get all epsilon-neighbors of pts[i*dim] */ nnb = 0; /* Initialize number of neighbors found to zero */ for(j=0;j<n_in;j++) { if( j == i ) continue; #ifdef CLUSTER_USE_MATRIX dist = mat[i*n_in+j]; #else dist = 0.0; for(k=0;k<dim_in;k++) tmp = pts_in[i*dim_in+k] - pts_in[j*dim_in+k], dist += tmp * tmp; dist = sqrt( dist ); #endif /* Tack it on to the end of nb for temporary keeping */ if( dist < ep_in( pts_in + i * dim_in ) ) { /* Allocate more neighbor space if not enough already */ if( nnb + 1 > nba ) { nba += CLUSTER_MEMBER_INCREMENT; nb = (int*) realloc( nb, nba * sizeof(int) ); if( nb == NULL ) return -1; } nb[nnb++] = j; } } /* Now have all neighbors counted; figure out what to do */ if( nnb < minpts_in ) vtd[i] = -1; /* Mark as noise and move on */ else /* Expand to create a new cluster */ { /* Create a new cluster */ if( nc + 1 > nca ) { nca += CLUSTER_INCREMENT; /* Increment nca to show that there is more space allocated */ csa = (int*) realloc( csa, nca * sizeof(int) ); /* Allocate more space for storing individual allocated cluster sizes */ *cs_out = (int*) realloc( *cs_out, nca * sizeof(int) ); /* Allocate more space for actual cluster sizes */ *clst_out = (int**) realloc( *clst_out, nca * sizeof(int*) ); /* Space for the actual cluster members */ } /* Add onto the end at position nc */ csa[nc] = CLUSTER_MEMBER_INCREMENT; (*cs_out)[nc] = 1; (*clst_out)[nc] = (int*) malloc( CLUSTER_MEMBER_INCREMENT * sizeof(int) ); (*clst_out)[nc][0] = i; vtd[i] = nc; /* Save the index of the cluster to which it belongs */ for(j=0;j<nnb;j++) { /* Do epsilon-density for each neighbor nb[j] */ if( vtd[nb[j]] == -3 ) { vtd[nb[j]] = -2; /* Mark as visited but not necessarily noise */ nnbp = 0; for(k=0;k<n_in;k++) { if( k == nb[j] ) continue; #ifdef CLUSTER_USE_MATRIX dist = mat[nb[j]*n_in+k]; #else dist = 0.0; for(m=0;m<dim_in;m++) tmp = pts_in[nb[j]*dim_in+m] - pts_in[k*dim_in+m], dist += tmp * tmp; dist = sqrt( dist ); #endif if( dist < ep_in( pts_in + j * dim_in ) ) { /* Allocate more space if necessary */ if( nnbp + 1 > nbap ) { nbap += CLUSTER_MEMBER_INCREMENT; nbp = (int*) realloc( nbp, nbap * sizeof(int) ); if( nbp == NULL ) return -1; } /* Add it to the neighbors prime list */ nbp[nnbp++] = k; } } if( nnbp >= minpts_in ) { /* Combine nb with nbp */ if( nnb + nnbp + 1 > nba ) { nba += CLUSTER_MEMBER_INCREMENT; nb = (int*) realloc( nb, nba * sizeof(int) ); if( nb == NULL ) return -1; } /* Combine these two neighbor groups */ for(k=0;k<nnbp;k++) nb[nnb+k] = nbp[k]; nnb += nnbp; /* Important: Note that because nnb can increase, the loop expands! */ } } /* If nb[j] is not yet a part of a cluster, then add it to cluster nc */ if( vtd[nb[j]] < 0 ) /* Variable vtd[nb[j]] < 0 means taht nb[j] vertex is not part of a cluster yet */ { /* Then add it to the new cluster just created */ if( (*cs_out)[nc] + 1 > csa[nc] ) { csa[nc] += CLUSTER_MEMBER_INCREMENT; (*clst_out)[nc] = (int*) realloc( (*clst_out)[nc], csa[nc] * sizeof(int) ); if( (*clst_out)[nc] == NULL ) return -1; } (*clst_out)[nc][(*cs_out)[nc]++] = nb[j]; vtd[nb[j]] = nc; } } /* Finally increment after all additions made to the output cluster data structures */ ++nc; } } /* Clean up here; this will leak memory if you don't */ free( vtd ); free( csa ); free( nb ); free( nbp ); return nc; }
inline tapkee::ScalarType operator()(int a, int b) const { return distance_matrix(a,b); }