예제 #1
0
    // Find the label that an un-clustered point should inherit from its nearest core point.
    //
    // Walks the cells enumerated by `mi` starting at `cell_key`; among the core points
    // stored in those cells it keeps the one whose squared distance to `point_id` is the
    // smallest value strictly below m_eps_sqr.
    //
    // point_id : row of the query point in cl_d
    // cell_key : hash key used as the start of the neighbourhood walk
    // returns  : the m_labels entry of the nearest core point, or -1 when no core point
    //            lies closer than m_eps_sqr (i.e. the point is noise)
    int DBSCAN_Grid::find_nearest_in_neighbour(int point_id, HashType cell_key){
        mi.set_start(cell_key);
        HashType cell_iter = mi.get();
        const int num_of_neighbour = mi.get_counter();

        // best squared distance seen so far; starting at m_eps_sqr rejects anything farther
        float min_distance = m_eps_sqr;
        // kept as an integer: a float accumulator would round label values above 2^24
        int which_label = -1;
        for(int i=0; i<num_of_neighbour; i++){
            std::unordered_map<HashType, Cell>::const_iterator got = m_hash_grid.find(cell_iter);
            if(got != m_hash_grid.end()){
                for(unsigned int j=0; j<got->second.data.size(); j++){
                    const int which = got->second.data[j];
                    // only core points may hand their label over
                    if(!m_is_core[which])
                        continue;

                    float dist_sqr = 0.0;
                    for(unsigned int k=0; k<cl_d.size2(); k++){
                        const float diff = cl_d(which, k) - cl_d(point_id, k);
                        dist_sqr += diff * diff;
                    }
                    if(dist_sqr < min_distance){
                        min_distance = dist_sqr;
                        which_label = m_labels[which];
                    }
                }
            }
            // advance the walk even when the current cell holds no points
            cell_iter = mi.next();
        }
        return which_label;
    }
예제 #2
0
    bool DBSCAN_Grid::search_in_neighbour(int point_id, HashType center_key){
        mi.set_start(center_key);
        HashType cell_iter = mi.get();
        unsigned int counter = 0;
        int num_of_neighbour = mi.get_counter();

        for(int i=0; i<num_of_neighbour; i++){
            std::unordered_map<HashType, Cell>::const_iterator got = m_hash_grid.find(cell_iter);
            if(got != m_hash_grid.end()){
                for(unsigned int j=0; j<got->second.data.size(); j++){
                    int which = got->second.data.at(j);

                    float dist_sqr = 0.0;
                    for(unsigned int k=0; k<cl_d.size2(); k++){
                        float diff = cl_d(which, k) - cl_d(point_id, k);
                        dist_sqr += diff * diff;
                    }

                    if(dist_sqr < m_eps_sqr)
                        counter++;
                    // here we use '>', because it should not include the center point itself
                    if(counter > m_min_elems)
                        return true;
                }
            }
            cell_iter = mi.next();        
        }
        return false;
    }
예제 #3
0
    void DBSCAN_Grid::merge_in_neighbour(int point_id, HashType center_key){
        mi.set_start(center_key);
        HashType cell_iter = mi.get();
        int num_of_neighbour = mi.get_counter();
        int cell_index = m_hash_grid.find(center_key)->second.ufID;

        // iterate on core points only
        for(int i=0; i<num_of_neighbour; i++){
            std::unordered_map<HashType, Cell>::const_iterator got = m_hash_grid.find(cell_iter);
            if(got != m_hash_grid.end()){
                for(unsigned int j=0; j<got->second.data.size(); j++){
                    int which = got->second.data.at(j);
                    if(!m_is_core[which])
                        continue;

                    float dist_sqr = 0.0;
                    for(unsigned int k=0; k<cl_d.size2(); k++){
                        float diff = cl_d(which, k) - cl_d(point_id, k);
                        dist_sqr += diff * diff;
                    }
                    if(dist_sqr < m_eps_sqr){
                        int belong_index = got->second.ufID;
                        uf.make_union(belong_index, cell_index);
                        break;
                    }
                }
            }
            cell_iter = mi.next();
        }
    }
예제 #4
0
 // Widen the per-column bounds in min_vec / max_vec so they cover every row of cl_d.
 //
 // Both vectors must already have one entry per column of cl_d (checked by assert) and
 // must be pre-seeded by the caller: an entry is only overwritten when a value in cl_d
 // is strictly smaller (min_vec) or strictly larger (max_vec) than what it holds.
 void DBSCAN_Grid::getMinMax_grid(std::vector<float>& min_vec, std::vector<float>& max_vec) const{
     const auto column_count = cl_d.size2();
     assert(min_vec.size() == column_count);
     assert(max_vec.size() == column_count);

     const auto row_count = cl_d.size1();
     for(unsigned int row=0; row<row_count; ++row){
         for(unsigned int col=0; col<column_count; ++col){
             const auto value = cl_d(row, col);
             float& lower = min_vec[col];
             float& upper = max_vec[col];
             if(value < lower)
                 lower = value;
             if(value > upper)
                 upper = value;
         }
     }
 }
예제 #5
0
// Build an elements_num x features_num data set filled with pseudo-random values.
//
// Every entry is drawn with rand() and mapped linearly onto [-1.0, 1.0]; entries are
// generated row by row, so the result depends on the current rand() seed.
DBSCAN::ClusterData DBSCAN::gen_cluster_data( size_t features_num, size_t elements_num )
{
    DBSCAN::ClusterData samples( elements_num, features_num );

    for ( size_t row = 0; row < elements_num; ++row ) {
        for ( size_t col = 0; col < features_num; ++col ) {
            // rand() / RAND_MAX scaled to a width of 2, then shifted down by 1
            const double scaled = rand() * 2.0 / RAND_MAX;
            samples( row, col ) = -1.0 + scaled;
        }
    }

    return samples;
}
예제 #6
0
 // Dump every point to cout: its coordinates followed by "[label is_core]".
 // Reads m_labels and m_is_core, so it should only be called once both are initialised.
 void DBSCAN_Grid::print_point_info() const{
     const auto point_total = cl_d.size1();
     const auto feature_total = cl_d.size2();

     cout<<"-----------print point information-----------"<<endl;
     for(unsigned int row=0; row<point_total; ++row){
         // coordinates, each followed by a comma
         cout<<"(";
         for(unsigned int col=0; col<feature_total; ++col){
             cout<<cl_d(row,col)<<",";
         }
         // cluster label and core flag of the same point
         cout<<")       ["<<m_labels[row]<<" "<<m_is_core[row]<<"]"<<endl;
     }
     cout<<"-------------------------------------"<<endl;
 }
예제 #7
0
    void DBSCAN_Grid::determine_boarder_point(){
        std::vector<int> temp(m_min_val.size());
        for(unsigned int i=0; i<m_labels.size(); i++){
            if(m_labels[i] == -1){
                for(unsigned int k=0; k<cl_d.size2(); k++)
                    temp[k] = int((cl_d(i, k) - m_min_val[k]) / m_cell_width) + 1;
                HashType key = mi.hash(temp);

                int label = find_nearest_in_neighbour(i, key);
                m_labels[i] = label;
            }
        }
    }
예제 #8
0
 void DBSCAN_Grid::print_grid_info() const{
     cout<<"-----------print hash grid-----------"<<endl;
     for(std::unordered_map<HashType, Cell>::const_iterator iter = m_hash_grid.begin(); iter != m_hash_grid.end(); ++iter){
         HashType key = iter->first;
         // can not decode the key using the hash function in util.cpp
         cout<<"key : "<<key<<endl;
         for(unsigned int j=0; j<iter->second.data.size(); j++){
             int which = iter->second.data[j];
             cout<<"(";
             for(unsigned int k=0; k<cl_d.size2(); k++)
                 cout<<cl_d(which, k)<<",";
             cout<<")"<<endl;
         }
         cout<<endl;
     }
     cout<<"-------------------------------------"<<endl;
 }
예제 #9
0
    // Build the hash grid (m_hash_grid) over all rows of cl_d.
    //
    // Steps:
    //   1. grid_init(), then compute the per-dimension bounding box of the data;
    //   2. store the minima in m_min_val and the cell count per dimension in m_n_cnt;
    //   3. configure the index helper `mi` (dimension and per-dimension maximum);
    //   4. drop every point into the cell it falls in, creating cells on demand and
    //      giving each new cell the next consecutive ufID.
    //
    // NOTE(review): with zero rows in cl_d the bounds keep their sentinel values and the
    // subtraction in step 2 overflows — confirm callers never pass an empty data set.
    void DBSCAN_Grid::hash_construct_grid(){
        // do some initialization and detect the size of the grid
        unsigned int features_num = cl_d.size2();
        grid_init(features_num);

        // Seed the bounds with the extreme finite floats. numeric_limits<float>::min() is the
        // smallest POSITIVE normal value, so lowest() is required for the maximum to be
        // correct when a dimension contains only negative values.
        std::vector<float> min_vec(features_num, std::numeric_limits<float>::max());
        std::vector<float> max_vec(features_num, std::numeric_limits<float>::lowest());
        getMinMax_grid(min_vec, max_vec);

        m_min_val.resize(features_num);
        std::copy(min_vec.begin(), min_vec.end(), m_min_val.begin());

        // number of cells of width m_cell_width needed to span each dimension
        m_n_cnt.resize(features_num);
        for(unsigned int i=0; i<features_num; i++)
            m_n_cnt[i] = int((max_vec[i] - min_vec[i]) / m_cell_width) + 1;

        // for debug
        for(unsigned int i=0; i<features_num; i++)
            cout<<m_n_cnt[i]<<" ";
        cout<<endl;

        // cell indices below are shifted by +1, so the index range handed to `mi` is one larger
        std::vector<int> temp(m_n_cnt.size());
        for(unsigned int i=0; i<m_n_cnt.size(); i++)
            temp[i] = m_n_cnt[i] + 1;
        mi.set_dimension(features_num);
        mi.set_max(temp);

        int uf_counter = 0;
        const int length = (int)cl_d.size1();
        for(int i=0; i<length; i++){
            // per-dimension cell index of point i (reuses `temp` as scratch space)
            for(unsigned int j=0; j<cl_d.size2(); j++)
                temp[j] = int((cl_d(i, j) - m_min_val[j]) / m_cell_width) + 1;
            HashType key = mi.hash(temp);
            std::unordered_map<HashType, Cell>::iterator got = m_hash_grid.find(key);
            if(got == m_hash_grid.end()){
                // first point in this cell: create it and assign a fresh ufID
                Cell c;
                c.ufID = uf_counter++;
                c.data.push_back(i);
                m_hash_grid.insert(std::make_pair(key,c));
            }
            else
                got->second.data.push_back(i);
        }
    }