예제 #1
0
/// Runs one iteration of the clustering loop: assigns every point in `data`
/// to its nearest centroid, then recomputes each of the K centroids.
///
/// @param data       points to cluster; each point's `cluster_` is overwritten
///                   with the index of its nearest centroid (-1 if `centroids`
///                   is empty).
/// @param centroids  current centroids; updated in place via getCentroid().
/// @return true when getCentroid() reported no change for any of the K
///         centroids, false otherwise.
bool fit(DataVec& data, DataVec& centroids) {
    // Assignment step: label each point with the index of the closest centroid.
    for (size_t p = 0; p < data.size(); ++p) {
        double best_dist = std::numeric_limits<double>::max();
        int best_cluster = -1;
        for (size_t c = 0; c < centroids.size(); ++c) {
            const double candidate = dist(data[p], centroids[c]);
            // Strict comparison: on ties the lowest centroid index wins.
            if (candidate < best_dist) {
                best_dist = candidate;
                best_cluster = static_cast<int>(c);
            }
        }
        data[p].cluster_ = best_cluster;
    }

    // Update step: recompute every centroid and remember whether any moved.
    bool any_moved = false;
    for (size_t k = 0; k < K; ++k) {
        std::cout << "Centroid at " << k << " was " << centroids[k] << "\n";
        if (getCentroid(data, k, centroids[k])) {
            any_moved = true;
        }
        std::cout << "Centroid at " << k << " is now " << centroids[k] << "\n";
    }

    return !any_moved;
}
예제 #2
0
/// Recomputes the centroid of one cluster as the mean of its member points.
///
/// @param data      all points; only those whose `cluster_` equals `cluster`
///                  contribute.
/// @param cluster   index of the cluster to recompute.
/// @param centroid  in/out: the current centroid; overwritten with the new
///                  mean when the cluster has at least one member, left
///                  untouched when the cluster is empty.
/// @return true if the centroid moved by more than 0.05 (as measured by
///         dist()), false if it moved less than that or the cluster is empty.
bool getCentroid(const DataVec& data, const int cluster, Point& centroid) {
    size_t num(0);
    Point new_centroid(0.0,0.0,0.0,0.0);
    for (DataVec::const_iterator ct=data.begin();ct!=data.end();++ct) {
        if (ct->cluster_ == cluster) {
            new_centroid.x1_ += ct->x1_;
            new_centroid.x2_ += ct->x2_;
            new_centroid.x3_ += ct->x3_;
            new_centroid.x4_ += ct->x4_;
            // NOTE(review): this bumps the new centroid's cluster_ once per
            // member, so it ends up as a member count offset by whatever the
            // Point constructor initialises it to -- confirm this is intended.
            ++new_centroid.cluster_;
            ++num;
        }
    }

    if (num==0) {
        // An empty cluster keeps its old centroid, so it must be reported as
        // "not changed"; returning true here would keep callers that loop
        // until nothing changes from ever terminating.
        std::cout << "Cluster unchanged \n";
        return false;
    }

    // Turn the coordinate sums into means.
    new_centroid.x1_ /= num;
    new_centroid.x2_ /= num;
    new_centroid.x3_ /= num;
    new_centroid.x4_ /= num;

    double d = dist(centroid,new_centroid);
    std::cout << "getCentroid: d=" << d << "\n";
    std::cout << "num=" << num << "\n";
    // Movements of at most 0.05 are treated as "no change".
    const bool changed = d > 0.05;
    centroid = new_centroid;
    return changed;
}
예제 #3
0
파일: systest.cpp 프로젝트: Thekian/nepeta
/// Test casting functions: stores a string argument on a node, reads it back
/// as a vector of 8-bit integers and checks both representations agree.
void testCast()
{
	typedef std::vector<int8_t> ByteVec;
	ByteVec bytes;

	Nepeta nep;
	nep.getRoot().createNode("Test").createArg("Hello!");
	nep.getRoot().getNode("Test").readArg(bytes, 0);

	// Append a terminating zero so the buffer can be handled as a C string.
	bytes.push_back(0);
	std::cout << "Casted to: " << bytes.data() << "\n";

	// The trailing string literal only serves as the message shown on failure.
	assert(std::string(reinterpret_cast<const char*>(bytes.data())) == nep.getRoot().getNode("Test").getArg(0)
		&& "STRINGS MUST BE EQUAL!\n");
}
예제 #4
0
/// Computes a Dunn-style index for a clustering: the smallest distance between
/// two centroids divided by the largest cluster "diameter".
///
/// The diameter of a cluster is taken here as the mean dist() of its member
/// points to the cluster's own centroid.
///
/// @param data       clustered points; `cluster_` selects the owning centroid.
/// @param centroids  the K cluster centroids.
/// @return min inter-centroid distance / max cluster diameter, or 0.0 when
///         either quantity is unavailable (no positive pairwise distance,
///         e.g. fewer than two clusters, or a zero maximum diameter).
double dunnIndex(DataVec& data, DataVec& centroids) {
    double res = 0.0;

    // compute max cluster diameter
    double max_clust_diam = 0.0;
    for (size_t i=0;i<K;++i) {
        double clust_diam = 0.0;
        double n = 0.0;
        for (DataVec::const_iterator ct=data.begin();ct!=data.end();++ct) {
            if (ct->cluster_ == static_cast<int>(i)) {
                // Measure against this cluster's own centroid (index i);
                // index K would be one past the last centroid.
                double d = dist(*ct,centroids[i]);
                clust_diam += d;
                ++n;
            }
        }
        std::cout << "clust_diam = " << clust_diam << "\n";
        if (n>0) clust_diam /= n;
        std::cout << "clust_diam = " << clust_diam << "\n";
        if (clust_diam > max_clust_diam) max_clust_diam = clust_diam;
    }

    // compute min intercluster distance over every unordered centroid pair
    double min_clust_dist = std::numeric_limits<double>::max();
    bool have_pair = false;
    for (size_t i=0;i<K;++i) {
        for (size_t j=(i+1);j<K;++j) {
            const double d = dist(centroids[i],centroids[j]);
            // The check lives inside the inner loop so that every pair is
            // considered, not just the last j for each i.
            if (d>0 && d<min_clust_dist) {
                min_clust_dist = d;
                have_pair = true;
            }
        }
    }
    std::cout << "min_clust_dist = " << min_clust_dist << "\n";
    std::cout << "max_clust_diam = " << max_clust_diam << "\n";
    // Without a positive pairwise distance min_clust_dist is still the
    // sentinel value, so the ratio would be meaningless.
    if (have_pair && max_clust_diam > 0) res = min_clust_dist / max_clust_diam;
    return res;
}
예제 #5
0
 /// Constructs a buffer marked as empty with a length of zero, and resizes
 /// `data` to `bufferSize`.
 Buffer(size_t bufferSize)
   : len(0)
   , empty(true)
 {
   data.resize(bufferSize);
 }
예제 #6
0
파일: main.cpp 프로젝트: jfermon22/sim_csma
int main(int argc, const char * argv[]) {
    
    parseCommandLineArgs(argc,argv);
    
    vector<uint32_t> msgFreqVec;
    msgFreqVec.push_back(50);
    msgFreqVec.push_back(100);
    msgFreqVec.push_back(200);
    msgFreqVec.push_back(300);
    msgFreqVec.push_back(400);
    msgFreqVec.push_back(500);
    
    map <uint,float> fairnessIndexMap;
    map <uint,uint32_t> collisionsIndexMap;
    map <uint,float> utilizationMap;
    map <uint,uint32_t> thruputMap;
    
    DataVec dataMap;
    for (uint32_t testNum =1; testNum < g_uSimNum; testNum++)
    {
        uint jjj(1);
        if(testNum%10 == 0)
            cout << "Test: " << testNum << endl;
        for (uint hiddenNode = 0; hiddenNode <= 1; hiddenNode++ )
        {
            for (uint useVcs = 0; useVcs <= 1; useVcs++ )
            {
                for (uint nodeAFreqScale = 1; nodeAFreqScale <= 2; nodeAFreqScale++)
                {
                    for (vector<uint32_t>::iterator it = msgFreqVec.begin();
                         it != msgFreqVec.end(); ++it )
                    {
                        
                        //cout << "sim: " << jjj << endl;
                        //init simulation
                        Simulation *sim = new Simulation(simDuration);
                        Channel *channel = new Channel();
                        if (! sim || ! channel) {
                            cout << "failed to allocate memory for simulation and channel"<< endl;
                            exit(1);
                        }
                        
                        //init nodes
                        uint32_t nodeAFreq = *it*nodeAFreqScale;
                        uint32_t nodeCFreq = (*it);
                        
                        //init nodes
                        RxNode *nodeB = new RxNode(2,sim,channel,ACK_RTS_CTS_SND_DUR,SLOT_DUR);
                        RxNode *nodeD = new RxNode(4,sim,channel,ACK_RTS_CTS_SND_DUR,SLOT_DUR);
                        TxNode *nodeA = new TxNode(0,sim,channel,nodeB,nodeAFreq,DIFS,SIFS,PACKET_SEND_DUR,ACK_RTS_CTS_SND_DUR,SLOT_DUR,useVcs,hiddenNode);
                        TxNode *nodeC = new TxNode(1,sim,channel,nodeD,nodeCFreq,DIFS,SIFS,PACKET_SEND_DUR,ACK_RTS_CTS_SND_DUR,SLOT_DUR,useVcs,hiddenNode);
                        
                        if (! nodeA ||!nodeB || ! nodeC || !nodeD) {
                            cout << "failed to allocate memory for nodes"<< endl;
                            exit(1);
                        }
                        
                        //seed starting events
                        nodeA->schedulePacketReady(random_distro::exponential(nodeAFreq,random_distro::TEN_USECS));
                        nodeC->schedulePacketReady(random_distro::exponential(nodeCFreq,random_distro::TEN_USECS));
                        
                        //run simulation
                        sim->Run();
                        
                        uint32_t aThruput(0),cThruput(0);
                        float aUtil(0.0f),cUtil(0.0f);
                        if ( useVcs)
                        {
                            aThruput = nodeA->SuccessfulSends() * (PACKET_SIZE_BYTES + 3 * ACK_SIZE_BYTES);
                            aUtil = (((float)nodeA->SuccessfulSends()*VCS_RTT)/(float)simDuration) * 100.0f;
                            cThruput = nodeC->SuccessfulSends() * (PACKET_SIZE_BYTES + 3* ACK_SIZE_BYTES);
                            cUtil = (((float)nodeC->SuccessfulSends()*VCS_RTT)/(float)simDuration) * 100.0f;
                        }
                        else
                        {
                            aThruput = nodeA->SuccessfulSends() * (PACKET_SIZE_BYTES + ACK_SIZE_BYTES);
                            aUtil = (((float)nodeA->SuccessfulSends()*RTT)/(float)simDuration) * 100.0f;
                            cThruput = nodeC->SuccessfulSends() * (PACKET_SIZE_BYTES + ACK_SIZE_BYTES);
                            cUtil = (((float)nodeC->SuccessfulSends()*RTT)/(float)simDuration) * 100.0f;
                        }
                        
                        
                        dataMap.insert(Data(jjj, nodeAFreq, nodeCFreq, aThruput, cThruput, aUtil, cUtil, nodeC->TotalCollisions(), nodeC->IsHiddenNode(), nodeC->UsesVCS(), aThruput + cThruput, aUtil + cUtil, aUtil/cUtil));
                        
                        
                        //sim->PrintData();
                        
                        // destroy objects
                        nodeA = NULL;
                        nodeC = NULL;
                        channel = NULL;
                        sim = NULL;
                        delete nodeA;
                        delete nodeC;
                        delete channel;
                        delete sim;
                        jjj++;
                    } // for lamdas
                } //for nodeA Freq scale
            } //for use Vcs
        } // hiddenNode
    } // testNum
    dataMap.print();
    
    stringstream ss;
    ss<< g_uSimNum;
    string strNumOfTests("");
    ss >>strNumOfTests;
    dataMap.OutputToFile("data_"+ strNumOfTests+".csv");
    
    if (g_bShowEnhancedStats)
    {
        cout <<endl;
        cout << "Row 1: Parallel, No VCS" << endl;
        cout << "Row 2: Parallel, VCS" << endl;
        cout << "Row 3: Hidden Node, No VCS" << endl;
        cout << "Row 4: Hidden Node, VCS" << endl;
        
        
        cout << endl<<"Throughput A:" << endl;
        
        for (  map<uint32_t, Data*>::iterator it2 = dataMap.begin();
             it2 != dataMap.end(); ++it2 )
        {
            cout << left << setw(2) <<it2->first << ":"
            <<setw(10) << std::setprecision( 3 ) << fixed <<it2->second->thruput_a << "\t";
            if (it2->first && it2->first % 12 == 0)
                cout << endl;
        }
        cout << endl<<"Throughput C:" << endl;
        
        for (  map<uint32_t, Data*>::iterator it2 = dataMap.begin();
             it2 != dataMap.end(); ++it2 )
        {
            cout << left << setw(2) <<it2->first << ":"
            <<setw(10) << std::setprecision( 3 ) << fixed << it2->second->thruput_c << "\t";
            if (it2->first && it2->first % 12 == 0)
                cout << endl;
        }
        
        cout << endl<<"Throughput:" << endl;
        
        for (  map<uint32_t, Data*>::iterator it2 = dataMap.begin();
             it2 != dataMap.end(); ++it2 )
        {
            cout << left << setw(2) <<it2->first << ":"
            <<setw(10) << std::setprecision( 3 ) << fixed <<it2->second->thruput_tot << "\t";
            if (it2->first && it2->first % 12 == 0)
                cout << endl;
        }
        
        cout <<endl<< "Utilization:" << endl;
        for ( map <uint32_t, Data*>::iterator it2 = dataMap.begin();
             it2 != dataMap.end(); ++it2 )
        {
            cout << left << setw(2) <<it2->first << ":"
            <<setw(5) << std::setprecision( 3 ) <<fixed<< it2->second->util_tot << "\t";
            if (it2->first && it2->first % 12 == 0)
                cout << endl;
        }
        
        cout <<endl<< "Fairness Indeces:" << endl;
        for ( map <uint32_t, Data*>::iterator it2 = dataMap.begin();
             it2 != dataMap.end(); ++it2 )
        {
                cout << left << setw(2) <<it2->first << ":"
                <<setw(5) << std::setprecision( 3 ) <<fixed<< it2->second->fairness<< "\t";
                if (it2->first && it2->first % 12 == 0)
                    cout << endl;
        }
        
        cout << endl<<"Collisions:" << endl;
        for ( map<uint32_t, Data*>::iterator it2 = dataMap.begin();
             it2 != dataMap.end(); ++it2 )
        {
            cout << left << setw(2) <<it2->first << ":"
            <<setw(10) << std::setprecision( 3 ) <<fixed<< it2->second->colls << "\t";
            if (it2->first && it2->first % 12 == 0)
                cout << endl;
        }
        
    }
    return 0;
}
예제 #7
0
int main(int argc, char** argv) {
    
    // seed random generator
    srand(time(NULL));

    ifstream infile("../iris.data");
    string line;
    DataVec data;
    double fac = 4.0;
    while (std::getline(infile, line))
    {
        std::vector<std::string> fields;
        boost::split(fields,line, boost::is_any_of(","));
        assert(fields.size() == 5);

        double x1 = atof(fields[0].c_str())*fac;
        double x2 = atof(fields[1].c_str())*fac;
        double x3 = atof(fields[2].c_str())*fac;
        double x4 = atof(fields[3].c_str())*fac;

        Point p(x1,x2,x3,x4);
        //std::cout << p << std::endl;
        data.push_back(p);
    }
    std::cout << "Collected " << data.size() << " points. " << std::endl;
    assert(data.size()>K);
    
    // init centroids to random points
    DataVec centroids;
    //centroids.reserve(K);
    DataVec::iterator dataBegin = random_unique(data.begin(),data.end(),K);
    std::cout << K << " random points " << std::endl;
    for (size_t i=0;i<K;++i) {
        std::cout << data[i] << "\n";
        centroids.push_back(data[i]);
    }
    std::cout << centroids.size() << " centroids " << std::endl;

    // Lloyd's algorithm to iteratively fit the cluster centroids.
    bool done = fit(data,centroids);
    while(!done) {
        done = fit(data,centroids);
        std::cout << done << "\n";
    }

    double idx = dunnIndex(data,centroids);
    cout << "Dunn Index for this clustering " << idx << "\n";

    // write clustering to file
    ofstream of("clusters.dat");
    for (DataVec::iterator it = data.begin(); it!=data.end(); ++it) {
        of << *it << std::endl;
    }
    of.close();

    return EXIT_SUCCESS;
}
/// Entry point of the MPI logistic regression trainer.
///
/// Usage: ./logistic_mpi <data_directory> [batch_size [max_iterations
///        [model_output_file]]]
///
/// Defaults when an optional argument is omitted: full-batch processing
/// (batch_size = INT_MIN), 100 iterations, output file "logistic.model".
/// A batch_size of -1 also selects full-batch processing.
///
/// Each task loads its own partition of the instances, optionally scales the
/// features with the global min/max, agrees on a global label set with the
/// other tasks, and then runs gradient descent in which the per-task gradient
/// updates are summed with MPI_Allreduce. The master task writes the resulting
/// parameters to the output file.
///
/// @return 0 on completion; exits with status 0 after printing the usage text
///         when the argument count is wrong.
int main (int argc, char *argv[]) {
	// handle cmd args
	int batch_size, maxiter;
	std::string datadir;
	std::string output_file;

	if ( argc > 5 || argc < 2 ) {
		printf( "Usage: ./logistic_mpi <data_directory> <batch_size> "
				"<max_iterations> <model_output_file>\n");
		// MPI has not been initialised yet at this point, so MPI_Finalize
		// must not be called here.
		exit( 0 );
	}

	// Start from the defaults and override them with whatever optional
	// arguments were supplied (2, 3, 4 or 5 argv entries).
	datadir = argv[1];
	batch_size = INT_MIN; // batch processing
	maxiter = 100;
	output_file = "logistic.model";
	if ( argc >= 3 ) {
		batch_size = atoi( argv[2] ); // mini-batch processing
		if ( batch_size == -1 ) { batch_size = INT_MIN; }
	}
	if ( argc >= 4 ) {
		maxiter = atoi( argv[3] );
	}
	if ( argc == 5 ) {
		output_file = argv[4];
	}

	// initialize/populate mpi specific vars local to each node
	double t1,t2; // elapsed time computation
	int  numtasks, taskid, len;
	char hostname[MPI_MAX_PROCESSOR_NAME];

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
	MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
	MPI_Get_processor_name(hostname, &len);
	MPI_Op op;


	/* DATA PREPROCESSING */
	if ( taskid == MASTER ) {
		printf( "\nLoading and Preprocessing Data\n" );
	}
	t1 = MPI_Wtime();

	// determine number of instances
	DataVec datavec;
	mlu::count_instances( datadir, datavec, num_inst );

	// determine number of features
	mlu::count_features( datavec[0], n );


	/* DATA INITIALIZATION */
	// randomize instances
	std::random_shuffle( datavec.begin(), datavec.end() );

	// partition data based on taskid; the last task also takes the remainder
	size_t div = datavec.size() / numtasks;
	ProbSize limit = ( taskid == numtasks - 1 ) ? num_inst : div * ( taskid + 1 );
	m = limit - div * taskid;

	// dynamically allocate data
	Mat X( m, n );
	Vec labels( m );

	// load data partition: each instance file holds n feature values
	// followed by the label
	double feat_val, label;
	ProbSize i = 0;
	for ( ProbSize idx = taskid * div; idx < limit; ++idx ) {
		std::ifstream data( datavec[idx] );
		for ( ProbSize j=0; j<n; ++j ) {
			data >> feat_val;
			X(i,j) = feat_val;
		}
		data >> label;
		labels[i] = label;
		i++;
	}

	// perform feature scaling (optional)
	if ( scaling ) {
		// Allreduce to find global min
		Vec X_min_tmp = X.colwise().minCoeff();
		X_min_data = X_min_tmp.data();
		Vec X_min = Vec( X_min_tmp.size() );
		MPI_Allreduce( X_min_tmp.data(), X_min.data(), X_min_tmp.size(), MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD );

		// Allreduce to find global max
		Vec X_max_tmp = X.colwise().maxCoeff();
		X_max_data = X_max_tmp.data();
		Vec X_max = Vec( X_max_tmp.size() );
		MPI_Allreduce( X_max_tmp.data(), X_max.data(), X_max_tmp.size(), MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );

		// scale features using global min and max
		mlu::scale_features( X, X_min, X_max, 1, 0 );
	}


	/* FORMAT LABELS */
	// get unique labels
	mlu::get_unique_labels( labels, classmap );

	// allreduce to obtain an upper bound on the global label set size
	// (the sum of the local set sizes)
	int local_size = classmap.size();
	int max_size = 0;
	MPI_Allreduce( &local_size, &max_size, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );

	// allreduce to obtain global unique label set; unused slots hold -1
	// NOTE(review): these are variable-length arrays, a compiler extension
	// in C++ -- consider a std::vector<int> once <vector> is known to be
	// included.
	int unique_labels[max_size];
	int global_unique_labels[max_size];
	for ( int i=0; i<max_size; ++i ) {
		unique_labels[i] = -1;
		global_unique_labels[i] = -1;
	}
	int idx = 0;
	for ( auto& kv : classmap ) {
		unique_labels[idx++] = kv.first;
	}
	MPI_Op_create( (MPI_User_function *)reduce_unique_labels, 1, &op );
	MPI_Allreduce( unique_labels, global_unique_labels, max_size, MPI_INT, op, MPI_COMM_WORLD );
	MPI_Op_free( &op );
	
	// update local classmap: sorted global labels get consecutive indices
	std::sort( global_unique_labels, global_unique_labels + max_size );
	classmap.clear();
	int labeltmp;
	idx=0;
	for ( int i=0; i<max_size; ++i ) {
		labeltmp = global_unique_labels[i];
		if ( labeltmp != -1 ) {
			classmap.emplace( labeltmp, idx++ );
		}
	}

	// format the local label set into a matrix based on global class map
	Mat y = mlu::format_labels( labels, classmap );
	numlabels = (LayerSize) classmap.size();

	// output total data loading time for each task
	MPI_Barrier( MPI_COMM_WORLD );
	t2 = MPI_Wtime();
	printf( "--- task %d loading time %lf\n", taskid, t2 - t1 ); 


	/* INIT LOCAL CLASSIFIER */
	LogisticRegression logistic_layer( n, numlabels, true );


	/* OPTIMIZATION */
	if ( taskid == MASTER ) {
		printf( "\nPerforming Gradient Descent\n" );
	}

	int update_size; // stores the number of instances read for each update
	double grad_mag; // stores the magnitude of the gradient for each update
	int delta_size = logistic_layer.get_theta_size();
	Vec delta_update = Vec::Zero( delta_size );
	int global_update_size;
	if ( taskid == MASTER ) {
		printf( "iteration : elapsed time : magnitude\n" );
	}

	for ( int i=0; i<maxiter; ++i ) {
		// compute gradient update
		t1 = MPI_Wtime();
		logistic_layer.compute_gradient( X, y, batch_size, update_size );
		delta_data = logistic_layer.get_delta().data();

		// sum updates across all partitions
		MPI_Allreduce( 
			delta_data,
			delta_update.data(),
			delta_size,
			MPI_DOUBLE,
			MPI_SUM,
			MPI_COMM_WORLD
		);
		logistic_layer.set_delta( delta_update );

		// sum the update sizes
		MPI_Allreduce( &update_size, &global_update_size, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );

		// normalize + regularize gradient update
		logistic_layer.normalize_gradient( global_update_size );
		logistic_layer.regularize_gradient( global_update_size );

		// update logistic_layer parameters
		t2 = MPI_Wtime();
		if ( logistic_layer.converged( grad_mag ) ) { break; }
		if ( taskid == MASTER ) {
			printf( "%d : %lf : %lf\n", i+1, t2 - t1, grad_mag );
		}
		logistic_layer.update_theta();
	}


	/* MODEL STORAGE */
	if (taskid == MASTER) {
		FILE *output;
		output = fopen ( output_file.c_str(), "w" );
		if ( output == NULL ) {
			// Writing through a null FILE* is undefined; report and skip.
			fprintf( stderr, "Could not open model output file: %s\n", output_file.c_str() );
		} else {
			int idx;
			Vec theta = logistic_layer.get_theta();
			printf( "\nWriting Model to File: %s\n\n", output_file.c_str() );

			// First line: number of parameters; second line: the
			// tab-separated parameter values.
			fprintf( output, "%lu\n", static_cast<unsigned long>( theta.size() ) );
			for ( idx=0; idx<theta.size()-1; ++idx ) {
				fprintf( output, "%lf\t", theta[idx] );
			}
			fprintf( output, "%lf\n", theta[idx] );

			fclose( output );
		}
	} 

	MPI_Finalize();
	return 0;
}