Example #1
0
		//--------------------------------------------------------------
		size_t DataSet::loadFragment(const std::string & filePath, const std::string & particleType)
		{
			static const int stride = 1;

			ofxHDF5File h5File;
			h5File.open(filePath, true);
			ofxHDF5GroupPtr h5Group = h5File.loadGroup(particleType);

			// Load the coordinate data and convert angles to radians.
			auto coordDataSet = h5Group->loadDataSet("Coordinates");
			int coordCount = coordDataSet->getDimensionSize(0) / stride;
			coordDataSet->setHyperslab(0, coordCount, stride);

			vector<glm::vec3> coordData(coordCount);
			coordDataSet->read(coordData.data());

			// Load the mass data.
			auto massDataSet = h5Group->loadDataSet("Masses");
			int massCount = massDataSet->getDimensionSize(0) / stride;
			massDataSet->setHyperslab(0, massCount, stride);

			vector<float> massData(massCount);
			massDataSet->read(massData.data());

			// Load the star formation rate data.
			auto sfrDataSet = h5Group->loadDataSet("StarFormationRate");
			int sfrCount = sfrDataSet->getDimensionSize(0) / stride;
			sfrDataSet->setHyperslab(0, sfrCount, stride);

			vector<float> sfrData(sfrCount);
			sfrDataSet->read(sfrData.data());

			// Add valid points to the data set.
			size_t total = 0;
			for (int i = 0; i < coordData.size(); ++i)
			{
				if (coordData[i].z > 0.0f)
				{
					this->coordinates.push_back(glm::vec3(ofDegToRad(coordData[i].x), ofDegToRad(coordData[i].y), coordData[i].z));
					this->minRadius = std::min(this->minRadius, coordData[i].z);
					this->maxRadius = std::max(this->maxRadius, coordData[i].z);

					this->masses.push_back(massData[i]);
					
					if (particleType == "PartType6")
					{
						this->starFormationRates.push_back(sfrData[i]);
					}
					else
					{
						// These are stars so just put in dummy data.
						this->starFormationRates.push_back(-1.0f);
					}

					++total;
				}
			}

			return total;
		}
Example #2
0
/*****************************************************************************************
 *  Helpers for K_MeansPredict::Train
 *
 *  Both helpers define the degenerate cases (too few samples) as 0 instead of
 *  dividing by zero, so an empty or single-member cluster yields finite statistics.
 ******************************************************************************************/
 // Mean of n samples given their sum; 0 when there are no samples.
 static float KMeansMean( float sum, size_t n )
 {
   return ( n > 0 ) ? sum/float( n ) : 0.0f;
 }

 // Unbiased sample variance from running sums: ( sum(x^2) - sum(x)^2/n ) / ( n-1 ).
 // Returns 0 when n < 2, where the sample variance is undefined.
 static float KMeansSampleVariance( float sumSq, float sum, size_t n )
 {
   if( n < 2 ){
     return 0.0f;
   }
   float var = ( sumSq - (sum*sum)/float( n ) )/float( n-1 );
   // Rounding can push a near-zero variance slightly negative, which would make
   // the later sqrt() return NaN. Written so a genuine NaN still propagates.
   return ( var < 0.0f ) ? 0.0f : var;
 }

/*****************************************************************************************
 *  vector< int > K_MeansPredict::Train( const vector< vector< float > >& Data, const float stopDist, const int stopIter, const int fast )
 *
 *  Purpose: Train predictor
 *  input:
 *	Data: vector of data. Element 0 of each row is the key value, the remaining
 *	      elements are the coordinates used for clustering. Every row must hold
 *	      at least one element.
 *	stopDist: Distance stopping criteria
 *	stopIter: Max Iteration stopping criteria
 *	fast: when 1, return right after clustering; no statistics are computed or
 *	      stored and nothing is printed
 *
 *  return:
 *	vector of cluster membership
 *
 *  side effects (only when fast != 1):
 *	sets _key_supports, _key_means, _key_variances, _errMean, _lowerConfBound,
 *	_upperConfBound, _totalErrMean and _totalBoundStub, and writes a training
 *	report to cout
 *
 *  01.07.2006 	djh	added stopping criterion parameters
 *  			stopDist minimum euclidean distance
 * 			stopIter maximum iterations
 *			extra error output
 * 03.06.2006	djh	replaced _totalUpper/_totalLowerConfBound with _totalBoundStub
 *
 ******************************************************************************************/
 vector< int > K_MeansPredict::Train( const vector< vector< float > >& Data, const float stopDist, const int stopIter, const int fast ){
   const size_t numExamples = Data.size();
   // create vector of example coordinates
   vector< Coord< float > > coordData( numExamples );
   // create vector of example key values
   vector< float > dataKey( numExamples );
   // split every row into its key (element 0) and its coordinates (the rest)
   for( size_t i=0; i<numExamples; i++ )
   {
     vector< float > tempCoords( Data[i].size()-1 );
     dataKey[i]=Data[i][0];
     for( size_t j=1; j<Data[i].size(); j++ )
     {
       tempCoords[j-1] = Data[i][j];
     }

     coordData[i] = Coord< float >( tempCoords );
   }
   //  calculate clusters
   float dist;
   int numIter;
   vector<int> clusterMap = CreateClusters( coordData, stopDist, stopIter, dist, numIter );
   if( fast == 1 ){
     return( clusterMap );
   }
   cout << "# Training:\n";
   cout << "#   Training required " << numIter << " rounds, the max Euclid. Dist. is: " << dist << endl;
   //  calculate cluster stats
   vector< float > sum_x( _k, 0. );
   vector< float > sum_x2( _k, 0. );
   _key_supports = vector< int >( _k, 0);
   // find n and sums: each example contributes its OWN key to its cluster
   for( size_t i=0; i<numExamples; i++ ){
     const int cluster = clusterMap[i];
     const float key = dataKey[i];
     _key_supports[ cluster ]++;
     sum_x[ cluster ] += key;
     sum_x2[ cluster ] += key*key;
   }
   // compute mean and variance
   _key_means = vector< float >(_k,0.);
   _key_variances = vector< float >(_k,0.);
   for( int i=0; i<_k; i++ ){
     const size_t support = size_t( _key_supports[i] );
     _key_means[i] = KMeansMean( sum_x[i], support );
     _key_variances[i] = KMeansSampleVariance( sum_x2[i], sum_x[i], support );
   }
   //
   // Calc error means and variances
   // NOTE(review): errors are grouped by FindClusterIdx() but normalised by the
   // supports counted from clusterMap; this assumes both assign the same cluster.
   sum_x = vector<float>( _k, 0.);
   sum_x2 = vector<float>( _k, 0.);
   float tot_sum_x = 0.0;
   float tot_sum_x2 = 0.0;
   for( size_t i=0; i<coordData.size(); i++ ){
     int clusterIdx = FindClusterIdx( coordData[i] );
     const float err = _key_means[ clusterIdx ] - dataKey[i];
     const float errSq = err*err;
     sum_x[ clusterIdx ] += err;
     sum_x2[ clusterIdx ] += errSq;
     tot_sum_x += err;
     tot_sum_x2 += errSq;
   }
   _errMean = vector< float >( _k );
   _lowerConfBound = vector< float >( _k );
   _upperConfBound = vector< float >( _k );
   for( int i=0; i< _k; i++ ){
     const size_t support = size_t( _key_supports[i] );
     _errMean[i] = KMeansMean( sum_x[i], support );
     float errVar = KMeansSampleVariance( sum_x2[i], sum_x[i], support );
     float t_val = TDist( _key_supports[i] );
     // prediction-interval half width: t * sqrt( var * (1 + 1/n) );
     // the 1/n term is dropped for an empty cluster to avoid dividing by zero
     float invSupport = ( support > 0 ) ? 1.0f/float( support ) : 0.0f;
     float halfWidth = t_val*sqrt( errVar * (1.0+invSupport) );
     _lowerConfBound[i] = _errMean[i] - halfWidth;
     _upperConfBound[i] = _errMean[i] + halfWidth;
   }
   //
   const size_t total = coordData.size();
   const float invTotal = ( total > 0 ) ? 1.0f/float( total ) : 0.0f;
   _totalErrMean = KMeansMean( tot_sum_x, total );
   float totalErrVar = KMeansSampleVariance( tot_sum_x2, tot_sum_x, total );
   _totalBoundStub = sqrt( totalErrVar * (1.0+invTotal) );
   //_totalLowerConfBound = _totalErrMean - TDist( coordData.size() )*sqrt( totalErrVar * (1.0+( 1.0/float(coordData.size()) )) );
   //_totalUpperConfBound = _totalErrMean + TDist( coordData.size() )*sqrt( totalErrVar * (1.0+( 1.0/float(coordData.size()) )) );
   //  return labels
   cout << "#   Error:\n";
   cout << "#     Mean Squared Error (MSE) is: " << KMeansMean( tot_sum_x2, total ) << endl;
   cout << "#     Error Mean is : " << _totalErrMean << endl;
   cout << "#     Error Variance is : " << totalErrVar << endl;
   //
   return( clusterMap );
 }