Example #1
0
// Runs the lattice computation on `in` and writes the result into `out`.
//
// out:     destination matrix; reallocated to in's shape (zero-filled) when
//          its current shape differs from in's.
// in:      source matrix; only its raw buffer and row count are forwarded.
// reverse: forwarded unchanged to seqCompute / sseCompute.
//
// Inputs with at most two rows go through seqCompute, everything else through
// sseCompute (presumably a scalar and an SSE code path - confirm against
// their definitions).
void Permutohedral::compute ( MatrixXf & out, const MatrixXf & in, bool reverse ) const
{
    // Allocate a genuinely zeroed buffer. The previous `0*in` idiom is not a
    // zero-fill: 0*NaN and 0*Inf evaluate to NaN, so non-finite inputs leaked
    // into the freshly allocated output.
    if( out.cols() != in.cols() || out.rows() != in.rows() )
        out = MatrixXf::Zero( in.rows(), in.cols() );
    if( in.rows() <= 2 )
        seqCompute( out.data(), in.data(), in.rows(), reverse );
    else
        sseCompute( out.data(), in.data(), in.rows(), reverse );
}
Example #2
0
// Hands a, b and c to the Fortran-style sgemm_ routine without transposing
// either operand (both transpose flags are `notrans`).
//
// `fone` is passed as both scalar arguments, so with the standard BLAS SGEMM
// contract (C := alpha*A*B + beta*C) this accumulates a*b into c - assuming
// fone is 1.0f, TODO confirm at its definition.
//
// c must already have the result shape; its rows/cols define the problem size.
void blas_gemm(const MatrixXf& a, const MatrixXf& b, MatrixXf& c)
{
  // Problem dimensions: c is rows x cols, shared inner dimension is a.cols().
  int rows = c.rows();
  int cols = c.cols();
  int inner = a.cols();

  // Leading dimension of each operand is its row count.
  int a_ld = a.rows();
  int b_ld = b.rows();
  int c_ld = c.rows();

  // sgemm_ takes non-const pointers even for its read-only operands.
  float* a_data = const_cast<float*>(a.data());
  float* b_data = const_cast<float*>(b.data());

  sgemm_(&notrans, &notrans, &rows, &cols, &inner, &fone,
         a_data, &a_ld,
         b_data, &b_ld, &fone,
         c.data(), &c_ld);
}
Example #3
0
// Rescales the step dJ in place.
//
// For every entry where dJ is negative, -J/dJ is the step length at which
// J + t*dJ would hit zero; the smallest such length over all entries is taken
// and multiplied by 1/e, and dJ is multiplied by the result.
//
// NOTE(review): if no entry of dJ is negative the scale stays infinite -
// confirm callers never pass such a direction.
void MapOptimizer::scale_stepsize (MatrixXf & dJ) const
{
  if (logging) LOG("  computing stepsize");

  const float * restrict joint = m_joint.data();
  const float * restrict step = dJ.data();
  const size_t count = dom.size * cod.size;

  // smallest positive time at which some entry of the joint would reach zero
  float time_to_boundary = INFINITY;

  for (size_t i = 0; i != count; ++i) {

    const float step_i = step[i];

    // only entries that are decreasing can reach the boundary
    if (!(step_i < 0)) continue;

    imin(time_to_boundary, -joint[i] / step_i);
  }

  // stay a factor of e short of the boundary
  const float keepaway = 1 / M_E;
  const float scale = time_to_boundary * keepaway;
  if (logging) LOG("   scaling step size by " << scale);

  dJ *= scale;
}
Example #4
0
double density (const MatrixXf & x)
{
  const size_t I = x.rows() * x.cols();

  double sum_x1 = 0;
  double sum_x2 = 0;

  const float * restrict x_ = x.data();

  for (size_t i = 0; i < I; ++i) {
    double xi = x_[i];

    sum_x1 += max(-xi,xi);
    sum_x2 += xi * xi;
  }

  return sqr(sum_x1) / sum_x2 / I;
}
Example #5
0
	// Compute d/df a^T*K*b
	//
	// Returns a matrix with the shape of f_, filled in by lattice_.gradient
	// from the raw buffers of a and b; a.rows() is passed as the last
	// argument (presumably the value dimension - confirm against
	// lattice_.gradient).
	MatrixXf kernelGradient( const MatrixXf & a, const MatrixXf & b ) const {
		// Start from true zeros: the previous `0*f_` yields NaN wherever f_
		// holds NaN or Inf, which would poison the gradient buffer.
		MatrixXf g = MatrixXf::Zero( f_.rows(), f_.cols() );
		lattice_.gradient( g.data(), a.data(), b.data(), a.rows() );
		return g;
	}
Example #6
0
// Adjusts the direction dJ in place so that its sum over each index vanishes,
// by repeatedly adding corrections of the form J(y,x) * (px(y) + py(x)).
//
// dJ:  direction to constrain; modified in place. Its buffer is read as
//      dom.size consecutive runs of cod.size floats.
// tol: the sweep loop stops early once the largest absolute partial sum seen
//      in a sweep drops below tol; otherwise it runs for 100 sweeps.
//
// When `debug` is set, the remaining constraint violations and a few
// conditioning statistics are reported through DEBUG.
void MapOptimizer::constrain_direction (MatrixXf & dJ, float tol) const
{
  // Enforce the simultaneous constraints
  //
  //   /\x. sum y. dJ(y,x) = 0
  //   /\y. sum x. dJ(y,x) = 0
  //
  // We combine the two constraints by iteratively weakly enforcing both:
  // Let Px,Py project to the feasible subspaces for constraints 1,2, resp.
  // Each projection has eigenvalues in {0,1}.
  // We approximate the desired projection Pxy as a linear combination of Px,Py
  //   Pxy' = 1 - alpha ((1-Px) + (1-Py))
  // which has eigenvalues in {1} u [1 - alpha, 1 - 2 alpha].
  // Hence Pxy = lim n->infty Pxy'^n, where convergence rate depends on alpha.
  // The optimal alpha is 2/3, yielding Pxy' eigenvalues in {1} u [-1/3,1/3],
  // and resulting in project_scale = -alpha below.

  if (logging) LOG("  constraining direction");

  const size_t X = dom.size;
  const size_t Y = cod.size;

  // The priors are used as the sums of J over y and over x respectively
  // (presumably they are kept equal to J's marginals - confirm).
  const MatrixXf & J = m_joint;
  const VectorXf & sum_y_J = m_dom_prior;
  const VectorXf & sum_x_J = m_cod_prior;
  const float sum_xy_J = m_cod_prior.sum();

  // Hold the per-x and per-y correction coefficients during the iteration;
  // reused for the plain sums of dJ in the debug report below.
  // NOTE(review): indexed by x < X and y < Y, so this assumes
  // J.cols() == X and J.rows() == Y - confirm.
  VectorXf sum_y_dJ(J.cols());
  VectorXf sum_x_dJ(J.rows());

  // this is iterative, so we hand-optimize by merging loops

  const float * restrict J_ = J.data();
  const float * restrict sum_y_J_ = sum_y_J.data();
  const float * restrict sum_x_J_ = sum_x_J.data();

  float * restrict dJ_ = dJ.data();
  float * restrict project_y_ = sum_y_dJ.data();
  float * restrict project_x_ = sum_x_dJ.data();
  const float project_scale = -2/3.0;

  // Scratch accumulator for sum over x of dJ, one slot per y.
  Vector<float> accum_x_dJ(Y);
  float * restrict accum_x_dJ_ = accum_x_dJ;

  // accumulate first projection
  accum_x_dJ.zero();

  for (size_t x = 0; x < X; ++x) {

    // run of Y entries belonging to this x
    const float * restrict dJ_x_ = dJ_ + Y * x;

    float accum_y_dJ = 0;

    for (size_t y = 0; y < Y; ++y) {

      float dJ_xy = dJ_x_[y];

      accum_y_dJ += dJ_xy;
      accum_x_dJ_[y] += dJ_xy;
    }

    // coefficient for this x: scaled violation relative to the sum of J over y
    project_y_[x] = project_scale * accum_y_dJ / sum_y_J_[x];
  }

  for (size_t y = 0; y < Y; ++y) {
    project_x_[y] = project_scale * accum_x_dJ_[y] / sum_x_J_[y];
    accum_x_dJ_[y] = 0;  // reset for the next sweep
  }

  // apply previous projection and accumulate next projection
  for (size_t iter = 0; iter < 100; ++iter) {

    // largest absolute partial sum observed during this sweep
    float error = 0;

    for (size_t x = 0; x < X; ++x) {

      const float * restrict J_x_ = J_ + Y * x;
      float * restrict dJ_x_ = dJ_ + Y * x;

      float accum_y_dJ = 0;

      for (size_t y = 0; y < Y; ++y) {

        // Update dJ with the previous sweep's coefficients and, in the same
        // pass, accumulate the sums of the updated values.
        float dJ_xy = dJ_x_[y] += J_x_[y] * (project_x_[y] + project_y_[x]);

        accum_y_dJ += dJ_xy;
        accum_x_dJ_[y] += dJ_xy;
      }

      // project_y_[x] is no longer needed for this sweep once run x is done,
      // so it can be overwritten with the next sweep's coefficient.
      project_y_[x] = project_scale * accum_y_dJ / sum_y_J_[x];
      imax(error, max(-accum_y_dJ, accum_y_dJ));
    }

    for (size_t y = 0; y < Y; ++y) {

      float accum_x_dJ_y = accum_x_dJ_[y];
      accum_x_dJ_[y] = 0;

      project_x_[y] = project_scale * accum_x_dJ_y / sum_x_J_[y];
      imax(error, max(-accum_x_dJ_y, accum_x_dJ_y));
    }

    if (error < tol) {
      if (logging) {
        LOG("   after " << (1+iter) << " iterations, error < " << error);
      }
      break;
    }
  }

  // apply final projection
  // (the coefficients computed in the last sweep have not been applied yet)
  for (size_t x = 0; x < X; ++x) {

    const float * restrict J_x_ = J_ + Y * x;
    float * restrict dJ_x_ = dJ_ + Y * x;

    for (size_t y = 0; y < Y; ++y) {
      dJ_x_[y] += J_x_[y] * (project_x_[y] + project_y_[x]);
    }
  }

  if (debug) {

    // Recompute the sums of the constrained dJ from scratch for reporting.
    sum_y_dJ = dJ.colwise().sum();
    sum_x_dJ = dJ.rowwise().sum();
    float sum_xy_dJ = sum_x_dJ.sum();

    // sqrt(max(v^2)) is the largest absolute entry of v
    DEBUG("max constraint errors = "
        << sqrt(sum_x_dJ.array().square().maxCoeff())<< ", "
        << sqrt(sum_y_dJ.array().square().maxCoeff())<< ", "
        << sum_xy_dJ);

    // same errors, relative to the corresponding sums of J
    sum_y_dJ.array() /= sum_y_J.array();
    sum_x_dJ.array() /= sum_x_J.array();
    sum_xy_dJ /= sum_xy_J;

    DEBUG("max relative constraints errors = "
        << sqrt(sum_x_dJ.array().square().maxCoeff()) << ", "
        << sqrt(sum_y_dJ.array().square().maxCoeff()) << ", "
        << sum_xy_dJ);

    DEBUG("max(|dJ|) = " << dJ.array().abs().maxCoeff()
        << ", rms(dJ) = " << sqrt(dJ.array().square().mean()));
    DEBUG("max(J) / min(J) = " << (J.maxCoeff() / J.minCoeff()));
    DEBUG("max(sum x. J) / min(sum x. J) = "
        << (sum_x_J.maxCoeff() / sum_x_J.minCoeff()));
    DEBUG("max(sum y. J) / min(sum y. J) = "
        << (sum_y_J.maxCoeff() / sum_y_J.minCoeff()));
  }
}
// M1 is 2x6 with column-major storage, so its buffer holds the values in the
// order 1,7,2,8,3,9,4,10,5,11,6,12.
MatrixXf M1(2, 6);
M1 << 1, 2, 3, 4, 5, 6,
      7, 8, 9, 10, 11, 12;

// View the same buffer as a 6x2 matrix; the Map is built on M1.data().
Map<MatrixXf> M2(M1.data(), 6, 2);
cout << "M2:" << endl;
cout << M2 << endl;
int main (int argc, char *argv[]) {
    // handle cmd args
	// TODO: add <hidden_layer_sizes> (i.e., "100-100-50") argument processing
	int batch_size;
	if ( argc > 1 ) {
		printf( " Usage: ./neuralnet_mpi <batch_size>");
		exit( 0 );
	} else if ( argc == 1 ) {
		batch_size = atoi( argv[0] ); // mini-batch processing
	} else {
		batch_size = INT_MIN; // batch processing
	}

	// initialize/populate mpi specific vars local to each node
	int  numtasks, taskid, len, dest, source;
	char hostname[MPI_MAX_PROCESSOR_NAME];
	MPI_Status status;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
	MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
	MPI_Get_processor_name(hostname, &len);


	/***** MASTER TASK ONLY ******/

	// perform data preprocessing based on number of workers and batch_size
	if (taskid == MASTER) {
		printf( "MASTER: Number of MPI tasks is: %d\n",numtasks );

		/* DATA PREPROCESSING */

		// Load dataset
        // TEMP: populate fictitious dataset
        // NOTE: record max and min for each column in the max/min arrays
		long datasize = 16;
        long numfeats = 4; // to be populated while loading dataset
        long numlabels = 1; // for testing binary classification only
        
        //float min[numfeats]; // stores the overall min for each column
        //float max[numfeats]; // stores the overall max for each column
        MatrixXf data = MatrixXf( datasize, numfeats ); // row-major order!
        VectorXf labels_vec = VectorXf( datasize );
        
        for ( long i=0; i<datasize; ++i ) {
            for ( long j=0; j<numfeats; ++j ) {
                data(i, j) = i + j;
            }
            labels_vec[i] = 1.0; // populate a vector with labels for each instance
        }
        data(0,0) = 100.1;
        data(1,0) = 200.1;
        data(0,1) = 300.1;

        // Shuffle data/labels to randomize instances
        
        // Reformat labels for multi-class classification
        MatrixXf labels = MatrixXf::Zero( datasize, numlabels );
        // TODO: create class_map from unique classes with k:v = <true_label>:<column_index> 
        for ( long i=0; i<datasize; ++i ) {
            // labels( i, class_map[y_vec[i]] ) = 1.0;
        }

        // Scale features
        // TODO: use max/min arrays to scale data to between -1 and 1
	        // # scales all features in dataset X to values between new_min and new_max
			// X_min, X_max = X.min(0), X.max(0)
			// return (((X - X_min) / (X_max - X_min + 0.000001)) * (new_max - new_min)) + new_min


        /* DATA MARSHALLING */

        long chunksize = datasize / numtasks;
        
        // load MASTER data
        MatrixXf X = MatrixXf( chunksize, numfeats );
        MatrixXf y = MatrixXf( chunksize, numlabels );
        memcpy( X.data(), data.data(), chunksize * numfeats * sizeof(float) ); 
        memcpy( y.data(), labels.data(), chunksize * numlabels *  sizeof(float) );
        std::cout << "MASTER X:\n" << X << std::endl;
        std::cout << "MASTER y:\n" << y << std::endl;
        
        // send data to workers
        long offset = chunksize;
		for (dest=1; dest<numtasks; dest++) {
			MPI_Send( &chunksize, 1, MPI_LONG, dest, TAG_0, MPI_COMM_WORLD );
			MPI_Send( &numfeats, 1, MPI_LONG, dest, TAG_0, MPI_COMM_WORLD );
			MPI_Send( &numlabels, 1, MPI_LONG, dest, TAG_0, MPI_COMM_WORLD );
			MPI_Send( data.data() + offset * numfeats, chunksize * numfeats, MPI_FLOAT, dest, TAG_0, MPI_COMM_WORLD );
			MPI_Send( labels.data() + offset * numlabels, chunksize * numlabels, MPI_FLOAT, dest, TAG_0, MPI_COMM_WORLD );
			printf( "Sent %ld instances to task %d offset= %ld\n", chunksize, dest, offset );
			offset += chunksize;
		}


		/* CLASSIFICATION MODEL INITIALIZATION */

		// pass network structure and processing parameters message

		// initialze MASTER NN

		// initialize network parameters
		// TODO: create randomized set of parameters stored in contiguous memory
		// to be packed and unpacked as needed.  The important part of doing this on
		// the MASTER first is that all networks will start with the same parameter set.

		// set MASTER NN parameters


		/* OPTIMIZATION */

		// optimize
		offset = 0;


		/* PREDICTION */

		// predict on validation set

		// output prediction results


		/* MODEL STORAGE */

		// store parameters
	} 

	/***** NON-MASTER TASKS ONLY *****/

	if (taskid > MASTER) {
		printf ("Hello from task %d on %s!\n", taskid, hostname);

		/* DATA INITIALIZATION */

		long chunksize, numfeats, numlabels;
		source = MASTER;
		
		// recieve data partition
		MPI_Recv( &chunksize, 1, MPI_LONG, source, MPI_ANY_TAG, MPI_COMM_WORLD, &status );
		MPI_Recv( &numfeats, 1, MPI_LONG, source, MPI_ANY_TAG, MPI_COMM_WORLD, &status );
		MPI_Recv( &numlabels, 1, MPI_LONG, source, MPI_ANY_TAG, MPI_COMM_WORLD, &status );
		printf( "Task %d chunksize = %ld\n", taskid, chunksize );
		printf( "Task %d numfeats = %ld\n", taskid, numfeats );
		printf( "Task %d numlabels = %ld\n", taskid, numlabels );
        
        // initialize local data storage
        MatrixXf X = MatrixXf( chunksize, numfeats );
        MatrixXf y = MatrixXf( chunksize, numlabels );

        // receive data and labels
		MPI_Recv( X.data(), chunksize * numfeats, MPI_FLOAT, source, MPI_ANY_TAG, MPI_COMM_WORLD, &status );
		MPI_Recv( y.data(), chunksize * numlabels, MPI_FLOAT, source, MPI_ANY_TAG, MPI_COMM_WORLD, &status );
		std::cout << "task " << taskid << " X:\n" << X << std::endl;
        std::cout << "task " << taskid << " y:\n" << y << std::endl;
        
        
        /* CLASSIFICATION MODEL INITIALIZATION */

		// recieve network structure and processing paramters info

		// initialize local neuralnet_openmp instance
		// TODO: each NN instance is set with identical structure and processing parameters

		// recieve network parameters and pass to local instance
		// TODO: each NN instance gets the same set of randomized parameters


		/* OPTIMIZATION */

		// optimize

	}

	MPI_Finalize();
}
Example #9
0
int main(void)
{
  cout << "Eigen v" << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl;
  static const int R = 288;
  static const int N = R*(R+1)/2;
  static const int M = 63;
  static const int HALF_M = M/2;
  static const float nsigma = 2.5f;

  MatrixXf data = MatrixXf::Random(M, N);
  MatrixXf mask = MatrixXf::Zero(M, N);
  MatrixXf result = MatrixXf::Zero(1, N);
  VectorXf std = VectorXf::Zero(N);
  VectorXf centroid = VectorXf::Zero(N);
  VectorXf mean = VectorXf::Zero(N);
  VectorXf minval = VectorXf::Zero(N);
  VectorXf maxval = VectorXf::Zero(N);

  cout << "computing..." << flush;
  double t = GetRealTime();

  // computes the exact median
  if (M&1)
  {
#pragma omp parallel for
    for (int i = 0; i < N; i++)
    {
      vector<float> row(data.data()+i*M, data.data()+(i+1)*M);
      nth_element(row.begin(), row.begin()+HALF_M, row.end());
      centroid(i) = row[HALF_M];
    }
  }
  // nth_element guarantees x_0,...,x_{n-1} < x_n
  else
  {
#pragma omp parallel for
    for (int i = 0; i < N; i++)
    {
      vector<float> row(data.data()+i*M, data.data()+(i+1)*M);
      nth_element(row.begin(), row.begin()+HALF_M, row.end());
      centroid(i) = row[HALF_M];
      centroid(i) += *max_element(row.begin(), row.begin()+HALF_M);
      centroid(i) *= 0.5f;
    }
  }

  // compute the mean
  mean = data.colwise().mean();

  // compute std (x) = sqrt ( 1/N SUM_i (x(i) - mean(x))^2 )
  std = (((data.rowwise() - mean.transpose()).array().square()).colwise().sum() *
         (1.0f / M))
            .array()
            .sqrt();

  // compute n sigmas from centroid
  minval = centroid - std * nsigma;
  maxval = centroid + std * nsigma;
  
  // compute clip mask
  for (int i = 0; i < N; i++)
  {
    mask.col(i) = (data.col(i).array() > minval(i)).select(VectorXf::Ones(M), 0.0f);
    mask.col(i) = (data.col(i).array() < maxval(i)).select(VectorXf::Ones(M), 0.0f);
  }

  // apply clip mask to data
  data.array() *= mask.array();

  // compute mean such that we ignore clipped data, this is our final result
  result = data.colwise().sum().array() / mask.colwise().sum().array();

  t = GetRealTime() - t;
  cout << "[done]" << endl << endl;

  size_t bytes = data.size()*sizeof(float);
  cout << "data: " << M << "x" << N << endl;
  cout << "size: " << bytes*1e-6f << " MB" << endl;
  cout << "rate: " << bytes/(1e6f*t) << " MB/s" << endl;
  cout << "time: " << t << " s" << endl;

  return 0;
}
Example #10
0
// Builds a Vector<float> from x's element count and a pointer to x's buffer.
//
// The const qualifier is cast away to satisfy the Vector<float> constructor.
// NOTE(review): whether that constructor copies or merely wraps the buffer is
// not visible here - if it wraps, the result must not outlive or modify x.
Vector<float> as_vector (const MatrixXf & x)
{
  float * const raw = const_cast<float *>(x.data());
  return Vector<float>(x.size(), raw);
}