Example #1
void dnn::train(mat * weights, mat * biases)
{
  //z stores forward activations, delta stores backward errors
  //allocate z and delta buffers
  float ** z=new float*[num_layers];
  for(int i=0;i<num_layers;i++)
    z[i]=new float[num_units_ineach_layer[i]];

  float ** delta=new float*[num_layers-1]; 
  for(int i=0;i<num_layers-1;i++)
    delta[i]=new float[num_units_ineach_layer[i+1]];

  //each iteration, we fetch the parameters from the PS table to local parameter buffers
  //local_weights is the local copy of weight matrices and local_biases is the local copy of bias vectors
  //create parameter buffer
  float *** local_weights=new float **[num_layers-1];
  for(int l=0;l<num_layers-1;l++){
    int dim1=num_units_ineach_layer[l+1], dim2=num_units_ineach_layer[l];
    local_weights[l]=new float*[dim1];
    for(int i=0;i<dim1;i++){
      local_weights[l][i]=new float[dim2];
      memset(local_weights[l][i],0,sizeof(float)*dim2);
    }
  }
  float ** local_biases=new float*[num_layers-1];
  for(int l=0;l<num_layers-1;l++){
    local_biases[l]=new float[num_units_ineach_layer[l+1]];
    memset(local_biases[l],0,sizeof(float)*num_units_ineach_layer[l+1]);
  }

  //delta_weights stores the gradient of weight matrices and delta_biases stores the gradient of bias vectors
  float *** delta_weights=new float **[num_layers-1];
  for(int l=0;l<num_layers-1;l++){
    int dim1=num_units_ineach_layer[l+1], dim2=num_units_ineach_layer[l];
    delta_weights[l]=new float*[dim1];
    for(int i=0;i<dim1;i++){
      delta_weights[l][i]=new float[dim2];
      memset(delta_weights[l][i],0,sizeof(float)*dim2);
    }
  }
  float ** delta_biases=new float*[num_layers-1];
  for(int l=0;l<num_layers-1;l++){
    delta_biases[l]=new float[num_units_ineach_layer[l+1]];
    memset(delta_biases[l],0,sizeof(float)*num_units_ineach_layer[l+1]);
  }

  int * idxes_batch=new int[size_minibatch];
  int inner_iter=num_train_data/num_worker_threads/size_minibatch;
  srand(time(NULL));
  int it=0;

  //randomly permute the row indexes to reduce thread contention on the tables
  int ** rand_idxes_weight=new int*[num_layers-1];
  for(int l=0;l<num_layers-1;l++)
  {
    int dim=num_units_ineach_layer[l+1];
    rand_idxes_weight[l]=new int[dim];
    std::vector<int> output_idx_perm;
    for(int i=0;i<dim;i++)
      output_idx_perm.push_back(i);
    std::random_shuffle(output_idx_perm.begin(), output_idx_perm.end(), myrandom);
    for(int i=0;i<dim;i++)
      rand_idxes_weight[l][i]=output_idx_perm[i];
  }
  int * rand_idxes_bias=new int[num_layers-1];
  {
    std::vector<int> output_idx_perm;
    for(int i=0;i<num_layers-1;i++)
      output_idx_perm.push_back(i);
    std::random_shuffle(output_idx_perm.begin(), output_idx_perm.end(), myrandom);
    for(int i=0;i<num_layers-1;i++)
      rand_idxes_bias[i]=output_idx_perm[i];
  }

  for(int iter=0;iter<num_epochs;iter++){
    for(int i=0;i<inner_iter;i++){
      //sample mini batch
      rand_init_vec_int(idxes_batch,size_minibatch, num_train_data);
      //run sgd
      sgd_mini_batch(idxes_batch, weights, biases, local_weights,  local_biases, delta_weights,  delta_biases, z,  delta, rand_idxes_weight,rand_idxes_bias);

      // Advance Parameter Server iteration
      petuum::PSTableGroup::Clock();

      it++;

      //evaluate objective function
      if(it%num_iters_evaluate==0&&client_id==0&&(*thread_id)==0){
        petuum::RowAccessor row_acc;
        //fetch parameters
        for(int l=0;l<num_layers-1;l++){
          int dim1=num_units_ineach_layer[l+1], dim2=num_units_ineach_layer[l];
          for(int j=0;j<dim1;j++){
            const auto& r = weights[l].Get<petuum::DenseRow<float> >(j, &row_acc);
            for(int i=0;i<dim2;i++)
              local_weights[l][j][i]=r[i];
          }
        }
        for(int l=0;l<num_layers-1;l++){
          int dim=num_units_ineach_layer[l+1];
          const auto& r = biases[l].Get<petuum::DenseRow<float> >(0, &row_acc);
          for(int j=0;j<dim;j++)
            local_biases[l][j]=r[j];
        }
        float loss=compute_loss(local_weights, local_biases);
        std::cout<<"client "<<client_id<<" worker "<<(*thread_id)<<" iter "<<it<<" loss is "<<loss<<std::endl;
      }
    }
  }

  //release data
  delete []idxes_batch;
  for(int i=0;i<num_layers-1;i++)
    delete[]delta[i];
  delete[]delta;
  for(int i=0;i<num_layers;i++)
    delete[]z[i];
  delete []z;
	
  //release parameter buffer
  for(int l=0;l<num_layers-1;l++){
    int dim1=num_units_ineach_layer[l+1];
    for(int i=0;i<dim1;i++)
      delete []local_weights[l][i];
    delete[]local_weights[l];
  }
  delete[]local_weights;

  for(int l=0;l<num_layers-1;l++)
    delete []local_biases[l];
  delete []local_biases;

  for(int l=0;l<num_layers-1;l++)
  {
    int dim1=num_units_ineach_layer[l+1];
    for(int i=0;i<dim1;i++)
      delete []delta_weights[l][i];
    delete[]delta_weights[l];
  }
  delete[]delta_weights;
  for(int l=0;l<num_layers-1;l++)
    delete []delta_biases[l];
  delete []delta_biases;
  for(int l=0;l<num_layers-1;l++)
    delete []rand_idxes_weight[l];
  delete []rand_idxes_weight;
  delete []rand_idxes_bias;
}
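
Note: sgd_mini_batch, compute_loss, and the sampling helper rand_init_vec_int are defined elsewhere in this class. As a point of reference, a minimal sketch of rand_init_vec_int that is consistent with how it is called above (fill an int array with a given number of uniformly random data-point indices in [0, range), sampling with replacement) might look like this; the actual implementation may differ:

#include <cstdlib>

// Hypothetical sketch, not the original helper: draw size random
// data-point indices in [0, range), sampling with replacement.
void rand_init_vec_int(int * vec, int size, int range) {
  for (int i = 0; i < size; i++)
    vec[i] = rand() % range;
}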
Example #2
// train model
void rbm::train(float ** weights, float * visible_bias, float * hidden_bias){
    std::cout << "Training model..." << std::endl;

    // weights, visible_bias and hidden_bias were randomly initialized
    // Allocate delta_weights, delta_hidden_bias, and delta_visible_bias; cd() zeroes them at every iteration
    float ** delta_weights = new float*[num_hidden_units];
    for (int i=0; i<num_hidden_units; i++){
        delta_weights[i] = new float[num_visible_units];
    }
    float * delta_hidden_bias = new float[num_hidden_units];
    float * delta_visible_bias = new float[num_visible_units];

    float ** features = new float*[number_of_data_points];
    for (int i=0; i<number_of_data_points; i++){
        features[i] = new float[num_visible_units];
    }

    for (int i=0; i<number_of_data_points; i++){
        for (int j=0; j<num_visible_units; j++){
            features[i][j] = input_features[i][j];
        }
    }

    // random indexes of the data points that will be chosen at each iteration of sga
    int * idxes_batch = new int[size_minibatch];

    // number of mini-batches per epoch
    int inner_iter = number_of_data_points/size_minibatch;
    // Perform K-step CD num_epochs times
    for (int iter=0; iter<num_epochs; iter++){
        for (int i=0; i<inner_iter; i++){
            // sample a mini-batch and run CD on it
            rand_init_vec_int(idxes_batch, size_minibatch, number_of_data_points);

            // cd() resets the deltas to zero at every iteration
            cd(features, weights, hidden_bias, visible_bias, delta_weights, delta_hidden_bias,
               delta_visible_bias, K, idxes_batch, size_minibatch);

            // update parameters
            update_weights(weights, delta_weights, learning_rate, num_hidden_units, num_visible_units, size_minibatch);
            update_visible_bias(visible_bias, delta_visible_bias, learning_rate, num_visible_units, size_minibatch);
            update_hidden_bias(hidden_bias, delta_hidden_bias, learning_rate, num_hidden_units, size_minibatch);
        }
    }

    // release data
    delete[] idxes_batch;
    delete[] delta_hidden_bias;
    delete[] delta_visible_bias;

    for (int i=0; i<num_hidden_units; i++){
        delete[] delta_weights[i];
    }
    delete[] delta_weights;

    for (int i=0; i<number_of_data_points; i++){
        delete[] features[i];
    }
    delete[] features;
}
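
Note: the helpers cd, update_weights, update_visible_bias, and update_hidden_bias are defined elsewhere. Assuming the usual CD-k update rule (a gradient-ascent step on the log-likelihood using the statistics accumulated in the delta buffers, averaged over the mini-batch), update_weights could be sketched as follows; this is an assumption inferred from the call site above, not the original code:

// Hypothetical sketch: apply the averaged CD gradient as a
// gradient-ascent step. update_visible_bias and update_hidden_bias
// would apply the same rule to their 1-D bias arrays.
void update_weights(float ** weights, float ** delta_weights, float learning_rate,
                    int num_hidden_units, int num_visible_units, int size_minibatch) {
    for (int i=0; i<num_hidden_units; i++){
        for (int j=0; j<num_visible_units; j++){
            weights[i][j] += learning_rate * delta_weights[i][j] / size_minibatch;
        }
    }
}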
Example #3
void DML::Learn(float learn_rate, int epochs, const char * model_file) {
  // tmp buffers
  float *vec_buf_1 = new float[src_feat_dim];
  float *vec_buf_2 = new float[dst_feat_dim];
  // assign id to threads
  if (!thread_id.get()) {
    thread_id.reset(new int(thread_counter++));
  }
  // get access to tables
  petuum::PSTableGroup::RegisterThread();
  mat L = petuum::PSTableGroup::GetTableOrDie<float>(0);
  // Run additional iterations to let stale values finish propagating
  for (int iter = 0; iter < staleness; ++iter) {
    petuum::PSTableGroup::Clock();
  }
  // initialize parameters
  if (client_id == 0 && (*thread_id) == 0) {
    std::cout << "init parameters" << std::endl;
    for (int i = 0; i < dst_feat_dim; i++) {
      petuum::DenseUpdateBatch<float> update_batch(0, src_feat_dim);
      for (int j = 0; j < src_feat_dim; j++) {
        float a = rand()%1000/1000.0/2000;
        update_batch[j] = a;
      }
      L.DenseBatchInc(i, update_batch);
    }
    std::cout << "init parameters done" << std::endl;
  }
  process_barrier->wait();
  if (client_id == 0 && (*thread_id) == 0)
    std::cout << "training starts" << std::endl;
  // stagger worker start times
  sleep((client_id+(*thread_id))*2);
  // randomly permute the similar-pair, different-pair, and row indexes
  std::vector<int> idx_perm_simi_pairs;
  for (int i = 0; i < num_simi_pairs; i++)
    idx_perm_simi_pairs.push_back(i);
  std::random_shuffle(idx_perm_simi_pairs.begin(),
    idx_perm_simi_pairs.end(), myrandom2);
  int * idx_perm_arr_simi_pairs = new int[num_simi_pairs];
  for (int i = 0; i < num_simi_pairs; i++)
    idx_perm_arr_simi_pairs[i] = idx_perm_simi_pairs[i];
  std::vector<int> idx_perm_diff_pairs;
  for (int i = 0; i < num_diff_pairs; i++)
    idx_perm_diff_pairs.push_back(i);
  std::random_shuffle(idx_perm_diff_pairs.begin(),
    idx_perm_diff_pairs.end(), myrandom2);
  int * idx_perm_arr_diff_pairs = new int[num_diff_pairs];
  for (int i = 0; i < num_diff_pairs; i++)
    idx_perm_arr_diff_pairs[i] = idx_perm_diff_pairs[i];
  std::vector<int> idx_perm;
  for (int i = 0; i < dst_feat_dim; i++)
    idx_perm.push_back(i);
  std::random_shuffle(idx_perm.begin(), idx_perm.end(), myrandom2);
  int * idx_perm_arr = new int[dst_feat_dim];
  for (int i = 0; i < dst_feat_dim; i++)
    idx_perm_arr[i] = idx_perm[i];
  
  //local buffer of parameters
  float ** local_paras = new float *[dst_feat_dim];
  for (int i = 0; i < dst_feat_dim; i++)
    local_paras[i] = new float[src_feat_dim];

  float ** grad=new float *[dst_feat_dim];
  for ( int i=0;i<dst_feat_dim;i++)
    grad[i]=new float[src_feat_dim];

  int inner_iters=(num_simi_pairs+num_diff_pairs)/size_mb/num_clients/num_worker_threads;
  int * mb_idx=new int[size_mb/2];

  for (int e = 0; e < epochs; e++) {
    for(int it=0;it<inner_iters;it++){
      //copy parameters
      petuum::RowAccessor row_acc;
      for (int i = 0; i < dst_feat_dim; i++) {

        const petuum::DenseRow<float>& r = L.Get<petuum::DenseRow<float> >(i, &row_acc);

        for (int j = 0; j < src_feat_dim; j++) {
          local_paras[i][j] = r[j];
        }
      }
      //evaluate
      if (client_id == 0 && (*thread_id) == 0 && it%num_iters_evaluate==0) {
        float simi_loss = 0, diff_loss = 0, total_loss = 0;
        Evaluate(local_paras, simi_loss, diff_loss, total_loss, vec_buf_1, vec_buf_2);
        std::cout << "epoch: " << e << " iter: " << it << " loss: " << total_loss << std::endl;
      }
      //set gradient to zero
      for(int i=0;i<dst_feat_dim;i++)
        memset(grad[i], 0, src_feat_dim*sizeof(float));

      rand_init_vec_int(mb_idx, size_mb/2,num_simi_pairs);
      for(int i=0;i<size_mb/2;i++){
        int idx = idx_perm_arr_simi_pairs[mb_idx[i]];
        Update(local_paras, grad, data[simi_pairs[idx].x], data[simi_pairs[idx].y],
               1, vec_buf_1, vec_buf_2);
      }
      rand_init_vec_int(mb_idx, size_mb/2,num_diff_pairs);

      for(int i=0;i<size_mb/2;i++){
        int idx = idx_perm_arr_diff_pairs[mb_idx[i]];
        Update(local_paras, grad, data[diff_pairs[idx].x], data[diff_pairs[idx].y],
               0, vec_buf_1, vec_buf_2);
      }
      //update parameters
      float coeff = -learn_rate*2/size_mb;
      for (int i = 0; i < dst_feat_dim; i++) {
        petuum::DenseUpdateBatch<float> update_batch(0,src_feat_dim);
        for (int j = 0; j < src_feat_dim; j++) 
          update_batch[j]= coeff*grad[i][j];
        L.DenseBatchInc(i, update_batch);
      }
      petuum::PSTableGroup::Clock();
    }
  }
  if (client_id == 0 && (*thread_id) == 0)
    SaveModel(L, model_file);

  delete[] mb_idx;
  delete[] idx_perm_arr_simi_pairs;
  delete[] idx_perm_arr_diff_pairs;
  delete[] idx_perm_arr;
  delete[] vec_buf_1;
  delete[] vec_buf_2;
  for(int i=0;i< dst_feat_dim;i++)
    delete[]local_paras[i];
  delete[] local_paras;
  for(int i=0;i<dst_feat_dim;i++)
    delete[]grad[i];
  delete[]grad;
  petuum::PSTableGroup::DeregisterThread();
}
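
Note: myrandom2 here (and myrandom in Example #1) is passed as the third argument to std::random_shuffle, which expects a callable returning a uniformly distributed value in [0, n). A minimal sketch of such a generator, assuming the conventional rand()-based form, is:

#include <cstdlib>

// Hypothetical sketch of the shuffle generator: return a uniformly
// random index in [0, n).
int myrandom2(int n) {
  return std::rand() % n;
}

std::random_shuffle itself was deprecated in C++14 and removed in C++17; newer code would use std::shuffle with a <random> engine instead.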