void dnn::train(mat * weights, mat * biases) { //z stores forward activations, delta stores backward errors //allocate z and delta buffers float ** z=new float*[num_layers]; for(int i=0;i<num_layers;i++) z[i]=new float[num_units_ineach_layer[i]]; float ** delta=new float*[num_layers-1]; for(int i=0;i<num_layers-1;i++) delta[i]=new float[num_units_ineach_layer[i+1]]; //each iteration, we fetch the prameters from the PS table to local parameter buffers //local_weights is the local copy of weight matrices and local_biases is the local copy of bias vectors //create parameter buffer float *** local_weights=new float **[num_layers-1]; for(int l=0;l<num_layers-1;l++){ int dim1=num_units_ineach_layer[l+1], dim2=num_units_ineach_layer[l]; local_weights[l]=new float*[dim1]; for(int i=0;i<dim1;i++){ local_weights[l][i]=new float[dim2]; memset(local_weights[l][i],0,sizeof(float)*dim2); } } float ** local_biases=new float*[num_layers-1]; for(int l=0;l<num_layers-1;l++){ local_biases[l]=new float[num_units_ineach_layer[l+1]]; memset(local_biases[l],0,sizeof(float)*num_units_ineach_layer[l+1]); } //delta_weights stores the gradient of weight matrices and delta_biases stores the gradient of bias vectors float *** delta_weights=new float **[num_layers-1]; for(int l=0;l<num_layers-1;l++){ int dim1=num_units_ineach_layer[l+1], dim2=num_units_ineach_layer[l]; delta_weights[l]=new float*[dim1]; for(int i=0;i<dim1;i++){ delta_weights[l][i]=new float[dim2]; memset(delta_weights[l][i],0,sizeof(float)*dim2); } } float ** delta_biases=new float*[num_layers-1]; for(int l=0;l<num_layers-1;l++){ delta_biases[l]=new float[num_units_ineach_layer[l+1]]; memset(delta_biases[l],0,sizeof(float)*num_units_ineach_layer[l+1]); } int * idxes_batch=new int[size_minibatch]; int inner_iter=num_train_data/num_worker_threads/size_minibatch; srand (time(NULL)); int it=0; //randomly permute the row indexes to reduce thread contention of tables int ** rand_idxes_weight=new int*[num_layers-1]; for(int l=0;l<num_layers-1;l++) { int dim=num_units_ineach_layer[l+1]; rand_idxes_weight[l]=new int[dim]; std::vector<int> output_idx_perm; for(int i=0;i<dim;i++) output_idx_perm.push_back(i); std::random_shuffle ( output_idx_perm.begin(), output_idx_perm.end(), myrandom); for(int i=0;i<dim;i++) rand_idxes_weight[l][i]=output_idx_perm[i]; } int * rand_idxes_bias=new int[num_layers-1]; { std::vector<int> output_idx_perm; for(int i=0;i<num_layers-1;i++) output_idx_perm.push_back(i); std::random_shuffle ( output_idx_perm.begin(), output_idx_perm.end(), myrandom); for(int i=0;i<num_layers-1;i++) rand_idxes_bias[i]=output_idx_perm[i]; } for(int iter=0;iter<num_epochs;iter++){ for(int i=0;i<inner_iter;i++){ //sample mini batch rand_init_vec_int(idxes_batch,size_minibatch, num_train_data); //run sgd sgd_mini_batch(idxes_batch, weights, biases, local_weights, local_biases, delta_weights, delta_biases, z, delta, rand_idxes_weight,rand_idxes_bias); // Advance Parameter Server iteration petuum::PSTableGroup::Clock(); it++; //evalutate objective function if(it%num_iters_evaluate==0&&client_id==0&&(*thread_id)==0) { petuum::RowAccessor row_acc; //fetch parameters for(int l=0;l<num_layers-1;l++){ int dim1=num_units_ineach_layer[l+1], dim2=num_units_ineach_layer[l]; for(int j=0;j<dim1;j++){ const auto& r = weights[l].Get<petuum::DenseRow<float> >(j, &row_acc); // weights[l].Get(j, &row_acc); // const petuum::DenseRow<float>& r = row_acc.Get<petuum::DenseRow<float> >(); for(int i=0;i<dim2;i++) local_weights[l][j][i]=r[i]; } } for(int l=0;l<num_layers-1;l++){ int dim=num_units_ineach_layer[l+1]; const auto& r = biases[l].Get<petuum::DenseRow<float> >(0, &row_acc); // biases[l].Get(0, &row_acc); // const petuum::DenseRow<float>& r = row_acc.Get<petuum::DenseRow<float> >(); for(int j=0;j<dim;j++) local_biases[l][j]=r[j]; } float loss=compute_loss(local_weights, local_biases); if(client_id==0&&(*thread_id)==0) std::cout<<"client "<<client_id<<" worker "<<(*thread_id)<<" iter "<<it<<" loss is "<<loss<<std::endl; } } } //release data delete []idxes_batch; for(int i=0;i<num_layers-1;i++) delete[]delta[i]; delete[]delta; for(int i=0;i<num_layers;i++) delete[]z[i]; delete []z; //release parameter buffer for(int l=0;l<num_layers-1;l++){ int dim1=num_units_ineach_layer[l+1]; for(int i=0;i<dim1;i++) delete []local_weights[l][i]; delete[]local_weights[l]; } delete[]local_weights; for(int l=0;l<num_layers-1;l++) delete []local_biases[l]; delete []local_biases; for(int l=0;l<num_layers-1;l++) { int dim1=num_units_ineach_layer[l+1]; for(int i=0;i<dim1;i++) delete []delta_weights[l][i]; delete[]delta_weights[l]; } delete[]delta_weights; for(int l=0;l<num_layers-1;l++) delete []delta_biases[l]; delete []delta_biases; }
// train model void rbm::train(float ** weights, float * visible_bias, float * hidden_bias){ std::cout << "Training model..." << std::endl; // weights, visible_bias and hidden_bias were randomly initialized // Initialize delta_weights, delta_hidden_bias, and delta_visible_bias to 0s float ** delta_weights = new float*[num_hidden_units]; for (int i=0; i<num_hidden_units; i++){ delta_weights[i] = new float[num_visible_units]; } float * delta_hidden_bias = new float[num_hidden_units]; float * delta_visible_bias = new float[num_visible_units]; float ** features = new float*[number_of_data_points]; for (int i=0; i<number_of_data_points; i++){ features[i] = new float[num_visible_units]; } for (int i=0; i<number_of_data_points; i++){ for (int j=0; j<num_visible_units; j++){ features[i][j] = input_features[i][j]; } } // random indexes of the data points that will be chosen at each iteration of sga int * idxes_batch = new int[size_minibatch]; int inner_iter = number_of_data_points/num_epochs; // Perform K-step cd num_epochs time for (int iter=0; iter<num_epochs; iter++){ for (int i=0; i<inner_iter; i++){ // sample minibatch and perform cd on this mini batch // sample minibatch rand_init_vec_int(idxes_batch, size_minibatch, number_of_data_points); // set deltas to zeros at every iteration in cd cd(features, weights, hidden_bias, visible_bias, delta_weights, delta_hidden_bias, delta_visible_bias, K, idxes_batch, size_minibatch); // update parameters update_weights(weights, delta_weights, learning_rate, num_hidden_units, num_visible_units, size_minibatch); update_visible_bias(visible_bias, delta_visible_bias, learning_rate, num_visible_units, size_minibatch); update_hidden_bias(hidden_bias, delta_hidden_bias, learning_rate, num_hidden_units, size_minibatch); } } // release data delete[] idxes_batch; delete[] delta_hidden_bias; delete[] delta_visible_bias; for (int i=0; i<num_hidden_units; i++){ delete[] delta_weights[i]; } delete[] delta_weights; for (int i=0; i<number_of_data_points; i++){ delete[] features[i]; } delete[] features; // std::cout << "Training model: DONE" << std::endl; }
void DML::Learn(float learn_rate, int epochs, const char * model_file) { // tmp buffers float *vec_buf_1 = new float[src_feat_dim]; // src_feat_dim float *vec_buf_2 = new float[dst_feat_dim]; // dst_feat_dim // assign id to threads if (!thread_id.get()) { thread_id.reset(new int(thread_counter++)); } // get access to tables petuum::PSTableGroup::RegisterThread(); mat L = petuum::PSTableGroup::GetTableOrDie<float>(0); // Run additional iterations to let stale values finish propagating for (int iter = 0; iter < staleness; ++iter) { petuum::PSTableGroup::Clock(); } // initialize parameters if (client_id == 0 && (*thread_id) == 0) { std::cout << "init parameters" << std::endl; for (int i = 0; i < dst_feat_dim; i++) { petuum::DenseUpdateBatch<float> update_batch(0, src_feat_dim); for (int j = 0; j < src_feat_dim; j++) { float a = rand()%1000/1000.0/2000; update_batch[j]=a; } L.DenseBatchInc(i, update_batch); } std::cout << "init parameters done" << std::endl; } process_barrier->wait(); if (client_id == 0 && (*thread_id) == 0) std::cout << "training starts" << std::endl; sleep((client_id+(*thread_id))*2); std::vector<int> idx_perm_simi_pairs; for (int i = 0; i < num_simi_pairs; i++) idx_perm_simi_pairs.push_back(i); std::random_shuffle(idx_perm_simi_pairs.begin(), \ idx_perm_simi_pairs.end(), myrandom2); int * idx_perm_arr_simi_pairs = new int[num_simi_pairs]; for (int i = 0; i < num_simi_pairs; i++) idx_perm_arr_simi_pairs[i] = idx_perm_simi_pairs[i]; std::vector<int> idx_perm_diff_pairs; for (int i = 0; i < num_diff_pairs; i++) idx_perm_diff_pairs.push_back(i); std::random_shuffle(idx_perm_diff_pairs.begin(), \ idx_perm_diff_pairs.end(), myrandom2); int * idx_perm_arr_diff_pairs = new int[num_diff_pairs]; for (int i = 0; i < num_diff_pairs; i++) idx_perm_arr_diff_pairs[i] = idx_perm_diff_pairs[i]; std::vector<int> idx_perm; for (int i = 0; i < dst_feat_dim; i++) idx_perm.push_back(i); std::random_shuffle(idx_perm.begin(), idx_perm.end(), myrandom2); int * idx_perm_arr = new int[dst_feat_dim]; for (int i = 0; i < dst_feat_dim; i++) idx_perm_arr[i] = idx_perm[i]; //local buffer of parameter float ** local_paras = new float *[dst_feat_dim]; for (int i = 0; i < dst_feat_dim; i++) local_paras[i] = new float[src_feat_dim]; float ** grad=new float *[dst_feat_dim]; for ( int i=0;i<dst_feat_dim;i++) grad[i]=new float[src_feat_dim]; int inner_iters=(num_simi_pairs+num_diff_pairs)/size_mb/num_clients/num_worker_threads; int * mb_idx=new int[size_mb/2]; for (int e = 0; e < epochs; e++) { for(int it=0;it<inner_iters;it++){ //copy parameters petuum::RowAccessor row_acc; for (int i = 0; i < dst_feat_dim; i++) { const petuum::DenseRow<float>& r = L.Get<petuum::DenseRow<float> >(i, &row_acc); for (int j = 0; j < src_feat_dim; j++) { local_paras[i][j] = r[j]; } } //evaluate if (client_id == 0 && (*thread_id) == 0 && it%num_iters_evaluate==0) { // evaluate float simi_loss = 0, diff_loss = 0, total_loss = 0; Evaluate(local_paras, simi_loss, diff_loss, total_loss, vec_buf_1, vec_buf_2); //std::cout << "epoch:\t" << e << "\tsimi_loss:\t" << simi_loss \ //<< "\tdiff_loss:\t" << diff_loss << "\ttotal_loss:\t" \ //<< total_loss << std::endl; std::cout << "epoch: " << e << " iter: " << it << " loss: " << total_loss <<std::endl; } //set gradient to zero for(int i=0;i<dst_feat_dim;i++) memset(grad[i], 0, src_feat_dim*sizeof(float)); rand_init_vec_int(mb_idx, size_mb/2,num_simi_pairs); for(int i=0;i<size_mb/2;i++){ int idx = idx_perm_arr_simi_pairs[mb_idx[i]]; Update(local_paras, grad, data[simi_pairs[idx].x], data[simi_pairs[idx].y], \ 1, vec_buf_1, vec_buf_2); } rand_init_vec_int(mb_idx, size_mb/2,num_diff_pairs); for(int i=0;i<size_mb/2;i++){ int idx = idx_perm_arr_diff_pairs[mb_idx[i]]; Update(local_paras, grad, data[diff_pairs[idx].x], data[diff_pairs[idx].y], \ 0, vec_buf_1, vec_buf_2); } //update parameters float coeff =- learn_rate*2/size_mb; for (int i = 0; i < dst_feat_dim; i++) { petuum::DenseUpdateBatch<float> update_batch(0,src_feat_dim); for (int j = 0; j < src_feat_dim; j++) update_batch[j]= coeff*grad[i][j]; L.DenseBatchInc(i, update_batch); } petuum::PSTableGroup::Clock(); } } if (client_id == 0 && (*thread_id) == 0) SaveModel(L, model_file); delete[] mb_idx; delete[] vec_buf_1; delete[] vec_buf_2; for(int i=0;i< dst_feat_dim;i++) delete[]local_paras[i]; delete[] local_paras; for(int i=0;i<dst_feat_dim;i++) delete[]grad[i]; delete[]grad; petuum::PSTableGroup::DeregisterThread(); }