void train_batch(std::size_t /*epoch*/, const dll::batch<T>& data_batch, const dll::batch<L>& label_batch) {
    cpp_assert(data_batch.size() == label_batch.size(), "Invalid sizes");

    auto n = label_batch.size();

    decltype(auto) first_layer = dbn.template layer_get<0>();
    decltype(auto) first_ctx   = first_layer.template get_sgd_context<dbn_t>();

    decltype(auto) last_layer = dbn.template layer_get<layers - 1>();
    decltype(auto) last_ctx   = last_layer.template get_sgd_context<dbn_t>();

    using inputs_t  = typename input_batch_t<0>::type;
    using outputs_t = typename output_batch_t<layers - 1>::type;

    inputs_t inputs;
    outputs_t labels;

    //Copy inputs and labels into a suitable data structure
    copy_inputs(inputs, data_batch.begin(), data_batch.end());
    copy_labels(labels, label_batch.begin(), label_batch.end());

    //Feedforward pass
    compute_outputs(inputs);

    static_assert(
        decay_layer_traits<decltype(last_layer)>::is_dense_layer() || decay_layer_traits<decltype(last_layer)>::is_standard_rbm_layer(),
        "The last layer must be dense for SGD training");

    //Compute the errors of the last layer
    compute_last_errors(last_layer, last_ctx, labels);

    //Compute the gradients of each layer, in reverse order
    dbn.for_each_layer_rpair([](auto& r1, auto& r2) {
        auto& ctx1 = r1.template get_sgd_context<dbn_t>();
        auto& ctx2 = r2.template get_sgd_context<dbn_t>();

        this_type::compute_gradients(r2, ctx2, ctx1.output);
        this_type::compute_errors(r1, ctx1, r2, ctx2);
    });

    compute_gradients(first_layer, first_ctx, inputs);

    //Apply the gradients
    dbn.for_each_layer([this, n](auto& layer) {
        this->apply_gradients(layer, n);
    });
}
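/*
 * For context: with a softmax output layer trained under categorical
 * cross-entropy, the standard last-layer error reduces to prediction
 * minus target (sign conventions differ between libraries). The sketch
 * below is a minimal, hypothetical illustration of that rule using plain
 * std::vector; it is not dll's actual compute_last_errors, which works
 * on etl containers.
 */
#include <cstddef>
#include <vector>

std::vector<double> last_layer_errors(const std::vector<double>& output,
                                      const std::vector<double>& target) {
    std::vector<double> errors(output.size());
    for (std::size_t i = 0; i < output.size(); ++i) {
        errors[i] = output[i] - target[i]; // dL/dz for softmax + cross-entropy
    }
    return errors;
}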
void MeanShift::update() {
    // Rebuild the nearest-neighbour index over the data points
    index->buildIndex();

    iterations = 0;
    while (iterations < max_iterations) {
        // One shift step; the previous centroid positions are kept in centroids_tmp
        compute_gradients();

        // Stop once the centroids barely move
        difference = arma::norm(centroids - centroids_tmp, 2);
        if (difference < convergence_threashold) {
            break;
        }

        iterations++;
    }
}
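/*
 * A minimal sketch of what one mean-shift step like compute_gradients()
 * above typically does, assuming a flat kernel of a given bandwidth and
 * brute-force neighbour search in place of the index used above. The
 * function name and signature are hypothetical, not this class's API.
 */
#include <armadillo>

arma::mat mean_shift_step(const arma::mat& points, const arma::mat& centroids, double bandwidth) {
    arma::mat shifted(arma::size(centroids), arma::fill::zeros);
    for (arma::uword c = 0; c < centroids.n_rows; ++c) {
        arma::rowvec sum(centroids.n_cols, arma::fill::zeros);
        arma::uword count = 0;
        for (arma::uword p = 0; p < points.n_rows; ++p) {
            // Flat kernel: every point within the bandwidth contributes equally
            if (arma::norm(points.row(p) - centroids.row(c), 2) <= bandwidth) {
                sum += points.row(p);
                ++count;
            }
        }
        // Move the centroid to the mean of its neighbours, or leave it in place
        if (count > 0) {
            shifted.row(c) = sum / double(count);
        } else {
            shifted.row(c) = centroids.row(c);
        }
    }
    return shifted;
}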
/**
 * This method runs only for the query nodes. Its work is split across
 * several helper methods, since not all of it is needed in every phase.
 */
void update(graphchi_vertex<TypeVertex, FeatureEdge>& v, graphchi_context& ginfo) {
    // TODO Use a scheduler instead of this?
    if (v.get_data().type == QUERY) {  // Only queries have outedges (TODO: ???)
        /* We count the number of queries. */
        if (ginfo.iteration == 0) {
            num_queries++;
        }

        score_documents(v, ginfo);

        if (phase == TRAINING) {
            compute_gradients(v, parallel_models[omp_get_thread_num()]);
        }
        if (phase == TRAINING || phase == VALIDATION || phase == TESTING) {
            evaluate_model(v, ginfo);
        }
    }
}
void rtlr::compute_gradients(device_array* desiredOutputs) {
    auto& outputs = _mlp->_netOutputs;
    idx_t computationIndex = 0;
    idx_t layersCount = _mlp->_layers.size();

    // Walk every non-input layer and every p-value (sensitivity) block
    for (idx_t lidx = 1; lidx < layersCount; lidx++) {
        idx_t iLayerIndex = lidx - 1;
        auto& pValuesOfLayer = _pValues[iLayerIndex];
        idx_t pValuesOfLayersCount = pValuesOfLayer.size();

        for (idx_t jLayerIndex = 0; jLayerIndex < pValuesOfLayersCount; jLayerIndex++) {
            // 0: Bias
            // 1..: Weights
            auto& pValuesOfWeights = pValuesOfLayer[jLayerIndex];

            // Mark the first and last computation of the whole sequence
            sequence_marker seqMark = sequence_marker::inner;
            if (lidx == 1 && jLayerIndex == 0) {
                seqMark = sequence_marker::begin;
            } else if (lidx == layersCount - 1 && jLayerIndex == pValuesOfLayersCount - 1) {
                seqMark = sequence_marker::end;
            }

            compute_gradients(iLayerIndex, jLayerIndex, pValuesOfWeights, outputs, desiredOutputs, computationIndex++, seqMark);
        }
    }
}
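/*
 * Background sketch: in real-time recurrent learning the "p-values"
 * iterated above are the sensitivities p_k = dy_k/dw_ij of every unit's
 * output with respect to a single weight, updated once per time step as
 *   p_k <- f'(net_k) * ( sum_l w_{kl} * p_l + delta_{ki} * x_j ).
 * Everything below (names, signature, storage layout) is an assumption
 * for illustration, not the rtlr class's actual internals.
 */
#include <cstddef>
#include <vector>

void update_p_values(std::vector<double>& p,                    // p[k] = dy_k/dw_ij
                     const std::vector<std::vector<double>>& w, // recurrent weights w[k][l]
                     const std::vector<double>& deriv,          // f'(net_k) at time t
                     std::size_t i, double x_j) {               // weight w_ij and its input
    std::vector<double> next(p.size());
    for (std::size_t k = 0; k < p.size(); ++k) {
        double sum = (k == i) ? x_j : 0.0; // delta_{ki} * x_j term
        for (std::size_t l = 0; l < p.size(); ++l) {
            sum += w[k][l] * p[l];
        }
        next[k] = deriv[k] * sum;
    }
    p.swap(next); // p now holds the sensitivities for time t + 1
}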
/* Set the gradients into a gsl vector */
void get_gradients(const gsl_vector *x, void *params, gsl_vector *g) {
    int i, idx;
    Dataset *data = (Dataset *) params;
    double *dQdAlpha = (double *) malloc(sizeof(double) * data->num_labelers);
    double *dQdBeta = (double *) malloc(sizeof(double) * data->num_beta);

    unpackX(x, data);
    compute_gradients(data, dQdAlpha, dQdBeta);

    /* Pack dQdAlpha and dQdBeta into the gsl_vector */
    for (i = 0; i < data->num_labelers; i++) {
        gsl_vector_set(g, i, -dQdAlpha[i]); /* Flip the sign since we want to minimize */
    }
    for (idx = 0; idx < data->num_beta; idx++) {
        gsl_vector_set(g, data->num_labelers + idx, -dQdBeta[idx]); /* Flip the sign since we want to minimize */
    }

    free(dQdAlpha);
    free(dQdBeta);
}
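/*
 * For context: get_gradients has exactly the signature GSL expects for the
 * df callback of a gsl_multimin_fdfminimizer, so it can be registered as
 * shown below. compute_Q (the objective) and num_params are assumptions
 * here; only get_gradients and Dataset come from the code above.
 */
#include <gsl/gsl_multimin.h>

typedef struct Dataset Dataset; /* defined elsewhere in this codebase */

double compute_Q(const gsl_vector *x, void *params); /* assumed objective */
void get_gradients(const gsl_vector *x, void *params, gsl_vector *g);

static void compute_Q_and_gradients(const gsl_vector *x, void *params,
                                    double *f, gsl_vector *g) {
    *f = compute_Q(x, params);
    get_gradients(x, params, g);
}

gsl_multimin_function_fdf make_objective(Dataset *data, size_t num_params) {
    gsl_multimin_function_fdf obj;
    obj.n = num_params; /* num_labelers + num_beta */
    obj.f = compute_Q;
    obj.df = get_gradients;
    obj.fdf = compute_Q_and_gradients;
    obj.params = data;
    return obj;
}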
void MeanShift::one_step_update() {
    // Rebuild the nearest-neighbour index, then perform a single shift step
    index->buildIndex();
    compute_gradients();
}