bool Trainer::TrainDistributedMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, bool sweepEnd, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/) { std::unordered_map<Parameter, NDArrayViewPtr> gradients; gradients.reserve(m_learnerParameters.size()); bool emptyMinibatch = arguments.empty() || (arguments.begin()->second == nullptr); NDArrayViewPtr trainingLoss = nullptr; NDArrayViewPtr evalCriterion = nullptr; if (emptyMinibatch) { m_prevMinibatchNumSamples = 0; // Gradients are not existing. for (const auto& parameter : m_learnerParameters) gradients[parameter] = nullptr; trainingLoss = MakeSharedObject<NDArrayView>(0, (m_aggregatedLossFunction ? m_aggregatedLossFunction->Output().GetDataType() : DataType::Float), NDShape{}, computeDevice); evalCriterion = MakeSharedObject<NDArrayView>(0, (m_aggregatedEvaluationFunction ? m_aggregatedEvaluationFunction->Output().GetDataType() : DataType::Float), NDShape{}, computeDevice); } else { // Get gradients after forward/backward pass. std::unordered_map<Variable, ValuePtr> parameterGradients; ExecuteForwardBackward(arguments, outputsToFetch, computeDevice, parameterGradients); for (const auto& parameter : m_learnerParameters) gradients[parameter] = parameterGradients[parameter]->Data(); trainingLoss = m_prevMinibatchAggregateTrainingLossValue->Data(); evalCriterion = m_prevMinibatchAggregateEvalCriterionValue->Data(); } auto currentWorkerNumSamples = m_prevMinibatchNumSamples; auto prevTotalNumSamples = TotalNumberOfSamplesSeen(); MinibatchInfo info{ arguments.empty(), sweepEnd, m_prevMinibatchNumSamples, trainingLoss, evalCriterion }; bool updated = m_parameterLearners->Update(gradients, info); m_prevMinibatchNumSamples = info.numberOfSamples; // Update internal state. if (emptyMinibatch) { // Have to reassign loss and criterion. m_prevMinibatchAggregateEvalCriterionValue = std::make_shared<Value>(info.evalCriterionValue); m_prevMinibatchAggregateTrainingLossValue = std::make_shared<Value>(info.trainingLossValue); } // Did we do a distributed sync? // We determine this by checking if the increase in total #samples is > #samples processed by local worker auto currentTotalNumSamples = TotalNumberOfSamplesSeen(); if ((currentTotalNumSamples - prevTotalNumSamples) > currentWorkerNumSamples) { for (auto& progressWriter : m_progressWriters) progressWriter->UpdateDistributedSync(currentTotalNumSamples - m_prevDistributedTotalNumSamples, nullptr); m_prevDistributedTotalNumSamples = currentTotalNumSamples; } return updated; }
bool Trainer::TrainLocalMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/) { bool emptyMinibatch = arguments.empty() || (arguments.begin()->second == nullptr); if (emptyMinibatch) // Nothing to train with. return false; std::unordered_map<Variable, ValuePtr> parameterGradients; ExecuteForwardBackward(arguments, outputsToFetch, computeDevice, parameterGradients); std::unordered_map<Parameter, NDArrayViewPtr> gradients; for (const auto& parameter : m_combinedTrainingFunction->Parameters()) gradients[parameter] = parameterGradients[parameter]->Data(); return m_parameterLearners->Update(gradients, m_prevMinibatchNumSamples); }
bool Trainer::TrainLocalMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, bool sweepEnd, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/) { bool emptyMinibatch = arguments.empty() || (arguments.begin()->second == nullptr); if (emptyMinibatch) // Nothing to train with. { m_prevMinibatchNumSamples = 0; return false; } std::unordered_map<Variable, ValuePtr> parameterGradients; ExecuteForwardBackward(arguments, outputsToFetch, computeDevice, parameterGradients); #ifndef CNTK_UWP auto profWeights = Microsoft::MSR::CNTK::ScopeProfile(Microsoft::MSR::CNTK::profilerEvtMainWeights); #endif std::unordered_map<Parameter, NDArrayViewPtr> gradients; for (const auto& parameter : m_learnerParameters) gradients[parameter] = parameterGradients[parameter]->Data(); return m_parameterLearners->Update(gradients, m_prevMinibatchNumSamples, sweepEnd); }
bool Trainer::TrainDistributedMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/) { std::unordered_map<Parameter, NDArrayViewPtr> gradients; auto modelParameters = m_combinedTrainingFunction->Parameters(); gradients.reserve(modelParameters.size()); bool emptyMinibatch = arguments.empty() || (arguments.begin()->second == nullptr); NDArrayViewPtr trainingLoss = nullptr; NDArrayViewPtr evalCriterion = nullptr; if (emptyMinibatch) { m_prevMinibatchNumSamples = 0; // Gradients are not existing. for (const auto& parameter : modelParameters) gradients[parameter] = nullptr; } else { // Get gradients after forward/backward pass. std::unordered_map<Variable, ValuePtr> parameterGradients; ExecuteForwardBackward(arguments, outputsToFetch, computeDevice, parameterGradients); for (const auto& parameter : modelParameters) gradients[parameter] = parameterGradients[parameter]->Data(); trainingLoss = m_prevMinibatchAggregateTrainingLossValue->Data(); evalCriterion = m_prevMinibatchAggregateEvalCriterionValue->Data(); } MinibatchInfo info { arguments.empty(), m_prevMinibatchNumSamples, trainingLoss, evalCriterion }; bool updated = m_parameterLearners->Update(gradients, info); m_prevMinibatchNumSamples = info.numberOfSamples; // Update internal state. if (emptyMinibatch) { // Have to reassign loss and criterion. m_prevMinibatchAggregateEvalCriterionValue = std::make_shared<Value>(info.evalCriterionValue); m_prevMinibatchAggregateTrainingLossValue = std::make_shared<Value>(info.trainingLossValue); } return updated; }