Example #1
void AdaGradCpp::min(MPI_Comm comm, const double tol, const int MaxNumIterations) {
		
	int i, IterationNum, BatchNum, BatchBegin, BatchEnd, BatchSize, GlobalBatchSize;
	double GlobalBatchSizeDouble;
	
	double CurrentLearningRate;
	
	this->SumdZdWSquared = (double*)calloc(this->lengthW, sizeof(double));
			
	for (IterationNum = 0; IterationNum < MaxNumIterations; ++IterationNum) {//IterationNum layer
			
			//Calculate CurrentLearningRate
			CurrentLearningRate = this->LearningRate/pow((double)(IterationNum + 1), this->LearningRatePower);
			
			//this->NumBatches is inherited from the Optimiser class
			for (BatchNum = 0; BatchNum < this->NumBatches; ++BatchNum) {//BatchNum layer
				
				//We must find out the current values of BatchBegin, BatchEnd and BatchSize. We do so by calling this->CalcBatchBeginEnd, which is inherited from the Optimiser class.
				this->CalcBatchBeginEnd(BatchBegin, BatchEnd, BatchSize, BatchNum);
	
				//VERY IMPORTANT CONVENTION: When passing this->localdZdW to g(), always set it to zero first.
				for (i=0; i<this->lengthW; ++i) this->localdZdW[i] = 0.0;

				//If weight is greater than wMax or smaller than wMin, then clip
				//If there is no wMin or wMax, this will have no effect				
				wClip(this->W);
				
				//Barrier: Wait until all processes have reached this point
				MPI_Barrier(comm);	
																
				//Call g()
				//Note that it is the responsibility of whoever writes the MLalgorithm to make sure that this->dZdW and this->SumdZdW are passed to ALL processes
				//It is, however, your responsibility to place a barrier after that, if required
				this->MLalgorithm->g(comm, this->dZdW, this->localdZdW, this->W, BatchBegin, BatchEnd, BatchSize, BatchNum, IterationNum);
				
				//Sum BatchSize over all processes and store the result in GlobalBatchSize
				MPI_Allreduce(&BatchSize, &GlobalBatchSize, 1, MPI_INT, MPI_SUM, comm);		
				GlobalBatchSizeDouble = (double)GlobalBatchSize;	
				
				//If weight equals wMin (wMax) and dZdW is smaller than zero (greater than zero), set dZdW to zero
				//If there is no wMin or wMax, this will have no effect
				dZdWClipMin();
								
				//Barrier: Wait until all processes have reached this point
				MPI_Barrier(comm);							
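				//Accumulate dZdW into SumdZdW; it feeds the convergence check (SumGradients) below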
				for (i=0; i<this->lengthW; ++i) this->SumdZdW[i] += this->dZdW[i];  	
				
				//Calculate "dampening parameter"
				for (i=0; i<this->lengthW; ++i) this->SumdZdWSquared[i] += this->dZdW[i]*this->dZdW[i];  		
					
				//Update W; components whose accumulated squared gradient is still zero are skipped to avoid division by zero
				for (i=0; i<this->lengthW; ++i) if (this->SumdZdWSquared[i] > 0.0) this->W[i] -= (this->dZdW[i]/sqrt(this->SumdZdWSquared[i]))*CurrentLearningRate;						
								
													
			}//BatchNum layer
									
			//The following lines should be left unchanged for all gradient-based optimisers
			//Calculate SumGradients
			this->MLalgorithm->SumGradients[IterationNum] = 0.0;
			for (i=0; i<this->lengthW; ++i) this->MLalgorithm->SumGradients[IterationNum] += this->SumdZdW[i]*this->SumdZdW[i];
			
			//Set SumdZdW to 0 for next iteration
			for (i=0; i<this->lengthW; ++i) this->SumdZdW[i] = 0.0;
									
			//Check whether convergence condition is met. If yes, break
			if (this->MLalgorithm->SumGradients[IterationNum]/((double)(this->lengthW)) < tol) break;
			
	}//IterationNum layer
		
		//If weight is greater than wMax or smaller than wMin, then clip
		//If there is no wMin or wMax, this will have no effect				
		wClip(this->W);			
		
		//Store the number of iterations needed
		this->MLalgorithm->IterationsNeeded = IterationNum;
		
		free(this->SumdZdWSquared);
				
}
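As a sketch of what the inner loop in Example #1 performs (notation only, not part of the source):

\[
W_i \leftarrow W_i - \frac{\eta}{(t+1)^{p}} \cdot \frac{g_i}{\sqrt{\sum g_i^{2}}}
\]

Here g_i is dZdW[i] for the current batch, the sum in the denominator runs over all batch gradients seen so far (SumdZdWSquared[i]), t is IterationNum, eta is LearningRate and p is LearningRatePower. Note that the code guards against division by zero by skipping components with SumdZdWSquared[i] == 0 rather than adding a small epsilon to the denominator.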
Example #2
void GradientDescentCpp::min(MPI_Comm comm, const double tol, const int MaxNumIterations) {
		
  int i, IterationNum, BatchNum, BatchBegin, BatchEnd, BatchSize, GlobalBatchSize;
  double GlobalBatchSizeDouble;
	
  double CurrentLearningRate;
		
  for (IterationNum = 0; IterationNum < MaxNumIterations; ++IterationNum) {//IterationNum layer
			
    //Calculate CurrentLearningRate
    CurrentLearningRate = this->LearningRate/pow((double)(IterationNum + 1), this->LearningRatePower);
			
    //this->NumBatches is inherited from the Optimiser class
    for (BatchNum = 0; BatchNum < this->NumBatches; ++BatchNum) {//BatchNum layer
				
      //We must find out the current values of BatchBegin, BatchEnd and BatchSize. We do so by calling this->CalcBatchBeginEnd, which is inherited from the Optimiser class.
      this->CalcBatchBeginEnd(BatchBegin, BatchEnd, BatchSize, BatchNum);
	
      //VERY IMPORTANT CONVENTION: When passing this->localdZdW to g(), always set it to zero first.
      for (i=0; i<this->lengthW; ++i) this->localdZdW[i] = 0.0;

      //If weight is greater than wMax or smaller than wMin, then clip
      //If there is no wMin or wMax, this will have no effect				
      wClip(this->W);
				
      //Barrier: Wait until all processes have reached this point
      MPI_Barrier(comm);	
																
      //Call g()
      //Note that it is the responsibility of whoever writes the MLalgorithm to make sure that this->dZdW and this->SumdZdW are passed to ALL processes
      //It is, however, your responsibility to place a barrier after that, if required
      this->MLalgorithm->g(comm, this->dZdW, this->localdZdW, this->W, BatchBegin, BatchEnd, BatchSize, BatchNum, IterationNum);
				
      //Sum BatchSize over all processes and store the result in GlobalBatchSize
      MPI_Allreduce(&BatchSize, &GlobalBatchSize, 1, MPI_INT, MPI_SUM, comm);		
      GlobalBatchSizeDouble = (double)GlobalBatchSize;	
				
      //If weight equals wMin (wMax) and dZdW is smaller than zero (greater than zero), set dZdW to zero
      //If there is no wMin or wMax, this will have no effect
      dZdWClipMin();
								
      //Barrier: Wait until all processes have reached this point
      MPI_Barrier(comm);							
      for (i=0; i<this->lengthW; ++i) {
				
	//Accumulate dZdW into SumdZdW for SumGradients
	this->SumdZdW[i] += this->dZdW[i];
					
	//Accumulate the squared dZdW into SumSubgradients
	this->MLalgorithm->SumSubgradients[this->NumBatches*IterationNum + BatchNum] += this->dZdW[i]*this->dZdW[i];
				
	//Update W
	//The step is always divided by the global batch size, i.e. the gradient is averaged over the batch
	this->W[i] -= this->dZdW[i]*CurrentLearningRate/GlobalBatchSizeDouble;
				
      }
				
      //Run post-update manipulation
      //Some algorithms need the ability to change W once it has been updated by the optimiser, for instance to ensure that certain conditions be met.
      //If there is no post-update manipulation function defined, this will have no impact whatsoever.
      this->MLalgorithm->PostUpdateManipulation(comm, BatchBegin, BatchEnd, BatchSize, BatchNum, IterationNum);
																	
    }//BatchNum layer
									
    //The following lines should be left unchanged for all gradient-based optimisers
    //Calculate SumGradients
    this->MLalgorithm->SumGradients[IterationNum] = 0.0;
    for (i=0; i<this->lengthW; ++i) {
				
      this->MLalgorithm->SumGradients[IterationNum] += this->SumdZdW[i]*this->SumdZdW[i];
				
      this->SumdZdW[i] = 0.0;//Set SumdZdW to 0 for next iteration

    }
												
    //Check whether convergence condition is met. If yes, break
    if (this->MLalgorithm->SumGradients[IterationNum]/((double)(this->lengthW)) < tol) break;
			
  }//IterationNum layer
		
  //If weight is greater than wMax or smaller than wMin, then clip
  //If there is no wMin or wMax, this will have no effect				
  wClip(this->W);		
	
  //Store the number of iterations needed (the lengths of SumGradients and SumSubgradients)
  this->MLalgorithm->SumGradientsLength = IterationNum;
  this->MLalgorithm->SumSubgradientsLength = this->NumBatches*IterationNum;
		
}
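Both examples delegate the gradient computation to this->MLalgorithm->g() and only assume that, after the call, this->dZdW holds the same summed gradient on every process (which is also why Example #2 divides the step by the global batch size). The following is a minimal, hypothetical sketch of how an MLalgorithm might satisfy that contract for a least-squares model; the class name LinearRegressionCpp, the members X and Y, the parameter types and the loss itself are assumptions made for illustration, and only the argument list and the "dZdW must end up on ALL processes" convention are taken from the optimisers above.

#include <mpi.h>

//Hypothetical MLalgorithm for illustration only; not part of the source
class LinearRegressionCpp {

public:
  double *X;   //local design matrix, flattened row-major (local samples x lengthW)
  double *Y;   //local targets
  int lengthW; //number of weights

  void g(MPI_Comm comm, double *dZdW, double *localdZdW, const double *W,
         int BatchBegin, int BatchEnd, int BatchSize, int BatchNum, int IterationNum);

};

void LinearRegressionCpp::g(MPI_Comm comm, double *dZdW, double *localdZdW, const double *W,
                            int BatchBegin, int BatchEnd, int BatchSize, int BatchNum, int IterationNum) {

  int i, j;
  double residual;

  //localdZdW has already been set to zero by the optimiser (see the convention above)
  //BatchSize, BatchNum and IterationNum are unused in this simple sketch
  for (i = BatchBegin; i < BatchEnd; ++i) {

    //Prediction error for local sample i: residual = X_i*W - Y_i
    residual = -this->Y[i];
    for (j = 0; j < this->lengthW; ++j) residual += this->X[i*this->lengthW + j]*W[j];

    //Gradient of the squared error for sample i, accumulated into the local gradient
    for (j = 0; j < this->lengthW; ++j) localdZdW[j] += residual*this->X[i*this->lengthW + j];

  }

  //Sum the local gradients over all processes so that dZdW ends up identical everywhere
  MPI_Allreduce(localdZdW, dZdW, this->lengthW, MPI_DOUBLE, MPI_SUM, comm);

}

Because the Allreduce produces a sum rather than an average, the optimisers that want an averaged gradient (such as Example #2) perform the division by the global batch size themselves.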