double Gradient::computeGradient(dVector& vecGradient, Model* m, DataSet* X)
{
    double ans = 0.0;

#ifdef _OPENMP
    if( nbThreadsMP < 1 )
        nbThreadsMP = omp_get_max_threads();
    setMaxNumberThreads(nbThreadsMP);
    pInfEngine->setMaxNumberThreads(nbThreadsMP);
    pFeatureGen->setMaxNumberThreads(nbThreadsMP);
#endif

    // Check the size of vecGradient
    int nbFeatures = pFeatureGen->getNumberOfFeatures();
    if(vecGradient.getLength() != nbFeatures)
        vecGradient.create(nbFeatures);
    else
        vecGradient.set(0);

    ////////////////////////////////////////////////////////////
    // Start of parallel region
    // (gcc 4.1 with OpenMP 2.5 support has some quirks; see the notes below.)
    //
    // Note 1: In OpenMP 2.5, the iteration variable in "for" must be a
    // signed integer variable type. In OpenMP 3.0 (_OPENMP >= 200805), it
    // may also be an unsigned integer variable type, a pointer type, or a
    // constant-time random access iterator type.
    //
    // Note 2: schedule(static | dynamic): With the dynamic schedule there
    // is no predictable order in which the loop iterations are assigned to
    // threads. Each thread asks the OpenMP runtime library for an iteration
    // number, handles it, then asks for the next one. This is useful when
    // different iterations of the loop may take different amounts of time
    // to execute.
#pragma omp parallel default(none) \
    shared(vecGradient, X, m, ans, nbFeatures, std::cout)
    {
        // Code inside this region runs in parallel.
        dVector g(nbFeatures, COLVECTOR, 0.0);

#pragma omp for schedule(dynamic) reduction(+:ans)
        for(int i=0; i<(int)X->size(); i++) {
            DataSequence* x = X->at(i);
            if( m->isWeightSequence() && x->getWeightSequence() != 1.0 ) {
                dVector tmp(nbFeatures, COLVECTOR, 0.0);
                ans += computeGradient(tmp, m, x) * x->getWeightSequence();
                tmp.multiply(x->getWeightSequence());
                g.add(tmp);
            }
            else {
                ans += computeGradient(g, m, x);
            }
        }

        // We now put together the per-thread gradients. No two threads can
        // execute a critical directive of the same name at the same time.
#pragma omp critical (reduce_sum)
        {
            vecGradient.add(g);
        }
    }
    // End of parallel region
    ////////////////////////////////////////////////////////////
    vecGradient.negate();

    // MaxMargin objective: min L = 0.5*\L2sigma*W*W + Loss()
    // MLE objective:       min L = 0.5*(1/\L2sigma^2)*W*W - log p(y|x)

    // Add the regularization term
    double scale = (m->isMaxMargin())
        ? m->getRegL2Sigma()
        : 1/(double)(m->getRegL2Sigma()*m->getRegL2Sigma());

    if( m->isMaxMargin() )
        ans = (1/(double)X->size()) * ans;

    if(m->getRegL2Sigma()!=0.0f) {
        for(int f=0; f<nbFeatures; f++)
            vecGradient[f] += (*m->getWeights())[f]*scale;
        ans += 0.5*scale*m->getWeights()->l2Norm(false);
    }
    return ans;
}
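// The parallel pattern above (dynamic schedule, scalar reduction, and a
// named critical section that merges one local buffer per thread) can be
// seen in isolation in the following minimal sketch. This is a standalone
// illustration, not library code: std::vector<double> stands in for
// dVector, and the per-item contributions are made up.
#include <vector>
#include <cstdio>

static void reductionPatternSketch()
{
    const int nbItems = 100, nbFeatures = 8;
    std::vector<double> total(nbFeatures, 0.0);
    double sum = 0.0;

    #pragma omp parallel shared(total, sum)
    {
        // One private buffer per thread, merged exactly once at the end.
        std::vector<double> local(nbFeatures, 0.0);

        // schedule(dynamic): each thread asks the runtime for the next
        // iteration as soon as it finishes the previous one, which balances
        // the load when iterations have uneven costs.
        #pragma omp for schedule(dynamic) reduction(+:sum)
        for (int i = 0; i < nbItems; i++) {
            sum += 1.0;                    // scalar contribution (cf. ans)
            local[i % nbFeatures] += 1.0;  // vector contribution (cf. g)
        }

        // The name guarantees mutual exclusion with every other critical
        // section called reduce_sum, so the merges never interleave.
        #pragma omp critical (reduce_sum)
        for (int f = 0; f < nbFeatures; f++)
            total[f] += local[f];
    }
    std::printf("sum=%g, total[0]=%g\n", sum, total[0]);
}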
double Gradient::computeGradient(dVector& vecGradient, Model* m, DataSet* X)
{
    // Check the size of vecGradient
    int nbFeatures = pFeatureGen->getNumberOfFeatures();
    double ans = 0.0;
    int TID = 0;

    if(vecGradient.getLength() != nbFeatures)
        vecGradient.create(nbFeatures);
    else
        vecGradient.set(0);

    // Initialize the buffers (vecFeaturesMP) for each thread
#ifdef _OPENMP
    setMaxNumberThreads(omp_get_max_threads());
    pInfEngine->setMaxNumberThreads(omp_get_max_threads());
    pFeatureGen->setMaxNumberThreads(omp_get_max_threads());
#endif
    for(int t=0; t<nbThreadsMP; t++) {
        if(localGrads[t].getLength() != nbFeatures)
            localGrads[t].resize(1, nbFeatures, 0);
        else
            localGrads[t].set(0);
    }

    ////////////////////////////////////////////////////////////
    // Start of parallel region
    // gcc 4.1 with OpenMP 2.5 support is picky about the data-sharing
    // clauses; note that the first variant omits the reference parameter
    // vecGradient from the shared list.
#if ((_OPENMP == 200505) && __GNUG__)
#pragma omp parallel \
    shared(X, m, ans, nbFeatures, std::cout) \
    private(TID) \
    default(none)
#else
#pragma omp parallel \
    shared(vecGradient, X, m, ans, nbFeatures, std::cout) \
    private(TID) \
    default(none)
#endif
    {
#ifdef _OPENMP
        TID = omp_get_thread_num();
#endif
        // Per-thread accumulators
        double localSum = 0;
#ifdef WITH_SEQUENCE_WEIGHTS
        dVector tmpVecGradient(nbFeatures);
#endif

#pragma omp for
        // An unsigned iteration variable requires OpenMP 3.0 support
        // (_OPENMP >= 200805); OpenMP 2.5 only allows signed integers.
#ifdef _OPENMP
#if _OPENMP >= 200805
        for(unsigned int i = 0; i < X->size(); i++) {
#else
        for(int i = 0; i < (int)X->size(); i++) {
#endif
#else
        for(unsigned int i = 0; i < X->size(); i++) {
#endif
            if( m->getDebugLevel() >= 2 ) {
#pragma omp critical(output)
                std::cout << "Thread " << TID << " computes gradient for sequence "
                          << i << " out of " << (int)X->size()
                          << " (Size: " << X->at(i)->length() << ")" << std::endl;
            }
            DataSequence* x = X->at(i);
#ifdef WITH_SEQUENCE_WEIGHTS
            tmpVecGradient.set(0);
            localSum += computeGradient(tmpVecGradient, m, x) * x->getWeightSequence();
            if(x->getWeightSequence() != 1.0)
                tmpVecGradient.multiply(x->getWeightSequence());
            localGrads[TID].add(tmpVecGradient);
#else
            localSum += computeGradient(localGrads[TID], m, x); // * x->getWeightSequence();
#endif
        }

        // We now put together the per-thread sums. No two threads can
        // execute a critical directive of the same name at the same time.
#pragma omp critical (reduce_sum)
        {
            if( m->getDebugLevel() >= 2 ) {
                std::cout << "Thread " << TID << " updates the sums" << std::endl;
            }
            ans += localSum;
            vecGradient.add(localGrads[TID]);
        }
    }
    // End of parallel region
    ////////////////////////////////////////////////////////////

    // Negate because we are minimizing -log P.
    vecGradient.negate();

    // Add the regularization term
    double sigmaL2Square = m->getRegL2Sigma()*m->getRegL2Sigma();
    if(sigmaL2Square != 0.0f) {
        if( m->getDebugLevel() >= 2 ) {
            std::cout << "Adding L2-norm gradient\n";
        }
        for(int f = 0; f < nbFeatures; f++) {
            vecGradient[f] += (*m->getWeights())[f] / sigmaL2Square;
        }
        double weightNorm = m->getWeights()->l2Norm(false); // squared L2 norm
        ans += weightNorm / (2.0*sigmaL2Square);
    }
    return ans;
}
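// The second version above replaces the stack-local gradient of the first
// with buffers (localGrads) that are allocated once before the parallel
// region and indexed by omp_get_thread_num(). The following standalone
// sketch shows that indexing pattern; the names and sizes are hypothetical,
// not taken from the library.
#include <omp.h>
#include <vector>
#include <cstdio>

static void perThreadBufferSketch()
{
    const int nbFeatures = 8, nbItems = 100;
    // One buffer per possible thread, sized before the parallel region.
    std::vector<std::vector<double>> localGrads(
        omp_get_max_threads(), std::vector<double>(nbFeatures, 0.0));
    std::vector<double> total(nbFeatures, 0.0);

    #pragma omp parallel
    {
        const int tid = omp_get_thread_num(); // private by construction

        #pragma omp for
        for (int i = 0; i < nbItems; i++)
            localGrads[tid][i % nbFeatures] += 1.0; // no sharing across threads

        // Merge each thread's buffer under a named critical section.
        #pragma omp critical (reduce_sum)
        for (int f = 0; f < nbFeatures; f++)
            total[f] += localGrads[tid][f];
    }
    std::printf("total[0]=%g\n", total[0]);
}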
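// Both versions finish by adding the L2 regularizer. For the MLE objective
//     L(w) = -log p(y|x) + ||w||^2 / (2*sigma^2)
// the gradient picks up the extra term w_f / sigma^2 for every feature f,
// and the objective value picks up ||w||^2 / (2*sigma^2). A minimal sketch
// of that step on plain arrays follows; the helper name is hypothetical,
// and l2Norm(false) above is assumed to return the squared norm.
#include <cstddef>
#include <vector>

static double addL2Regularization(std::vector<double>& grad,
                                  const std::vector<double>& w,
                                  double sigma)
{
    const double sigma2 = sigma * sigma;
    double sqNorm = 0.0;
    for (std::size_t f = 0; f < w.size(); f++) {
        grad[f] += w[f] / sigma2;    // d/dw_f of ||w||^2 / (2*sigma^2)
        sqNorm  += w[f] * w[f];
    }
    return sqNorm / (2.0 * sigma2); // value of the regularization term
}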