bool MLP::computeErrorNorm(const ivector& ids) {
  dvector vct(outputs,off*ids.size());
  const double fix=on-off;
  int i;
  double tmp,tmp2,v;

  // first compute the average outputs for the training set
  for (i=0;i<ids.size();++i) {
    vct.at(ids.at(i))+=fix;
  }
  vct.divide(ids.size());

  double offError(0.0);

  // now compute the error
  for (i=0;i<vct.size();++i) {
    tmp = off - vct.at(i);
    offError += (tmp*tmp);
  }

  errorNorm = 0.0;
  for (i=0;i<ids.size();++i) {
    v = vct.at(ids.at(i));
    tmp = off - v;
    tmp2 = on - v;
    errorNorm += (offError - tmp*tmp + tmp2*tmp2);
  }
  errorNorm *= 0.5;
  return true;
}
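// The loop above avoids a per-pattern sum over all output units by
// precomputing offError once and swapping only the target unit's term in
// and out. A naive O(N*outputs) restatement of the same quantity, as a
// sketch (errorNormSketch and its signature are illustrative, not LTI-lib
// API):
#include <vector>
#include <cstddef>

double errorNormSketch(const std::vector<int>& ids, int outputs,
                       double on, double off) {
  const double n = static_cast<double>(ids.size());

  // average activation of each output unit over the training set
  std::vector<double> avg(outputs, off);
  for (std::size_t p = 0; p < ids.size(); ++p) {
    avg[ids[p]] += (on - off) / n;
  }

  // sum of squared distances between desired and average activations
  double norm = 0.0;
  for (std::size_t p = 0; p < ids.size(); ++p) {
    for (int j = 0; j < outputs; ++j) {
      const double t = (j == ids[p]) ? on : off;  // desired activation
      const double d = t - avg[j];
      norm += d * d;
    }
  }
  return 0.5 * norm;
}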
// get the most frequent (best) label from the histogram
int kNearestNeighFilter::getMostLabel(const ivector& histogram,
                                      const imatrix& src,
                                      const int& row, const int& col) const {
  int numOfMax = 0;
  int maxIndex = -1;   // first index with the maximum count
  int max = 0;

  for (int i=0;i<histoSize;++i) {
    if (histogram.at(i) < max) {
      // most frequent case: nothing to do (tested first for speed)
    } else if (histogram.at(i) > max) {
      max = histogram.at(i);
      numOfMax = 1;
      maxIndex = i;
    } else { // histogram.at(i) == max
      ++numOfMax;
    }
  }

  // is there only one candidate?
  if (numOfMax == 1) {
    return maxIndex;
  } else if (histogram.at(src.at(row,col)) == max) {
    // the kernel center is one of the maxima: keep it
    return src.at(row,col);
  } else {
    return getMedian(histogram,max,numOfMax);
  }
}
bool MLP::calcGradient(const dmatrix& inputs,
                       const ivector& ids,
                       dvector& grad) {
  if (inputs.rows() != ids.size()) {
    setStatusString("Number of vectors not consistent with number of ids");
    return false;
  }

  dvector tmp;
  int i;
  double tmpError;

  totalError = 0;

  // the first pattern initializes the gradient and error accumulators
  calcGradient(inputs.getRow(0),ids.at(0),grad);
  computeActualError(ids.at(0),totalError);

  // accumulate gradient and error over the remaining patterns
  for (i=1;i<inputs.rows();++i) {
    calcGradient(inputs.getRow(i),ids.at(i),tmp);
    computeActualError(ids.at(i),tmpError);
    grad.add(tmp);
    totalError+=tmpError;
  }

  return true;
}
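// Note: this overload computes the batch gradient. The first pattern
// writes into grad and totalError directly, which also sizes grad via the
// per-pattern calcGradient; every further pattern adds its own gradient
// and error on top.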
// the kernel runs inside the image
void kNearestNeighFilter::histogramMethodMiddle(const imatrix& src,
                                                imatrix& dest,
                                                ivector& histogram,
                                                const int& row, int& col) const {
  int i,j;                 // indices
  int numOfMax, maxIndex;
  int max=0;
  const int maxChange = sizeOfKernel+1; // upper bound on the drop of "max" per shift
  const int limit = sizeOfKernel/2;     // half the kernel size
  const int lastCol = src.lastColumn()-limit;
  const int r = row+limit;

  col = limit;
  while (col <= (lastCol-1)) {
    // remove the labels of the column leaving the kernel on the left
    j = col-limit;
    for (i=row-limit;i<=r;++i) {
      --histogram.at(src.at(i,j));
    }

    // add the labels of the column entering the kernel on the right
    ++col;
    j = col+limit;
    for (i=row-limit;i<=r;++i) {
      ++histogram.at(src.at(i,j));
    }

    // find the most frequent (best) label
    numOfMax = 0;
    maxIndex = -1;
    // lower the cached maximum so the scan below re-detects it: one column
    // shift changes any label count by at most maxChange
    max -= maxChange;
    for (i=0;i<histoSize;++i) {
      if (histogram.at(i) < max) {
        // most frequent case: nothing to do (tested first for speed)
      } else if (histogram.at(i) > max) {
        max = histogram.at(i);
        numOfMax = 1;
        maxIndex = i;
      } else { // histogram.at(i) == max
        ++numOfMax;
      }
    }

    // is there only one candidate?
    if (numOfMax == 1) {
      dest.at(row,col) = maxIndex;
    } else if (histogram.at(src.at(row,col)) == max) {
      // the kernel center is one of the maxima: keep it
      dest.at(row,col) = src.at(row,col);
    } else {
      dest.at(row,col) = getMedian(histogram,max,numOfMax);
    }
  } // while
}
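// The two column loops above are what make the filter fast: instead of
// rebuilding the histogram at every kernel position, only the leaving and
// entering columns are updated, so a shift costs O(sizeOfKernel) instead
// of O(sizeOfKernel^2). A minimal 1-D sketch of the same idea (slideWindow
// and its names are illustrative, not LTI-lib API):
#include <vector>

void slideWindow(const std::vector<int>& labels, int kernelSize,
                 std::vector<int>& histogram, int& left) {
  --histogram[labels[left]];               // label leaving on the left
  ++histogram[labels[left + kernelSize]];  // label entering on the right
  ++left;  // window now covers [left, left+kernelSize-1] again
}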
void svm::buildIdMaps(const ivector& ids) {
  int j=0;

  // build the external-to-internal id map and its reverse
  idMap.clear();
  for (int i=0; i<ids.size(); i++) {
    if (idMap.find(ids.at(i)) == idMap.end()) {
      _lti_debug("Mapping external id " << ids.at(i)
                 << " to " << j << std::endl);
      rIdMap[j]=ids.at(i);
      idMap[ids.at(i)]=j++;
    }
  }

  nClasses=j;
}
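// Worked example (the external ids are made up): for training labels
// {7, 3, 7, 5}, internal ids are assigned in order of first appearance:
//   idMap:  7->0, 3->1, 5->2   (external -> internal)
//   rIdMap: 0->7, 1->3, 2->5   (internal -> external)
//   nClasses == 3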
void MLP::checkHowManyOutputs(const ivector& ids) {
  // count how many different ids are present in the training set
  std::map<int,int> extToInt;
  std::map<int,int>::iterator it;

  int i,k;
  for (i=0,k=0;i<ids.size();++i) {
    it = extToInt.find(ids.at(i));
    if (it == extToInt.end()) {
      extToInt[ids.at(i)] = k;
      ++k;
    }
  }

  outputs = extToInt.size();
}
// Trains one sparse histogram per object class.
bool shClassifier::train(const dmatrix& input, const ivector& ids) {
  buildIdMaps(ids);

  boundsFunctor<double> bounds;
  const parameters& par=getParameters();
  dvector min,max;

  if (par.autoBounds) {
    bounds.boundsOfRows(input,min,max);
  } else {
    min=par.minimum;
    max=par.maximum;
  }

  _lti_debug("Binvector.size = " << par.binVector.size() << "\n");

  int i;

  // build one histogram per object
  models.resize(nClasses);
  for (i=0; i<nClasses; i++) {
    if (par.binVector.size() == min.size()) {
      models[i]=new sparseHistogram(par.binVector,min,max);
    } else {
      models[i]=new sparseHistogram(par.numberOfBins,min,max);
    }
  }

  ivector sum(nClasses);

  // fill histograms
  for (i=0; i<input.rows(); i++) {
    int id=idMap[ids.at(i)];
    models[id]->add(input.getRow(i));
    sum[id]++;
  }

  // normalize histograms
  for (i=0; i<nClasses; i++) {
    _lti_debug("Sum of " << i << " is " << sum.at(i) << "\n");
    if (sum.at(i) == 0) {
      // no training data for this class: drop its model
      delete models[i];
      models[i]=0;
    } else {
      models[i]->divide(static_cast<float>(sum.at(i)));
    }
  }

  defineOutputTemplate();
  return true;
}
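// Note: after the division each surviving histogram holds relative
// frequencies, i.e. models[i] approximates P(feature cell | class i), which
// makes the per-class responses comparable at classification time.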
void svm::makeTargets(const ivector& ids) {
  // expand each class label i to a vector v with v[j]=+1 if j == i,
  // and v[j]=-1 if j != i
  srcIds=ids;

  dmatrix* t=new dmatrix(nClasses,ids.size(),-1.0);

  // iterate over training labels
  for (int i=0; i<t->columns(); i++) {
    t->at(idMap[ids.at(i)],i)=1;
  }

  if (target != 0) {
    delete target;
  }
  target=t;
}
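// Worked example (labels made up): with nClasses == 2 and internal training
// labels 0, 1, 0, the resulting 2x3 target matrix is
//
//              pattern 0  pattern 1  pattern 2
//   class 0:      +1         -1         +1
//   class 1:      -1         +1         -1
//
// i.e. one one-vs-all target row per class.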
// return the median of the labels whose histogram count equals max
int kNearestNeighFilter::getMedian(const ivector& histogram,
                                   const int max,
                                   const int numOfMax) const {
  ivector vect(numOfMax,0);
  int i,z=0;
  const int size=histogram.size();

  // collect all labels that reach the maximum count
  for (i=0;i<size;++i) {
    if (histogram.at(i) == max) {
      vect.at(z++) = i;
    }
  }

  // z == numOfMax here, so this picks the middle candidate
  return vect.at(z/2);
}
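// Worked example (counts made up): if labels 2, 5 and 9 all reach the
// maximum count, vect becomes [2, 5, 9] and vect.at(z/2) == vect.at(1) == 5
// is returned; for an even number of candidates the upper of the two middle
// labels is chosen.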
/*
 * compute the error of the given weights for the whole training set
 */
bool MLP::computeTotalError(const std::vector<dmatrix>& mWeights,
                            const dmatrix& inputs,
                            const ivector& ids,
                            double& totalError) const {

  if (ids.size() != inputs.rows()) {
    return false;
  }

  const parameters& param = getParameters();
  const int layers = param.hiddenUnits.size()+1;
  std::vector<dvector> uNet(layers),uOut(layers);
  int i;
  double tmp;

  totalError=0.0;

  // accumulate the per-pattern errors over the whole training set
  for (i=0;i<ids.size();++i) {
    propagate(inputs.getRow(i),mWeights,uNet,uOut);
    computePatternError(ids.at(i),uOut.back(),tmp);
    totalError+=tmp;
  }

  return true;
}
bool MLP::trainSteepestSequential(const dmatrix& data,
                                  const ivector& internalIds) {
  const parameters& param = getParameters();
  char buffer[256];
  bool abort = false;
  scramble<int> scrambler;
  int i,j,k;
  double tmpError;

  ivector idx;
  idx.resize(data.rows(),0,false,false);
  for (i=0;i<idx.size();++i) {
    idx.at(i)=i;
  }

  if (param.momentum > 0) {
    // with momentum
    dvector grad,delta(weights.size(),0.0);

    for (i=0; !abort && (i<param.maxNumberOfEpochs); ++i) {
      // present the patterns in a random sequence
      scrambler.apply(idx);
      totalError = 0;

      for (j=0;j<idx.size();++j) {
        k=idx.at(j);
        calcGradient(data.getRow(k),internalIds.at(k),grad);
        computeActualError(internalIds.at(k),tmpError);
        totalError+=tmpError;

        // delta = learnrate*grad + momentum*delta
        delta.addScaled(param.learnrate,grad,param.momentum,delta);
        weights.add(delta);
      }

      // update the progress info object
      if (validProgressObject()) {
        sprintf(buffer,"Error=%f",totalError/errorNorm);
        getProgressObject().step(buffer);
        abort = abort || (totalError/errorNorm <= param.stopError);
        abort = abort || getProgressObject().breakRequested();
      }
    }
  } else {
    // without momentum
    dvector grad;

    for (i=0; !abort && (i<param.maxNumberOfEpochs); ++i) {
      // present the patterns in a random sequence
      scrambler.apply(idx);
      totalError = 0;

      for (j=0;j<idx.size();++j) {
        k=idx.at(j);
        calcGradient(data.getRow(k),internalIds.at(k),grad);
        computeActualError(internalIds.at(k),tmpError);
        totalError+=tmpError;
        weights.addScaled(param.learnrate,grad);
      }

      // update the progress info object
      if (validProgressObject()) {
        sprintf(buffer,"Error=%f",totalError/errorNorm);
        getProgressObject().step(buffer);
        abort = abort || (totalError/errorNorm <= param.stopError);
        abort = abort || getProgressObject().breakRequested();
      }
    }
  }

  return true;
}
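// Note: the momentum branch above implements the classic update rule
//   delta(t) = learnrate*grad + momentum*delta(t-1);  weights += delta(t)
// assuming, as its use here suggests, that dvector::addScaled(a,x,b,y)
// computes a*x + b*y in place.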
/*
 * Train the MLP with the given weights as starting point.
 */
bool MLP::train(const dvector& theWeights,
                const dmatrix& data,
                const ivector& ids) {

  if (data.empty()) {
    setStatusString("Train data empty");
    return false;
  }

  if (ids.size()!=data.rows()) {
    std::string str;
    str = "the number of IDs and the number of rows ";
    str+= "of the input matrix must be equal.";
    setStatusString(str.c_str());
    return false;
  }

  // tracks the status of the training process: if an error occurs, it is
  // set to false and setStatusString() is called, but training continues,
  // compensating for the error as well as possible
  bool b=true;

  // vector with internal ids
  ivector newIds,idsLUT;
  newIds.resize(ids.size(),0,false,false);

  // map from external to internal ids
  std::map<int,int> extToInt;
  std::map<int,int>::iterator it;

  int i,k;
  for (i=0,k=0;i<ids.size();++i) {
    it = extToInt.find(ids.at(i));
    if (it != extToInt.end()) {
      newIds.at(i) = (*it).second;
    } else {
      extToInt[ids.at(i)] = k;
      newIds.at(i) = k;
      ++k;
    }
  }

  idsLUT.resize(extToInt.size());
  for (it=extToInt.begin();it!=extToInt.end();++it) {
    idsLUT.at((*it).second) = (*it).first;
  }

  // initialize the input and output units from the given data
  outputs = idsLUT.size();
  inputs  = data.columns();

  const parameters& param = getParameters();

  // display which kind of algorithm is to be used
  if (validProgressObject()) {
    getProgressObject().reset();
    std::string str("MLP: Training using ");
    switch(param.trainingMode) {
      case parameters::ConjugateGradients:
        str += "conjugate gradients";
        break;
      case parameters::SteepestDescent:
        str += "steepest descent";
        break;
      default:
        str += "unnamed method";
    }
    getProgressObject().setTitle(str);
    getProgressObject().setMaxSteps(param.maxNumberOfEpochs+1);
  }

  dvector grad;
  if (&theWeights != &weights) {
    weights.copy(theWeights);
  }

  if (!initWeights(true)) { // keep the given weights
    setStatusString("Wrong weights!");
    return false;
  }

  computeErrorNorm(newIds);

  if (param.trainingMode == parameters::ConjugateGradients) {
    b = trainConjugateGradients(data,newIds);
  } else {
    if (param.batchMode) {
      // batch training mode
      b = trainSteepestBatch(data,newIds);
    } else {
      // sequential training mode
      b = trainSteepestSequential(data,newIds);
    }
  }

  if (validProgressObject()) {
    getProgressObject().step("Training ready.");
  }

  outputTemplate tmpOutTemp(idsLUT);
  setOutputTemplate(tmpOutTemp);

  // create the appropriate outputTemplate
  makeOutputTemplate(outputs,data,ids);

  return b;
}
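// Usage sketch (the parameter values are illustrative only, and it assumes
// the usual two-argument classifier train(data,ids) overload is also
// provided):
//
//   lti::MLP ann;
//   lti::MLP::parameters param;
//   param.trainingMode = lti::MLP::parameters::SteepestDescent;
//   param.batchMode = false;           // sequential updates
//   param.maxNumberOfEpochs = 500;
//   ann.setParameters(param);
//   ann.train(trainData, trainIds);    // dmatrix of patterns, ivector of labels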