/**
 * Builds the in-memory training structures from the given examples.
 *
 * For every example this:
 *   - adds one (example id, tf-idf weight) posting per text term to
 *     termDocWset,
 *   - stores the sum of squared tf-idf weights in docTrainSizes,
 *   - stores the numerical and categorical tokens in exNumTrain / exCatTrain,
 *   - widens the per-attribute [minv, maxv] range with the numerical values.
 *
 * @param exs training examples to index.
 */
void KNN::train(Examples& exs){
    TRACE_V(TAG,"train");

    // Maybe we didn't calculate this before...
    stats->calculateIDF();

    // Seed the ranges so that the first observed value always replaces them.
    // numeric_limits<double>::min() is the smallest POSITIVE double, so it must
    // not be used as the seed for a maximum: attributes whose values are all
    // zero or negative would never update it.
    for(int i = 0; i < exs.getNumberOfNumericalAttibutes(); i++){
        maxv[i] = -numeric_limits<double>::max();
        minv[i] = numeric_limits<double>::max();
    }

    for(ExampleIterator e = exs.getBegin(); e != exs.getEnd(); e++){
        vector<string> textTokens = (e)->getTextTokens();
        vector<int> textFrequencyTokens = (e)->getTextFrequency();
        string eId = (e)->getId();

        // Accumulates the sum of squared tf-idf weights of this example.
        double docSize = 0.0;

        // Terms start at index 3 of the text tokens, while their frequencies
        // start at index 0 of the frequency vector.
        // NOTE(review): presumably the first three text tokens are header
        // fields and textFrequencyTokens has at least size() - 3 entries --
        // confirm against the Example parser.
        for(unsigned int i = 3; i < textTokens.size(); i++){
            int tf = textFrequencyTokens[i-3];
            string termId = textTokens[i];
            double tfidf = tf * stats->getIDF(termId);

            docSize += (tfidf * tfidf);

            docWeighted dw(eId, tfidf);
            termDocWset[termId].insert(dw);
        }

        // Track the observed range of every numerical attribute.
        vector<double> numTokens = (e)->getNumericalTokens();
        for(unsigned int i = 0; i < numTokens.size(); i++){
            if(greaterThan(numTokens[i], maxv[i])){
                maxv[i] = numTokens[i];
            }
            if(lesserThan(numTokens[i], minv[i])){
                minv[i] = numTokens[i];
            }
        }

        exNumTrain[eId] = numTokens;
        exCatTrain[eId] = (e)->getCategoricalTokens();
        docTrainSizes[eId] = docSize;
    }
}