コード例 #1
0
ファイル: KNN.cpp プロジェクト: andersonbr/gpcred
/**
 * Builds the KNN training structures from the given examples.
 *
 * For every example this:
 *   - computes a tf*idf weight for each text token and records it in
 *     termDocWset under the token's term id,
 *   - accumulates the sum of squared tf*idf weights into docTrainSizes
 *     (no square root is taken here),
 *   - updates the per-attribute minimum/maximum (minv / maxv) of the
 *     numerical tokens,
 *   - stores the numerical and categorical tokens in exNumTrain / exCatTrain,
 *     keyed by the example id.
 *
 * @param exs training examples to iterate over.
 */
void KNN::train(Examples& exs){

	TRACE_V(TAG,"train");

    // The IDF values may not have been calculated before this point.
    stats->calculateIDF();

    // Seed the running extremes so that the first observed value always wins.
    // numeric_limits<double>::min() is the smallest POSITIVE normalized double,
    // not the most negative value, so it must not be used as the seed for a
    // running maximum: attributes whose values are all negative would keep a
    // bogus maximum. -max() is the most negative finite double.
    for(int i = 0; i < exs.getNumberOfNumericalAttibutes(); ++i){
        maxv[i] = -numeric_limits<double>::max();
        minv[i] = numeric_limits<double>::max();
    }

	for(ExampleIterator e = exs.getBegin(); e != exs.getEnd(); ++e){

        // Bound by const reference: no copy if the getters return references,
        // and lifetime extension keeps a returned temporary alive otherwise.
        const vector<string>& textTokens = (e)->getTextTokens();
        const vector<int>& textFrequencyTokens = (e)->getTextFrequency();
        string eId = (e)->getId();

        // Sum of squared tf*idf weights of this example.
        double docSize = 0.0;

        // NOTE(review): text tokens are read starting at index 3 while their
        // frequencies are read starting at index 0 (i-3). Presumably the first
        // three text tokens are header fields without a frequency entry --
        // confirm against the Example parser.
		for(unsigned int i = 3; i < textTokens.size(); i++){
			int tf = textFrequencyTokens[i-3];
			const string& termId = textTokens[i];

            double tfidf = tf * stats->getIDF(termId);

            docSize += (tfidf * tfidf);

            docWeighted dw(eId, tfidf);
            termDocWset[termId].insert(dw);
		}

        const vector<double>& numTokens = (e)->getNumericalTokens();

        // Track the per-attribute range of the numerical tokens.
        // NOTE(review): assumes numTokens.size() does not exceed the number of
        // numerical attributes seeded above -- TODO confirm.
        for(unsigned int i = 0; i < numTokens.size(); i++){
            if(greaterThan(numTokens[i], maxv[i])){
                maxv[i] = numTokens[i];
            }
            if(lesserThan(numTokens[i], minv[i])){
                minv[i] = numTokens[i];
            }
        }

        exNumTrain[eId] = numTokens;
        exCatTrain[eId] = (e)->getCategoricalTokens();

        docTrainSizes[eId] = docSize;
    }

}