/*
 * Calculates the PCA for the static code features or the dynamic setup features.
 * toBeCovered  percentage (0-100) of the total variance that the kept principal
 *              components must cover
 * dynamic      if true the dynamic setup features are used, otherwise the static
 *              code features
 * Returns the number of principal components written back to the database.
 */
size_t PcaSeparateExt::calcSpecializedPca(double toBeCovered, bool dynamic) {
	Array<double> in;
	Array<int64> ids;

	// read the features from the database, generating a default query if none has been set
	if(dynamic) {
		if(dynamicQuery.size() == 0)
			genDefaultDynamicQuery();
		readDatabase(in, ids, dynamicFeatures.size(), dynamicQuery);
	} else {
		if(query.size() == 0)
			genDefaultQuery();
		readDatabase(in, ids, staticFeatures.size(), query);
	}

	// build a full-rank PCA model first, only to obtain all eigenvalues
	AffineLinearMap model(in.cols(), in.cols());
	Array<double> eigenvalues;
	genPCAmodel(model, in, eigenvalues);

	// total variance = sum of all eigenvalues
	double sum = 0, partSum = 0;
	for(size_t i = 0; i < eigenvalues.nelem(); ++i) {
		sum += eigenvalues(i);
	}

	// find the smallest number of PCs whose cumulative variance exceeds the requested coverage
	size_t nPCs = 0;
	toBeCovered /= 100.0;
	for(size_t i = 0; i < model.getOutputDimension(); ++i) {
		partSum += eigenvalues(i);
		if(partSum / sum > toBeCovered) {
			nPCs = i+1;
			break;
		}
	}
	// fallback: the strict '>' test can never fire for toBeCovered == 100%
	// (partSum/sum <= 1.0), which would leave nPCs == 0 and build a
	// zero-dimensional reduction model -> keep all components instead
	if(nPCs == 0)
		nPCs = model.getOutputDimension();

	// regenerate the model, now reduced to the chosen number of components
	AffineLinearMap reductionModel(in.cols(), nPCs);
	genPCAmodel(reductionModel, in);

	LOG(INFO) << reductionModel.getOutputDimension() << " PCs cover " << (partSum/sum)*100.0 << "% of the "
		<< (dynamic ? "dynamic" : "static") << " feature's total variance\n";

	Array<double> out = genPCs(reductionModel, in);

	// store the principal components in the database
	if(dynamic)
		writeToSetup(out, ids);
	else
		writeToCode(out, ids);

	return out.cols();
}
/*
 * Calculates nOutFeatures principal components of the static or dynamic features
 * (selected by the stored query) and writes them to the database.
 * nInFeatures   input dimension of the PCA model
 * nOutFeatures  number of principal components to keep
 * dynamic       if true the dynamic setup features are used, otherwise the
 *               static code features
 * Returns the percentage of the total variance covered by the kept components.
 */
double PcaSeparateExt::calcSpecializedPca(size_t nInFeatures, size_t nOutFeatures, bool dynamic) {
	Array<double> in;
	Array<int64> ids;

	// fetch the feature values, falling back to a default query when needed
	if(dynamic) {
		if(dynamicQuery.size() == 0)
			genDefaultDynamicQuery();
		readDatabase(in, ids, dynamicFeatures.size(), dynamicQuery);
	} else {
		if(query.size() == 0)
			genDefaultQuery();
		readDatabase(in, ids, staticFeatures.size(), query);
	}

	AffineLinearMap model(nInFeatures, nOutFeatures);
	Array<double> eigenvalues;
	genPCAmodel(model, in, eigenvalues);

	// accumulate total variance and, for the first nOutFeatures eigenvalues,
	// the variance captured by the kept components
	double keptVariance = 0.0, totalVariance = 0.0;
	for(size_t idx = 0; idx < eigenvalues.nelem(); ++idx) {
		double ev = eigenvalues(idx);
		totalVariance += ev;
		if(idx < nOutFeatures)
			keptVariance += ev;
	}
	double covered = (keptVariance / totalVariance) * 100.0;

	LOG(INFO) << nOutFeatures << " PCs cover " << covered << "% of the "
		<< (dynamic ? "dynamic" : "static") << " feature's total variance\n";

	Array<double> out = genPCs(model, in);

	// persist the principal components
	if(dynamic)
		writeToSetup(out, ids);
	else
		writeToCode(out, ids);

	return covered;
}
/*
 * Reads values from the database and stores the features in in, the targets
 * (mapped according to the set policy) in target as one-of-n coding.
 * in      output: one row per dataset, one column per feature
 * target  output: training targets, encoding depends on genOut
 * Throws Kompex::SQLiteException on database errors,
 *        MachineLearningException when the query yields no rows.
 * Returns the number of rows (datasets) read.
 */
size_t Trainer::readDatabase(Array<double>& in, Array<double>& target) throw(Kompex::SQLiteException) {
	// if no query has been set, use default query
	if(query.size() == 0)
		genDefaultQuery();

	// read the extrema of the column to train for; not needed when raw or
	// fuzzy target values are kept
	double max = 0.0, min = 0.0;
	if(genOut != GenNNoutput::ML_KEEP_INT && genOut != GenNNoutput::ML_FUZZY_VECTOR) {
		max = getMaximum(trainForName);
		min = getMinimum(trainForName);
	}

	// stack allocation instead of raw new/delete: the statement is destroyed
	// automatically even if Sql()/FetchRow() throws, so nothing leaks
	Kompex::SQLiteStatement localStmt(pDatabase);
	unsigned int nClasses = model.getOutputDimension();

	localStmt.Sql(query);

	size_t nRows = localStmt.GetNumberOfRows();
	in = Array<double>(nRows, nFeatures());
	LOG(INFO) << "Queried Rows: " << nRows << ", Number of features: " << staticFeatures.size() << " + "
		<< dynamicFeatures.size() << " + " << pcaFeatures.size() << std::endl;

	if(nRows == 0) {
		// release the prepared query before bailing out (was skipped before)
		localStmt.FreeQuery();
		throw MachineLearningException("No dataset for the requested features could be found");
	}

	std::list<std::pair<double, size_t> > measurements;

	// pre-fill a one-of-n coding template with the negative value
	Array<double> oneOfN(nClasses);
	for(Array<double>::iterator I = oneOfN.begin(); I != oneOfN.end() && model.usesOneOfNCoding(); ++I) {
		*I = NEG;
	}

	//Train machine
	size_t i = 0;
	// fetch all results
	while(localStmt.FetchRow()) {
		// construct training vectors
		for(size_t j = 0; j < nFeatures(); ++j) {
			in(i, j) = localStmt.GetColumnDouble(j);
		}

		// translate index to one-of-n coding
		if(genOut == ML_MAP_TO_N_CLASSES)
			measurements.push_back(std::make_pair(localStmt.GetColumnDouble(nFeatures()), i));
		else
			appendToTrainArray(target, &localStmt, nFeatures(), max, min, oneOfN);

		++i;
	}

	if(genOut == ML_MAP_TO_N_CLASSES)
		mapToNClasses(measurements, model.getOutputDimension(), NEG, POS, target);

	// reset the prepared statement
	localStmt.Reset();

	// do not forget to clean-up
	localStmt.FreeQuery();

	// normalize the features to the range [-1, 1] and remember the parameters
	FeaturePreconditioner fp;
	featureNormalization = fp.normalize(in, -1, 1);
	return nRows;
}