Example #1
/*
 * Calculates the PCA for the static code features or the dynamic setup features
 * and returns the number of principal components needed to cover the requested
 * percentage of the total variance.
 */
size_t PcaSeparateExt::calcSpecializedPca(double toBeCovered, bool dynamic) {
	Array<double> in;
	Array<int64> ids;

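	// load the feature matrix and the matching row ids from the database,
	// generating a default query first if none has been set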
	if(dynamic) {
		if(dynamicQuery.size() == 0)
			genDefaultDynamicQuery();
		readDatabase(in, ids, dynamicFeatures.size(), dynamicQuery);
	} else {
		if(query.size() == 0)
			genDefaultQuery();
		readDatabase(in, ids, staticFeatures.size(), query);
	}

	// fit a full-rank PCA model first to obtain the complete eigenvalue spectrum
	AffineLinearMap model(in.cols(), in.cols());
	Array<double> eigenvalues;

	genPCAmodel(model, in, eigenvalues);

	// each eigenvalue equals the variance along its principal component, so the
	// share of variance covered by the first k PCs is their eigenvalue sum
	// divided by the sum over all eigenvalues
	double sum = 0, partSum = 0;

	for(size_t i = 0; i < eigenvalues.nelem(); ++i) {
		sum += eigenvalues(i);
	}

	// find the smallest number of PCs whose cumulative share of the variance
	// reaches the requested percentage (>= so that requesting 100% also works)
	size_t nPCs = 0;
	toBeCovered /= 100.0;
	for(size_t i = 0; i < model.getOutputDimension(); ++i) {
		partSum += eigenvalues(i);
		if(partSum / sum >= toBeCovered) {
			nPCs = i+1;
			break;
		}
	}

	// fit a second, reduced model that keeps only the first nPCs components
	AffineLinearMap reductionModel(in.cols(), nPCs);

	genPCAmodel(reductionModel, in);

	LOG(INFO) << reductionModel.getOutputDimension() << " PCs cover " << (partSum/sum)*100.0 << "% of the " << (dynamic ? "dynamic" : "static") << " features' total variance\n";

	Array<double> out = genPCs(reductionModel, in);


	// write the reduced features back to the database
	if(dynamic)
		writeToSetup(out, ids);
	else
		writeToCode(out, ids);

	return out.cols();

}
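A minimal usage sketch for this overload, assuming PcaSeparateExt is default-constructible and that its database connection and feature lists have already been set up (none of that setup appears in the excerpt):

// Hypothetical driver code: keep however many principal components are
// needed to cover 95% of the variance of the static code features.
PcaSeparateExt pca;                                // assumed constructor
size_t nPCs = pca.calcSpecializedPca(95.0, false); // 95% coverage, static features
LOG(INFO) << "kept " << nPCs << " principal components\n";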
Example #2
/*
 * Calculates the given number of principal components of the static or dynamic
 * features based on the corresponding query, stores them in the database, and
 * returns the percentage of the total variance they cover.
 */
double PcaSeparateExt::calcSpecializedPca(size_t nInFeatures, size_t nOutFeatures, bool dynamic) {
	Array<double> in;
	Array<int64> ids;

	if(dynamic) {
		if(dynamicQuery.size() == 0)
			genDefaultDynamicQuery();
		readDatabase(in, ids, dynamicFeatures.size(), dynamicQuery);
	} else {
		if(query.size() == 0)
			genDefaultQuery();
		readDatabase(in, ids, staticFeatures.size(), query);
	}

	// fit a PCA model mapping the nInFeatures inputs onto nOutFeatures components
	AffineLinearMap model(nInFeatures, nOutFeatures);
	Array<double> eigenvalues;

	genPCAmodel(model, in, eigenvalues);

	// calculate the percentage of covered variance
	double sum = 0, partSum = 0;
	size_t i = 0;
	for(; i < nOutFeatures; ++i)
		partSum += eigenvalues(i);
	sum = partSum;
	for(; i < eigenvalues.nelem(); ++i)
		sum += eigenvalues(i);
	double covered = (partSum / sum) * 100.0;
	LOG(INFO) << nOutFeatures << " PCs cover " << covered << "% of the " << (dynamic ? "dynamic" : "static") << " features' total variance\n";

	Array<double> out = genPCs(model, in);


	// write the reduced features back to the database
	if(dynamic)
		writeToSetup(out, ids);
	else
		writeToCode(out, ids);

	return covered;

}
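A similar hedged sketch for this overload, which fixes the number of components up front and returns the share of variance they retain; the feature counts below are purely illustrative:

// Hypothetical driver code: project 24 dynamic setup features onto 4 PCs
// and log how much of the total variance the reduction preserves.
PcaSeparateExt pca;                                   // assumed constructor
double covered = pca.calcSpecializedPca(24, 4, true); // 24 inputs -> 4 PCs, dynamic
LOG(INFO) << "4 PCs retain " << covered << "% of the variance\n";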
Example #3
/*
 * Reads values from the database and stores the features in in and the targets
 * (mapped according to the set policy) in target, using one-of-n coding.
 */
size_t Trainer::readDatabase(Array<double>& in, Array<double>& target) throw(Kompex::SQLiteException) {
	// if no query has been set, use default query
	if(query.size() == 0)
		genDefaultQuery();

	// read the extrema of the measurement column for which to train
	double max = 0.0, min = 0.0;
	if(genOut != GenNNoutput::ML_KEEP_INT && genOut != GenNNoutput::ML_FUZZY_VECTOR) {
		max = getMaximum(trainForName);
		min = getMinimum(trainForName);
	}

	Kompex::SQLiteStatement *localStmt = new Kompex::SQLiteStatement(pDatabase);
	unsigned int nClasses = model.getOutputDimension();

	localStmt->Sql(query);

	size_t nRows = localStmt->GetNumberOfRows();
	in = Array<double>(nRows, nFeatures());
	LOG(INFO) << "Queried Rows: " << nRows << ", Number of features: " << staticFeatures.size() << " + " << dynamicFeatures.size()  <<
			" + " << pcaFeatures.size() << std::endl;
	if(nRows == 0)
		throw MachineLearningException("No dataset for the requested features could be found");

	std::list<std::pair<double, size_t> > measurements;

	// pre-fill the one-of-n target vector with the negative class value
	Array<double> oneOfN(nClasses);
	for(Array<double>::iterator I = oneOfN.begin(); I != oneOfN.end() && model.usesOneOfNCoding(); ++I) {
		*I = NEG;
	}

	// fetch all rows and build the feature and target arrays
	size_t i = 0;
	while(localStmt->FetchRow()){
		// construct training vectors
		for(size_t j = 0; j < nFeatures(); ++j) {
			in(i, j) = localStmt->GetColumnDouble(j);
		}

		// either collect the raw measurement for a later mapping to n classes
		// or append the target value (one-of-n coded if required) directly
		if(genOut == ML_MAP_TO_N_CLASSES)
			measurements.push_back(std::make_pair(localStmt->GetColumnDouble(nFeatures()), i));
		else
			appendToTrainArray(target, localStmt, nFeatures(), max, min, oneOfN);

		++i;
	}

	// map the collected measurements onto the n output classes
	if(genOut == ML_MAP_TO_N_CLASSES)
		mapToNClasses(measurements, model.getOutputDimension(), NEG, POS, target);

	// reset the prepared statement
	localStmt->Reset();

	// do not forget to clean-up
	localStmt->FreeQuery();
	delete localStmt;

	// normalize all features, scaling every column into the interval [-1, 1]
	FeaturePreconditioner fp;
	featureNormalization = fp.normalize(in, -1, 1);
	return nRows;
}
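A sketch of how this method might be driven, assuming a concrete Trainer instance whose model and query are already configured, that MachineLearningException derives from std::exception, and that a LOG(ERROR) channel exists alongside the LOG(INFO) used above:

// Hypothetical caller: load the training data and handle both error paths.
Array<double> in, target;
try {
	size_t nRows = trainer.readDatabase(in, target); // 'trainer' assumed to be set up
	LOG(INFO) << "loaded " << nRows << " training rows\n";
} catch(Kompex::SQLiteException&) {
	LOG(ERROR) << "SQLite error while reading the training data\n";
} catch(MachineLearningException& mle) {
	LOG(ERROR) << mle.what() << "\n"; // assumes a std::exception base
}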