bool BernoulliRBM::predict_(VectorFloat &inputData,VectorFloat &outputData){

    /// @brief Propagates a single input vector up through the RBM, writing the
    ///        hidden-layer activation probabilities into outputData.
    /// @param inputData  visible-layer input; must contain numVisibleUnits values.
    ///                   NOTE: scaled in place to [0,1] when useScaling is set.
    /// @param outputData resized to numHiddenUnits; outputData[i] = sigmoid(W_i . x + b_i)
    /// @return true on success, false if the model is untrained or the input size is wrong

    if( !trained ){
        errorLog << "predict_(VectorFloat &inputData,VectorFloat &outputData) - Failed to run prediction - the model has not been trained." << std::endl;
        return false;
    }

    if( inputData.size() != numVisibleUnits ){
        errorLog << "predict_(VectorFloat &inputData,VectorFloat &outputData) - Failed to run prediction - the input data size (" << inputData.size() << ")";
        errorLog << " does not match the number of visible units (" << numVisibleUnits << "). " << std::endl;
        return false;
    }

    if( outputData.size() != numHiddenUnits ){
        outputData.resize( numHiddenUnits );
    }

    //Scale the data if needed
    if( useScaling ){
        for(UINT i=0; i<numVisibleUnits; i++){
            inputData[i] = grt_scale(inputData[i],ranges[i].minValue,ranges[i].maxValue,0.0,1.0);
        }
    }

    //Propagate the data up through the RBM
    for(UINT i=0; i<numHiddenUnits; i++){
        //Bug fix: the accumulator must be reset for every hidden unit; previously
        //it was declared once outside this loop, so the weighted sums of earlier
        //hidden units leaked into the activations of later ones
        Float x = 0.0;
        for(UINT j=0; j<numVisibleUnits; j++){
            x += weightsMatrix[i][j] * inputData[j];
        }
        outputData[i] = grt_sigmoid( x + hiddenLayerBias[i] );
    }

    return true;
}
//Compute the regression data that will be stored at this node bool RegressionTree::computeNodeRegressionData( const RegressionData &trainingData, VectorFloat ®ressionData ){ const UINT M = trainingData.getNumSamples(); const UINT N = trainingData.getNumInputDimensions(); const UINT T = trainingData.getNumTargetDimensions(); if( M == 0 ){ Regressifier::errorLog << "computeNodeRegressionData(...) - Failed to compute regression data, there are zero training samples!" << std::endl; return false; } //Make sure the regression data is the correct size regressionData.clear(); regressionData.resize( T, 0 ); //The regression data at this node is simply an average over all the training data at this node for(unsigned int j=0; j<N; j++){ for(unsigned int i=0; i<M; i++){ regressionData[j] += trainingData[i].getTargetVector()[j]; } regressionData[j] /= M; } return true; }
//Generate `row_count` synthetic rows from the CrossCat model and stream them to `rows_out`.
//For each row and each kind: score the existing groups under the clustering prior,
//choose which features are observed (each independently with probability `density`),
//sample a group and a value, then add the sample back into the model/mixture so that
//later rows are conditioned on earlier ones. Per-kind partial values are joined into
//the full row value before writing.
void generate_rows (
        const protobuf::Config::Generate & config,
        CrossCat & cross_cat,
        Assignments & assignments,
        const char * rows_out,
        rng_t & rng)
{
    const size_t kind_count = cross_cat.kinds.size();
    const size_t row_count = config.row_count();
    //density is the per-feature probability of a feature being observed; must lie in [0, 1]
    const float density = config.density();
    LOOM_ASSERT_LE(0.0, density);
    LOOM_ASSERT_LE(density, 1.0);
    VectorFloat scores;
    std::vector<ProductModel::Value> partial_values(kind_count);
    protobuf::Row row;
    protobuf::OutFile rows(rows_out);
    //Materialize concrete parameters for each kind's model before sampling begins
    for (auto & kind : cross_cat.kinds) {
        kind.model.realize(rng);
    }
    cross_cat.schema.clear(* row.mutable_diff());
    ProductValue & full_value = * row.mutable_diff()->mutable_pos();
    for (size_t id = 0; id < row_count; ++id) {
        assignments.rowids().try_push(id);
        for (size_t k = 0; k < kind_count; ++k) {
            auto & kind = cross_cat.kinds[k];
            ProductModel & model = kind.model;
            auto & mixture = kind.mixture;
            ProductValue & value = partial_values[k];
            auto & groupids = assignments.groupids(k);
            //Score each existing group under the clustering prior, then normalize
            //the scores into a probability vector
            scores.resize(mixture.clustering.counts().size());
            mixture.clustering.score_value(model.clustering, scores);
            distributions::scores_to_probs(scores);
            const VectorFloat & probs = scores;
            //Build a dense observed-mask: each feature is observed independently
            //with probability `density`
            auto & observed = * value.mutable_observed();
            ValueSchema::clear(observed);
            observed.set_sparsity(ProductModel::Value::Observed::DENSE);
            const size_t feature_count = kind.featureids.size();
            for (size_t f = 0; f < feature_count; ++f) {
                observed.add_dense(
                    distributions::sample_bernoulli(rng, density));
            }
            //Sample a group assignment and a value, then feed the sample back
            //into the model and mixture so subsequent rows condition on it
            size_t groupid = mixture.sample_value(model, probs, value, rng);
            model.add_value(value, rng);
            mixture.add_value(model, groupid, value, rng);
            groupids.push(groupid);
        }
        row.set_id(id);
        //Join the per-kind partial values into the full row value and write the row
        cross_cat.splitter.join(full_value, partial_values);
        rows.write_stream(row);
    }
}
bool PrincipalComponentAnalysis::project(const VectorFloat &data,VectorFloat &prjData){ const unsigned int N = (unsigned int)data.size(); if( !trained ){ warningLog << "project(const VectorFloat &data,VectorFloat &prjData) - The PrincipalComponentAnalysis module has not been trained!" << std::endl; return false; } if( N != numInputDimensions ){ warningLog << "project(const VectorFloat &data,VectorFloat &prjData) - The size of the input vector (" << N << ") does not match the number of input dimensions (" << numInputDimensions << ")!" << std::endl; return false; } VectorFloat msData = data; if( normData ){ //Mean subtract the data for(UINT j=0; j<numInputDimensions; j++) msData[j] = (msData[j]-mean[j])/stdDev[j]; }else{ //Mean subtract the data for(UINT j=0; j<numInputDimensions; j++) msData[j] -= mean[j]; } //Projected Data prjData.resize( numPrincipalComponents ); for(UINT i=0; i<numPrincipalComponents; i++){//For each PC prjData[i]=0; for(UINT j=0; j<N; j++)//For each feature prjData[i] += msData[j] * eigenvectors[j][sortedEigenvalues[i].index]; } return true; }