예제 #1
0
/**
 Propagates an input vector up through the RBM and writes one activation per hidden unit.

 For each hidden unit i the output is grt_sigmoid( sum_j( weightsMatrix[i][j] * inputData[j] ) + hiddenLayerBias[i] ).

 NOTE: when useScaling is true, inputData is rescaled IN PLACE (via grt_scale with a 0.0 to 1.0 target range),
 so the caller's vector is modified by this call.

 @param inputData: the input vector, its size must match numVisibleUnits. Modified in place if scaling is enabled
 @param outputData: receives the hidden unit activations, resized to numHiddenUnits if needed
 @return true if the prediction ran, false if the model is not trained or the input size is wrong
*/
bool BernoulliRBM::predict_(VectorFloat &inputData,VectorFloat &outputData){
    
    if( !trained ){
        errorLog << "predict_(VectorFloat &inputData,VectorFloat &outputData) - Failed to run prediction - the model has not been trained." << std::endl;
        return false;
    }
    
    if( inputData.size() != numVisibleUnits ){
        errorLog << "predict_(VectorFloat &inputData,VectorFloat &outputData) - Failed to run prediction - the input data size (" << inputData.size() << ")";
        errorLog << " does not match the number of visible units (" << numVisibleUnits << "). " << std::endl;
        return false;
    }
    
    if( outputData.size() != numHiddenUnits ){
        outputData.resize( numHiddenUnits );
    }
    
    //Scale the data if needed
    if( useScaling ){
        for(UINT i=0; i<numVisibleUnits; i++){
            inputData[i] = grt_scale(inputData[i],ranges[i].minValue,ranges[i].maxValue,0.0,1.0);
        }
    }
    
    //Propagate the data up through the RBM
    for(UINT i=0; i<numHiddenUnits; i++){
        //The weighted sum must start from zero for every hidden unit, otherwise the
        //activation of unit i would also contain the sums of units 0..i-1
        Float x = 0.0;
        for(UINT j=0; j<numVisibleUnits; j++) {
            x += weightsMatrix[i][j] * inputData[j];
        }
        outputData[i] = grt_sigmoid( x + hiddenLayerBias[i] );
    }
    
    return true;
}
예제 #2
0
    //Compute the regression data that will be stored at this node
bool RegressionTree::computeNodeRegressionData( const RegressionData &trainingData, VectorFloat &regressionData ){
    
    const UINT M = trainingData.getNumSamples();
    const UINT N = trainingData.getNumInputDimensions();
    const UINT T = trainingData.getNumTargetDimensions();
    
    if( M == 0 ){
        Regressifier::errorLog << "computeNodeRegressionData(...) - Failed to compute regression data, there are zero training samples!" << std::endl;
        return false;
    }
    
    //Make sure the regression data is the correct size
    regressionData.clear();
    regressionData.resize( T, 0 );
    
    //The regression data at this node is simply an average over all the training data at this node
    for(unsigned int j=0; j<N; j++){
        for(unsigned int i=0; i<M; i++){
            regressionData[j] += trainingData[i].getTargetVector()[j];
        }
        regressionData[j] /= M;
    }
    
    return true;
}
예제 #3
0
파일: generate.hpp 프로젝트: jostheim/loom
/**
 * Generate config.row_count() rows from the cross-cat model and stream them to rows_out.
 *
 * Every kind's model is first realized with rng. Then, for each row id 0..row_count-1 and
 * for each kind in order:
 *   - the kind's clustering scores are converted to probabilities,
 *   - a dense observed mask is drawn, one Bernoulli(density) sample per feature of the kind,
 *   - a group id and value are sampled from the kind's mixture,
 *   - the value is added to the kind's model and mixture, and the group id is pushed
 *     onto the assignments for that kind.
 * The per-kind values are then joined into one row, which is written to the output file.
 *
 * config.density() is asserted to lie in [0, 1].
 *
 * The order of calls that consume rng is significant for reproducibility; do not reorder them.
 */
void generate_rows (
        const protobuf::Config::Generate & config,
        CrossCat & cross_cat,
        Assignments & assignments,
        const char * rows_out,
        rng_t & rng)
{
    const size_t num_kinds = cross_cat.kinds.size();
    const size_t num_rows = config.row_count();
    const float density = config.density();
    LOOM_ASSERT_LE(0.0, density);
    LOOM_ASSERT_LE(density, 1.0);

    // Scratch buffers reused across all rows and kinds.
    VectorFloat group_scores;
    std::vector<ProductModel::Value> kind_values(num_kinds);
    protobuf::Row row;
    protobuf::OutFile out_file(rows_out);

    for (size_t k = 0; k < num_kinds; ++k) {
        cross_cat.kinds[k].model.realize(rng);
    }

    // The row's positive diff is the destination of the joined per-kind values.
    auto & diff = * row.mutable_diff();
    cross_cat.schema.clear(diff);
    ProductValue & joined_value = * diff.mutable_pos();

    for (size_t rowid = 0; rowid != num_rows; ++rowid) {
        assignments.rowids().try_push(rowid);

        for (size_t k = 0; k != num_kinds; ++k) {
            auto & kind = cross_cat.kinds[k];
            ProductModel & model = kind.model;
            auto & mixture = kind.mixture;
            ProductValue & kind_value = kind_values[k];
            auto & kind_groupids = assignments.groupids(k);

            // Score every group of this kind, then normalize the scores in place.
            group_scores.resize(mixture.clustering.counts().size());
            mixture.clustering.score_value(model.clustering, group_scores);
            distributions::scores_to_probs(group_scores);
            const VectorFloat & group_probs = group_scores;

            // Draw which features are observed: one Bernoulli(density) flag per feature.
            auto & observed = * kind_value.mutable_observed();
            ValueSchema::clear(observed);
            observed.set_sparsity(ProductModel::Value::Observed::DENSE);
            for (size_t remaining = kind.featureids.size();
                 remaining > 0;
                 --remaining)
            {
                observed.add_dense(
                    distributions::sample_bernoulli(rng, density));
            }

            size_t groupid =
                mixture.sample_value(model, group_probs, kind_value, rng);

            model.add_value(kind_value, rng);
            mixture.add_value(model, groupid, kind_value, rng);
            kind_groupids.push(groupid);
        }

        row.set_id(rowid);
        cross_cat.splitter.join(joined_value, kind_values);
        out_file.write_stream(row);
    }
}
예제 #4
0
bool PrincipalComponentAnalysis::project(const VectorFloat &data,VectorFloat &prjData){
    
    const unsigned int N = (unsigned int)data.size();
    
    if( !trained ){
        warningLog << "project(const VectorFloat &data,VectorFloat &prjData) - The PrincipalComponentAnalysis module has not been trained!" << std::endl;
        return false;
    }

    if( N != numInputDimensions ){
        warningLog << "project(const VectorFloat &data,VectorFloat &prjData) - The size of the input vector (" << N << ") does not match the number of input dimensions (" << numInputDimensions << ")!" << std::endl;
        return false;
    }
    
    VectorFloat msData = data;
    
    if( normData ){
        //Mean subtract the data
        for(UINT j=0; j<numInputDimensions; j++)
            msData[j] = (msData[j]-mean[j])/stdDev[j];
    }else{
        //Mean subtract the data
        for(UINT j=0; j<numInputDimensions; j++)
            msData[j] -= mean[j];
    }
    
    //Projected Data
    prjData.resize( numPrincipalComponents );
    for(UINT i=0; i<numPrincipalComponents; i++){//For each PC
        prjData[i]=0;
        for(UINT j=0; j<N; j++)//For each feature
            prjData[i] += msData[j] * eigenvectors[j][sortedEigenvalues[i].index];
    }
    
    return true;
}