void scale_output( const Mat& _src, Mat& _dst ) const { int cols = _src.cols; const double* w = weights[layer_count()].ptr<double>(); if( _dst.type() == CV_32F ) { for( int i = 0; i < _src.rows; i++ ) { const double* src = _src.ptr<double>(i); float* dst = _dst.ptr<float>(i); for( int j = 0; j < cols; j++ ) dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]); } } else { for( int i = 0; i < _src.rows; i++ ) { const double* src = _src.ptr<double>(i); double* dst = _dst.ptr<double>(i); for( int j = 0; j < cols; j++ ) dst[j] = src[j]*w[j*2] + w[j*2+1]; } } }
// Resets the network topology. layer_sizes[i] is the neuron count of
// layer i. weights[1..l_count-1] are the inter-layer matrices (one extra
// row each for the bias term); weights[0], weights[l_count] and
// weights[l_count+1] hold the input scale, output scale and inverse
// output scale as interleaved (gain, offset) pairs.
void setLayerSizes( InputArray _layer_sizes )
{
    clear();
    _layer_sizes.copyTo(layer_sizes);
    int l_count = layer_count();
    weights.resize(l_count + 2);
    max_lsize = 0;

    if( l_count <= 0 )
        return;

    for( int i = 0; i < l_count; i++ )
    {
        int n = layer_sizes[i];
        // Hidden layers need at least 2 neurons; input/output at least 1.
        bool is_hidden = 0 < i && i < l_count-1;
        if( n < (is_hidden ? 2 : 1) )
            CV_Error( CV_StsOutOfRange,
                      "there should be at least one input and one output "
                      "and every hidden layer must have more than 1 neuron" );
        max_lsize = std::max( max_lsize, n );
        if( i > 0 )
            weights[i].create(layer_sizes[i-1]+1, n, CV_64F);
    }

    int ninputs = layer_sizes.front();
    int noutputs = layer_sizes.back();
    weights[0].create(1, ninputs*2, CV_64F);
    weights[l_count].create(1, noutputs*2, CV_64F);
    weights[l_count+1].create(1, noutputs*2, CV_64F);
}
// Randomizes all inter-layer weights using the Nguyen-Widrow scheme:
// uniform samples in [-1, 1), row-normalized for hidden layers, with the
// bias column spread across [-G, G) so the sigmoids' active regions tile
// the input space. NOTE(review): the RNG draw order is part of the
// reproducibility contract — do not reorder these loops.
void init_weights()
{
    int i, j, k, l_count = layer_count();

    for( i = 1; i < l_count; i++ )
    {
        int n1 = layer_sizes[i-1];  // fan-in (previous layer size)
        int n2 = layer_sizes[i];    // fan-out (this layer size)
        // G = 0.7 * n1^(1/(n2-1)) is the Nguyen-Widrow scale factor;
        // degenerate (n2 <= 2) layers fall back to G = 1.
        double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
        double* w = weights[i].ptr<double>();

        // initialize weights using Nguyen-Widrow algorithm
        for( j = 0; j < n2; j++ )
        {
            double s = 0;
            // n1 input rows plus one bias row (k == n1).
            for( k = 0; k <= n1; k++ )
            {
                val = rng.uniform(0., 1.)*2-1.;
                w[k*n2 + j] = val;
                s += fabs(val);
            }

            if( i < l_count - 1 )
            {
                // Normalize by the sum of |weights| excluding the bias
                // sample (val still holds the last draw, i.e. the bias).
                s = 1./(s - fabs(val));
                for( k = 0; k <= n1; k++ )
                    w[k*n2 + j] *= s;
                // Spread biases evenly over [-G, G) across the layer.
                w[n1*n2 + j] *= G*(-1+j*2./n2);
            }
        }
    }
}
void read( const FileNode& fn ) { clear(); vector<int> _layer_sizes; readVectorOrMat(fn["layer_sizes"], _layer_sizes); setLayerSizes( _layer_sizes ); int i, l_count = layer_count(); read_params(fn); size_t esz = weights[0].elemSize(); FileNode w = fn["input_scale"]; w.readRaw("d", weights[0].ptr(), weights[0].total()*esz); w = fn["output_scale"]; w.readRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz); w = fn["inv_output_scale"]; w.readRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz); FileNodeIterator w_it = fn["weights"].begin(); for( i = 1; i < l_count; i++, ++w_it ) (*w_it).readRaw("d", weights[i].ptr(), weights[i].total()*esz); trained = true; }
// Serializes the network: layer sizes, training params, the three scale
// vectors and the per-layer weight matrices. Does nothing if the network
// has not been initialized. The layout mirrors read().
void write( FileStorage& fs ) const
{
    if( layer_sizes.empty() )
        return;

    int l_count = layer_count();
    size_t esz = weights[0].elemSize();

    fs << "layer_sizes" << layer_sizes;
    write_params( fs );

    // Helper: emit one matrix as a flat bracketed sequence of doubles.
    auto write_darray = [&fs, esz]( const Mat& m )
    {
        fs << "[";
        fs.writeRaw("d", m.ptr(), m.total()*esz);
        fs << "]";
    };

    fs << "input_scale";
    write_darray(weights[0]);
    fs << "output_scale";
    write_darray(weights[l_count]);
    fs << "inv_output_scale";
    write_darray(weights[l_count+1]);

    fs << "weights" << "[";
    for( int i = 1; i < l_count; i++ )
        write_darray(weights[i]);
    fs << "]";
}
// Collects every neuron that passes both the layer filter and the neuron
// filter, scanning the Input, Hidden and Output layers in that order, and
// returns the result as a neuron_range.
neuron_range fc_rnn::get_neurons(layer_filter const& lfilter, neuron_filter const& nfilter) const
{
    // TODO: Optimize by considering lfilter and nfilter together.
    // -> eg. only look for Input neurons within Input layer.
    neuron_iterator::data_array_t data;

    auto const layer_types = std::array < LayerType, 3 > {
        { LayerType::Input, LayerType::Hidden, LayerType::Output } };

    for(auto const lt : layer_types)
    {
        // Skip whole layers rejected by the layer filter.
        if(!lfilter.test(layer_data{ lt }))
        {
            continue;
        }

        // Scan the contiguous id range of this layer's neurons.
        auto const first = layer_offset(lt);
        auto const last = first + layer_count(lt);
        for(auto id = first; id < last; ++id)
        {
            auto nd = as_neuron_data(id);
            if(nfilter.test(nd))
            {
                data.emplace_back(std::move(nd));
            }
        }
    }

    return neuron_range(
        neuron_iterator(std::move(data)),
        neuron_iterator(),
        data.size()
        );
}
// Compiles and links one layer-blending GL program for each layer count
// from 1 up to GL_MAX_TEXTURE_IMAGE_UNITS, appending every success to
// *blend_programs. Returns 0 if at least one program linked, nonzero if
// even the single-layer program failed. Compile/link errors are logged
// only while no program has succeeded yet (ret is still nonzero).
static int GenerateShaders(std::vector<AutoGLProgram> *blend_programs) {
  // Limits: GL_MAX_VARYING_COMPONENTS, GL_MAX_TEXTURE_IMAGE_UNITS,
  // GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS
  // clang-format off
  const GLchar *shader_preamble = "#version 300 es\n#define LAYER_COUNT ";

  // Vertex shader: maps each layer's crop rect into per-layer texture
  // coordinates and scales the quad into the target viewport.
  const GLchar *vertex_shader_source =
      "\n"
      "precision mediump int;                                            \n"
      "uniform vec4 uViewport;                                           \n"
      "uniform sampler2D uLayerTextures[LAYER_COUNT];                    \n"
      "uniform vec4 uLayerCrop[LAYER_COUNT];                             \n"
      "in vec2 vPosition;                                                \n"
      "in vec2 vTexCoords;                                               \n"
      "out vec2 fTexCoords[LAYER_COUNT];                                 \n"
      "void main() {                                                     \n"
      "  for (int i = 0; i < LAYER_COUNT; i++) {                         \n"
      "    fTexCoords[i] = (uLayerCrop[i].xy + vTexCoords * uLayerCrop[i].zw) / \n"
      "        vec2(textureSize(uLayerTextures[i], 0));                  \n"
      "  }                                                               \n"
      "  vec2 scaledPosition = uViewport.xy + vPosition * uViewport.zw;  \n"
      "  gl_Position = vec4(scaledPosition * vec2(2.0) - vec2(1.0), 0.0, 1.0); \n"
      "}                                                                 \n";

  // Fragment shader: front-to-back alpha blending over all layers, with
  // an early-out once accumulated coverage makes further layers invisible.
  const GLchar *fragment_shader_source =
      "\n"
      "precision mediump float;                                          \n"
      "uniform sampler2D uLayerTextures[LAYER_COUNT];                    \n"
      "uniform float uLayerAlpha[LAYER_COUNT];                           \n"
      "in vec2 fTexCoords[LAYER_COUNT];                                  \n"
      "out vec4 oFragColor;                                              \n"
      "void main() {                                                     \n"
      "  vec3 color = vec3(0.0, 0.0, 0.0);                               \n"
      "  float alphaCover = 1.0;                                         \n"
      "  for (int i = 0; i < LAYER_COUNT; i++) {                         \n"
      "    vec4 texSample = texture(uLayerTextures[i], fTexCoords[i]);   \n"
      "    float a = texSample.a * uLayerAlpha[i];                       \n"
      "    color += a * alphaCover * texSample.rgb;                      \n"
      "    alphaCover *= 1.0 - a;                                        \n"
      "    if (alphaCover <= 0.5/255.0)                                  \n"
      "      break;                                                      \n"
      "  }                                                               \n"
      "  oFragColor = vec4(color, 1.0 - alphaCover);                     \n"
      "}                                                                 \n";
  // clang-format on

  int i, ret = 1;
  GLint max_texture_images, status;
  AutoGLShader vertex_shader, fragment_shader;
  AutoGLProgram program;
  std::string shader_log;

  glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &max_texture_images);

  for (i = 1; i <= max_texture_images; i++) {
    // Inject the current layer count after the preamble's #define.
    std::ostringstream layer_count_formatter;
    layer_count_formatter << i;
    std::string layer_count(layer_count_formatter.str());
    // shader_sources[2] is filled in below with the actual shader body.
    const GLchar *shader_sources[3] = {shader_preamble, layer_count.c_str(),
                                       NULL};

    shader_sources[2] = vertex_shader_source;
    vertex_shader = CompileAndCheckShader(GL_VERTEX_SHADER, 3, shader_sources,
                                          ret ? &shader_log : NULL);
    if (!vertex_shader.get()) {
      if (ret)
        ALOGE("Failed to make vertex shader:\n%s", shader_log.c_str());
      break;
    }

    shader_sources[2] = fragment_shader_source;
    fragment_shader = CompileAndCheckShader(
        GL_FRAGMENT_SHADER, 3, shader_sources, ret ? &shader_log : NULL);
    if (!fragment_shader.get()) {
      if (ret)
        ALOGE("Failed to make fragment shader:\n%s", shader_log.c_str());
      break;
    }

    program = AutoGLProgram(glCreateProgram());
    if (!program.get()) {
      if (ret)
        ALOGE("Failed to create program %s", GetGLError());
      break;
    }

    glAttachShader(program.get(), vertex_shader.get());
    glAttachShader(program.get(), fragment_shader.get());
    glBindAttribLocation(program.get(), 0, "vPosition");
    glBindAttribLocation(program.get(), 1, "vTexCoords");
    glLinkProgram(program.get());
    // Shaders can be detached once the program is linked.
    glDetachShader(program.get(), vertex_shader.get());
    glDetachShader(program.get(), fragment_shader.get());

    glGetProgramiv(program.get(), GL_LINK_STATUS, &status);
    if (!status) {
      if (ret) {
        GLint log_length;
        glGetProgramiv(program.get(), GL_INFO_LOG_LENGTH, &log_length);
        std::string program_log(log_length, ' ');
        glGetProgramInfoLog(program.get(), log_length, NULL, &program_log[0]);
        ALOGE("Failed to link program: \n%s", program_log.c_str());
      }
      break;
    }

    // First success flips the overall return to 0 and silences further logs.
    ret = 0;
    blend_programs->emplace_back(std::move(program));
  }

  return ret;
}
// Batch RPROP training. Runs up to termCrit.maxCount epochs; each epoch
// accumulates the full-batch gradient dEdw over all samples (in parallel
// chunks), then adapts every weight's individual step size per the RPROP
// rule below. Returns the number of completed iterations.
int train_rprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
{
    const int max_buf_size = 1 << 16;
    int i, iter = -1, count = inputs.rows;
    double prev_E = DBL_MAX*0.5;

    int max_iter = termCrit.maxCount;
    double epsilon = termCrit.epsilon;
    double dw_plus = params.rpDWPlus;
    double dw_minus = params.rpDWMinus;
    double dw_min = params.rpDWMin;
    double dw_max = params.rpDWMax;

    int l_count = layer_count();

    // allocate buffers
    vector<Mat> dw(l_count), dEdw(l_count), prev_dEdw_sign(l_count);

    int total = 0;
    for( i = 0; i < l_count; i++ )
    {
        total += layer_sizes[i];
        // Per-weight step sizes start at rpDW0; gradient-sign history at 0.
        dw[i].create(weights[i].size(), CV_64F);
        dw[i].setTo(Scalar::all(params.rpDW0));
        prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
        dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
    }

    // Chunk size chosen so each chunk's activations fit in max_buf_size.
    int dcount0 = max_buf_size/(2*total);
    dcount0 = std::max( dcount0, 1 );
    dcount0 = std::min( dcount0, count );
    int chunk_count = (count + dcount0 - 1)/dcount0;

    // run rprop loop
    /*
    y_i(t) = w_i(t)*x_{i-1}(t)
    x_i(t) = f(y_i(t))
    E = sum_over_all_samples(1/2*||u - x_N||^2)
    grad_N = (x_N - u)*f'(y_i)

                   std::min(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
    dw_i{jk}(t) =  std::max(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
                   dw_i{jk}(t-1) else

    if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
        dE/dw_i{jk}(t)<-0
    else
        w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
    grad_{i-1}(t) = w_i^t(t)*grad_i(t)
    */
    for( iter = 0; iter < max_iter; iter++ )
    {
        double E = 0;

        for( i = 0; i < l_count; i++ )
            dEdw[i].setTo(Scalar::all(0));

        // first, iterate through all the samples and compute dEdw
        RPropLoop invoker(this, inputs, outputs, _sw, dcount0, dEdw, &E);
        parallel_for_(Range(0, chunk_count), invoker);
        //invoker(Range(0, chunk_count));

        // now update weights
        for( i = 1; i < l_count; i++ )
        {
            int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
            for( int k = 0; k <= n1; k++ )
            {
                CV_Assert(weights[i].size() == Size(n2, n1+1));
                double* wk = weights[i].ptr<double>(k);
                double* dwk = dw[i].ptr<double>(k);
                double* dEdwk = dEdw[i].ptr<double>(k);
                schar* prevEk = prev_dEdw_sign[i].ptr<schar>(k);

                for( int j = 0; j < n2; j++ )
                {
                    double Eval = dEdwk[j];
                    double dval = dwk[j];
                    double wval = wk[j];
                    int s = CV_SIGN(Eval);
                    // ss > 0: gradient kept its sign; ss < 0: sign flipped.
                    int ss = prevEk[j]*s;
                    if( ss > 0 )
                    {
                        // Same direction as last epoch: grow the step.
                        dval *= dw_plus;
                        dval = std::min( dval, dw_max );
                        dwk[j] = dval;
                        wk[j] = wval + dval*s;
                    }
                    else if( ss < 0 )
                    {
                        // Overshoot: shrink the step and clear the sign
                        // history so the next epoch takes the neutral path.
                        dval *= dw_minus;
                        dval = std::max( dval, dw_min );
                        prevEk[j] = 0;
                        dwk[j] = dval;
                        wk[j] = wval + dval*s;
                    }
                    else
                    {
                        // Neutral: keep the step, remember the sign.
                        prevEk[j] = (schar)s;
                        wk[j] = wval + dval*s;
                    }

                    dEdwk[j] = 0.;
                }
            }
        }

        //printf("%d. E = %g\n", iter, E);

        // Converged when the epoch error change drops below epsilon.
        if( fabs(prev_E - E) < epsilon )
            break;
        prev_E = E;
    }

    return iter;
}
// Online (per-sample) back-propagation with momentum. Samples are visited
// in a freshly shuffled order every epoch; training stops after maxCount
// epochs or when the per-epoch error change falls below epsilon.
// Returns the number of completed epochs.
int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
{
    int i, j, k;
    double prev_E = DBL_MAX*0.5, E = 0;
    int itype = inputs.type(), otype = outputs.type();
    int count = inputs.rows;
    // Term criteria are per-epoch, so scale both by the sample count.
    int iter = -1, max_iter = termCrit.maxCount*count;
    double epsilon = termCrit.epsilon*count;
    int l_count = layer_count();
    int ivcount = layer_sizes[0];
    int ovcount = layer_sizes.back();

    // allocate buffers
    vector<vector<double> > x(l_count);   // per-layer activations (+1 slot for bias)
    vector<vector<double> > df(l_count);  // per-layer activation derivatives
    vector<Mat> dw(l_count);              // momentum-carrying weight deltas

    for( i = 0; i < l_count; i++ )
    {
        int n = layer_sizes[i];
        x[i].resize(n+1);
        df[i].resize(n);
        dw[i] = Mat::zeros(weights[i].size(), CV_64F);
    }

    // Sample visitation order, reshuffled at the start of each epoch.
    Mat _idx_m(1, count, CV_32S);
    int* _idx = _idx_m.ptr<int>();
    for( i = 0; i < count; i++ )
        _idx[i] = i;

    // Two gradient scratch rows; the backward pass ping-pongs between them.
    AutoBuffer<double> _buf(max_lsize*2);
    double* buf[] = { _buf, (double*)_buf + max_lsize };

    const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();

    // run back-propagation loop
    /*
    y_i = w_i*x_{i-1}
    x_i = f(y_i)
    E = 1/2*||u - x_N||^2
    grad_N = (x_N - u)*f'(y_i)
    dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
    w_i(t+1) = w_i(t) + dw_i(t)
    grad_{i-1} = w_i^t*grad_i
    */
    for( iter = 0; iter < max_iter; iter++ )
    {
        int idx = iter % count;
        double sweight = sw ? count*sw[idx] : 1.;

        // idx == 0 marks an epoch boundary: check convergence, reshuffle.
        if( idx == 0 )
        {
            //printf("%d. E = %g\n", iter/count, E);
            if( fabs(prev_E - E) < epsilon )
                break;
            prev_E = E;
            E = 0;

            // shuffle indices
            for( i = 0; i < count; i++ )
            {
                j = rng.uniform(0, count);
                k = rng.uniform(0, count);
                std::swap(_idx[j], _idx[k]);
            }
        }

        idx = _idx[idx];

        const uchar* x0data_p = inputs.ptr(idx);
        const float* x0data_f = (const float*)x0data_p;
        const double* x0data_d = (const double*)x0data_p;

        // Apply the input scale: x0[j] = raw[j]*gain + offset.
        double* w = weights[0].ptr<double>();
        for( j = 0; j < ivcount; j++ )
            x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];

        Mat x1( 1, ivcount, CV_64F, &x[0][0] );

        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
        for( i = 1; i < l_count; i++ )
        {
            int n = layer_sizes[i];
            Mat x2(1, n, CV_64F, &x[i][0] );
            Mat _w = weights[i].rowRange(0, x1.cols);
            gemm(x1, _w, 1, noArray(), 0, x2);
            Mat _df(1, n, CV_64F, &df[i][0] );
            calc_activ_func_deriv( x2, _df, weights[i] );
            x1 = x2;
        }

        Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
        w = weights[l_count+1].ptr<double>();

        // calculate error
        const uchar* udata_p = outputs.ptr(idx);
        const float* udata_f = (const float*)udata_p;
        const double* udata_d = (const double*)udata_p;

        double* gdata = grad1.ptr<double>();
        for( k = 0; k < ovcount; k++ )
        {
            // Map the target through the inverse output scale before
            // comparing with the last layer's activation.
            double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
            gdata[k] = t*sweight;
            E += t*t;
        }
        E *= sweight;

        // backward pass, update weights
        for( i = l_count-1; i > 0; i-- )
        {
            int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
            Mat _df(1, n2, CV_64F, &df[i][0]);
            multiply( grad1, _df, grad1 );
            // Append the constant bias input before the outer-product update.
            Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
            x[i-1][n1] = 1.;
            // dw = bpDWScale*(x^T grad) + bpMomentScale*dw (momentum term).
            gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
            add( weights[i], dw[i], weights[i] );
            if( i > 1 )
            {
                // Back-propagate the gradient (bias row excluded from _w).
                Mat grad2(1, n1, CV_64F, buf[i&1]);
                Mat _w = weights[i].rowRange(0, n1);
                gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
                grad1 = grad2;
            }
        }
    }

    iter /= count;
    return iter;
}
// Computes the output scale (weights[l_count]) and its inverse
// (weights[l_count+1]) from the training targets: each output component's
// observed [mj, Mj] range is mapped onto the activation range [m, M].
// With UPDATE_WEIGHTS set, the existing scale is kept and the new targets
// are only validated against the trained range [m1, M1].
void calc_output_scale( const Mat& outputs, int flags )
{
    int i, j, vcount = layer_sizes.back();
    int type = outputs.type();
    double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
    bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
    bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
    int l_count = layer_count();
    double* scale = weights[l_count].ptr<double>();
    double* inv_scale = weights[l_count+1].ptr<double>();
    int count = outputs.rows;

    if( reset_weights )
    {
        // Seed min/max accumulators, or the identity transform (a=1, b=0)
        // when scaling is disabled.
        double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;

        for( j = 0; j < vcount; j++ )
        {
            scale[2*j] = inv_scale[2*j] = a0;
            scale[j*2+1] = inv_scale[2*j+1] = b0;
        }

        if( no_scale )
            return;
    }

    for( i = 0; i < count; i++ )
    {
        const uchar* p = outputs.ptr(i);
        const float* f = (const float*)p;
        const double* d = (const double*)p;

        for( j = 0; j < vcount; j++ )
        {
            double t = type == CV_32F ? (double)f[j] : d[j];

            if( reset_weights )
            {
                // Accumulate per-component min (scale[2j]) / max (scale[2j+1]).
                double mj = scale[j*2], Mj = scale[j*2+1];
                if( mj > t ) mj = t;
                if( Mj < t ) Mj = t;

                scale[j*2] = mj;
                scale[j*2+1] = Mj;
            }
            else if( !no_scale )
            {
                // Validate: the scaled target must stay within [m1, M1].
                t = t*inv_scale[j*2] + inv_scale[2*j+1];
                if( t < m1 || t > M1 )
                    CV_Error( CV_StsOutOfRange, "Some of new output training vector components run exceed the original range too much" );
            }
        }
    }

    if( reset_weights )
        for( j = 0; j < vcount; j++ )
        {
            // map mj..Mj to m..M
            double mj = scale[j*2], Mj = scale[j*2+1];
            double a, b;
            double delta = Mj - mj;

            // Degenerate (constant) component: unit gain, centered offset.
            if( delta < DBL_EPSILON )
                a = 1, b = (M + m - Mj - mj)*0.5;
            else
                a = (M - m)/delta, b = m - mj*a;
            inv_scale[j*2] = a; inv_scale[j*2+1] = b;
            a = 1./a; b = -b*a;
            scale[j*2] = a; scale[j*2+1] = b;
        }
}
// Runs the forward pass on one or more input rows. Results go to _outputs
// (created with the input's type). The float return value is meaningful
// only for a single sample, where it encodes the position of the maximal
// output component (maxIdx[0] + maxIdx[1] of a 1-row matrix, i.e. its
// column). Inputs are processed in chunks of dn0 rows so the intermediate
// activation buffers stay within max_buf_sz.
float predict( InputArray _inputs, OutputArray _outputs, int ) const
{
    if( !trained )
        CV_Error( CV_StsError, "The network has not been trained or loaded" );

    Mat inputs = _inputs.getMat();
    int type = inputs.type(), l_count = layer_count();
    int n = inputs.rows, dn0 = n;

    CV_Assert( (type == CV_32F || type == CV_64F) && inputs.cols == layer_sizes[0] );
    int noutputs = layer_sizes[l_count-1];
    Mat outputs;

    // Each sample needs two max_lsize-wide scratch rows (ping-pong).
    int min_buf_sz = 2*max_lsize;
    int buf_sz = n*min_buf_sz;

    if( buf_sz > max_buf_sz )
    {
        dn0 = max_buf_sz/min_buf_sz;
        dn0 = std::max( dn0, 1 );
        buf_sz = dn0*min_buf_sz;
    }

    cv::AutoBuffer<double> _buf(buf_sz+noutputs);
    double* buf = _buf;

    if( !_outputs.needed() )
    {
        // Caller only wants the return value; stash the single output row
        // in the tail of the scratch buffer.
        CV_Assert( n == 1 );
        outputs = Mat(n, noutputs, type, buf + buf_sz);
    }
    else
    {
        _outputs.create(n, noutputs, type);
        outputs = _outputs.getMat();
    }

    int dn = 0;
    for( int i = 0; i < n; i += dn )
    {
        dn = std::min( dn0, n - i );

        Mat layer_in = inputs.rowRange(i, i + dn);
        Mat layer_out( dn, layer_in.cols, CV_64F, buf);

        scale_input( layer_in, layer_out );
        layer_in = layer_out;

        for( int j = 1; j < l_count; j++ )
        {
            // Alternate between the two halves of the scratch buffer.
            double* data = buf + ((j&1) ? max_lsize*dn0 : 0);
            int cols = layer_sizes[j];

            layer_out = Mat(dn, cols, CV_64F, data);
            Mat w = weights[j].rowRange(0, layer_in.cols);
            gemm(layer_in, w, 1, noArray(), 0, layer_out);
            calc_activ_func( layer_out, weights[j] );

            layer_in = layer_out;
        }

        layer_out = outputs.rowRange(i, i + dn);
        scale_output( layer_in, layer_out );
    }

    if( n == 1 )
    {
        int maxIdx[] = {0, 0};
        minMaxIdx(outputs, 0, 0, 0, maxIdx);
        return (float)(maxIdx[0] + maxIdx[1]);
    }

    return 0.f;
}
// Collects every connection whose source and destination neurons pass the
// respective layer and neuron filters, and returns them as a
// connection_range. Sources come only from Input/Hidden layers; each maps
// to its legal destination layers (Hidden/Output).
connection_range fc_rnn::get_connections(
    layer_filter const& src_lfilter,
    layer_filter const& dst_lfilter,
    neuron_filter const& src_nfilter,
    neuron_filter const& dst_nfilter
    ) const
{
    connection_iterator::data_array_t data;

    auto const from_layers = std::array < LayerType, 2 > {
        { LayerType::Input, LayerType::Hidden } };
    auto const to_layers = std::map< LayerType, std::array < LayerType, 2 > >{
        { LayerType::Input, { { LayerType::Hidden, LayerType::Output } } },
        { LayerType::Hidden, { { LayerType::Hidden, LayerType::Output } } }
    };

    for(auto from : from_layers)
    {
        // Reject whole source layers first.
        if(!src_lfilter.test(layer_data{ from }))
        {
            continue;
        }

        for(auto to : to_layers.at(from))
        {
            // Then reject whole destination layers.
            if(!dst_lfilter.test(layer_data{ to }))
            {
                continue;
            }

            auto const from_begin = layer_offset(from);
            auto const from_end = from_begin + layer_count(from);
            for(auto src_id = from_begin; src_id < from_end; ++src_id)
            {
                auto src_nd = as_neuron_data(src_id);
                if(!src_nfilter.test(src_nd))
                {
                    continue;
                }

                // Source neuron accepted; pair it with every accepted
                // destination neuron.
                // TODO: should just test each neuron against src/dest filters once at most!!
                auto const to_begin = layer_offset(to);
                auto const to_end = to_begin + layer_count(to);
                for(auto dst_id = to_begin; dst_id < to_end; ++dst_id)
                {
                    if(dst_nfilter.test(as_neuron_data(dst_id)))
                    {
                        data.emplace_back(as_connection_data(
                            get_connection_id(src_id, dst_id)));
                    }
                }
            }
        }
    }

    return connection_range(
        connection_iterator(std::move(data)),
        connection_iterator(),
        data.size()
        );
}