/*
  Trains the network with per-sample (online) back-propagation.

  x0 - training input vectors; rows are read as float or double
       according to x0.type.
  u  - desired output vectors; rows are read as float or double
       according to u.type.
  sw - optional per-sample weights indexed by sample number; when NULL
       every sample gets weight 1.

  Inputs are scaled with the (scale, shift) pairs stored in weights[0],
  targets with the pairs stored in weights[l_count+1].

  Returns iter/count, i.e. the number of whole passes over the training
  set that were started before the loop stopped. iter keeps its initial
  value -1 if the function leaves before the loop (presumably via the
  CV_CALL error path; the macros are defined elsewhere).
*/
int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
{
    CvMat* dw = 0;
    CvMat* buf = 0;
    double **x = 0, **df = 0;
    CvMat* _idx = 0;
    int iter = -1, count = x0.count;

    CV_FUNCNAME( "CvANN_MLP::train_backprop" );

    __BEGIN__;

    int i, j, k, ivcount, ovcount, l_count, total = 0, max_iter;
    double *buf_ptr;
    double prev_E = DBL_MAX*0.5, E = 0, epsilon;

    // one "iteration" of the loop below handles a single sample, so the
    // user-level limits are scaled by the number of samples
    max_iter = params.term_crit.max_iter*count;
    epsilon = params.term_crit.epsilon*count;

    l_count = layer_sizes->cols;
    ivcount = layer_sizes->data.i[0];
    ovcount = layer_sizes->data.i[l_count-1];

    // allocate buffers
    for( i = 0; i < l_count; i++ )
        total += layer_sizes->data.i[i] + 1;

    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvZero( dw );
    CV_CALL( buf = cvCreateMat( 1, (total + max_count)*2, CV_64F ));
    CV_CALL( _idx = cvCreateMat( 1, count, CV_32SC1 ));
    for( i = 0; i < count; i++ )
        _idx->data.i[i] = i;

    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
    df = x + total;
    buf_ptr = buf->data.db;

    for( j = 0; j < l_count; j++ )
    {
        int n = layer_sizes->data.i[j];
        x[j] = buf_ptr;
        // x[j] needs n+1 elements: the backward pass stores the constant
        // bias input at x[j][n]. Without the extra slot that store would
        // overwrite df[j][0] before it is used.
        df[j] = x[j] + n + 1;
        buf_ptr = df[j] + n;
    }

    // run back-propagation loop
    /*
        y_i = w_i*x_{i-1}
        x_i = f(y_i)
        E = 1/2*||u - x_N||^2
        grad_N = (x_N - u)*f'(y_i)
        dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
        w_i(t+1) = w_i(t) + dw_i(t)
        grad_{i-1} = w_i^t*grad_i
    */
    for( iter = 0; iter < max_iter; iter++ )
    {
        int idx = iter % count;
        double* w = weights[0];
        double sweight, E1 = 0;
        CvMat _w, _dw, hdr1, hdr2, ghdr1, ghdr2, _df;
        CvMat *x1 = &hdr1, *x2 = &hdr2, *grad1 = &ghdr1, *grad2 = &ghdr2, *temp;

        if( idx == 0 )
        {
            // start of a new pass: stop if the accumulated error of the
            // previous pass did not change by at least epsilon
            if( fabs(prev_E - E) < epsilon )
                break;
            prev_E = E;
            E = 0;

            // shuffle indices
            for( i = 0; i < count; i++ )
            {
                int tt;
                j = (unsigned)cvRandInt(&rng) % count;
                k = (unsigned)cvRandInt(&rng) % count;
                CV_SWAP( _idx->data.i[j], _idx->data.i[k], tt );
            }
        }

        idx = _idx->data.i[idx];
        // look the weight up by the shuffled sample index, so that it
        // belongs to the sample that is actually processed
        sweight = sw ? count*sw[idx] : 1.;

        if( x0.type == CV_32F )
        {
            const float* x0data = x0.data.fl[idx];
            for( j = 0; j < ivcount; j++ )
                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
        }
        else
        {
            const double* x0data = x0.data.db[idx];
            for( j = 0; j < ivcount; j++ )
                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
        }

        cvInitMatHeader( x1, 1, ivcount, CV_64F, x[0] );

        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
        for( i = 1; i < l_count; i++ )
        {
            cvInitMatHeader( x2, 1, layer_sizes->data.i[i], CV_64F, x[i] );
            cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
            cvGEMM( x1, &_w, 1, 0, 0, x2 );
            _df = *x2;
            _df.data.db = df[i];
            // the third argument points just past the n1 x n2 weight
            // block, i.e. at the extra (bias) row of weights[i]
            calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
            CV_SWAP( x1, x2, temp );
        }

        // the two gradient buffers live right after the x/df area
        cvInitMatHeader( grad1, 1, ovcount, CV_64F, buf_ptr );
        *grad2 = *grad1;
        grad2->data.db = buf_ptr + max_count;

        w = weights[l_count+1];

        // calculate error
        if( u.type == CV_32F )
        {
            const float* udata = u.data.fl[idx];
            for( k = 0; k < ovcount; k++ )
            {
                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
                grad1->data.db[k] = t*sweight;
                E1 += t*t;
            }
        }
        else
        {
            const double* udata = u.data.db[idx];
            for( k = 0; k < ovcount; k++ )
            {
                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
                grad1->data.db[k] = t*sweight;
                E1 += t*t;
            }
        }
        // weight only this sample's contribution; the error accumulated
        // from earlier samples of the pass must not be rescaled
        E += sweight*E1;

        // backward pass, update weights
        for( i = l_count-1; i > 0; i-- )
        {
            int n1 = layer_sizes->data.i[i-1], n2 = layer_sizes->data.i[i];
            cvInitMatHeader( &_df, 1, n2, CV_64F, df[i] );
            cvMul( grad1, &_df, grad1 );
            cvInitMatHeader( &_w, n1+1, n2, CV_64F, weights[i] );
            // dw mirrors the layout of the weight storage, so the same
            // offset from weights[0] addresses the matching block
            cvInitMatHeader( &_dw, n1+1, n2, CV_64F, dw->data.db + (weights[i] - weights[0]) );
            cvInitMatHeader( x1, n1+1, 1, CV_64F, x[i-1] );
            x[i-1][n1] = 1.;   // constant input that multiplies the bias row
            cvGEMM( x1, grad1, params.bp_dw_scale, &_dw, params.bp_moment_scale, &_dw );
            cvAdd( &_w, &_dw, &_w );
            if( i > 1 )
            {
                // propagate the gradient through the weights, leaving
                // out the bias row
                grad2->cols = n1;
                _w.rows = n1;
                cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
            }
            CV_SWAP( grad1, grad2, temp );
        }
    }

    iter /= count;

    __END__;

    cvReleaseMat( &dw );
    cvReleaseMat( &buf );
    cvReleaseMat( &_idx );
    cvFree( &x );

    return iter;
}
int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit ) { int i, j, k; double prev_E = DBL_MAX*0.5, E = 0; int itype = inputs.type(), otype = outputs.type(); int count = inputs.rows; int iter = -1, max_iter = termCrit.maxCount*count; double epsilon = termCrit.epsilon*count; int l_count = layer_count(); int ivcount = layer_sizes[0]; int ovcount = layer_sizes.back(); // allocate buffers vector<vector<double> > x(l_count); vector<vector<double> > df(l_count); vector<Mat> dw(l_count); for( i = 0; i < l_count; i++ ) { int n = layer_sizes[i]; x[i].resize(n+1); df[i].resize(n); dw[i] = Mat::zeros(weights[i].size(), CV_64F); } Mat _idx_m(1, count, CV_32S); int* _idx = _idx_m.ptr<int>(); for( i = 0; i < count; i++ ) _idx[i] = i; AutoBuffer<double> _buf(max_lsize*2); double* buf[] = { _buf, (double*)_buf + max_lsize }; const double* sw = _sw.empty() ? 0 : _sw.ptr<double>(); // run back-propagation loop /* y_i = w_i*x_{i-1} x_i = f(y_i) E = 1/2*||u - x_N||^2 grad_N = (x_N - u)*f'(y_i) dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i w_i(t+1) = w_i(t) + dw_i(t) grad_{i-1} = w_i^t*grad_i */ for( iter = 0; iter < max_iter; iter++ ) { int idx = iter % count; double sweight = sw ? count*sw[idx] : 1.; if( idx == 0 ) { //printf("%d. E = %g\n", iter/count, E); if( fabs(prev_E - E) < epsilon ) break; prev_E = E; E = 0; // shuffle indices for( i = 0; i < count; i++ ) { j = rng.uniform(0, count); k = rng.uniform(0, count); std::swap(_idx[j], _idx[k]); } } idx = _idx[idx]; const uchar* x0data_p = inputs.ptr(idx); const float* x0data_f = (const float*)x0data_p; const double* x0data_d = (const double*)x0data_p; double* w = weights[0].ptr<double>(); for( j = 0; j < ivcount; j++ ) x[0][j] = (itype == CV_32F ? 
(double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1]; Mat x1( 1, ivcount, CV_64F, &x[0][0] ); // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i]) for( i = 1; i < l_count; i++ ) { int n = layer_sizes[i]; Mat x2(1, n, CV_64F, &x[i][0] ); Mat _w = weights[i].rowRange(0, x1.cols); gemm(x1, _w, 1, noArray(), 0, x2); Mat _df(1, n, CV_64F, &df[i][0] ); calc_activ_func_deriv( x2, _df, weights[i] ); x1 = x2; } Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] ); w = weights[l_count+1].ptr<double>(); // calculate error const uchar* udata_p = outputs.ptr(idx); const float* udata_f = (const float*)udata_p; const double* udata_d = (const double*)udata_p; double* gdata = grad1.ptr<double>(); for( k = 0; k < ovcount; k++ ) { double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k]; gdata[k] = t*sweight; E += t*t; } E *= sweight; // backward pass, update weights for( i = l_count-1; i > 0; i-- ) { int n1 = layer_sizes[i-1], n2 = layer_sizes[i]; Mat _df(1, n2, CV_64F, &df[i][0]); multiply( grad1, _df, grad1 ); Mat _x(n1+1, 1, CV_64F, &x[i-1][0]); x[i-1][n1] = 1.; gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] ); add( weights[i], dw[i], weights[i] ); if( i > 1 ) { Mat grad2(1, n1, CV_64F, buf[i&1]); Mat _w = weights[i].rowRange(0, n1); gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T ); grad1 = grad2; } } } iter /= count; return iter; }
int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw ) { const int max_buf_sz = 1 << 16; CvMat* dw = 0; CvMat* dEdw = 0; CvMat* prev_dEdw_sign = 0; CvMat* buf = 0; double **x = 0, **df = 0; int iter = -1, count = x0.count; CV_FUNCNAME( "CvANN_MLP::train" ); __BEGIN__; int i, ivcount, ovcount, l_count, total = 0, max_iter, buf_sz, dcount0, dcount=0; double *buf_ptr; double prev_E = DBL_MAX*0.5, epsilon; double dw_plus, dw_minus, dw_min, dw_max; double inv_count; max_iter = params.term_crit.max_iter; epsilon = params.term_crit.epsilon; dw_plus = params.rp_dw_plus; dw_minus = params.rp_dw_minus; dw_min = params.rp_dw_min; dw_max = params.rp_dw_max; l_count = layer_sizes->cols; ivcount = layer_sizes->data.i[0]; ovcount = layer_sizes->data.i[l_count-1]; // allocate buffers for( i = 0; i < l_count; i++ ) total += layer_sizes->data.i[i]; CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type )); cvSet( dw, cvScalarAll(params.rp_dw0) ); CV_CALL( dEdw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type )); cvZero( dEdw ); CV_CALL( prev_dEdw_sign = cvCreateMat( wbuf->rows, wbuf->cols, CV_8SC1 )); cvZero( prev_dEdw_sign ); inv_count = 1./count; dcount0 = max_buf_sz/(2*total); dcount0 = MAX( dcount0, 1 ); dcount0 = MIN( dcount0, count ); buf_sz = dcount0*(total + max_count)*2; CV_CALL( buf = cvCreateMat( 1, buf_sz, CV_64F )); CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) )); df = x + total; buf_ptr = buf->data.db; for( i = 0; i < l_count; i++ ) { x[i] = buf_ptr; df[i] = x[i] + layer_sizes->data.i[i]*dcount0; buf_ptr += (df[i] - x[i])*2; } // run rprop loop /* y_i(t) = w_i(t)*x_{i-1}(t) x_i(t) = f(y_i(t)) E = sum_over_all_samples(1/2*||u - x_N||^2) grad_N = (x_N - u)*f'(y_i) MIN(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0 dw_i{jk}(t) = MAX(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0 dw_i{jk}(t-1) else if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0) dE/dw_i{jk}(t)<-0 else w_i{jk}(t+1) = w_i{jk}(t) + 
dw_i{jk}(t) grad_{i-1}(t) = w_i^t(t)*grad_i(t) */ for( iter = 0; iter < max_iter; iter++ ) { int n1, n2, si, j, k; double* w; CvMat _w, _dEdw, hdr1, hdr2, ghdr1, ghdr2, _df; CvMat *x1, *x2, *grad1, *grad2, *temp; double E = 0; // first, iterate through all the samples and compute dEdw for( si = 0; si < count; si += dcount ) { dcount = MIN( count - si, dcount0 ); w = weights[0]; grad1 = &ghdr1; grad2 = &ghdr2; x1 = &hdr1; x2 = &hdr2; // grab and preprocess input data if( x0.type == CV_32F ) for( i = 0; i < dcount; i++ ) { const float* x0data = x0.data.fl[si+i]; double* xdata = x[0]+i*ivcount; for( j = 0; j < ivcount; j++ ) xdata[j] = x0data[j]*w[j*2] + w[j*2+1]; } else for( i = 0; i < dcount; i++ ) { const double* x0data = x0.data.db[si+i]; double* xdata = x[0]+i*ivcount; for( j = 0; j < ivcount; j++ ) xdata[j] = x0data[j]*w[j*2] + w[j*2+1]; } cvInitMatHeader( x1, dcount, ivcount, CV_64F, x[0] ); // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i]) for( i = 1; i < l_count; i++ ) { cvInitMatHeader( x2, dcount, layer_sizes->data.i[i], CV_64F, x[i] ); cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] ); cvGEMM( x1, &_w, 1, 0, 0, x2 ); _df = *x2; _df.data.db = df[i]; calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols ); CV_SWAP( x1, x2, temp ); } cvInitMatHeader( grad1, dcount, ovcount, CV_64F, buf_ptr ); w = weights[l_count+1]; grad2->data.db = buf_ptr + max_count*dcount; // calculate error if( u.type == CV_32F ) for( i = 0; i < dcount; i++ ) { const float* udata = u.data.fl[si+i]; const double* xdata = x[l_count-1] + i*ovcount; double* gdata = grad1->data.db + i*ovcount; double sweight = sw ? 
sw[si+i] : inv_count, E1 = 0; for( j = 0; j < ovcount; j++ ) { double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j]; gdata[j] = t*sweight; E1 += t*t; } E += sweight*E1; } else for( i = 0; i < dcount; i++ ) { const double* udata = u.data.db[si+i]; const double* xdata = x[l_count-1] + i*ovcount; double* gdata = grad1->data.db + i*ovcount; double sweight = sw ? sw[si+i] : inv_count, E1 = 0; for( j = 0; j < ovcount; j++ ) { double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j]; gdata[j] = t*sweight; E1 += t*t; } E += sweight*E1; } // backward pass, update dEdw for( i = l_count-1; i > 0; i-- ) { n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i]; cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] ); cvMul( grad1, &_df, grad1 ); cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) ); cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] ); cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T ); // update bias part of dEdw for( k = 0; k < dcount; k++ ) { double* dst = _dEdw.data.db + n1*n2; const double* src = grad1->data.db + k*n2; for( j = 0; j < n2; j++ ) dst[j] += src[j]; } cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] ); cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db ); if( i > 1 ) cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T ); CV_SWAP( grad1, grad2, temp ); } } // now update weights for( i = 1; i < l_count; i++ ) { n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i]; for( k = 0; k <= n1; k++ ) { double* wk = weights[i]+k*n2; size_t delta = wk - weights[0]; double* dwk = dw->data.db + delta; double* dEdwk = dEdw->data.db + delta; char* prevEk = (char*)(prev_dEdw_sign->data.ptr + delta); for( j = 0; j < n2; j++ ) { double Eval = dEdwk[j]; double dval = dwk[j]; double wval = wk[j]; int s = CV_SIGN(Eval); int ss = prevEk[j]*s; if( ss > 0 ) { dval *= dw_plus; dval = MIN( dval, dw_max ); dwk[j] = dval; wk[j] = wval + dval*s; } else if( ss < 0 ) { dval *= dw_minus; dval = MAX( dval, dw_min ); prevEk[j] = 0; dwk[j] = dval; wk[j] = 
wval + dval*s; } else { prevEk[j] = (char)s; wk[j] = wval + dval*s; } dEdwk[j] = 0.; } } } if( fabs(prev_E - E) < epsilon ) break; prev_E = E; E = 0; } __END__; cvReleaseMat( &dw ); cvReleaseMat( &dEdw ); cvReleaseMat( &prev_dEdw_sign ); cvReleaseMat( &buf ); cvFree( &x ); return iter; }