示例#1
0
/*
    Trains the network with sequential back-propagation: one training sample
    is pushed through the network per loop step and the weights are updated
    immediately, using a momentum term.

    x0 - input vectors; rows are read as float when x0.type == CV_32F and as
         double otherwise
    u  - target vectors, same type convention as x0
    sw - optional per-sample weights indexed by sample number; when it is 0
         every sample gets the weight 1

    Returns the number of full passes over the training set (iter/count).
    iter is initialised to -1, so that value is returned if the function
    leaves through the error exit before the loop starts (NOTE(review):
    relies on CV_CALL jumping to the exit label on failure - confirm).
*/
int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
{
    CvMat* dw = 0;
    CvMat* buf = 0;
    double **x = 0, **df = 0;
    CvMat* _idx = 0;
    int iter = -1, count = x0.count;

    CV_FUNCNAME( "CvANN_MLP::train_backprop" );

    __BEGIN__;

    int i, j, k, ivcount, ovcount, l_count, total = 0, max_iter;
    double *buf_ptr;
    double prev_E = DBL_MAX*0.5, E = 0, epsilon;

    // the loop below advances by one sample per step, so both limits are
    // scaled by the number of samples
    max_iter = params.term_crit.max_iter*count;
    epsilon = params.term_crit.epsilon*count;

    l_count = layer_sizes->cols;
    ivcount = layer_sizes->data.i[0];
    ovcount = layer_sizes->data.i[l_count-1];

    // allocate buffers: every layer gets one extra element for the constant
    // bias input that is appended in the backward pass
    for( i = 0; i < l_count; i++ )
        total += layer_sizes->data.i[i] + 1;

    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvZero( dw );
    CV_CALL( buf = cvCreateMat( 1, (total + max_count)*2, CV_64F ));
    CV_CALL( _idx = cvCreateMat( 1, count, CV_32SC1 ));
    for( i = 0; i < count; i++ )
        _idx->data.i[i] = i;

    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
    df = x + total;
    buf_ptr = buf->data.db;

    for( j = 0; j < l_count; j++ )
    {
        x[j] = buf_ptr;
        // x[j] owns layer_size+1 doubles. The "+ 1" keeps the bias slot
        // x[j][layer_size] from aliasing df[j][0]; without it the backward
        // pass overwrote the first derivative of every hidden layer with 1.
        df[j] = x[j] + layer_sizes->data.i[j] + 1;
        buf_ptr += (df[j] - x[j])*2;
    }
    // from here on buf_ptr points at the 2*max_count doubles that remain for
    // the two gradient rows

    // run back-propagation loop
    /*
        y_i = w_i*x_{i-1}
        x_i = f(y_i)
        E = 1/2*||u - x_N||^2
        grad_N = (x_N - u)*f'(y_i)
        dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
        w_i(t+1) = w_i(t) + dw_i(t)
        grad_{i-1} = w_i^t*grad_i
    */
    for( iter = 0; iter < max_iter; iter++ )
    {
        int idx = iter % count;
        double* w = weights[0];
        double sweight, sample_E = 0;
        CvMat _w, _dw, hdr1, hdr2, ghdr1, ghdr2, _df;
        CvMat *x1 = &hdr1, *x2 = &hdr2, *grad1 = &ghdr1, *grad2 = &ghdr2, *temp;

        if( idx == 0 )
        {
            // start of a new pass: stop if the accumulated error has
            // stopped changing, otherwise reset it
            if( fabs(prev_E - E) < epsilon )
                break;
            prev_E = E;
            E = 0;

            // shuffle indices
            for( i = 0; i < count; i++ )
            {
                int tt;
                j = (unsigned)cvRandInt(&rng) % count;
                k = (unsigned)cvRandInt(&rng) % count;
                CV_SWAP( _idx->data.i[j], _idx->data.i[k], tt );
            }
        }

        idx = _idx->data.i[idx];
        // the weight has to be looked up with the shuffled sample index,
        // otherwise a sample is trained with another sample's weight
        sweight = sw ? count*sw[idx] : 1.;

        // scale the input sample with the per-component (scale, shift) pairs
        // stored in weights[0]
        if( x0.type == CV_32F )
        {
            const float* x0data = x0.data.fl[idx];
            for( j = 0; j < ivcount; j++ )
                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
        }
        else
        {
            const double* x0data = x0.data.db[idx];
            for( j = 0; j < ivcount; j++ )
                x[0][j] = x0data[j]*w[j*2] + w[j*2 + 1];
        }

        cvInitMatHeader( x1, 1, ivcount, CV_64F, x[0] );

        // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
        for( i = 1; i < l_count; i++ )
        {
            cvInitMatHeader( x2, 1, layer_sizes->data.i[i], CV_64F, x[i] );
            cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
            cvGEMM( x1, &_w, 1, 0, 0, x2 );
            _df = *x2;
            _df.data.db = df[i];
            // the third argument points just past the x1->cols weight rows,
            // i.e. at the row the backward pass treats as the bias row
            calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
            CV_SWAP( x1, x2, temp );
        }

        cvInitMatHeader( grad1, 1, ovcount, CV_64F, buf_ptr );
        *grad2 = *grad1;
        grad2->data.db = buf_ptr + max_count;

        // (scale, shift) pairs applied to the target vector
        w = weights[l_count+1];

        // calculate error
        if( u.type == CV_32F )
        {
            const float* udata = u.data.fl[idx];
            for( k = 0; k < ovcount; k++ )
            {
                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
                grad1->data.db[k] = t*sweight;
                sample_E += t*t;
            }
        }
        else
        {
            const double* udata = u.data.db[idx];
            for( k = 0; k < ovcount; k++ )
            {
                double t = udata[k]*w[k*2] + w[k*2+1] - x[l_count-1][k];
                grad1->data.db[k] = t*sweight;
                sample_E += t*t;
            }
        }
        // weight only this sample's contribution; multiplying E itself would
        // rescale the error already accumulated for the current pass
        E += sample_E*sweight;

        // backward pass, update weights
        for( i = l_count-1; i > 0; i-- )
        {
            int n1 = layer_sizes->data.i[i-1], n2 = layer_sizes->data.i[i];
            cvInitMatHeader( &_df, 1, n2, CV_64F, df[i] );
            cvMul( grad1, &_df, grad1 );
            cvInitMatHeader( &_w, n1+1, n2, CV_64F, weights[i] );
            cvInitMatHeader( &_dw, n1+1, n2, CV_64F, dw->data.db + (weights[i] - weights[0]) );
            cvInitMatHeader( x1, n1+1, 1, CV_64F, x[i-1] );
            // constant input that drives the bias row of the weight matrix
            x[i-1][n1] = 1.;
            cvGEMM( x1, grad1, params.bp_dw_scale, &_dw, params.bp_moment_scale, &_dw );
            cvAdd( &_w, &_dw, &_w );
            if( i > 1 )
            {
                // propagate the gradient through the weights without the
                // bias row
                grad2->cols = n1;
                _w.rows = n1;
                cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
            }
            CV_SWAP( grad1, grad2, temp );
        }
    }

    // convert the per-sample step counter into a number of passes
    iter /= count;

    __END__;

    cvReleaseMat( &dw );
    cvReleaseMat( &buf );
    cvReleaseMat( &_idx );
    cvFree( &x );

    return iter;
}
示例#2
0
文件: ann_mlp.cpp 项目: 4ker/opencv
    int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
    {
        int i, j, k;
        double prev_E = DBL_MAX*0.5, E = 0;
        int itype = inputs.type(), otype = outputs.type();

        int count = inputs.rows;

        int iter = -1, max_iter = termCrit.maxCount*count;
        double epsilon = termCrit.epsilon*count;

        int l_count = layer_count();
        int ivcount = layer_sizes[0];
        int ovcount = layer_sizes.back();

        // allocate buffers
        vector<vector<double> > x(l_count);
        vector<vector<double> > df(l_count);
        vector<Mat> dw(l_count);

        for( i = 0; i < l_count; i++ )
        {
            int n = layer_sizes[i];
            x[i].resize(n+1);
            df[i].resize(n);
            dw[i] = Mat::zeros(weights[i].size(), CV_64F);
        }

        Mat _idx_m(1, count, CV_32S);
        int* _idx = _idx_m.ptr<int>();
        for( i = 0; i < count; i++ )
            _idx[i] = i;

        AutoBuffer<double> _buf(max_lsize*2);
        double* buf[] = { _buf, (double*)_buf + max_lsize };

        const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();

        // run back-propagation loop
        /*
         y_i = w_i*x_{i-1}
         x_i = f(y_i)
         E = 1/2*||u - x_N||^2
         grad_N = (x_N - u)*f'(y_i)
         dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
         w_i(t+1) = w_i(t) + dw_i(t)
         grad_{i-1} = w_i^t*grad_i
        */
        for( iter = 0; iter < max_iter; iter++ )
        {
            int idx = iter % count;
            double sweight = sw ? count*sw[idx] : 1.;

            if( idx == 0 )
            {
                //printf("%d. E = %g\n", iter/count, E);
                if( fabs(prev_E - E) < epsilon )
                    break;
                prev_E = E;
                E = 0;

                // shuffle indices
                for( i = 0; i < count; i++ )
                {
                    j = rng.uniform(0, count);
                    k = rng.uniform(0, count);
                    std::swap(_idx[j], _idx[k]);
                }
            }

            idx = _idx[idx];

            const uchar* x0data_p = inputs.ptr(idx);
            const float* x0data_f = (const float*)x0data_p;
            const double* x0data_d = (const double*)x0data_p;

            double* w = weights[0].ptr<double>();
            for( j = 0; j < ivcount; j++ )
                x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];

            Mat x1( 1, ivcount, CV_64F, &x[0][0] );

            // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
            for( i = 1; i < l_count; i++ )
            {
                int n = layer_sizes[i];
                Mat x2(1, n, CV_64F, &x[i][0] );
                Mat _w = weights[i].rowRange(0, x1.cols);
                gemm(x1, _w, 1, noArray(), 0, x2);
                Mat _df(1, n, CV_64F, &df[i][0] );
                calc_activ_func_deriv( x2, _df, weights[i] );
                x1 = x2;
            }

            Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
            w = weights[l_count+1].ptr<double>();

            // calculate error
            const uchar* udata_p = outputs.ptr(idx);
            const float* udata_f = (const float*)udata_p;
            const double* udata_d = (const double*)udata_p;

            double* gdata = grad1.ptr<double>();
            for( k = 0; k < ovcount; k++ )
            {
                double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
                gdata[k] = t*sweight;
                E += t*t;
            }
            E *= sweight;

            // backward pass, update weights
            for( i = l_count-1; i > 0; i-- )
            {
                int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
                Mat _df(1, n2, CV_64F, &df[i][0]);
                multiply( grad1, _df, grad1 );
                Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
                x[i-1][n1] = 1.;
                gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
                add( weights[i], dw[i], weights[i] );
                if( i > 1 )
                {
                    Mat grad2(1, n1, CV_64F, buf[i&1]);
                    Mat _w = weights[i].rowRange(0, n1);
                    gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
                    grad1 = grad2;
                }
            }
        }

        iter /= count;
        return iter;
    }
示例#3
0
/*
    Trains the network with the RPROP scheme: in every iteration the error
    derivatives dE/dw are accumulated over the whole training set, then each
    weight is moved by its own step size dw, which grows or shrinks depending
    on whether the sign of its derivative changed since the last iteration.

    x0 - input vectors; rows are read as float when x0.type == CV_32F and as
         double otherwise
    u  - target vectors, same type convention as x0
    sw - optional per-sample weights indexed by sample number; when it is 0
         every sample gets the weight 1/count

    Returns the value of the iteration counter when the loop ends (the index
    of the iteration that met the epsilon test, or max_iter). iter is
    initialised to -1, so that value is returned if the function leaves
    through the error exit before the loop starts (NOTE(review): relies on
    CV_CALL jumping to the exit label on failure - confirm).
*/
int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
{
    const int max_buf_sz = 1 << 16;
    CvMat* dw = 0;
    CvMat* dEdw = 0;
    CvMat* prev_dEdw_sign = 0;
    CvMat* buf = 0;
    double **x = 0, **df = 0;
    int iter = -1, count = x0.count;

    // report errors under this function's own name
    CV_FUNCNAME( "CvANN_MLP::train_rprop" );

    __BEGIN__;

    int i, ivcount, ovcount, l_count, total = 0, max_iter, buf_sz, dcount0, dcount=0;
    double *buf_ptr;
    double prev_E = DBL_MAX*0.5, epsilon;
    double dw_plus, dw_minus, dw_min, dw_max;
    double inv_count;

    max_iter = params.term_crit.max_iter;
    epsilon = params.term_crit.epsilon;
    dw_plus = params.rp_dw_plus;
    dw_minus = params.rp_dw_minus;
    dw_min = params.rp_dw_min;
    dw_max = params.rp_dw_max;

    l_count = layer_sizes->cols;
    ivcount = layer_sizes->data.i[0];
    ovcount = layer_sizes->data.i[l_count-1];

    // allocate buffers
    for( i = 0; i < l_count; i++ )
        total += layer_sizes->data.i[i];

    // dw holds the per-weight step sizes, dEdw the accumulated derivatives and
    // prev_dEdw_sign the derivative signs remembered from the last iteration;
    // all three are laid out like wbuf
    CV_CALL( dw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvSet( dw, cvScalarAll(params.rp_dw0) );
    CV_CALL( dEdw = cvCreateMat( wbuf->rows, wbuf->cols, wbuf->type ));
    cvZero( dEdw );
    CV_CALL( prev_dEdw_sign = cvCreateMat( wbuf->rows, wbuf->cols, CV_8SC1 ));
    cvZero( prev_dEdw_sign );

    inv_count = 1./count;
    // samples are processed in chunks of dcount0: chosen so that the x/df
    // storage stays around max_buf_sz doubles, but at least one sample and at
    // most the whole set
    dcount0 = max_buf_sz/(2*total);
    dcount0 = MAX( dcount0, 1 );
    dcount0 = MIN( dcount0, count );
    buf_sz = dcount0*(total + max_count)*2;

    CV_CALL( buf = cvCreateMat( 1, buf_sz, CV_64F ));

    CV_CALL( x = (double**)cvAlloc( total*2*sizeof(x[0]) ));
    df = x + total;
    buf_ptr = buf->data.db;

    // per layer: dcount0 rows of outputs (x) followed by dcount0 rows of
    // activation derivatives (df)
    for( i = 0; i < l_count; i++ )
    {
        x[i] = buf_ptr;
        df[i] = x[i] + layer_sizes->data.i[i]*dcount0;
        buf_ptr += (df[i] - x[i])*2;
    }
    // from here on buf_ptr points at the space left for the two gradient
    // matrices

    // run rprop loop
    /*
        y_i(t) = w_i(t)*x_{i-1}(t)
        x_i(t) = f(y_i(t))
        E = sum_over_all_samples(1/2*||u - x_N||^2)
        grad_N = (x_N - u)*f'(y_i)

                      MIN(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
        dw_i{jk}(t) = MAX(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
                      dw_i{jk}(t-1) else

        if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
           dE/dw_i{jk}(t)<-0
        else
           w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
        grad_{i-1}(t) = w_i^t(t)*grad_i(t)
    */
    for( iter = 0; iter < max_iter; iter++ )
    {
        int n1, n2, si, j, k;
        double* w;
        CvMat _w, _dEdw, hdr1, hdr2, ghdr1, ghdr2, _df;
        CvMat *x1, *x2, *grad1, *grad2, *temp;
        double E = 0;

        // first, iterate through all the samples and compute dEdw
        for( si = 0; si < count; si += dcount )
        {
            dcount = MIN( count - si, dcount0 );
            w = weights[0];
            grad1 = &ghdr1; grad2 = &ghdr2;
            x1 = &hdr1; x2 = &hdr2;

            // grab and preprocess input data
            if( x0.type == CV_32F )
                for( i = 0; i < dcount; i++ )
                {
                    const float* x0data = x0.data.fl[si+i];
                    double* xdata = x[0]+i*ivcount;
                    for( j = 0; j < ivcount; j++ )
                        xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
                }
            else
                for( i = 0; i < dcount; i++ )
                {
                    const double* x0data = x0.data.db[si+i];
                    double* xdata = x[0]+i*ivcount;
                    for( j = 0; j < ivcount; j++ )
                        xdata[j] = x0data[j]*w[j*2] + w[j*2+1];
                }

            cvInitMatHeader( x1, dcount, ivcount, CV_64F, x[0] );

            // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
            for( i = 1; i < l_count; i++ )
            {
                cvInitMatHeader( x2, dcount, layer_sizes->data.i[i], CV_64F, x[i] );
                cvInitMatHeader( &_w, x1->cols, x2->cols, CV_64F, weights[i] );
                cvGEMM( x1, &_w, 1, 0, 0, x2 );
                _df = *x2;
                _df.data.db = df[i];
                // the third argument points just past the x1->cols weight
                // rows, i.e. at the row updated as the bias part below
                calc_activ_func_deriv( x2, &_df, _w.data.db + _w.rows*_w.cols );
                CV_SWAP( x1, x2, temp );
            }

            cvInitMatHeader( grad1, dcount, ovcount, CV_64F, buf_ptr );
            // (scale, shift) pairs applied to the target vectors
            w = weights[l_count+1];
            grad2->data.db = buf_ptr + max_count*dcount;

            // calculate error
            if( u.type == CV_32F )
                for( i = 0; i < dcount; i++ )
                {
                    const float* udata = u.data.fl[si+i];
                    const double* xdata = x[l_count-1] + i*ovcount;
                    double* gdata = grad1->data.db + i*ovcount;
                    double sweight = sw ? sw[si+i] : inv_count, E1 = 0;

                    for( j = 0; j < ovcount; j++ )
                    {
                        double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
                        gdata[j] = t*sweight;
                        E1 += t*t;
                    }
                    E += sweight*E1;
                }
            else
                for( i = 0; i < dcount; i++ )
                {
                    const double* udata = u.data.db[si+i];
                    const double* xdata = x[l_count-1] + i*ovcount;
                    double* gdata = grad1->data.db + i*ovcount;
                    double sweight = sw ? sw[si+i] : inv_count, E1 = 0;

                    for( j = 0; j < ovcount; j++ )
                    {
                        double t = udata[j]*w[j*2] + w[j*2+1] - xdata[j];
                        gdata[j] = t*sweight;
                        E1 += t*t;
                    }
                    E += sweight*E1;
                }

            // backward pass, update dEdw
            for( i = l_count-1; i > 0; i-- )
            {
                n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
                cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
                cvMul( grad1, &_df, grad1 );
                cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
                cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
                cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
                // update bias part of dEdw
                for( k = 0; k < dcount; k++ )
                {
                    double* dst = _dEdw.data.db + n1*n2;
                    const double* src = grad1->data.db + k*n2;
                    for( j = 0; j < n2; j++ )
                        dst[j] += src[j];
                }
                cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
                // keep the data pointer, only reshape the header for layer i-1
                cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );

                if( i > 1 )
                    cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
                CV_SWAP( grad1, grad2, temp );
            }
        }

        // now update weights
        for( i = 1; i < l_count; i++ )
        {
            n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
            // k == n1 is the bias row
            for( k = 0; k <= n1; k++ )
            {
                double* wk = weights[i]+k*n2;
                // element offset of this row from weights[0]; the same offset
                // is applied to dw, dEdw and prev_dEdw_sign
                size_t delta = wk - weights[0];
                double* dwk = dw->data.db + delta;
                double* dEdwk = dEdw->data.db + delta;
                char* prevEk = (char*)(prev_dEdw_sign->data.ptr + delta);

                for( j = 0; j < n2; j++ )
                {
                    double Eval = dEdwk[j];
                    double dval = dwk[j];
                    double wval = wk[j];
                    int s = CV_SIGN(Eval);
                    int ss = prevEk[j]*s;
                    if( ss > 0 )
                    {
                        // same sign as last time: enlarge the step
                        dval *= dw_plus;
                        dval = MIN( dval, dw_max );
                        dwk[j] = dval;
                        wk[j] = wval + dval*s;
                    }
                    else if( ss < 0 )
                    {
                        // sign flipped: shrink the step and forget the sign so
                        // that the next iteration takes the last branch
                        dval *= dw_minus;
                        dval = MAX( dval, dw_min );
                        prevEk[j] = 0;
                        dwk[j] = dval;
                        wk[j] = wval + dval*s;
                    }
                    else
                    {
                        // no remembered sign or zero derivative: keep the step
                        prevEk[j] = (char)s;
                        wk[j] = wval + dval*s;
                    }
                    // restart the accumulation for the next iteration
                    dEdwk[j] = 0.;
                }
            }
        }

        // E is re-declared as 0 at the top of every iteration, so it needs no
        // explicit reset here
        if( fabs(prev_E - E) < epsilon )
            break;
        prev_E = E;
    }

    __END__;

    cvReleaseMat( &dw );
    cvReleaseMat( &dEdw );
    cvReleaseMat( &prev_dEdw_sign );
    cvReleaseMat( &buf );
    cvFree( &x );

    return iter;
}