Exemplo n.º 1
0
Arquivo: mlem.cpp Projeto: glo/ee384b
/* log_weight_div_det[k] = -2*log(weights_k) + log(det(Sigma_k))

   covs[k] = cov_rotate_mats[k] * cov_eigen_values[k] * (cov_rotate_mats[k])'
   cov_rotate_mats[k] are orthogonal matrices of eigenvectors and
   cov_eigen_values[k] are diagonal matrices (represented by 1D vectors) of eigenvalues.

   <alpha_ik> is the probability that the vector x_i belongs to the k-th cluster:
   <alpha_ik> ~ weights_k * exp{ -0.5[ln(det(Sigma_k)) + (x_i - mu_k)' Sigma_k^(-1) (x_i - mu_k)] }
   We calculate these probabilities here by the equivalent formulae:
   Denote
   S_ik = -0.5(log(det(Sigma_k)) + (x_i - mu_k)' Sigma_k^(-1) (x_i - mu_k)) + log(weights_k),
   M_i = max_k S_ik = S_iq, so that the q-th class is the one where the maximum is attained. Then
   alpha_ik = exp{ S_ik - M_i } / ( 1 + sum_j!=q exp{ S_ij - M_i })
*/
double CvEM::run_em( const CvVectors& train_data )
{
    CvMat* centered_sample = 0;
    CvMat* covs_item = 0;
    CvMat* log_det = 0;
    CvMat* log_weights = 0;
    CvMat* cov_eigen_values = 0;
    CvMat* samples = 0;
    CvMat* sum_probs = 0;
    log_likelihood = -DBL_MAX;

    CV_FUNCNAME( "CvEM::run_em" );
    __BEGIN__;

    int nsamples = train_data.count, dims = train_data.dims, nclusters = params.nclusters;
    double min_variation = FLT_EPSILON;
    double min_det_value = MAX( DBL_MIN, pow( min_variation, dims ));
    double likelihood_bias = -CV_LOG2PI * (double)nsamples * (double)dims / 2., _log_likelihood = -DBL_MAX;
    int start_step = params.start_step;

    int i, j, k, n;
    int is_general = 0, is_diagonal = 0, is_spherical = 0;
    double prev_log_likelihood = -DBL_MAX / 1000., det, d;
    CvMat whdr, iwhdr, diag, *w, *iw;
    double* w_data;
    double* sp_data;

    if( nclusters == 1 )
    {
        double log_weight;
        CV_CALL( cvSet( probs, cvScalar(1.)) );

        if( params.cov_mat_type == COV_MAT_SPHERICAL )
        {
            d = cvTrace(*covs).val[0]/dims;
            d = MAX( d, FLT_EPSILON );
            inv_eigen_values->data.db[0] = 1./d;
            log_weight = pow( d, dims*0.5 );
        }
        else
        {
            w_data = inv_eigen_values->data.db;

            if( params.cov_mat_type == COV_MAT_GENERIC )
                cvSVD( *covs, inv_eigen_values, *cov_rotate_mats, 0, CV_SVD_U_T );
            else
                cvTranspose( cvGetDiag(*covs, &diag), inv_eigen_values );

            cvMaxS( inv_eigen_values, FLT_EPSILON, inv_eigen_values );
            for( j = 0, det = 1.; j < dims; j++ )
                det *= w_data[j];
            log_weight = sqrt(det);
            cvDiv( 0, inv_eigen_values, inv_eigen_values );
        }

        log_weight_div_det->data.db[0] = -2*log(weights->data.db[0]/log_weight);
        log_likelihood = DBL_MAX/1000.;
        EXIT;
    }

    if( params.cov_mat_type == COV_MAT_GENERIC )
        is_general  = 1;
    else if( params.cov_mat_type == COV_MAT_DIAGONAL )
        is_diagonal = 1;
    else if( params.cov_mat_type == COV_MAT_SPHERICAL )
        is_spherical  = 1;
    /* In the case of <cov_mat_type> == COV_MAT_DIAGONAL, the k-th row of cov_eigen_values
    contains the diagonal elements (variations). In the case of
    <cov_mat_type> == COV_MAT_SPHERICAL - the 0-ths elements of the vectors cov_eigen_values[k]
    are to be equal to the mean of the variations over all the dimensions. */

    CV_CALL( log_det = cvCreateMat( 1, nclusters, CV_64FC1 ));
    CV_CALL( log_weights = cvCreateMat( 1, nclusters, CV_64FC1 ));
    CV_CALL( covs_item = cvCreateMat( dims, dims, CV_64FC1 ));
    CV_CALL( centered_sample = cvCreateMat( 1, dims, CV_64FC1 ));
    CV_CALL( cov_eigen_values = cvCreateMat( inv_eigen_values->rows, inv_eigen_values->cols, CV_64FC1 ));
    CV_CALL( samples = cvCreateMat( nsamples, dims, CV_64FC1 ));
    CV_CALL( sum_probs = cvCreateMat( 1, nclusters, CV_64FC1 ));
    sp_data = sum_probs->data.db;

    // copy the training data into double-precision matrix
    for( i = 0; i < nsamples; i++ )
    {
        const float* src = train_data.data.fl[i];
        double* dst = (double*)(samples->data.ptr + samples->step*i);

        for( j = 0; j < dims; j++ )
            dst[j] = src[j];
    }

    if( start_step != START_M_STEP )
    {
        for( k = 0; k < nclusters; k++ )
        {
            if( is_general || is_diagonal )
            {
                w = cvGetRow( cov_eigen_values, &whdr, k );
                if( is_general )
                    cvSVD( covs[k], w, cov_rotate_mats[k], 0, CV_SVD_U_T );
                else
                    cvTranspose( cvGetDiag( covs[k], &diag ), w );
                w_data = w->data.db;
                for( j = 0, det = 1.; j < dims; j++ )
                    det *= w_data[j];
                if( det < min_det_value )
                {
                    if( start_step == START_AUTO_STEP )
                        det = min_det_value;
                    else
                        EXIT;
                }
                log_det->data.db[k] = det;
            }
            else
            {
                d = cvTrace(covs[k]).val[0]/(double)dims;
                if( d < min_variation )
                {
                    if( start_step == START_AUTO_STEP )
                        d = min_variation;
                    else
                        EXIT;
                }
                cov_eigen_values->data.db[k] = d;
                log_det->data.db[k] = d;
            }
        }

        cvLog( log_det, log_det );
        if( is_spherical )
            cvScale( log_det, log_det, dims );
    }

    for( n = 0; n < params.term_crit.max_iter; n++ )
    {
        if( n > 0 || start_step != START_M_STEP )
        {
            // e-step: compute probs_ik from means_k, covs_k and weights_k.
            CV_CALL(cvLog( weights, log_weights ));

            // S_ik = -0.5[log(det(Sigma_k)) + (x_i - mu_k)' Sigma_k^(-1) (x_i - mu_k)] + log(weights_k)
            for( k = 0; k < nclusters; k++ )
            {
                CvMat* u = cov_rotate_mats[k];
                const double* mean = (double*)(means->data.ptr + means->step*k);
                w = cvGetRow( cov_eigen_values, &whdr, k );
                iw = cvGetRow( inv_eigen_values, &iwhdr, k );
                cvDiv( 0, w, iw );

                w_data = (double*)(inv_eigen_values->data.ptr + inv_eigen_values->step*k);

                for( i = 0; i < nsamples; i++ )
                {
                    double *csample = centered_sample->data.db, p = log_det->data.db[k];
                    const double* sample = (double*)(samples->data.ptr + samples->step*i);
                    double* pp = (double*)(probs->data.ptr + probs->step*i);
                    for( j = 0; j < dims; j++ )
                        csample[j] = sample[j] - mean[j];
                    if( is_general )
                        cvGEMM( centered_sample, u, 1, 0, 0, centered_sample, CV_GEMM_B_T );
                    for( j = 0; j < dims; j++ )
                        p += csample[j]*csample[j]*w_data[is_spherical ? 0 : j];
                    pp[k] = -0.5*p + log_weights->data.db[k];

                    // S_ik <- S_ik - max_j S_ij
                    if( k == nclusters - 1 )
                    {
                        double max_val = 0;
                        for( j = 0; j < nclusters; j++ )
                            max_val = MAX( max_val, pp[j] );
                        for( j = 0; j < nclusters; j++ )
                            pp[j] -= max_val;
                    }
                }
            }

            CV_CALL(cvExp( probs, probs )); // exp( S_ik )
            cvZero( sum_probs );

            // alpha_ik = exp( S_ik ) / sum_j exp( S_ij ),
            // log_likelihood = sum_i log (sum_j exp(S_ij))
            for( i = 0, _log_likelihood = likelihood_bias; i < nsamples; i++ )
            {
                double* pp = (double*)(probs->data.ptr + probs->step*i), sum = 0;
                for( j = 0; j < nclusters; j++ )
                    sum += pp[j];
                sum = 1./MAX( sum, DBL_EPSILON );
                for( j = 0; j < nclusters; j++ )
                {
                    double p = pp[j] *= sum;
                    sp_data[j] += p;
                }
                _log_likelihood -= log( sum );
            }

            // check termination criteria
            if( fabs( (_log_likelihood - prev_log_likelihood) / prev_log_likelihood ) < params.term_crit.epsilon )
                break;
            prev_log_likelihood = _log_likelihood;
        }

        // m-step: update means_k, covs_k and weights_k from probs_ik
        cvGEMM( probs, samples, 1, 0, 0, means, CV_GEMM_A_T );

        for( k = 0; k < nclusters; k++ )
        {
            double sum = sp_data[k], inv_sum = 1./sum;
            CvMat* cov = covs[k], _mean, _sample;

            w = cvGetRow( cov_eigen_values, &whdr, k );
            w_data = w->data.db;
            cvGetRow( means, &_mean, k );
            cvGetRow( samples, &_sample, k );

            // update weights_k
            weights->data.db[k] = sum;

            // update means_k
            cvScale( &_mean, &_mean, inv_sum );

            // compute covs_k
            cvZero( cov );
            cvZero( w );

            for( i = 0; i < nsamples; i++ )
            {
                double p = probs->data.db[i*nclusters + k]*inv_sum;
                _sample.data.db = (double*)(samples->data.ptr + samples->step*i);

                if( is_general )
                {
                    cvMulTransposed( &_sample, covs_item, 1, &_mean );
                    cvScaleAdd( covs_item, cvRealScalar(p), cov, cov );
                }
                else
                    for( j = 0; j < dims; j++ )
                    {
                        double val = _sample.data.db[j] - _mean.data.db[j];
                        w_data[is_spherical ? 0 : j] += p*val*val;
                    }
            }

            if( is_spherical )
            {
                d = w_data[0]/(double)dims;
                d = MAX( d, min_variation );
                w->data.db[0] = d;
                log_det->data.db[k] = d;
            }
            else
            {
                if( is_general )
                    cvSVD( cov, w, cov_rotate_mats[k], 0, CV_SVD_U_T );
                cvMaxS( w, min_variation, w );
                for( j = 0, det = 1.; j < dims; j++ )
                    det *= w_data[j];
                log_det->data.db[k] = det;
            }
        }

        cvConvertScale( weights, weights, 1./(double)nsamples, 0 );
        cvMaxS( weights, DBL_MIN, weights );

        cvLog( log_det, log_det );
        if( is_spherical )
            cvScale( log_det, log_det, dims );
    } // end of iteration process

    //log_weight_div_det[k] = -2*log(weights_k/det(Sigma_k))^0.5) = -2*log(weights_k) + log(det(Sigma_k)))
    if( log_weight_div_det )
    {
        cvScale( log_weights, log_weight_div_det, -2 );
        cvAdd( log_weight_div_det, log_det, log_weight_div_det );
    }

    /* Now finalize all the covariation matrices:
    1) if <cov_mat_type> == COV_MAT_DIAGONAL we used array of <w> as diagonals.
       Now w[k] should be copied back to the diagonals of covs[k];
    2) if <cov_mat_type> == COV_MAT_SPHERICAL we used the 0-th element of w[k]
       as an average variation in each cluster. The value of the 0-th element of w[k]
       should be copied to the all of the diagonal elements of covs[k]. */
    if( is_spherical )
    {
        for( k = 0; k < nclusters; k++ )
            cvSetIdentity( covs[k], cvScalar(cov_eigen_values->data.db[k]));
    }
    else if( is_diagonal )
    {
        for( k = 0; k < nclusters; k++ )
            cvTranspose( cvGetRow( cov_eigen_values, &whdr, k ),
                         cvGetDiag( covs[k], &diag ));
    }
    cvDiv( 0, cov_eigen_values, inv_eigen_values );

    log_likelihood = _log_likelihood;

    __END__;

    cvReleaseMat( &log_det );
    cvReleaseMat( &log_weights );
    cvReleaseMat( &covs_item );
    cvReleaseMat( &centered_sample );
    cvReleaseMat( &cov_eigen_values );
    cvReleaseMat( &samples );
    cvReleaseMat( &sum_probs );

    return log_likelihood;
}
Exemplo n.º 2
0
/* Thin forwarding wrapper around cvGetDiag: passes <arr>, <submat> and <diag>
   through unchanged and returns whatever cvGetDiag returns. */
CvMat* cveGetDiag(CvArr* arr, CvMat* submat, int diag)
{
   CvMat* diagHeader = cvGetDiag(arr, submat, diag);
   return diagHeader;
}