/*
 * Transform the 3-D point p by the matrix stored in transform[id].M.
 *
 * The point is extended with a fourth component of 1 before being handed to
 * mult_mat_vec; the first three components of the product are then divided
 * by the fourth and written to P.
 *
 * P  : output, receives the three divided components
 * p  : input point (three components are read)
 * id : index into the file-level transform table
 *
 * No check is made for a zero fourth component of the product.
 */
static void transform_position(float P[3], float p[3], int id)
{
    float in[4] = { p[0], p[1], p[2], 1.0f };
    float out[4];
    int axis;

    mult_mat_vec(out, transform[id].M, in);

    for (axis = 0; axis < 3; ++axis) {
        P[axis] = out[axis] / out[3];
    }
}
/**
 * Orthogonal Matching Pursuit where the coefficients are recomputed with a
 * full least-squares solve after every atom selection.
 *
 * The dictionary is read as N atoms of M contiguous doubles each (atom j
 * starts at m_dict + j*M) and the signals as S vectors of M contiguous
 * doubles each (signal s starts at m_x + s*M).
 *
 * For every signal the loop repeatedly
 *   1. picks the atom with the largest absolute correlation with the residual,
 *   2. appends it to the sub-dictionary,
 *   3. solves the least-squares problem on the selected atoms,
 *   4. recomputes the residual and its correlations,
 * and stops when K atoms have been selected, when the squared residual norm
 * is no longer above res_norm_bnd^2, when the chosen atom was already
 * selected, or when the squared correlation of the chosen atom is below 1e-14.
 *
 * @param m_dict        dictionary, M x N
 * @param m_x           signals, M x S
 * @param M             signal dimension
 * @param N             number of atoms
 * @param S             number of signals
 * @param K             maximum number of atoms per signal; clamped to M
 * @param res_norm_bnd  bound on the residual norm used as stopping criterion
 * @param sparse_output 0 for a dense N x S result, otherwise a sparse N x S result
 * @param verbose       non-zero to print diagnostics and the timing profile
 * @return newly created N x S mxArray holding the coefficients
 *         (presumably owned by the caller / MATLAB -- confirm at call site)
 */
mxArray* omp_ls(const double m_dict[],
    const double m_x[],
    mwSize M,
    mwSize N,
    mwSize S,
    mwSize K,
    double res_norm_bnd,
    int sparse_output,
    int verbose){

    // List of indices of selected atoms
    mwIndex *selected_atoms = 0;
    // Simple binary mask of selected atoms
    int *selected_atoms_mask = 0;
    // The submatrix of selected atoms
    double *m_subdict = 0;
    // Copy of subdictionary handed to the least squares solver
    double *m_subdict_copy = 0;
    // The proxy D' x
    double *v_proxy = 0;
    // The inner product of residual with atoms
    double *v_h = 0;
    // The residual
    double *v_r = 0;
    // Coefficients produced by the least squares solve
    double *v_c = 0;
    // Scratch copy of the current signal (right-hand side for least squares)
    double *v_t1 = 0;
    // Number of doubles reserved for v_t1
    mwSize t1_len;
    // Pointer to new atom
    const double *wv_new_atom;
    // Residual norm squared
    double res_norm_sqr;
    // Square of upper bound on residual norm
    double res_norm_bnd_sqr = SQR(res_norm_bnd);
    // Pointer to current signal
    const double *wv_x = 0;

    // Output array
    mxArray *p_alpha;
    double *m_alpha;
    // Row indices for non-zero entries in Alpha
    mwIndex *ir_alpha;
    // Indices for first non-zero entry in each column
    mwIndex *jc_alpha;
    // Running index over non-zero entries in alpha (sparse output only)
    mwIndex nz_index = 0;

    // Counters
    int i, j, k, s;
    // Index of new atom
    mwIndex new_atom_index;
    // Correlation of the residual with the chosen atom
    double d2;
    // Maximum number of columns to be used in representations
    mwSize max_cols;
    // Structure for tracking time spent.
    omp_profile profile;

    // K cannot be greater than M.  The "< 0" test only matters in builds
    // where mwSize is a signed type.
    if (K < 0 || K > M) {
        K = M;
    }
    max_cols = (mwSize)(ceil(sqrt((double)M)/2.0) + 1.01);
    if (max_cols < K){
        max_cols = K;
    }

    // Memory allocations
    // Number of selected atoms cannot exceed M
    selected_atoms = (mwIndex*) mxMalloc(M*sizeof(mwIndex));
    // Total number of atoms is N
    selected_atoms_mask = (int*) mxMalloc(N*sizeof(int));
    // Coefficients of solution of least square problem
    v_c = (double*) mxMalloc(M*sizeof(double));
    // v_t1 receives an M-length copy of the signal below, so it must hold
    // at least M doubles; never smaller than the N doubles reserved before.
    t1_len = (M > N) ? M : N;
    v_t1 = (double*) mxMalloc(t1_len*sizeof(double));
    // Keeping max_cols space for subdictionary.
    m_subdict = (double*) mxMalloc(max_cols*M*sizeof(double));
    m_subdict_copy = (double*) mxMalloc(max_cols*M*sizeof(double));
    // Proxy vector is in R^N
    v_proxy = (double*) mxMalloc(N*sizeof(double));
    // h is in R^N.
    v_h = (double*) mxMalloc(N*sizeof(double));
    // Residual is in signal space R^M.
    v_r = (double*) mxMalloc(M*sizeof(double));

    if (sparse_output == 0){
        p_alpha = mxCreateDoubleMatrix(N, S, mxREAL);
        m_alpha = mxGetPr(p_alpha);
        ir_alpha = 0;
        jc_alpha = 0;
    } else {
        // At most max_cols non-zero entries per column.
        p_alpha = mxCreateSparse(N, S, max_cols*S, mxREAL);
        m_alpha = mxGetPr(p_alpha);
        ir_alpha = mxGetIr(p_alpha);
        jc_alpha = mxGetJc(p_alpha);
        nz_index = 0;
        jc_alpha[0] = 0;
    }

    omp_profile_init(&profile);

    for (s = 0; s < S; ++s){
        wv_x = m_x + M*s;

        // Initialization: the residual starts out as the signal itself
        res_norm_sqr = inner_product(wv_x, wv_x, M);
        // Compute proxy p = D' * x
        mult_mat_t_vec(1, m_dict, wv_x, v_proxy, M, N);
        omp_profile_toctic(&profile, TIME_DtR);
        // h = p = D' * r
        copy_vec_vec(v_proxy, v_h, N);
        for (i = 0; i < N; ++i){
            selected_atoms_mask[i] = 0;
        }
        // Number of atoms selected so far.
        k = 0;

        // Iterate for each atom
        while (k < K && res_norm_sqr > res_norm_bnd_sqr){
            omp_profile_tic(&profile);
            // Pick the index of (k+1)-th atom
            new_atom_index = abs_max_index(v_h, N);
            omp_profile_toctic(&profile, TIME_MaxAbs);

            // If this atom is already selected, we will break
            if (selected_atoms_mask[new_atom_index]){
                // This is unlikely due to orthogonal structure of OMP
                if (verbose){
                    mexPrintf("This atom is already selected.");
                }
                break;
            }

            // Check for small values
            d2 = v_h[new_atom_index];
            if (SQR(d2) < 1e-14){
                // The inner product of residual with new atom is way too small.
                break;
            }

            // Store the index of new atom
            selected_atoms[k] = new_atom_index;
            selected_atoms_mask[new_atom_index] = 1;

            // Copy the new atom to the sub-dictionary
            wv_new_atom = m_dict + new_atom_index*M;
            copy_vec_vec(wv_new_atom, m_subdict + k*M, M);
            omp_profile_toctic(&profile, TIME_DictSubMatrixUpdate);

            // It is time to increase the count of selected atoms
            ++k;

            // Least squares on working copies of the sub-dictionary and
            // of the signal; the originals are still needed afterwards.
            copy_vec_vec(m_subdict, m_subdict_copy, M*k);
            copy_vec_vec(wv_x, v_t1, M);
            least_square(m_subdict_copy, v_t1, v_c, M, k, 1);
            omp_profile_toctic(&profile, TIME_LeastSquares);

            // Compute residual
            // r = x - D_I c
            mult_mat_vec(-1, m_subdict, v_c, v_r, M, k);
            sum_vec_vec(1, wv_x, v_r, M);
            omp_profile_toctic(&profile, TIME_RUpdate);

            // Update h = D' r
            mult_mat_t_vec(1, m_dict, v_r, v_h, M, N);
            // Update residual norm squared
            res_norm_sqr = inner_product(v_r, v_r, M);
            omp_profile_toctic(&profile, TIME_DtR);
        }

        // Write the output vector
        if (sparse_output == 0){
            // Dense output: column s of alpha
            double *wv_alpha = m_alpha + N*s;
            fill_vec_sparse_vals(v_c, selected_atoms, wv_alpha, N, k);
        } else {
            // Sort the row indices (coefficients are permuted along with them)
            quicksort_indices(selected_atoms, v_c, k);
            // Add the non-zero entries for this column
            for (j = 0; j < k; ++j){
                m_alpha[nz_index] = v_c[j];
                ir_alpha[nz_index] = selected_atoms[j];
                ++nz_index;
            }
            // Fill in the total number of nonzero entries in the end.
            jc_alpha[s+1] = jc_alpha[s] + k;
        }
    }

    if (verbose){
        omp_profile_print(&profile);
    }

    // Memory cleanup
    mxFree(selected_atoms);
    mxFree(selected_atoms_mask);
    mxFree(v_c);
    mxFree(v_t1);
    mxFree(m_subdict);
    mxFree(m_subdict_copy);
    mxFree(v_proxy);
    mxFree(v_h);
    mxFree(v_r);

    // Return the result
    return p_alpha;
}
/*
 * Measure the 2-D extent of a box after it has been pushed through M.
 *
 * The eight corners of the box are built from b (x from b[0]/b[3], y from
 * b[1]/b[4], z from b[2]/b[5], fourth component 1), multiplied by M via
 * mult_mat_vec, and their first two components are divided by the fourth.
 * The function returns half the sum of the width and height of the
 * rectangle bounding those eight 2-D points, scaled by bias.
 *
 * b    : box bounds, six values
 * M    : 16-element matrix passed through to mult_mat_vec
 * bias : multiplier applied to the result
 *
 * An earlier variant (kept disabled in the original) used the larger of
 * the two extents instead of their average.
 */
static float get_value(float b[6], const float M[16], float bias)
{
    float corner[8][4];
    float proj[8][4];
    float lo_x, lo_y, hi_x, hi_y;
    float extent_sum;
    int i;

    /* Bit 0 of the corner index picks x, bit 1 picks y, bit 2 picks z. */
    for (i = 0; i < 8; ++i) {
        corner[i][0] = (i & 1) ? b[3] : b[0];
        corner[i][1] = (i & 2) ? b[4] : b[1];
        corner[i][2] = (i & 4) ? b[5] : b[2];
        corner[i][3] = 1.0f;
    }

    for (i = 0; i < 8; ++i) {
        mult_mat_vec(proj[i], M, corner[i]);
    }

    /* Divide x and y by the fourth component; the third is not needed. */
    for (i = 0; i < 8; ++i) {
        proj[i][0] /= proj[i][3];
        proj[i][1] /= proj[i][3];
    }

    /* Bounding rectangle of the eight divided points. */
    lo_x = hi_x = proj[0][0];
    lo_y = hi_y = proj[0][1];
    for (i = 1; i < 8; ++i) {
        lo_x = MIN(proj[i][0], lo_x);
        lo_y = MIN(proj[i][1], lo_y);
        hi_x = MAX(proj[i][0], hi_x);
        hi_y = MAX(proj[i][1], hi_y);
    }

    extent_sum = (float) (fabs(lo_x - hi_x) + fabs(lo_y - hi_y));
    return 0.5f * extent_sum * bias;
}