/// Assembles one matrix (bilinear) form on the current element and adds the
/// result to the global matrix `current_mat`.
///
/// A local stiffness matrix is filled entry by entry from form->value(),
/// scaled by the block scaling coefficient, form->scaling_factor and the
/// coefficients of both assembly lists, and is then added to `current_mat`.
/// If the form couples two different components and has a non-zero `sym`
/// flag, the transposed (and, for sym < 0, sign-flipped) block is added too.
///
/// \param[in] form The matrix form to assemble.
/// \param[in] order Order passed to init_fn() for the form's own ext functions.
/// \param[in] base_fns Basis functions, indexed like current_als_j.
/// \param[in] test_fns Test functions, indexed like current_als_i.
/// \param[in] ext External functions used when the form provides none itself.
/// \param[in] u_ext Previous solutions; shifted by form->u_ext_offset when rungeKutta is set.
/// \param[in] current_als_i Assembly list of the test (row) space.
/// \param[in] current_als_j Assembly list of the basis (column) space.
/// \param[in] current_state Traverse state; its `e` entries decide which ext functions get initialized.
/// \param[in] n_quadrature_points Number of quadrature points passed to form->value().
/// \param[in] geometry Geometry passed to form->value().
/// \param[in] jacobian_x_weights Jacobian-times-weight array passed to form->value().
void DiscreteProblemFormAssembler<Scalar>::assemble_matrix_form(MatrixForm<double, Scalar>* form, int order, Func<double>** base_fns, Func<double>** test_fns, Func<Scalar>** ext, Func<Scalar>** u_ext, AsmList<Scalar>* current_als_i, AsmList<Scalar>* current_als_j, Traverse::State* current_state, int n_quadrature_points, Geom<double>* geometry, double* jacobian_x_weights)
{
  // Anything that is not a volumetric matrix form is treated as a surface form.
  bool surface_form = (dynamic_cast<MatrixFormVol<Scalar>*>(form) == nullptr);

  double block_scaling_coefficient = this->block_scaling_coeff(form);

  // tra: an off-diagonal block whose transposed counterpart must be inserted as well.
  // sym: a diagonal block that is symmetric, so only one triangle is evaluated.
  bool tra = (form->i != form->j) && (form->sym != 0);
  bool sym = (form->i == form->j) && (form->sym == 1);

  // Scaling used in the non-symmetric branch; surface forms carry an extra factor 0.5 there.
  // NOTE(review): the symmetric branch below applies no 0.5 factor to surface forms - confirm this is intended.
  const double nonsym_scaling = surface_form ? 0.5 * block_scaling_coefficient : block_scaling_coefficient;

  // Assemble the local stiffness matrix for the form form.
  Scalar **local_stiffness_matrix = new_matrix<Scalar>(std::max(current_als_i->cnt, current_als_j->cnt));

  Func<Scalar>** local_ext = ext;
  const int local_ext_count = form->ext.size();
  // If the user supplied custom ext functions for this form.
  if(local_ext_count > 0)
  {
    local_ext = malloc_with_check(local_ext_count, this);
    for(int ext_i = 0; ext_i < local_ext_count; ext_i++)
    {
      if(form->ext[ext_i])
        local_ext[ext_i] = current_state->e[ext_i] == nullptr ? nullptr : init_fn(form->ext[ext_i].get(), order);
      else
        local_ext[ext_i] = nullptr;
    }
  }

  // Account for the previous time level solution previously inserted at the back of ext.
  if(rungeKutta)
    u_ext += form->u_ext_offset;

  // Actual form-specific calculation.
  for (unsigned int i = 0; i < current_als_i->cnt; i++)
  {
    // Skip test functions without a valid dof. (A second, tra/surface-dependent
    // test of the very same condition used to follow here; it could never fire.)
    if(current_als_i->dof[i] < 0)
      continue;
    if(std::abs(current_als_i->coef[i]) < Hermes::HermesSqrtEpsilon)
      continue;

    if(!sym)
    {
      for (unsigned int j = 0; j < current_als_j->cnt; j++)
      {
        if(current_als_j->dof[j] >= 0)
        {
          // Is this necessary, i.e. is there a coefficient smaller than Hermes::HermesSqrtEpsilon?
          if(std::abs(current_als_j->coef[j]) < Hermes::HermesSqrtEpsilon)
            continue;

          Func<double>* u = base_fns[j];
          Func<double>* v = test_fns[i];

          local_stiffness_matrix[i][j] = nonsym_scaling * form->value(n_quadrature_points, jacobian_x_weights, u_ext, u, v, geometry, local_ext) * form->scaling_factor * current_als_j->coef[j] * current_als_i->coef[i];
        }
      }
    }
    // Symmetric block.
    else
    {
      for (unsigned int j = 0; j < current_als_j->cnt; j++)
      {
        // Entries below the diagonal are filled by mirroring when the upper one is computed.
        if(j < i && current_als_j->dof[j] >= 0)
          continue;
        if(current_als_j->dof[j] >= 0)
        {
          // Is this necessary, i.e. is there a coefficient smaller than Hermes::HermesSqrtEpsilon?
          if(std::abs(current_als_j->coef[j]) < Hermes::HermesSqrtEpsilon)
            continue;

          Func<double>* u = base_fns[j];
          Func<double>* v = test_fns[i];

          Scalar val = block_scaling_coefficient * form->value(n_quadrature_points, jacobian_x_weights, u_ext, u, v, geometry, local_ext) * form->scaling_factor * current_als_j->coef[j] * current_als_i->coef[i];
          local_stiffness_matrix[i][j] = local_stiffness_matrix[j][i] = val;
        }
      }
    }
  }

  // Insert the local stiffness matrix into the global one.
  current_mat->add(current_als_i->cnt, current_als_j->cnt, local_stiffness_matrix, current_als_i->dof, current_als_j->dof);

  // Insert also the off-diagonal (anti-)symmetric block, if required.
  if(tra)
  {
    if(form->sym < 0)
      chsgn(local_stiffness_matrix, current_als_i->cnt, current_als_j->cnt);
    transpose(local_stiffness_matrix, current_als_i->cnt, current_als_j->cnt);

    current_mat->add(current_als_j->cnt, current_als_i->cnt, local_stiffness_matrix, current_als_j->dof, current_als_i->dof);
  }

  // Release the form-specific ext functions created above.
  if(local_ext_count > 0)
  {
    for(int ext_i = 0; ext_i < local_ext_count; ext_i++)
    {
      // A slot is nullptr when the form has no ext function there, but also when
      // current_state->e[ext_i] was nullptr, so test the slot itself rather than
      // form->ext[ext_i] to avoid dereferencing a null pointer.
      if(local_ext[ext_i])
      {
        local_ext[ext_i]->free_fn();
        delete local_ext[ext_i];
      }
    }
    free_with_check(local_ext);
  }

  if(rungeKutta)
    u_ext -= form->u_ext_offset;

  // Cleanup.
  free_with_check(local_stiffness_matrix);
}
// Stores the given lattice in both the Lattice and UnitLattice members and
// recomputes GGt as dot(G, transpose(G)), where G is taken from the freshly
// set Lattice member.
void Bspline3DSetBase::setLattice(const CrystalLattice<RealType,DIM>& lat) { Lattice.set(lat); UnitLattice.set(lat); GGt=dot(Lattice.G,transpose(Lattice.G)); }
void on_draw() override { glfwMakeContextCurrent(window); glEnable(GL_CULL_FACE); glEnable(GL_DEPTH_TEST); int width, height; glfwGetWindowSize(window, &width, &height); glViewport(0, 0, width, height); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glClearColor(0.1f, 0.1f, 0.5f, 1.0f); const auto proj = camera.get_projection_matrix((float) width / (float) height); const float4x4 view = camera.get_view_matrix(); const float4x4 viewProj = mul(proj, view); skydome.render(viewProj, camera.get_eye_point(), camera.farClip); // Simple Shader { simpleShader->bind(); simpleShader->uniform("u_viewProj", viewProj); simpleShader->uniform("u_eye", camera.get_eye_point()); simpleShader->uniform("u_emissive", float3(.10f, 0.10f, 0.10f)); simpleShader->uniform("u_diffuse", float3(0.4f, 0.4f, 0.4f)); for (int i = 0; i < lights.size(); i++) { auto light = lights[i]; simpleShader->uniform("u_lights[" + std::to_string(i) + "].position", light.pose.position); simpleShader->uniform("u_lights[" + std::to_string(i) + "].color", light.color); } for (const auto & model : proceduralModels) { simpleShader->uniform("u_modelMatrix", model.get_model()); simpleShader->uniform("u_modelMatrixIT", inv(transpose(model.get_model()))); model.draw(); } for (const auto & model : cameraPositions) { simpleShader->uniform("u_modelMatrix", model.get_model()); simpleShader->uniform("u_modelMatrixIT", inv(transpose(model.get_model()))); model.draw(); } gl_check_error(__FILE__, __LINE__); simpleShader->unbind(); } grid.render(proj, view); gl_check_error(__FILE__, __LINE__); glfwSwapBuffers(window); frameCount++; }
// Returns the inverse of this rotation, obtained as the transpose of the
// stored matrix _mat.
Angel::mat4 RotMat::inverse( void ) const
{
    Angel::mat4 transposed = transpose( _mat );
    return transposed;
}
/***********************************************************************//** * @brief Test matrix functions * * Tests matrix functions. ***************************************************************************/ void TestGSymMatrix::matrix_functions(void) { // Minimum double min = m_test.min(); // Check mimimum double value = g_matrix[0]; for (int row = 0; row < g_rows; ++row) { for (int col = 0; col < g_cols; ++col) { if (g_matrix[col+row*g_cols] < value) { value = g_matrix[col+row*g_cols]; } } } test_value(min, value, 0.0, "Test minimum function"); // Maximum double max = m_test.max(); // Check maximum value = g_matrix[0]; for (int row = 0; row < g_rows; ++row) { for (int col = 0; col < g_cols; ++col) { if (g_matrix[col+row*g_cols] > value) { value = g_matrix[col+row*g_cols]; } } } test_value(max, value, 0.0, "Test maximum function"); // Sum double sum = m_test.sum(); // Check sum value = 0.0; for (int row = 0; row < g_rows; ++row) { for (int col = 0; col < g_cols; ++col) { value += g_matrix[col+row*g_cols]; } } test_value(sum, value, 1.0e-20, "Test sum function"); // Transpose function GSymMatrix test1 = transpose(m_test); test_assert(check_matrix(m_test), "Test source matrix"); test_assert(check_matrix(test1, 1.0, 0.0), "Test transpose(GSymMatrix) function", "Unexpected transposed matrix:\n"+test1.print()); // Transpose method test1 = m_test; test1.transpose(); test_assert(check_matrix(m_test), "Test source matrix"); test_assert(check_matrix(test1, 1.0, 0.0), "Test GSymMatrix.transpose() method", "Unexpected transposed matrix:\n"+test1.print()); // Convert to general matrix GMatrix test2 = GMatrix(m_test); test_assert(check_matrix(m_test), "Test source matrix"); test_assert(check_matrix(test2, 1.0, 0.0), "Test GMatrix(GSymMatrix) constructor", "Unexpected GMatrix:\n"+test2.print()); // Extract lower triangle test2 = m_test.extract_lower_triangle(); test_assert(check_matrix(m_test), "Test source matrix"); test_assert(check_matrix_lt(test2, 1.0, 0.0), "Test 
GSymMatrix.extract_lower_triangle() method", "Unexpected GMatrix:\n"+test2.print()); // Extract upper triangle test2 = m_test.extract_upper_triangle(); test_assert(check_matrix(m_test), "Test source matrix"); test_assert(check_matrix_ut(test2, 1.0, 0.0), "Test GSymMatrix.extract_upper_triangle() method", "Unexpected GMatrix:\n"+test2.print()); // Return return; }
// Returns transpose() of this array with the second argument set to true.
// NOTE(review): presumably that flag requests the conjugate (Hermitian)
// transpose - confirm against the declaration of transpose().
array array::H() const
{
    const array &self = *this;
    return transpose(self, true);
}
/* Computes the row-wise pattern of S (symbolically S = B*B' with B = P*A),
   keeping only entries s[ii,jj] with ii < jj.

   m, n     - used as row and column counts of A (see the transpose() call);
   P_per    - permutation array; P_per[ii] is read for ii = 1..m and
              P_per[m+j] for row indices j of A;
   A_ptr,
   A_ind    - row pointers and column indices of A (1-based);
   S_ptr    - output: S_ptr[1..m+1] receives the row pointers of S.

   Returns a freshly allocated array S_ind whose locations
   1..S_ptr[m+1]-1 hold the column indices of S; the caller owns it. */
int *adat_symbolic(int m, int n, int P_per[], int A_ptr[], int A_ind[],
      int S_ptr[])
{     int a_row, a_other, a_col, a_pos, at_pos, s_row, s_col, q;
      int capacity, row_len;
      int *S_ind, *AT_ptr, *AT_ind, *row_pat, *seen, *previous;
      /* pattern of A' gives column-wise access to A */
      AT_ptr = xcalloc(1+n+1, sizeof(int));
      AT_ind = xcalloc(A_ptr[m+1], sizeof(int));
      transpose(m, n, A_ptr, A_ind, NULL, AT_ptr, AT_ind, NULL);
      /* initial capacity of S_ind: max(nnz(A), m) */
      capacity = A_ptr[m+1] - 1;
      if (capacity < m)
         capacity = m;
      S_ind = xcalloc(1+capacity, sizeof(int));
      /* working arrays: current row pattern and membership flags */
      row_pat = xcalloc(1+m, sizeof(int));
      seen = xcalloc(1+m, sizeof(int));
      for (s_col = 1; s_col <= m; s_col++)
         seen[s_col] = 0;
      /* build S one row at a time */
      S_ptr[1] = 1;
      for (s_row = 1; s_row <= m; s_row++)
      {  row_len = 0;
         /* row a_row of A is row s_row of B */
         a_row = P_per[s_row];
         for (a_pos = A_ptr[a_row]; a_pos < A_ptr[a_row+1]; a_pos++)
         {  a_col = A_ind[a_pos];
            /* every row of A sharing column a_col contributes an entry */
            for (at_pos = AT_ptr[a_col]; at_pos < AT_ptr[a_col+1];
               at_pos++)
            {  a_other = AT_ind[at_pos];
               /* row a_other of A is row s_col of B */
               s_col = P_per[m+a_other];
               /* keep strictly upper entries, each only once */
               if (s_col <= s_row || seen[s_col])
                  continue;
               row_pat[++row_len] = s_col;
               seen[s_col] = 1;
            }
         }
         /* row_pat[1..row_len] is now the pattern of row s_row of S */
         S_ptr[s_row+1] = S_ptr[s_row] + row_len;
         /* double the storage once if the new row does not fit */
         if (S_ptr[s_row+1] - 1 > capacity)
         {  previous = S_ind;
            capacity += capacity;
            S_ind = xcalloc(1+capacity, sizeof(int));
            memcpy(&S_ind[1], &previous[1],
               (S_ptr[s_row] - 1) * sizeof(int));
            xfree(previous);
         }
         xassert(S_ptr[s_row+1] - 1 <= capacity);
         /* append the row pattern to S_ind */
         memcpy(&S_ind[S_ptr[s_row]], &row_pat[1],
            row_len * sizeof(int));
         /* reset the membership flags touched by this row */
         for (q = 1; q <= row_len; q++)
            seen[row_pat[q]] = 0;
      }
      /* release the working arrays */
      xfree(AT_ptr);
      xfree(AT_ind);
      xfree(row_pat);
      xfree(seen);
      /* shrink S_ind to exactly the number of stored indices */
      previous = S_ind;
      capacity = S_ptr[m+1] - 1;
      S_ind = xcalloc(1+capacity, sizeof(int));
      memcpy(&S_ind[1], &previous[1], capacity * sizeof(int));
      xfree(previous);
      return S_ind;
}
// QR-decomposes M using MINPACK::qrfac on the transposed matrix.
//
// M     - input matrix (m rows, n columns).
// q     - output; resized to m x n if it does not already have that shape.
// r     - output; resized to n x n if it does not already have that shape.
// pivot - forwarded to qrfac as 1/0; when set, the returned vector holds the
//         pivot indices reported by qrfac, otherwise the identity 0..n-1.
Disposable<std::vector<Size> > qrDecomposition(const Matrix& M,
                                               Matrix& q, Matrix& r,
                                               bool pivot) {
    const Size m = M.rows();
    const Size n = M.columns();

    // qrfac works in place on the transposed copy.
    Matrix mT = transpose(M);

    boost::scoped_array<int> lipvt(new int[n]);
    boost::scoped_array<Real> rdiag(new Real[n]);
    boost::scoped_array<Real> wa(new Real[n]);

    const int pivotFlag = pivot ? 1 : 0;
    MINPACK::qrfac(m, n, mT.begin(), 0, pivotFlag,
                   lipvt.get(), n, rdiag.get(), rdiag.get(), wa.get());

    // Build r: zeros left of the diagonal, rdiag on it, and to the right
    // either the matching part of mT or zeros.
    if (r.rows() != n || r.columns() != n)
        r = Matrix(n, n);

    for (Size row = 0; row < n; ++row) {
        std::fill(r.row_begin(row), r.row_begin(row) + row, 0.0);
        r[row][row] = rdiag[row];
        if (row < m)
            std::copy(mT.column_begin(row) + row + 1, mT.column_end(row),
                      r.row_begin(row) + row + 1);
        else
            std::fill(r.row_begin(row) + row + 1, r.row_end(row), 0.0);
    }

    // Build q.
    if (q.rows() != m || q.columns() != n)
        q = Matrix(m, n);

    if (m > n) {
        // Start from the leading identity and apply the stored vectors
        // from the last one to the first.
        std::fill(q.begin(), q.end(), 0.0);

        Integer u = std::min(n, m);
        for (Size d = 0; d < u; ++d)
            q[d][d] = 1.0;

        Array v(m);
        for (Integer i = u - 1; i >= 0; --i) {
            if (!(std::fabs(mT[i][i]) > QL_EPSILON))
                continue;

            const Real tau = 1.0 / mT[i][i];

            std::fill(v.begin(), v.begin() + i, 0.0);
            std::copy(mT.row_begin(i) + i, mT.row_end(i), v.begin() + i);

            Array w(n, 0.0);
            for (Size col = 0; col < n; ++col)
                w[col] += std::inner_product(v.begin() + i, v.end(),
                                             q.column_begin(col) + i, 0.0);

            for (Size k = i; k < m; ++k) {
                const Real a = tau * v[k];
                for (Size col = 0; col < n; ++col)
                    q[k][col] -= a * w[col];
            }
        }
    } else {
        // Push each unit vector through the stored vectors, one row of q
        // at a time.
        const Size minDim = std::min(n, m);
        Array w(m);
        for (Size k = 0; k < m; ++k) {
            std::fill(w.begin(), w.end(), 0.0);
            w[k] = 1.0;

            for (Size j = 0; j < minDim; ++j) {
                const Real diag = mT[j][j];
                if (diag != 0.0) {
                    const Real t =
                        std::inner_product(mT.row_begin(j) + j, mT.row_end(j),
                                           w.begin() + j, 0.0) / diag;
                    for (Size i = j; i < m; ++i)
                        w[i] -= mT[j][i] * t;
                }
                q[k][j] = w[j];
            }
            std::fill(q.row_begin(k) + minDim, q.row_end(k), 0.0);
        }
    }

    // Pivot vector: qrfac's result when pivoting, the identity otherwise.
    std::vector<Size> ipvt(n);
    if (pivot) {
        std::copy(lipvt.get(), lipvt.get() + n, ipvt.begin());
    } else {
        for (Size idx = 0; idx < n; ++idx)
            ipvt[idx] = idx;
    }

    return ipvt;
}
// Transposes this matrix by forwarding to the transpose overload that takes
// a pointer, passing `this`.
// NOTE(review): presumably the pointer overload transposes in place - confirm
// against its definition.
void Matrix::transpose() { transpose(this); }
void LocalLinearLeastSquaresExtrapolator::extrapolateElement( std::size_t const element_index, const unsigned num_components, ExtrapolatableElementCollection const& extrapolatables, const double t, GlobalVector const& current_solution, LocalToGlobalIndexMap const& dof_table, GlobalVector& counts) { auto const& integration_point_values = extrapolatables.getIntegrationPointValues( element_index, t, current_solution, dof_table, _integration_point_values_cache); auto const& N_0 = extrapolatables.getShapeMatrix(element_index, 0); auto const num_nodes = static_cast<unsigned>(N_0.cols()); auto const num_values = static_cast<unsigned>(integration_point_values.size()); if (num_values % num_components != 0) OGS_FATAL( "The number of computed integration point values is not divisable " "by the number of num_components. Maybe the computed property is " "not a %d-component vector for each integration point.", num_components); // number of integration points in the element const auto num_int_pts = num_values / num_components; if (num_int_pts < num_nodes) OGS_FATAL( "Least squares is not possible if there are more nodes than" "integration points."); auto const pair_it_inserted = _qr_decomposition_cache.emplace( std::make_pair(num_nodes, num_int_pts), CachedData{}); auto& cached_data = pair_it_inserted.first->second; if (pair_it_inserted.second) { DBUG("Computing new singular value decomposition"); // interpolation_matrix * nodal_values = integration_point_values // We are going to pseudo-invert this relation now using singular value // decomposition. 
auto& interpolation_matrix = cached_data.A; interpolation_matrix.resize(num_int_pts, num_nodes); interpolation_matrix.row(0) = N_0; for (unsigned int_pt = 1; int_pt < num_int_pts; ++int_pt) { auto const& shp_mat = extrapolatables.getShapeMatrix(element_index, int_pt); assert(shp_mat.cols() == num_nodes); // copy shape matrix to extrapolation matrix row-wise interpolation_matrix.row(int_pt) = shp_mat; } // JacobiSVD is extremely reliable, but fast only for small matrices. // But we usually have small matrices and we don't compute very often. // Cf. // http://eigen.tuxfamily.org/dox/group__TopicLinearAlgebraDecompositions.html // // Decomposes interpolation_matrix = U S V^T. Eigen::JacobiSVD<Eigen::MatrixXd> svd( interpolation_matrix, Eigen::ComputeThinU | Eigen::ComputeThinV); auto const& S = svd.singularValues(); auto const& U = svd.matrixU(); auto const& V = svd.matrixV(); // Compute and save the pseudo inverse V * S^{-1} * U^T. auto const rank = svd.rank(); assert(rank == num_nodes); // cf. http://eigen.tuxfamily.org/dox/JacobiSVD_8h_source.html cached_data.A_pinv.noalias() = V.leftCols(rank) * S.head(rank).asDiagonal().inverse() * U.leftCols(rank).transpose(); } else if (cached_data.A.row(0) != N_0) { OGS_FATAL("The cached and the passed shapematrices differ."); } auto const& global_indices = _dof_table_single_component(element_index, 0).rows; if (num_components == 1) { auto const integration_point_values_vec = MathLib::toVector(integration_point_values); // Apply the pre-computed pseudo-inverse. Eigen::VectorXd const nodal_values = cached_data.A_pinv * integration_point_values_vec; // TODO does that give rise to PETSc problems? E.g., writing to ghost // nodes? Furthermore: Is ghost nodes communication necessary for PETSc? 
_nodal_values->add(global_indices, nodal_values); counts.add(global_indices, std::vector<double>(global_indices.size(), 1.0)); } else { auto const integration_point_values_mat = MathLib::toMatrix( integration_point_values, num_components, num_int_pts); // Apply the pre-computed pseudo-inverse. Eigen::MatrixXd const nodal_values = cached_data.A_pinv * integration_point_values_mat.transpose(); std::vector<GlobalIndexType> indices; indices.reserve(num_components * global_indices.size()); // _nodal_values is ordered location-wise for (unsigned comp = 0; comp < num_components; ++comp) { for (auto i : global_indices) { indices.push_back(num_components * i + comp); } } // Nodal_values are passed as a raw pointer, because PETScVector and // EigenVector implementations differ slightly. _nodal_values->add(indices, nodal_values.data()); counts.add(indices, std::vector<double>(indices.size(), 1.0)); } }
void on_draw() override { glfwMakeContextCurrent(window); if (igm) igm->begin_frame(); int width, height; glfwGetWindowSize(window, &width, &height); glViewport(0, 0, width, height); glEnable(GL_DEPTH_TEST); glEnable(GL_CULL_FACE); glDepthMask(GL_TRUE); float windowAspectRatio = (float) width / (float) height; const auto proj = camera.get_projection_matrix(windowAspectRatio); const float4x4 view = camera.get_view_matrix(); const float4x4 viewProj = mul(proj, view); glClearColor(0.1f, 0.1f, 0.1f, 1.0f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); skydome.render(viewProj, camera.get_eye_point(), camera.farClip); float3 target = camera.pose.position; //////////////////////////////////////////// // Directional Light Shadowmap Pass (sun) // //////////////////////////////////////////// { shadowFramebuffer.bind_to_draw(); shadowmapShader->bind(); glClear(GL_DEPTH_BUFFER_BIT); glViewport(0, 0, shadowmapResolution, shadowmapResolution); shadowmapShader->uniform("u_lightViewProj", sunLight->get_view_proj_matrix(target)); for (auto & object : sceneObjects) { if (object->castsShadow) { shadowmapShader->uniform("u_modelMatrix", object->get_model()); object->draw(); } } shadowmapShader->unbind(); shadowFramebuffer.unbind(); } /////////////////////////////// // Spot Light Shadowmap Pass // /////////////////////////////// { for (int i = 0; i < spotLightFramebuffers.size(); ++i) { spotLightFramebuffers[i]->shadowFramebuffer.bind_to_draw(); shadowmapShader->bind(); glClear(GL_DEPTH_BUFFER_BIT); glViewport(0, 0, shadowmapResolution, shadowmapResolution); shadowmapShader->uniform("u_lightViewProj", spotLights[0]->get_view_proj_matrix()); // only take the first into account for debugging for (auto & object : sceneObjects) { if (object->castsShadow) { shadowmapShader->uniform("u_modelMatrix", object->get_model()); object->draw(); } } shadowmapShader->unbind(); spotLightFramebuffers[i]->shadowFramebuffer.unbind(); } } //////////////////////////////// // Point Light Shadowmap Pass 
// //////////////////////////////// { glViewport(0, 0, shadowmapResolution, shadowmapResolution); for (int i = 0; i < 6; ++i) { pointLightFramebuffer->bind(pointLightFramebuffer->faces[i].face); pointLightShader->bind(); glClear(GL_DEPTH_BUFFER_BIT); pointLightFramebuffer->faces[i].faceCamera.set_position(pointLight->position); // set position from light data to camera for shadow fbo auto viewProj = mul(pointLightFramebuffer->get_projection(), pointLightFramebuffer->faces[i].faceCamera.get_view_matrix()); pointLightShader->uniform("u_lightWorldPosition", pointLight->position); pointLightShader->uniform("u_lightViewProj", viewProj); for (auto & object : sceneObjects) { if (object->castsShadow) { pointLightShader->uniform("u_modelMatrix", object->get_model()); object->draw(); } } pointLightShader->unbind(); pointLightFramebuffer->unbind(); } } // Blur applied to the directional light shadowmap only (others later) { shadowBlurFramebuffer.bind_to_draw(); glDrawBuffer(GL_COLOR_ATTACHMENT0); gaussianBlurShader->bind(); // Configured for a 7x7 gaussianBlurShader->uniform("blurSize", 1.0f / shadowmapResolution); gaussianBlurShader->uniform("sigma", blurSigma); gaussianBlurShader->uniform("u_modelViewProj", Identity4x4); // Horizontal gaussianBlurShader->texture("s_blurTexure", 0, shadowDepthTexture); gaussianBlurShader->uniform("numBlurPixelsPerSide", 3.0f); gaussianBlurShader->uniform("blurMultiplyVec", float2(1.0f, 0.0f)); fullscreen_post_quad.draw_elements(); // Vertical gaussianBlurShader->texture("s_blurTexure", 0, shadowBlurTexture); gaussianBlurShader->uniform("numBlurPixelsPerSide", 3.0f); gaussianBlurShader->uniform("blurMultiplyVec", float2(0.0f, 1.0f)); fullscreen_post_quad.draw_elements(); gaussianBlurShader->unbind(); shadowBlurFramebuffer.unbind(); } { glViewport(0, 0, width, height); sceneShader->bind(); sceneShader->uniform("u_viewProj", viewProj); sceneShader->uniform("u_eye", camera.get_eye_point()); sceneShader->uniform("u_directionalLight.color", 
sunLight->color); sceneShader->uniform("u_directionalLight.direction", sunLight->direction); sceneShader->uniform("u_dirLightViewProjectionMat", sunLight->get_view_proj_matrix(target)); int samplerIndex = 0; sceneShader->uniform("u_shadowMapBias", 0.01f / shadowmapResolution); // fixme sceneShader->uniform("u_shadowMapTexelSize", float2(1.0f / shadowmapResolution)); sceneShader->texture("s_directionalShadowMap", samplerIndex++, shadowBlurTexture); sceneShader->uniform("u_spotLightViewProjectionMat[0]", spotLights[0]->get_view_proj_matrix()); sceneShader->uniform("u_spotLights[0].color", spotLights[0]->color); sceneShader->uniform("u_spotLights[0].direction", spotLights[0]->direction); sceneShader->uniform("u_spotLights[0].position", spotLights[0]->position); sceneShader->uniform("u_spotLights[0].cutoff", spotLights[0]->get_cutoff()); sceneShader->uniform("u_spotLights[0].constantAtten", spotLights[0]->attenuation.x); sceneShader->uniform("u_spotLights[0].linearAtten", spotLights[0]->attenuation.y); sceneShader->uniform("u_spotLights[0].quadraticAtten", spotLights[0]->attenuation.z); sceneShader->uniform("u_pointLights[0].color", pointLight->color); sceneShader->uniform("u_pointLights[0].position", pointLight->position); sceneShader->uniform("u_pointLights[0].constantAtten", pointLight->attenuation.x); sceneShader->uniform("u_pointLights[0].linearAtten", pointLight->attenuation.y); sceneShader->uniform("u_pointLights[0].quadraticAtten", pointLight->attenuation.z); // Update the spotlight 2D sampler for (int i = 0; i < spotLightFramebuffers.size(); ++i) { auto & fbo = spotLightFramebuffers[i]; std::string uniformLocation = "s_spotLightShadowMap[" + std::to_string(i) + "]"; sceneShader->texture(uniformLocation.c_str(), samplerIndex + i, fbo->shadowDepthTexture); } // Update the pointlight cube sampler for (int i = 0; i < 6; i++) sceneShader->texture("s_pointLightCubemap[0]", 2 + i, pointLightFramebuffer->cubeMapHandle, GL_TEXTURE_CUBE_MAP); for (auto & object : 
sceneObjects) { sceneShader->uniform("u_modelMatrix", object->get_model()); sceneShader->uniform("u_modelMatrixIT", inv(transpose(object->get_model()))); object->draw(); gl_check_error(__FILE__, __LINE__); } sceneShader->unbind(); } { ImGui::Separator(); ImGui::SliderFloat("Near Clip", &camera.nearClip, 0.1f, 2.0f); ImGui::SliderFloat("Far Clip", &camera.farClip, 2.0f, 75.0f); ImGui::DragFloat3("Light Direction", &sunLight->direction[0], 0.1f, -1.0f, 1.0f); ImGui::Separator(); ImGui::SliderFloat("Blur Sigma", &blurSigma, 0.05f, 9.0f); ImGui::Separator(); ImGui::Text("Application average %.3f ms/frame (%.1f FPS)", 1000.0f / ImGui::GetIO().Framerate, ImGui::GetIO().Framerate); } viewA->draw(uiSurface.children[0]->bounds, int2(width, height)); viewB->draw(uiSurface.children[1]->bounds, int2(width, height)); viewC->draw(uiSurface.children[2]->bounds, int2(width, height)); viewD->draw(uiSurface.children[3]->bounds, int2(width, height)); gl_check_error(__FILE__, __LINE__); if (igm) igm->end_frame(); glfwSwapBuffers(window); }
void varinit(void) { int i; /* * Resetting all flags */ Intr1_Cnt=0; Intr2_Cnt=0; IRQ1Flag = 1; IRQ2Flag = 1; WSZ = 34; TA_cnt =0; count = 0; qcnt = 0; velcnt = 0; rtime = 0.0; rcnt = 0; cnt_10ms = 0; latm = MasterLat; longm = MasterLon; epsilon = 0.0; four_delt = 4.0 * del_t; eight_delt = 8.0 * del_t; cdr_delt = cdr * del_t; cdr_delt_ms = cdr_delt / 3600; for(i=0;i<32;i++){ Array_SA[i] = 0; } for(i=0;i<3;i++) { velo_ref_y[i] = 0.0; velo_ref_yold[i] = 0.0;; velo_ref_x[i] = 0.0; velo_ref_xold[i] = 0.0; pure_vel[i] = 0.0; p_velo_20ms[i] = 0.0; p_velo[i] = 0.0; pure_v_old[i] = 0.0; p_Ang[i] = 0.0; pure_gyro_drift[i] = 0.0; pure_acc_residu[i] = 0.0; } #if 0 /* these are known misalignment angles between M and S - * Measured w.r.t Master to give DCM from slave to Master. * Beware they are not between slave to NED */ known_si = 0.0 * cdr; known_theta = 0.0 * cdr; known_phi = 0.0 * cdr; euler2dcm_stp(0, 0, 0, (double*)CSkew_est); transpose(3, 3, (double*)CSkew_est, (double*)CSkew_est_T); euler2dcm_stp(known_si, known_theta, known_phi, (double*)CS2M_K); transpose(3, 3, (double*)CS2M_K, (double*)CM2S_K); euler2dcm_stp(THDG, PITCH, ROLL, (double*)Cb2ned_M); matmul(3, 3, (double*)Cb2ned_M, 3, 3, (double*)CS2M_K, (double*)Cb2ned_S); if(ta_flag==1 && nav_flag==1) { dcm2quat((double*)Cb2ned_S, (double *)p_q_body2ned); } else if(ta_flag ==0 && level_flag==1) #endif { euler2quat_spt(mdl_si,mdl_phi,mdl_theta,(double *)p_q_body2ned); p_si = mdl_si; p_phi = mdl_phi; p_theta = mdl_theta; } ned2ecef_q(latm, longm,(double*) q_ned2ecef); quat_mult((double*)q_ned2ecef,(double*)p_q_body2ned, (double*)p_q_body2ecef); /* * Modification after Manjit discussion */ quat2dcm((double *)p_q_body2ecef,(double*)p_dcm); quat2dcm((double *)q_ned2ecef,(double*)p_dcm_n); matmul(3,3, (double*)p_dcm_n,3,1,(double*)MasterVel,(double*)pure_vel); pure_v_old[0] = pure_vel[0]; pure_v_old[1] = pure_vel[1]; pure_v_old[2] = pure_vel[2]; init(0.0, 0.0, 0.0, p_velo_20ms); init(0.0, 0.0, 0.0, p_velo); 
init(0.0,0.0,0.0,pure_gyro_drift); init(0.0,0.0,0.0,pure_acc_residu); for (i = 0; i < 3; i++) { p_alp1[i] = 0.0; p_alp2[i] = 0.0; p_alp3[i] = 0.0; p_alp4[i] = 0.0; } for (i = 0; i < 3; i++) Delta_Angle[i] = 0.0; for (i = 0; i < 6; i++) accum1[i] = 0.0; init(0.0, 0.0, earth_rate, omega); //earth rate vector ECEF //used in levelling Ned_omega[0] = earth_rate * cos(latm); Ned_omega[1] = 0.0; Ned_omega[2] = -earth_rate *sin(latm); for (i = 0; i < 3; i++) omg_dub[i] = 2.0 * omega[i]; r_init = r0 * (1.0 - eccen * (sin(latm) * sin(latm))); pure_R = r_init + MasterAlt; // altitude; lla2ecef(latm,longm,MasterAlt,(double *)pure_ecef_pos); //input is geodetic pure_g_ecef(); /**** for epsilon estimation ****/ init(0.0, 0.0, -pure_g_ecef_mag, Ned_gravity_detic); } //end of varinit()
/* Allocates room for `count` doubles for svd_gpu, or reports the failure
 * and aborts. At least one element is always requested, because malloc(0)
 * may legitimately return NULL and must not be mistaken for a failure. */
static double *svd_gpu_alloc(size_t count, const char *what){
  double *block = (double *) malloc(sizeof(double) * (count > 0 ? count : 1));
  if(!block) {
    fprintf(stderr,"in svd_gpu: failed to allocate %s\n", what);
    abort();
  }
  return block;
}

void svd_gpu(int m, int n, double* A,double * sigma, double * U, double* V){
  /**********************************************
   * Description: Function call that performs an SVD on the matrix A.
   *
   * Author: Steven Delong
   *
   * inputs:
   *   A - m x n matrix, stored in column-major order.
   *       It is handed to bidiag_par() before being read back for the
   *       Householder reflectors, so it is presumably overwritten
   *       (confirm against bidiag_par).
   *   m - number of rows of A
   *   n - number of columns of A
   *
   * Outputs:
   *   U     - left singular vectors of A; vector i is written at U + i*m
   *   V     - right singular vectors of A; vector i is written at V + i*n
   *   sigma - singular values of A, length min(m,n)
   *
   * Aborts the program if an intermediate buffer cannot be allocated.
   ***********************************************************/

  // figure out minimum dimension
  int mn = (m >= n) ? n : m;
  int len_beta = (m >= n) ? n-1 : m;

  // allocate space for intermediate values
  double *AT    = svd_gpu_alloc((size_t) m * (size_t) n, "AT");
  double *alpha = svd_gpu_alloc((size_t) mn, "alpha");
  double *beta  = svd_gpu_alloc((size_t) len_beta, "beta");
  double *X     = svd_gpu_alloc((size_t) mn * (size_t) (len_beta + 1), "X");
  double *Y     = svd_gpu_alloc((size_t) mn * (size_t) mn, "Y");

  // bidiagonalize
  bidiag_par(m,n,A,alpha,beta);

  //transpose householder reflectors for applying to X later
  transpose(m,n,A,AT);

  //get singular values
  GetSingularValues_Parallel( mn , alpha, beta, sigma);

  // calculate right singular vectors
  CalcRightSingularVectors(mn,len_beta + 1,alpha,beta,sigma,X);

  // get left singular vectors
  RighttoLeftSingularVectors(mn,len_beta + 1,alpha,beta,sigma,X,Y);

  // apply householder reflectors to X,Y to get U,V
  #pragma omp parallel for default(shared) firstprivate(A)
  for(int i = 0; i < mn; i++){
    multU(m,n,i,A,Y,U + i*m);
    multV(m,n,i,AT,X,V + i*n);
  }

  // free malloc'd intermediates
  free(AT);
  free(alpha);
  free(beta);
  free(X);
  free(Y);
}
// Computes into X a matrix with m - r rows and m columns, where m is the
// number of rows of A and r is the value returned by gauss() on the
// transpose of A. Each row of X is built by back-substitution against the
// eliminated matrix: free positions get 1 or 0, pivot positions get the
// negated, pivot-normalized combination of the entries to their right.
void kernel(mat_ZZ_p& X, const mat_ZZ_p& A)
{
   long m = A.NumRows();
   long n = A.NumCols();   // not used below

   // Eliminate on the transpose of A.
   mat_ZZ_p M;
   transpose(M, A);
   long r = gauss(M);

   X.SetDims(m-r, m);

   // pivot_of[c] is the row whose pivot sits in column c, or -1 if none.
   vec_long pivot_of;
   pivot_of.SetLength(m);
   long c;
   for (c = 0; c < m; c++)
      pivot_of[c] = -1;

   // Inverse of each pivot element, stored at the pivot's column.
   vec_ZZ_p inverses;
   inverses.SetLength(m);

   // Each row's pivot is its first non-zero entry right of the previous one.
   long row;
   c = -1;
   for (row = 0; row < r; row++) {
      do {
         c++;
      } while (IsZero(M[row][c]));

      pivot_of[c] = row;
      inv(inverses[c], M[row][c]);
   }

   ZZ acc, term;
   ZZ_p scaled;

   long k, s;
   for (k = 0; k < m-r; k++) {
      vec_ZZ_p& v = X[k];
      long free_seen = 0;

      // Fill the row from the last column towards the first.
      for (c = m-1; c >= 0; c--) {
         row = pivot_of[c];

         if (row == -1) {
            // Free column: the k-th free column gets 1, all others 0.
            if (free_seen == k)
               set(v[c]);
            else
               clear(v[c]);
            free_seen++;
            continue;
         }

         // Pivot column: v[c] = -(sum_{s > c} v[s] * M[row][s]) / pivot.
         clear(acc);
         for (s = c+1; s < m; s++) {
            mul(term, rep(v[s]), rep(M[row][s]));
            add(acc, acc, term);
         }

         conv(scaled, acc);
         mul(scaled, scaled, inverses[c]);
         negate(v[c], scaled);
      }
   }
}
// Translates the graph stored in model_proto into layers of dstNet.
//
// The function walks the graph nodes in order. For each node it either
//  * folds it into a constant (Constant, Unsqueeze, Shape, Gather, and
//    Reshape / Concat when their inputs are not produced by a layer): the
//    result is stored in constBlobs under the node's first output name and
//    no layer is added, or
//  * maps the ONNX op type to a layer type / parameters, adds the layer to
//    dstNet, connects its non-constant inputs and records the shape of its
//    first output in outShapes.
//
// Failed CV_Assert / CV_Error checks report unsupported or malformed input.
void ONNXImporter::populateNet(Net dstNet)
{
    CV_Assert(model_proto.has_graph());
    opencv_onnx::GraphProto graph_proto = model_proto.graph();
    std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
    // List of internal blobs shapes.
    std::map<std::string, MatShape> outShapes;
    // Add the shapes of all graph inputs. This covers both the constant blobs
    // and the network's real inputs.
    for (int i = 0; i < graph_proto.input_size(); ++i)
    {
        opencv_onnx::ValueInfoProto valueInfoProto = graph_proto.input(i);
        CV_Assert(valueInfoProto.has_type());
        opencv_onnx::TypeProto typeProto = valueInfoProto.type();
        CV_Assert(typeProto.has_tensor_type());
        opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
        CV_Assert(tensor.has_shape());
        opencv_onnx::TensorShapeProto tensorShape = tensor.shape();

        MatShape inpShape(tensorShape.dim_size());
        for (int j = 0; j < inpShape.size(); ++j)
        {
            inpShape[j] = tensorShape.dim(j).dim_value();
        }
        outShapes[valueInfoProto.name()] = inpShape;
    }

    std::string framework_name;
    if (model_proto.has_producer_name())
    {
        framework_name = model_proto.producer_name();
    }

    // create map with network inputs (without const blobs)
    std::map<std::string, LayerInfo> layer_id;
    std::map<std::string, LayerInfo>::iterator layerId;
    std::map<std::string, MatShape>::iterator shapeIt;
    // fill map: push layer name, layer id and output id
    std::vector<String> netInputs;
    for (int j = 0; j < graph_proto.input_size(); j++)
    {
        const std::string& name = graph_proto.input(j).name();
        if (constBlobs.find(name) == constBlobs.end()) {
            netInputs.push_back(name);
            layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
        }
    }
    dstNet.setInputsNames(netInputs);

    int layersSize = graph_proto.node_size();
    LayerParams layerParams;
    opencv_onnx::NodeProto node_proto;

    for(int li = 0; li < layersSize; li++)
    {
        node_proto = graph_proto.node(li);
        layerParams = getLayerParams(node_proto);
        CV_Assert(node_proto.output_size() >= 1);
        // A layer is named after the first output of its node.
        layerParams.name = node_proto.output(0);

        std::string layer_type = node_proto.op_type();
        layerParams.type = layer_type;

        if (layer_type == "MaxPool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "MAX");
            layerParams.set("ceil_mode", isCeilMode(layerParams));
        }
        else if (layer_type == "AveragePool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "AVE");
            layerParams.set("ceil_mode", isCeilMode(layerParams));
            layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
        }
        else if (layer_type == "GlobalAveragePool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "AVE");
            layerParams.set("global_pooling", true);
        }
        else if (layer_type == "Add" || layer_type == "Sum")
        {
            // Second operand not produced by a layer => it is a constant.
            if (layer_id.find(node_proto.input(1)) == layer_id.end())
            {
                Mat blob = getBlob(node_proto, constBlobs, 1);
                blob = blob.reshape(1, 1);
                if (blob.total() == 1) {
                    layerParams.type = "Power";
                    layerParams.set("shift", blob.at<float>(0));
                }
                else {
                    layerParams.type = "Scale";
                    layerParams.set("bias_term", true);
                    layerParams.blobs.push_back(blob);
                }
            }
            else {
                layerParams.type = "Eltwise";
            }
        }
        else if (layer_type == "Sub")
        {
            Mat blob = getBlob(node_proto, constBlobs, 1);
            if (blob.total() == 1) {
                layerParams.type = "Power";
                layerParams.set("shift", -blob.at<float>(0));
            }
            else {
                layerParams.type = "Scale";
                // The negated blob must act as a bias. Every other "Scale"
                // branch in this function configures that through
                // "bias_term"; the previous "has_bias" key did not match.
                layerParams.set("bias_term", true);
                layerParams.blobs.push_back(-1.0f * blob.reshape(1, 1));
            }
        }
        else if (layer_type == "Div")
        {
            Mat blob = getBlob(node_proto, constBlobs, 1);
            CV_Assert_N(blob.type() == CV_32F, blob.total());
            if (blob.total() == 1)
            {
                layerParams.set("scale", 1.0f / blob.at<float>(0));
                layerParams.type = "Power";
            }
            else
            {
                layerParams.type = "Scale";
                // Division by a constant becomes multiplication by its reciprocal.
                divide(1.0, blob, blob);
                layerParams.blobs.push_back(blob);
                layerParams.set("bias_term", false);
            }
        }
        else if (layer_type == "Constant")
        {
            CV_Assert(node_proto.input_size() == 0);
            CV_Assert(layerParams.blobs.size() == 1);
            constBlobs.insert(std::make_pair(layerParams.name, layerParams.blobs[0]));
            continue;
        }
        else if (layer_type == "ImageScaler")
        {
            const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
            layerParams.erase("scale");

            if (layerParams.has("bias"))
            {
                layerParams.type = "Scale";
                layerParams.blobs.push_back(
                    Mat(Size(1,  layerParams.get("bias").size()), CV_32FC1, scale));

                layerParams.set("bias_term", true);
                Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
                for (int j = 0; j < bias.total(); j++) {
                    bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
                }
                layerParams.blobs.push_back(bias);
                layerParams.erase("bias");
            }
            else {
                layerParams.set("scale", scale);
                layerParams.type = "Power";
            }
        }
        else if (layer_type == "LeakyRelu")
        {
            layerParams.type = "ReLU";
            replaceLayerParam(layerParams, "alpha", "negative_slope");
        }
        else if (layer_type == "LRN")
        {
            replaceLayerParam(layerParams, "size", "local_size");
        }
        else if (layer_type == "BatchNormalization")
        {
            if (node_proto.input_size() != 5)
                CV_Error(Error::StsNotImplemented,
                         "Expected input, scale, bias, mean and var");

            layerParams.type = "BatchNorm";
            replaceLayerParam(layerParams, "epsilon", "eps");
            replaceLayerParam(layerParams, "spatial", "use_global_stats");

            // Blob order pushed here: inputs 3 and 4 first, then the
            // optional inputs 1 (weight) and 2 (bias).
            Mat meanData = getBlob(node_proto, constBlobs, 3);
            Mat stdData =  getBlob(node_proto, constBlobs, 4);

            layerParams.blobs.push_back(meanData);
            layerParams.blobs.push_back(stdData);

            if (!node_proto.input(1).empty()) {
                layerParams.set("has_weight", true);
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, 1));  // weightData
            } else {
                layerParams.set("has_weight", false);
            }

            if (!node_proto.input(2).empty()) {
                layerParams.set("has_bias", true);
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, 2)); // biasData
            } else {
                layerParams.set("has_bias", false);
            }
        }
        else if (layer_type == "Gemm")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "InnerProduct";
            Mat weights = getBlob(node_proto, constBlobs, 1);
            int ind_num_out = 0;
            if (layerParams.has("transB") && !layerParams.get<int>("transB")) {
                transpose(weights, weights);
                ind_num_out = 1;
            }
            layerParams.blobs.push_back(weights);

            if (node_proto.input_size() == 3) {
                Mat bias = getBlob(node_proto, constBlobs, 2);
                layerParams.blobs.push_back(bias);
            }

            layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "MatMul")
        {
            CV_Assert(node_proto.input_size() == 2);
            layerParams.type = "InnerProduct";
            Mat blob = getBlob(node_proto, constBlobs, 1);
            layerParams.blobs.push_back(blob.t());
            layerParams.set("bias_term", false);
            layerParams.set("num_output", layerParams.blobs[0].size[0]);
        }
        else if (layer_type == "Mul")
        {
            CV_Assert(node_proto.input_size() == 2);
            if (layer_id.find(node_proto.input(1)) == layer_id.end()) {
                Mat blob = getBlob(node_proto, constBlobs, 1);
                blob = blob.reshape(1, 1);
                if (blob.total() == 1) {
                    layerParams.set("scale", blob.at<float>(0));
                    layerParams.type = "Power";
                }
                else {
                    layerParams.blobs.push_back(blob);
                    layerParams.type = "Scale";
                }
            }
            else {
                layerParams.type = "Eltwise";
                layerParams.set("operation", "prod");
            }
        }
        else if (layer_type == "Conv")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Convolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
            layerParams.set("num_output", layerParams.blobs[0].size[0]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "ConvTranspose")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Deconvolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
            layerParams.set("num_output", layerParams.blobs[0].size[1]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "Transpose")
        {
            layerParams.type = "Permute";
            replaceLayerParam(layerParams, "perm", "order");
        }
        else if (layer_type == "Unsqueeze")
        {
            CV_Assert(node_proto.input_size() == 1);
            Mat input = getBlob(node_proto, constBlobs, 0);

            DictValue axes = layerParams.get("axes");
            std::vector<int> dims;
            for (int j = 0; j < input.dims; j++) {
                dims.push_back(input.size[j]);
            }
            CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size());
            for (int j = 0; j < axes.size(); j++) {
                dims.insert(dims.begin() + axes.getIntValue(j), 1);
            }

            Mat out = input.reshape(0, dims);
            constBlobs.insert(std::make_pair(layerParams.name, out));
            continue;
        }
        else if (layer_type == "Reshape")
        {
            CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));

            if (node_proto.input_size() == 2) {
                // Target shape is given as a second input blob.
                Mat blob = getBlob(node_proto, constBlobs, 1);
                CV_Assert(blob.type() == CV_32SC1);

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    // Data input is constant: reshape it now.
                    Mat input = getBlob(node_proto, constBlobs, 0);
                    Mat out = input.reshape(0, static_cast<std::vector<int> >(blob));
                    constBlobs.insert(std::make_pair(layerParams.name, out));
                    continue;
                }
                layerParams.set("dim", DictValue::arrayInt<int*>(
                            blob.ptr<int>(), blob.total() ));
            }
            else {
                // Target shape is given as the "shape" attribute.
                DictValue shape = layerParams.get("shape");
                std::vector<int> dim;
                for (int j = 0; j < shape.size(); j++) {
                    dim.push_back(shape.getIntValue(j));
                }

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    Mat input = getBlob(node_proto, constBlobs, 0);
                    Mat out = input.reshape(0, dim);
                    constBlobs.insert(std::make_pair(layerParams.name, out));
                    continue;
                }
                replaceLayerParam(layerParams, "shape", "dim");
            }
        }
        else if (layer_type == "Pad")
        {
            layerParams.type = "Padding";
        }
        else if (layer_type == "Shape")
        {
            CV_Assert(node_proto.input_size() == 1);
            shapeIt = outShapes.find(node_proto.input(0));
            CV_Assert(shapeIt != outShapes.end());
            MatShape inpShape = shapeIt->second;

            Mat shapeMat(inpShape.size(), 1, CV_32S);
            for (int j = 0; j < inpShape.size(); ++j)
                shapeMat.at<int>(j) = inpShape[j];
            shapeMat.dims = 1;

            constBlobs.insert(std::make_pair(layerParams.name, shapeMat));
            continue;
        }
        else if (layer_type == "Gather")
        {
            CV_Assert(node_proto.input_size() == 2);
            CV_Assert(layerParams.has("axis"));
            Mat input = getBlob(node_proto, constBlobs, 0);
            Mat indexMat = getBlob(node_proto, constBlobs, 1);
            CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
            int index = indexMat.at<int>(0);
            int axis = layerParams.get<int>("axis");

            // Select the single slice `index` along `axis`.
            std::vector<cv::Range> ranges(input.dims, Range::all());
            ranges[axis] = Range(index, index + 1);

            Mat out = input(ranges);
            constBlobs.insert(std::make_pair(layerParams.name, out));
            continue;
        }
        else if (layer_type == "Concat")
        {
            bool hasVariableInps = false;
            for (int i = 0; i < node_proto.input_size(); ++i)
            {
                if (layer_id.find(node_proto.input(i)) != layer_id.end())
                {
                    hasVariableInps = true;
                    break;
                }
            }

            if (!hasVariableInps)
            {
                // All inputs are constants: run the layer once at import time.
                std::vector<Mat> inputs(node_proto.input_size()), concatenated;
                for (size_t i = 0; i < inputs.size(); ++i)
                {
                    inputs[i] = getBlob(node_proto, constBlobs, i);
                }
                Ptr<Layer> concat = ConcatLayer::create(layerParams);
                runLayer(concat, inputs, concatenated);

                CV_Assert(concatenated.size() == 1);
                constBlobs.insert(std::make_pair(layerParams.name, concatenated[0]));
                continue;
            }
        }
        else
        {
            // Any other op keeps its ONNX type name; inputs that are not
            // produced by a layer are attached as blobs.
            for (int j = 0; j < node_proto.input_size(); j++) {
                if (layer_id.find(node_proto.input(j)) == layer_id.end())
                    layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
        }

        int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
        layer_id.insert(std::make_pair(layerParams.name, LayerInfo(id, 0)));

        std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
        for (int j = 0; j < node_proto.input_size(); j++) {
            layerId = layer_id.find(node_proto.input(j));
            if (layerId != layer_id.end()) {
                dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
                // Collect input shapes.
                shapeIt = outShapes.find(node_proto.input(j));
                CV_Assert(shapeIt != outShapes.end());
                layerInpShapes.push_back(shapeIt->second);
            }
        }

        // Compute shape of output blob for this layer.
        Ptr<Layer> layer = dstNet.getLayer(id);
        layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
        CV_Assert(!layerOutShapes.empty());
        outShapes[layerParams.name] = layerOutShapes[0];
    }
}
int main(int argc, char **argv) { double wall_start = MPI_Wtime(); Real *diag, **b, **bt, **z; Real pi, h, omp_local_max, local_max, global_max; int i, j, omp_id; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); omp_tot_threads = omp_get_max_threads(); /* the total number of grid points in each spatial direction is (n+1) */ /* the total number of degrees-of-freedom in each spatial direction is (n-1) */ /* this version requires n to be a power of 2 */ if (argc < 2) { if (mpi_rank == 0){ printf("need a problem size\n"); } MPI_Finalize(); return 0; } n = atoi(argv[1]); m = n-1; // mpi_work is the amount of work needed to be done by each mpi node. The last // mpi node may do slightly less work than the others, but that's the closest // we'll get to proper load balancing. mpi_work = 1 + ((m - 1) / mpi_size); nn = 4*n; diag = createRealArray (m); b = createReal2DArray (mpi_work, mpi_size*mpi_work); bt = createReal2DArray (mpi_work, mpi_size*mpi_work); z = createReal2DArray (omp_tot_threads, nn); h = 1./(Real)n; pi = 4.*atan(1.); #pragma omp parallel for private(i) for (i=0; i < m; i++) { diag[i] = 2.*(1.-cos((i+1)*pi/(Real)n)); } #pragma omp parallel for private(j, i) for (j=0; j < mpi_work; j++) { // MPI for (i=0; j + mpi_work * mpi_rank < m && i < m; i++) { // OMP b[j][i] = h*h; } } #pragma omp parallel for private(omp_id, i) for (j=0; j < mpi_work; j++) { // MPI cut + OMP omp_id = omp_get_thread_num(); fst_(b[j], &n, z[omp_id], &nn); } transpose (bt,b); #pragma omp parallel for private(i, omp_id) schedule(static) for (i=0; i < mpi_work; i++) { // MPI cut + OMP omp_id = omp_get_thread_num(); fstinv_(bt[i], &n, z[omp_id], &nn); } #pragma omp parallel for private(j, i) for (j=0; j < mpi_work; j++) { // MPI for (i=0; i < m; i++) { bt[j][i] = bt[j][i]/(diag[i]+diag[j + mpi_work * mpi_rank]); } } #pragma omp parallel for private(i, omp_id) schedule(static) for (i=0; i < mpi_work; i++) { // MPI cut + OMP omp_id = 
omp_get_thread_num(); fst_(bt[i], &n, z[omp_id], &nn); } transpose (b,bt); #pragma omp parallel for private(j, omp_id) for (j=0; j < mpi_work; j++) { // MPI cut + OMP omp_id = omp_get_thread_num(); fstinv_(b[j], &n, z[omp_id], &nn); } local_max = 0.0; omp_local_max = 0.0; #pragma omp parallel shared(local_max) private(j,i) firstprivate(omp_local_max) { // MPI, work in range (and handle last node overflow) #pragma omp for nowait for (j=0; j < mpi_work; j++) { for (i=0; j + mpi_work * mpi_rank < m && i < m; i++) { if (b[j][i] > omp_local_max) omp_local_max = b[j][i]; } } #pragma omp critical { if (omp_local_max > local_max) { local_max = omp_local_max; } } } MPI_Reduce(&local_max, &global_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); free(diag); free(b[0]); free(b); free(bt[0]); free(bt); free(z[0]); free(z); MPI_Finalize(); double wall_end = MPI_Wtime(); if (mpi_rank == 0) { printf (" umax = %e, time = %.3fs \n", global_max,wall_end-wall_start); printf(" mpi_size = %d, omp_max_threads = %d, n = %d\n", mpi_size, omp_tot_threads, n); } }
// Transpose and Conjugate Transpose array array::T() const { return transpose(*this); }
// Builds a new vector from the elements of x selected by the `indices` pack:
// each x[indices] is multiplied by `matrix`, the product is passed through
// transpose() and then hadd(), and the results are handed, in pack order, to
// the vec<vec<T, 2>, N> constructor.
// The csizes_t argument is unnamed; it only carries the index pack.
CMT_INLINE vec<vec<T, 2>, N> process(const vec<vec<T, 2>, N>& x, csizes_t<indices...>) const
{
    return vec<vec<T, 2>, N>(hadd(transpose(x[indices] * matrix))...);
}
bool AffineMapMetric::evaluate( PatchData& pd, size_t handle, double& value, MsqError& err ) { Sample s = ElemSampleQM::sample( handle ); size_t e = ElemSampleQM:: elem( handle ); MsqMeshEntity& elem = pd.element_by_index( e ); EntityTopology type = elem.get_element_type(); unsigned edim = TopologyInfo::dimension( type ); const size_t* conn = elem.get_vertex_index_array(); // This metric only supports sampling at corners, except for simplices. // If element is a simpex, then the Jacobian is constant over a linear // element. In this case, always evaluate at any vertex. //unsigned corner = s.number; if (s.dimension != 0) { if (type == TRIANGLE || type == TETRAHEDRON) /*corner = 0*/; else { MSQ_SETERR(err)("Invalid sample point for AffineMapMetric", MsqError::UNSUPPORTED_ELEMENT ); return false; } } bool rval; if (edim == 3) { // 3x3 or 3x2 targets ? Vector3D c[3] = { Vector3D(0,0,0), Vector3D(0,0,0), Vector3D(0,0,0) }; unsigned n; const unsigned* adj = TopologyInfo::adjacent_vertices( type, s.number, n ); c[0] = pd.vertex_by_index( conn[adj[0]] ) - pd.vertex_by_index( conn[s.number] ); c[1] = pd.vertex_by_index( conn[adj[1]] ) - pd.vertex_by_index( conn[s.number] ); c[2] = pd.vertex_by_index( conn[adj[2]] ) - pd.vertex_by_index( conn[s.number] ); MsqMatrix<3,3> A; A.set_column( 0, MsqMatrix<3,1>(c[0].to_array()) ); A.set_column( 1, MsqMatrix<3,1>(c[1].to_array()) ); A.set_column( 2, MsqMatrix<3,1>(c[2].to_array()) ); if (type == TETRAHEDRON) A = A * TET_XFORM; MsqMatrix<3,3> W; targetCalc->get_3D_target( pd, e, s, W, err ); MSQ_ERRZERO(err); rval = targetMetric->evaluate( A * inverse(W), value, err ); MSQ_ERRZERO(err); } else { Vector3D c[2] = { Vector3D(0,0,0), Vector3D(0,0,0) }; unsigned n; const unsigned* adj = TopologyInfo::adjacent_vertices( type, s.number, n ); c[0] = pd.vertex_by_index( conn[adj[0]] ) - pd.vertex_by_index( conn[s.number] ); c[1] = pd.vertex_by_index( conn[adj[1]] ) - pd.vertex_by_index( conn[s.number] ); MsqMatrix<3,2> App; App.set_column( 0, 
MsqMatrix<3,1>(c[0].to_array()) ); App.set_column( 1, MsqMatrix<3,1>(c[1].to_array()) ); MsqMatrix<3,2> Wp; targetCalc->get_surface_target( pd, e, s, Wp, err ); MSQ_ERRZERO(err); MsqMatrix<2,2> A, W; MsqMatrix<3,2> RZ; surface_to_2d( App, Wp, W, RZ ); A = transpose(RZ) * App; if (type == TRIANGLE) A = A * TRI_XFORM; rval = targetMetric->evaluate( A*inverse(W), value, err ); MSQ_ERRZERO(err); } // apply target weight to value if (weightCalc) { double ck = weightCalc->get_weight( pd, e, s, err ); MSQ_ERRZERO(err); value *= ck; } return rval; }
// Moves the camera by `translation` after multiplying it (extended with a
// fourth component of 1) by the transpose of worldToCameraRot.
// NOTE(review): the transpose only equals the inverse if worldToCameraRot is
// orthonormal, i.e. a pure rotation - confirm.
void Camera::moveRelative(vec3 translation)
{
    const vec4 extended(translation, 1);
    const vec4 rotated = transpose(worldToCameraRot) * extended;
    move(vec3(rotated));
}
// Test driver: exercises element access, scalar product / division, negation,
// transpose and slice on fixed-size matrices, first with plain floating-point
// elements and then with unit-carrying elements. Every block is run on both a
// matrix type and a type derived from it where one is declared.
// Always returns 0; failures are reported through BOOST_CHECK.
int test_main (int, char *[])
{
    // fundamental types
    A_matrix_3x1_fundamentals_type m_A1;

    // write through bub::at with mpl::size_t indices, read back with the member at<>()
    bub::at<boost::mpl::size_t<0>, boost::mpl::size_t<0> >(m_A1) = 3.0;
    bub::at<boost::mpl::size_t<1>, boost::mpl::size_t<0> >(m_A1) = 1.0;
    bub::at<boost::mpl::size_t<2>, boost::mpl::size_t<0> >(m_A1) = 2.0;
    BOOST_CHECK((m_A1.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A1.at<1, 0>() == 1.0));
    BOOST_CHECK((m_A1.at<2, 0>() == 2.0));

    // overwrite with different values to show the writes really take effect
    bub::at<boost::mpl::size_t<0>, boost::mpl::size_t<0> >(m_A1) = 2.0;
    bub::at<boost::mpl::size_t<1>, boost::mpl::size_t<0> >(m_A1) = 3.0;
    bub::at<boost::mpl::size_t<2>, boost::mpl::size_t<0> >(m_A1) = 1.0;
    BOOST_CHECK((m_A1.at<0, 0>() == 2.0));
    BOOST_CHECK((m_A1.at<1, 0>() == 3.0));
    BOOST_CHECK((m_A1.at<2, 0>() == 1.0));

    // write through bub::at_c with integer indices; m_A1 is (1, 2, 3) from here on
    bub::at_c<0, 0>(m_A1) = 1.0;
    bub::at_c<1, 0>(m_A1) = 2.0;
    bub::at_c<2, 0>(m_A1) = 3.0;
    BOOST_CHECK((m_A1.at<0, 0>() == 1.0));
    BOOST_CHECK((m_A1.at<1, 0>() == 2.0));
    BOOST_CHECK((m_A1.at<2, 0>() == 3.0));

    // scalar * matrix: prod() and operator*
    A_matrix_3x1_fundamentals_type m_A2 = prod(3.0, m_A1);
    BOOST_CHECK((m_A2.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A2.at<1, 0>() == 6.0));
    BOOST_CHECK((m_A2.at<2, 0>() == 9.0));
    m_A2 = 3.0 * m_A1;
    BOOST_CHECK((m_A2.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A2.at<1, 0>() == 6.0));
    BOOST_CHECK((m_A2.at<2, 0>() == 9.0));

    // matrix * scalar: prod() and operator*
    A_matrix_3x1_fundamentals_type m_A3 = prod(m_A1, 3.0);
    BOOST_CHECK((m_A3.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A3.at<1, 0>() == 6.0));
    BOOST_CHECK((m_A3.at<2, 0>() == 9.0));
    m_A3 = m_A1 * 3.0;
    BOOST_CHECK((m_A3.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A3.at<1, 0>() == 6.0));
    BOOST_CHECK((m_A3.at<2, 0>() == 9.0));

    // matrix / scalar: div() and operator/
    A_matrix_3x1_fundamentals_type m_A4 = div(m_A1, 3.0);
    BOOST_CHECK((m_A4.at<0, 0>() == 1.0 / 3.0));
    BOOST_CHECK((m_A4.at<1, 0>() == 2.0 / 3.0));
    BOOST_CHECK((m_A4.at<2, 0>() == 3.0 / 3.0));
    m_A4 = m_A1 / 3.0;
    BOOST_CHECK((m_A4.at<0, 0>() == 1.0 / 3.0));
    BOOST_CHECK((m_A4.at<1, 0>() == 2.0 / 3.0));
    BOOST_CHECK((m_A4.at<2, 0>() == 3.0 / 3.0));

    // negation: neg() and unary operator-
    A_matrix_3x1_fundamentals_type m_A5 = neg(m_A1);
    BOOST_CHECK((m_A5.at<0, 0>() == -1.0));
    BOOST_CHECK((m_A5.at<1, 0>() == -2.0));
    BOOST_CHECK((m_A5.at<2, 0>() == -3.0));
    m_A5 = -m_A1;
    BOOST_CHECK((m_A5.at<0, 0>() == -1.0));
    BOOST_CHECK((m_A5.at<1, 0>() == -2.0));
    BOOST_CHECK((m_A5.at<2, 0>() == -3.0));

    // 4x4 matrix filled row by row with 1..16
    D_matrix_4x4_fundamentals_type m_D1;
    m_D1.at<0, 0>() = 1.0;
    m_D1.at<0, 1>() = 2.0;
    m_D1.at<0, 2>() = 3.0;
    m_D1.at<0, 3>() = 4.0;
    m_D1.at<1, 0>() = 5.0;
    m_D1.at<1, 1>() = 6.0;
    m_D1.at<1, 2>() = 7.0;
    m_D1.at<1, 3>() = 8.0;
    m_D1.at<2, 0>() = 9.0;
    m_D1.at<2, 1>() = 10.0;
    m_D1.at<2, 2>() = 11.0;
    m_D1.at<2, 3>() = 12.0;
    m_D1.at<3, 0>() = 13.0;
    m_D1.at<3, 1>() = 14.0;
    m_D1.at<3, 2>() = 15.0;
    m_D1.at<3, 3>() = 16.0;

    // same contents in the derived matrix type
    derived_from_D_matrix_4x4_fundamentals_type m_D1_d;
    m_D1_d.at<0, 0>() = 1.0;
    m_D1_d.at<0, 1>() = 2.0;
    m_D1_d.at<0, 2>() = 3.0;
    m_D1_d.at<0, 3>() = 4.0;
    m_D1_d.at<1, 0>() = 5.0;
    m_D1_d.at<1, 1>() = 6.0;
    m_D1_d.at<1, 2>() = 7.0;
    m_D1_d.at<1, 3>() = 8.0;
    m_D1_d.at<2, 0>() = 9.0;
    m_D1_d.at<2, 1>() = 10.0;
    m_D1_d.at<2, 2>() = 11.0;
    m_D1_d.at<2, 3>() = 12.0;
    m_D1_d.at<3, 0>() = 13.0;
    m_D1_d.at<3, 1>() = 14.0;
    m_D1_d.at<3, 2>() = 15.0;
    m_D1_d.at<3, 3>() = 16.0;

    // transpose: element (r, c) of the source must appear at (c, r)
    D_matrix_4x4_fundamentals_type m_D2;
    m_D2 = transpose(m_D1);
    BOOST_CHECK((m_D2.at<0, 0>() == 1.0));
    BOOST_CHECK((m_D2.at<1, 0>() == 2.0));
    BOOST_CHECK((m_D2.at<2, 0>() == 3.0));
    BOOST_CHECK((m_D2.at<3, 0>() == 4.0));
    BOOST_CHECK((m_D2.at<0, 1>() == 5.0));
    BOOST_CHECK((m_D2.at<1, 1>() == 6.0));
    BOOST_CHECK((m_D2.at<2, 1>() == 7.0));
    BOOST_CHECK((m_D2.at<3, 1>() == 8.0));
    BOOST_CHECK((m_D2.at<0, 2>() == 9.0));
    BOOST_CHECK((m_D2.at<1, 2>() == 10.0));
    BOOST_CHECK((m_D2.at<2, 2>() == 11.0));
    BOOST_CHECK((m_D2.at<3, 2>() == 12.0));
    BOOST_CHECK((m_D2.at<0, 3>() == 13.0));
    BOOST_CHECK((m_D2.at<1, 3>() == 14.0));
    BOOST_CHECK((m_D2.at<2, 3>() == 15.0));
    BOOST_CHECK((m_D2.at<3, 3>() == 16.0));

    // transpose of the derived matrix type
    m_D2 = transpose(m_D1_d);
    BOOST_CHECK((m_D2.at<0, 0>() == 1.0));
    BOOST_CHECK((m_D2.at<1, 0>() == 2.0));
    BOOST_CHECK((m_D2.at<2, 0>() == 3.0));
    BOOST_CHECK((m_D2.at<3, 0>() == 4.0));
    BOOST_CHECK((m_D2.at<0, 1>() == 5.0));
    BOOST_CHECK((m_D2.at<1, 1>() == 6.0));
    BOOST_CHECK((m_D2.at<2, 1>() == 7.0));
    BOOST_CHECK((m_D2.at<3, 1>() == 8.0));
    BOOST_CHECK((m_D2.at<0, 2>() == 9.0));
    BOOST_CHECK((m_D2.at<1, 2>() == 10.0));
    BOOST_CHECK((m_D2.at<2, 2>() == 11.0));
    BOOST_CHECK((m_D2.at<3, 2>() == 12.0));
    BOOST_CHECK((m_D2.at<0, 3>() == 13.0));
    BOOST_CHECK((m_D2.at<1, 3>() == 14.0));
    BOOST_CHECK((m_D2.at<2, 3>() == 15.0));
    BOOST_CHECK((m_D2.at<3, 3>() == 16.0));

    // slice 1 (rows_type_1 / columns_type_1): a 2x2 result is checked
    typedef bub::result_of::slice< D_matrix_4x4_fundamentals_type, rows_type_1, columns_type_1 >::type D_fundamentals_slice_type_1;
    D_fundamentals_slice_type_1 m_D_slice_1;
    m_D_slice_1 = bub::slice<rows_type_1, columns_type_1>(m_D1);
    BOOST_CHECK((m_D_slice_1.at<0, 0>() == 1.0));
    BOOST_CHECK((m_D_slice_1.at<0, 1>() == 2.0));
    BOOST_CHECK((m_D_slice_1.at<1, 0>() == 5.0));
    BOOST_CHECK((m_D_slice_1.at<1, 1>() == 6.0));
    m_D_slice_1 = bub::slice<rows_type_1, columns_type_1>(m_D1_d);
    BOOST_CHECK((m_D_slice_1.at<0, 0>() == 1.0));
    BOOST_CHECK((m_D_slice_1.at<0, 1>() == 2.0));
    BOOST_CHECK((m_D_slice_1.at<1, 0>() == 5.0));
    BOOST_CHECK((m_D_slice_1.at<1, 1>() == 6.0));

    // slice 2 (rows_type_2 / columns_type_2): a 3x1 result is checked
    typedef bub::result_of::slice< D_matrix_4x4_fundamentals_type, rows_type_2, columns_type_2 >::type D_fundamentals_slice_type_2;
    D_fundamentals_slice_type_2 m_D_slice_2;
    m_D_slice_2 = bub::slice<rows_type_2, columns_type_2>(m_D1);
    BOOST_CHECK((m_D_slice_2.at<0, 0>() == 3.0));
    BOOST_CHECK((m_D_slice_2.at<1, 0>() == 15.0));
    BOOST_CHECK((m_D_slice_2.at<2, 0>() == 11.0));
    m_D_slice_2 = bub::slice<rows_type_2, columns_type_2>(m_D1_d);
    BOOST_CHECK((m_D_slice_2.at<0, 0>() == 3.0));
    BOOST_CHECK((m_D_slice_2.at<1, 0>() == 15.0));
    BOOST_CHECK((m_D_slice_2.at<2, 0>() == 11.0));

    // slice 3 (rows_type_3 / columns_type_3): a 1x2 result is checked
    typedef bub::result_of::slice< D_matrix_4x4_fundamentals_type, rows_type_3, columns_type_3 >::type D_fundamentals_slice_type_3;
    D_fundamentals_slice_type_3 m_D_slice_3;
    m_D_slice_3 = bub::slice<rows_type_3, columns_type_3>(m_D1);
    BOOST_CHECK((m_D_slice_3.at<0, 0>() == 2.0));
    BOOST_CHECK((m_D_slice_3.at<0, 1>() == 4.0));
    m_D_slice_3 = bub::slice<rows_type_3, columns_type_3>(m_D1_d);
    BOOST_CHECK((m_D_slice_3.at<0, 0>() == 2.0));
    BOOST_CHECK((m_D_slice_3.at<0, 1>() == 4.0));

    // unit types
    // The same sequence as above, with elements built by from_value() and
    // compared through value().
    A_matrix_3x1_units_type m_A1_u;

    // write through bub::at with mpl::size_t indices; each row holds a different quantity type
    bub::at<boost::mpl::size_t<0>, boost::mpl::size_t<0> >(m_A1_u) = time_::from_value(3.0);
    bub::at<boost::mpl::size_t<1>, boost::mpl::size_t<0> >(m_A1_u) = length::from_value(1.0);
    bub::at<boost::mpl::size_t<2>, boost::mpl::size_t<0> >(m_A1_u) = dimensionless::from_value(2.0);
    BOOST_CHECK((m_A1_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A1_u.at<1, 0>().value() == 1.0));
    BOOST_CHECK((m_A1_u.at<2, 0>().value() == 2.0));

    // overwrite with different values
    bub::at<boost::mpl::size_t<0>, boost::mpl::size_t<0> >(m_A1_u) = time_::from_value(2.0);
    bub::at<boost::mpl::size_t<1>, boost::mpl::size_t<0> >(m_A1_u) = length::from_value(3.0);
    bub::at<boost::mpl::size_t<2>, boost::mpl::size_t<0> >(m_A1_u) = dimensionless::from_value(1.0);
    BOOST_CHECK((m_A1_u.at<0, 0>().value() == 2.0));
    BOOST_CHECK((m_A1_u.at<1, 0>().value() == 3.0));
    BOOST_CHECK((m_A1_u.at<2, 0>().value() == 1.0));

    // write through bub::at_c; m_A1_u holds the values (1, 2, 3) from here on
    bub::at_c<0, 0>(m_A1_u) = time_::from_value(1.0);
    bub::at_c<1, 0>(m_A1_u) = length::from_value(2.0);
    bub::at_c<2, 0>(m_A1_u) = dimensionless::from_value(3.0);
    BOOST_CHECK((m_A1_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_A1_u.at<1, 0>().value() == 2.0));
    BOOST_CHECK((m_A1_u.at<2, 0>().value() == 3.0));

    // scalar * matrix: prod() and operator*
    A_matrix_3x1_units_type m_A2_u = prod(3.0, m_A1_u);
    BOOST_CHECK((m_A2_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A2_u.at<1, 0>().value() == 6.0));
    BOOST_CHECK((m_A2_u.at<2, 0>().value() == 9.0));
    m_A2_u = 3.0 * m_A1_u;
    BOOST_CHECK((m_A2_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A2_u.at<1, 0>().value() == 6.0));
    BOOST_CHECK((m_A2_u.at<2, 0>().value() == 9.0));

    // matrix * scalar: prod() and operator*
    A_matrix_3x1_units_type m_A3_u = prod(m_A1_u, 3.0);
    BOOST_CHECK((m_A3_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A3_u.at<1, 0>().value() == 6.0));
    BOOST_CHECK((m_A3_u.at<2, 0>().value() == 9.0));
    m_A3_u = m_A1_u * 3.0;
    BOOST_CHECK((m_A3_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A3_u.at<1, 0>().value() == 6.0));
    BOOST_CHECK((m_A3_u.at<2, 0>().value() == 9.0));

    // matrix / scalar: div() and operator/
    A_matrix_3x1_units_type m_A4_u = div(m_A1_u, 3.0);
    BOOST_CHECK((m_A4_u.at<0, 0>().value() == 1.0 / 3.0));
    BOOST_CHECK((m_A4_u.at<1, 0>().value() == 2.0 / 3.0));
    BOOST_CHECK((m_A4_u.at<2, 0>().value() == 3.0 / 3.0));
    m_A4_u = m_A1_u / 3.0;
    BOOST_CHECK((m_A4_u.at<0, 0>().value() == 1.0 / 3.0));
    BOOST_CHECK((m_A4_u.at<1, 0>().value() == 2.0 / 3.0));
    BOOST_CHECK((m_A4_u.at<2, 0>().value() == 3.0 / 3.0));

    // negation: neg() and unary operator-
    A_matrix_3x1_units_type m_A5_u = neg(m_A1_u);
    BOOST_CHECK((m_A5_u.at<0, 0>().value() == -1.0));
    BOOST_CHECK((m_A5_u.at<1, 0>().value() == -2.0));
    BOOST_CHECK((m_A5_u.at<2, 0>().value() == -3.0));
    m_A5_u = -m_A1_u;
    BOOST_CHECK((m_A5_u.at<0, 0>().value() == -1.0));
    BOOST_CHECK((m_A5_u.at<1, 0>().value() == -2.0));
    BOOST_CHECK((m_A5_u.at<2, 0>().value() == -3.0));

    // 4x4 matrix of lengths filled row by row with 1..16
    D_matrix_4x4_units_type m_D1_u;
    m_D1_u.at<0, 0>() = length::from_value(1.0);
    m_D1_u.at<0, 1>() = length::from_value(2.0);
    m_D1_u.at<0, 2>() = length::from_value(3.0);
    m_D1_u.at<0, 3>() = length::from_value(4.0);
    m_D1_u.at<1, 0>() = length::from_value(5.0);
    m_D1_u.at<1, 1>() = length::from_value(6.0);
    m_D1_u.at<1, 2>() = length::from_value(7.0);
    m_D1_u.at<1, 3>() = length::from_value(8.0);
    m_D1_u.at<2, 0>() = length::from_value(9.0);
    m_D1_u.at<2, 1>() = length::from_value(10.0);
    m_D1_u.at<2, 2>() = length::from_value(11.0);
    m_D1_u.at<2, 3>() = length::from_value(12.0);
    m_D1_u.at<3, 0>() = length::from_value(13.0);
    m_D1_u.at<3, 1>() = length::from_value(14.0);
    m_D1_u.at<3, 2>() = length::from_value(15.0);
    m_D1_u.at<3, 3>() = length::from_value(16.0);

    // same contents in the derived matrix type
    derived_from_D_matrix_4x4_units_type m_D1_u_d;
    m_D1_u_d.at<0, 0>() = length::from_value(1.0);
    m_D1_u_d.at<0, 1>() = length::from_value(2.0);
    m_D1_u_d.at<0, 2>() = length::from_value(3.0);
    m_D1_u_d.at<0, 3>() = length::from_value(4.0);
    m_D1_u_d.at<1, 0>() = length::from_value(5.0);
    m_D1_u_d.at<1, 1>() = length::from_value(6.0);
    m_D1_u_d.at<1, 2>() = length::from_value(7.0);
    m_D1_u_d.at<1, 3>() = length::from_value(8.0);
    m_D1_u_d.at<2, 0>() = length::from_value(9.0);
    m_D1_u_d.at<2, 1>() = length::from_value(10.0);
    m_D1_u_d.at<2, 2>() = length::from_value(11.0);
    m_D1_u_d.at<2, 3>() = length::from_value(12.0);
    m_D1_u_d.at<3, 0>() = length::from_value(13.0);
    m_D1_u_d.at<3, 1>() = length::from_value(14.0);
    m_D1_u_d.at<3, 2>() = length::from_value(15.0);
    m_D1_u_d.at<3, 3>() = length::from_value(16.0);

    // transpose: element (r, c) of the source must appear at (c, r)
    D_matrix_4x4_units_type m_D2_u;
    m_D2_u = transpose(m_D1_u);
    BOOST_CHECK((m_D2_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_D2_u.at<1, 0>().value() == 2.0));
    BOOST_CHECK((m_D2_u.at<2, 0>().value() == 3.0));
    BOOST_CHECK((m_D2_u.at<3, 0>().value() == 4.0));
    BOOST_CHECK((m_D2_u.at<0, 1>().value() == 5.0));
    BOOST_CHECK((m_D2_u.at<1, 1>().value() == 6.0));
    BOOST_CHECK((m_D2_u.at<2, 1>().value() == 7.0));
    BOOST_CHECK((m_D2_u.at<3, 1>().value() == 8.0));
    BOOST_CHECK((m_D2_u.at<0, 2>().value() == 9.0));
    BOOST_CHECK((m_D2_u.at<1, 2>().value() == 10.0));
    BOOST_CHECK((m_D2_u.at<2, 2>().value() == 11.0));
    BOOST_CHECK((m_D2_u.at<3, 2>().value() == 12.0));
    BOOST_CHECK((m_D2_u.at<0, 3>().value() == 13.0));
    BOOST_CHECK((m_D2_u.at<1, 3>().value() == 14.0));
    BOOST_CHECK((m_D2_u.at<2, 3>().value() == 15.0));
    BOOST_CHECK((m_D2_u.at<3, 3>().value() == 16.0));

    // transpose of the derived matrix type
    m_D2_u = transpose(m_D1_u_d);
    BOOST_CHECK((m_D2_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_D2_u.at<1, 0>().value() == 2.0));
    BOOST_CHECK((m_D2_u.at<2, 0>().value() == 3.0));
    BOOST_CHECK((m_D2_u.at<3, 0>().value() == 4.0));
    BOOST_CHECK((m_D2_u.at<0, 1>().value() == 5.0));
    BOOST_CHECK((m_D2_u.at<1, 1>().value() == 6.0));
    BOOST_CHECK((m_D2_u.at<2, 1>().value() == 7.0));
    BOOST_CHECK((m_D2_u.at<3, 1>().value() == 8.0));
    BOOST_CHECK((m_D2_u.at<0, 2>().value() == 9.0));
    BOOST_CHECK((m_D2_u.at<1, 2>().value() == 10.0));
    BOOST_CHECK((m_D2_u.at<2, 2>().value() == 11.0));
    BOOST_CHECK((m_D2_u.at<3, 2>().value() == 12.0));
    BOOST_CHECK((m_D2_u.at<0, 3>().value() == 13.0));
    BOOST_CHECK((m_D2_u.at<1, 3>().value() == 14.0));
    BOOST_CHECK((m_D2_u.at<2, 3>().value() == 15.0));
    BOOST_CHECK((m_D2_u.at<3, 3>().value() == 16.0));

    // slice 1 (rows_type_1 / columns_type_1): a 2x2 result is checked
    typedef bub::result_of::slice< D_matrix_4x4_units_type, rows_type_1, columns_type_1 >::type D_units_slice_type_1;
    D_units_slice_type_1 m_D_slice_1_u;
    m_D_slice_1_u = bub::slice<rows_type_1, columns_type_1>(m_D1_u);
    BOOST_CHECK((m_D_slice_1_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_D_slice_1_u.at<0, 1>().value() == 2.0));
    BOOST_CHECK((m_D_slice_1_u.at<1, 0>().value() == 5.0));
    BOOST_CHECK((m_D_slice_1_u.at<1, 1>().value() == 6.0));
    m_D_slice_1_u = bub::slice<rows_type_1, columns_type_1>(m_D1_u_d);
    BOOST_CHECK((m_D_slice_1_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_D_slice_1_u.at<0, 1>().value() == 2.0));
    BOOST_CHECK((m_D_slice_1_u.at<1, 0>().value() == 5.0));
    BOOST_CHECK((m_D_slice_1_u.at<1, 1>().value() == 6.0));

    // slice 2 (rows_type_2 / columns_type_2): a 3x1 result is checked
    typedef bub::result_of::slice< D_matrix_4x4_units_type, rows_type_2, columns_type_2 >::type D_units_slice_type_2;
    D_units_slice_type_2 m_D_slice_2_u;
    m_D_slice_2_u = bub::slice<rows_type_2, columns_type_2>(m_D1_u);
    BOOST_CHECK((m_D_slice_2_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_D_slice_2_u.at<1, 0>().value() == 15.0));
    BOOST_CHECK((m_D_slice_2_u.at<2, 0>().value() == 11.0));
    m_D_slice_2_u = bub::slice<rows_type_2, columns_type_2>(m_D1_u_d);
    BOOST_CHECK((m_D_slice_2_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_D_slice_2_u.at<1, 0>().value() == 15.0));
    BOOST_CHECK((m_D_slice_2_u.at<2, 0>().value() == 11.0));

    // slice 3 (rows_type_3 / columns_type_3): a 1x2 result is checked
    typedef bub::result_of::slice< D_matrix_4x4_units_type, rows_type_3, columns_type_3 >::type D_units_slice_type_3;
    D_units_slice_type_3 m_D_slice_3_u;
    m_D_slice_3_u = bub::slice<rows_type_3, columns_type_3>(m_D1_u);
    BOOST_CHECK((m_D_slice_3_u.at<0, 0>().value() == 2.0));
    BOOST_CHECK((m_D_slice_3_u.at<0, 1>().value() == 4.0));
    m_D_slice_3_u = bub::slice<rows_type_3, columns_type_3>(m_D1_u_d);
    BOOST_CHECK((m_D_slice_3_u.at<0, 0>().value() == 2.0));
    BOOST_CHECK((m_D_slice_3_u.at<0, 1>().value() == 4.0));

    return 0;
}
/**
 * Constructor that fills a coxph_data object with phenotype and genotype
 * data.
 *
 * The design matrix X is built with (ncov + 1) columns: column 0 is a
 * constant 1, columns 1..phed.ncov hold the covariates and, when a SNP is
 * requested, the last ngpreds columns hold its genotype predictors. All
 * per-individual data is then reordered according to the sorted follow-up
 * times, and X is transposed at the end.
 *
 * Terminates the program (exit(1)) if the phenotype data does not have
 * exactly two outcomes, if a status value is not 0 or 1, or if the sort
 * order cannot be reconstructed.
 *
 * @param phed Reference to a phedata object with phenotype data
 * @param gend Reference to a gendata object with genotype data
 * @param snpnum The number of the SNP in the genotype data object to
 * be added to the design matrix regdata::X. When set to a number < 0
 * no SNP data is added to the design matrix (e.g. when calculating
 * the null model).
 */
coxph_data::coxph_data(const phedata &phed, const gendata &gend,
                       const int snpnum)
{
    freq        = 0;
    gcount      = 0;
    nids        = gend.nids;
    masked_data = std::vector<bool>(nids, false);

    ngpreds = gend.ngpreds;
    if (snpnum >= 0)
    {
        ncov = phed.ncov + ngpreds;
    }
    else
    {
        ncov = phed.ncov;
    }

    if (phed.noutcomes != 2)
    {
        std::cerr << "coxph_data: number of outcomes should be 2 (now: "
                  << phed.noutcomes << ")\n";
        exit(1);
    }

    X.reinit(nids, (ncov + 1)); // Note: ncov takes ngpreds into
                                // account, see above!
    stime.reinit(nids, 1);
    sstat.reinit(nids, 1);
    weights.reinit(nids, 1);
    offset.reinit(nids, 1);
    strata.reinit(nids, 1);
    order.reinit(nids, 1);

    // Outcome column 0 is the follow-up time, column 1 the 0/1 status.
    for (int i = 0; i < nids; i++)
    {
        stime[i] = (phed.Y).get(i, 0);
        sstat[i] = static_cast<int>((phed.Y).get(i, 1));
        if (sstat[i] != 1 && sstat[i] != 0)
        {
            std::cerr << "coxph_data: status not 0/1 "
                      <<"(correct order: id, fuptime, status ...)"
                      << std::endl;
            exit(1);
        }
    }

    // Add a column with a constant (=1) to the X matrix (the mean)
    for (int i = 0; i < nids; i++)
    {
        X.put(1., i, 0);
    }

    // Insert the covariate data into X (note we use phed.ncov and not
    // ncov, which includes ngpreds when not computing the null model!)
    for (int j = 1; j <= phed.ncov; j++)
    {
        for (int i = 0; i < nids; i++)
        {
            X.put((phed.X).get(i, j - 1), i, j);
        }
    }

    // Insert the genotype data into X.
    // The condition matches the one used for ncov above (SNP number 0 is a
    // valid SNP), and the genotype columns start right after the covariates
    // at column phed.ncov + 1 == ncov - ngpreds + 1. Starting one column
    // earlier would overwrite the last covariate and leave the last column
    // of X unset.
    if (snpnum >= 0)
    {
        for (int j = 0; j < ngpreds; j++)
        {
            double *snpdata = new double[nids];
            gend.get_var(snpnum * ngpreds + j, snpdata);
            for (int i = 0; i < nids; i++)
            {
                X.put(snpdata[i], i, (ncov - ngpreds + 1 + j));
            }
            delete[] snpdata;
        }
    }

    for (int i = 0; i < nids; i++)
    {
        weights[i] = 1.0;
        offset[i]  = 0.0;
        strata[i]  = 0;
    }

    // sort by time
    double *tmptime    = new double[nids];
    int *passed_sorted = new int[nids];
    std::fill(passed_sorted, passed_sorted + nids, 0);

    for (int i = 0; i < nids; i++)
    {
        tmptime[i] = stime[i];
    }

    qsort(tmptime, nids, sizeof(double), cmpfun);

    // order[i] = position of individual i in the sorted times. Each sorted
    // slot is handed out once (passed_sorted) so that tied times receive
    // distinct positions.
    for (int i = 0; i < nids; i++)
    {
        int passed = 0;
        for (int j = 0; j < nids; j++)
        {
            if (tmptime[j] == stime[i])
            {
                if (!passed_sorted[j])
                {
                    order[i]         = j;
                    passed_sorted[j] = 1;
                    passed           = 1;
                    break;
                }
            }
        }
        if (passed != 1)
        {
            std::cerr << "cannot recover element " << i << "\n";
            exit(1);
        }
    }

    stime   = reorder(stime, order);
    sstat   = reorder(sstat, order);
    weights = reorder(weights, order);
    strata  = reorder(strata, order);
    offset  = reorder(offset, order);
    X       = reorder(X, order);

    // The coxfit2() function expects data in column major order.
    X = transpose(X);

    // X.print();
    // offset.print();
    // weights.print();
    // stime.print();
    // sstat.print();

    delete[] tmptime;
    delete[] passed_sorted;
}
void main(int argc, char **argv) { double start_t, end_t; int my_rank, p; complex A[512*512], B[512*512], C[512*512]; /* initialize MPI */ MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &p); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* Create MPI Datatype for Complex */ const float nitems=2; int blocklengths[2] = {1,1}; MPI_Datatype types[2] = {MPI_FLOAT, MPI_FLOAT}; MPI_Aint offsets[2]; offsets[0] = offsetof(complex, r); offsets[1] = offsetof(complex, i); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_complex); MPI_Type_commit(&mpi_complex); /* Initialize Data*/ if(my_rank == 0) { initialize_data(f1_name, A); initialize_data(f2_name, B); start_t = MPI_Wtime(); dist_data(A, p); dist_data(B, p); } else { recv_data(A, p, my_rank); recv_data(B, p, my_rank); } /* 2D FFT on A */ execute_fft(A, 1, p, my_rank); collect_data(A, p, my_rank); if(my_rank == 0) { transpose(A); dist_data(A, p); } else { recv_data(A, p, my_rank); } execute_fft(A, 1, p, my_rank); /* 2D FFT on B */ execute_fft(B, 1, p, my_rank); collect_data(B, p, my_rank); if(my_rank == 0) { transpose(B); dist_data(B, p); } else { recv_data(B, p, my_rank); } execute_fft(B, 1, p, my_rank); /* Multiplication Step */ execute_mm(A, B, C, p, my_rank); /* 2D FFT on C */ execute_fft(C, -1, p, my_rank); collect_data(C, p, my_rank); if(my_rank == 0) { transpose(C); dist_data(C, p); } else { recv_data(C, p, my_rank); } execute_fft(C, -1, p, my_rank); collect_data(C, p, my_rank); end_t = MPI_Wtime(); if(my_rank == 0) { output_data(f_out, C); printf("\nElapsed time = %g s\n", end_t - start_t); printf("--------------------------------------------\n"); } MPI_Finalize(); }
void runPoisson(int rank, int size, int n){ double time=MPI_Wtime(); Real **b, *diag, *RecvBuf,*z, h, maxError; int i, j, m, nn, *len, *disp; m = n-1; nn = 4*n; splitVector(m, size, &len, &disp); diag = createRealArray (m); b = createReal2DArray (len[rank],m); z = createRealArray (nn); h = 1./(Real)n; #pragma omp parallel for schedule(static) for (i=0; i < m; i++) { diag[i] = 2.*(1.-cos((i+1)*M_PI/(Real)n)); } #pragma omp for for (j=0; j < len[rank]; j++) { #pragma omp parallel for schedule(static) for (i=0; i < m; i++) { Real x=(Real)(j+1+disp[rank])/n; Real y=(Real) (i+1)/n; b[j][i] = h*h * funcf(x,y); } } #pragma omp parallel for schedule(static) for (j=0; j < len[rank]; j++) { Real* zt = createRealArray (nn); fst_(b[j], &n, zt, &nn); free(zt); } transpose(b, size, len, disp, rank, m); #pragma omp parallel for schedule(static) for (i=0; i < len[rank]; i++) { Real* zt = createRealArray (nn); fstinv_(b[i], &n, zt, &nn); free(zt); } #pragma omp for for (j=0; j < len[rank]; j++) { #pragma omp parallel for schedule(static) for (i=0; i < m; i++) { b[j][i] = b[j][i]/(diag[i]+diag[j+disp[rank]]); } } #pragma omp parallel for schedule(static) for (i=0; i < len[rank]; i++) { Real* zt = createRealArray (nn); fst_(b[i], &n, zt, &nn); free(zt); } transpose(b, size, len, disp, rank, m); #pragma omp parallel for schedule(static) for (j=0; j < len[rank]; j++) { Real* zt = createRealArray (nn); fstinv_(b[j], &n, zt, &nn); free(zt); } if (rank==0) { RecvBuf = createRealArray (m*m); } gatherMatrix(b, m, RecvBuf, len, disp,0); if (rank==0) { for (int j=0; j < m; j++) { for (int i=0; i < m; i++) { printf("%e %e %e \n",(Real)i/m,(Real)j/m,RecvBuf[j*m+i] ); } } } }
/***********************************************************************//** * @brief Test Cholesky decomposition ***************************************************************************/ void TestGSymMatrix::matrix_cholesky(void) { // Test Cholesky decomposition GSymMatrix cd = cholesky_decompose(m_test); GMatrix cd_lower = cd.extract_lower_triangle(); GMatrix cd_upper = transpose(cd_lower); GMatrix cd_product = cd_lower * cd_upper; GMatrix cd_residuals = GMatrix(m_test) - cd_product; double res = (abs(cd_residuals)).max(); test_value(res, 0.0, 1.0e-15, "Test cholesky_decompose() method"); // Test compressed Cholesky decomposition GSymMatrix test_zero = set_matrix_zero(); GSymMatrix cd_zero = cholesky_decompose(test_zero); GMatrix cd_zero_lower = cd_zero.extract_lower_triangle(); GMatrix cd_zero_upper = transpose(cd_zero_lower); GMatrix cd_zero_product = cd_zero_lower * cd_zero_upper; GMatrix cd_zero_residuals = GMatrix(test_zero) - cd_zero_product; res = (abs(cd_zero_residuals)).max(); test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_decompose() method"); // Test Cholesky inplace decomposition GSymMatrix test = m_test; test.cholesky_decompose(); GMatrix cd_lower2 = test.extract_lower_triangle(); test_assert((cd_lower2 == cd_lower), "Test inplace cholesky_decompose() method"); // Test Cholesky solver (first test) GVector e0(g_rows); GVector a0(g_rows); e0[0] = 1.0; e0[1] = 0.0; e0[2] = 0.0; a0[0] = g_matrix[0]; a0[1] = g_matrix[3]; a0[2] = g_matrix[6]; GVector s0 = cd.cholesky_solver(a0) - e0; res = max(abs(s0)); test_value(res, 0.0, 1.0e-15, "Test cholesky_solver() method"); // Test Cholesky solver (second test) e0[0] = 0.0; e0[1] = 1.0; e0[2] = 0.0; a0[0] = g_matrix[1]; a0[1] = g_matrix[4]; a0[2] = g_matrix[7]; s0 = cd.cholesky_solver(a0) - e0; res = max(abs(s0)); test_value(res, 0.0, 1.0e-15, "Test cholesky_solver() method"); // Test Cholesky solver (third test) e0[0] = 0.0; e0[1] = 0.0; e0[2] = 1.0; a0[0] = g_matrix[2]; a0[1] = g_matrix[5]; a0[2] = 
g_matrix[8]; s0 = cd.cholesky_solver(a0) - e0; res = max(abs(s0)); test_value(res, 0.0, 1.0e-15, "Test cholesky_solver() method"); // Test compressed Cholesky solver (first test) e0 = GVector(g_rows+1); a0 = GVector(g_rows+1); e0[0] = 1.0; e0[1] = 0.0; e0[2] = 0.0; e0[3] = 0.0; a0[0] = g_matrix[0]; a0[1] = g_matrix[3]; a0[2] = 0.0; a0[3] = g_matrix[6]; s0 = cd_zero.cholesky_solver(a0) - e0; res = max(abs(s0)); test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_solver() method"); // Test compressed Cholesky solver (second test) e0[0] = 0.0; e0[1] = 1.0; e0[2] = 0.0; e0[3] = 0.0; a0[0] = g_matrix[1]; a0[1] = g_matrix[4]; a0[2] = 0.0; a0[3] = g_matrix[7]; s0 = cd_zero.cholesky_solver(a0) - e0; res = max(abs(s0)); test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_solver() method"); // Test compressed Cholesky solver (third test) e0[0] = 0.0; e0[1] = 0.0; e0[2] = 0.0; e0[3] = 1.0; a0[0] = g_matrix[2]; a0[1] = g_matrix[5]; a0[2] = 0.0; a0[3] = g_matrix[8]; s0 = cd_zero.cholesky_solver(a0) - e0; res = max(abs(s0)); test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_solver() method"); // Test Cholesky inverter GSymMatrix unit(g_rows,g_cols); unit(0,0) = unit(1,1) = unit(2,2) = 1.0; GSymMatrix test_inv = m_test; test_inv.cholesky_invert(); GMatrix ci_product = m_test * test_inv; GMatrix ci_residuals = ci_product - unit; res = (abs(ci_residuals)).max(); test_value(res, 0.0, 1.0e-15, "Test cholesky_invert method"); // Test Cholesky inverter for compressed matrix unit = GSymMatrix(4,4); unit(0,0) = unit(1,1) = unit(3,3) = 1.0; GSymMatrix test_zero_inv = test_zero; test_zero_inv.cholesky_invert(); GMatrix ciz_product = test_zero * test_zero_inv; GMatrix ciz_residuals = ciz_product - unit; res = (abs(ciz_residuals)).max(); test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_invert method"); // Return return; }
/* ----------------------------- MNI Header -----------------------------------
@NAME       : procrustes
@INPUT      : npoints - number of point pairs
              ndim    - number of dimensions of each point
              Apoints - first point set; accessed here with row indices
                        1..npoints and column indices 1..ndim (allocate
                        with the numerical recipes routine matrix).
              Bpoints - second point set, same layout as Apoints.
@OUTPUT     : translation - vector (indices 1..ndim) giving the translation
                 that moves the centroid of Bpoints onto that of Apoints.
                 Must be allocated by the caller.
              centre_of_rotation - vector (indices 1..ndim) receiving the
                 centroid of Apoints, which serves as centre of rotation
                 and scaling. Must be allocated by the caller.
              rotation - matrix (1..ndim by 1..ndim) that rotates the
                 translated Bpoints onto Apoints. Must be allocated by the
                 caller.
              scale - global scale factor for the translated and rotated
                 Bpoints; set to 0 when the denominator trace is 0.
@RETURNS    : (nothing)
@DESCRIPTION: Computes the n-dimensional linear transformation (translation,
              rotation/reflexion, scale) that maps Bpoints onto Apoints
              while minimizing the distance between corresponding points.
@METHOD     : See Matrix Computations, Golub and Van Loan, pp. 425-426 and
              paper by Sibson, Robin, J.R.Statist.Soc. B(1978), Vol. 40,
              No. 2, pp 234-238.
              The three parts are computed one after the other:
                 1) translation aligning the two centroids,
                 2) rotation/reflexion from the SVD of B'A (centred sets),
                 3) scale as trace(B R A') / trace(B B').
              They can be treated separately because the best translation
              does not depend on rotation or scale, and the best rotation
              does not depend on scale (see Sibson).
@GLOBALS    : (none)
@CALLS      : calc_centroid
              translate
              transpose
              matrix_multiply
              svdcmp
              trace
@CREATED    : Long time ago (Sean Marrett)
@MODIFIED   : Some time later (Shyan Ku)
              Feb. 26, 1990 (Weiqian Dai)
              January 30, 1992 (Peter Neelin)
                 - complete rewrite for roughly NIL-abiding code. Modified
                 name and calling parameters.
---------------------------------------------------------------------------- */
void procrustes(int npoints, int ndim,
                float **Apoints, float **Bpoints,
                float *translation, float *centre_of_rotation,
                float **rotation, float *scale)
{
   int dim;
   float numerator, denominator;

   /* Work vectors: negated centroids and singular values */
   float *neg_centroid_A = vector(1,ndim);
   float *neg_centroid_B = vector(1,ndim);
   float *singular       = vector(1,ndim);

   /* Work matrices */
   float **A_centred   = matrix(1,npoints,1,ndim);
   float **B_centred   = matrix(1,npoints,1,ndim);
   float **A_centred_T = matrix(1,ndim,1,npoints);
   float **B_centred_T = matrix(1,ndim,1,npoints);
   float **U           = matrix(1,ndim,1,ndim);
   float **V           = matrix(1,ndim,1,ndim);
   float **V_T         = matrix(1,ndim,1,ndim);
   float **B_rotated   = matrix(1,npoints,1,ndim);
   float **square      = matrix(1,npoints,1,npoints);

   /* Centre point set A; its centroid is also the centre of rotation */
   calc_centroid(npoints, ndim, Apoints, centre_of_rotation);
   for (dim = 1; dim <= ndim; dim++) {
      neg_centroid_A[dim] = -centre_of_rotation[dim];
   }
   translate(npoints, ndim, Apoints, neg_centroid_A, A_centred);

   /* Centre point set B */
   calc_centroid(npoints, ndim, Bpoints, neg_centroid_B);
   for (dim = 1; dim <= ndim; dim++) {
      neg_centroid_B[dim] *= -1;
   }
   translate(npoints, ndim, Bpoints, neg_centroid_B, B_centred);

   /* The translation is the difference of the two negated centroids */
   for (dim = 1; dim <= ndim; dim++) {
      translation[dim] = neg_centroid_B[dim] - neg_centroid_A[dim];
   }

   /* Rotation/reflexion: SVD of B'A, then U * V' */
   transpose(npoints, ndim, B_centred, B_centred_T);
   matrix_multiply(ndim, npoints, ndim, B_centred_T, A_centred, U);
   svdcmp(U, ndim, ndim, singular, V);
   transpose(ndim, ndim, V, V_T);
   matrix_multiply(ndim, ndim, ndim, U, V_T, rotation);

   /* Scale: trace(B R A') / trace(B B') */
   matrix_multiply(npoints, ndim, ndim, B_centred, rotation, B_rotated);
   transpose(npoints, ndim, A_centred, A_centred_T);
   matrix_multiply(npoints, ndim, npoints, B_rotated, A_centred_T, square);
   numerator = trace(npoints, square);
   matrix_multiply(npoints, ndim, npoints, B_centred, B_centred_T, square);
   denominator = trace(npoints, square);

   /* Guard against a zero denominator */
   *scale = 0.0;
   if (denominator != 0.0) {
      *scale = numerator / denominator;
   }

   /* transpose back the rotation matrix */
   transpose(ndim, ndim, rotation, rotation);

   /* Release work vectors */
   free_vector(neg_centroid_A,1,ndim);
   free_vector(neg_centroid_B,1,ndim);
   free_vector(singular,1,ndim);

   /* Release work matrices */
   free_matrix(A_centred,1,npoints,1,ndim);
   free_matrix(B_centred,1,npoints,1,ndim);
   free_matrix(A_centred_T,1,ndim,1,npoints);
   free_matrix(B_centred_T,1,ndim,1,npoints);
   free_matrix(U,1,ndim,1,ndim);
   free_matrix(V,1,ndim,1,ndim);
   free_matrix(V_T,1,ndim,1,ndim);
   free_matrix(B_rotated,1,npoints,1,ndim);
   free_matrix(square,1,npoints,1,npoints);
}
void build_relations(void) { register int i; register int j; register int k; register short *rulep; register short *rp; register shifts *sp; register int length; register int nedges; register int done; register int state1; register int stateno; register int symbol1; register int symbol2; register short *shortp; register short *edge; register short *states; register short **new_includes; includes = NEW2(ngotos, short *); edge = NEW2(ngotos + 1, short); states = NEW2(maxrhs + 1, short); for (i = 0; i < ngotos; i++) { nedges = 0; state1 = from_state[i]; symbol1 = accessing_symbol[to_state[i]]; for (rulep = derives[symbol1]; *rulep >= 0; rulep++) { length = 1; states[0] = state1; stateno = state1; for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++) { symbol2 = *rp; sp = shift_table[stateno]; k = sp->nshifts; for (j = 0; j < k; j++) { stateno = sp->shift[j]; if (accessing_symbol[stateno] == symbol2) break; } states[length++] = stateno; } add_lookback_edge(stateno, *rulep, i); length--; done = 0; while (!done) { done = 1; rp--; if (ISVAR(*rp)) { stateno = states[--length]; edge[nedges++] = map_goto(stateno, *rp); if (nullable[*rp] && length > 0) done = 0; } } } if (nedges) { includes[i] = shortp = NEW2(nedges + 1, short); for (j = 0; j < nedges; j++) shortp[j] = edge[j]; shortp[nedges] = -1; } } new_includes = transpose(includes, ngotos); for (i = 0; i < ngotos; i++) if (includes[i]) FREE(includes[i]); FREE(includes); includes = new_includes; FREE(edge); FREE(states); }
int main(int argc, char * argv[]) { parse_args(argc, argv); const int size = n * n; int data_size_bytes = size * sizeof(float); float *mat_a = malloc(data_size_bytes); float *mat_b = malloc(data_size_bytes); float *vector; float *output = malloc(data_size_bytes); float *expected = malloc(data_size_bytes); generate_matrix(n, mat_a, range); generate_matrix(n, mat_b, range); timing_t timer; timer_start(&timer); float *mat_b_trans = malloc(data_size_bytes); transpose(n, mat_b, mat_b_trans); for (int i=0; i<n; ++i) { vector = &mat_b_trans[n*i]; MatMatMultiply(n, mat_a, vector, &output[n*i]); } float *output_trans = malloc(data_size_bytes); transpose(n, output, output_trans); timer_stop(&timer); float sum = sum_mat(size, output_trans); printf("%d %f %ld %ld\n", n, sum, timer.realtime, timer.cputime); int status = 0; if (trace == 1) { printf("\nMatrix A\n"); for (int i=0; i<n; i++){ for (int j=0; j<n; j++){ printf("%f " , mat_a[i*n+j]); } printf("\n"); } printf("\nMatrix B \n"); for (int i=0; i<n; i++){ for (int j=0; j<n; j++){ printf("%f " , mat_b[i*n+j]); } printf("\n"); } printf("\n\nResult\n"); for (int i=0; i<n; i++){ for (int j=0; j<n; j++){ printf("%f " , output[i*n+j]); } printf("\n"); } } else if (trace == 2) { multiply_CPU_matrix(n, mat_a, mat_b, expected); int status = check(size, output_trans, expected); if (status) { printf("Test failed.\n"); status = 1; } else printf("Test passed OK!\n"); } free(mat_a); free(mat_b); free(mat_b_trans); free(output); free(expected); free(output_trans); return status; }
/* Apply routine for the rank-2, in-place, square transpose using the
   tiled, buffered kernel. The transpose operates on I directly; the
   output pointer O is not used. */
static void apply_ip_sq_tiledbuf(const plan *ego_, R *I, R *O)
{
     const P *self = (const P *) ego_;

     UNUSED(O);
     transpose(self->d, self->rnk, self->vl, I, X(transpose_tiledbuf));
}
// this function builds a projection matrix for rendering from the shadow's POV. // First, it computes the appropriate z-range and sets an orthogonal projection. // Then, it translates and scales it, so that it exactly captures the bounding box // of the current frustum slice float CShadowMap::ApplyCropMatrix(frustum &f) { float shad_modelview[16]; float shad_proj[16]; float shad_crop[16]; float shad_mvp[16]; float maxX = -2000.0f; float maxY = -2000.0f; float maxZ; float minX = 2000.0f; float minY = 2000.0f; float minZ; matrix4<float> nv_mvp; vec4f transf; // find the z-range of the current frustum as seen from the light // in order to increase precision glGetFloatv(GL_MODELVIEW_MATRIX, shad_modelview); nv_mvp.set_value(shad_modelview); // note that only the z-component is need and thus // the multiplication can be simplified // transf.z = shad_modelview[2] * f.point[0].x + shad_modelview[6] * f.point[0].y + shad_modelview[10] * f.point[0].z + shad_modelview[14]; transf = nv_mvp*vec4f(f.point[0], 1.0f); minZ = -1000/*transf.z*/; maxZ = 1000/*transf.z*/; for(int i=1; i<8; i++) { transf = nv_mvp*vec4f(f.point[i], 1.0f); if(transf.z > maxZ) maxZ = transf.z; if(transf.z < minZ) minZ = transf.z; } // make sure all relevant shadow casters are included // note that these here are dummy objects at the edges of our scene for(int i=0; i<NUM_OBJECTS; i++) { transf = nv_mvp*vec4f(BSphere[i].center, 1.0f); if(transf.z + BSphere[i].radius > maxZ) maxZ = transf.z + BSphere[i].radius; //if(transf.z - BSphere[i].radius < minZ) minZ = transf.z - BSphere[i].radius; } glMatrixMode(GL_PROJECTION); glLoadIdentity(); // set the projection matrix with the new z-bounds // note the inversion because the light looks at the neg. 
z axis // gluPerspective(LIGHT_FOV, 1.0, maxZ, minZ); // for point lights glOrtho(-1.0, 1.0, -1.0, 1.0, -maxZ, -minZ); glGetFloatv(GL_PROJECTION_MATRIX, shad_proj); glPushMatrix(); glMultMatrixf(shad_modelview); glGetFloatv(GL_PROJECTION_MATRIX, shad_mvp); glPopMatrix(); // find the extends of the frustum slice as projected in light's homogeneous coordinates nv_mvp.set_value(shad_mvp); for(int i=0; i<8; i++) { transf = nv_mvp*vec4f(f.point[i], 1.0f); transf.x /= transf.w; transf.y /= transf.w; if(transf.x > maxX) maxX = transf.x; if(transf.x < minX) minX = transf.x; if(transf.y > maxY) maxY = transf.y; if(transf.y < minY) minY = transf.y; } float scaleX = 2.0f/(maxX - minX); float scaleY = 2.0f/(maxY - minY); float offsetX = -0.5f*(maxX + minX)*scaleX; float offsetY = -0.5f*(maxY + minY)*scaleY; // apply a crop matrix to modify the projection matrix we got from glOrtho. nv_mvp.make_identity(); nv_mvp.element(0,0) = scaleX; nv_mvp.element(1,1) = scaleY; nv_mvp.element(0,3) = offsetX; nv_mvp.element(1,3) = offsetY; transpose(nv_mvp); nv_mvp.get_value(shad_crop); glLoadMatrixf(shad_crop); glMultMatrixf(shad_proj); return minZ; }