/// Evaluates one matrix form on the current element and adds the resulting
/// local matrix into the global matrix `current_mat`.
///
/// @param form                 The form to evaluate; its `i`, `j`, `sym`, `ext`, `scaling_factor` and `u_ext_offset` members are read.
/// @param order                Order passed to init_fn() when form-specific ext functions are initialized.
/// @param base_fns             Basis functions, indexed like the entries of `current_als_j`.
/// @param test_fns             Test functions, indexed like the entries of `current_als_i`.
/// @param ext                  Ext functions used when the form does not supply its own.
/// @param u_ext                Previous-iteration functions handed to form->value(); offset by `form->u_ext_offset` when `rungeKutta` is set.
/// @param current_als_i        Assembly list (dof / coef / cnt) for the rows.
/// @param current_als_j        Assembly list (dof / coef / cnt) for the columns.
/// @param current_state        Traversal state; `e[]` is consulted to decide whether an ext function is initialized.
/// @param n_quadrature_points  Number of quadrature points handed to form->value().
/// @param geometry             Geometry data handed to form->value().
/// @param jacobian_x_weights   Jacobian-times-weight values handed to form->value().
void DiscreteProblemFormAssembler<Scalar>::assemble_matrix_form(MatrixForm<double, Scalar>* form, int order, Func<double>** base_fns, Func<double>** test_fns, Func<Scalar>** ext, Func<Scalar>** u_ext,
      AsmList<Scalar>* current_als_i, AsmList<Scalar>* current_als_j, Traverse::State* current_state, int n_quadrature_points, Geom<double>* geometry, double* jacobian_x_weights)
    {
      // Every form that is not a MatrixFormVol is handled as a surface form.
      bool surface_form = (dynamic_cast<MatrixFormVol<Scalar>*>(form) == nullptr);

      double block_scaling_coefficient = this->block_scaling_coeff(form);

      // tra: an off-diagonal block whose (anti-)symmetric counterpart is inserted as well.
      // sym: a diagonal block declared symmetric, so only one triangle is evaluated.
      bool tra = (form->i != form->j) && (form->sym != 0);
      bool sym = (form->i == form->j) && (form->sym == 1);

      // Assemble the local stiffness matrix for the form form.
      Scalar **local_stiffness_matrix = new_matrix<Scalar>(std::max(current_als_i->cnt, current_als_j->cnt));

      Func<Scalar>** local_ext = ext;
      // If the user supplied custom ext functions for this form.
      if(form->ext.size() > 0)
      {
        int local_ext_count = form->ext.size();
        local_ext = malloc_with_check(local_ext_count, this);
        for(int ext_i = 0; ext_i < local_ext_count; ext_i++)
          if(form->ext[ext_i])
            // The entry stays nullptr when the state has no element at this index.
            local_ext[ext_i] = current_state->e[ext_i] == nullptr ? nullptr : init_fn(form->ext[ext_i].get(), order);
          else
            local_ext[ext_i] = nullptr;
      }

      // Account for the previous time level solution previously inserted at the back of ext.
      if(rungeKutta)
        u_ext += form->u_ext_offset;

      // Actual form-specific calculation.
      for (unsigned int i = 0; i < current_als_i->cnt; i++)
      {
        // Rows with a negative DOF are skipped unconditionally.
        if(current_als_i->dof[i] < 0)
          continue;

        if(std::abs(current_als_i->coef[i]) < Hermes::HermesSqrtEpsilon)
          continue;
        if(!sym)
        {
          for (unsigned int j = 0; j < current_als_j->cnt; j++)
          {
            if(current_als_j->dof[j] >= 0)
            {
              // Is this necessary, i.e. is there a coefficient smaller than Hermes::HermesSqrtEpsilon?
              if(std::abs(current_als_j->coef[j]) < Hermes::HermesSqrtEpsilon)
                continue;

              Func<double>* u = base_fns[j];
              Func<double>* v = test_fns[i];

              // Surface forms carry an additional factor of 0.5.
              if(surface_form)
                local_stiffness_matrix[i][j] = 0.5 * block_scaling_coefficient * form->value(n_quadrature_points, jacobian_x_weights, u_ext, u, v, geometry, local_ext) * form->scaling_factor * current_als_j->coef[j] * current_als_i->coef[i];
              else
                local_stiffness_matrix[i][j] = block_scaling_coefficient * form->value(n_quadrature_points, jacobian_x_weights, u_ext, u, v, geometry, local_ext) * form->scaling_factor * current_als_j->coef[j] * current_als_i->coef[i];
            }
          }
        }
        // Symmetric block.
        else
        {
          for (unsigned int j = 0; j < current_als_j->cnt; j++)
          {
            // Entries below the diagonal were already written as the mirror of an earlier row.
            if(j < i && current_als_j->dof[j] >= 0)
              continue;
            if(current_als_j->dof[j] >= 0)
            {
              // Is this necessary, i.e. is there a coefficient smaller than Hermes::HermesSqrtEpsilon?
              if(std::abs(current_als_j->coef[j]) < Hermes::HermesSqrtEpsilon)
                continue;

              Func<double>* u = base_fns[j];
              Func<double>* v = test_fns[i];

              Scalar val = block_scaling_coefficient * form->value(n_quadrature_points, jacobian_x_weights, u_ext, u, v, geometry, local_ext) * form->scaling_factor * current_als_j->coef[j] * current_als_i->coef[i];

              local_stiffness_matrix[i][j] = local_stiffness_matrix[j][i] = val;
            }
          }
        }
      }

      // Insert the local stiffness matrix into the global one.
      current_mat->add(current_als_i->cnt, current_als_j->cnt, local_stiffness_matrix, current_als_i->dof, current_als_j->dof);

      // Insert also the off-diagonal (anti-)symmetric block, if required.
      if(tra)
      {
        if(form->sym < 0)
          chsgn(local_stiffness_matrix, current_als_i->cnt, current_als_j->cnt);
        transpose(local_stiffness_matrix, current_als_i->cnt, current_als_j->cnt);

        current_mat->add(current_als_j->cnt, current_als_i->cnt, local_stiffness_matrix, current_als_j->dof, current_als_i->dof);
      }

      // Release the form-specific ext functions created above.
      if(form->ext.size() > 0)
      {
        for(unsigned int ext_i = 0; ext_i < form->ext.size(); ext_i++)
        {
          // Test the allocated pointer itself: an entry is nullptr both when the
          // form has no ext function at this index and when the state had no
          // element for it, and neither case may be dereferenced.
          if(local_ext[ext_i])
          {
            local_ext[ext_i]->free_fn();
            delete local_ext[ext_i];
          }
        }
        free_with_check(local_ext);
      }

      if(rungeKutta)
        u_ext -= form->u_ext_offset;

      // Cleanup.
      free_with_check(local_stiffness_matrix);
    }
Beispiel #2
0
void Bspline3DSetBase::setLattice(const CrystalLattice<RealType,DIM>& lat)
{
  Lattice.set(lat);
  UnitLattice.set(lat);
  GGt=dot(Lattice.G,transpose(Lattice.G));
}
Beispiel #3
0
    /// Renders one frame into `window`: clears the buffers, draws the skydome,
    /// draws every procedural model and camera-position model with
    /// `simpleShader`, draws the grid, swaps buffers and increments `frameCount`.
    void on_draw() override
    {
        glfwMakeContextCurrent(window);
        
        glEnable(GL_CULL_FACE);
        glEnable(GL_DEPTH_TEST);

        int width, height;
        glfwGetWindowSize(window, &width, &height);
        glViewport(0, 0, width, height);
     
        // glClear() uses the clear color that is current at the time of the call,
        // so the color has to be set first for it to apply to this frame's clear.
        glClearColor(0.1f, 0.1f, 0.5f, 1.0f);
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

        const auto proj = camera.get_projection_matrix((float) width / (float) height);
        const float4x4 view = camera.get_view_matrix();
        const float4x4 viewProj = mul(proj, view);
        
        skydome.render(viewProj, camera.get_eye_point(), camera.farClip);

        // Simple Shader
        {
            simpleShader->bind();
            
            simpleShader->uniform("u_viewProj", viewProj);
            simpleShader->uniform("u_eye", camera.get_eye_point());
            
            simpleShader->uniform("u_emissive", float3(.10f, 0.10f, 0.10f));
            simpleShader->uniform("u_diffuse", float3(0.4f, 0.4f, 0.4f));
            
            // Upload each light into the u_lights[] uniform array.
            for (std::size_t i = 0; i < lights.size(); i++)
            {
                // Bind by reference: the light is only read, so no copy is needed.
                const auto & light = lights[i];
                const std::string element = "u_lights[" + std::to_string(i) + "]";
                
                simpleShader->uniform(element + ".position", light.pose.position);
                simpleShader->uniform(element + ".color", light.color);
            }
            
            for (const auto & model : proceduralModels)
            {
                // Fetch the model matrix once and reuse it for both uniforms.
                const auto & modelMatrix = model.get_model();
                simpleShader->uniform("u_modelMatrix", modelMatrix);
                simpleShader->uniform("u_modelMatrixIT", inv(transpose(modelMatrix)));
                model.draw();
            }
            
            for (const auto & model : cameraPositions)
            {
                const auto & modelMatrix = model.get_model();
                simpleShader->uniform("u_modelMatrix", modelMatrix);
                simpleShader->uniform("u_modelMatrixIT", inv(transpose(modelMatrix)));
                model.draw();
            }
            gl_check_error(__FILE__, __LINE__);
            
            simpleShader->unbind();
        }
        
        grid.render(proj, view);

        gl_check_error(__FILE__, __LINE__);
        
        glfwSwapBuffers(window);
        
        frameCount++;
    }
Beispiel #4
0
/// Returns the transpose of the stored matrix `_mat`.
/// NOTE(review): presumably this equals the inverse because `_mat` is a pure
/// rotation (orthonormal) matrix - confirm that this invariant always holds.
Angel::mat4 RotMat::inverse() const
{
  return transpose(_mat);
}
Beispiel #5
0
/***********************************************************************//**
 * @brief Test matrix functions
 *
 * Tests matrix functions.
 ***************************************************************************/
void TestGSymMatrix::matrix_functions(void)
{
    // Minimum
	double min = m_test.min();

    // Check mimimum
    double value = g_matrix[0];
    for (int row = 0; row < g_rows; ++row) {
        for (int col = 0; col < g_cols; ++col) {
            if (g_matrix[col+row*g_cols] < value) {
                value = g_matrix[col+row*g_cols];
            }
        }
    }
    test_value(min, value, 0.0, "Test minimum function");

    // Maximum
	double max = m_test.max();

    // Check maximum
    value = g_matrix[0];
    for (int row = 0; row < g_rows; ++row) {
        for (int col = 0; col < g_cols; ++col) {
            if (g_matrix[col+row*g_cols] > value) {
                value = g_matrix[col+row*g_cols];
            }
        }
    }
    test_value(max, value, 0.0, "Test maximum function");

	// Sum
	double sum = m_test.sum();

    // Check sum
    value = 0.0;
    for (int row = 0; row < g_rows; ++row) {
        for (int col = 0; col < g_cols; ++col) {
            value += g_matrix[col+row*g_cols];
        }
    }
    test_value(sum, value, 1.0e-20, "Test sum function");

    // Transpose function
	GSymMatrix test1 = transpose(m_test);
    test_assert(check_matrix(m_test), "Test source matrix");
    test_assert(check_matrix(test1, 1.0, 0.0),
                "Test transpose(GSymMatrix) function",
                "Unexpected transposed matrix:\n"+test1.print());

    // Transpose method
	test1 = m_test;
	test1.transpose();
    test_assert(check_matrix(m_test), "Test source matrix");
    test_assert(check_matrix(test1, 1.0, 0.0), 
                "Test GSymMatrix.transpose() method",
                "Unexpected transposed matrix:\n"+test1.print());

    // Convert to general matrix
    GMatrix test2 = GMatrix(m_test);
    test_assert(check_matrix(m_test), "Test source matrix");
    test_assert(check_matrix(test2, 1.0, 0.0), 
                "Test GMatrix(GSymMatrix) constructor",
                "Unexpected GMatrix:\n"+test2.print());

    // Extract lower triangle
    test2 = m_test.extract_lower_triangle();
    test_assert(check_matrix(m_test), "Test source matrix");
    test_assert(check_matrix_lt(test2, 1.0, 0.0), 
                "Test GSymMatrix.extract_lower_triangle() method",
                "Unexpected GMatrix:\n"+test2.print());

    // Extract upper triangle
    test2 = m_test.extract_upper_triangle();
    test_assert(check_matrix(m_test), "Test source matrix");
    test_assert(check_matrix_ut(test2, 1.0, 0.0), 
                "Test GSymMatrix.extract_upper_triangle() method",
                "Unexpected GMatrix:\n"+test2.print());

    // Return
    return;
}
Beispiel #6
0
 /// Returns transpose() of this array with the second argument set to true.
 /// NOTE(review): the flag presumably requests a conjugate (Hermitian)
 /// transpose - confirm against the transpose() declaration.
 array array::H() const
 {
     const array &self = *this;
     return transpose(self, true);
 }
Beispiel #7
0
/* adat_symbolic - build the row-wise pattern of the strictly upper
   triangle of S, where symbolically S = B*B' and B = P*A.

   All arrays are addressed from index 1. A is given row-wise through
   A_ptr[1..m+1] and A_ind; P_per[1..m] maps a row of B to a row of A
   and P_per[m+1..2m] is used for the opposite direction. On return
   S_ptr[1..m+1] holds the row pointers of the pattern, and the routine
   returns a newly allocated array with the column indices, which holds
   exactly S_ptr[m+1]-1 entries starting at position 1. */
int *adat_symbolic(int m, int n, int P_per[], int A_ptr[], int A_ind[],
      int S_ptr[])
{     int row, col, peer, pa, pt, q, ii, size, count;
      int *S_ind, *AT_ptr, *AT_ind, *pattern, *mark, *old;
      /* A' (the transpose of A) gives column-wise access to A */
      AT_ptr = xcalloc(1+n+1, sizeof(int));
      AT_ind = xcalloc(A_ptr[m+1], sizeof(int));
      transpose(m, n, A_ptr, A_ind, NULL, AT_ptr, AT_ind, NULL);
      /* initial capacity of S_ind: number of non-zeros of A, but at
         least m */
      size = A_ptr[m+1] - 1;
      if (size < m)
         size = m;
      S_ind = xcalloc(1+size, sizeof(int));
      /* pattern collects the current row, mark flags the columns that
         are already in it */
      pattern = xcalloc(1+m, sizeof(int));
      mark = xcalloc(1+m, sizeof(int));
      for (q = 1; q <= m; q++)
         mark[q] = 0;
      /* build S one row at a time */
      S_ptr[1] = 1;
      for (ii = 1; ii <= m; ii++)
      {  count = 0;
         /* row ii of B is row P_per[ii] of A */
         row = P_per[ii];
         for (pa = A_ptr[row]; pa < A_ptr[row+1]; pa++)
         {  col = A_ind[pa];
            /* every row of A sharing column col with the current row
               produces a non-zero of S */
            for (pt = AT_ptr[col]; pt < AT_ptr[col+1]; pt++)
            {  /* row AT_ind[pt] of A is row peer of B */
               peer = P_per[m + AT_ind[pt]];
               /* keep only the strictly upper triangle, once each */
               if (peer <= ii || mark[peer])
                  continue;
               pattern[++count] = peer;
               mark[peer] = 1;
            }
         }
         /* row ii of S needs count more locations in S_ind */
         S_ptr[ii+1] = S_ptr[ii] + count;
         if (S_ptr[ii+1] - 1 > size)
         {  /* double the capacity and carry over the rows stored so
               far */
            old = S_ind;
            size += size;
            S_ind = xcalloc(1+size, sizeof(int));
            memcpy(S_ind + 1, old + 1, (S_ptr[ii] - 1) * sizeof(int));
            xfree(old);
         }
         xassert(S_ptr[ii+1] - 1 <= size);
         /* append the collected row */
         memcpy(S_ind + S_ptr[ii], pattern + 1, count * sizeof(int));
         /* reset the flags touched by this row */
         for (q = 1; q <= count; q++)
            mark[pattern[q]] = 0;
      }
      /* release the working storage */
      xfree(AT_ptr);
      xfree(AT_ind);
      xfree(pattern);
      xfree(mark);
      /* shrink S_ind to the number of locations actually used */
      old = S_ind;
      size = S_ptr[m+1] - 1;
      S_ind = xcalloc(1+size, sizeof(int));
      memcpy(S_ind + 1, old + 1, size * sizeof(int));
      xfree(old);
      return S_ind;
}
    // Computes a QR factorisation of M by calling MINPACK::qrfac on the
    // transpose of M and unpacking the result into q and r.
    //
    //   M      input matrix with m rows and n columns
    //   q      output; resized to m x n if it does not already have that shape
    //   r      output; resized to n x n if it does not already have that shape
    //   pivot  forwarded to qrfac as 1/0
    //
    // Returns a vector of n indices: the contents of the pivot array filled by
    // qrfac when pivot is true, otherwise 0, 1, ..., n-1.
    Disposable<std::vector<Size> > qrDecomposition(const Matrix& M,
                                                   Matrix& q, Matrix& r,
                                                   bool pivot) {
        // qrfac works on the transposed copy, which it overwrites in place.
        Matrix mT = transpose(M);
        const Size m = M.rows();
        const Size n = M.columns();

        // Work arrays handed to qrfac.
        boost::scoped_array<int> lipvt(new int[n]);
        boost::scoped_array<Real> rdiag(new Real[n]);
        boost::scoped_array<Real> wa(new Real[n]);

        // NOTE(review): rdiag is passed for two consecutive output arguments;
        // presumably the second output is not needed here - confirm against
        // the qrfac signature.
        MINPACK::qrfac(m, n, mT.begin(), 0, (pivot)?1:0,
                       lipvt.get(), n, rdiag.get(), rdiag.get(), wa.get());
        if (r.columns() != n || r.rows() !=n)
            r = Matrix(n, n);

        // Assemble r row by row: zeros left of the diagonal, rdiag on the
        // diagonal, and the remainder taken from mT (or zeros when i >= m).
        for (Size i=0; i < n; ++i) {
            std::fill(r.row_begin(i), r.row_begin(i)+i, 0.0);
            r[i][i] = rdiag[i];
            if (i < m) {
                std::copy(mT.column_begin(i)+i+1, mT.column_end(i),
                          r.row_begin(i)+i+1);
            }
            else {
                std::fill(r.row_begin(i)+i+1, r.row_end(i), 0.0);
            }
        }

        if (q.rows() != m || q.columns() != n)
            q = Matrix(m, n);

        if (m > n) {
            // More rows than columns: start from a matrix with ones on the
            // diagonal and apply the updates stored in mT in reverse order.
            std::fill(q.begin(), q.end(), 0.0);

            Integer u = std::min(n,m);
            for (Size i=0; i < u; ++i)
                q[i][i] = 1.0;

            Array v(m);
            for (Integer i=u-1; i >=0; --i) {
                // Skip steps whose diagonal entry is numerically zero.
                if (std::fabs(mT[i][i]) > QL_EPSILON) {
                    const Real tau = 1.0/mT[i][i];

                    // v is row i of mT from position i on, zero before it.
                    std::fill(v.begin(), v.begin()+i, 0.0);
                    std::copy(mT.row_begin(i)+i, mT.row_end(i), v.begin()+i);

                    // w[l] = <v, column l of q>, restricted to rows i..m-1.
                    Array w(n, 0.0);
                    for (Size l=0; l < n; ++l)
                        w[l] += std::inner_product(
                            v.begin()+i, v.end(), q.column_begin(l)+i, 0.0);

                    // Rank-one update q -= tau * v * w^T on rows i..m-1.
                    for (Size k=i; k < m; ++k) {
                        const Real a = tau*v[k];
                        for (Size l=0; l < n; ++l)
                            q[k][l] -= a*w[l];
                    }
                }
            }
        }
        else {
            // m <= n: build q one row at a time, starting from unit vector k
            // and applying the updates stored in mT in forward order.
            Array w(m);
            for (Size k=0; k < m; ++k) {
                std::fill(w.begin(), w.end(), 0.0);
                w[k] = 1.0;

                for (Size j=0; j < std::min(n, m); ++j) {
                    const Real t3 = mT[j][j];
                    // A zero diagonal entry means there is nothing to apply
                    // for this step.
                    if (t3 != 0.0) {
                        const Real t
                            = std::inner_product(mT.row_begin(j)+j, mT.row_end(j),
                                                 w.begin()+j, 0.0)/t3;
                        for (Size i=j; i<m; ++i) {
                            w[i]-=mT[j][i]*t;
                        }
                    }
                    q[k][j] = w[j];
                }
                // Columns beyond min(n, m) of this row are set to zero.
                std::fill(q.row_begin(k) + std::min(n, m), q.row_end(k), 0.0);
            }
        }

        std::vector<Size> ipvt(n);

        if (pivot) {
            // Report the column order chosen by qrfac.
            std::copy(lipvt.get(), lipvt.get()+n, ipvt.begin());
        }
        else {
            // Without pivoting the order is the identity.
            for (Size i=0; i < n; ++i)
                ipvt[i] = i;
        }

        return ipvt;
    }
Beispiel #9
0
/// Transposes this matrix by delegating to the transpose overload that takes
/// a Matrix pointer, passing this object as the argument.
void Matrix::transpose()
{
    this->transpose(this);
}
/// Extrapolates the integration point values of one element to its nodes by
/// linear least squares and accumulates the result.
///
/// The pseudo-inverse of the interpolation matrix (one shape matrix per row)
/// is computed once per (number of nodes, number of integration points) pair
/// and cached. The nodal values obtained by applying it are added to
/// `_nodal_values`, and `counts` is incremented by 1.0 for every index that
/// was written.
///
/// \param element_index     index of the element to process
/// \param num_components    number of components per integration point
/// \param extrapolatables   provides integration point values and shape matrices
/// \param t                 time forwarded to getIntegrationPointValues()
/// \param current_solution  forwarded to getIntegrationPointValues()
/// \param dof_table         forwarded to getIntegrationPointValues()
/// \param counts            receives +1.0 at every index that got a contribution
void LocalLinearLeastSquaresExtrapolator::extrapolateElement(
    std::size_t const element_index,
    const unsigned num_components,
    ExtrapolatableElementCollection const& extrapolatables,
    const double t,
    GlobalVector const& current_solution,
    LocalToGlobalIndexMap const& dof_table,
    GlobalVector& counts)
{
    auto const& integration_point_values =
        extrapolatables.getIntegrationPointValues(
            element_index, t, current_solution, dof_table,
            _integration_point_values_cache);

    auto const& N_0 = extrapolatables.getShapeMatrix(element_index, 0);
    auto const num_nodes = static_cast<unsigned>(N_0.cols());
    auto const num_values =
        static_cast<unsigned>(integration_point_values.size());

    if (num_values % num_components != 0)
        OGS_FATAL(
            "The number of computed integration point values is not divisible "
            "by the number of num_components. Maybe the computed property is "
            "not a %u-component vector for each integration point.",
            num_components);

    // number of integration points in the element
    const auto num_int_pts = num_values / num_components;

    // Adjacent string literals are concatenated verbatim, so the separating
    // space has to be part of the first literal.
    if (num_int_pts < num_nodes)
        OGS_FATAL(
            "Least squares is not possible if there are more nodes than "
            "integration points.");

    // emplace() only inserts if no entry exists for this (nodes, int. points)
    // combination; .second tells whether the entry is new.
    auto const pair_it_inserted = _qr_decomposition_cache.emplace(
        std::make_pair(num_nodes, num_int_pts), CachedData{});

    auto& cached_data = pair_it_inserted.first->second;
    if (pair_it_inserted.second)
    {
        DBUG("Computing new singular value decomposition");

        // interpolation_matrix * nodal_values = integration_point_values
        // We are going to pseudo-invert this relation now using singular value
        // decomposition.
        auto& interpolation_matrix = cached_data.A;
        interpolation_matrix.resize(num_int_pts, num_nodes);

        interpolation_matrix.row(0) = N_0;
        for (unsigned int_pt = 1; int_pt < num_int_pts; ++int_pt)
        {
            auto const& shp_mat =
                extrapolatables.getShapeMatrix(element_index, int_pt);
            assert(shp_mat.cols() == num_nodes);

            // copy shape matrix to extrapolation matrix row-wise
            interpolation_matrix.row(int_pt) = shp_mat;
        }

        // JacobiSVD is extremely reliable, but fast only for small matrices.
        // But we usually have small matrices and we don't compute very often.
        // Cf.
        // http://eigen.tuxfamily.org/dox/group__TopicLinearAlgebraDecompositions.html
        //
        // Decomposes interpolation_matrix = U S V^T.
        Eigen::JacobiSVD<Eigen::MatrixXd> svd(
            interpolation_matrix, Eigen::ComputeThinU | Eigen::ComputeThinV);

        auto const& S = svd.singularValues();
        auto const& U = svd.matrixU();
        auto const& V = svd.matrixV();

        // Compute and save the pseudo inverse V * S^{-1} * U^T.
        auto const rank = svd.rank();
        assert(rank == num_nodes);

        // cf. http://eigen.tuxfamily.org/dox/JacobiSVD_8h_source.html
        cached_data.A_pinv.noalias() = V.leftCols(rank) *
                                       S.head(rank).asDiagonal().inverse() *
                                       U.leftCols(rank).transpose();
    }
    else if (cached_data.A.row(0) != N_0)
    {
        // Sanity check: a cache hit must belong to the same shape functions.
        OGS_FATAL("The cached and the passed shapematrices differ.");
    }

    auto const& global_indices =
        _dof_table_single_component(element_index, 0).rows;

    if (num_components == 1)
    {
        auto const integration_point_values_vec =
            MathLib::toVector(integration_point_values);

        // Apply the pre-computed pseudo-inverse.
        Eigen::VectorXd const nodal_values =
            cached_data.A_pinv * integration_point_values_vec;

        // TODO does that give rise to PETSc problems? E.g., writing to ghost
        // nodes? Furthermore: Is ghost nodes communication necessary for PETSc?
        _nodal_values->add(global_indices, nodal_values);
        counts.add(global_indices,
                   std::vector<double>(global_indices.size(), 1.0));
    }
    else
    {
        auto const integration_point_values_mat = MathLib::toMatrix(
            integration_point_values, num_components, num_int_pts);

        // Apply the pre-computed pseudo-inverse.
        Eigen::MatrixXd const nodal_values =
            cached_data.A_pinv * integration_point_values_mat.transpose();

        std::vector<GlobalIndexType> indices;
        indices.reserve(num_components * global_indices.size());

        // _nodal_values is ordered location-wise
        // The component loop is the outer one so that the index order matches
        // the order in which nodal_values.data() is read below.
        for (unsigned comp = 0; comp < num_components; ++comp)
        {
            for (auto i : global_indices)
            {
                indices.push_back(num_components * i + comp);
            }
        }

        // Nodal_values are passed as a raw pointer, because PETScVector and
        // EigenVector implementations differ slightly.
        _nodal_values->add(indices, nodal_values.data());
        counts.add(indices, std::vector<double>(indices.size(), 1.0));
    }
}
Beispiel #11
0
    // Renders one frame: skydome, three shadow-map passes (directional, spot,
    // point), a blur of the directional shadow map, the lit scene pass, the
    // ImGui widgets and the four debug views, then swaps the window buffers.
    void on_draw() override
    {
        glfwMakeContextCurrent(window);

        // igm may be null; UI frame bracketing is skipped in that case.
        if (igm) igm->begin_frame();

        int width, height;
        glfwGetWindowSize(window, &width, &height);
        glViewport(0, 0, width, height);

        glEnable(GL_DEPTH_TEST);
        glEnable(GL_CULL_FACE);
        glDepthMask(GL_TRUE);

        float windowAspectRatio = (float) width / (float) height;

        const auto proj = camera.get_projection_matrix(windowAspectRatio);
        const float4x4 view = camera.get_view_matrix();
        const float4x4 viewProj = mul(proj, view);

        glClearColor(0.1f, 0.1f, 0.1f, 1.0f);
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

        skydome.render(viewProj, camera.get_eye_point(), camera.farClip);

        // The camera position is handed to the sun light when its
        // view-projection matrix is requested.
        float3 target = camera.pose.position;

        ////////////////////////////////////////////
        // Directional Light Shadowmap Pass (sun) //
        ////////////////////////////////////////////
        {
            shadowFramebuffer.bind_to_draw();
            shadowmapShader->bind();

            glClear(GL_DEPTH_BUFFER_BIT);
            glViewport(0, 0, shadowmapResolution, shadowmapResolution);

            shadowmapShader->uniform("u_lightViewProj", sunLight->get_view_proj_matrix(target));

            // Only objects flagged as shadow casters are drawn in this pass.
            for (auto & object : sceneObjects)
            {
                if (object->castsShadow)
                {
                    shadowmapShader->uniform("u_modelMatrix", object->get_model());
                    object->draw();
                }
            }

            shadowmapShader->unbind();
            shadowFramebuffer.unbind();
        }


        ///////////////////////////////
        // Spot Light Shadowmap Pass //
        ///////////////////////////////
        {

            // One pass per spot light framebuffer.
            for (int i = 0; i < spotLightFramebuffers.size(); ++i)
            {
                spotLightFramebuffers[i]->shadowFramebuffer.bind_to_draw();
                shadowmapShader->bind();

                glClear(GL_DEPTH_BUFFER_BIT);
                glViewport(0, 0, shadowmapResolution, shadowmapResolution);

                // NOTE(review): every framebuffer is rendered with the matrix of
                // spotLights[0], not spotLights[i].
                shadowmapShader->uniform("u_lightViewProj", spotLights[0]->get_view_proj_matrix()); // only take the first into account for debugging

                for (auto & object : sceneObjects)
                {
                    if (object->castsShadow)
                    {
                        shadowmapShader->uniform("u_modelMatrix", object->get_model());
                        object->draw();
                    }
                }

                shadowmapShader->unbind();
                spotLightFramebuffers[i]->shadowFramebuffer.unbind();
            }

        }

        ////////////////////////////////
        // Point Light Shadowmap Pass //
        ////////////////////////////////
        {
            glViewport(0, 0, shadowmapResolution, shadowmapResolution);

            // One pass per cube face.
            for (int i = 0; i < 6; ++i)
            {
                pointLightFramebuffer->bind(pointLightFramebuffer->faces[i].face);

                pointLightShader->bind();

                glClear(GL_DEPTH_BUFFER_BIT);

                pointLightFramebuffer->faces[i].faceCamera.set_position(pointLight->position); // set position from light data to camera for shadow fbo
                // This local viewProj shadows the camera viewProj declared above
                // for the rest of this loop body.
                auto viewProj = mul(pointLightFramebuffer->get_projection(), pointLightFramebuffer->faces[i].faceCamera.get_view_matrix());

                pointLightShader->uniform("u_lightWorldPosition", pointLight->position);
                pointLightShader->uniform("u_lightViewProj", viewProj);

                for (auto & object : sceneObjects)
                {
                    if (object->castsShadow)
                    {
                        pointLightShader->uniform("u_modelMatrix", object->get_model());
                        object->draw();
                    }
                }

                pointLightShader->unbind();
                pointLightFramebuffer->unbind();
            }
        }

        // Blur applied to the directional light shadowmap only (others later)
        {
            shadowBlurFramebuffer.bind_to_draw();
            glDrawBuffer(GL_COLOR_ATTACHMENT0);

            gaussianBlurShader->bind();

            // Configured for a 7x7
            gaussianBlurShader->uniform("blurSize", 1.0f / shadowmapResolution);
            gaussianBlurShader->uniform("sigma", blurSigma);
            gaussianBlurShader->uniform("u_modelViewProj", Identity4x4);

            // Horizontal
            gaussianBlurShader->texture("s_blurTexure", 0, shadowDepthTexture);
            gaussianBlurShader->uniform("numBlurPixelsPerSide", 3.0f);
            gaussianBlurShader->uniform("blurMultiplyVec", float2(1.0f, 0.0f));
            fullscreen_post_quad.draw_elements();

            // Vertical
            // NOTE(review): this pass samples shadowBlurTexture while
            // shadowBlurFramebuffer is still bound for drawing - confirm the
            // texture is not an attachment of that framebuffer.
            gaussianBlurShader->texture("s_blurTexure", 0, shadowBlurTexture);
            gaussianBlurShader->uniform("numBlurPixelsPerSide", 3.0f);
            gaussianBlurShader->uniform("blurMultiplyVec", float2(0.0f, 1.0f));
            fullscreen_post_quad.draw_elements();

            gaussianBlurShader->unbind();

            shadowBlurFramebuffer.unbind();
        }

        // Lit scene pass: restores the window viewport and draws every scene
        // object with the light and shadow-map uniforms set below.
        {
            glViewport(0, 0, width, height);
            sceneShader->bind();

            sceneShader->uniform("u_viewProj", viewProj);
            sceneShader->uniform("u_eye", camera.get_eye_point());
            sceneShader->uniform("u_directionalLight.color", sunLight->color);
            sceneShader->uniform("u_directionalLight.direction", sunLight->direction);
            sceneShader->uniform("u_dirLightViewProjectionMat", sunLight->get_view_proj_matrix(target));

            // Texture unit counter; unit 0 goes to the directional shadow map.
            int samplerIndex = 0;
            sceneShader->uniform("u_shadowMapBias", 0.01f / shadowmapResolution); // fixme
            sceneShader->uniform("u_shadowMapTexelSize", float2(1.0f / shadowmapResolution));
            sceneShader->texture("s_directionalShadowMap", samplerIndex++, shadowBlurTexture);

            // Only the first spot light and the single point light are uploaded.
            sceneShader->uniform("u_spotLightViewProjectionMat[0]", spotLights[0]->get_view_proj_matrix());

            sceneShader->uniform("u_spotLights[0].color", spotLights[0]->color);
            sceneShader->uniform("u_spotLights[0].direction", spotLights[0]->direction);
            sceneShader->uniform("u_spotLights[0].position", spotLights[0]->position);
            sceneShader->uniform("u_spotLights[0].cutoff", spotLights[0]->get_cutoff());
            sceneShader->uniform("u_spotLights[0].constantAtten", spotLights[0]->attenuation.x);
            sceneShader->uniform("u_spotLights[0].linearAtten", spotLights[0]->attenuation.y);
            sceneShader->uniform("u_spotLights[0].quadraticAtten", spotLights[0]->attenuation.z);

            sceneShader->uniform("u_pointLights[0].color", pointLight->color);
            sceneShader->uniform("u_pointLights[0].position", pointLight->position);
            sceneShader->uniform("u_pointLights[0].constantAtten", pointLight->attenuation.x);
            sceneShader->uniform("u_pointLights[0].linearAtten", pointLight->attenuation.y);
            sceneShader->uniform("u_pointLights[0].quadraticAtten", pointLight->attenuation.z);

            // Update the spotlight 2D sampler
            // Spot light maps use units samplerIndex + i; samplerIndex itself is
            // not advanced here.
            for (int i = 0; i < spotLightFramebuffers.size(); ++i)
            {
                auto & fbo = spotLightFramebuffers[i];
                std::string uniformLocation = "s_spotLightShadowMap[" + std::to_string(i) + "]";
                sceneShader->texture(uniformLocation.c_str(), samplerIndex + i, fbo->shadowDepthTexture);
            }

            // Update the pointlight cube sampler
            // NOTE(review): the same cubemap handle is bound under the same
            // uniform name to units 2..7, using a hard-coded base of 2 rather
            // than samplerIndex; this overlaps the spot light units when more
            // than one spot light framebuffer exists - confirm intent.
            for (int i = 0; i < 6; i++)
                sceneShader->texture("s_pointLightCubemap[0]", 2 + i, pointLightFramebuffer->cubeMapHandle, GL_TEXTURE_CUBE_MAP);

            for (auto & object : sceneObjects)
            {
                sceneShader->uniform("u_modelMatrix", object->get_model());
                // Inverse of the transposed model matrix, uploaded alongside the
                // model matrix itself.
                sceneShader->uniform("u_modelMatrixIT", inv(transpose(object->get_model())));
                object->draw();
                gl_check_error(__FILE__, __LINE__);
            }

            sceneShader->unbind();
        }

        // Debug UI widgets. These calls run even when igm is null.
        // NOTE(review): confirm an ImGui frame is always active at this point.
        {
            ImGui::Separator();
            ImGui::SliderFloat("Near Clip", &camera.nearClip, 0.1f, 2.0f);
            ImGui::SliderFloat("Far Clip", &camera.farClip, 2.0f, 75.0f);
            ImGui::DragFloat3("Light Direction", &sunLight->direction[0], 0.1f, -1.0f, 1.0f);
            ImGui::Separator();
            ImGui::SliderFloat("Blur Sigma", &blurSigma, 0.05f, 9.0f);
            ImGui::Separator();
            ImGui::Text("Application average %.3f ms/frame (%.1f FPS)", 1000.0f / ImGui::GetIO().Framerate, ImGui::GetIO().Framerate);
        }

        // Draw the four views into the bounds of the first four children of
        // uiSurface.
        viewA->draw(uiSurface.children[0]->bounds, int2(width, height));
        viewB->draw(uiSurface.children[1]->bounds, int2(width, height));
        viewC->draw(uiSurface.children[2]->bounds, int2(width, height));
        viewD->draw(uiSurface.children[3]->bounds, int2(width, height));

        gl_check_error(__FILE__, __LINE__);

        if (igm) igm->end_frame();

        glfwSwapBuffers(window);
    }
/*
 * varinit - reset the module's global state to its starting values.
 *
 * Clears counters, flags and working arrays, derives the time-step
 * constants from del_t, takes the starting latitude/longitude, velocity
 * and altitude from the Master* globals, and builds the initial attitude
 * quaternion and the matrices derived from it. Takes no arguments and
 * returns nothing; all inputs and outputs are globals.
 */
void varinit(void)
{
	int i;
	/*
	 * Resetting all flags
	 */
	Intr1_Cnt=0;
	Intr2_Cnt=0;
	IRQ1Flag = 1;
	IRQ2Flag = 1;

	WSZ = 34;
	TA_cnt =0;


	count = 0;
	qcnt = 0;

	velcnt = 0;
	rtime = 0.0;
	rcnt = 0;
	cnt_10ms = 0;




	/* Starting position is taken from the master values. */
	latm = MasterLat;
	longm = MasterLon;

	/* Constants derived from the time step del_t. */
	epsilon = 0.0;
	four_delt = 4.0 * del_t;
	eight_delt = 8.0 * del_t;
	cdr_delt = cdr * del_t;
	/* NOTE(review): the name suggests milliseconds but the divisor is 3600 -
	 * confirm the intended unit. */
	cdr_delt_ms = cdr_delt / 3600;

	/* NOTE(review): 32 entries are cleared while WSZ is set to 34 above -
	 * confirm the size of Array_SA. */
	for(i=0;i<32;i++){
		Array_SA[i] = 0;
	}

	/* Zero all 3-element working vectors. */
	for(i=0;i<3;i++)
	{
		velo_ref_y[i] = 0.0;
		velo_ref_yold[i] = 0.0;;
		velo_ref_x[i] = 0.0;
		velo_ref_xold[i] = 0.0;

		pure_vel[i] = 0.0;

		p_velo_20ms[i] = 0.0;
		p_velo[i] = 0.0;

		pure_v_old[i] = 0.0;
		p_Ang[i] = 0.0;

		pure_gyro_drift[i] = 0.0;
		pure_acc_residu[i] = 0.0;

	}

/* Everything up to the matching #endif is compiled out, including the
 * "else if" at its end, so the brace block that follows always runs. */
#if 0

	/* these are known misalignment angles between M and S -
	 * Measured w.r.t Master to give DCM from slave to Master.
	 * Beware they are not between slave to NED */
	known_si    =  0.0 * cdr;
	known_theta =  0.0 * cdr;
	known_phi   =  0.0 * cdr;

	euler2dcm_stp(0, 0, 0, (double*)CSkew_est);
	transpose(3, 3, (double*)CSkew_est, (double*)CSkew_est_T);

	euler2dcm_stp(known_si, known_theta, known_phi, (double*)CS2M_K);
	transpose(3, 3, (double*)CS2M_K, (double*)CM2S_K);

	euler2dcm_stp(THDG, PITCH, ROLL, (double*)Cb2ned_M);
	matmul(3, 3, (double*)Cb2ned_M, 3, 3, (double*)CS2M_K, (double*)Cb2ned_S);

	if(ta_flag==1 && nav_flag==1)

	{
		dcm2quat((double*)Cb2ned_S, (double *)p_q_body2ned);

	}

	else if(ta_flag ==0 && level_flag==1)
#endif
	{

		/* Initial attitude quaternion from the mdl_* angles; note the
		 * argument order (si, phi, theta). */
		euler2quat_spt(mdl_si,mdl_phi,mdl_theta,(double *)p_q_body2ned);


		p_si = mdl_si;
		p_phi = mdl_phi;
		p_theta = mdl_theta;



	}

	/* Combine the quaternion computed from latm/longm with the attitude
	 * quaternion into p_q_body2ecef. */
	ned2ecef_q(latm, longm,(double*) q_ned2ecef);
	quat_mult((double*)q_ned2ecef,(double*)p_q_body2ned, (double*)p_q_body2ecef);


	/*
	 * Modification after Manjit discussion
	 */
	quat2dcm((double *)p_q_body2ecef,(double*)p_dcm);


	/* pure_vel = p_dcm_n (3x3) * MasterVel (3x1); this overwrites the zeros
	 * written in the loop above. */
	quat2dcm((double *)q_ned2ecef,(double*)p_dcm_n);
	matmul(3,3, (double*)p_dcm_n,3,1,(double*)MasterVel,(double*)pure_vel);




	/* Seed the previous-velocity vector with the initial velocity. */
	pure_v_old[0] = pure_vel[0];
	pure_v_old[1] = pure_vel[1];
	pure_v_old[2] = pure_vel[2];

	/* These four vectors were already zeroed in the loop above. */
	init(0.0, 0.0, 0.0, p_velo_20ms);

	init(0.0, 0.0, 0.0, p_velo);


	init(0.0,0.0,0.0,pure_gyro_drift);
	init(0.0,0.0,0.0,pure_acc_residu);



	for (i = 0; i < 3; i++)
	{
		p_alp1[i] = 0.0;    p_alp2[i] = 0.0;    p_alp3[i] = 0.0;    p_alp4[i] = 0.0;

	}

	for (i = 0; i < 3; i++)
		Delta_Angle[i] = 0.0;

	for (i = 0; i < 6; i++)
		accum1[i] = 0.0;

	init(0.0, 0.0, earth_rate, omega);	 //earth rate vector ECEF

	//used in levelling
	Ned_omega[0] = earth_rate * cos(latm);
	Ned_omega[1] = 0.0;
	Ned_omega[2] = -earth_rate *sin(latm);

	/* omg_dub is twice the earth rate vector. */
	for (i = 0; i < 3; i++)
		omg_dub[i] = 2.0 * omega[i];

	/* Radius at the starting latitude: r0 * (1 - eccen * sin^2(latm)). */
	r_init = r0 * (1.0 - eccen * (sin(latm) * sin(latm)));


	pure_R = r_init + MasterAlt; // altitude;


	lla2ecef(latm,longm,MasterAlt,(double *)pure_ecef_pos); //input is geodetic



	/* NOTE(review): presumably this updates pure_g_ecef_mag, which is read
	 * just below - confirm. */
	pure_g_ecef();

	/****  for epsilon estimation   ****/

	init(0.0, 0.0, -pure_g_ecef_mag, Ned_gravity_detic);

}								 //end of varinit()
Beispiel #13
0
void svd_gpu(int m, int n, double* A,double * sigma, double * U, double* V){
  /**********************************************
   * Description: Function call that performs an SVD on the matrix A.
   *
   *  Author: Steven Delong
   *
   * inputs:
   *         A - m x n matrix, stored in column-major order.
   *             It is handed to bidiag_par() first and is later read back
   *             as the source of the Householder reflectors, so callers
   *             should presumably treat it as overwritten.
   *         m - number of rows of A
   *         n - number of columns of A
   *
   * Outputs:
   *        U - left singular vectors of A; vector i is written at U + i*m
   *        V - right singular vectors of A; vector i is written at V + i*n
   *        sigma - singular values of A, length min(m,n)
   *
   * Aborts the process if a scratch buffer cannot be allocated.
   ***********************************************************/

  // figure out minimum dimension
  const int mn = (m >= n) ? n : m;
  const int len_beta = (m >= n) ? n-1 : m;

  // len_beta is 0 when n == 1 and m >= n.  malloc(0) is allowed to return
  // NULL, which would be misreported as an allocation failure below, so
  // always request room for at least one element.
  const int beta_alloc = (len_beta > 0) ? len_beta : 1;

  // allocate space for intermediate values
  double * AT = (double *) malloc(sizeof(double) * m *n);
  if(!AT) { fprintf(stderr,"in svd_gpu: failed to allocate AT\n"); abort();}
  double * alpha = (double *) malloc(sizeof(double) *mn);
  if(!alpha) { fprintf(stderr,"in svd_gpu: failed to allocate alpha\n"); abort();}

  double *beta = (double *) malloc(sizeof(double) *beta_alloc);
  if(!beta) { fprintf(stderr,"in svd_gpu: failed to allocate beta\n"); abort();}

  double *X = (double *) malloc(sizeof(double) *mn*(len_beta + 1));
  if(!X) { fprintf(stderr,"in svd_gpu: failed to allocate X\n"); abort();}
  double *Y = (double *) malloc(sizeof(double) *mn*mn);
  if(!Y) { fprintf(stderr,"in svd_gpu: failed to allocate Y\n"); abort();}


  // bidiagonalize
  bidiag_par(m,n,A,alpha,beta);

  //transpose householder reflectors for applying to X later
  transpose(m,n,A,AT);

  //get singular values
  GetSingularValues_Parallel( mn , alpha, beta, sigma);


  // calculate right singular vectors
  CalcRightSingularVectors(mn,len_beta + 1,alpha,beta,sigma,X);

  // get left singular vectors
  RighttoLeftSingularVectors(mn,len_beta + 1,alpha,beta,sigma,X,Y);


  // apply householder reflectors to X,Y to get U,V
  // (each iteration writes only its own slice of U and V)
#pragma omp parallel for default(shared) firstprivate(A)
  for(int i = 0; i < mn; i++){
      multU(m,n,i,A,Y,U + i*m);
      multV(m,n,i,AT,X,V + i*n);
  }


  // free malloc'd intermediates
  free(AT);
  free(alpha);
  free(beta);
  free(X);
  free(Y);

}
// Fills X with (m - r) rows of length m, where m is the number of rows of A
// and r is the value returned by gauss() on the transpose of A.
// Presumably these rows form a basis of the left kernel of A (x * A = 0);
// confirm against the library documentation.
void kernel(mat_ZZ_p& X, const mat_ZZ_p& A)
{
   long m = A.NumRows();
   long n = A.NumCols();

   mat_ZZ_p M;
   transpose(M, A);
   const long rank = gauss(M);

   X.SetDims(m - rank, m);

   // Scratch values reused across all rows to avoid re-constructing them.
   ZZ acc, term;
   ZZ_p scaled;

   // pivotRow[c] is the row of M whose leading nonzero entry is in column c,
   // or -1 when column c holds no pivot.
   vec_long pivotRow;
   pivotRow.SetLength(m);
   for (long c = 0; c < m; c++)
      pivotRow[c] = -1;

   // pivotInv[c] is the inverse of the pivot entry in column c.
   vec_ZZ_p pivotInv;
   pivotInv.SetLength(m);

   long col = -1;
   for (long row = 0; row < rank; row++) {
      // Advance to the first nonzero entry of this row, starting one past
      // the previous row's pivot column.
      for (col++; IsZero(M[row][col]); col++) {
      }

      pivotRow[col] = row;
      inv(pivotInv[col], M[row][col]);
   }

   for (long k = 0; k < m - rank; k++) {
      vec_ZZ_p& v = X[k];
      long freeSeen = 0;

      // Walk the columns from right to left so that v[s] for s > c is
      // already final when column c is processed.
      for (long c = m - 1; c >= 0; c--) {
         const long row = pivotRow[c];

         if (row == -1) {
            // Pivot-free column: the k-th one encountered gets 1, others 0.
            if (freeSeen == k)
               set(v[c]);
            else
               clear(v[c]);
            freeSeen++;
            continue;
         }

         // Pivot column: v[c] = -(sum_{s > c} v[s] * M[row][s]) / pivot.
         clear(acc);
         for (long s = c + 1; s < m; s++) {
            mul(term, rep(v[s]), rep(M[row][s]));
            add(acc, acc, term);
         }

         conv(scaled, acc);
         mul(scaled, scaled, pivotInv[c]);
         negate(v[c], scaled);
      }
   }
}
Beispiel #15
0
// Translates the graph stored in model_proto into layers of dstNet.
//
// The function walks the graph nodes in order. For each node it maps the
// ONNX op type onto a layer type and parameters, then adds the layer to
// dstNet, connects it to the layers that produce its inputs, and records the
// shape of its first output in outShapes. Nodes whose result can be computed
// at import time (Constant, Unsqueeze, Shape, Gather, Reshape or Concat of
// constant inputs) are not added as layers; their result is stored in
// constBlobs under the node's first output name instead.
//
// dstNet is taken by value, as in the original signature; Net is presumably
// a handle type so that layers added here are visible to the caller (confirm).
void ONNXImporter::populateNet(Net dstNet)
{
    CV_Assert(model_proto.has_graph());
    opencv_onnx::GraphProto graph_proto = model_proto.graph();
    std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
    // List of internal blobs shapes.
    std::map<std::string, MatShape> outShapes;
    // Add the shapes of all graph inputs. This covers constant blobs as well as the network's real inputs.
    for (int i = 0; i < graph_proto.input_size(); ++i)
    {
        opencv_onnx::ValueInfoProto valueInfoProto = graph_proto.input(i);
        CV_Assert(valueInfoProto.has_type());
        opencv_onnx::TypeProto typeProto = valueInfoProto.type();
        CV_Assert(typeProto.has_tensor_type());
        opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
        CV_Assert(tensor.has_shape());
        opencv_onnx::TensorShapeProto tensorShape = tensor.shape();

        MatShape inpShape(tensorShape.dim_size());
        for (int j = 0; j < inpShape.size(); ++j)
        {
            inpShape[j] = tensorShape.dim(j).dim_value();
        }
        outShapes[valueInfoProto.name()] = inpShape;
    }

    // Name of the exporting framework; only used below to select the
    // average-pooling padding behaviour.
    std::string framework_name;
    if (model_proto.has_producer_name()) {
        framework_name = model_proto.producer_name();
    }

    // create map with network inputs (without const blobs)
    std::map<std::string, LayerInfo> layer_id;
    std::map<std::string, LayerInfo>::iterator layerId;
    std::map<std::string, MatShape>::iterator shapeIt;
    // fill map: push layer name, layer id and output id
    std::vector<String> netInputs;
    for (int j = 0; j < graph_proto.input_size(); j++)
    {
        const std::string& name = graph_proto.input(j).name();
        if (constBlobs.find(name) == constBlobs.end()) {
            netInputs.push_back(name);
            layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
        }
    }
    dstNet.setInputsNames(netInputs);

    int layersSize = graph_proto.node_size();
    LayerParams layerParams;
    opencv_onnx::NodeProto node_proto;

    for(int li = 0; li < layersSize; li++)
    {
        node_proto = graph_proto.node(li);
        layerParams = getLayerParams(node_proto);
        CV_Assert(node_proto.output_size() >= 1);
        // A layer is named after the first output of its node.
        layerParams.name = node_proto.output(0);

        std::string layer_type = node_proto.op_type();
        layerParams.type = layer_type;


        if (layer_type == "MaxPool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "MAX");
            layerParams.set("ceil_mode", isCeilMode(layerParams));
        }
        else if (layer_type == "AveragePool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "AVE");
            layerParams.set("ceil_mode", isCeilMode(layerParams));
            layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
        }
        else if (layer_type == "GlobalAveragePool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "AVE");
            layerParams.set("global_pooling", true);
        }
        else if (layer_type == "Add" || layer_type == "Sum")
        {
            // Second operand is not produced by a layer: treat it as a
            // constant (scalar -> Power shift, otherwise Scale with bias).
            if (layer_id.find(node_proto.input(1)) == layer_id.end())
            {
                Mat blob = getBlob(node_proto, constBlobs, 1);
                blob = blob.reshape(1, 1);
                if (blob.total() == 1) {
                    layerParams.type = "Power";
                    layerParams.set("shift", blob.at<float>(0));
                }
                else {
                    layerParams.type = "Scale";
                    layerParams.set("bias_term", true);
                    layerParams.blobs.push_back(blob);
                }
            }
            else {
                layerParams.type = "Eltwise";
            }
        }
        else if (layer_type == "Sub")
        {
            Mat blob = getBlob(node_proto, constBlobs, 1);
            if (blob.total() == 1) {
                layerParams.type = "Power";
                layerParams.set("shift", -blob.at<float>(0));
            }
            else {
                layerParams.type = "Scale";
                // Use "bias_term", the key every other Scale layer in this
                // function is configured with, so that the single negated
                // blob is applied as a bias (x - b) and not as a weight.
                layerParams.set("bias_term", true);
                layerParams.blobs.push_back(-1.0f * blob.reshape(1, 1));
            }
        }
        else if (layer_type == "Div")
        {
            Mat blob = getBlob(node_proto, constBlobs, 1);
            CV_Assert_N(blob.type() == CV_32F, blob.total());
            if (blob.total() == 1)
            {
                layerParams.set("scale", 1.0f / blob.at<float>(0));
                layerParams.type = "Power";
            }
            else
            {
                layerParams.type = "Scale";
                // Division by a constant becomes multiplication by its
                // element-wise reciprocal (computed in place).
                divide(1.0, blob, blob);
                layerParams.blobs.push_back(blob);
                layerParams.set("bias_term", false);
            }
        }
        else if (layer_type == "Constant")
        {
            CV_Assert(node_proto.input_size() == 0);
            CV_Assert(layerParams.blobs.size() == 1);
            constBlobs.insert(std::make_pair(layerParams.name, layerParams.blobs[0]));
            continue;
        }
        else if (layer_type == "ImageScaler")
        {
            const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
            layerParams.erase("scale");

            if (layerParams.has("bias"))
            {
                layerParams.type = "Scale";
                // First blob: one copy of the scalar scale per bias entry.
                layerParams.blobs.push_back(
                    Mat(Size(1,  layerParams.get("bias").size()), CV_32FC1, scale));

                layerParams.set("bias_term", true);
                Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
                for (int j = 0; j < bias.total(); j++) {
                    bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
                }
                layerParams.blobs.push_back(bias);
                layerParams.erase("bias");
            }
            else {
                layerParams.set("scale", scale);
                layerParams.type = "Power";
            }
        }
        else if (layer_type == "LeakyRelu")
        {
            layerParams.type = "ReLU";
            replaceLayerParam(layerParams, "alpha", "negative_slope");
        }
        else if (layer_type == "LRN")
        {
            replaceLayerParam(layerParams, "size", "local_size");
        }
        else if (layer_type == "BatchNormalization")
        {
            if (node_proto.input_size() != 5)
                CV_Error(Error::StsNotImplemented,
                         "Expected input, scale, bias, mean and var");

            layerParams.type = "BatchNorm";
            replaceLayerParam(layerParams, "epsilon", "eps");
            replaceLayerParam(layerParams, "spatial", "use_global_stats");

            // Blob order pushed here: mean, var, then optional weight and bias.
            Mat meanData = getBlob(node_proto, constBlobs, 3);
            Mat stdData =  getBlob(node_proto, constBlobs, 4);

            layerParams.blobs.push_back(meanData);
            layerParams.blobs.push_back(stdData);

            if (!node_proto.input(1).empty()) {
                layerParams.set("has_weight", true);
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, 1));  // weightData
            } else {
                layerParams.set("has_weight", false);
            }

            if (!node_proto.input(2).empty()) {
                layerParams.set("has_bias", true);
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, 2)); // biasData
            } else {
                layerParams.set("has_bias", false);
            }
        }
        else if (layer_type == "Gemm")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "InnerProduct";
            Mat weights = getBlob(node_proto, constBlobs, 1);
            int ind_num_out = 0;
            if (layerParams.has("transB") && !layerParams.get<int>("transB")) {
                transpose(weights, weights);
                ind_num_out = 1;
            }
            layerParams.blobs.push_back(weights);

            if (node_proto.input_size() == 3) {
                Mat bias = getBlob(node_proto, constBlobs, 2);
                layerParams.blobs.push_back(bias);
            }

            layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "MatMul")
        {
            CV_Assert(node_proto.input_size() == 2);
            layerParams.type = "InnerProduct";
            Mat blob = getBlob(node_proto, constBlobs, 1);
            layerParams.blobs.push_back(blob.t());
            layerParams.set("bias_term", false);
            layerParams.set("num_output", layerParams.blobs[0].size[0]);
        }
        else if (layer_type == "Mul")
        {
            CV_Assert(node_proto.input_size() == 2);
            if (layer_id.find(node_proto.input(1)) == layer_id.end()) {
                Mat blob = getBlob(node_proto, constBlobs, 1);
                blob = blob.reshape(1, 1);
                if (blob.total() == 1) {
                    layerParams.set("scale", blob.at<float>(0));
                    layerParams.type = "Power";
                }
                else {
                    layerParams.blobs.push_back(blob);
                    layerParams.type = "Scale";
                }
            }
            else {
                layerParams.type = "Eltwise";
                layerParams.set("operation", "prod");
            }
        }
        else if (layer_type == "Conv")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Convolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
            layerParams.set("num_output", layerParams.blobs[0].size[0]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "ConvTranspose")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Deconvolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
            layerParams.set("num_output", layerParams.blobs[0].size[1]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "Transpose")
        {
            layerParams.type = "Permute";
            replaceLayerParam(layerParams, "perm", "order");
        }
        else if (layer_type == "Unsqueeze")
        {
            // Evaluated at import time on a constant input: insert a
            // dimension of size 1 at each listed axis.
            CV_Assert(node_proto.input_size() == 1);
            Mat input = getBlob(node_proto, constBlobs, 0);

            DictValue axes = layerParams.get("axes");
            std::vector<int> dims;
            for (int j = 0; j < input.dims; j++) {
                dims.push_back(input.size[j]);
            }
            CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size());
            for (int j = 0; j < axes.size(); j++) {
                dims.insert(dims.begin() + axes.getIntValue(j), 1);
            }

            Mat out = input.reshape(0, dims);
            constBlobs.insert(std::make_pair(layerParams.name, out));
            continue;
        }
        else if (layer_type == "Reshape")
        {
            CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));

            if (node_proto.input_size() == 2) {
                // Target shape supplied as a second (constant) input.
                Mat blob = getBlob(node_proto, constBlobs, 1);
                CV_Assert(blob.type() == CV_32SC1);

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    // Data input is constant too: reshape now, add no layer.
                    Mat input = getBlob(node_proto, constBlobs, 0);
                    Mat out = input.reshape(0, static_cast<std::vector<int> >(blob));
                    constBlobs.insert(std::make_pair(layerParams.name, out));
                    continue;
                }
                layerParams.set("dim", DictValue::arrayInt<int*>(
                            blob.ptr<int>(), blob.total() ));
            }
            else {
                // Target shape supplied as the "shape" attribute.
                DictValue shape = layerParams.get("shape");
                std::vector<int> dim;
                for (int j = 0; j < shape.size(); j++) {
                    dim.push_back(shape.getIntValue(j));
                }

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    Mat input = getBlob(node_proto, constBlobs, 0);
                    Mat out = input.reshape(0, dim);
                    constBlobs.insert(std::make_pair(layerParams.name, out));
                    continue;
                }
                replaceLayerParam(layerParams, "shape", "dim");
            }
        }
        else if (layer_type == "Pad")
        {
            layerParams.type = "Padding";
        }
        else if (layer_type == "Shape")
        {
            // Resolved at import time from the shapes recorded in outShapes.
            CV_Assert(node_proto.input_size() == 1);
            shapeIt = outShapes.find(node_proto.input(0));
            CV_Assert(shapeIt != outShapes.end());
            MatShape inpShape = shapeIt->second;

            Mat shapeMat(inpShape.size(), 1, CV_32S);
            for (int j = 0; j < inpShape.size(); ++j)
                shapeMat.at<int>(j) = inpShape[j];
            shapeMat.dims = 1;

            constBlobs.insert(std::make_pair(layerParams.name, shapeMat));
            continue;
        }
        else if (layer_type == "Gather")
        {
            // Only a single constant index on a constant input is supported;
            // the result is the corresponding slice along "axis".
            CV_Assert(node_proto.input_size() == 2);
            CV_Assert(layerParams.has("axis"));
            Mat input = getBlob(node_proto, constBlobs, 0);
            Mat indexMat = getBlob(node_proto, constBlobs, 1);
            CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
            int index = indexMat.at<int>(0);
            int axis = layerParams.get<int>("axis");

            std::vector<cv::Range> ranges(input.dims, Range::all());
            ranges[axis] = Range(index, index + 1);

            Mat out = input(ranges);
            constBlobs.insert(std::make_pair(layerParams.name, out));
            continue;
        }
        else if (layer_type == "Concat")
        {
            bool hasVariableInps = false;
            for (int i = 0; i < node_proto.input_size(); ++i)
            {
                if (layer_id.find(node_proto.input(i)) != layer_id.end())
                {
                    hasVariableInps = true;
                    break;
                }
            }

            if (!hasVariableInps)
            {
                // All inputs are constants: run the concat layer once now
                // and keep its result as a constant blob.
                std::vector<Mat> inputs(node_proto.input_size()), concatenated;
                for (size_t i = 0; i < inputs.size(); ++i)
                {
                    inputs[i] = getBlob(node_proto, constBlobs, i);
                }
                Ptr<Layer> concat = ConcatLayer::create(layerParams);
                runLayer(concat, inputs, concatenated);

                CV_Assert(concatenated.size() == 1);
                constBlobs.insert(std::make_pair(layerParams.name, concatenated[0]));
                continue;
            }
        }
        else
        {
            // Unknown op type: keep the ONNX type name and attach every
            // input that is not produced by a layer as a blob.
            for (int j = 0; j < node_proto.input_size(); j++) {
                if (layer_id.find(node_proto.input(j)) == layer_id.end())
                    layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
            }
         }

         int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
         layer_id.insert(std::make_pair(layerParams.name, LayerInfo(id, 0)));


         std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
         for (int j = 0; j < node_proto.input_size(); j++) {
             layerId = layer_id.find(node_proto.input(j));
             if (layerId != layer_id.end()) {
                 dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, j);
                 // Collect input shapes.
                 shapeIt = outShapes.find(node_proto.input(j));
                 CV_Assert(shapeIt != outShapes.end());
                 layerInpShapes.push_back(shapeIt->second);
             }
         }

         // Compute shape of output blob for this layer.
         Ptr<Layer> layer = dstNet.getLayer(id);
         layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
         CV_Assert(!layerOutShapes.empty());
         outShapes[layerParams.name] = layerOutShapes[0];
     }
 }
Beispiel #16
0
/*
 * Entry point of the MPI + OpenMP solver.
 *
 * Usage: <program> n
 *   n - problem size; per the note below it must be a power of 2.
 *
 * Each rank owns mpi_work rows of the matrix. The pipeline is: fill b with
 * h*h, apply fst_ to every row, transpose, apply fstinv_, divide by the
 * diagonal sums, apply fst_, transpose back, apply fstinv_, then reduce the
 * maximum entry to rank 0, which prints it with the elapsed wall time.
 *
 * Returns 0; when no problem size is given, rank 0 prints a message and
 * every rank exits after MPI_Finalize.
 */
int main(int argc, char **argv)
{
  double wall_start, wall_end;
  Real *diag, **b, **bt, **z;
  Real pi, h, omp_local_max, local_max, global_max;
  int i, j, omp_id;

  MPI_Init(&argc, &argv);
  /* MPI_Wtime must not be called before MPI_Init, so start the clock here. */
  wall_start = MPI_Wtime();
  MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

  omp_tot_threads = omp_get_max_threads();

  /* the total number of grid points in each spatial direction is (n+1) */
  /* the total number of degrees-of-freedom in each spatial direction is (n-1) */
  /* this version requires n to be a power of 2 */

  if (argc < 2) {
    if (mpi_rank == 0){
      printf("need a problem size\n");
    }
    MPI_Finalize();
    return 0;
  }

  n  = atoi(argv[1]);
  m  = n-1;
  // mpi_work is the amount of work needed to be done by each mpi node. The last
  // mpi node may do slightly less work than the others, but that's the closest
  // we'll get to proper load balancing.
  mpi_work = 1 + ((m - 1) / mpi_size);
  nn = 4*n;

  /* Rows are padded to mpi_size*mpi_work columns; z holds one scratch row
     of length nn per OpenMP thread. */
  diag = createRealArray (m);
  b    = createReal2DArray (mpi_work, mpi_size*mpi_work);
  bt   = createReal2DArray (mpi_work, mpi_size*mpi_work);
  z    = createReal2DArray (omp_tot_threads, nn);

  h    = 1./(Real)n;
  pi   = 4.*atan(1.);

  #pragma omp parallel for private(i)
  for (i=0; i < m; i++) {
    diag[i] = 2.*(1.-cos((i+1)*pi/(Real)n));
  }

  /* Only rows whose global index (j + mpi_work * mpi_rank) is below m hold
     real data; the remaining rows on the last rank are padding. */
  #pragma omp parallel for private(j, i)
  for (j=0; j < mpi_work; j++) { // MPI
    for (i=0; j + mpi_work * mpi_rank < m && i < m; i++) { // OMP
      b[j][i] = h*h;
    }
  }

  #pragma omp parallel for private(omp_id, i)
  for (j=0; j < mpi_work; j++) { // MPI cut + OMP
    omp_id = omp_get_thread_num();
    fst_(b[j], &n, z[omp_id], &nn);
  }

  transpose (bt,b);

  #pragma omp parallel for private(i, omp_id) schedule(static)
  for (i=0; i < mpi_work; i++) { // MPI cut + OMP
    omp_id = omp_get_thread_num();
    fstinv_(bt[i], &n, z[omp_id], &nn);
  }

  /* Skip padded rows: diag has only m entries, so indexing it with a global
     row index >= m would read past the end of the array. */
  #pragma omp parallel for private(j, i)
  for (j=0; j < mpi_work; j++) { // MPI
    for (i=0; j + mpi_work * mpi_rank < m && i < m; i++) {
      bt[j][i] = bt[j][i]/(diag[i]+diag[j + mpi_work * mpi_rank]);
    }
  }

  #pragma omp parallel for private(i, omp_id) schedule(static)
  for (i=0; i < mpi_work; i++) { // MPI cut + OMP
    omp_id = omp_get_thread_num();
    fst_(bt[i], &n, z[omp_id], &nn);
  }

  transpose (b,bt);

  #pragma omp parallel for private(j, omp_id)
  for (j=0; j < mpi_work; j++) { // MPI cut + OMP
    omp_id = omp_get_thread_num();
    fstinv_(b[j], &n, z[omp_id], &nn);
  }

  local_max = 0.0;
  omp_local_max = 0.0;

  /* Each thread tracks its own maximum, then merges it into local_max
     inside the critical section. */
  #pragma omp parallel shared(local_max) private(j,i) firstprivate(omp_local_max)
  {
    // MPI, work in range (and handle last node overflow)
    #pragma omp for nowait
    for (j=0; j < mpi_work; j++) {
      for (i=0; j + mpi_work * mpi_rank < m && i < m; i++) {
        if (b[j][i] > omp_local_max) omp_local_max = b[j][i];
      }
    }
    #pragma omp critical
    {
      if (omp_local_max > local_max) {
        local_max = omp_local_max;
      }
    }
  }

  /* NOTE(review): MPI_DOUBLE assumes Real is double - confirm. */
  MPI_Reduce(&local_max, &global_max, 1,
             MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  free(diag);
  free(b[0]);
  free(b);
  free(bt[0]);
  free(bt);
  free(z[0]);
  free(z);

  /* MPI_Wtime must not be called after MPI_Finalize, so stop the clock first. */
  wall_end = MPI_Wtime();
  MPI_Finalize();

  /* global_max is only meaningful on rank 0, the root of the reduction. */
  if (mpi_rank == 0) {
    printf (" umax = %e, time = %.3fs \n", global_max,wall_end-wall_start);
    printf(" mpi_size = %d, omp_max_threads = %d, n = %d\n", mpi_size, omp_tot_threads, n);
  }
}
Beispiel #17
0
 // Transpose and Conjugate Transpose
 // Shorthand member: returns the result of transpose() applied to this array.
 array array::T() const
 {
     const array &self = *this;
     return transpose(self);
 }
Beispiel #18
0
 // Builds the result one element per entry of `indices`: element k is
 // hadd(transpose(x[k] * matrix)). The csizes_t argument carries only the
 // index pack and is otherwise unused.
 CMT_INLINE vec<vec<T, 2>, N> process(const vec<vec<T, 2>, N>& x, csizes_t<indices...>) const
 {
     using result_type = vec<vec<T, 2>, N>;
     return result_type(hadd(transpose(x[indices] * matrix))...);
 }
// Evaluates the target metric at one sample point of one element.
//
// pd     - patch holding the element and its vertices
// handle - encodes both the element index and the sample point
// value  - receives the metric value (multiplied by the target weight when
//          a weight calculator is set)
// err    - receives any error raised along the way
//
// Returns the flag reported by targetMetric->evaluate(); returns false when
// the sample point is unsupported or when any callee sets err.
bool AffineMapMetric::evaluate( PatchData& pd, size_t handle, double& value, MsqError& err )
{
  Sample s = ElemSampleQM::sample( handle );
  size_t e = ElemSampleQM::  elem( handle );
  MsqMeshEntity& elem = pd.element_by_index( e );
  EntityTopology type = elem.get_element_type();
  unsigned edim = TopologyInfo::dimension( type );
  const size_t* conn = elem.get_vertex_index_array();

    // Only corner samples are supported, with one exception: for triangles
    // and tetrahedra any sample is accepted (the original comment notes the
    // Jacobian of a linear simplex is constant over the element).
  const bool is_simplex = (type == TRIANGLE || type == TETRAHEDRON);
  if (s.dimension != 0 && !is_simplex) {
    MSQ_SETERR(err)("Invalid sample point for AffineMapMetric", MsqError::UNSUPPORTED_ELEMENT );
    return false;
  }

  bool rval;
  if (edim == 3) { // volume element: 3x3 Jacobian and target
    unsigned num_adj;
    const unsigned* adj = TopologyInfo::adjacent_vertices( type, s.number, num_adj );

      // Column k of A is the edge from the sample vertex to its k-th
      // adjacent vertex.
    MsqMatrix<3,3> A;
    for (unsigned k = 0; k < 3; ++k) {
      Vector3D edge = pd.vertex_by_index( conn[adj[k]] ) - pd.vertex_by_index( conn[s.number] );
      A.set_column( k, MsqMatrix<3,1>(edge.to_array()) );
    }
    if (type == TETRAHEDRON)
      A = A * TET_XFORM;

    MsqMatrix<3,3> W;
    targetCalc->get_3D_target( pd, e, s, W, err ); MSQ_ERRZERO(err);
    rval = targetMetric->evaluate( A * inverse(W), value, err ); MSQ_ERRZERO(err);
  }
  else { // surface element: 3x2 Jacobian and target, reduced to 2x2
    unsigned num_adj;
    const unsigned* adj = TopologyInfo::adjacent_vertices( type, s.number, num_adj );

    MsqMatrix<3,2> App;
    for (unsigned k = 0; k < 2; ++k) {
      Vector3D edge = pd.vertex_by_index( conn[adj[k]] ) - pd.vertex_by_index( conn[s.number] );
      App.set_column( k, MsqMatrix<3,1>(edge.to_array()) );
    }

    MsqMatrix<3,2> Wp;
    targetCalc->get_surface_target( pd, e, s, Wp, err ); MSQ_ERRZERO(err);

      // surface_to_2d fills W and RZ; A is then App expressed through RZ.
    MsqMatrix<2,2> A, W;
    MsqMatrix<3,2> RZ;
    surface_to_2d( App, Wp, W, RZ );
    A = transpose(RZ) * App;
    if (type == TRIANGLE)
      A = A * TRI_XFORM;

    rval = targetMetric->evaluate( A*inverse(W), value, err ); MSQ_ERRZERO(err);
  }

    // Scale the result by the target weight, when one is configured.
  if (weightCalc) {
    const double weight = weightCalc->get_weight( pd, e, s, err ); MSQ_ERRZERO(err);
    value *= weight;
  }
  return rval;
}
Beispiel #20
0
// Moves the camera by `translation` after multiplying it (extended with a
// fourth component of 1) by the transpose of worldToCameraRot.
void Camera::moveRelative(vec3 translation) {
	const vec4 homogeneous(translation, 1);
	move(vec3(transpose(worldToCameraRot) * homogeneous));
}
// Test entry point. Runs the same sequence of checks twice: first on matrices
// of fundamental element types, then on matrices whose elements are unit
// quantities. Each pass covers element access (bub::at, bub::at_c and the
// member at<>()), scalar product and division (named functions and operators),
// negation, transpose and slice. Transpose and slice are applied both to a
// 4x4 matrix and to an object of the corresponding derived_from_* type.
// Both arguments are unused; always returns 0.
int test_main (int, char *[])
{
    // fundamental types

    A_matrix_3x1_fundamentals_type m_A1;

    // Write through bub::at with mpl::size_t indices, read back via member at<>().
    bub::at<boost::mpl::size_t<0>, boost::mpl::size_t<0> >(m_A1) = 3.0;
    bub::at<boost::mpl::size_t<1>, boost::mpl::size_t<0> >(m_A1) = 1.0;
    bub::at<boost::mpl::size_t<2>, boost::mpl::size_t<0> >(m_A1) = 2.0;
    BOOST_CHECK((m_A1.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A1.at<1, 0>() == 1.0));
    BOOST_CHECK((m_A1.at<2, 0>() == 2.0));

    // Overwrite with different values to make sure the writes really take effect.
    bub::at<boost::mpl::size_t<0>, boost::mpl::size_t<0> >(m_A1) = 2.0;
    bub::at<boost::mpl::size_t<1>, boost::mpl::size_t<0> >(m_A1) = 3.0;
    bub::at<boost::mpl::size_t<2>, boost::mpl::size_t<0> >(m_A1) = 1.0;
    BOOST_CHECK((m_A1.at<0, 0>() == 2.0));
    BOOST_CHECK((m_A1.at<1, 0>() == 3.0));
    BOOST_CHECK((m_A1.at<2, 0>() == 1.0));

    // Same access through bub::at_c with plain integer indices.
    // From here on m_A1 holds (1, 2, 3).
    bub::at_c<0, 0>(m_A1) = 1.0;
    bub::at_c<1, 0>(m_A1) = 2.0;
    bub::at_c<2, 0>(m_A1) = 3.0;
    BOOST_CHECK((m_A1.at<0, 0>() == 1.0));
    BOOST_CHECK((m_A1.at<1, 0>() == 2.0));
    BOOST_CHECK((m_A1.at<2, 0>() == 3.0));

    // Scalar * matrix: prod() and operator*.
    A_matrix_3x1_fundamentals_type m_A2 = prod(3.0, m_A1);
    BOOST_CHECK((m_A2.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A2.at<1, 0>() == 6.0));
    BOOST_CHECK((m_A2.at<2, 0>() == 9.0));
    m_A2 = 3.0 * m_A1;
    BOOST_CHECK((m_A2.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A2.at<1, 0>() == 6.0));
    BOOST_CHECK((m_A2.at<2, 0>() == 9.0));

    // Matrix * scalar: prod() and operator*.
    A_matrix_3x1_fundamentals_type m_A3 = prod(m_A1, 3.0);
    BOOST_CHECK((m_A3.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A3.at<1, 0>() == 6.0));
    BOOST_CHECK((m_A3.at<2, 0>() == 9.0));
    m_A3 = m_A1 * 3.0;
    BOOST_CHECK((m_A3.at<0, 0>() == 3.0));
    BOOST_CHECK((m_A3.at<1, 0>() == 6.0));
    BOOST_CHECK((m_A3.at<2, 0>() == 9.0));

    // Matrix / scalar: div() and operator/. The expected values are written as
    // the same division so the comparison is exact.
    A_matrix_3x1_fundamentals_type m_A4 = div(m_A1, 3.0);
    BOOST_CHECK((m_A4.at<0, 0>() == 1.0 / 3.0));
    BOOST_CHECK((m_A4.at<1, 0>() == 2.0 / 3.0));
    BOOST_CHECK((m_A4.at<2, 0>() == 3.0 / 3.0));
    m_A4 = m_A1 / 3.0;
    BOOST_CHECK((m_A4.at<0, 0>() == 1.0 / 3.0));
    BOOST_CHECK((m_A4.at<1, 0>() == 2.0 / 3.0));
    BOOST_CHECK((m_A4.at<2, 0>() == 3.0 / 3.0));

    // Negation: neg() and unary operator-.
    A_matrix_3x1_fundamentals_type m_A5 = neg(m_A1);
    BOOST_CHECK((m_A5.at<0, 0>() == -1.0));
    BOOST_CHECK((m_A5.at<1, 0>() == -2.0));
    BOOST_CHECK((m_A5.at<2, 0>() == -3.0));
    m_A5 = -m_A1;
    BOOST_CHECK((m_A5.at<0, 0>() == -1.0));
    BOOST_CHECK((m_A5.at<1, 0>() == -2.0));
    BOOST_CHECK((m_A5.at<2, 0>() == -3.0));

    // 4x4 fixture filled row by row with 1..16.
    D_matrix_4x4_fundamentals_type m_D1;
    m_D1.at<0, 0>() = 1.0;
    m_D1.at<0, 1>() = 2.0;
    m_D1.at<0, 2>() = 3.0;
    m_D1.at<0, 3>() = 4.0;
    m_D1.at<1, 0>() = 5.0;
    m_D1.at<1, 1>() = 6.0;
    m_D1.at<1, 2>() = 7.0;
    m_D1.at<1, 3>() = 8.0;
    m_D1.at<2, 0>() = 9.0;
    m_D1.at<2, 1>() = 10.0;
    m_D1.at<2, 2>() = 11.0;
    m_D1.at<2, 3>() = 12.0;
    m_D1.at<3, 0>() = 13.0;
    m_D1.at<3, 1>() = 14.0;
    m_D1.at<3, 2>() = 15.0;
    m_D1.at<3, 3>() = 16.0;

    // Same contents in an object of the derived_from_* type.
    derived_from_D_matrix_4x4_fundamentals_type m_D1_d;
    m_D1_d.at<0, 0>() = 1.0;
    m_D1_d.at<0, 1>() = 2.0;
    m_D1_d.at<0, 2>() = 3.0;
    m_D1_d.at<0, 3>() = 4.0;
    m_D1_d.at<1, 0>() = 5.0;
    m_D1_d.at<1, 1>() = 6.0;
    m_D1_d.at<1, 2>() = 7.0;
    m_D1_d.at<1, 3>() = 8.0;
    m_D1_d.at<2, 0>() = 9.0;
    m_D1_d.at<2, 1>() = 10.0;
    m_D1_d.at<2, 2>() = 11.0;
    m_D1_d.at<2, 3>() = 12.0;
    m_D1_d.at<3, 0>() = 13.0;
    m_D1_d.at<3, 1>() = 14.0;
    m_D1_d.at<3, 2>() = 15.0;
    m_D1_d.at<3, 3>() = 16.0;

    // Transpose: element (r, c) of the result must equal element (c, r) of the input.
    D_matrix_4x4_fundamentals_type m_D2;
    m_D2 = transpose(m_D1);
    BOOST_CHECK((m_D2.at<0, 0>() == 1.0));
    BOOST_CHECK((m_D2.at<1, 0>() == 2.0));
    BOOST_CHECK((m_D2.at<2, 0>() == 3.0));
    BOOST_CHECK((m_D2.at<3, 0>() == 4.0));
    BOOST_CHECK((m_D2.at<0, 1>() == 5.0));
    BOOST_CHECK((m_D2.at<1, 1>() == 6.0));
    BOOST_CHECK((m_D2.at<2, 1>() == 7.0));
    BOOST_CHECK((m_D2.at<3, 1>() == 8.0));
    BOOST_CHECK((m_D2.at<0, 2>() == 9.0));
    BOOST_CHECK((m_D2.at<1, 2>() == 10.0));
    BOOST_CHECK((m_D2.at<2, 2>() == 11.0));
    BOOST_CHECK((m_D2.at<3, 2>() == 12.0));
    BOOST_CHECK((m_D2.at<0, 3>() == 13.0));
    BOOST_CHECK((m_D2.at<1, 3>() == 14.0));
    BOOST_CHECK((m_D2.at<2, 3>() == 15.0));
    BOOST_CHECK((m_D2.at<3, 3>() == 16.0));

    // Transpose must also accept the derived type.
    m_D2 = transpose(m_D1_d);
    BOOST_CHECK((m_D2.at<0, 0>() == 1.0));
    BOOST_CHECK((m_D2.at<1, 0>() == 2.0));
    BOOST_CHECK((m_D2.at<2, 0>() == 3.0));
    BOOST_CHECK((m_D2.at<3, 0>() == 4.0));
    BOOST_CHECK((m_D2.at<0, 1>() == 5.0));
    BOOST_CHECK((m_D2.at<1, 1>() == 6.0));
    BOOST_CHECK((m_D2.at<2, 1>() == 7.0));
    BOOST_CHECK((m_D2.at<3, 1>() == 8.0));
    BOOST_CHECK((m_D2.at<0, 2>() == 9.0));
    BOOST_CHECK((m_D2.at<1, 2>() == 10.0));
    BOOST_CHECK((m_D2.at<2, 2>() == 11.0));
    BOOST_CHECK((m_D2.at<3, 2>() == 12.0));
    BOOST_CHECK((m_D2.at<0, 3>() == 13.0));
    BOOST_CHECK((m_D2.at<1, 3>() == 14.0));
    BOOST_CHECK((m_D2.at<2, 3>() == 15.0));
    BOOST_CHECK((m_D2.at<3, 3>() == 16.0));

    // Slice 1: the expected values are the m_D1 entries (0,0), (0,1), (1,0), (1,1).
    typedef bub::result_of::slice<
        D_matrix_4x4_fundamentals_type,
        rows_type_1,
        columns_type_1
    >::type D_fundamentals_slice_type_1;
    D_fundamentals_slice_type_1 m_D_slice_1;
    m_D_slice_1 = bub::slice<rows_type_1, columns_type_1>(m_D1);
    BOOST_CHECK((m_D_slice_1.at<0, 0>() == 1.0));
    BOOST_CHECK((m_D_slice_1.at<0, 1>() == 2.0));
    BOOST_CHECK((m_D_slice_1.at<1, 0>() == 5.0));
    BOOST_CHECK((m_D_slice_1.at<1, 1>() == 6.0));

    m_D_slice_1 = bub::slice<rows_type_1, columns_type_1>(m_D1_d);
    BOOST_CHECK((m_D_slice_1.at<0, 0>() == 1.0));
    BOOST_CHECK((m_D_slice_1.at<0, 1>() == 2.0));
    BOOST_CHECK((m_D_slice_1.at<1, 0>() == 5.0));
    BOOST_CHECK((m_D_slice_1.at<1, 1>() == 6.0));

    // Slice 2: the expected values are the m_D1 entries (0,2), (3,2), (2,2),
    // i.e. the selected rows are not in ascending order.
    typedef bub::result_of::slice<
        D_matrix_4x4_fundamentals_type,
        rows_type_2,
        columns_type_2
    >::type D_fundamentals_slice_type_2;
    D_fundamentals_slice_type_2 m_D_slice_2;
    m_D_slice_2 = bub::slice<rows_type_2, columns_type_2>(m_D1);
    BOOST_CHECK((m_D_slice_2.at<0, 0>() == 3.0));
    BOOST_CHECK((m_D_slice_2.at<1, 0>() == 15.0));
    BOOST_CHECK((m_D_slice_2.at<2, 0>() == 11.0));

    m_D_slice_2 = bub::slice<rows_type_2, columns_type_2>(m_D1_d);
    BOOST_CHECK((m_D_slice_2.at<0, 0>() == 3.0));
    BOOST_CHECK((m_D_slice_2.at<1, 0>() == 15.0));
    BOOST_CHECK((m_D_slice_2.at<2, 0>() == 11.0));

    // Slice 3: the expected values are the m_D1 entries (0,1) and (0,3).
    typedef bub::result_of::slice<
        D_matrix_4x4_fundamentals_type,
        rows_type_3,
        columns_type_3
    >::type D_fundamentals_slice_type_3;
    D_fundamentals_slice_type_3 m_D_slice_3;
    m_D_slice_3 = bub::slice<rows_type_3, columns_type_3>(m_D1);
    BOOST_CHECK((m_D_slice_3.at<0, 0>() == 2.0));
    BOOST_CHECK((m_D_slice_3.at<0, 1>() == 4.0));

    m_D_slice_3 = bub::slice<rows_type_3, columns_type_3>(m_D1_d);
    BOOST_CHECK((m_D_slice_3.at<0, 0>() == 2.0));
    BOOST_CHECK((m_D_slice_3.at<0, 1>() == 4.0));


    // unit types

    // Same checks as above, but each element is a quantity; the three rows of
    // the 3x1 matrix are assigned time_, length and dimensionless values and
    // results are compared through .value().
    A_matrix_3x1_units_type m_A1_u;

    bub::at<boost::mpl::size_t<0>, boost::mpl::size_t<0> >(m_A1_u) = time_::from_value(3.0);
    bub::at<boost::mpl::size_t<1>, boost::mpl::size_t<0> >(m_A1_u) = length::from_value(1.0);
    bub::at<boost::mpl::size_t<2>, boost::mpl::size_t<0> >(m_A1_u) = dimensionless::from_value(2.0);
    BOOST_CHECK((m_A1_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A1_u.at<1, 0>().value() == 1.0));
    BOOST_CHECK((m_A1_u.at<2, 0>().value() == 2.0));

    bub::at<boost::mpl::size_t<0>, boost::mpl::size_t<0> >(m_A1_u) = time_::from_value(2.0);
    bub::at<boost::mpl::size_t<1>, boost::mpl::size_t<0> >(m_A1_u) = length::from_value(3.0);
    bub::at<boost::mpl::size_t<2>, boost::mpl::size_t<0> >(m_A1_u) = dimensionless::from_value(1.0);
    BOOST_CHECK((m_A1_u.at<0, 0>().value() == 2.0));
    BOOST_CHECK((m_A1_u.at<1, 0>().value() == 3.0));
    BOOST_CHECK((m_A1_u.at<2, 0>().value() == 1.0));

    // From here on m_A1_u holds the values (1, 2, 3).
    bub::at_c<0, 0>(m_A1_u) = time_::from_value(1.0);
    bub::at_c<1, 0>(m_A1_u) = length::from_value(2.0);
    bub::at_c<2, 0>(m_A1_u) = dimensionless::from_value(3.0);
    BOOST_CHECK((m_A1_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_A1_u.at<1, 0>().value() == 2.0));
    BOOST_CHECK((m_A1_u.at<2, 0>().value() == 3.0));

    // Scalar * matrix.
    A_matrix_3x1_units_type m_A2_u = prod(3.0, m_A1_u);
    BOOST_CHECK((m_A2_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A2_u.at<1, 0>().value() == 6.0));
    BOOST_CHECK((m_A2_u.at<2, 0>().value() == 9.0));
    m_A2_u = 3.0 * m_A1_u;
    BOOST_CHECK((m_A2_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A2_u.at<1, 0>().value() == 6.0));
    BOOST_CHECK((m_A2_u.at<2, 0>().value() == 9.0));

    // Matrix * scalar.
    A_matrix_3x1_units_type m_A3_u = prod(m_A1_u, 3.0);
    BOOST_CHECK((m_A3_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A3_u.at<1, 0>().value() == 6.0));
    BOOST_CHECK((m_A3_u.at<2, 0>().value() == 9.0));
    m_A3_u = m_A1_u * 3.0;
    BOOST_CHECK((m_A3_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_A3_u.at<1, 0>().value() == 6.0));
    BOOST_CHECK((m_A3_u.at<2, 0>().value() == 9.0));

    // Matrix / scalar.
    A_matrix_3x1_units_type m_A4_u = div(m_A1_u, 3.0);
    BOOST_CHECK((m_A4_u.at<0, 0>().value() == 1.0 / 3.0));
    BOOST_CHECK((m_A4_u.at<1, 0>().value() == 2.0 / 3.0));
    BOOST_CHECK((m_A4_u.at<2, 0>().value() == 3.0 / 3.0));
    m_A4_u = m_A1_u / 3.0;
    BOOST_CHECK((m_A4_u.at<0, 0>().value() == 1.0 / 3.0));
    BOOST_CHECK((m_A4_u.at<1, 0>().value() == 2.0 / 3.0));
    BOOST_CHECK((m_A4_u.at<2, 0>().value() == 3.0 / 3.0));

    // Negation.
    A_matrix_3x1_units_type m_A5_u = neg(m_A1_u);
    BOOST_CHECK((m_A5_u.at<0, 0>().value() == -1.0));
    BOOST_CHECK((m_A5_u.at<1, 0>().value() == -2.0));
    BOOST_CHECK((m_A5_u.at<2, 0>().value() == -3.0));
    m_A5_u = -m_A1_u;
    BOOST_CHECK((m_A5_u.at<0, 0>().value() == -1.0));
    BOOST_CHECK((m_A5_u.at<1, 0>().value() == -2.0));
    BOOST_CHECK((m_A5_u.at<2, 0>().value() == -3.0));

    // 4x4 fixture of length quantities, filled row by row with 1..16.
    D_matrix_4x4_units_type m_D1_u;
    m_D1_u.at<0, 0>() = length::from_value(1.0);
    m_D1_u.at<0, 1>() = length::from_value(2.0);
    m_D1_u.at<0, 2>() = length::from_value(3.0);
    m_D1_u.at<0, 3>() = length::from_value(4.0);
    m_D1_u.at<1, 0>() = length::from_value(5.0);
    m_D1_u.at<1, 1>() = length::from_value(6.0);
    m_D1_u.at<1, 2>() = length::from_value(7.0);
    m_D1_u.at<1, 3>() = length::from_value(8.0);
    m_D1_u.at<2, 0>() = length::from_value(9.0);
    m_D1_u.at<2, 1>() = length::from_value(10.0);
    m_D1_u.at<2, 2>() = length::from_value(11.0);
    m_D1_u.at<2, 3>() = length::from_value(12.0);
    m_D1_u.at<3, 0>() = length::from_value(13.0);
    m_D1_u.at<3, 1>() = length::from_value(14.0);
    m_D1_u.at<3, 2>() = length::from_value(15.0);
    m_D1_u.at<3, 3>() = length::from_value(16.0);

    // Same contents in an object of the derived_from_* type.
    derived_from_D_matrix_4x4_units_type m_D1_u_d;
    m_D1_u_d.at<0, 0>() = length::from_value(1.0);
    m_D1_u_d.at<0, 1>() = length::from_value(2.0);
    m_D1_u_d.at<0, 2>() = length::from_value(3.0);
    m_D1_u_d.at<0, 3>() = length::from_value(4.0);
    m_D1_u_d.at<1, 0>() = length::from_value(5.0);
    m_D1_u_d.at<1, 1>() = length::from_value(6.0);
    m_D1_u_d.at<1, 2>() = length::from_value(7.0);
    m_D1_u_d.at<1, 3>() = length::from_value(8.0);
    m_D1_u_d.at<2, 0>() = length::from_value(9.0);
    m_D1_u_d.at<2, 1>() = length::from_value(10.0);
    m_D1_u_d.at<2, 2>() = length::from_value(11.0);
    m_D1_u_d.at<2, 3>() = length::from_value(12.0);
    m_D1_u_d.at<3, 0>() = length::from_value(13.0);
    m_D1_u_d.at<3, 1>() = length::from_value(14.0);
    m_D1_u_d.at<3, 2>() = length::from_value(15.0);
    m_D1_u_d.at<3, 3>() = length::from_value(16.0);

    // Transpose of the unit-typed matrix.
    D_matrix_4x4_units_type m_D2_u;
    m_D2_u = transpose(m_D1_u);
    BOOST_CHECK((m_D2_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_D2_u.at<1, 0>().value() == 2.0));
    BOOST_CHECK((m_D2_u.at<2, 0>().value() == 3.0));
    BOOST_CHECK((m_D2_u.at<3, 0>().value() == 4.0));
    BOOST_CHECK((m_D2_u.at<0, 1>().value() == 5.0));
    BOOST_CHECK((m_D2_u.at<1, 1>().value() == 6.0));
    BOOST_CHECK((m_D2_u.at<2, 1>().value() == 7.0));
    BOOST_CHECK((m_D2_u.at<3, 1>().value() == 8.0));
    BOOST_CHECK((m_D2_u.at<0, 2>().value() == 9.0));
    BOOST_CHECK((m_D2_u.at<1, 2>().value() == 10.0));
    BOOST_CHECK((m_D2_u.at<2, 2>().value() == 11.0));
    BOOST_CHECK((m_D2_u.at<3, 2>().value() == 12.0));
    BOOST_CHECK((m_D2_u.at<0, 3>().value() == 13.0));
    BOOST_CHECK((m_D2_u.at<1, 3>().value() == 14.0));
    BOOST_CHECK((m_D2_u.at<2, 3>().value() == 15.0));
    BOOST_CHECK((m_D2_u.at<3, 3>().value() == 16.0));

    // Transpose must also accept the derived type.
    m_D2_u = transpose(m_D1_u_d);
    BOOST_CHECK((m_D2_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_D2_u.at<1, 0>().value() == 2.0));
    BOOST_CHECK((m_D2_u.at<2, 0>().value() == 3.0));
    BOOST_CHECK((m_D2_u.at<3, 0>().value() == 4.0));
    BOOST_CHECK((m_D2_u.at<0, 1>().value() == 5.0));
    BOOST_CHECK((m_D2_u.at<1, 1>().value() == 6.0));
    BOOST_CHECK((m_D2_u.at<2, 1>().value() == 7.0));
    BOOST_CHECK((m_D2_u.at<3, 1>().value() == 8.0));
    BOOST_CHECK((m_D2_u.at<0, 2>().value() == 9.0));
    BOOST_CHECK((m_D2_u.at<1, 2>().value() == 10.0));
    BOOST_CHECK((m_D2_u.at<2, 2>().value() == 11.0));
    BOOST_CHECK((m_D2_u.at<3, 2>().value() == 12.0));
    BOOST_CHECK((m_D2_u.at<0, 3>().value() == 13.0));
    BOOST_CHECK((m_D2_u.at<1, 3>().value() == 14.0));
    BOOST_CHECK((m_D2_u.at<2, 3>().value() == 15.0));
    BOOST_CHECK((m_D2_u.at<3, 3>().value() == 16.0));

    // Slice 1 on unit types: entries (0,0), (0,1), (1,0), (1,1) of m_D1_u.
    typedef bub::result_of::slice<
        D_matrix_4x4_units_type,
        rows_type_1,
        columns_type_1
    >::type D_units_slice_type_1;
    D_units_slice_type_1 m_D_slice_1_u;
    m_D_slice_1_u = bub::slice<rows_type_1, columns_type_1>(m_D1_u);
    BOOST_CHECK((m_D_slice_1_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_D_slice_1_u.at<0, 1>().value() == 2.0));
    BOOST_CHECK((m_D_slice_1_u.at<1, 0>().value() == 5.0));
    BOOST_CHECK((m_D_slice_1_u.at<1, 1>().value() == 6.0));

    m_D_slice_1_u = bub::slice<rows_type_1, columns_type_1>(m_D1_u_d);
    BOOST_CHECK((m_D_slice_1_u.at<0, 0>().value() == 1.0));
    BOOST_CHECK((m_D_slice_1_u.at<0, 1>().value() == 2.0));
    BOOST_CHECK((m_D_slice_1_u.at<1, 0>().value() == 5.0));
    BOOST_CHECK((m_D_slice_1_u.at<1, 1>().value() == 6.0));

    // Slice 2 on unit types: entries (0,2), (3,2), (2,2) of m_D1_u.
    typedef bub::result_of::slice<
        D_matrix_4x4_units_type,
        rows_type_2,
        columns_type_2
    >::type D_units_slice_type_2;
    D_units_slice_type_2 m_D_slice_2_u;
    m_D_slice_2_u = bub::slice<rows_type_2, columns_type_2>(m_D1_u);
    BOOST_CHECK((m_D_slice_2_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_D_slice_2_u.at<1, 0>().value() == 15.0));
    BOOST_CHECK((m_D_slice_2_u.at<2, 0>().value() == 11.0));

    m_D_slice_2_u = bub::slice<rows_type_2, columns_type_2>(m_D1_u_d);
    BOOST_CHECK((m_D_slice_2_u.at<0, 0>().value() == 3.0));
    BOOST_CHECK((m_D_slice_2_u.at<1, 0>().value() == 15.0));
    BOOST_CHECK((m_D_slice_2_u.at<2, 0>().value() == 11.0));

    // Slice 3 on unit types: entries (0,1) and (0,3) of m_D1_u.
    typedef bub::result_of::slice<
        D_matrix_4x4_units_type,
        rows_type_3,
        columns_type_3
    >::type D_units_slice_type_3;
    D_units_slice_type_3 m_D_slice_3_u;
    m_D_slice_3_u = bub::slice<rows_type_3, columns_type_3>(m_D1_u);
    BOOST_CHECK((m_D_slice_3_u.at<0, 0>().value() == 2.0));
    BOOST_CHECK((m_D_slice_3_u.at<0, 1>().value() == 4.0));

    m_D_slice_3_u = bub::slice<rows_type_3, columns_type_3>(m_D1_u_d);
    BOOST_CHECK((m_D_slice_3_u.at<0, 0>().value() == 2.0));
    BOOST_CHECK((m_D_slice_3_u.at<0, 1>().value() == 4.0));

    return 0;
}
Beispiel #22
0
/**
 * Constructor that fills a coxph_data object with phenotype and genotype
 * data.
 *
 * The design matrix X is built with (ncov + 1) columns: column 0 is a
 * constant (=1), columns 1..phed.ncov hold the covariates and, when
 * snpnum >= 0, the last ngpreds columns hold the genotype predictors
 * of the requested SNP. Afterwards stime, sstat, weights, strata,
 * offset and X are passed through reorder() using the permutation
 * derived from sorting the follow-up times, and X is transposed.
 *
 * The program is terminated with exit(1) if phed does not contain
 * exactly two outcomes, if a status value is neither 0 nor 1, or if
 * the sort permutation cannot be established.
 *
 * @param phed Reference to a phedata object with phenotype data
 * @param gend Reference to a gendata object with genotype data
 * @param snpnum The number of the SNP in the genotype data object to
 * be added to the design matrix regdata::X. When set to a number < 0
 * no SNP data is added to the design matrix (e.g. when calculating
 * the null model).
 */
coxph_data::coxph_data(const phedata &phed, const gendata &gend,
                       const int snpnum)
{
    freq        = 0;
    gcount      = 0;
    nids        = gend.nids;
    masked_data = std::vector<bool>(nids, false);

    // A non-negative snpnum means genotype predictors take part in the
    // model, so they get their own columns in X.
    const bool with_snp = (snpnum >= 0);

    ngpreds = gend.ngpreds;
    ncov    = with_snp ? phed.ncov + ngpreds : phed.ncov;

    if (phed.noutcomes != 2)
    {
        std::cerr << "coxph_data: number of outcomes should be 2 (now: "
                  << phed.noutcomes << ")\n";
        exit(1);
    }

    X.reinit(nids, (ncov + 1)); // Note: ncov takes ngpreds into
                                // account, see above!
    stime.reinit(nids, 1);
    sstat.reinit(nids, 1);
    weights.reinit(nids, 1);
    offset.reinit(nids, 1);
    strata.reinit(nids, 1);
    order.reinit(nids, 1);

    // Outcome column 0 is the follow-up time, column 1 the status.
    for (int i = 0; i < nids; i++)
    {
        stime[i] = (phed.Y).get(i, 0);
        sstat[i] = static_cast<int>((phed.Y).get(i, 1));
        if (sstat[i] != 1 && sstat[i] != 0)
        {
            std::cerr << "coxph_data: status not 0/1 "
                      <<"(correct order: id, fuptime, status ...)"
                      << endl;
            exit(1);
        }
    }

    // Add a column with a constant (=1) to the X matrix (the mean)
    for (int i = 0; i < nids; i++)
    {
        X.put(1., i, 0);
    }

    // Insert the covariate data into X (note we use phed.ncov and not
    // ncov, which includes ngpreds when not computing the null model!)
    for (int j = 1; j <= phed.ncov; j++)
    {
        for (int i = 0; i < nids; i++)
        {
            X.put((phed.X).get(i, j - 1), i, j);
        }
    }

    // Insert the genotype data into X. SNP number 0 is a valid SNP, so
    // the test is >= 0, matching the computation of ncov above. The
    // genotype columns start right after the constant and the
    // covariates, i.e. at column phed.ncov + 1; starting one column
    // earlier would overwrite the last covariate and leave the last
    // column of X unfilled.
    if (with_snp)
    {
        std::vector<double> snpdata(nids);
        const int first_geno_col = phed.ncov + 1;
        for (int j = 0; j < ngpreds; j++)
        {
            gend.get_var(snpnum * ngpreds + j, snpdata.data());
            for (int i = 0; i < nids; i++)
            {
                X.put(snpdata[i], i, first_geno_col + j);
            }
        }
    }

    for (int i = 0; i < nids; i++)
    {
        weights[i] = 1.0;
        offset[i] = 0.0;
        strata[i] = 0;
    }

    // Sort a copy of the follow-up times.
    std::vector<double> tmptime(nids);
    for (int i = 0; i < nids; i++)
    {
        tmptime[i] = stime[i];
    }
    qsort(tmptime.data(), nids, sizeof(double), cmpfun);

    // For each individual find the first not yet claimed slot in the
    // sorted times that holds its follow-up time; the flags make sure
    // tied times are mapped to distinct slots.
    std::vector<int> passed_sorted(nids, 0);
    for (int i = 0; i < nids; i++)
    {
        bool found = false;
        for (int j = 0; j < nids; j++)
        {
            if (tmptime[j] == stime[i] && !passed_sorted[j])
            {
                order[i] = j;
                passed_sorted[j] = 1;
                found = true;
                break;
            }
        }
        if (!found)
        {
            std::cerr << "cannot recover element " << i << "\n";
            exit(1);
        }
    }

    stime   = reorder(stime, order);
    sstat   = reorder(sstat, order);
    weights = reorder(weights, order);
    strata  = reorder(strata, order);
    offset  = reorder(offset, order);
    X       = reorder(X, order);

    // The coxfit2() function expects data in column major order.
    X = transpose(X);

    // X.print();
    // offset.print();
    // weights.print();
    // stime.print();
    // sstat.print();
}
Beispiel #23
0
/*
 * Program entry point. Every rank takes part in the following pipeline on
 * 512x512 complex images:
 *   1. rank 0 reads the two inputs (f1_name, f2_name) and distributes them;
 *   2. A and B each go through execute_fft, are collected on rank 0,
 *      transposed there, redistributed and passed through execute_fft again;
 *   3. execute_mm combines A and B into C;
 *   4. C goes through the same two-pass scheme with direction flag -1;
 *   5. rank 0 writes C to f_out and prints the elapsed wall-clock time.
 * mpi_complex, f1_name, f2_name and f_out are declared outside this function.
 *
 * Returns 0 after MPI has been finalized.
 */
int main(int argc, char **argv) {

	/* Only rank 0 sets and reads start_t; initialized so that it is never
	 * indeterminate on the other ranks. */
	double start_t = 0.0, end_t;

	int my_rank, p;
	/* Static storage: three 512*512 element arrays are too large to be
	 * placed safely on the stack. */
	static complex A[512*512], B[512*512], C[512*512];

	/* initialize MPI */
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &p);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

	/* Create MPI Datatype for Complex */
	/* The count argument of MPI_Type_create_struct is an int. */
	const int    nitems = 2;
	int          blocklengths[2] = {1,1};
	MPI_Datatype types[2] = {MPI_FLOAT, MPI_FLOAT};
	MPI_Aint     offsets[2];

	offsets[0] = offsetof(complex, r);
	offsets[1] = offsetof(complex, i);

	MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_complex);
	MPI_Type_commit(&mpi_complex);

	/* Initialize Data*/
	if(my_rank == 0) {
		initialize_data(f1_name, A);
		initialize_data(f2_name, B);
		/* Timing starts after the input files have been read. */
		start_t = MPI_Wtime();
		dist_data(A, p);
		dist_data(B, p);
	} else {
		recv_data(A, p, my_rank);
		recv_data(B, p, my_rank);
	}

	/* 2D FFT on A */
	execute_fft(A, 1, p, my_rank);
	collect_data(A, p, my_rank);
	if(my_rank == 0) {
		transpose(A);
		dist_data(A, p);
	} else {
		recv_data(A, p, my_rank);
	}
	execute_fft(A, 1, p, my_rank);

	/* 2D FFT on B */
	execute_fft(B, 1, p, my_rank);
	collect_data(B, p, my_rank);
	if(my_rank == 0) {
		transpose(B);
		dist_data(B, p);
	} else {
		recv_data(B, p, my_rank);
	}
	execute_fft(B, 1, p, my_rank);

	/* Multiplication Step */
	execute_mm(A, B, C, p, my_rank);

	/* 2D FFT on C */
	execute_fft(C, -1, p, my_rank);
	collect_data(C, p, my_rank);
	if(my_rank == 0) {
		transpose(C);
		dist_data(C, p);
	} else {
		recv_data(C, p, my_rank);
	}
	execute_fft(C, -1, p, my_rank);
	collect_data(C, p, my_rank);

	end_t = MPI_Wtime();

	if(my_rank == 0) {
		output_data(f_out, C);
		printf("\nElapsed time = %g s\n", end_t - start_t);
		printf("--------------------------------------------\n");
	}

	/* Release the committed datatype before shutting MPI down. */
	MPI_Type_free(&mpi_complex);
	MPI_Finalize();
	return 0;
}
void runPoisson(int rank, int size, int n){
  double time=MPI_Wtime();
  Real **b, *diag, *RecvBuf,*z, h, maxError;
  int i, j, m, nn, *len, *disp;

  m  = n-1;
  nn = 4*n;
  splitVector(m, size, &len, &disp);
  diag = createRealArray (m);
  b    = createReal2DArray (len[rank],m);
  z    = createRealArray (nn);
  h    = 1./(Real)n;

  #pragma omp parallel for schedule(static)
  for (i=0; i < m; i++) {
    diag[i] = 2.*(1.-cos((i+1)*M_PI/(Real)n));
  }

  #pragma omp for
  for (j=0; j < len[rank]; j++) {
  #pragma omp parallel for schedule(static)
    for (i=0; i < m; i++) {
      Real x=(Real)(j+1+disp[rank])/n;
      Real y=(Real) (i+1)/n;
      b[j][i] = h*h * funcf(x,y);
    }
  }

  #pragma omp parallel for schedule(static)
  for (j=0; j < len[rank]; j++) {
    Real* zt = createRealArray (nn);
    fst_(b[j], &n, zt, &nn);
    free(zt);
  }

  transpose(b, size, len, disp, rank, m);

  #pragma omp parallel for schedule(static)
  for (i=0; i < len[rank]; i++) {
    Real* zt  = createRealArray (nn);
    fstinv_(b[i], &n, zt, &nn);
    free(zt);
  }

  #pragma omp for
  for (j=0; j < len[rank]; j++) {
  #pragma omp parallel for schedule(static)
    for (i=0; i < m; i++) {
      b[j][i] = b[j][i]/(diag[i]+diag[j+disp[rank]]);
    }
  }

  #pragma omp parallel for schedule(static)
  for (i=0; i < len[rank]; i++) {
    Real* zt  = createRealArray (nn);
    fst_(b[i], &n, zt, &nn);
    free(zt);
  }

  transpose(b, size, len, disp, rank, m);

  #pragma omp parallel for schedule(static)
  for (j=0; j < len[rank]; j++) {
    Real* zt  = createRealArray (nn);
    fstinv_(b[j], &n, zt, &nn);
    free(zt);
  }




  if (rank==0)
  {
    RecvBuf = createRealArray (m*m);
  }
  gatherMatrix(b, m, RecvBuf, len, disp,0);

  if (rank==0)
  {
    for (int j=0; j < m; j++) {
      for (int i=0; i < m; i++) {
        printf("%e %e %e \n",(Real)i/m,(Real)j/m,RecvBuf[j*m+i] );
      }
    }
  }
}
Beispiel #25
0
/***********************************************************************//**
 * @brief Test Cholesky decomposition
 ***************************************************************************/
void TestGSymMatrix::matrix_cholesky(void)
{
    // Test Cholesky decomposition
	GSymMatrix cd           = cholesky_decompose(m_test);
	GMatrix    cd_lower     = cd.extract_lower_triangle();
	GMatrix    cd_upper     = transpose(cd_lower);
	GMatrix    cd_product   = cd_lower * cd_upper;
	GMatrix    cd_residuals = GMatrix(m_test) - cd_product;
	double res = (abs(cd_residuals)).max();
    test_value(res, 0.0, 1.0e-15, "Test cholesky_decompose() method");

    // Test compressed Cholesky decomposition
    GSymMatrix test_zero         = set_matrix_zero();
	GSymMatrix cd_zero           = cholesky_decompose(test_zero);
	GMatrix    cd_zero_lower     = cd_zero.extract_lower_triangle();
	GMatrix    cd_zero_upper     = transpose(cd_zero_lower);
	GMatrix    cd_zero_product   = cd_zero_lower * cd_zero_upper;
	GMatrix    cd_zero_residuals = GMatrix(test_zero) - cd_zero_product;
	res = (abs(cd_zero_residuals)).max();
    test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_decompose() method");

	// Test Cholesky inplace decomposition
	GSymMatrix test = m_test;
    test.cholesky_decompose();
	GMatrix cd_lower2 = test.extract_lower_triangle();
    test_assert((cd_lower2 == cd_lower), "Test inplace cholesky_decompose() method");

    // Test Cholesky solver (first test)
	GVector e0(g_rows);
	GVector a0(g_rows);
	e0[0] = 1.0;
	e0[1] = 0.0;
	e0[2] = 0.0;
	a0[0] = g_matrix[0];
	a0[1] = g_matrix[3];
	a0[2] = g_matrix[6];
	GVector s0 = cd.cholesky_solver(a0) - e0;
	res = max(abs(s0));
    test_value(res, 0.0, 1.0e-15, "Test cholesky_solver() method");

    // Test Cholesky solver (second test)
	e0[0] = 0.0;
	e0[1] = 1.0;
	e0[2] = 0.0;
	a0[0] = g_matrix[1];
	a0[1] = g_matrix[4];
	a0[2] = g_matrix[7];
	s0 = cd.cholesky_solver(a0) - e0;
	res = max(abs(s0));
    test_value(res, 0.0, 1.0e-15, "Test cholesky_solver() method");

    // Test Cholesky solver (third test)
	e0[0] = 0.0;
	e0[1] = 0.0;
	e0[2] = 1.0;
	a0[0] = g_matrix[2];
	a0[1] = g_matrix[5];
	a0[2] = g_matrix[8];
	s0 = cd.cholesky_solver(a0) - e0;
	res = max(abs(s0));
    test_value(res, 0.0, 1.0e-15, "Test cholesky_solver() method");

    // Test compressed Cholesky solver (first test)
	e0 = GVector(g_rows+1);
	a0 = GVector(g_rows+1);
	e0[0] = 1.0;
	e0[1] = 0.0;
	e0[2] = 0.0;
	e0[3] = 0.0;
	a0[0] = g_matrix[0];
	a0[1] = g_matrix[3];
	a0[2] = 0.0;
	a0[3] = g_matrix[6];
	s0    = cd_zero.cholesky_solver(a0) - e0;
	res   = max(abs(s0));
    test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_solver() method");

    // Test compressed Cholesky solver (second test)
	e0[0] = 0.0;
	e0[1] = 1.0;
	e0[2] = 0.0;
	e0[3] = 0.0;
	a0[0] = g_matrix[1];
	a0[1] = g_matrix[4];
	a0[2] = 0.0;
	a0[3] = g_matrix[7];
	s0    = cd_zero.cholesky_solver(a0) - e0;
	res   = max(abs(s0));
    test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_solver() method");

    // Test compressed Cholesky solver (third test)
	e0[0] = 0.0;
	e0[1] = 0.0;
	e0[2] = 0.0;
	e0[3] = 1.0;
	a0[0] = g_matrix[2];
	a0[1] = g_matrix[5];
	a0[2] = 0.0;
	a0[3] = g_matrix[8];
	s0    = cd_zero.cholesky_solver(a0) - e0;
	res   = max(abs(s0));
    test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_solver() method");

	// Test Cholesky inverter
	GSymMatrix unit(g_rows,g_cols);
	unit(0,0) = unit(1,1) = unit(2,2) = 1.0;
	GSymMatrix test_inv = m_test;
	test_inv.cholesky_invert();
    GMatrix ci_product   = m_test * test_inv;
    GMatrix ci_residuals = ci_product - unit;
	res = (abs(ci_residuals)).max();
    test_value(res, 0.0, 1.0e-15, "Test cholesky_invert method");

	// Test Cholesky inverter for compressed matrix
	unit = GSymMatrix(4,4);
	unit(0,0) = unit(1,1) = unit(3,3) = 1.0;
	GSymMatrix test_zero_inv = test_zero;
	test_zero_inv.cholesky_invert();
    GMatrix ciz_product   = test_zero * test_zero_inv;
    GMatrix ciz_residuals = ciz_product - unit;
	res = (abs(ciz_residuals)).max();
    test_value(res, 0.0, 1.0e-15, "Test compressed cholesky_invert method");

    // Return
    return;
}
/* ----------------------------- MNI Header -----------------------------------
@NAME       : procrustes
@INPUT      : npoints - number of input point pairs
              ndim    - number of dimensions for each point
              Apoints - Matrix of point set 1 (in zero offset
                 form). The dimensions of this matrix should be defined
                 to be 1 to npoints and 1 to ndim (when calling the numerical
                 recipes routine matrix).
              Bpoints - Matrix of point set 2 (in zero offset
                 form). The dimensions of this matrix should be defined
                 to be 1 to npoints and 1 to ndim (when calling the numerical
                 recipes routine matrix).
@OUTPUT     : translation - zero offset vector (1 to ndim) that 
                 specifies the translation to be applied to Bpoints to line
                 up the centroid with that of Apoints. Calling routine must
                 allocate space for this vector.
              centre_of_rotation - zero offset vector (1 to ndim) that
                 specifies the centre of rotation and scaling (this is 
                 in fact only the centroid of Apoints). Calling routine must
                 allocate space for this vector.
              rotation - zero offset matrix (1 to ndim by 1 to ndim) 
                 to rotate translated Bpoints so that they line up with 
                 Apoints. Calling routine must allocate space for this 
                 matrix.
              scale - Scalar value giving global scaling to be applied to
                 translated and rotated Bpoints to match Apoints.
@RETURNS    : (nothing)
@DESCRIPTION: Calculates n-dimensional linear transformation from one set 
              of points to another, minimizing distance between equivalent
              points. Transformation from Bpoints to Apoints is calculated.
@METHOD     : See Matrix Computations, Golub and Van Loan, pp. 425-426 and
              paper by Sibson, Robin, J.R.Statist.Soc. B(1978), Vol. 40,
              No. 2, pp 234-238.
              Steps of calculations are as follows :
                 1) Calculate translation that aligns the centroids of the
                    two point sets.
                 2) Calculate rotation/reflexion that minimizes residual.
                 3) Calculate scaling of points to minimize residual.
              The process can be broken into independent steps because the
              best translation aligns centroids independently of the choice
              of rotation/reflexion and scaling and the best rotation/reflexion
              can be found independently of scale (after the best translation
              has been found). (See Sibson for more).
@GLOBALS    : (none)
@CALLS      : calc_centroid
              translate
              transpose
              matrix_multiply
              svdcmp (zero offset)
              trace
@CREATED    : Long time ago (Sean Marrett)
@MODIFIED   : Some time later (Shyan Ku)
              Feb. 26, 1990 (Weiqian Dai)
              January 30, 1992 (Peter Neelin)
                 - complete rewrite for roughly NIL-abiding code. Modified
                 name and calling parameters.
---------------------------------------------------------------------------- */
void procrustes(int npoints, int ndim, 
                       float **Apoints, float **Bpoints,
                       float *translation, float *centre_of_rotation,
                       float **rotation, float *scale)
{
   int d;
   float numer, denom;

   /* Work vectors; every array in this routine is indexed from 1. */
   float *a_shift_vec = vector(1,ndim);
   float *b_shift_vec = vector(1,ndim);
   float *sing_values = vector(1,ndim);

   /* Work matrices */
   float **a_centred   = matrix(1,npoints,1,ndim);
   float **b_centred   = matrix(1,npoints,1,ndim);
   float **a_centred_t = matrix(1,ndim,1,npoints);
   float **b_centred_t = matrix(1,ndim,1,npoints);
   float **left        = matrix(1,ndim,1,ndim);
   float **right       = matrix(1,ndim,1,ndim);
   float **right_t     = matrix(1,ndim,1,ndim);
   float **b_rotated   = matrix(1,npoints,1,ndim);
   float **gram        = matrix(1,npoints,1,npoints);

   /* Step 1: centre both point sets on their centroids. The centroid of
      Apoints is handed back to the caller as the centre of rotation. */

   calc_centroid(npoints, ndim, Apoints, centre_of_rotation);
   for (d = 1; d <= ndim; d++) {
      a_shift_vec[d] = -centre_of_rotation[d];
   }
   translate(npoints, ndim, Apoints, a_shift_vec, a_centred);

   calc_centroid(npoints, ndim, Bpoints, b_shift_vec);
   for (d = 1; d <= ndim; d++) {
      b_shift_vec[d] *= -1;
   }
   translate(npoints, ndim, Bpoints, b_shift_vec, b_centred);

   /* Both shift vectors hold negated centroids, so this difference is
      (centroid of A) - (centroid of B). */
   for (d = 1; d <= ndim; d++) {
      translation[d] = b_shift_vec[d] - a_shift_vec[d];
   }

   /* Step 2: rotation/reflexion. Decompose Bshift' * Ashift with svdcmp
      and multiply the resulting left factor by the transposed right
      factor (svdcmp presumably overwrites its first argument in place). */

   transpose(npoints, ndim, b_centred, b_centred_t);
   matrix_multiply(ndim, npoints, ndim, b_centred_t, a_centred, left);
   svdcmp(left, ndim, ndim, sing_values, right);
   transpose(ndim, ndim, right, right_t);
   matrix_multiply(ndim, ndim, ndim, left, right_t, rotation);

   /* Step 3: scale = trace(Brotated * Ashift') / trace(Bshift * Bshift') */

   matrix_multiply(npoints, ndim, ndim, b_centred, rotation, b_rotated);
   transpose(npoints, ndim, a_centred, a_centred_t);
   matrix_multiply(npoints, ndim, npoints, b_rotated, a_centred_t, gram);
   numer = trace(npoints, gram);
   matrix_multiply(npoints, ndim, npoints, b_centred, b_centred_t, gram);
   denom = trace(npoints, gram);

   /* A zero denominator yields a zero scale rather than a division. */
   if (denom == 0.0) {
      *scale = 0.0;
   }
   else {
      *scale = numer / denom;
   }

   /* The rotation is returned to the caller transposed (in place). */

   transpose(ndim, ndim, rotation, rotation);

   /* Release work vectors */
   free_vector(a_shift_vec,1,ndim);
   free_vector(b_shift_vec,1,ndim);
   free_vector(sing_values,1,ndim);

   /* Release work matrices */
   free_matrix(a_centred,1,npoints,1,ndim);
   free_matrix(b_centred,1,npoints,1,ndim);
   free_matrix(a_centred_t,1,ndim,1,npoints);
   free_matrix(b_centred_t,1,ndim,1,npoints);
   free_matrix(left,1,ndim,1,ndim);
   free_matrix(right,1,ndim,1,ndim);
   free_matrix(right_t,1,ndim,1,ndim);
   free_matrix(b_rotated,1,npoints,1,ndim);
   free_matrix(gram,1,npoints,1,npoints);
}
Beispiel #27
0
void build_relations(void)
{
  register int i;
  register int j;
  register int k;
  register short *rulep;
  register short *rp;
  register shifts *sp;
  register int length;
  register int nedges;
  register int done;
  register int state1;
  register int stateno;
  register int symbol1;
  register int symbol2;
  register short *shortp;
  register short *edge;
  register short *states;
  register short **new_includes;

  includes = NEW2(ngotos, short *);
  edge = NEW2(ngotos + 1, short);
  states = NEW2(maxrhs + 1, short);

  for (i = 0; i < ngotos; i++)
    {
      nedges = 0;
      state1 = from_state[i];
      symbol1 = accessing_symbol[to_state[i]];

      for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
        {
          length = 1;
          states[0] = state1;
          stateno = state1;

          for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
            {
              symbol2 = *rp;
              sp = shift_table[stateno];
              k = sp->nshifts;

              for (j = 0; j < k; j++)
                {
                  stateno = sp->shift[j];
                  if (accessing_symbol[stateno] == symbol2) break;
                }

              states[length++] = stateno;
            }

          add_lookback_edge(stateno, *rulep, i);

          length--;
          done = 0;
          while (!done)
            {
              done = 1;
              rp--;
              if (ISVAR(*rp))
                {
                  stateno = states[--length];
                  edge[nedges++] = map_goto(stateno, *rp);
                  if (nullable[*rp] && length > 0) done = 0;
                }
            }
        }

      if (nedges)
        {
          includes[i] = shortp = NEW2(nedges + 1, short);
          for (j = 0; j < nedges; j++)
            shortp[j] = edge[j];
          shortp[nedges] = -1;
        }
    }

  new_includes = transpose(includes, ngotos);

  for (i = 0; i < ngotos; i++)
    if (includes[i])
      FREE(includes[i]);

  FREE(includes);

  includes = new_includes;

  FREE(edge);
  FREE(states);
}
Beispiel #28
0
/*
 * Benchmark driver: multiplies two n x n matrices by feeding the rows of
 * the transposed B matrix one at a time to MatMatMultiply, transposes the
 * result back, and prints "n sum realtime cputime".
 *
 * trace == 1 dumps A, B and the result; trace == 2 verifies the result
 * against multiply_CPU_matrix.
 *
 * Returns 0 on success, 1 if the verification fails or memory cannot be
 * allocated.
 */
int main(int argc, char * argv[])
{
	parse_args(argc, argv);
	const int size = n * n;
	/* Compute the byte count in size_t so it is not squeezed through int. */
	const size_t data_size_bytes = (size_t)size * sizeof(float);

	int status = 0;
	float sum;
	float *vector;
	timing_t timer;

	float *mat_a = malloc(data_size_bytes);
	float *mat_b = malloc(data_size_bytes);
	float *output = malloc(data_size_bytes);
	float *expected = malloc(data_size_bytes);
	float *mat_b_trans = NULL;
	float *output_trans = NULL;

	/* malloc(0) may legitimately return NULL, so only treat NULL as an
	 * error when memory was actually requested. */
	if (data_size_bytes > 0 && (!mat_a || !mat_b || !output || !expected)) {
		fprintf(stderr, "Out of memory.\n");
		status = 1;
		goto cleanup;
	}

	generate_matrix(n, mat_a, range);
	generate_matrix(n, mat_b, range);

	timer_start(&timer);

	/* The scratch allocations stay inside the timed region, as before. */
	mat_b_trans = malloc(data_size_bytes);
	if (data_size_bytes > 0 && !mat_b_trans) {
		fprintf(stderr, "Out of memory.\n");
		status = 1;
		goto cleanup;
	}
	transpose(n, mat_b, mat_b_trans);

	for (int i=0; i<n; ++i) {
		vector = &mat_b_trans[n*i];
		MatMatMultiply(n, mat_a, vector, &output[n*i]);
	}

	output_trans = malloc(data_size_bytes);
	if (data_size_bytes > 0 && !output_trans) {
		fprintf(stderr, "Out of memory.\n");
		status = 1;
		goto cleanup;
	}
	transpose(n, output, output_trans);

	timer_stop(&timer);
	sum = sum_mat(size, output_trans);
	printf("%d %f %ld %ld\n", n, sum, timer.realtime, timer.cputime);

	if (trace == 1) {

		printf("\nMatrix A\n");
		for (int i=0; i<n; i++){
			for (int j=0; j<n; j++){
				printf("%f " , mat_a[i*n+j]);
			}
			printf("\n");
		}

		printf("\nMatrix B \n");
		for (int i=0; i<n; i++){
			for (int j=0; j<n; j++){
				printf("%f " , mat_b[i*n+j]);
			}
			printf("\n");
		}

		/* Print output_trans: it is the matrix that is summed above and
		 * checked below, whereas `output` still holds its transpose. */
		printf("\n\nResult\n");
		for (int i=0; i<n; i++){
			for (int j=0; j<n; j++){
				printf("%f " , output_trans[i*n+j]);
			}
			printf("\n");
		}
	}

	else if (trace == 2) {
		multiply_CPU_matrix(n, mat_a, mat_b, expected);

		/* Write to the function-level status (no shadowing local) so a
		 * failed check is reflected in the process exit code. */
		if (check(size, output_trans, expected)) {
			printf("Test failed.\n");
			status = 1;
		}
		else
			printf("Test passed OK!\n");

	}

cleanup:
	/* free(NULL) is a no-op, so this is safe on every exit path. */
	free(mat_a);
	free(mat_b);
	free(mat_b_trans);
	free(output);
	free(expected);
	free(output_trans);

	return status;
}
Beispiel #29
0
/* Apply routine for the rank-2, in-place, square transpose using the
   tiled, buffered kernel. The data is addressed through I only; the
   output pointer O is not used. */
static void apply_ip_sq_tiledbuf(const plan *ego_, R *I, R *O)
{
     const P *self;

     UNUSED(O);
     self = (const P *) ego_;
     transpose(self->d, self->rnk, self->vl, I, X(transpose_tiledbuf));
}
Beispiel #30
0
// Builds the projection matrix used when rendering from the shadow's point of view.
// Step 1: determine the z-range and load an orthographic projection for it.
// Step 2: translate and scale that projection so it exactly covers the bounding
//         box of the frustum slice `f`.
// The resulting matrix is left loaded on the GL projection stack.
// Returns the near bound (minZ) that was used for the z-range.
float CShadowMap::ApplyCropMatrix(frustum &f)
{
	float lightModelview[16];
	float lightProj[16];
	float cropValues[16];
	float lightMvp[16];

	// x/y extents start out inverted so the first corner always replaces them
	float xMax = -2000.0f;
	float yMax = -2000.0f;
	float zMax;
	float xMin =  2000.0f;
	float yMin =  2000.0f;
	float zMin;

	matrix4<float> xform;
	vec4f pt;

	// z-range of the frustum slice in the current modelview space
	// (the shadow's view, per the description above)
	glGetFloatv(GL_MODELVIEW_MATRIX, lightModelview);
	xform.set_value(lightModelview);

	// Only the z component of each transformed corner is consumed here.
	// The range is seeded with a fixed [-1000, 1000] interval rather than
	// with the first corner's z, so this first transform result is not read.
	pt = xform*vec4f(f.point[0], 1.0f);
	zMin = -1000;
	zMax = 1000;
	for(int corner=1; corner<8; corner++)
	{
		pt = xform*vec4f(f.point[corner], 1.0f);
		if(pt.z > zMax) zMax = pt.z;
		if(pt.z < zMin) zMin = pt.z;
	}

	// Extend the far bound so every bounding sphere is included
	// (dummy objects at the edges of the scene). The near bound is
	// intentionally not extended by the spheres.
	for(int obj=0; obj<NUM_OBJECTS; obj++)
	{
		pt = xform*vec4f(BSphere[obj].center, 1.0f);
		if(pt.z + BSphere[obj].radius > zMax) zMax = pt.z + BSphere[obj].radius;
	}

	// Orthographic projection with the new z-bounds; the bounds are negated
	// and swapped because the light looks down the negative z axis.
	// (A point light would need a perspective projection here instead.)
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	glOrtho(-1.0, 1.0, -1.0, 1.0, -zMax, -zMin);
	glGetFloatv(GL_PROJECTION_MATRIX, lightProj);

	// Temporarily multiply in the modelview to read back projection * modelview
	glPushMatrix();
	glMultMatrixf(lightModelview);
	glGetFloatv(GL_PROJECTION_MATRIX, lightMvp);
	glPopMatrix();

	// x/y extents of the frustum slice in the light's homogeneous coordinates
	xform.set_value(lightMvp);
	for(int corner=0; corner<8; corner++)
	{
		pt = xform*vec4f(f.point[corner], 1.0f);

		pt.x /= pt.w;
		pt.y /= pt.w;

		if(pt.x > xMax) xMax = pt.x;
		if(pt.x < xMin) xMin = pt.x;
		if(pt.y > yMax) yMax = pt.y;
		if(pt.y < yMin) yMin = pt.y;
	}

	// Scale and offset that map [xMin, xMax] x [yMin, yMax] onto [-1, 1] x [-1, 1]
	float cropScaleX = 2.0f/(xMax - xMin);
	float cropScaleY = 2.0f/(yMax - yMin);
	float cropOffsetX = -0.5f*(xMax + xMin)*cropScaleX;
	float cropOffsetY = -0.5f*(yMax + yMin)*cropScaleY;

	// Load the crop matrix and multiply the glOrtho projection onto it
	xform.make_identity();
	xform.element(0,0) = cropScaleX;
	xform.element(1,1) = cropScaleY;
	xform.element(0,3) = cropOffsetX;
	xform.element(1,3) = cropOffsetY;
	transpose(xform);
	xform.get_value(cropValues);
	glLoadMatrixf(cropValues);
	glMultMatrixf(lightProj);

	return zMin;
}