Example #1
0
    /**
     * \brief solve the linear system with the CGS (conjugate gradient squared) iteration
     *
     * The solution vector is zeroed on entry, so the iteration always starts
     * from a zero initial guess. If a preconditioner (prec) is set it is
     * applied through solve_prec, otherwise the direction vectors are used
     * unchanged.
     *
     * \param matrix system matrix; calc_lin_comb and matrix_vector_product are used
     * \param sp_rhs right-hand side vector, must be non-empty
     * \param sp_sol solution vector, must be non-empty (BS_ASSERT expects
     *               n_rows * n_block_size elements)
     * \return 0 on normal exit; breakdown (rho_1 == 0 or |sigma| <= epsmac) and
     *         preconditioner failure are reported via bs_throw_exception
     */
    int cgs_solver::solve (sp_matrix_t matrix, spv_double sp_rhs, spv_double sp_sol)
    {
#ifdef _DEBUG
      BOSOUT (section::solvers, level::debug) << "CGS\n" << bs_end;
#endif
      // matrix is dereferenced below, so validate it like the other arguments
      BS_ERROR (matrix, "cgs_solve");
      BS_ERROR (sp_rhs->size (), "cgs_solve");
      BS_ERROR (sp_sol->size (), "cgs_solve");
      BS_ERROR (prop, "cgs_solve");

      t_double rho_1, rho_2 = 1, alpha = 1, beta, sigma;
      int iter;
      const double epsmac = 1e-24;
      t_double r_norm, b_norm, den_norm;
      const t_double one = 1.0;

      // residuals are compared as squared norms, hence the squared tolerance
      t_double tol = prop->get_f (tol_idx);
      tol *= tol;

      int max_iter  = prop->get_i (max_iters_idx);
      t_long n    = matrix->get_n_rows () * matrix->get_n_block_size ();
      BS_ASSERT (n == (t_long)sp_sol->size ());

      prop->set_b (success_idx, false);

      // Resize the work vectors BEFORE taking raw pointers into them:
      // a resize may reallocate the storage and invalidate earlier pointers.
      sp_p->resize (n);
      sp_phat->resize (n);
      sp_v->resize (n);
      sp_tmp->resize (n);
      sp_q->resize (n);
      sp_u->resize (n);
      sp_d->resize (n);
      sp_dhat->resize (n);
      sp_r->resize (n);
      sp_rtilde->resize (n);
      sp_r_old->resize (n);

      t_double *rhs             = &(*sp_rhs)[0];
      t_double *sol             = &(*sp_sol)[0];
      t_double *p               = &(*sp_p)[0];
      t_double *phat            = &(*sp_phat)[0];
      t_double *v               = &(*sp_v)[0];
      t_double *tmp             = &(*sp_tmp)[0];
      t_double *q               = &(*sp_q)[0];
      t_double *u               = &(*sp_u)[0];
      t_double *d               = &(*sp_d)[0];
      t_double *dhat            = &(*sp_dhat)[0];
      t_double *r               = &(*sp_r)[0];
      t_double *rtilde          = &(*sp_rtilde)[0];

      // solution = {0}
      memset (sol, 0, sizeof (t_double) * n);

      // r = tmp = p = v = q = {0}
      memset (r, 0, sizeof (t_double) * n);
      memset (tmp, 0, sizeof (t_double) * n);
      memset (p, 0, sizeof (t_double) * n);
      memset (v, 0, sizeof (t_double) * n);
      memset (q, 0, sizeof (t_double) * n);

      // u = r (r is still all zeros here, so this zeroes u as well)
      memcpy (u, r, sizeof (t_double) * n);

      // initial residual from the zero solution and rhs
      // (the original comment reads "r = Ax0 - b"; the sign convention is
      // defined by calc_lin_comb -- TODO confirm)
      matrix->calc_lin_comb (-1.0, 1.0, sp_sol, sp_rhs, sp_r);
      // rtilde = r0 (shadow residual)
      memcpy (rtilde, r, sizeof (t_double) * n);

      r_norm = mv_vector_inner_product_n (r, r, n);

      if (r_norm <= tol) // initial guess quite good
        return 0;

      rho_1 = r_norm;
      b_norm = sqrt (mv_vector_inner_product_n (rhs, rhs, n));

      if (b_norm > epsmac) // choose convergence criterion
        {
          // |r_i|/|b| <= eps if |b| > 0
          tol *= b_norm;
          den_norm = b_norm;
        }
      else // (r_norm > epsmac)
        {
          // |r_i|/|r0| <= eps if |b| = 0
          tol *= r_norm;
          den_norm = r_norm;
        }

      // main loop
      for (iter = 0; iter < max_iter; ++iter)
        {
          if (iter)
            {
              // rho_1 = (r, rtilde); on the first pass it equals r_norm
              rho_1 = mv_vector_inner_product_n (r, rtilde, n);
              if (rho_1 == 0) // failure
                {
                  if (den_norm > epsmac)
                    prop->set_f (final_res_idx, r_norm / den_norm);
                  else
                    prop->set_f (final_res_idx, r_norm);
                  bs_throw_exception ("CGS: Failure - rho_1 == 0");
                }
            }

          beta = rho_1/rho_2; // beta = rho_n/rho_n-1
          rho_2 = rho_1;

          // u = r + beta*q
          sum_vector_n (r, one, q, beta, u, n);
          // tmp = q+beta*p_old
          sum_vector_n (q, one, p, beta, tmp, n);
          // p_new = u + beta*tmp
          sum_vector_n (u, one, tmp, beta, p, n);

          // phat = M^(-1) * p
          if (prec)
            {
              if (prec->solve_prec (matrix, sp_p, sp_phat))
                {
                  bs_throw_exception ("CGS: Preconditioner failed");
                }
            }
          else // no precondition (preconditioner=identity_matrix)
            {
              memcpy (phat, p, sizeof (t_double) * n);
            }

          // v = A * phat = A * p, if no precondition;
          memset (v, 0, sizeof (t_double) * n);
          matrix->matrix_vector_product (sp_phat, sp_v);

          // sigma = (v,rtilde)
          sigma = mv_vector_inner_product_n (rtilde, v, n);

          if (sigma > epsmac || sigma < -epsmac)
            {
              // alpha = rho_1/sigma
              alpha = rho_1 / sigma;
            }
          else // failure
            {
              if (den_norm > epsmac)
                prop->set_f (final_res_idx, r_norm / den_norm);
              else
                prop->set_f (final_res_idx, r_norm);
              bs_throw_exception ("CGS: Failure - sigma == 0");
            }

          // q = u - alpha*v
          sum_vector_n (u, one, v, -alpha, q, n);
          // d = u + q
          sum_vector_n (u, one, q, one, d, n);

          // dhat = M^(-1) * d;
          if (prec)
            {
              if (prec->solve_prec (matrix, sp_d, sp_dhat))
                {
                  bs_throw_exception ("CGS: Preconditioner failed");
                }
            }
          else // no precondition (preconditioner=identity_matrix)
            {
              memcpy (dhat, d, sizeof (t_double) * n);
            }

          // tmp = A*dhat
          memset (tmp, 0, sizeof (t_double) * n);
          matrix->matrix_vector_product (sp_dhat, sp_tmp);

          // r = r - alpha*tmp
          sum_vector_n (r, one, tmp, -alpha, r, n);
          // x = x + alpha*dhat
          sum_vector_n (sol, one, dhat, alpha, sol, n);

          r_norm = mv_vector_inner_product_n (r, r, n);

          if (r_norm <= tol)
            break;
        }

      // NOTE(review): success is set to true and iter + 1 is stored even when
      // the loop ran out of iterations without meeting the tolerance
      // (then iter == max_iter) -- confirm whether callers rely on this.
      prop->set_i (iters_idx, iter + 1);
      prop->set_b (success_idx, true);

      if (den_norm > epsmac)
        prop->set_f (final_res_idx, r_norm / den_norm);
      else
        prop->set_f (final_res_idx, r_norm);

      return 0;
    }
Example #2
0
    /**
     * \brief solve the linear system with the TFQMR iteration
     *
     * The solution vector is filled with zeros on entry, so the iteration
     * always starts from a zero initial guess. If a preconditioner (prec) is
     * set it is applied through solve_prec.
     *
     * \param matrix system matrix; calc_lin_comb and matrix_vector_product are used
     * \param sp_rhs right-hand side vector, must be non-empty
     * \param sp_sol solution vector, must be non-empty
     * \return 0 on normal exit; breakdown (rho_1 == 0) and preconditioner
     *         failure are reported via bs_throw_exception
     */
    int tfqmr_solver::solve (sp_matrix_t matrix, spv_double sp_rhs, spv_double sp_sol)
    {
      BOSOUT (section::solvers, level::debug) << "TFQMR\n" << bs_end;

      BS_ERROR (matrix, "tfqmr_solve");
      BS_ERROR (sp_rhs->size (), "tfqmr_solve");
      BS_ERROR (sp_sol->size (), "tfqmr_solve");
      BS_ERROR (prop, "tfqmr_solve");


      t_double rho_1, rho_2 = 1, alpha = 1, beta, sigma;
      int iter;
      const double epsmac = 1e-24;
      // eta and nu are read in the very first update of d (before they are
      // first assigned inside the loop), so they must start from a defined
      // value; zero makes that first update reduce to d = y.
      t_double r_norm, b_norm, den_norm, w_norm, eta = 0, nu = 0, tau, c;

      t_double *rhs = &(*sp_rhs)[0];
      t_double *sol = &(*sp_sol)[0];
      // residuals are compared as squared norms, hence the squared tolerance
      t_double tol = prop->get_f (tol_idx);
      tol *= tol;

      int max_iter  = prop->get_i (max_iters_idx);
      t_long n         = matrix->get_n_rows () * matrix->get_n_block_size ();

      // size all work vectors before raw pointers into them are taken
      sp_p->resize (n);
      sp_v->resize (n);
      sp_w->resize (n);
      sp_u->resize (n);
      sp_q->resize (n);
      sp_d->resize (n);
      sp_res->resize (n);
      sp_r->resize (n);
      sp_rtilde->resize (n);
      sp_tmp->resize (n);
      sp_rhat->resize (n);
      sp_y->resize (n);

      prop->set_b (success_idx, false);

      // solution = {0}
      sp_sol->assign (0);
      // r = {0}
      sp_r->assign (0);
      sp_tmp->assign (0);
      sp_p->assign (0);
      sp_v->assign (0);
      sp_q->assign (0);
      sp_d->assign (0);

      t_double *r              = &(*sp_r)[0];
      t_double *p              = &(*sp_p)[0];
      t_double *v              = &(*sp_v)[0];
      t_double *w              = &(*sp_w)[0];
      t_double *u              = &(*sp_u)[0];
      t_double *q              = &(*sp_q)[0];
      t_double *d              = &(*sp_d)[0];
      t_double *res            = &(*sp_res)[0];
      t_double *rtilde         = &(*sp_rtilde)[0];
      t_double *tmp            = &(*sp_tmp)[0];
      t_double *rhat           = &(*sp_rhat)[0];
      t_double *y              = &(*sp_y)[0];

      // initial residual from the zero solution and rhs
      // (the original comment reads "r = Ax0 - b"; the sign convention is
      // defined by calc_lin_comb -- TODO confirm)
      matrix->calc_lin_comb (-1.0, 1.0, sp_sol, sp_rhs, sp_r);
      // rtilde = r0 (shadow residual)
      memcpy (rtilde, r, sizeof (t_double) * n);

      // p0 = u0 = w0 = r0;
      memcpy (u, r, sizeof (t_double) * n);
      memcpy (p, r, sizeof (t_double) * n);
      memcpy (w, r, sizeof (t_double) * n);

      // tmp = M^(-1) * u; then u = p = tmp
      if (prec)
        {
          if (prec->solve_prec (matrix, sp_u, sp_tmp))
            {
              bs_throw_exception ("TFQMR: Preconditioner failed");
            }
          memcpy (u, tmp, sizeof (t_double) * n);
          memcpy (p, tmp, sizeof (t_double) * n);
        }

      // v = A * p (v was zeroed above)
      matrix->matrix_vector_product (sp_p, sp_v);

      r_norm = mv_vector_inner_product_n (r, r, n);


      if (r_norm <= tol) // initial guess quite good
        return 0;

      tau = sqrt (r_norm);
      rho_1 = r_norm;
      rho_2 = r_norm;
      b_norm = sqrt (mv_vector_inner_product_n (rhs, rhs, n));


      if (b_norm > epsmac) // choose convergence criterion
        {
          // |r_i|/|b| <= eps if |b| > 0
          tol *= b_norm;
          den_norm = b_norm;
        }
      else // (r_norm > epsmac)
        {
          // |r_i|/|r0| <= eps if |b| = 0
          tol *= r_norm;
          den_norm = r_norm;
        }

      int m;
      // main loop
      for (iter = 0; iter < max_iter; ++iter)
        {
          if (iter)
            {
              // rho_1 was refreshed at the end of the previous pass
              if (rho_1 == 0) // failure
                {
                  if (den_norm > epsmac)
                    prop->set_f (final_res_idx, r_norm / den_norm);
                  else
                    prop->set_f (final_res_idx, r_norm);

                  bs_throw_exception ("TFQMR: Failure - rho_1 == 0");
                }
              sum_vector_n (u, (t_double)1., res, beta, p, n); //p[n] = u[n]+beta*res

              memset (v, 0, sizeof (t_double) * n);
              matrix->matrix_vector_product (sp_p, sp_v); //v[n]=Ap[n]
            }

          sigma = mv_vector_inner_product_n (rtilde, v, n); //sigma=(rtilde,v[n-1])

          // NOTE(review): unlike the CGS solver, sigma is not checked against
          // zero before the division -- confirm whether a breakdown check is wanted.
          alpha = rho_1/sigma;

          // tmp = M^(-1)*v, then v = tmp
          if (prec)
            {
              if (prec->solve_prec (matrix, sp_v, sp_tmp))
                {
                  bs_throw_exception ("TFQMR: Preconditioner failed");
                }
              memcpy (v, tmp, sizeof (t_double) * n);
            }

          sum_vector_n (u, (t_double)1., v, -alpha, q, n); //q[n] = u[n-1]-alpha*v[n-1]
          sum_vector_n (u, (t_double)1., q, (t_double)1., res, n); //res = u[n-1]+q[n]

          memset (tmp, 0, sizeof (t_double) * n);
          matrix->matrix_vector_product (sp_res, sp_tmp);// tmp=A*res
          sum_vector_n (r, (t_double)1., tmp, -alpha, r, n);// r=r-alpha*tmp

          r_norm = mv_vector_inner_product_n (r, r, n);

          // two quasi-minimization half-steps per outer iteration
          for (m = 1; m <= 2 ; m++)
            {
              if (m == 1) // m is odd
                {
                  memcpy (y, u, sizeof (t_double) * n);
                  // NOTE(review): evaluates to |r_norm|, i.e. the squared
                  // residual norm -- confirm this is the intended estimate.
                  w_norm = sqrt(r_norm * r_norm);
                }
              else // m is even
                {
                  memcpy (y, q, sizeof (t_double) * n);
                  w_norm = sqrt(r_norm);
                }

              sum_vector_n (y, (t_double)1., d, eta*nu*nu/alpha, d, n); //d[m] = y[m] + (eta[m-1]*nu[m-1]^2/alpha[n-1])*d[m-1]
              nu = w_norm/tau; //nu[m]=||w[m+1]||/tau[m-1]
              c = 1./sqrt (1. + nu*nu);
              tau = tau*c*nu; //tau[m]=tau[m-1]nu[m]c[m]
              eta = c*c*alpha; //eta[m]=c[m]^2*alpha[n-1]
              sum_vector_n (sol, (t_double)1., d, eta, sol, n); //x[m] = x[m-1]+eta[m]*d[m]
              if (r_norm <= tol)
                break;
            }

          if (r_norm <= tol)
            break;

          rho_1 = mv_vector_inner_product_n (r, rtilde, n);
          beta = rho_1 / rho_2;

          // rhat = M^(-1) * r;
          if (prec)
            {
              if (prec->solve_prec (matrix, sp_r, sp_rhat))
                {
                  bs_throw_exception ("TFQMR: Preconditioner failed");
                }
            }
          else // no precondition (preconditioner=identity_matrix)
            {
              memcpy (rhat, r, sizeof (t_double) * n);
            }

          sum_vector_n (rhat, (t_double)1., q, beta, u, n); //u[n] = rhat[n]+beta*q[n]
          sum_vector_n (q, (t_double)1., p, beta, res, n); //res = q[n]+beta*p[n-1]

          rho_2 = rho_1;
        }

      // NOTE(review): success is set to true and iter + 1 is stored even when
      // the loop ran out of iterations without meeting the tolerance
      // (then iter == max_iter) -- confirm whether callers rely on this.
      prop->set_i (iters_idx, iter + 1);
      prop->set_b (success_idx, true);

      if (den_norm > epsmac)
        prop->set_f (final_res_idx, r_norm / den_norm);
      else
        prop->set_f (final_res_idx, r_norm);

      return 0;
    }
Example #3
0
  /**
   * \brief apply the factorization: copy rhs into sol, then run a block
   *        forward sweep (L) and a block backward sweep (U) in place
   *
   * The factors are read either from the internally stored lu_matrix
   * (when the use_internal_matrix property is set) or from the passed
   * matrix itself. Each row is expected to store its diagonal block first;
   * the forward sweep additionally stops at the first column that is not
   * below the diagonal, i.e. it relies on the remaining columns being sorted.
   *
   * \param matrix matrix holding the factorization when the internal copy is not used
   * \param sp_rhs right-hand side vector
   * \param sp_sol solution vector, same size as sp_rhs
   * \return 0 on success; a missing factorization matrix is reported via
   *         bs_throw_exception
   */
  int 
  bcsr_ilu_prec::solve(sp_matrix_t matrix, spv_double sp_rhs, spv_double sp_sol)
  {
    BS_ASSERT (matrix);
    BS_ASSERT (sp_rhs->size ());
    BS_ASSERT (sp_sol->size ());
    BS_ASSERT (sp_rhs->size () == sp_sol->size ()) (sp_rhs->size ()) (sp_sol->size ());

    t_double *rhs = &(*sp_rhs)[0];
    t_double *sol = &(*sp_sol)[0];

    sp_bcsr_matrix_t ilu;

    if (dynamic_cast<bcsr_matrix_iface_t*> (matrix.lock ()))
      {
        ilu = matrix;
        BS_ASSERT (ilu);
      }

    bool ff = prop->get_b (use_internal_matrix);

    // Refuse to dereference a missing matrix: previously a failed cast
    // (external mode) or a missing internal copy led to a null dereference.
    if (ff && !lu_matrix)
      {
        bs_throw_exception ("ILU: internal matrix is not initialized");
      }
    if (!ff && !ilu)
      {
        bs_throw_exception ("ILU: passed matrix is not a BCSR matrix");
      }

    spv_float spvalues     = ff ? lu_matrix->get_values () : ilu->get_values   ();
    spv_long sp_rows       = ff ? lu_matrix->get_rows_ptr () : ilu->get_rows_ptr ();
    spv_long sp_cols       = ff ? lu_matrix->get_cols_ind () : ilu->get_cols_ind ();
    t_float *values        = &(*spvalues)[0];
    t_long *rows           = &(*sp_rows)[0];
    t_long *cols           = &(*sp_cols)[0];

    t_long n;      // number of block rows
    t_long nb;     // block size
    if (ff)
      {
        n         = lu_matrix->get_n_rows ();
        nb        = lu_matrix->get_n_block_size ();
      }
    else
      {
        n         = ilu->get_n_rows ();
        nb        = ilu->get_n_block_size ();
      }
    t_long b_sqr = nb * nb; // number of values in one block

    t_long j, k, l;
    t_long j1, j2;

    const t_float *D_block, *M_block;

    t_double *v, *r;

    // the sweeps below work in place on sol
    memcpy (sol, rhs, sp_rhs->size () * sizeof (rhs[0]));

    // solve Ly = b
    for (k = 0; k < n; ++k)
      {
        r  = &sol[k * nb];
        // first entry of the row is the diagonal block
        j1 = rows[k];
        j2 = rows[k + 1];
        for (j = j1 + 1; j < j2; ++j)
          {
            l = cols[j];
            if (l < k)
              {
                v = &sol[l * nb];
                M_block = &values[j * b_sqr];
                LU_FIND_ROOT_UPDATE (nb, M_block, v, r);
              }
            else
              break;
          }
        BS_ASSERT (cols[j1] == k) (cols[j1]) (k);
        D_block = &values[j1 * b_sqr];
        uLU_FIND_ROOT_L (nb, D_block, r);
      }
    // solve Ux = y
    // r is recomputed from k on every pass so that it never points before
    // the start of sol (the old "r -= nb" stepped below &sol[0] at the end)
    for (k = n - 1; k >= 0; --k)
      {
        r  = &sol[k * nb];
        // first entry of the row is the diagonal block
        j1 = rows[k];
        j2 = rows[k + 1];
        for (j = j1; j < j2; ++j)
          {
            l = cols[j];
            if (l > k)
              {
                v = &sol[l * nb];
                M_block = &values[j * b_sqr];
                LU_FIND_ROOT_UPDATE (nb, M_block, v, r);
              }
          }
        BS_ASSERT (cols[j1] == k) (cols[j1]) (k);
        D_block = &values[j1 * b_sqr];
        uLU_FIND_ROOT_U (nb, D_block, r);
      }

    return 0;
  }
Example #4
0
    /**
     * \brief iterate smooth () on the system until the squared residual norm
     *        drops below the tolerance or max_iters is reached
     *
     * Unlike the Krylov solvers the solution vector is not reset, so the
     * content of sp_sol is used as the initial guess.
     *
     * \param matrix system matrix; must be a bcsr_t matrix because smooth () works on it
     * \param sp_rhs right-hand side vector, must be non-empty
     * \param sp_sol solution vector (initial guess on input), must be non-empty
     * \return 0; convergence is reported through the success_idx property
     */
    int gs_solver::solve (sp_matrix_t matrix, spv_double sp_rhs, spv_double sp_sol)
    {
      BS_ERROR (matrix, "gs_solve");
      BS_ERROR (sp_rhs->size (), "gs_solve");
      BS_ERROR (sp_sol->size (), "gs_solve");
      BS_ERROR (prop, "gs_solve");

      int iter;
      const double epsmac = 1e-24;
      t_double r_norm, b_norm, den_norm;

      t_double *rhs = &(*sp_rhs)[0];

      // Take the BCSR view of the matrix only when the cast SUCCEEDS.
      // The check used to be negated, which left bcsr empty for every
      // genuine BCSR matrix and handed an empty pointer to smooth ().
      sp_bcsr_t bcsr;
      if (dynamic_cast<bcsr_t *> (matrix.lock ()))
        {
          bcsr = matrix;
        }
      BS_ERROR (bcsr, "gs_solve");

      t_long n = matrix->get_n_rows () * matrix->get_n_block_size ();
      spv_long flags; // left empty; passed to smooth () as is

      // residuals are compared as squared norms, hence the squared tolerance
      t_double tol = prop->get_f (tol_idx);
      tol *= tol;

      int max_iter  = prop->get_i (max_iters_idx);
      prop->reset_i (cf_type_idx);

      prop->set_b (success_idx, false);

      matrix->init_vector (sp_r);
      t_double *r = &(*sp_r)[0];

      // initial residual for the incoming guess
      matrix->calc_lin_comb (-1.0, 1.0, sp_sol, sp_rhs, sp_r);
      r_norm = mv_vector_inner_product_n (r, r, n);

      b_norm = sqrt (mv_vector_inner_product_n (rhs, rhs, n));

      if (b_norm > epsmac) // choose convergence criterion
        {
          // |r_i|/|b| <= eps if |b| > 0
          tol *= b_norm;
          den_norm = b_norm;
        }
      else // (r_norm > epsmac)
        {
          // |r_i|/|r0| <= eps if |b| = 0
          tol *= r_norm;
          den_norm = r_norm;
        }

      // main loop
      for (iter = 0; iter < max_iter; ++iter)
        {
          // one smoothing sweep, then recompute the residual
          smooth (bcsr, flags, 1, sp_rhs, sp_sol);
          matrix->calc_lin_comb (-1.0, 1.0, sp_sol, sp_rhs, sp_r);
          r_norm = mv_vector_inner_product_n (r, r, n);
          if (r_norm <= tol)
            break;
        } // end of main loop

      // leaving the loop through break means the tolerance was met
      // NOTE(review): iter + 1 is stored in both cases, so a run that
      // exhausts the loop reports max_iter + 1 -- confirm intent.
      prop->set_i (iters_idx, iter + 1);
      prop->set_b (success_idx, iter < max_iter);

      if (den_norm > epsmac)
        prop->set_f(final_res_idx, r_norm / den_norm);
      else
        prop->set_f (final_res_idx, r_norm);

      BOSOUT (section::solvers, level::low) << "r_norm = " << r_norm << " r_norm / den_norm = " << r_norm / den_norm << " iter = " << iter << bs_end;

      return 0;
    }
Example #5
0
  /**
   * \brief setup preconditioner: build the block ILU factorization in place
   *
   * When the use_internal_matrix property is set, the passed matrix is first
   * copied into lu_matrix (created on demand) and the copy is factorized;
   * otherwise the passed matrix itself is overwritten with the factors.
   * Each row is expected to store its diagonal block first, followed by the
   * remaining blocks in ascending column order.
   *
   * \param matrix Various matrix, must be a BCSR matrix
   * \return 0 if success; failures are reported via bs_throw_exception
   */
  int
  bcsr_ilu_prec::setup (sp_matrix_t matrix)
  {
    BS_ASSERT (matrix);


    t_long n;      // number of block rows
    t_long nb;     // block size
    t_long b_sqr;  // number of values in one block
    bool ff = prop->get_b (use_internal_matrix);


    sp_bcsr_matrix_t ilu;

    if (dynamic_cast<bcsr_matrix_iface_t *> (matrix.lock ()))
      {
        ilu = matrix;
        BS_ASSERT (ilu);
      }

    // ilu is needed in both modes (as copy source or as the matrix to
    // factorize), so fail explicitly instead of dereferencing an empty pointer
    if (!ilu)
      {
        bs_throw_exception ("ILU: passed matrix is not a BCSR matrix");
      }

    if (ff)
      {
        if (!lu_matrix)
          {
            lu_matrix = BS_KERNEL.create_object (matrix->bs_resolve_type ());
            if (!lu_matrix)
              {
                bs_throw_exception ("Can not create matrix");
              }
          }
        if (lu_matrix->copy (ilu))
          {
            bs_throw_exception ("Can not make matrix copy");
          }

        n              = lu_matrix->get_n_rows ();
        nb             = lu_matrix->get_n_block_size ();
      }
    else
      {
        n              = ilu->get_n_rows ();
        nb             = ilu->get_n_block_size ();
      }

    spv_long sp_ilu_rows       = ff ? lu_matrix->get_rows_ptr () : ilu->get_rows_ptr ();
    spv_long sp_ilu_cols       = ff ? lu_matrix->get_cols_ind () : ilu->get_cols_ind ();
    spv_float  sp_ilu_values   = ff ? lu_matrix->get_values ()   : ilu->get_values ();

    t_long *ilu_rows           = &(*sp_ilu_rows)[0];
    t_long *ilu_cols           = &(*sp_ilu_cols)[0];
    t_float *ilu_values        = &(*sp_ilu_values)[0];

    b_sqr          = nb * nb;

    t_long i, j, j1, j2, j3, cl, k;

    t_float *block;

    // ----------------------
    // build ilu factorization
    t_long i_str;
    t_float *d_block, *dd_block;
    t_double d;
    t_long jj, jj1, jj2;

    // loop through all rows
    for (i = 0; i < n; ++i)
      {
        // update i-th row by 0..i-1 row
        j1 = ilu_rows[i];
        j2 = j1; // the diagonal block is the first entry of the row
        j3 = ilu_rows[i + 1];

        // for all nonzero elements in i-th string (in L part)
        // update it by diagonal element and update all other nonzero elements
        // in i-th string
        for (j = j1 + 1; j < j3; ++j)
          {
            // find corresponding diagonal element
            i_str = ilu_cols[j];
            if (i_str < i)
              {
                // update by diagonal element
                block   = &ilu_values[j * b_sqr];
                d_block = &ilu_values[ilu_rows[i_str] * b_sqr];
                uLU_SEEK_L (nb, d_block, block, d);

                jj1 = ilu_rows[i_str];
                jj2 = ilu_rows[i_str + 1];
                k = j + 1;

                // walk the part of row i_str to the right of its diagonal
                for (jj = jj1; jj < jj2; ++jj)
                  {
                    cl = ilu_cols[jj];
                    if (cl <= i_str)
                      continue;

                    // the diagonal of row i is stored first, outside the
                    // sorted range scanned by k, so it is matched separately
                    if (ilu_cols[j1] == cl)
                      {
                        // upgrade by corresponding element
                        d_block   = &ilu_values[jj * b_sqr];
                        dd_block  = &ilu_values[j1 * b_sqr];
                        LU_UPGRADE(nb, block, d_block, dd_block);
                      }
                    for (; k < j3 && ilu_cols[k] < cl; ++k)
                      ;

                    if (k < j3 && ilu_cols[k] == cl)
                      {
                        // upgrade by corresponding element
                        d_block   = &ilu_values[jj * b_sqr];
                        dd_block  = &ilu_values[k * b_sqr];
                        LU_UPGRADE(nb, block, d_block, dd_block);
                      }
                    else if (k >= j3 && cl >= ilu_cols[j1])
                      {
                        // Row i has no entries left AND the diagonal column
                        // has already been passed, so nothing more can match.
                        // Stopping as soon as k ran out (the old condition)
                        // could skip the diagonal update when row i has no
                        // entries to the right of its diagonal.
                        break;
                      }
                  }
              }
            else 
              break;
          }

        // factorize i-th string
        block = &ilu_values[j2 * b_sqr];
        uLU (nb, block, d);

        // update the blocks to the right of the diagonal by the factorized diagonal
        for (j = j1; j < j3; ++j)
          {
            t_long u_col = ilu_cols[j];
            if (u_col > i)
              {
                d_block = &ilu_values[j * b_sqr];
                uLU_SEEK_U (nb, block, d_block);
              }
          }
      }
    // ----------------------
    return 0;
  }