void qrfactor(matrix & Q, matrix & R, matrix & betas, const matrix & A){
  int i = 0, j = 0, k = 0, l = 0, N = 0, M = 0;
  fpp beta = 0.0, temp = 0.0;

  N = A.get_rows(); M = A.get_cols();

  matrix bigV(N,1), temp_row(N,1); zeros(bigV); zeros(temp_row);
  submatrix x, v;

  if((Q.get_rows() != N) || (Q.get_cols() != M) ||
     (R.get_rows() != N) || (R.get_cols() != M) ||
     (betas.get_rows() != N) || (betas.get_cols() != 1)){
    std::cerr << "QR dimensions incompatible! Q(" << Q.get_rows() << "," << Q.get_cols()
              << "), R(" << R.get_rows() << "," << R.get_cols()
              << "), betas:(" << betas.get_rows() << "," << betas.get_cols()
              << "), A:(" << A.get_rows() << "," << A.get_cols() << ")." << std::endl;
    exit(-1);
  }

  R = A;
  zeros(Q);

  for(i = 0; i < N; i++){
    Q(i,i) = 1.0;
  }
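
  // Householder QR: step i builds a reflector H_i = I - beta*v*v^T that
  // zeros R(i+1:N-1, i).  The update loops below apply H_i to R and
  // accumulate it into Q, so on exit Q = H_{N-2}*...*H_0, giving
  // Q*A == R and hence A == Q^T * R.  (Note the identity initialization
  // of Q assumes M >= N, i.e. A is square or wide.)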

  for(i = 0; i < N-1; i++){
    // x = R(i:N-1, i); v overlays the same rows of the workspace bigV.
    x.subcreate(R, i, i, N-i, 1);
    v.subcreate(bigV, i, 0, N-i, 1);
    house(v,beta,x);

    // R := (I - beta*v*v^T) * R, touching only rows/columns i..N-1.
    for(k = i; k < M; k++){
      for(j = i; j < N; j++){
        temp = 0.0;
        for(l = i; l < N; l++){
          temp += beta*v(l-i,0)*v(j-i,0)*R(l,k);
        }
        temp_row(j,0) = R(j,k) - temp;
      }
      for(l = i; l < N; l++){
        R(l,k) = temp_row(l,0);
      }
    }

    // Q := (I - beta*v*v^T) * Q, over all M columns (rows i..N-1 change).
    for(k = 0; k < M; k++){
      for(j = i; j < N; j++){
        temp = 0.0;
        for(l = i; l < N; l++){
          temp += beta*v(l-i,0)*v(j-i,0)*Q(l,k);
        }
        temp_row(j,0) = Q(j,k) - temp;
      }
      for(l = i; l < N; l++){
        Q(l,k) = temp_row(l,0);
      }
    }

    betas(i,0) = beta;
  }

} 
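
A minimal usage sketch for qrfactor (assuming the matrix class exposes the
get_rows/get_cols/operator() interface and the zeros() helper used above; the
demo function name is illustrative): factor a small square A and verify
A == Q^T * R element-wise.

void qrfactor_demo(){
  const int n = 3;
  matrix A(n,n), Q(n,n), R(n,n), betas(n,1);
  zeros(betas);
  for(int i = 0; i < n; i++)
    for(int j = 0; j < n; j++)
      A(i,j) = 1.0/(i+j+1.0);   // Hilbert-like test matrix

  qrfactor(Q, R, betas, A);

  // Reconstruction check: A(i,j) == sum_l Q(l,i)*R(l,j), i.e. Q^T * R.
  fpp max_err = 0.0;
  for(int i = 0; i < n; i++){
    for(int j = 0; j < n; j++){
      fpp s = 0.0;
      for(int l = 0; l < n; l++) s += Q(l,i)*R(l,j);
      fpp e = s - A(i,j);
      if(e < 0.0) e = -e;
      if(e > max_err) max_err = e;
    }
  }
  std::cout << "max |Q^T R - A| = " << max_err << std::endl;
}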
Example 2
/**
 * Product of a variable matrix and a constant matrix, recorded on the
 * gradient stack so derivatives propagate back through m1.
 * \param m1 left factor; its column bounds must match cm2's row bounds
 * \param cm2 right factor (constant)
 * \return m1 * cm2 as a dvar_matrix
 */
dvar_matrix operator*(const dvar_matrix& m1, const dmatrix& cm2)
 {
   if (m1.colmin() != cm2.rowmin() || m1.colmax() != cm2.rowmax())
   {
     cerr << " Incompatible array bounds in "
     "dmatrix operator*(const dvar_matrix& x, const dmatrix& m)\n";
     ad_exit(21);
   }
   dmatrix cm1=value(m1);
   dmatrix tmp(m1.rowmin(),m1.rowmax(), cm2.colmin(), cm2.colmax());
#ifdef OPT_LIB
   const size_t rowsize = (size_t)cm2.rowsize();
#else
   const int _rowsize = cm2.rowsize();
   assert(_rowsize > 0);
   const size_t rowsize = (size_t)_rowsize;
#endif
   try
   {
     double* temp_col = new double[rowsize];
     // Shift the pointer so temp_col can be indexed with cm2's row
     // bounds [rowmin, rowmax] instead of [0, rowsize).
     temp_col-=cm2.rowmin();
     for (int j=cm2.colmin(); j<=cm2.colmax(); j++)
     {
       for (int k=cm2.rowmin(); k<=cm2.rowmax(); k++)
       {
         temp_col[k] = cm2.elem(k,j);
       }
       for (int i=cm1.rowmin(); i<=cm1.rowmax(); i++)
       {
         double sum=0.0;
         dvector& temp_row = cm1(i);
         for (int k=cm1.colmin(); k<=cm1.colmax(); k++)
         {
           sum+=temp_row(k) * (temp_col[k]);
           // sum+=temp_row(k) * cm2(k,j);
         }
         tmp(i,j)=sum;
       }
     }
     // Undo the index shift before freeing.
     temp_col+=cm2.rowmin();
     delete [] temp_col;
     temp_col = 0;
   }
   catch (std::bad_alloc& e)
   {
     cerr << "Error[" << __FILE__ << ':' << __LINE__
          << "]: Unable to allocate array.\n";
     //ad_exit(21);
     throw;  // rethrow the original exception
   }
   dvar_matrix vtmp=nograd_assign(tmp);
   // Save what dmcm_prod needs on the reverse sweep, bracketed by
   // sentinel strings used to verify stack integrity.
   save_identifier_string("TEST1");
   //m1.save_dvar_matrix_value();
   m1.save_dvar_matrix_position();
   cm2.save_dmatrix_value();
   cm2.save_dmatrix_position();
   vtmp.save_dvar_matrix_position();
   save_identifier_string("TEST6");
   // Record the adjoint callback so the reverse sweep can propagate
   // derivatives through this product.
   gradient_structure::GRAD_STACK1->
            set_gradient_stack(dmcm_prod);
   return vtmp;
 }
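
The inner loops above copy each column of cm2 into the contiguous temp_col
buffer before sweeping the rows, so with row-major storage both operands are
read with unit stride. A self-contained sketch of the same column-caching
pattern on plain arrays (names here are illustrative, not ADMB API):

#include <cstddef>
#include <vector>

// C = A * B with A (n x m), B (m x p), C (n x p), all row-major.
// Gathering B's current column first makes the innermost loop
// stride-1 on both inputs.
void matmul_colcache(const std::vector<double>& A,
                     const std::vector<double>& B,
                     std::vector<double>& C,
                     std::size_t n, std::size_t m, std::size_t p){
  std::vector<double> col(m);
  for(std::size_t j = 0; j < p; j++){
    for(std::size_t k = 0; k < m; k++) col[k] = B[k*p + j];  // gather column j
    for(std::size_t i = 0; i < n; i++){
      double sum = 0.0;
      for(std::size_t k = 0; k < m; k++) sum += A[i*m + k] * col[k];
      C[i*p + j] = sum;
    }
  }
}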
Example 3
  void Basker<Int,Entry,Exe_Space>::btf_blk_amd
  (
   BASKER_MATRIX &M, 
   INT_1DARRAY p, 
   INT_1DARRAY btf_nnz, 
   INT_1DARRAY btf_work
  )
  {
   

    // printf("=============BTF_BLK_AMD_CALLED========\n");
    if(Options.incomplete == BASKER_TRUE)
      {
	//AMD on incomplete ILUK seems to behave really badly and can
	//lead to a zero on the diagonal, so we simply return the
	//natural ordering.
	for(Int i = 0 ; i < M.ncol; i++)
	  {
	    p(i) = i;
	  }
	//Set the work estimates to 1, since BTF is not
	//supported in our ILUK.
	for(Int b = 0; b < btf_nblks; b++)
	  {
	    btf_nnz(b) = 1;
	    btf_work(b) =1;
	  }
       
	//printf("Short amd blk\n");

	return;
      }

 
    //p has length M.ncol.
    //Scan over all blocks.  Note that this needs to be made parallel
    //in the future (Future Josh will be ok with this, right?)

    //This is a horrible way to do this!!!!!
    //KLU does this very nicely, but it also materializes all the little blocks.
    INT_1DARRAY temp_col;
    MALLOC_INT_1DARRAY(temp_col, M.ncol+1);
    INT_1DARRAY temp_row;
    MALLOC_INT_1DARRAY(temp_row, M.nnz);
    //printf("Done with btf_blk_amd malloc \n");
    //printf("blks: %d \n" , btf_nblks);


    for(Int b = 0; b < btf_nblks; b++)
      {
	Int blk_size = btf_tabs(b+1) - btf_tabs(b);

	//printf("blk: %d blk_size: %d \n",
	//     b, blk_size);

	if(blk_size < 3)
	  {
	    //Tiny block: keep the natural order and use the dense-LU
	    //style cost bounds below (n^3 work, ~n^2/2 + n fill).
	    //printf("debug, blk_size: %d \n", blk_size);
	    for(Int ii = 0; ii < blk_size; ++ii)
	      {
		//printf("set %d \n", btf_tabs(b)+ii-M.scol);
		p(ii+btf_tabs(b)) = btf_tabs(b)+ii-M.scol;
	      }
	    btf_work(b) = blk_size*blk_size*blk_size;
	    btf_nnz(b)  = (.5*(blk_size*blk_size) + blk_size);
	    continue;
	  }
	
	INT_1DARRAY tempp;
	MALLOC_INT_1DARRAY(tempp, blk_size+1);
	
	
	//Extract block b into local CSC arrays (temp_col/temp_row),
	//rebasing row indices so the block starts at 0.
	Int nnz = 0;
	Int column = 1;
	temp_col(0) = 0;
	for(Int k = btf_tabs(b); k < btf_tabs(b+1); k++)
	  {
	    for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); i++)
	      {
		if(M.row_idx(i) < btf_tabs(b))
		  continue;
		  
		temp_row(nnz) = M.row_idx(i) - btf_tabs(b);
		nnz++;
	      }// end over all row_idx
	    temp_col(column) = nnz;
	    column++;
	  }//end over all columns k
	
	#ifdef BASKER_DEBUG_ORDER_AMD
	printf("col_ptr: ");
	for(Int i = 0 ; i < blk_size+1; i++)
	  {
	    printf("%d, ", temp_col(i));
	  }
	printf("\n");
	printf("row_idx: ");
	for(Int i = 0; i < nnz; i++)
	  {
	    printf("%d, ", temp_row(i));
	  }
	printf("\n");
	#endif


	double l_nnz = 0;
	double lu_work = 0;
	BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(0)), 
					&(temp_row(0)),&(tempp(0)), 
					l_nnz, lu_work);


	btf_nnz(b)  = l_nnz;
	btf_work(b) = lu_work;
       
	
	#ifdef BASKER_DEBUG_ORDER_AMD
	printf("blk: %d order: \n", b);
	for(Int ii = 0; ii < blk_size; ii++)
	  {
	    printf("%d, ", tempp(ii));
	  }
	#endif

				     
	//Add to the bigger perm vector
	for(Int ii = 0; ii < blk_size; ii++)
	  {
	    //printf("loc: %d val: %d \n", 
	    //ii+btf_tabs(b), tempp(ii)+btf_tabs(b));

	    p(tempp(ii)+btf_tabs(b)) = ii+btf_tabs(b);
	  }


	FREE_INT_1DARRAY(tempp);
	
      }//over all blk_tabs

    #ifdef BASKER_DEBUG_AMD_ORDER
    printf("blk amd final order\n");
    for(Int ii = 0; ii < M.ncol; ii++)
      {
	printf("%d, ", p(ii));
      }
    printf("\n");
    #endif

    FREE_INT_1DARRAY(temp_col);
    FREE_INT_1DARRAY(temp_row);
    
  }//end blk_amd()
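
The per-block setup above is the reusable trick: slice one diagonal block out
of a global CSC matrix into local col_ptr/row_idx arrays with row indices
rebased to zero, then hand that block to AMD. A self-contained sketch of just
the extraction step (plain arrays; names are illustrative, not Basker API):

#include <vector>

// Extract the diagonal block spanning rows/columns [lo, hi) of a CSC
// matrix (col_ptr, row_idx).  Entries above the block (row < lo) are
// dropped, mirroring the BTF structure assumed by btf_blk_amd; entries
// below the block do not occur in BTF form.
void extract_block(const std::vector<int>& col_ptr,
                   const std::vector<int>& row_idx,
                   int lo, int hi,
                   std::vector<int>& blk_col_ptr,
                   std::vector<int>& blk_row_idx){
  blk_col_ptr.assign(1, 0);
  blk_row_idx.clear();
  for(int k = lo; k < hi; k++){
    for(int i = col_ptr[k]; i < col_ptr[k+1]; i++){
      if(row_idx[i] < lo) continue;            // above the diagonal block
      blk_row_idx.push_back(row_idx[i] - lo);  // rebase to block-local row
    }
    blk_col_ptr.push_back((int)blk_row_idx.size());
  }
}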
Example 4
  void Basker<Int,Entry,Exe_Space>::blk_amd(BASKER_MATRIX &M, INT_1DARRAY p)
  {
    
    //p has length M.ncol.
    //Scan over all blocks.  Note that this needs to be made parallel
    //in the future (Future Josh will be ok with this, right?)

    //This is a horrible way to do this!!!!!
    //KLU does this very nicely, but it also materializes all the little blocks.
    INT_1DARRAY temp_col;
    MALLOC_INT_1DARRAY(temp_col, M.ncol+1);
    INT_1DARRAY temp_row;
    MALLOC_INT_1DARRAY(temp_row, M.nnz);


    for(Int b = btf_tabs_offset; b < btf_nblks; b++)
      {
	Int blk_size = btf_tabs(b+1) - btf_tabs(b);
	if(blk_size < 3)
	  {
	    //Tiny block: keep the natural order.
	    //printf("debug, blk_size: %d \n", blk_size);
	    for(Int ii = 0; ii < blk_size; ++ii)
	      {
		//printf("set %d \n", btf_tabs(b)+ii-M.scol);
		p(ii+btf_tabs(b)) = btf_tabs(b)+ii-M.scol;
	      }
	    continue;
	  }
	
	INT_1DARRAY tempp;
	MALLOC_INT_1DARRAY(tempp, blk_size+1);
	
	
	//Extract block b into local CSC arrays (temp_col/temp_row),
	//rebasing row indices so the block starts at 0.
	Int nnz = 0;
	Int column = 1;
	temp_col(0) = 0;
	for(Int k = btf_tabs(b); k < btf_tabs(b+1); k++)
	  {
	    for(Int i = M.col_ptr(k); i < M.col_ptr(k+1); i++)
	      {
		if(M.row_idx(i) < btf_tabs(b))
		  continue;
		  
		temp_row(nnz) = M.row_idx(i) - btf_tabs(b);
		nnz++;
	      }// end over all row_idx
	    temp_col(column) = nnz;
	    column++;
	  }//end over all columns k
	
	#ifdef BASKER_DEBUG_ORDER_AMD
	printf("col_ptr: ");
	for(Int i = 0 ; i < blk_size+1; i++)
	  {
	    printf("%d, ", temp_col(i));
	  }
	printf("\n");
	printf("row_idx: ");
	for(Int i = 0; i < nnz; i++)
	  {
	    printf("%d, ", temp_row(i));
	  }
	printf("\n");
	#endif


	BaskerSSWrapper<Int>::amd_order(blk_size, &(temp_col(0)), 
					&(temp_row(0)),&(tempp(0)));


	
	#ifdef BASKER_DEBUG_ORDER_AMD
	printf("blk: %d order: \n", b);
	for(Int ii = 0; ii < blk_size; ii++)
	  {
	    printf("%d, ", tempp(ii));
	  }
	#endif

				     
	//Add to the bigger perm vector
	for(Int ii = 0; ii < blk_size; ii++)
	  {
	    //printf("loc: %d val: %d \n", 
	    //ii+btf_tabs(b), tempp(ii)+btf_tabs(b));

	    p(tempp(ii)+btf_tabs(b)) = ii+btf_tabs(b);
	  }


	FREE_INT_1DARRAY(tempp);
	
      }//over all blk_tabs

    #ifdef BASKER_DEBUG_AMD_ORDER
    printf("blk amd final order\n");
    for(Int ii = 0; ii < M.ncol; ii++)
      {
	printf("%d, ", p(ii));
      }
    printf("\n");
    #endif

    FREE_INT_1DARRAY(temp_col);
    FREE_INT_1DARRAY(temp_row);
    
  }//end blk_amd()
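
Both routines build p as an old-to-new map composed from the per-block AMD
orders: p(old_index) = new_index. A small sketch (illustrative, not Basker
API) of applying such a permutation:

#include <cstddef>
#include <vector>

// Apply an old-to-new permutation (p[old] = new): element i of the
// input lands at position p[i] of the output.
template <typename T>
std::vector<T> apply_perm(const std::vector<T>& in, const std::vector<int>& p){
  std::vector<T> out(in.size());
  for(std::size_t i = 0; i < in.size(); i++) out[p[i]] = in[i];
  return out;
}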
Example 5
void SeparableConvolution2d(const RowMatrixXf& image,
                            const Eigen::RowVectorXf& kernel_x,
                            const Eigen::RowVectorXf& kernel_y,
                            const BorderType& border_type,
                            RowMatrixXf* out) {
  // kernel_x and kernel_y are assumed to have the same (odd) length.
  const int full_size = kernel_x.size();
  const int half_size = full_size / 2;
  out->resize(image.rows(), image.cols());

  // Convolving a vertical filter across rows is the same as a transpose
  // multiply, i.e. kernel_y^T * rows.  This gives the convolved value for
  // each row, but care must be taken at the top and bottom borders.
  const RowVectorXf reverse_kernel_y = kernel_y.reverse();

  if (border_type == REFLECT) {
    for (int i = 0; i < half_size; i++) {
      const int forward_size = i + half_size + 1;
      const int reverse_size = full_size - forward_size;
      out->row(i) = kernel_y.tail(forward_size) *
                    image.block(0, 0, forward_size, image.cols()) +
                    reverse_kernel_y.tail(reverse_size) *
                    image.block(1, 0, reverse_size, image.cols());

      // Apply the same technique for the end rows.
      // TODO(csweeney): Move this to its own loop for cache exposure?
      out->row(image.rows() - i - 1) =
          kernel_y.head(forward_size) * image.block(image.rows() - forward_size,
                                                    0, forward_size,
                                                    image.cols()) +
          reverse_kernel_y.head(reverse_size) *
          image.block(image.rows() - reverse_size - 1, 0, reverse_size,
                      image.cols());
    }
  } else {
    // Perform border with REPLICATE as the option.
    for (int i = 0; i < half_size; i++) {
      const int forward_size = i + half_size + 1;
      const int reverse_size = full_size - forward_size;
      out->row(i) = kernel_y.tail(forward_size) *
                        image.block(0, 0, forward_size, image.cols()) +
                    reverse_kernel_y.tail(reverse_size) *
                        image.row(0).replicate(reverse_size, 1);

      // Apply the same technique for the end rows.
      out->row(image.rows() - i - 1) =
          kernel_y.head(forward_size) * image.block(image.rows() - forward_size,
                                                    0, forward_size,
                                                    image.cols()) +
          reverse_kernel_y.head(reverse_size) *
          image.row(image.rows() - 1).replicate(reverse_size, 1);
    }
  }

  // Applying the rest of the y filter.
#ifdef AKAZE_USE_OPENMP
#pragma omp parallel for
#endif
  for (int row = half_size; row < image.rows() - half_size; row++) {
    out->row(row) =
        kernel_y * image.block(row - half_size, 0, full_size, out->cols());
  }

  // Convolving with the horizontal filter is easy. Rather than using the
  // kernel as a sliding window, we use the row pixels as a sliding window
  // around the filter. We prepend and append the proper border values so
  // that we are sure to end up with the correct convolved values.
  if (border_type == REFLECT) {
    RowVectorXf temp_row(image.cols() + full_size - 1);
#ifdef AKAZE_USE_OPENMP
#pragma omp parallel for firstprivate(temp_row)
#endif
    for (int row = 0; row < out->rows(); row++) {
      temp_row.head(half_size) =
          out->row(row).segment(1, half_size).reverse();
      temp_row.segment(half_size, image.cols()) = out->row(row);
      temp_row.tail(half_size) =
          out->row(row)
              .segment(image.cols() - 1 - half_size, half_size)
              .reverse();

      // Convolve the row. We perform the first step here explicitly so that we
      // avoid setting the row equal to zero.
      out->row(row) = kernel_x(0) * temp_row.head(image.cols());
      for (int i = 1; i < full_size; i++) {
        out->row(row) += kernel_x(i) * temp_row.segment(i, image.cols());
      }
    }
  } else {
    RowVectorXf temp_row(image.cols() + full_size - 1);
#ifdef AKAZE_USE_OPENMP
#pragma omp parallel for firstprivate(temp_row)
#endif
    for (int row = 0; row < out->rows(); row++) {
      temp_row.head(half_size).setConstant((*out)(row, 0));
      temp_row.segment(half_size, image.cols()) = out->row(row);
      temp_row.tail(half_size).setConstant((*out)(row, out->cols() - 1));

      // Convolve the row. We perform the first step here explicitly so that we
      // avoid setting the row equal to zero.
      out->row(row) = kernel_x(0) * temp_row.head(image.cols());
      for (int i = 1; i < full_size; i++) {
        out->row(row) += kernel_x(i) * temp_row.segment(i, image.cols());
      }
    }
  }
}
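
A minimal usage sketch (assuming RowMatrixXf is the usual row-major Eigen
typedef and REFLECT is a BorderType enumerator, as the code above suggests;
the binomial kernel is an illustrative choice):

#include <Eigen/Dense>
#include <iostream>

using RowMatrixXf =
    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

int main() {
  // 5-tap binomial kernel, a cheap Gaussian approximation, normalized.
  Eigen::RowVectorXf kernel(5);
  kernel << 1, 4, 6, 4, 1;
  kernel /= kernel.sum();

  RowMatrixXf image = RowMatrixXf::Random(32, 48).cwiseAbs();
  RowMatrixXf smoothed;

  // Same separable kernel in x and y; REFLECT mirrors pixels at borders.
  SeparableConvolution2d(image, kernel, kernel, REFLECT, &smoothed);

  std::cout << "smoothed(0,0) = " << smoothed(0, 0) << std::endl;
  return 0;
}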