INLINE QRFactorization<max_m, max_n> factorize_qr_householder( Int m, Int n, Matrix<max_m, max_n> a) { Few<Vector<max_m>, max_n> v; Real anorm = frobenius_norm(m, n, a); for (Int k = 0; k < n; ++k) { v[k] = householder_vector(m, a, anorm, k); reflect_columns(m, n, a, v[k], k); } auto r = reduced_r_from_full(n, a); return {v, r}; }
int main() { using value_type = double; const unsigned N = 5; const unsigned M = 5; const unsigned r = 3; flens::matrix<value_type> A(N,M); #if 0 A = 2.0; #endif #if 1 for (unsigned i = 0; i < num_rows(A); ++i) for (unsigned j = 0; j < num_cols(A); ++j) A(i,j) = fmmtl::random<value_type>::get(); #endif #if 0 A[1][1] = 1; A[1][3] = 2; A[3][1] = 1; A[3][3] = 4; #endif std::cout << "A = \n" << A << std::endl; flens::matrix<double> U, V; std::tie(U, V) = adaptive_cross_approx(A, 1e-10, r); std::cout << "FINAL RANK = " << num_cols(U) << std::endl; std::cout << "U = \n" << U << std::endl; std::cout << "V = \n" << V << std::endl; std::cout << "UV = \n" << flens::matrix<value_type>(U*V) << std::endl; flens::matrix<value_type> Res; Res = A - U*V; std::cout << "Residual = \n" << Res << std::endl; std::cout << "norm_F = " << frobenius_norm(Res) << std::endl; return 0; }
void STARPU_PLU(compute_lu_matrix)(unsigned size, unsigned nblocks, TYPE *Asaved) { TYPE *all_r = STARPU_PLU(reconstruct_matrix)(size, nblocks); unsigned display = STARPU_PLU(display_flag)(); int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (rank == 0) { TYPE *L = malloc((size_t)size*size*sizeof(TYPE)); TYPE *U = malloc((size_t)size*size*sizeof(TYPE)); memset(L, 0, size*size*sizeof(TYPE)); memset(U, 0, size*size*sizeof(TYPE)); /* only keep the lower part */ unsigned i, j; for (j = 0; j < size; j++) { for (i = 0; i < j; i++) { L[j+i*size] = all_r[j+i*size]; } /* diag i = j */ L[j+j*size] = all_r[j+j*size]; U[j+j*size] = 1.0; for (i = j+1; i < size; i++) { U[j+i*size] = all_r[j+i*size]; } } STARPU_PLU(display_data_content)(L, size); STARPU_PLU(display_data_content)(U, size); /* now A_err = L, compute L*U */ CPU_TRMM("R", "U", "N", "U", size, size, 1.0f, U, size, L, size); if (display) fprintf(stderr, "\nLU\n"); STARPU_PLU(display_data_content)(L, size); /* compute "LU - A" in L*/ CPU_AXPY(size*size, -1.0, Asaved, 1, L, 1); TYPE err = CPU_ASUM(size*size, L, 1); int max = CPU_IAMAX(size*size, L, 1); if (display) fprintf(stderr, "DISPLAY ERROR\n"); STARPU_PLU(display_data_content)(L, size); fprintf(stderr, "(A - LU) Avg error : %e\n", err/(size*size)); fprintf(stderr, "(A - LU) Max error : %e\n", L[max]); double residual = frobenius_norm(L, size); double matnorm = frobenius_norm(Asaved, size); fprintf(stderr, "||A-LU|| / (||A||*N) : %e\n", residual/(matnorm*size)); } }
friend value_type norm(const self& dp) { return frobenius_norm(dp.M); };
int spai_line (matrix *A, int col, int spar, int lower_diag, int upper_diag, double tau, matrix *M) { int s,nbq,nnz,dimr,block_width; double scalar_resnorm,block_resnorm,adjust_epsilon; int i,index,pe,len,ierr; int row_address; int *rptr; double *aptr; int j, k, ptr, low_c, up_c, ccol, row; int rlen; int *buf; int *rbuf; double *vbuf; double comp_max, tau_limit = 1 - tau; block_width = A->block_sizes[col]; adjust_epsilon = epsilon*sqrt((double) block_width); if (spar == 1) /* mark elements depending on tau parameter */ { comp_max = 0; /* find maximum in column resp. row if transposed */ for (j=0; j<A->lines->len[col]; j++) { ptr = A->lines->ptrs[col][j]; if (comp_max < fabs( A->lines->A[col][j])) comp_max = fabs( A->lines->A[col][j]); } /* keep diagonal and elements about fraction of maximum */ for (i=0, j=0; j<A->lines->len[col]; j++) { ptr = A->lines->ptrs[col][j]; if (ptr == col + A->my_start_index || fabs(A->lines->A[col][j]/comp_max) > tau_limit) { n1[i] = A->block_sizes[j]; J->ptr[i++] = ptr; } } J->len = i; J->slen = i; dimr = nnz = 0; } else if (spar == 2) /* set diagonals - mind switching cols and rows */ { if ((low_c = col-upper_diag) < 0) low_c = 0; if ((up_c = col+lower_diag) > A->n-1) up_c = A->n-1; for (i=0, j=low_c; j<=up_c; j++,i++) { J->ptr[i] = j; n1[i] = A->block_sizes[j]; } J->len = i; J->slen = i; dimr = nnz = 0; } else /* initial sparsity diagonal */ { J->ptr[0] = col; J->len = 1; J->slen = block_width; n1[0] = block_width; dimr = nnz = 0; } /* compute I */ getrows(A,M,J,I); copyvv(J,J_tilde); for (s=0, nbq = 0, TAU_ptr[0] = 0, /* effectively infinity */ scalar_resnorm=block_resnorm=1000000*epsilon; (s < nbsteps); s++, nbq++) { com_server(A,M); full_matrix(A,M,max_dim, Ahat); n2[s] = I->slen - dimr; /* compute solution -> x, residual, and update QR */ if ((ierr = qr(A,col,nbq,dimr)) != 0) return ierr; nnz = J->len; dimr = J->slen; /* is solution good enough? */ /* Use Froebenius norm */ convert_to_block (res,resb,col,I->ptr,A,max_dim,I->len); block_resnorm = frobenius_norm(resb,block_width,I->slen); if (debug) { fprintf(fptr_dbg," s=%d col=%d of %d block_resnorm=%12.4le\n", s,col,A->n,block_resnorm); fflush(fptr_dbg); } if (spar == 1 /* row population with tau parameter */ || spar == 2) break; /* fixed diagonals - no further ado */ if (block_resnorm <= adjust_epsilon) break; /* Don't bother with last augment_sparsity */ if (s == (nbsteps-1)) break; if (! augment_sparsity(A,M,col,maxapi,block_resnorm)) break; getrows(A,M, J_tilde,I_tilde); deleter(I,I_tilde,A); if (! append(J,J_tilde)) break; /* J <- J U J_tilde */ if (! append(I,I_tilde)) break; /* I <- I U I_tilde */ } if (block_resnorm > adjust_epsilon && spar == 0) { num_bad_cols++; if (message) { fprintf(message, "could not meet tol, col=%d resnorm = %le, adjust_epsilon = %le\n", col+1, block_resnorm/sqrt((double) block_width), adjust_epsilon); fflush(message); } } if (resplot_fptr) { for (i=0; i<block_width; i++) { if (block_resnorm <= adjust_epsilon) block_flag = " "; else block_flag = "*"; scalar_resnorm = frobenius_norm(&res[i*max_dim],1,I->slen); if (scalar_resnorm <= epsilon) scalar_flag = " "; else scalar_flag = "*"; fprintf(resplot_fptr,"%6d %5.3lf %s %6d %5.3lf %s\n", start_col+i, scalar_resnorm, scalar_flag, col, block_resnorm/sqrt((double) block_width), block_flag); } start_col += block_width; } /* current solution in x, up to nnz, written to M(k,:) */ /* convert x to block structure */ convert_to_block (x,xb,col,J->ptr,A,max_dim,nnz); put_Mline(A,M, col, J->ptr, xb, nnz, J->slen); for (i=0; i<nbsteps; i++) { if (Qlist[i]) { free(Qlist[i]); Qlist[i] = NULL; } else break; } return 0; }