int main(int argc, char **argv) { uint prob; /* Used for selecting the problem: 2d or 3d? */ uint L; /* Number of grid refinements */ uint clf; /* Leafsize */ uint clustermode; /* Used for selecting the cluster strategy */ uint decomp; /* Used for selecting the type of the decomposition */ uint i, j; /* Auxiliary variable for loops */ ptri2d *gr_2d; /* 2d mesh hierarchy */ ptri2dp1 p1_2d; /* Linear p1 basis functions in 2d */ ptet3d *gr_3d; /* 3d mesh hierarchy */ ptet3dp1 p1_3d; /* Linear p1 basis functions in 3d */ psparsematrix sp; /* Sparsematrix object */ uint ndof; /* Number of degrees of freedom */ uint *idx; /* Array of indices for the clustergeometry */ pclustergeometry cg; /* Clustergeometry object */ uint dim; /* Dimension for domain decomposition clustering */ uint *flag; /* Auxiliary array for domain decompostion clustering */ pcluster root; /* Cluster tree object */ pblock broot; /* Block cluster tree object */ real eta; /* Admissibility parameter */ phmatrix hm; /* Hierarchical matrices */ real tol_decomp; /* Tolerance for decompositions */ real tol_coarsen; /* Tolerance for coarsening of the hierarchical matrix */ phmatrix l; /* Hierarchical matrices for storing the Cholesky factor */ ptruncmode tm; /* truncmode for truncations within the arithmetic */ real error; /* Auxiliary variable for testing */ pstopwatch sw; /* Stopwatch for time measuring */ real time; /* Variable for time measuring */ size_t sz, sz_rk, sz_full; /* Variables for memory footprint */ /* First initialise the library */ init_h2lib(&argc, &argv); /*Initialise variables */ eta = 2.0; tol_coarsen = 1e-16; tol_decomp = 1e-12; tm = new_releucl_truncmode(); sw = new_stopwatch(); /* Set problem parameters */ printf("Select problem\n"); prob = askforint("1 = fem2d,\t 2 = fem3d\t \n", "h2lib_fem", 1); L = askforint("Refinement?\n", "h2lib_L", 4); clf = askforint("Leafsize?\n", "h2lib_clf", 32); clustermode = askforint("1 = adaptive, \t 2 = ddcluster", "h2lib_clusterstrategy", 2); decomp = askforint("1 = LU-decomposition,\t, 2 = cholesky-decomposition\n", "h2lib_decomp", 1); tol_decomp = askforreal("tol_decomp=?\n", "h2lib_tol_decomp", 1e-13); /* Build geometry and discretisation for laplace equation */ if (prob == 1) { printf("========================================\n" " Create and fill fem2d sparsematrix\n"); /* Mesh hierarchy */ gr_2d = (ptri2d *) allocmem((size_t) sizeof(ptri2d) * (L + 1)); gr_2d[0] = new_unitsquare_tri2d(); /* Set domain */ //gr_2d[0]=new_unitcircle_tri2d(); //gr_2d[0] = new_lshape_tri2d(); for (i = 0; i < L; i++) { /* Mesh refinements */ gr_2d[i + 1] = refine_tri2d(gr_2d[i], NULL); } check_tri2d(gr_2d[L]); /* Check mesh for inconsistencies */ p1_2d = new_tri2dp1(gr_2d[L]); /* Build discretisation */ sp = build_tri2dp1_sparsematrix(p1_2d); /* Build corresponding sparsematrix */ assemble_tri2dp1_laplace_sparsematrix(p1_2d, sp, 0); /* Fill the sparsematrix */ ndof = p1_2d->ndof; printf("ndof = %u\n", ndof); /* Initialise index array for the cluster geometry */ idx = allocuint(ndof); for (i = 0; i < ndof; i++) idx[i] = i; /* Build clustergeometry for the problem */ cg = build_tri2dp1_clustergeometry(p1_2d, idx); del_tri2dp1(p1_2d); for (i = 0; i <= L; i++) { j = L - i; del_tri2d(gr_2d[j]); } freemem(gr_2d); dim = 2; /* Set dimenison for domain decomposition clustering */ } else { assert(prob == 2); printf("========================================\n" " Create and fill fem3d sparsematrix\n"); /* Mesh hierarchy */ gr_3d = (ptet3d *) allocmem((size_t) sizeof(ptri2d) * (L + 1)); gr_3d[0] = new_unitcube_tet3d(); /* Set domain */ for (i = 0; i < L; i++) { gr_3d[i + 1] = refine_tet3d(gr_3d[i], NULL); } check_tet3d(gr_3d[L]); /* Check mesh for inconsistencies */ p1_3d = new_tet3dp1(gr_3d[L]); /* build discretisation */ sp = build_tet3dp1_sparsematrix(p1_3d); /* Build corresponding sparsematrix */ assemble_tet3dp1_laplace_sparsematrix(p1_3d, sp, 0); /* Fill the sparsematrix */ ndof = p1_3d->ndof; printf("ndof = %u\n", ndof); /* initialise index array for the clustergeometry */ idx = allocuint(ndof); for (i = 0; i < ndof; i++) idx[i] = i; /* Build clustergeometry for the problem */ cg = build_tet3dp1_clustergeometry(p1_3d, idx); del_tet3dp1(p1_3d); for (i = 0; i <= L; i++) { j = L - i; del_tet3d(gr_3d[j]); } freemem(gr_3d); dim = 3; } /* Build and fill the corresponding hierarchical matrix */ printf("========================================\n" " Create and fill hierarchical matrix\n"); if (clustermode == 1) { /* Build cluster tree based on adaptive clustering */ root = build_cluster(cg, ndof, idx, clf, H2_ADAPTIVE); /* Build block cluster tree using the euclidian (maximum) admissibilty condition */ broot = build_nonstrict_block(root, root, &eta, admissible_2_cluster); /*Build hierarchical matrix from block */ hm = build_from_block_hmatrix(broot, 0); /* Fill hierarchical matrix with sparsematrix entries */ copy_sparsematrix_hmatrix(sp, hm); /* Compute error */ error = norm2diff_sparsematrix_hmatrix(hm, sp); printf("error = %g\n", error); del_block(broot); } else { /* clustermode == 2 */ /* Initialise auxiliary array for domain decomposition clustering */ flag = allocuint(ndof); for (i = 0; i < ndof; i++) flag[i] = 0; /* build cluster tree based adaptive doamin decomposition clustering */ root = build_adaptive_dd_cluster(cg, ndof, idx, clf, sp, dim, flag); freemem(flag); /* Build block cluster tree using the domain decompostion admissibilty condition */ broot = build_nonstrict_block(root, root, &eta, admissible_dd_cluster); /* Build hierarchical matrix from block cluster tree */ hm = build_from_block_hmatrix(broot, 0); /* Fill hierarchical matrix with sparsematrix entries */ copy_sparsematrix_hmatrix(sp, hm); /* Compute error */ error = norm2diff_sparsematrix_hmatrix(hm, sp); printf("error = %g\n", error); del_block(broot); } /* Draw hierarchical matrix with cairo */ #if 0 #ifdef USE_CAIRO cairo_t *cr; printf("Draw hmatrix to \"hm_p1.pdf\"\n"); cr = new_cairopdf("../hm_p1.pdf", 1024.0, 1024.0); draw_cairo_hmatrix(cr, hm, false, 0); cairo_destroy(cr); #endif #endif /* Compute size of the hierarchical matrix */ sz = getsize_hmatrix(hm); sz_rk = getfarsize_hmatrix(hm); sz_full = getnearsize_hmatrix(hm); printf("size original matrix:\t \t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz / 1024.0 / 1024.0, sz / 1024.0, sz / 1024.0 / ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk / 1024.0 / 1024.0, sz_full / 1024.0 / 1024.0); printf("Coarsen hmatrix\n"); /* Coarsening of the hierarchical matrix to safe storage */ coarsen_hmatrix(hm, tm, tol_coarsen, true); /* Compute size of the hierarchical matrix after coarsening */ sz = getsize_hmatrix(hm); sz_rk = getfarsize_hmatrix(hm); sz_full = getnearsize_hmatrix(hm); printf("size original matrix:\t \t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz / 1024.0 / 1024.0, sz / 1024.0, sz / 1024.0 / ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk / 1024.0 / 1024.0, sz_full / 1024.0 / 1024.0); /* Compute error after coarseing */ error = norm2diff_sparsematrix_hmatrix(hm, sp); printf("error = %g\n", error); /* Draw the hierarchical matrix after coarsening */ #if 0 #ifdef USE_CAIRO cairo_t *cr2; printf("Draw hmatrix to \"hm_p1_coarsend.pdf\"\n"); cr2 = new_cairopdf("../hm_p1_coarsend.pdf", 1024.0, 1024.0); draw_cairo_hmatrix(cr2, hm, false, 0); cairo_destroy(cr2); #endif #endif /* Compute decomposition of the hierarchical matrix */ if (decomp == 1) { printf("========================================\n" " Test lu-decomposition\n"); /* Compute size of the hierarchical matrix */ sz = getsize_hmatrix(hm); printf("size original matrix:\t \t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz / 1024.0 / 1024.0, sz / 1024.0, sz / 1024.0 / ndof); /* Compute lu-decomposition of the hierarchical matrix */ start_stopwatch(sw); lrdecomp_hmatrix(hm, 0, tol_decomp); time = stop_stopwatch(sw); /* Compute size of the lu-decomposition */ sz = getsize_hmatrix(hm); sz_rk = getfarsize_hmatrix(hm); sz_full = getnearsize_hmatrix(hm); printf ("size lu-decomposition (lu):\t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz / 1024.0 / 1024.0, sz / 1024.0, sz / 1024.0 / ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk / 1024.0 / 1024.0, sz_full / 1024.0 / 1024.0); printf("time = %f s\n", time); #if 0 #ifdef USE_CAIRO cairo_t *cr; printf("Draw lu-decomposition to \"hm_p1_lu.pdf\"\n"); cr = new_cairopdf("../hm_p1_lu_staging.pdf", 512.0, 512.0); draw_cairo_hmatrix(cr, hm, false, 100); cairo_destroy(cr); #endif #endif /* Compute error */ error = norm2lu_sparsematrix(hm, sp); printf("||I-(lu)^-1 sp||_2 "); printf("error = %g\n", error); } else { /*decomp == 2 */ printf("========================================\n" " Test cholesky-decomposition\n"); /* Compute size of the hierarchical matrix */ sz = getsize_hmatrix(hm); printf("size original matrix:\t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz / 1024.0 / 1024.0, sz / 1024.0, sz / 1024.0 / ndof); /* Compute cholesky-decomposition */ start_stopwatch(sw); choldecomp_hmatrix(hm, 0, tol_decomp); time = stop_stopwatch(sw); /* Compute size of the cholesky-factor */ l = clone_lower_hmatrix(false, hm); sz = getsize_hmatrix(l); sz_rk = getfarsize_hmatrix(l); sz_full = getnearsize_hmatrix(l); printf ("size chol-decomposition (l):\t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz / 1024.0 / 1024.0, sz / 1024.0, sz / 1024.0 / ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk / 1024.0 / 1024.0, sz_full / 1024.0 / 1024.0); printf("time = %f s\n", time); #if 0 #ifdef USE_CAIRO cairo_t *cr; printf("Draw chol-factor to \"hm_chol_staging.pdf\"\n"); cr = new_cairopdf("../hm_chol_staging.pdf", 512.0, 512.0); draw_cairo_hmatrix(cr, l, false, 1000); cairo_destroy(cr); #endif #endif /* Compute error */ error = norm2chol_sparsematrix(hm, sp); printf("||I-(ll*)^-1 sp||_2 "); printf("error = %g\n", error); del_hmatrix(l); } /* Cleaning up */ del_truncmode(tm); del_stopwatch(sw); del_hmatrix(hm); del_cluster(root); del_clustergeometry(cg); del_sparsematrix(sp); freemem(idx); (void) printf(" %u matrices and\n" " %u vectors still active\n", getactives_amatrix(), getactives_avector()); uninit_h2lib(); printf("The end\n"); return 0; }
static void init_helmholtzbem3d_opencl(pcbem3d bem) { uint num_kernels; const char *kernel_names[] = { "assemble_slp_cc_list_0", "assemble_slp_cc_list_1", "assemble_slp_cc_list_2", "assemble_slp_cc_list_3", "assemble_dlp_cc_list_0", "assemble_dlp_cc_list_1", "assemble_dlp_cc_list_2", "assemble_dlp_cc_list_3" }; cl_uint num_devices = ocl_system.num_devices; cl_uint num_queues = ocl_system.queues_per_device; cl_uint nthreads = num_devices * num_queues; cl_int res; cl_mem_flags mem_rflags, mem_wflags; uint i, j; real *gr_x; uint *gr_t; real *gr_n; real *q_xw, *x, *w; uint q, v, t; num_kernels = sizeof(kernel_names) / sizeof(kernel_names[0]); mem_rflags = CL_MEM_READ_ONLY; mem_wflags = CL_MEM_WRITE_ONLY; if (ocl_bem3d.num_kernels == 0) { /**************************************************** * Setup all necessary kernels ****************************************************/ setup_kernels(helmholtzbem3d_ocl_src, num_kernels, kernel_names, &ocl_bem3d.kernels); ocl_bem3d.num_kernels = num_kernels; /**************************************************** * Create buffers for matrix chunks ****************************************************/ ocl_bem3d.mem_N = (cl_mem *) allocmem(nthreads * sizeof(cl_mem)); ocl_bem3d.mem_ridx = (cl_mem *) allocmem(nthreads * sizeof(cl_mem)); ocl_bem3d.mem_cidx = (cl_mem *) allocmem(nthreads * sizeof(cl_mem)); for (i = 0; i < num_devices; ++i) { for (j = 0; j < num_queues; ++j) { ocl_bem3d.mem_N[j + i * num_queues] = clCreateBuffer(ocl_system.contexts[i], mem_wflags, ocl_system.max_package_size, NULL, &res); CL_CHECK(res) ocl_bem3d.mem_ridx[j + i * num_queues] = clCreateBuffer(ocl_system.contexts[i], mem_rflags, ocl_system.max_package_size / sizeof(field) * sizeof(uint), NULL, &res); CL_CHECK(res) ocl_bem3d.mem_cidx[j + i * num_queues] = clCreateBuffer(ocl_system.contexts[i], mem_rflags, ocl_system.max_package_size / sizeof(field) * sizeof(uint), NULL, &res); CL_CHECK(res) } } /**************************************************** * Create buffer for non singular quadrature rules * and copy the contents ****************************************************/ q = bem->sq->q; x = allocreal(q); w = allocreal(q); q_xw = allocreal(2 * q); assemble_gauss(q, x, w); for (i = 0; i < q; ++i) { q_xw[2 * i] = 0.5 + 0.5 * x[i]; q_xw[2 * i + 1] = 0.5 * w[i]; } ocl_bem3d.mem_q_xw = (cl_mem *) allocmem(num_devices * sizeof(cl_mem)); for (i = 0; i < num_devices; ++i) { ocl_bem3d.mem_q_xw[i] = clCreateBuffer(ocl_system.contexts[i], mem_rflags, 2 * q * sizeof(real), NULL, &res); CL_CHECK(res) res = clEnqueueWriteBuffer(ocl_system.queues[i * num_queues], ocl_bem3d.mem_q_xw[i], CL_TRUE, 0, 2 * q * sizeof(real), q_xw, 0, NULL, NULL); CL_CHECK(res); } ocl_bem3d.nq = 2 * q; freemem(x); freemem(w); freemem(q_xw); /**************************************************** * Create buffer for singular quadrature rules * and copy the contents ****************************************************/ q = bem->sq->q2; x = allocreal(q); w = allocreal(q); q_xw = allocreal(2 * q); assemble_gauss(q, x, w); for (i = 0; i < q; ++i) { q_xw[2 * i] = 0.5 + 0.5 * x[i]; q_xw[2 * i + 1] = 0.5 * w[i]; } ocl_bem3d.mem_q2_xw = (cl_mem *) allocmem(num_devices * sizeof(cl_mem)); for (i = 0; i < num_devices; ++i) { ocl_bem3d.mem_q2_xw[i] = clCreateBuffer(ocl_system.contexts[i], mem_rflags, 2 * q * sizeof(real), NULL, &res); CL_CHECK(res) res = clEnqueueWriteBuffer(ocl_system.queues[i * num_queues], ocl_bem3d.mem_q2_xw[i], CL_TRUE, 0, 2 * q * sizeof(real), q_xw, 0, NULL, NULL); CL_CHECK(res); } ocl_bem3d.nq2 = 2 * q; freemem(x); freemem(w); freemem(q_xw); /**************************************************** * Create buffers for geometry data and copy the contents ****************************************************/ v = bem->gr->vertices; t = bem->gr->triangles; gr_x = allocreal(3 * v); gr_t = allocuint(3 * t); gr_n = allocreal(3 * t); for (i = 0; i < v; ++i) { gr_x[3 * i + 0] = bem->gr->x[i][0]; gr_x[3 * i + 1] = bem->gr->x[i][1]; gr_x[3 * i + 2] = bem->gr->x[i][2]; } for (i = 0; i < t; ++i) { gr_t[i + 0 * t] = bem->gr->t[i][0]; gr_t[i + 1 * t] = bem->gr->t[i][1]; gr_t[i + 2 * t] = bem->gr->t[i][2]; } for (i = 0; i < t; ++i) { gr_n[3 * i + 0] = bem->gr->n[i][0]; gr_n[3 * i + 1] = bem->gr->n[i][1]; gr_n[3 * i + 2] = bem->gr->n[i][2]; } ocl_bem3d.mem_gr_t = (cl_mem *) allocmem(num_devices * sizeof(cl_mem)); ocl_bem3d.mem_gr_x = (cl_mem *) allocmem(num_devices * sizeof(cl_mem)); for (i = 0; i < num_devices; ++i) { ocl_bem3d.mem_gr_x[i] = clCreateBuffer(ocl_system.contexts[i], mem_rflags, 3 * v * sizeof(real), NULL, &res); CL_CHECK(res) ocl_bem3d.mem_gr_t[i] = clCreateBuffer(ocl_system.contexts[i], mem_rflags, 3 * t * sizeof(uint), NULL, &res); CL_CHECK(res) res = clEnqueueWriteBuffer(ocl_system.queues[i * num_queues], ocl_bem3d.mem_gr_x[i], CL_TRUE, 0, 3 * v * sizeof(real), gr_x, 0, NULL, NULL); CL_CHECK(res); res = clEnqueueWriteBuffer(ocl_system.queues[i * num_queues], ocl_bem3d.mem_gr_t[i], CL_TRUE, 0, 3 * t * sizeof(uint), gr_t, 0, NULL, NULL); CL_CHECK(res); } ocl_bem3d.triangles = t; freemem(gr_x); freemem(gr_t); freemem(gr_n); omp_init_lock(&nf_lock); omp_init_lock(&nf_dist_lock); omp_init_lock(&nf_vert_lock); omp_init_lock(&nf_edge_lock); omp_init_lock(&nf_iden_lock); }
int main(int argc, char **argv) { pmacrosurface3d mg; psurface3d gr; pamatrix Vfull, KMfull; pbem3d bem_slp, bem_dlp; pcluster root; pblock block; phmatrix V, KM; pclusterbasis Vrb, Vcb, KMrb, KMcb; ph2matrix V2, KM2; uint n, q, clf, m, l; real eta, delta, eps_aca; field kvec[3]; cl_device_id *devices; cl_uint ndevices; uint *idx; uint i; init_h2lib(&argc, &argv); get_opencl_devices(&devices, &ndevices); ndevices = 1; set_opencl_devices(devices, ndevices, 2); kvec[0] = 2.0, kvec[1] = 0.0, kvec[2] = 0.0; n = 512; q = 2; clf = 16; eta = 1.0; mg = new_sphere_macrosurface3d(); gr = build_from_macrosurface3d_surface3d(mg, REAL_SQRT(n * 0.125)); n = gr->triangles; printf("Testing unit sphere with %d triangles\n", n); bem_slp = new_slp_helmholtz_ocl_bem3d(kvec, gr, q, q + 2, BASIS_CONSTANT_BEM3D); bem_dlp = new_dlp_helmholtz_ocl_bem3d(kvec, gr, q, q + 2, BASIS_CONSTANT_BEM3D, BASIS_CONSTANT_BEM3D, 0.5); root = build_bem3d_cluster(bem_slp, clf, BASIS_CONSTANT_BEM3D); block = build_nonstrict_block(root, root, &eta, admissible_max_cluster); max_pardepth = 0; Vfull = new_amatrix(n, n); KMfull = new_amatrix(n, n); idx = allocuint(n); for (i = 0; i < n; ++i) { idx[i] = i; } SCHEDULE_OPENCL(0, 1, bem_slp->nearfield, idx, idx, bem_slp, false, Vfull); SCHEDULE_OPENCL(0, 1, bem_dlp->nearfield, idx, idx, bem_dlp, false, KMfull); V = build_from_block_hmatrix(block, 0); KM = build_from_block_hmatrix(block, 0); printf("----------------------------------------\n"); printf("Testing outer Boundary integral equations:\n"); printf("----------------------------------------\n\n"); /* * Test Interpolation */ m = 4; setup_hmatrix_aprx_inter_row_bem3d(bem_slp, root, root, block, m); setup_hmatrix_aprx_inter_row_bem3d(bem_dlp, root, root, block, m); test_hmatrix_system("Interpolation row", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); setup_hmatrix_aprx_inter_col_bem3d(bem_slp, root, root, block, m); setup_hmatrix_aprx_inter_col_bem3d(bem_dlp, root, root, block, m); test_hmatrix_system("Interpolation column", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); setup_hmatrix_aprx_inter_mixed_bem3d(bem_slp, root, root, block, m); setup_hmatrix_aprx_inter_mixed_bem3d(bem_dlp, root, root, block, m); test_hmatrix_system("Interpolation mixed", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); /* * Test Green */ m = 5; l = 1; delta = 0.5; setup_hmatrix_aprx_green_row_bem3d(bem_slp, root, root, block, m, l, delta, build_bem3d_cube_quadpoints); setup_hmatrix_aprx_green_row_bem3d(bem_dlp, root, root, block, m, l, delta, build_bem3d_cube_quadpoints); test_hmatrix_system("Green row", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); setup_hmatrix_aprx_green_col_bem3d(bem_slp, root, root, block, m, l, delta, build_bem3d_cube_quadpoints); setup_hmatrix_aprx_green_col_bem3d(bem_dlp, root, root, block, m, l, delta, build_bem3d_cube_quadpoints); test_hmatrix_system("Green column", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); setup_hmatrix_aprx_green_mixed_bem3d(bem_slp, root, root, block, m, l, delta, build_bem3d_cube_quadpoints); setup_hmatrix_aprx_green_mixed_bem3d(bem_dlp, root, root, block, m, l, delta, build_bem3d_cube_quadpoints); test_hmatrix_system("Green mixed", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); /* * Test Greenhybrid */ m = 2; l = 1; delta = 1.0; eps_aca = 2.0e-2; setup_hmatrix_aprx_greenhybrid_row_bem3d(bem_slp, root, root, block, m, l, delta, eps_aca, build_bem3d_cube_quadpoints); setup_hmatrix_aprx_greenhybrid_row_bem3d(bem_dlp, root, root, block, m, l, delta, eps_aca, build_bem3d_cube_quadpoints); test_hmatrix_system("Greenhybrid row", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); setup_hmatrix_aprx_greenhybrid_col_bem3d(bem_slp, root, root, block, m, l, delta, eps_aca, build_bem3d_cube_quadpoints); setup_hmatrix_aprx_greenhybrid_col_bem3d(bem_dlp, root, root, block, m, l, delta, eps_aca, build_bem3d_cube_quadpoints); test_hmatrix_system("Greenhybrid column", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); setup_hmatrix_aprx_greenhybrid_mixed_bem3d(bem_slp, root, root, block, m, l, delta, eps_aca, build_bem3d_cube_quadpoints); setup_hmatrix_aprx_greenhybrid_mixed_bem3d(bem_dlp, root, root, block, m, l, delta, eps_aca, build_bem3d_cube_quadpoints); test_hmatrix_system("Greenhybrid mixed", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); /* * Test ACA / PACA / HCA */ m = 2; eps_aca = 1.0e-2; /* Nearfield computation on GPU not applicable here yet! */ // setup_hmatrix_aprx_aca_bem3d(bem_slp, root, root, block, eps_aca); // setup_hmatrix_aprx_aca_bem3d(bem_dlp, root, root, block, eps_aca); // test_hmatrix_system("ACA full pivoting", Vfull, KMfull, block, bem_slp, V, // bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); // // setup_hmatrix_aprx_paca_bem3d(bem_slp, root, root, block, eps_aca); // setup_hmatrix_aprx_paca_bem3d(bem_slp, root, root, block, eps_aca); // test_hmatrix_system("ACA partial pivoting", Vfull, KMfull, block, bem_slp, V, // bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); setup_hmatrix_aprx_hca_bem3d(bem_slp, root, root, block, m, eps_aca); setup_hmatrix_aprx_hca_bem3d(bem_slp, root, root, block, m, eps_aca); test_hmatrix_system("HCA2", Vfull, KMfull, block, bem_slp, V, bem_dlp, KM, false, true, 1.0e-3, 2.0e-3); del_hmatrix(V); del_hmatrix(KM); del_block(block); /* * H2-matrix */ block = build_strict_block(root, root, &eta, admissible_max_cluster); Vrb = build_from_cluster_clusterbasis(root); Vcb = build_from_cluster_clusterbasis(root); KMrb = build_from_cluster_clusterbasis(root); KMcb = build_from_cluster_clusterbasis(root); V2 = build_from_block_h2matrix(block, Vrb, Vcb); KM2 = build_from_block_h2matrix(block, KMrb, KMcb); /* * Test Interpolation */ m = 3; setup_h2matrix_aprx_inter_bem3d(bem_slp, Vrb, Vcb, block, m); setup_h2matrix_aprx_inter_bem3d(bem_dlp, KMrb, KMcb, block, m); test_h2matrix_system("Interpolation", Vfull, KMfull, block, bem_slp, V2, bem_dlp, KM2, false, true, 1.0e-3, 2.0e-3); /* * Test Greenhybrid */ m = 2; l = 1; delta = 1.0; eps_aca = 2.0e-2; setup_h2matrix_aprx_greenhybrid_bem3d(bem_slp, Vrb, Vcb, block, m, l, delta, eps_aca, build_bem3d_cube_quadpoints); setup_h2matrix_aprx_greenhybrid_bem3d(bem_dlp, KMrb, KMcb, block, m, l, delta, eps_aca, build_bem3d_cube_quadpoints); test_h2matrix_system("Greenhybrid", Vfull, KMfull, block, bem_slp, V2, bem_dlp, KM2, false, true, 1.0e-3, 2.0e-3); /* Nearfield computation on GPU not applicable here yet! */ // setup_h2matrix_aprx_greenhybrid_ortho_bem3d(bem_slp, Vrb, Vcb, block, m, l, // delta, eps_aca, build_bem3d_cube_quadpoints); // setup_h2matrix_aprx_greenhybrid_ortho_bem3d(bem_dlp, KMrb, KMcb, block, m, l, // delta, eps_aca, build_bem3d_cube_quadpoints); // test_h2matrix_system("Greenhybrid ortho", Vfull, KMfull, block, bem_slp, V2, // bem_dlp, KM2, false, true, 1.0e-3, 2.0e-3); del_h2matrix(V2); del_h2matrix(KM2); del_block(block); freemem(root->idx); del_cluster(root); del_helmholtz_ocl_bem3d(bem_slp); del_helmholtz_ocl_bem3d(bem_dlp); del_amatrix(Vfull); del_amatrix(KMfull); del_surface3d(gr); del_macrosurface3d(mg); freemem(idx); (void) printf("----------------------------------------\n" " %u matrices and\n" " %u vectors still active\n" " %u errors found\n", getactives_amatrix(), getactives_avector(), problems); uninit_h2lib(); return problems; }
int main (int argc, char **argv) { uint prob; /* Used for selecting the problem: 2d or 3d? */ uint L; /* Number of grid refinements */ uint clf; /* Leafsize */ uint clustermode; /* Used for selecting the cluster strategy */ uint decomp; /* Used for selecting the type of the decomposition */ uint i, j; /* Auxiliary variables for loops */ ptri2d *gr_2d; /* 2d mesh hierarchy */ ptri2drt0 rt0_2d; /* Raviart_Thomas functions in 2d */ ptet3d *gr_3d; /* 3d mesh hierarchy */ ptet3drt0 rt0_3d; /* Raviart-Thomas functions in 3d */ psparsematrix sp_A, sp_B; /* Sparsematrix object*/ pavector k; /* Vector for permeabilities */ uint ndof; /* Number of degree of freedom */ uint *idx_B_rows, *idx_B_cols; /* Array of indices for the clustergeometry */ pclustergeometry cg_B_rows, cg_B_cols;/* Clustergeometry object */ uint dim; /* Dimension for domain decomposition clustering */ pcluster root_B_cols, root_B_rows; /* Cluster tree object */ pblock broot_A, broot_B; /* Block cluster tree object */ real eta; /* Admissibilty parameter*/ phmatrix hm_A, hm_B; /* Hierarchical matrices*/ real tol_decomp; /* Tolerance for decompositions*/ real tol_coarsen; /* Tolerance for coarsening of the hierarchical matrix */ phmatrix l; /* Hierarchical matrices for storing the cholesky factor*/ ptruncmode tm; /* Truncmode for truncations within the arithmetic */ real error; /* Auxiliary variable for testing*/ pstopwatch sw; /* Stopwatch for time measuring */ real time; /* Variable for time measurement */ size_t sz, sz_rk, sz_full; /* Variables for memory footprint */ uint elements; /*Auxiliary variable: numberof elements*/ /* First initialise the library */ init_h2lib(&argc, &argv); /*Initialise variables*/ eta = 2.0; tol_coarsen = 1e-16; tm = new_releucl_truncmode(); sw = new_stopwatch(); /* Set problme parameters */ printf("Select problem\n"); prob = askforint("1 = fem2d,\t 2 = fem3d\t \n","h2lib_fem", 1); L = askforint("Refinement?\n", "h2lib_L", 4); clf = askforint("Leafsize?\n", "h2lib_clf", 32); clustermode = askforint("1 = adaptive, \t 2 = adaptive from cluster", "h2lib_clusterstrategy", 2); decomp = askforint("1 = LU-decomposition of hm_A,\t, 2 = cholesky-decomposition of hm_A\n","h2lib_decomp",1); tol_decomp = askforreal("tol_decomp=?\n", "h2lib_tol_decomp", 1e-13); /* Build geomtery and discretisation for Darcy's equation */ if(prob==1){ printf("========================================\n" " Create and fill fem2d sparsematrix\n"); /* Mesh hierarchy */ gr_2d = (ptri2d *) allocmem((size_t) sizeof(ptri2d) * (L+1)); gr_2d[0] = new_unitsquare_tri2d(); /* Set domain */ //gr_2d[0]=new_unitcircle_tri2d(); //gr_2d[0] = new_lshape_tri2d(); for(i=0;i<L;i++){ /* Mesh refinements */ gr_2d[i+1] = refine_tri2d(gr_2d[i], NULL); fixnormals_tri2d(gr_2d[i]); } fixnormals_tri2d(gr_2d[i]); check_tri2d(gr_2d[L]); /*Check mesh for inconsistencies */ rt0_2d = new_tri2drt0(gr_2d[L]); /*Build discretisation */ set_boundary_unitsquare_tri2drt0(rt0_2d); update_tri2drt0(rt0_2d); sp_A = build_tri2drt0_A_sparsematrix(rt0_2d); /*Build corresponding sparsematrices */ sp_B = build_tri2drt0_B_sparsematrix(rt0_2d); k = new_avector(gr_2d[L]->triangles); /*Build and fill vector for the permeabilities */ function_permeability_2d(gr_2d[L], k); assemble_tri2drt0_darcy_A_sparsematrix(rt0_2d, sp_A, 0, k); /*Fill the sparsematrices */ assemble_tri2drt0_darcy_B_sparsematrix(rt0_2d, sp_B, 0); ndof = rt0_2d->ndof;printf("ndof = %u\n", ndof); /*Initialise index arrays for the cluster geometries */ idx_B_rows = allocuint(rt0_2d->t2->triangles); for(i=0;i<rt0_2d->t2->triangles;i++) idx_B_rows[i] = i; idx_B_cols = allocuint(rt0_2d->ndof); for(i=0;i<rt0_2d->ndof;i++) idx_B_cols[i] = i; /* Build clustergeomtries for the problem */ cg_B_rows = build_tri2drt0_B_clustergeometry(rt0_2d, idx_B_rows); cg_B_cols = build_tri2drt0_A_clustergeometry(rt0_2d, idx_B_cols); elements = gr_2d[L]->triangles; del_tri2drt0(rt0_2d); for(i=0;i<=L;i++){ j = L-i; del_tri2d(gr_2d[j]); } freemem(gr_2d); del_avector(k); dim = 2; /* Set dimenison for domain decomposition clustering */ } else { assert(prob==2); printf("========================================\n" " Create and fill fem3d sparsematrix\n"); /* Mesh hierarchy */ gr_3d = (ptet3d *) allocmem((size_t) sizeof(ptet3d) * (L+1)); gr_3d[0] = new_unitcube_tet3d(); /* Set domain */ for(i=0;i<L;i++){ gr_3d[i+1] = refine_tet3d(gr_3d[i],NULL); fixnormals_tet3d(gr_3d[i]); } fixnormals_tet3d(gr_3d[i]); check_tet3d(gr_3d[L]); /*Check mesh for inconsistencies */ rt0_3d = new_tet3drt0(gr_3d[L]); /*build discretisation */ set_boundary_unitcube_tet3drt0(rt0_3d); update_tet3drt0(rt0_3d); sp_A = build_tet3drt0_A_sparsematrix(rt0_3d); /*Build corresponding sparsematrices*/ sp_B = build_tet3drt0_B_sparsematrix(rt0_3d); k = new_avector(gr_3d[L]->tetrahedra); /* Build and fill vector for the permeabilities */ function_permeability_3d(gr_3d[L], k); assemble_tet3drt0_darcy_A_sparsematrix(rt0_3d, sp_A, 0, k); /*Fill the sparsematrices*/ assemble_tet3drt0_darcy_B_sparsematrix(rt0_3d, sp_B, 0); ndof = rt0_3d->ndof;printf("ndof = %u\n", ndof); /*Initialise index arrays for the cluster geometries */ idx_B_rows = allocuint(rt0_3d->t3->tetrahedra); for(i=0;i<rt0_3d->t3->tetrahedra;i++) idx_B_rows[i] = i; idx_B_cols = allocuint(rt0_3d->ndof); for(i=0;i<rt0_3d->ndof;i++) idx_B_cols[i] = i; /* Build clustergeomtries for the problem */ cg_B_rows = build_tet3drt0_B_clustergeometry(rt0_3d, idx_B_rows); cg_B_cols = build_tet3drt0_A_clustergeometry(rt0_3d, idx_B_cols); elements = gr_3d[L]->tetrahedra; del_tet3drt0(rt0_3d); for(i=0;i<=L;i++){ j = L -i; del_tet3d(gr_3d[j]); } freemem(gr_3d); del_avector(k); dim = 3; } /* Build and fill the corresponding hierarchical matrices*/ printf("========================================\n" " Create and fill hierarchical matrix\n"); if(clustermode ==1){ /* Build cluster trees based on adaptive clustering*/ root_B_rows = build_cluster(cg_B_rows, elements, idx_B_rows, clf, H2_ADAPTIVE); root_B_cols = build_cluster(cg_B_cols, ndof, idx_B_cols, clf, H2_ADAPTIVE); /*Build block cluster trees using the euclidian (maximum) admissibilty condition*/ broot_B = build_nonstrict_block(root_B_rows, root_B_cols, &eta, admissible_2_cluster); broot_A = build_nonstrict_block(root_B_cols, root_B_cols, &eta, admissible_2_cluster); /*Build hierarchical matrices from block*/ hm_A = build_from_block_hmatrix(broot_A, 0); hm_B = build_from_block_hmatrix(broot_B, 0); /*Fill hierarchical matrices with sparsematrix entries*/ copy_sparsematrix_hmatrix(sp_A, hm_A); copy_sparsematrix_hmatrix(sp_B, hm_B); /* Compute error */ printf("sp_A: rows %u cols: %u\n", sp_A->rows, sp_A->cols); printf("sp_B: rows %u cols: %u\n", sp_B->rows, sp_B->cols); printf("hm_A: rows %u cols: %u\n", hm_A->rc->size, hm_A->cc->size); printf("hm_B: rows %u cols: %u\n", hm_B->rc->size, hm_B->cc->size); error = norm2diff_sparsematrix_hmatrix(hm_A, sp_A); printf("error A = %g\n", error); error = norm2diff_sparsematrix_hmatrix(hm_B, sp_B); printf("error B = %g\n", error); del_block(broot_A); del_block(broot_B); } else{ /*clustermode == 2*/ /* Build cluster tree based on adaptive clustering */ root_B_rows = build_cluster(cg_B_rows, elements, idx_B_rows, clf, H2_ADAPTIVE); /* Build cluster tree based adaptive doamin decomposition clustering */ root_B_cols = build_adaptive_fromcluster_cluster(root_B_rows, cg_B_cols, ndof, idx_B_cols, clf, dim); // freemem(flag); /* Build block cluster trees using the domain decompostion admissibilty (rt0) condition*/ broot_B = build_nonstrict_block(root_B_rows, root_B_cols, &eta, admissible_2_cluster); //broot_B = build_nonstrict_block(root_B_rows, root_B_cols, &eta, admissible_dd_rt0_cluster); broot_A = build_nonstrict_block(root_B_cols, root_B_cols, &eta, admissible_dd_cluster); /* Build hierarchical matrices from block cluster tree*/ hm_A = build_from_block_hmatrix(broot_A, 0); hm_B = build_from_block_hmatrix(broot_B, 0); /* Fill hierarchical matrices with sparsematrix entries*/ copy_sparsematrix_hmatrix(sp_A, hm_A); copy_sparsematrix_hmatrix(sp_B, hm_B); /* Compute error */ printf("sp_A: rows %u cols: %u\n", sp_A->rows, sp_A->cols); printf("sp_B: rows %u cols: %u\n", sp_B->rows, sp_B->cols); printf("hm_A: rows %u cols: %u\n", hm_A->rc->size, hm_A->cc->size); printf("hm_B: rows %u cols: %u\n", hm_B->rc->size, hm_B->cc->size); error = norm2diff_sparsematrix_hmatrix(hm_A, sp_A); printf("error A = %g\n", error); error = norm2diff_sparsematrix_hmatrix(hm_B, sp_B); printf("error B = %g\n", error); del_block(broot_A); del_block(broot_B); } /* Draw hierarchical matrix with cairo */ #if 0 #ifdef USE_CAIRO cairo_t *cr; printf("Draw hmatrix to \"hm_p1.pdf\"\n"); cr = new_cairopdf("../hm_p1.pdf", 1024.0, 1024.0); draw_cairo_hmatrix(cr, hm, false, 0); cairo_destroy(cr); #endif #endif /*Compute size of the hierarchical matrix A*/ sz = getsize_hmatrix(hm_A); sz_rk = getfarsize_hmatrix(hm_A); sz_full = getnearsize_hmatrix(hm_A); printf("matrix A:\t \t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz/1024.0/1024.0, sz/1024.0, sz/1024.0/ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk/1024.0/1024.0, sz_full /1024.0/1024.0); printf("Coarsen hmatrix\n"); /*Coarsening of the hierarchical matrix A to safe storage */ coarsen_hmatrix(hm_A, tm, tol_coarsen, true); /* Compute size of the hierarchical matrix A after coarsening*/ sz = getsize_hmatrix(hm_A); sz_rk = getfarsize_hmatrix(hm_A); sz_full = getnearsize_hmatrix(hm_A); printf("matrix A:\t \t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz/1024.0/1024.0, sz/1024.0, sz/1024.0/ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk/1024.0/1024.0, sz_full /1024.0/1024.0); /* Compute error after coarseing */ error = norm2diff_sparsematrix_hmatrix(hm_A, sp_A); printf("error = %g\n", error); /*Compute size of the hierarchical matrix B */ sz = getsize_hmatrix(hm_B); sz_rk = getfarsize_hmatrix(hm_B); sz_full = getnearsize_hmatrix(hm_B); printf("matrix B:\t \t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz/1024.0/1024.0, sz/1024.0, sz/1024.0/ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk/1024.0/1024.0, sz_full /1024.0/1024.0); printf("Coarsen hmatrix\n"); /*Coarsening of the hierarchical matrix B to safe storage */ coarsen_hmatrix(hm_B, tm, tol_coarsen, true); /* Compute size of the hierarchical matrix B after coarsening*/ sz = getsize_hmatrix(hm_B); sz_rk = getfarsize_hmatrix(hm_B); sz_full = getnearsize_hmatrix(hm_B); printf("matrix B:\t \t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz/1024.0/1024.0, sz/1024.0, sz/1024.0/ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk/1024.0/1024.0, sz_full /1024.0/1024.0); /* Compute error after coarseing */ error = norm2diff_sparsematrix_hmatrix(hm_B, sp_B); printf("error = %g\n", error); /* Compute decomposition of the hierarchical matrix A */ if(decomp == 1){ printf("========================================\n" " Test lu-decomposition (A)\n"); /*Compute size of the hierarchical matrix*/ sz = getsize_hmatrix(hm_A); printf("size original matrix:\t \t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz/1024.0/1024.0, sz/1024.0, sz/1024.0/ndof); /*Compute lu-decomposition of the hierarchical matrix*/ start_stopwatch(sw); lrdecomp_hmatrix(hm_A, 0, tol_decomp); time = stop_stopwatch(sw); /*Compute size of the lu-decomposition*/ sz = getsize_hmatrix(hm_A); sz_rk = getfarsize_hmatrix(hm_A); sz_full = getnearsize_hmatrix(hm_A); printf("size lu-decomposition (lu):\t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz/1024.0/1024.0, sz/1024.0, sz/1024.0/ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk/1024.0/1024.0, sz_full /1024.0/1024.0); printf("time = %f s\n", time); /*Compute error*/ error = norm2lu_sparsematrix(hm_A, sp_A); printf("||I-(lu)^-1 sp||_2 "); printf("error = %g\n", error); } else{ /*decomp == 2*/ printf("========================================\n" " Test cholesky-decomposition (A)\n"); /*Compute size of the hierarchical matrix*/ sz = getsize_hmatrix(hm_A); printf("size original matrix:\t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz/1024.0/1024.0, sz/1024.0, sz/1024.0/ndof); /* Compute cholesky-decomposition*/ start_stopwatch(sw); choldecomp_hmatrix(hm_A, 0, tol_decomp); time = stop_stopwatch(sw); /* Compute size of the cholesky-factor*/ l = clone_lower_hmatrix(false, hm_A); sz = getsize_hmatrix(l); sz_rk = getfarsize_hmatrix(l); sz_full = getnearsize_hmatrix(l); printf("size chol-decomposition (l):\t %.2f MB \t %.2f KB \t %.2f KB/DoF\n", sz/1024.0/1024.0, sz/1024.0, sz/1024.0/ndof); printf("size rk: \t %.2f MB\t size full: \t %.2f MB \n", sz_rk/1024.0/1024.0, sz_full /1024.0/1024.0); printf("time = %f s\n", time); /* Compute error */ error = norm2chol_sparsematrix(hm_A, sp_A); printf("||I-(ll*)^-1 sp||_2 "); printf("error = %g\n", error); del_hmatrix(l); } /*Cleaning up*/ del_truncmode(tm); del_stopwatch(sw); del_hmatrix(hm_A); del_hmatrix(hm_B); del_cluster(root_B_cols); del_cluster(root_B_rows); del_clustergeometry(cg_B_rows); del_clustergeometry(cg_B_cols); del_sparsematrix(sp_A); del_sparsematrix(sp_B); freemem(idx_B_rows); freemem(idx_B_cols); (void) printf(" %u matrices and\n" " %u vectors still active\n", getactives_amatrix(), getactives_avector()); uninit_h2lib(); printf("The end\n"); return 0; }