int main(int argc,char **argv) { PetscErrorCode ierr; KSP ksp; PC pc; Vec x,b; DM da; Mat A,Atrans; PetscInt dof=1,M=8; PetscBool flg,trans=PETSC_FALSE; ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; ierr = PetscOptionsGetInt(NULL,NULL,"-dof",&dof,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,NULL,"-M",&M,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-trans",&trans,NULL);CHKERRQ(ierr); ierr = DMDACreate(PETSC_COMM_WORLD,&da);CHKERRQ(ierr); ierr = DMSetDimension(da,3);CHKERRQ(ierr); ierr = DMDASetBoundaryType(da,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE);CHKERRQ(ierr); ierr = DMDASetStencilType(da,DMDA_STENCIL_STAR);CHKERRQ(ierr); ierr = DMDASetSizes(da,M,M,M);CHKERRQ(ierr); ierr = DMDASetNumProcs(da,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = DMDASetDof(da,dof);CHKERRQ(ierr); ierr = DMDASetStencilWidth(da,1);CHKERRQ(ierr); ierr = DMDASetOwnershipRanges(da,NULL,NULL,NULL);CHKERRQ(ierr); ierr = DMSetFromOptions(da);CHKERRQ(ierr); ierr = DMSetUp(da);CHKERRQ(ierr); ierr = DMCreateGlobalVector(da,&x);CHKERRQ(ierr); ierr = DMCreateGlobalVector(da,&b);CHKERRQ(ierr); ierr = ComputeRHS(da,b);CHKERRQ(ierr); ierr = DMSetMatType(da,MATBAIJ);CHKERRQ(ierr); ierr = DMSetFromOptions(da);CHKERRQ(ierr); ierr = DMCreateMatrix(da,&A);CHKERRQ(ierr); ierr = ComputeMatrix(da,A);CHKERRQ(ierr); /* A is non-symmetric. Make A = 0.5*(A + Atrans) symmetric for testing icc and cholesky */ ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&Atrans);CHKERRQ(ierr); ierr = MatAXPY(A,1.0,Atrans,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatScale(A,0.5);CHKERRQ(ierr); ierr = MatDestroy(&Atrans);CHKERRQ(ierr); /* Test sbaij matrix */ flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL, "-test_sbaij1", &flg,NULL);CHKERRQ(ierr); if (flg) { Mat sA; PetscBool issymm; ierr = MatIsTranspose(A,A,0.0,&issymm);CHKERRQ(ierr); if (issymm) { ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); } else {ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: A is non-symmetric\n");CHKERRQ(ierr);} ierr = MatConvert(A,MATSBAIJ,MAT_INITIAL_MATRIX,&sA);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); A = sA; } ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr); ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetDM(pc,(DM)da);CHKERRQ(ierr); if (trans) { ierr = KSPSolveTranspose(ksp,b,x);CHKERRQ(ierr); } else { ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); } /* check final residual */ flg = PETSC_FALSE; ierr = PetscOptionsGetBool(NULL,NULL, "-check_final_residual", &flg,NULL);CHKERRQ(ierr); if (flg) { Vec b1; PetscReal norm; ierr = KSPGetSolution(ksp,&x);CHKERRQ(ierr); ierr = VecDuplicate(b,&b1);CHKERRQ(ierr); ierr = MatMult(A,x,b1);CHKERRQ(ierr); ierr = VecAXPY(b1,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(b1,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Final residual %g\n",norm);CHKERRQ(ierr); ierr = VecDestroy(&b1);CHKERRQ(ierr); } ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
int main(int argc,char **argv) { PetscErrorCode ierr; KSP ksp; PC pc; Vec x,b; DA da; Mat A,Atrans; PetscInt dof=1,M=-8; PetscTruth flg,trans=PETSC_FALSE; PetscInitialize(&argc,&argv,(char *)0,help); ierr = PetscOptionsGetInt(PETSC_NULL,"-dof",&dof,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(PETSC_NULL,"-M",&M,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsGetTruth(PETSC_NULL,"-trans",&trans,PETSC_NULL);CHKERRQ(ierr); ierr = DACreate(PETSC_COMM_WORLD,&da);CHKERRQ(ierr); ierr = DASetDim(da,3);CHKERRQ(ierr); ierr = DASetPeriodicity(da,DA_NONPERIODIC);CHKERRQ(ierr); ierr = DASetStencilType(da,DA_STENCIL_STAR);CHKERRQ(ierr); ierr = DASetSizes(da,M,M,M);CHKERRQ(ierr); ierr = DASetNumProcs(da,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); ierr = DASetDof(da,dof);CHKERRQ(ierr); ierr = DASetStencilWidth(da,1);CHKERRQ(ierr); ierr = DASetVertexDivision(da,PETSC_NULL,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); ierr = DASetFromOptions(da);CHKERRQ(ierr); ierr = DACreateGlobalVector(da,&x);CHKERRQ(ierr); ierr = DACreateGlobalVector(da,&b);CHKERRQ(ierr); ierr = ComputeRHS(da,b);CHKERRQ(ierr); ierr = DAGetMatrix(da,MATBAIJ,&A);CHKERRQ(ierr); ierr = ComputeMatrix(da,A);CHKERRQ(ierr); /* A is non-symmetric. Make A = 0.5*(A + Atrans) symmetric for testing icc and cholesky */ ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&Atrans);CHKERRQ(ierr); ierr = MatAXPY(A,1.0,Atrans,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatScale(A,0.5);CHKERRQ(ierr); ierr = MatDestroy(Atrans);CHKERRQ(ierr); /* Test sbaij matrix */ flg = PETSC_FALSE; ierr = PetscOptionsGetTruth(PETSC_NULL, "-test_sbaij1", &flg,PETSC_NULL);CHKERRQ(ierr); if (flg){ Mat sA; ierr = MatConvert(A,MATSBAIJ,MAT_INITIAL_MATRIX,&sA);CHKERRQ(ierr); ierr = MatDestroy(A);CHKERRQ(ierr); A = sA; } ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetDA(pc,da);CHKERRQ(ierr); if (trans) { ierr = KSPSolveTranspose(ksp,b,x);CHKERRQ(ierr); } else { ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); } /* check final residual */ flg = PETSC_FALSE; ierr = PetscOptionsGetTruth(PETSC_NULL, "-check_final_residual", &flg,PETSC_NULL);CHKERRQ(ierr); if (flg){ Vec b1; PetscReal norm; ierr = KSPGetSolution(ksp,&x);CHKERRQ(ierr); ierr = VecDuplicate(b,&b1);CHKERRQ(ierr); ierr = MatMult(A,x,b1);CHKERRQ(ierr); ierr = VecAXPY(b1,-1.0,b);CHKERRQ(ierr); ierr = VecNorm(b1,NORM_2,&norm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Final residual %g\n",norm);CHKERRQ(ierr); ierr = VecDestroy(b1);CHKERRQ(ierr); } ierr = KSPDestroy(ksp);CHKERRQ(ierr); ierr = VecDestroy(x);CHKERRQ(ierr); ierr = VecDestroy(b);CHKERRQ(ierr); ierr = MatDestroy(A);CHKERRQ(ierr); ierr = DADestroy(da);CHKERRQ(ierr); ierr = PetscFinalize();CHKERRQ(ierr); return 0; }
bool PotentialFieldSolver::m_ParticleToMesh() { m_SpatialHasher_mass.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx, make_float3(m_origin.x,m_origin.y,m_origin.z), m_N_mass); m_SpatialHasher_mass.setHashParam(); m_SpatialHasher_mass.doSpatialHash(m_p_massPos->getDevicePtr(),m_N_mass); m_p_massPos_Reorder->memset(make_float4(0,0,0,0)); m_SpatialHasher_mass.reorderData(m_N_mass, (void*)(m_p_massPos->getDevicePtr()), (void*)(m_p_massPos_Reorder->getDevicePtr()), 4, 1); m_particle_mass_Reorder->memset(0); m_SpatialHasher_mass.reorderData(m_N_mass, (void*)(m_particle_mass->getDevicePtr()), (void*)(m_particle_mass_Reorder->getDevicePtr()), 1, 2); m_grid_density->memset(0); ParticleToMesh(m_SpatialHasher_mass.getStartTable(), m_SpatialHasher_mass.getEndTable(), m_p_massPos_Reorder->getDevicePtr(), m_particle_mass_Reorder->getDevicePtr(), m_SpatialHasher_mass.getCellSize().x, m_grid_density->getDevicePtr(), make_uint3(m_gridx,m_gridy,m_gridz), make_uint3(m_gridx,m_gridy,m_gridz), m_N_mass, m_origin); cudaMemcpy(m_grid_Rhs->getDevicePtr(), m_grid_density->getDevicePtr(), m_grid_Rhs->getSize()*m_grid_Rhs->typeSize(), cudaMemcpyDeviceToDevice); ComputeRHS(m_grid_Rhs->getDevicePtr(), m_SpatialHasher_mass.getCellSize().x*m_SpatialHasher_mass.getCellSize().x, -1.0, m_gridx*m_gridy*m_gridz); m_p_massPos_Reorder->copy(gf_GpuArray<float4>::DEVICE_TO_HOST); m_particle_mass_Reorder->copy(gf_GpuArray<double>::DEVICE_TO_HOST); double total_weight = 0; double total_mass = 0; for(int i=0; i<m_N_mass; i++) { double *host = m_particle_mass_Reorder->getHostPtr(); total_weight += fabs(host[i]); total_mass += host[i]; } double cx=0, cy=0, cz=0; for(int i=0; i<m_N_mass; i++) { float4 *hpos = m_p_massPos_Reorder->getHostPtr(); double *hmass = m_particle_mass_Reorder->getHostPtr(); cx+=hpos[i].x*fabs(hmass[i]); cy+=hpos[i].y*fabs(hmass[i]); cz+=hpos[i].z*fabs(hmass[i]); //printf("%f,%f,%f\n",cx,cy,cz); } cx=cx/total_weight; cy=cy/total_weight; cz=cz/total_weight; m_center.x = cx; m_center.y = cy; m_center.z = cz; m_total_mass = total_mass; applyDirichlet(m_grid_Rhs->getDevicePtr(), make_double4(cx,cy,cz,0), m_total_mass, make_double4(m_origin.x,m_origin.y,m_origin.z,0), m_SpatialHasher_mass.getCellSize().x, m_gridx, m_gridy, m_gridz); return true; }
int main(int argc, char** argv) { DM da; PetscErrorCode ierr; Vec x, rhs; Mat A, jac; ierr = PetscInitialize(&argc, &argv, NULL, NULL); CHKERRQ(ierr); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Laplacian in 2D", ""); CHKERRQ(ierr); ierr = PetscOptionsEnd(); CHKERRQ(ierr); ierr = HpddmRegisterKSP(); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); double time = MPI_Wtime(); ierr = DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 10, 10, PETSC_DECIDE, PETSC_DECIDE, 1, 1, 0, 0, &da); CHKERRQ(ierr); ierr = DMSetFromOptions(da); CHKERRQ(ierr); ierr = DMSetUp(da); CHKERRQ(ierr); ierr = DMCreateGlobalVector(da, &rhs); CHKERRQ(ierr); ierr = DMCreateGlobalVector(da, &x); CHKERRQ(ierr); ierr = DMCreateMatrix(da, &A); CHKERRQ(ierr); ierr = DMCreateMatrix(da, &jac); CHKERRQ(ierr); ierr = ComputeMatrix(da, jac, A); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime() - time; ierr = PetscPrintf(PETSC_COMM_WORLD, "--- Mat assembly = %f\n", time); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); KSP ksp; ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); CHKERRQ(ierr); ierr = KSPSetDM(ksp, da); CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetDMActive(ksp, PETSC_FALSE); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime() - time; ierr = PetscPrintf(PETSC_COMM_WORLD, "--- PC setup = %f\n", time); CHKERRQ(ierr); PetscScalar nus[SIZE_ARRAY_NU] = {0.1, 10.0, 0.001, 100.0}; float t_time[SIZE_ARRAY_NU]; int t_its[SIZE_ARRAY_NU]; int i, j; for (j = 0; j < 2; ++j) { { if (j == 1) { ierr = KSPSetType(ksp, "hpddm"); CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); ierr = VecZeroEntries(x); CHKERRQ(ierr); } ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); if (j == 1) { const HpddmOption* const opt = HpddmOptionGet(); int previous = HpddmOptionVal(opt, "krylov_method"); if (previous == HPDDM_KRYLOV_METHOD_GCRODR || previous == HPDDM_KRYLOV_METHOD_BGCRODR) HpddmDestroyRecycling(); } } for (i = 0; i < SIZE_ARRAY_NU; ++i) { ierr = VecZeroEntries(x); CHKERRQ(ierr); ierr = ComputeRHS(da, rhs, nus[i]); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); t_time[i] = MPI_Wtime() - time; PetscInt its; ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); t_its[i] = its; ierr = ComputeError(A, rhs, x); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_NU; ++i) { ierr = PetscPrintf(PETSC_COMM_WORLD, "%d\t%d\t%f\n", i + 1, t_its[i], t_time[i]); CHKERRQ(ierr); if (i > 0) { t_its[0] += t_its[i]; t_time[0] += t_time[i]; } } if (SIZE_ARRAY_NU > 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "------------------------\n\t%d\t%f\n", t_its[0], t_time[0]); CHKERRQ(ierr); } } ierr = KSPDestroy(&ksp); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = VecDestroy(&rhs); CHKERRQ(ierr); ierr = MatDestroy(&A); CHKERRQ(ierr); ierr = MatDestroy(&jac); CHKERRQ(ierr); ierr = DMDestroy(&da); CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
bool BiotSavartSolver::m_ParticleToMesh() { m_SpatialHasher_vort.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx, make_float3(m_origin.x,m_origin.y,m_origin.z), m_N_vort); m_SpatialHasher_vort.setHashParam(); m_SpatialHasher_vort.doSpatialHash(m_p_vortPos->getDevicePtr(),m_N_vort); m_p_vortPos_Reorder->memset(make_float4(0,0,0,0)); m_SpatialHasher_vort.reorderData(m_N_vort, (void*)(m_p_vortPos->getDevicePtr()), (void*)(m_p_vortPos_Reorder->getDevicePtr()), 4, 1); for(int i=0;i<NUM_COMPONENTS;i++) { m_particle_vort_Reorder[i]->memset(0); m_SpatialHasher_vort.reorderData(m_N_vort, (void*)(m_particle_vort[i]->getDevicePtr()), (void*)(m_particle_vort_Reorder[i]->getDevicePtr()), 1, 2); } for (int c=0;c<NUM_COMPONENTS;c++) { m_grid_vort[c]->memset(0); ParticleToMesh(m_SpatialHasher_vort.getStartTable(), m_SpatialHasher_vort.getEndTable(), m_p_vortPos_Reorder->getDevicePtr(), m_particle_vort_Reorder[c]->getDevicePtr(), m_SpatialHasher_vort.getCellSize().x, m_grid_vort[c]->getDevicePtr(), make_uint3(m_gridx,m_gridy,m_gridz), make_uint3(m_gridx,m_gridy,m_gridz), m_N_vort, m_origin); cudaMemcpy(m_grid_Rhs[c]->getDevicePtr(), m_grid_vort[c]->getDevicePtr(), m_grid_Rhs[c]->getSize()*m_grid_Rhs[c]->typeSize(), cudaMemcpyDeviceToDevice); ComputeRHS(m_grid_Rhs[c]->getDevicePtr(), m_SpatialHasher_vort.getCellSize().x*m_SpatialHasher_vort.getCellSize().x, -1.0, m_gridx*m_gridy*m_gridz); //m_p_vortPos_Reorder->copy(GpuArrayf4::DEVICE_TO_HOST); //m_particle_vort_Reorder[c]->copy(GpuArrayd::DEVICE_TO_HOST); //double total_weight = 0; //double total_mass = 0; //for(int i=0; i<m_N_vort; i++) //{ // double *host = m_particle_vort_Reorder[c]->getHostPtr(); // total_weight += fabs(host[i]); // total_mass += host[i]; //} //double cx=0, cy=0, cz=0; //for(int i=0; i<m_N_vort; i++) //{ // float4 *hpos = m_p_vortPos_Reorder->getHostPtr(); // double *hmass = m_particle_vort_Reorder[c]->getHostPtr(); // cx+=hpos[i].x*fabs(hmass[i]); // cy+=hpos[i].y*fabs(hmass[i]); // cz+=hpos[i].z*fabs(hmass[i]); // //printf("%f,%f,%f\n",cx,cy,cz); //} //cx=cx/total_weight; //cy=cy/total_weight; //cz=cz/total_weight; //m_center.x = cx; //m_center.y = cy; //m_center.z = cz; //m_total_vort[c] = total_mass; ////printf("%f,%f,%f,%f\n",cx,cy,cz,total_mass); //applyDirichlet(m_grid_Rhs[c]->getDevicePtr(), // make_double4(cx,cy,cz,0), // total_mass, // m_origin, // m_SpatialHasher_vort.getCellSize().x, // m_gridx, // m_gridy, // m_gridz); } return true; }