template <typename TAlgebra>
bool SchurPrecond<TAlgebra>::
preprocess(SmartPtr<MatrixOperator<matrix_type, vector_type> > A)
{
	try{
	//	status
		UG_DLOG(SchurDebug, 2, "\n% Initializing SCHUR precond: \n");

		m_pA = A;
		if(check_requirements() == false)
			return false;

	//	determine slicing for the SchurComplementOperator
		std::vector<slice_desc_type> skeletonMark;
		get_skeleton_slicing(A, skeletonMark);

	//	create & init local Schur complement object
		if(create_and_init_local_schur_complement(A, skeletonMark) == false)
			return false;

	//	configure the Schur complement solver
		init_skeleton_solver();

	//	status
		UG_DLOG(SchurDebug, 1, "\n% 'SchurPrecond::init()' done!\n");

	//	we're done
		return true;
	} UG_CATCH_THROW("SchurPrecond::" << __FUNCTION__ << " failed");
	return false;
} /* end 'SchurPrecond::preprocess()' */
template <typename TDomain>
void OrderDownwind(ApproximationSpace<TDomain>& approxSpace,
                   SmartPtr<UserData<MathVector<TDomain::dim>, TDomain::dim> > spVelocity,
                   number threshold)
{
	// TODO: implement for variable time and subset
	number time = 0.0;
	int si = 0;

	std::vector<SmartPtr<DoFDistribution> > vDD = approxSpace.dof_distributions();

	UG_DLOG(LIB_DISC_ORDER, 2, "Starting downwind ordering." << std::endl);
	for(size_t i = 0; i < vDD.size(); ++i){
		UG_DLOG(LIB_DISC_ORDER, 2, "Ordering DoF distribution " << i << "." << std::endl);
		OrderDownwindForDofDist<TDomain>(vDD[i], approxSpace.domain(),
		                                 spVelocity, time, si, threshold);
	}
}
/**
 * Initializes the app, script, and data paths.
 */
bool InitPaths(const char* argv0)
{
	PROFILE_FUNC();
//	The method currently only works if the path is explicitly specified
//	at startup or if UG4_ROOT is defined.

//	extract the application path
	char* ug4Root = getenv("UG4_ROOT");
	const char* pathSep = GetPathSeparator();

	std::string strRoot = "";

	if(ug4Root){
		strRoot = ug4Root;
	}
	else{
		std::string tPath = argv0;
		size_t pos = tPath.find_last_of(pathSep);

		if(pos != std::string::npos)
			tPath = tPath.substr(0, pos);
		else
			tPath = ".";

		strRoot = tPath + pathSep + "..";
	}

	if(!PathProvider::has_path(ROOT_PATH))
		PathProvider::set_path(ROOT_PATH, strRoot);
	if(!PathProvider::has_path(BIN_PATH))
		PathProvider::set_path(BIN_PATH, strRoot + pathSep + "bin");
	if(!PathProvider::has_path(SCRIPT_PATH))
		PathProvider::set_path(SCRIPT_PATH, strRoot + pathSep + "ugcore/scripts");
	if(!PathProvider::has_path(DATA_PATH))
		PathProvider::set_path(DATA_PATH, strRoot + pathSep + "data");
	if(!PathProvider::has_path(GRID_PATH))
		PathProvider::set_path(GRID_PATH, strRoot + pathSep + "data" + pathSep + "grids");
	if(!PathProvider::has_path(PLUGIN_PATH))
		PathProvider::set_path(PLUGIN_PATH, strRoot + pathSep + "bin" + pathSep + "plugins");
	if(!PathProvider::has_path(APPS_PATH))
		PathProvider::set_path(APPS_PATH, strRoot + pathSep + "apps");

//	log the paths
	UG_DLOG(MAIN, 1, "app path set to: " << PathProvider::get_path(BIN_PATH) <<
			std::endl << "script path set to: " << PathProvider::get_path(SCRIPT_PATH) <<
			std::endl << "data path set to: " << PathProvider::get_path(DATA_PATH) <<
			std::endl);

/*	if(!script::FileExists(PathProvider::get_path(BIN_PATH).c_str()) ||
		!script::FileExists(PathProvider::get_path(SCRIPT_PATH).c_str()) ||
		!script::FileExists(PathProvider::get_path(DATA_PATH).c_str()))
	{
		UG_LOG("WARNING: paths were not initialized correctly.\n");
		return false;
	}*/

	return true;
}
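/*	Minimal usage sketch for InitPaths. This is an illustrative assumption,
	not taken from the ug4 sources: the surrounding main() and the ug::
	qualification are ours. The point is simply that argv[0] must be passed
	through before any scripts or data files are loaded, so that the paths
	can be derived from the executable location (or from UG4_ROOT, if set).
*/
#include <cstdlib>

int main(int argc, char* argv[])
{
	// derive ROOT_PATH, BIN_PATH, SCRIPT_PATH, ... from the executable path
	if(!ug::InitPaths(argv[0]))
		return EXIT_FAILURE; // paths could not be initialized

	// ... regular startup continues here ...
	return EXIT_SUCCESS;
}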
template <typename TAlgebra>
void SchurPrecond<TAlgebra>::
schur_solver_backward(vector_type& u_inner, vector_type& f_inner, vector_type& u_skeleton)
{
	SCHUR_PROFILE_BEGIN(SchurSolverStep_Backward);
	UG_DLOG(SchurDebug, 3, "\n% 'SchurPrecond::step() - backward':\n");

//	apply the inner-skeleton coupling to the skeleton correction,
//	then solve the resulting inner system
	m_spSchurComplementOp->sub_operator(SD_INNER, SD_SKELETON)->apply(f_inner, u_skeleton);

	if(!m_spDirichletSolver->apply_return_defect(u_inner, f_inner))
		UG_LOG("SchurPrecond: Failed to solve inner system!\n");
}
template <typename TAlgebra>
void SchurPrecond<TAlgebra>::
create_aux_vectors(const vector_type& d)
{
	const SlicingData sd = m_spSchurComplementOp->slicing();
	const size_t n_inner    = m_spSchurComplementOp->sub_size(SD_INNER);
	const size_t n_skeleton = m_spSchurComplementOp->sub_size(SD_SKELETON);
	(void) n_skeleton; // only used in debug logging; avoids an 'unused' warning

//	create vectors (defects are stored additively, i.e. as partial sums over
//	the processes; corrections are stored consistently, i.e. fully on each process)
	if(m_aux_rhs[SD_SKELETON].invalid())
	{
		UG_DLOG(SchurDebug, 1, "% Creating skeleton defect vector of size " << n_skeleton << std::endl);
		m_aux_rhs[SD_SKELETON] = sd.slice_clone_without_values(d, SD_SKELETON);
		m_aux_rhs[SD_SKELETON]->set_storage_type(PST_ADDITIVE);
	}

	if(m_aux_sol[SD_SKELETON].invalid())
	{
		UG_DLOG(SchurDebug, 1, "% Creating skeleton corr vector of size " << n_skeleton << std::endl);
		m_aux_sol[SD_SKELETON] = sd.slice_clone_without_values(d, SD_SKELETON);
		m_aux_sol[SD_SKELETON]->set_storage_type(PST_CONSISTENT);
	}

	if(m_aux_rhs[SD_INNER].invalid())
	{
		UG_DLOG(SchurDebug, 1, "% Creating inner defect vector of size " << n_inner << std::endl);
		m_aux_rhs[SD_INNER] = make_sp(new vector_type(n_inner));
		m_aux_rhs[SD_INNER]->set_storage_type(PST_ADDITIVE);
	}

	if(m_aux_sol[SD_INNER].invalid())
	{
		UG_DLOG(SchurDebug, 1, "% Creating inner corr vector of size " << n_inner << std::endl);
		m_aux_sol[SD_INNER] = make_sp(new vector_type(n_inner));
		m_aux_sol[SD_INNER]->set_storage_type(PST_CONSISTENT);
	}
}
template <typename TAlgebra>
bool SchurPrecond<TAlgebra>::
create_and_init_local_schur_complement(SmartPtr<MatrixOperator<matrix_type, vector_type> > A,
                                       std::vector<slice_desc_type>& skeletonMark)
{
	try{
		SCHUR_PROFILE_BEGIN(SchurPrecondInit_CreateInitLocalSchurComplement);

		m_spSchurComplementOp = make_sp(new SchurComplementOperator<TAlgebra>(A, skeletonMark));
		UG_ASSERT(m_spSchurComplementOp.valid(), "Failed creating operator!");

	//	set dirichlet solver for local Schur complement
		m_spSchurComplementOp->set_dirichlet_solver(m_spDirichletSolver);

		if(debug_writer().valid())
			m_spSchurComplementOp->set_debug(debug_writer());

	//	init
		UG_DLOG(SchurDebug, 1, "\n% - Init local Schur complement ... ");
		m_spSchurComplementOp->init();
		UG_DLOG(SchurDebug, 1, "done.\n");

	//	1.4 check all procs
		/*bool bSuccess = true;
		if(!pcl::AllProcsTrue(bSuccess))
		{
			UG_LOG("ERROR in SchurPrecond::init: Some processes could not init"
					" local Schur complement.\n");
			return false;
		}*/

		return true;
	} UG_CATCH_THROW("SchurPrecond::" << __FUNCTION__ << " failed");
	return false;
}
/*	pre-refine
//	Resize the attachment containers
	{
		Selector& sel = get_refmark_selector();

		HNODE_PROFILE_BEGIN("HNode_ReserveAttachmentMemory");

		HNODE_PROFILE_BEGIN(HNODE_ReserveVrtData);
		mg.reserve<Vertex>(grid.num<Vertex>() +
					sel.num<Vertex>() + sel.num<Edge>() +
					sel.num<Quadrilateral>() + sel.num<Hexahedron>());
		HNODE_PROFILE_END();

		HNODE_PROFILE_BEGIN(HNODE_ReserveEdgeData);
		mg.reserve<Edge>(mg.num<Edge>() +
					2 * mg.num<Edge>() + 3 * mg.num<Triangle>() +
					4 * mg.num<Quadrilateral>() + 3 * mg.num<Prism>() +
					mg.num<Tetrahedron>() +
					4 * mg.num<Pyramid>() + 6 * mg.num<Hexahedron>());
		HNODE_PROFILE_END();

		HNODE_PROFILE_BEGIN(HNODE_ReserveFaceData);
		mg.reserve<Face>(mg.num<Face>() +
					4 * mg.num<Face>(l) + 10 * mg.num<Prism>(l) +
					8 * mg.num<Tetrahedron>(l) +
					9 * mg.num<Pyramid>(l) + 12 * mg.num<Hexahedron>(l));
		HNODE_PROFILE_END();

		HNODE_PROFILE_BEGIN(HNODE_ReserveVolData);
		mg.reserve<Volume>(mg.num<Volume>() +
					8 * mg.num<Tetrahedron>(l) + 8 * mg.num<Prism>(l) +
					6 * mg.num<Pyramid>(l) + 8 * mg.num<Hexahedron>(l));
		HNODE_PROFILE_END();

		HNODE_PROFILE_END();
	}
*/

void HangingNodeRefiner_Grid::
post_refine()
{
	if(!m_pGrid)
		throw(UGError("HangingNodeRefiner_Grid::post_refine: No grid assigned."));

//	erase unused elements
	UG_DLOG(LIB_GRID, 1, "  erasing elements.\n");

	Grid& grid = *m_pGrid;
	vector<Face*> vFaces;
	vector<Volume*> vVols;

//	erase faces that are no longer needed
	if(grid.num_volumes() > 0)
	{
		FaceIterator iter = m_selMarkedElements.begin<Face>();
		while(iter != m_selMarkedElements.end<Face>())
		{
			Face* f = *iter;
			++iter;
			CollectVolumes(vVols, grid, f);
			if(vVols.empty())
				grid.erase(f); // no adjacent volume left -> erase
		}
	}

//	erase edges that are no longer needed
	if(grid.num_faces() > 0)
	{
		EdgeIterator iter = m_selMarkedElements.begin<Edge>();
		while(iter != m_selMarkedElements.end<Edge>())
		{
			Edge* e = *iter;
			++iter;
			CollectFaces(vFaces, grid, e);
			if(vFaces.empty())
				grid.erase(e); // no adjacent face left -> erase
		}
	}
}
template <typename TAlgebra>
void SchurPrecond<TAlgebra>::
schur_solver_forward(vector_type& u_inner, vector_type& f_inner)
{
	UG_DLOG(SchurDebug, 3, "\n% 'SchurPrecond::step() - forward':");
	SCHUR_PROFILE_BEGIN(SchurSolverStep_Forward);

//	solve inner system; store first correction -> will be used again
	//UG_LOG("\nf_inner1="); UG_LOG_Vector<vector_type>(f_inner);
	m_spDirichletSolver->apply_return_defect(u_inner, f_inner);
	//UG_LOG("\nu_inner1="); UG_LOG_Vector<vector_type>(u_inner);

	//UG_LOG("\nf_skeleton="); UG_LOG_Vector<vector_type>(f_skeleton);
	// slicing.subtract_vector_slice(d, SD_SKELETON, f_skeleton);
	// f_skeleton *= -1.0;
}
template <typename TAlgebra>
void SchurPrecond<TAlgebra>::
schur_solve_skeleton(vector_type& u_skeleton, const vector_type& f_skeleton)
{
	SCHUR_PROFILE_BEGIN(SchurSolverStep_SchurSolve);
	UG_DLOG(SchurDebug, 3, "\n% 'SchurPrecond::step() - skeleton solve':");

	if(!f_skeleton.has_storage_type(PST_ADDITIVE))
		UG_THROW("ERROR in 'SchurPrecond::step': Inadequate storage format of 'f_skeleton'.\n");

	if(!m_spSkeletonSolver->apply(u_skeleton, f_skeleton))
		UG_LOG("SchurPrecond: Failed to solve skeleton system!\n");

	if(!u_skeleton.has_storage_type(PST_CONSISTENT))
		UG_THROW("ERROR in 'SchurPrecond::step': Inadequate storage format of 'u_skeleton'.\n");

	//UG_LOG("\nu_skeleton="); UG_LOG_Vector<vector_type>(u_skeleton);
}
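/*	For orientation: the forward, skeleton, and backward step routines above
	realize the standard block elimination behind the Schur complement method.
	In the usual notation (subscript I = inner, G = skeleton/interface
	unknowns) -- textbook algebra, not code taken verbatim from ug4:

		[ A_II  A_IG ] [ u_I ]   [ f_I ]
		[ A_GI  A_GG ] [ u_G ] = [ f_G ]

		S        = A_GG - A_GI A_II^{-1} A_IG    (Schur complement)
		f~_G     = f_G  - A_GI A_II^{-1} f_I     (forward elimination)
		S u_G    = f~_G                          (skeleton solve)
		A_II u_I = f_I  - A_IG u_G               (backward substitution)

	The Dirichlet solver plays the role of A_II^{-1}; the skeleton solver
	inverts S (approximately), acting only on the interface unknowns.
*/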
template <typename TDomain>
void OrderDownwindForDofDist(SmartPtr<DoFDistribution> dd, ConstSmartPtr<TDomain> domain,
                             SmartPtr<UserData<MathVector<TDomain::dim>, TDomain::dim> > spVelocity,
                             number time, int si, number threshold)
{
	static const int dim = TDomain::dim;
	const size_t num_ind = dd->num_indices();

	typedef typename std::pair<MathVector<dim>, size_t> pos_type;
	typedef typename std::vector<std::vector<size_t> > adjacency_type;

//	get positions of indices
	typename std::vector<pos_type> vPositions;
	ExtractPositions(domain, dd, vPositions);

//	get adjacency vector of vectors
	adjacency_type vvConnections;
	dd->get_connections(vvConnections);

//	check that vector sizes match
	if(vvConnections.size() != num_ind)
		UG_THROW("OrderDownwindForDofDist: Adjacency list of size " << num_ind <<
				" expected, got " << vvConnections.size());

	if(vPositions.size() != num_ind)
		UG_THROW("OrderDownwindForDofDist: Position list of size " << num_ind <<
				" expected, got " << vPositions.size());

//	init helper structures
	std::vector<size_t> vNewIndex(num_ind, 0);
	std::vector<size_t> vAncestorsCount(num_ind, 0);
	std::vector<bool> vVisited(num_ind, false);

//	remove connections that are not in stream direction
	adjacency_type::iterator VertexIter;
	std::vector<size_t>::iterator AdjIter;

//	count how many connections were kept / removed per adjacency vector
	size_t initialcount = 0, kept = 0, removed = 0;

	MathVector<dim> vVel1, vPos1, vPos2, vDir1_2;

	size_t i;
	for(VertexIter = vvConnections.begin(), i = 0;
		VertexIter != vvConnections.end(); ++VertexIter, ++i)
	{
		UG_DLOG(LIB_DISC_ORDER, 2, "Filtering vertex " << i << " adjacency vector." << std::endl);
		initialcount = VertexIter->size();
		kept = 0;
		removed = 0;

	//	get position and velocity of first trait
		vPos1 = vPositions.at(i).first;
		(*spVelocity)(vVel1, vPos1, time, si);

		if(VecLengthSq(vVel1) == 0)
		{
		//	if the velocity is zero at this trait, it does not interfere with others
		//	NOTE: otherwise this trait would be downwind-connected to all of its neighbors
		//	NOTE: VertexIter-> accesses the inner vector's functions; (*VertexIter) is the inner vector
			removed = VertexIter->size();
			VertexIter->clear();
		}
		else
		{
			AdjIter = VertexIter->begin();
			while(AdjIter != VertexIter->end())
			{
			//	get position of second trait
				vPos2 = vPositions.at(*AdjIter).first;

			//	get difference vector as direction vector
				VecSubtract(vDir1_2, vPos2, vPos1);

			//	compute angle between velocity and direction vector
				number anglex1_2 = VecAngle(vDir1_2, vVel1);

			//	if the angle is smaller than the threshold, keep the connection,
			//	else remove it
				if(anglex1_2 <= threshold && i != *AdjIter)
				{
					vAncestorsCount.at(*AdjIter) += 1;
					++AdjIter;
					++kept;
				}
				else
				{
					AdjIter = VertexIter->erase(AdjIter);
					++removed;
				}
			}
		}

		UG_DLOG(LIB_DISC_ORDER, 2, "Kept: " << kept << ", removed: " << removed <<
				" of " << initialcount << " entries in adjacency matrix." << std::endl << std::endl);
	}

//	calculate downwind order:
//	find vertices without any ancestors and start NumeriereKnoten on them
	size_t v, N;
	for(v = 0, N = 0; v < vvConnections.size(); ++v)
	{
		if(vAncestorsCount[v] == 0 && !vVisited[v])
			NumeriereKnoten(vvConnections, vVisited, vAncestorsCount, vNewIndex, N, v);
	}

//	sanity check
	if(N < vvConnections.size())
	{
		size_t fails = 0;
		for(v = 0; v < vvConnections.size(); ++v)
		{
			if(!vVisited[v])
			{
				UG_DLOG(LIB_DISC_ORDER, 2, v << " was not visited, has unresolved ancestors: "
						<< vAncestorsCount[v] << std::endl);
				++fails;
			}
		}
		UG_THROW("OrderDownwindForDofDist failed, " << fails << " traits unvisited." << std::endl);
	}

//	reorder traits
	dd->permute_indices(vNewIndex);
}
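/*	The helper NumeriereKnoten ("number nodes") is not shown in this section.
	Judging from the call site above, it performs the visit step of a
	topological sort over the filtered downwind graph. The following is a
	hypothetical reconstruction under that assumption -- not the actual ug4
	implementation:
*/
#include <vector>

// Assign the next free index to vertex v, then recursively number each
// successor as soon as its last unresolved ancestor has been processed.
void NumeriereKnoten(const std::vector<std::vector<size_t> >& vvConnection,
                     std::vector<bool>& vVisited,
                     std::vector<size_t>& vAncestorsCount,
                     std::vector<size_t>& vNewIndex,
                     size_t& N, size_t v)
{
	vVisited[v] = true;
	vNewIndex[v] = N++;

	for(size_t j = 0; j < vvConnection[v].size(); ++j)
	{
		const size_t succ = vvConnection[v][j];

		// every successor loses one unresolved ancestor; once the last
		// ancestor is resolved, the successor itself can be numbered
		if(--vAncestorsCount[succ] == 0 && !vVisited[succ])
			NumeriereKnoten(vvConnection, vVisited, vAncestorsCount,
			                vNewIndex, N, succ);
	}
}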
void CUDAManager::init()
{
	//cudaDeviceReset();

	// pick the best available CUDA-capable device
	cudaDeviceProp deviceProp;
	int devID = get_max_multiprocessor_cuda_device();
	if(devID < 0)
		UG_THROW("no CUDA device found.\n");

	CUDA_CHECK_SUCCESS(cudaSetDevice(devID), "setting up device " << devID);
	CUDA_CHECK_STATUS(cudaGetDeviceProperties(&deviceProp, devID));

	// statistics about the GPU device
	printf("> GPU device has %d Multi-Processors, SM %d.%d compute capabilities\n\n",
			deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);

	// pack the compute capability as hex, e.g. SM 1.1 -> 0x11
	int version = (deviceProp.major * 0x10 + deviceProp.minor);
	if(version < 0x11)
	{
		cudaDeviceReset();
		UG_THROW("Requires a minimum CUDA compute capability of 1.1\n");
	}

	m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;

	UG_DLOG(DID_CUDA, 0, "CUDA Initialized:"
		"\n - CUDA Device '" << deviceProp.name << "':" <<
		"\n - Total Global Memory: " << deviceProp.totalGlobalMem/(1024*1024*1024.0) << " GB"
		"\n - Shared Mem per Block: " << deviceProp.sharedMemPerBlock/1024.0 << " KB"
		"\n - Regs per Block: " << deviceProp.regsPerBlock <<
		"\n - Warp Size: " << deviceProp.warpSize <<
		"\n - Maximum Number of Threads per Block: " << deviceProp.maxThreadsPerBlock <<
		"\n - Max Thread Dim: (" << deviceProp.maxThreadsDim[0] << ", "
			<< deviceProp.maxThreadsDim[1] << ", " << deviceProp.maxThreadsDim[2] << ")" <<
		"\n - Max Grid Size: (" << deviceProp.maxGridSize[0] << ", "
			<< deviceProp.maxGridSize[1] << ", " << deviceProp.maxGridSize[2] << ")" <<
		"\n - Clock Rate: " << deviceProp.clockRate/1000.0 << " MHz"
		"\n - Total Const Mem: " << deviceProp.totalConstMem/1024.0 << " KB"
		"\n - Compute Capability: " << deviceProp.major << "." << deviceProp.minor <<
		"\n - Number of Multiprocessors: " << deviceProp.multiProcessorCount <<
		"\n - Maximum Texture Size 1D: " << deviceProp.maxTexture1D <<
		"\n - Maximum Texture Size 2D: " << deviceProp.maxTexture2D[0] << " x "
			<< deviceProp.maxTexture2D[1] <<
		"\n - Maximum Texture Size 3D: " << deviceProp.maxTexture3D[0] << " x "
			<< deviceProp.maxTexture3D[1] << " x " << deviceProp.maxTexture3D[2] <<
		"\n - Memory Clock Rate: " << deviceProp.memoryClockRate/1000.0 << " MHz"
		"\n - Memory Bus Width: " << deviceProp.memoryBusWidth <<
		"\n - L2 Cache Size: " << deviceProp.l2CacheSize <<
		"\n - Max Threads per Multiprocessor: " << deviceProp.maxThreadsPerMultiProcessor <<
		"\n");

	/* get handle to the CUBLAS context */
	cublasHandle = 0;
	cublasStatus_t cublasStatus = cublasCreate(&cublasHandle);
	CUDA_CHECK_STATUS(cublasStatus);

#ifdef USE_CUSPARSE
	/* get handle to the CUSPARSE context */
	cusparseHandle = 0;
	cusparseStatus_t cusparseStatus = cusparseCreate(&cusparseHandle);
	CUDA_CHECK_STATUS(cusparseStatus);

	cusparseMatDescr_t descr = 0;
	cusparseStatus = cusparseCreateMatDescr(&descr);
	CUDA_CHECK_STATUS(cusparseStatus);
#endif

	get_temp_buffer<double>(1024);
	m_tempRetBuffer = MyCudaAlloc<double>(4);
}
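/*	The CUDA_CHECK_* macros used above wrap the usual status-check idiom for
	CUDA runtime calls. A minimal sketch of such a macro -- an illustrative
	assumption, not the ug4 definition of CUDA_CHECK_STATUS:
*/
#include <cuda_runtime.h>
#include <sstream>
#include <stdexcept>

// Evaluate a CUDA runtime call and throw with file/line context on failure.
#define MY_CUDA_CHECK(call)                                          \
	do {                                                             \
		cudaError_t err_ = (call);                                   \
		if(err_ != cudaSuccess) {                                    \
			std::ostringstream oss_;                                 \
			oss_ << "CUDA error '" << cudaGetErrorString(err_)       \
			     << "' at " << __FILE__ << ":" << __LINE__;          \
			throw std::runtime_error(oss_.str());                    \
		}                                                            \
	} while(0)

// usage: MY_CUDA_CHECK(cudaSetDevice(0));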
void ParallelHNodeAdjuster::
ref_marks_changed(IRefiner& ref,
                  const std::vector<Vertex*>& vrts,
                  const std::vector<Edge*>& edges,
                  const std::vector<Face*>& faces,
                  const std::vector<Volume*>& vols)
{
	UG_DLOG(LIB_GRID, 1, "refMarkAdjuster-start: ParallelHNodeAdjuster::ref_marks_changed\n");
	UG_ASSERT(ref.grid(), "A refiner has to operate on a grid, before marks can be adjusted!");
	if(!ref.grid()){
		UG_DLOG(LIB_GRID, 1, "refMarkAdjuster-stop: ParallelHNodeAdjuster::ref_marks_changed\n");
		return;
	}

	Grid& grid = *ref.grid();
	if(!grid.is_parallel()){
		UG_DLOG(LIB_GRID, 1, "refMarkAdjuster-stop: ParallelHNodeAdjuster::ref_marks_changed\n");
		return;
	}

	DistributedGridManager& distGridMgr = *grid.distributed_grid_manager();
	GridLayoutMap& layoutMap = distGridMgr.grid_layout_map();

//	check whether new interface elements have been selected
	bool newInterfaceVrtsMarked = ContainsInterfaceElem(vrts, distGridMgr);
	bool newInterfaceEdgeMarked = ContainsInterfaceElem(edges, distGridMgr);
	bool newInterfaceFacesMarked = ContainsInterfaceElem(faces, distGridMgr);
	bool newInterfaceVolsMarked = ContainsInterfaceElem(vols, distGridMgr);

	bool newlyMarkedElems = newInterfaceVrtsMarked ||
							newInterfaceEdgeMarked ||
							newInterfaceFacesMarked ||
							newInterfaceVolsMarked;

	bool exchangeFlag = pcl::OneProcTrue(newlyMarkedElems);

	if(exchangeFlag){
		const byte consideredMarks = RM_REFINE | RM_ANISOTROPIC;
		ComPol_BroadcastRefineMarks<VertexLayout> compolRefVRT(ref, consideredMarks);
		ComPol_BroadcastRefineMarks<EdgeLayout> compolRefEDGE(ref, consideredMarks);
		ComPol_BroadcastRefineMarks<FaceLayout> compolRefFACE(ref, consideredMarks);

	//	send data SLAVE -> MASTER
		m_intfComVRT.exchange_data(layoutMap, INT_H_SLAVE, INT_H_MASTER, compolRefVRT);
		m_intfComEDGE.exchange_data(layoutMap, INT_H_SLAVE, INT_H_MASTER, compolRefEDGE);
		m_intfComFACE.exchange_data(layoutMap, INT_H_SLAVE, INT_H_MASTER, compolRefFACE);

		m_intfComVRT.communicate();
		m_intfComEDGE.communicate();
		m_intfComFACE.communicate();

	//	and now MASTER -> SLAVE (the selection has been adjusted on the fly)
		m_intfComVRT.exchange_data(layoutMap, INT_H_MASTER, INT_H_SLAVE, compolRefVRT);
		m_intfComEDGE.exchange_data(layoutMap, INT_H_MASTER, INT_H_SLAVE, compolRefEDGE);
		m_intfComFACE.exchange_data(layoutMap, INT_H_MASTER, INT_H_SLAVE, compolRefFACE);

		m_intfComVRT.communicate();
		m_intfComEDGE.communicate();
		m_intfComFACE.communicate();

		UG_DLOG(LIB_GRID, 1, "refMarkAdjuster-stop (force continue): ParallelHNodeAdjuster::ref_marks_changed\n");
	}

	UG_DLOG(LIB_GRID, 1, "refMarkAdjuster-stop: ParallelHNodeAdjuster::ref_marks_changed\n");
}