/*
 * listenData - handle one chunk of data arriving on socket descriptor fd.
 *
 * The payload hmm is handed to breakdown(). When that yields a true value,
 * identify() of the result is written back on the same descriptor;
 * otherwise the descriptor is closed.
 *
 * NOTE(review): presumably installed as the read callback of a listening
 * socket -- confirm against the socket setup code.
 */
void listenData(int fd, mixed hmm) {
    mixed parsed;

    // CHAT_D->do_raw_chat("network", "Listen <network> Received: " + hmm);
    parsed = breakdown(hmm);
    if (parsed) {
        socket_write(fd, identify(parsed));
        return;
    }
    /* Nothing usable came out of breakdown(): drop the connection. */
    socket_close(fd);
}
void cg_solve(OperatorType& A, const VectorType& b, VectorType& x, Matvec matvec, typename OperatorType::LocalOrdinalType max_iter, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance, typename OperatorType::LocalOrdinalType& num_iters, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr, timer_type* my_cg_times) { typedef typename OperatorType::ScalarType ScalarType; typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type; timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; timer_type total_time = mytimer(); int myproc = 0; #ifdef HAVE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &myproc); #endif if (!A.has_local_indices) { std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means " << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." 
<< std::endl; return; } size_t nrows = A.rows.size(); LocalOrdinalType ncols = A.num_cols; VectorType r(b.startIndex, nrows, 256); VectorType p(0, ncols, 512); VectorType Ap(b.startIndex, nrows, 64); normr = 0; magnitude_type rtrans = 0; magnitude_type oldrtrans = 0; LocalOrdinalType print_freq = max_iter/10; if (print_freq>50) print_freq = 50; if (print_freq<1) print_freq = 1; ScalarType one = 1.0; ScalarType zero = 0.0; TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); // print_vec(p.coefs, "p"); TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); TICK(); rtrans = dot_r2(r); TOCK(tDOT); //std::cout << "rtrans="<<rtrans<<std::endl; normr = std::sqrt(rtrans); if (myproc == 0) { std::cout << "Initial Residual = "<< normr << std::endl; } magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon(); #ifdef MINIFE_DEBUG std::ostream& os = outstream(); os << "brkdown_tol = " << brkdown_tol << std::endl; #endif #ifdef MINIFE_DEBUG_OPENMP std::cout << "Starting CG Solve Phase..." 
<< std::endl; #endif for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) { if (k == 1) { //TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY); TICK(); daxpby(one, r, zero, p); TOCK(tWAXPY); } else { oldrtrans = rtrans; TICK(); rtrans = dot_r2(r); TOCK(tDOT); const magnitude_type beta = rtrans/oldrtrans; TICK(); daxpby(one, r, beta, p); TOCK(tWAXPY); } normr = sqrt(rtrans); if (myproc == 0 && (k%print_freq==0 || k==max_iter)) { std::cout << "Iteration = "<<k<<" Residual = "<<normr<<std::endl; } magnitude_type alpha = 0; magnitude_type p_ap_dot = 0; TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT); #ifdef MINIFE_DEBUG os << "iter " << k << ", p_ap_dot = " << p_ap_dot; os.flush(); #endif if (p_ap_dot < brkdown_tol) { if (p_ap_dot < 0 || breakdown(p_ap_dot, Ap, p)) { std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl; #ifdef MINIFE_DEBUG os << "ERROR, numerical breakdown!"<<std::endl; #endif //update the timers before jumping out. my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[TOTAL] = mytimer() - total_time; return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = rtrans/p_ap_dot; #ifdef MINIFE_DEBUG os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl; #endif TICK(); daxpby(alpha, p, one, x); daxpby(-alpha, Ap, one, r); TOCK(tWAXPY); num_iters = k; } my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[MATVECDOT] = tMATVECDOT; my_cg_times[TOTAL] = mytimer() - total_time; }
void cg_solve(OperatorType& A, const VectorType& b, VectorType& x, Matvec matvec, typename OperatorType::LocalOrdinalType max_iter, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance, typename OperatorType::LocalOrdinalType& num_iters, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr, timer_type* my_cg_times) { typedef typename OperatorType::ScalarType ScalarType; typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type; timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; timer_type total_time = mytimer(); int myproc = 0; #ifdef HAVE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &myproc); #endif if (!A.has_local_indices) { std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means " << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." << std::endl; return; } size_t nrows = A.rows.size(); LocalOrdinalType ncols = A.num_cols; nvtxRangeId_t r1=nvtxRangeStartA("Allocation of Temporary Vectors"); VectorType r(b.startIndex, nrows); VectorType p(0, ncols); VectorType Ap(b.startIndex, nrows); nvtxRangeEnd(r1); #ifdef HAVE_MPI #ifndef GPUDIRECT //TODO move outside? 
cudaHostRegister(&p.coefs[0],ncols*sizeof(typename VectorType::ScalarType),0); cudaCheckError(); if(A.send_buffer.size()>0) cudaHostRegister(&A.send_buffer[0],A.send_buffer.size()*sizeof(typename VectorType::ScalarType),0); cudaCheckError(); #endif #endif normr = 0; magnitude_type rtrans = 0; magnitude_type oldrtrans = 0; LocalOrdinalType print_freq = max_iter/10; if (print_freq>50) print_freq = 50; if (print_freq<1) print_freq = 1; ScalarType one = 1.0; ScalarType zero = 0.0; TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); TICK(); rtrans = dot(r, r); TOCK(tDOT); normr = std::sqrt(rtrans); if (myproc == 0) { std::cout << "Initial Residual = "<< normr << std::endl; } magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon(); #ifdef MINIFE_DEBUG std::ostream& os = outstream(); os << "brkdown_tol = " << brkdown_tol << std::endl; #endif for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) { if (k == 1) { TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY); } else { oldrtrans = rtrans; TICK(); rtrans = dot(r, r); TOCK(tDOT); magnitude_type beta = rtrans/oldrtrans; TICK(); waxpby(one, r, beta, p, p); TOCK(tWAXPY); } normr = std::sqrt(rtrans); if (myproc == 0 && (k%print_freq==0 || k==max_iter)) { std::cout << "Iteration = "<<k<<" Residual = "<<normr<<std::endl; } magnitude_type alpha = 0; magnitude_type p_ap_dot = 0; TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT); #ifdef MINIFE_DEBUG os << "iter " << k << ", p_ap_dot = " << p_ap_dot; os.flush(); #endif //TODO remove false below if (false && p_ap_dot < brkdown_tol) { if (p_ap_dot < 0 || breakdown(p_ap_dot, Ap, p)) { std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl; #ifdef MINIFE_DEBUG os << "ERROR, numerical breakdown!"<<std::endl; #endif //update the timers before jumping out. 
my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[TOTAL] = mytimer() - total_time; return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = rtrans/p_ap_dot; #ifdef MINIFE_DEBUG os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl; #endif TICK(); waxpby(one, x, alpha, p, x); waxpby(one, r, -alpha, Ap, r); TOCK(tWAXPY); num_iters = k; } #ifdef HAVE_MPI #ifndef GPUDIRECT //TODO move outside? cudaHostUnregister(&p.coefs[0]); cudaCheckError(); if(A.send_buffer.size()>0) cudaHostUnregister(&A.send_buffer[0]); cudaCheckError(); #endif #endif my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[MATVECDOT] = tMATVECDOT; my_cg_times[TOTAL] = mytimer() - total_time; }