bool mat_sqrt(const complex_vec_t& matrix, complex_vec_t& result) { result.clear(); for(complex_vec_t::const_iterator i = matrix.begin(); i != matrix.end(); ++ i) { result.push_back(sqrt(*i)); } // for return true; } // mat_sqrt()
/**
 * multiplies every element of matrix by scalar and stores the products
 * in result. Any previous contents of result are replaced; on return
 * result holds matrix.size() elements. The output is assembled in a
 * temporary and then swapped into result, so result may be the same
 * vector as matrix. Always returns true.
 */
bool mat_mul(complex_t scalar, const complex_vec_t& matrix, complex_vec_t& result) {
	complex_vec_t scaled;
	scaled.reserve(matrix.size());	// avoid reallocations while filling
	for(complex_vec_t::const_iterator elem = matrix.begin(); elem != matrix.end(); ++ elem) {
		scaled.push_back((*elem) * scalar);
	} // for
	// publish only after the input has been fully read (result may alias matrix)
	result.swap(scaled);
	return true;
} // mat_mul()
bool AnalyticFormFactor::mat_sinc(unsigned int x_size, unsigned int y_size, unsigned int z_size, const complex_vec_t& matrix, complex_vec_t& result) { result.clear(); for(std::vector<complex_t>::const_iterator i = matrix.begin(); i != matrix.end(); ++ i) { result.push_back(sinc(*i)); } // for return true; } // AnalyticFormFactor::mat_sinc()
bool AnalyticFormFactor::mat_fq_inv(unsigned int x_size, unsigned int y_size, unsigned int z_size, const complex_vec_t& matrix, real_t y, complex_vec_t& result) { result.clear(); for(complex_vec_t::const_iterator i = matrix.begin(); i != matrix.end(); ++ i) { result.push_back(fq_inv(*i, y)); } // for return true; } // AnalyticFormFactor::mat_fq_inv()
/** * computes element-by-element division of two matrices (matrix1 / matrix2) into result */ bool mat_dot_div(unsigned int nx1, unsigned int ny1, unsigned int nz1, const complex_vec_t& matrix1, unsigned int nx2, unsigned int ny2, unsigned int nz2, const complex_vec_t& matrix2, complex_vec_t& result) { if(nx1 != nx2 || ny1 != ny2 || nz1 != nz2 || matrix1.size() != matrix2.size()) { std::cerr << "error: matrix sizes are not the same for dot division operation" << std::endl; return false; } // if result.clear(); complex_vec_t::const_iterator i1 = matrix1.begin(); complex_vec_t::const_iterator i2 = matrix2.begin(); for(; i1 != matrix1.end(); ++ i1, ++ i2) { result.push_back((*i1) / (*i2)); } // for return true; } // mat_dot_div()
/** * computes element-by-element product of two matrices into result */ bool mat_dot_prod(unsigned int x1_size, unsigned int y1_size, unsigned int z1_size, const complex_vec_t& matrix1, unsigned int x2_size, unsigned int y2_size, unsigned int z2_size, const complex_vec_t& matrix2, complex_vec_t& result) { if(x1_size != x2_size || y1_size != y2_size || z1_size != z2_size || matrix1.size() != matrix2.size()) { std::cerr << "error: matrix sizes are not the same for dot product operation" << std::endl; return false; } // if result.clear(); complex_vec_t::const_iterator i1 = matrix1.begin(); complex_vec_t::const_iterator i2 = matrix2.begin(); for(; i1 != matrix1.end(); ++ i1, ++ i2) { result.push_back((*i1) * (*i2)); } // for return true; } // mat_dot_prod()
bool NumericFormFactor::compute2(const char * filename, complex_vec_t &ff, RotMatrix_t & rot #ifdef USE_MPI , woo::MultiNode & world_comm, std::string comm_key #endif ) { // initialize init (rot, ff); // read file std::vector<vertex_t> vertices; std::vector<std::vector<int>> faces; std::vector<std::vector<int>> dummy; ObjectShapeReader shape_reader; if (!shape_reader.load_object (filename, vertices, faces, dummy)) { std::cerr << "Error: shape reader failed to load triangles" << std::endl; return false; } // create triangles int num_triangles = faces.size(); triangle_t * triangles = new (std::nothrow) triangle_t [num_triangles]; for (int i = 0; i < num_triangles; i++) { triangles[i].v1[0] = vertices[faces[i][0]-1].x; triangles[i].v1[1] = vertices[faces[i][0]-1].y; triangles[i].v1[2] = vertices[faces[i][0]-1].z; triangles[i].v2[0] = vertices[faces[i][1]-1].x; triangles[i].v2[1] = vertices[faces[i][1]-1].y; triangles[i].v2[2] = vertices[faces[i][1]-1].z; triangles[i].v3[0] = vertices[faces[i][2]-1].x; triangles[i].v3[1] = vertices[faces[i][2]-1].y; triangles[i].v3[2] = vertices[faces[i][2]-1].z; } //#ifndef __SSE3__ real_vec_t shape_def; //#else //#ifdef USE_GPU // real_vec_t shape_def; //#else // real_t * shape_def = NULL; //#endif //#endif //unsigned int num_triangles = read_shapes_file(filename, shape_def); #ifdef USE_MPI int num_procs = world_comm.size(comm_key); int rank = world_comm.rank(comm_key); bool master = world_comm.is_master(comm_key); #else bool master = true; #endif if(master) { std::cout << "-- Numerical form factor computation ..." 
<< std::endl << "** Using input shape file: " << filename << std::endl << "** Number of input triangles: " << num_triangles << std::endl << "** Q-grid resolution (q-points): " << nqy_ << std::endl #ifdef USE_MPI << "** Number of processes requested: " << num_procs << std::endl #endif << std::flush; } // if // copy q-points real_t * qx = new (std::nothrow) real_t [nqy_]; if (qx == NULL) { std::cerr << "Error: failure in allocation memeroy." << std::endl; return false; } for (int i = 0; i < nqy_; i++ ) qx[i] = QGrid::instance().qx(i); real_t * qy = new (std::nothrow) real_t [nqy_]; if (qy == NULL) { std::cerr << "Error: failure in allocation memeroy." << std::endl; return false; } for (int i = 0; i < nqy_; i++) qy[i] = QGrid::instance().qy(i); #ifdef FF_NUM_GPU cucomplex_t * qz = new (std::nothrow) cucomplex_t [nqz_]; if (qz == NULL) { std::cerr << "Error: failure in memeroy allocation." << std::endl; return 0; } for (int i = 0; i < nqz_; i++) { qz[i].x = QGrid::instance().qz_extended(i).real(); qz[i].y = QGrid::instance().qz_extended(i).imag(); } #else complex_t * qz = new (std::nothrow) complex_t [nqz_]; if (qz == NULL) { std::cerr << "Error: failure in memeroy allocation." << std::endl; return 0; } for (int i = 0; i < nqz_; i++) qz[i] = QGrid::instance().qz_extended(i); #endif real_t compute_time = 0.; #ifdef FF_NUM_GPU cucomplex_t * p_ff = NULL; // call kernel if (num_triangles != gff_.compute_exact_triangle(triangles, num_triangles, p_ff, nqy_, qx, qy, nqz_, qz, rot_, compute_time)) { std::cerr << "Calculation of numerical form-factor failed" << std::endl; return false; } for (int i = 0; i < nqz_; i++) ff.push_back (complex_t(p_ff[i].x, p_ff[i].y)); std::cout << "** FF GPU compute time: " << compute_time << " ms." 
<< std::endl; #else complex_t * p_ff = new (std::nothrow) complex_t[nqz_]; if (p_ff == NULL){ std::cerr << "Error: failed to allocate memory of size: " << nqz_ * sizeof(complex_t) << std::endl; return false; } if (num_triangles != cff_.compute_exact_triangle(triangles, num_triangles, p_ff, nqy_, qx, qy, nqz_, qz, rot_, compute_time)) { std::cerr << "Calculation of numerical form-factor failed" << std::endl; return false; } for (int i = 0; i < nqz_; i++) ff.push_back(p_ff[i]); std::cout << "** FF CPU compute time: " << compute_time << " ms." << std::endl; #endif delete [] qx; delete [] qy; delete [] qz; delete [] triangles; if (p_ff != NULL) delete [] p_ff; return true; }
bool NumericFormFactor::compute(const char * filename, complex_vec_t & ff, RotMatrix_t & rot #ifdef USE_MPI , woo::MultiNode &world_comm, std::string comm_key #endif ){ real_t comp_time = 0.0; // initialize init (rot, ff); unsigned int nqy = QGrid::instance().nqy(); unsigned int nqz = QGrid::instance().nqz_extended(); // warning: all procs read the shape file!!!! // TODO: improve to parallel IO, or one proc reading and sending to all ... // #ifndef __SSE3__ real_vec_t shape_def; // #else // #ifdef USE_GPU // real_vec_t shape_def; // #else // real_t* shape_def = NULL; // #endif // #endif // use the new file reader instead ... unsigned int num_triangles = read_shapes_file(filename, shape_def); // TODO ... <--- sadly all procs read this! IMPROVE!!! #ifdef USE_MPI int num_procs = world_comm.size(comm_key); int rank = world_comm.rank(comm_key); bool master = world_comm.is_master(comm_key); #else bool master = true; #endif if(master) { std::cout << "-- Numerical form factor computation ..." << std::endl << "** Using input shape file: " << filename << std::endl << "** Number of input triangles: " << num_triangles << std::endl << "** Q-grid resolution (q-points): " << nqz << std::endl #ifdef USE_MPI << "** Number of processes requested: " << num_procs << std::endl #endif << std::flush; } // if if(num_triangles < 1) { std::cerr << "error: no triangles found in specified definition file" << std::endl; return false; } // if // FIXME: this is a yucky temporary fix ... fix properly ... 
real_t* qx = new (std::nothrow) real_t[nqy](); real_t* qy = new (std::nothrow) real_t[nqy](); #ifdef FF_NUM_GPU cucomplex_t* qz = new (std::nothrow) cucomplex_t[nqz](); #else complex_t* qz = new (std::nothrow) complex_t[nqz](); #endif // create qy_and qz using qgrid instance for(unsigned int i = 0; i < nqy; ++ i) qx[i] = QGrid::instance().qx(i); for(unsigned int i = 0; i < nqy; ++ i) qy[i] = QGrid::instance().qy(i); for(unsigned int i = 0; i < nqz; ++ i) { #ifdef FF_NUM_GPU qz[i].x = QGrid::instance().qz_extended(i).real(); qz[i].y = QGrid::instance().qz_extended(i).imag(); #else qz[i] = QGrid::instance().qz_extended(i); #endif } // for #ifdef FF_NUM_GPU cucomplex_t *p_ff = NULL; #else complex_t *p_ff = NULL; #endif real_t kernel_time = 0.; unsigned int ret_numtriangles = 0; #ifdef FF_NUM_GPU // use GPU ret_numtriangles = gff_.compute_approx_triangle(shape_def, p_ff, nqy, qx, qy, nqz, qz, rot_, kernel_time); for (int i = 0; i < nqz; i++) ff.push_back(complex_t(p_ff[i].x, p_ff[i].y)); std::cout << "** FF GPU compute time: " << kernel_time << " ms." << std::endl; #else // use only CPU ret_numtriangles = cff_.compute_approx_triangle(shape_def, p_ff, nqy, qx, qy, nqz, qz, rot_, kernel_time); for (int i = 0; i < nqz; i++) ff.push_back(p_ff[i]); std::cout << "** FF CPU compute time: " << kernel_time << " ms." << std::endl; #endif if(p_ff != NULL) delete[] p_ff; delete[] qz; delete[] qy; delete[] qx; }
/**
 * computes the element-by-element exponential of matrix into result.
 * Any previous contents of result are replaced; on return result holds
 * matrix.size() elements. matrix is only read. The output is assembled in
 * a temporary and then swapped into result, so result may be the same
 * vector as matrix. Always returns true.
 */
bool mat_exp(complex_vec_t& matrix, complex_vec_t& result) {
	complex_vec_t exponentials;
	exponentials.reserve(matrix.size());	// avoid reallocations while filling
	for(complex_vec_t::const_iterator elem = matrix.begin(); elem != matrix.end(); ++ elem) {
		exponentials.push_back(exp(*elem));
	} // for
	// publish only after the input has been fully read (result may alias matrix)
	result.swap(exponentials);
	// a bool function must return a value; falling off the end is undefined behaviour
	return true;
} // mat_exp()