int main(){ double performance; perf_t start,stop; double a,b,c; a = 1; b = 1; long flop = 1; // Executions a vide, flush potentiel, ... // Performance d'une addition scalaire perf(&start); c = a + b; perf(&stop); // Verification printf("%lf = %lf + %lf\n", c, a, b); // Performance perf_diff(&start, &stop); performance = perf_mflops(&stop, flop); printf("Mflop/s : %lf \n", performance); return 0; }
void test_question_five(){ double performance; perf_t start,stop; int size = 100000; double * A =(double*) malloc(sizeof(double)*size); double * B =(double*) malloc(sizeof(double)*size); matrix_init(A,size,1); matrix_init(B,size,1); long flop = 2*size; // Executions a vide, flush potentiel, ... // Performance d'une addition scalaire perf(&start); double res = cblas_ddot(size,A,1,B,1); perf(&stop); // Verification printf("Result %lf\n",res); // Performance perf_diff(&start, &stop); performance = perf_mflops(&stop, flop); printf("Mflop/s : %lf \n", performance); free(A); free(B); }
/**
 *  Starts playback on behalf of the performance editor.
 *
 *  Without pause support the performance object is told to start playing,
 *  with the flag set to true.  With pause support, the request is routed
 *  through pause_key(true) instead.
 */

void
perfedit::start_playing ()
{
#if ! defined SEQ64_PAUSE_SUPPORT
    perf().start_playing(true);         /* forces start-from-perfedit       */
#else
    perf().pause_key(true);             /* was perf().start_key(true)       */
#endif
}
/**
 *  Pauses playback on behalf of the performance editor.
 *
 *  Without pause support this calls pause_playing(true) on the performance
 *  object; with pause support it calls pause_key(true).
 */

void
perfedit::pause_playing ()
{
#if ! defined SEQ64_PAUSE_SUPPORT
    perf().pause_playing(true);
#else
    perf().pause_key(true);
#endif
}
/**
 *  Stops playback on behalf of the performance editor.
 *
 *  Without pause support this calls stop_playing() on the performance
 *  object; with pause support it calls stop_key().
 */

void
perfedit::stop_playing ()
{
#if ! defined SEQ64_PAUSE_SUPPORT
    perf().stop_playing();
#else
    perf().stop_key();
#endif
}
/**
 *  Splits the trigger of a sequence at the given tick and repaints the
 *  affected row.
 *
 * \param a_sequence
 *      Number of the sequence whose trigger is split.
 *
 * \param a_tick
 *      Tick passed on to sequence::split_trigger().
 */

void
perfroll::split_trigger (int a_sequence, long a_tick)
{
    /* Record undo information before the triggers are modified. */

    perf().push_trigger_undo();
    perf().get_sequence(a_sequence)->split_trigger(a_tick);

    /* Redraw the row into the pixmap, then copy that row to the window. */

    draw_background_on(m_pixmap, a_sequence);
    draw_sequence_on(m_pixmap, a_sequence);
    draw_drawable_row(m_window, m_pixmap, m_drop_y);
}
/**
 *  Shows the transpose value, with an explicit sign, in the transpose
 *  entry field, silences all notes, and then applies the value to the
 *  performance object.
 *
 * \param transpose
 *      The transposition value to display and apply.
 */

void
perfedit::set_transpose (int transpose)
{
    char text[12];                      /* sign, up to 10 digits, and NUL   */
    snprintf(text, sizeof text, "%+d", transpose);
    m_entry_xpose->set_text(text);

    perf().all_notes_off();
    perf().set_transpose(transpose);
}
/*
 * Application entry point.
 *
 * Thread 0 performs the global set-up (ginit) and tear-down (gdestroy);
 * every thread runs init()/destroy(), with thread_barrior() calls between
 * the phases.  perf() is invoked three times, and the function finally
 * spins forever rather than returning.
 */
int main(int argc, char** argv) {
	/* printf("Thread %d bootting\n", thread_id()); */

	/* Global initialisation, thread 0 only. */
	if(thread_id() == 0) {
		ginit(argc, argv);
	}
	thread_barrior();

	/* Per-thread initialisation. */
	init(argc, argv);
	thread_barrior();

	printf("PacketNgin APP Start\n");

	int round;
	for(round = 0; round < 3; round++) {
		perf();
	}

	/*
	 * Disabled packet-processing loop, kept for reference:
	 *
	 *	uint32_t i = 0;
	 *	while(1) {
	 *		uint32_t count = ni_count();
	 *		if(count > 0) {
	 *			i = (i + 1) % count;
	 *			NetworkInterface* ni = ni_get(i);
	 *			if(ni_has_input(ni)) {
	 *				process(ni);
	 *			}
	 *		}
	 *	}
	 */

	thread_barrior();

	/* Per-thread tear-down. */
	destroy();
	thread_barrior();

	/* Global tear-down, thread 0 only. */
	if(thread_id() == 0) {
		gdestroy(argc, argv);
	}

	/* Never return: park the thread here. */
	for(;;)
		;

	return 0;
}
void wzPerfShutdown() { if (perfList.size() == 0) { return; } QString ourfile = PHYSFS_getWriteDir(); ourfile.append("gfx-performance.csv"); // write performance counter list to file QFile perf(ourfile); perf.open(QIODevice::WriteOnly); perf.write("START, EFF, TERRAIN, LOAD, PRTCL, WATER, MODELS, MISC\n"); for (int i = 0; i < perfList.size(); i++) { QString line; line += QString::number(perfList[i].counters[PERF_START_FRAME]); for (int j = 1; j < PERF_COUNT; j++) { line += ", " + QString::number(perfList[i].counters[j]); } line += "\n"; perf.write(line.toUtf8()); } // all done, clear data perfStarted = false; perfList.clear(); queryActive = PERF_COUNT; }
/**
 *  Passes the state of the JACK button to the performance object, then
 *  sets the button to the value that perform::set_jack_mode() returned.
 */

void
perfedit::set_jack_mode ()
{
    const bool requested = m_button_jack->get_active();
    m_button_jack->set_active(perf().set_jack_mode(requested));
}
// Writes count() to the stream as a space followed by a right-aligned,
// fixed-notation field of width 9. When perf() is true the value is
// printed with 6 decimal places, otherwise with none.
void V3Statistic::dump(std::ofstream& os) const {
    const int decimals = perf() ? 6 : 0;
    os << " " << std::right << std::fixed << std::setprecision(decimals)
       << std::setw(9) << count();
}
/**
 *  Handles a key press in the performance editor.
 *
 *  For key-press events, the keystroke is first offered to the playback
 *  key handler of the performance object.  If that does not handle it and
 *  the Ctrl key is held, Z/z triggers undo and R/r triggers redo.  Any
 *  event not handled here is passed to the perftime pane and then to the
 *  base-class handler.
 *
 * \param ev
 *      The GDK key event.
 *
 * \return
 *      Returns true if the event was handled here; otherwise the result of
 *      Gtk::Window::on_key_press_event().
 */

bool
perfedit::on_key_press_event (GdkEventKey * ev)
{
    if (CAST_EQUIVALENT(ev->type, SEQ64_KEY_PRESS))
    {
        keystroke k(ev->keyval, SEQ64_KEYSTROKE_PRESS, ev->state);
        if (perf().playback_key_event(k, true))
            return true;                            /* event handled        */

        if (is_ctrl_key(ev))
        {
            if (OR_EQUIVALENT(ev->keyval, SEQ64_z, SEQ64_Z))    /* undo     */
            {
                undo();
                return true;
            }
            if (OR_EQUIVALENT(ev->keyval, SEQ64_r, SEQ64_R))    /* redo     */
            {
                redo();
                return true;
            }
        }
    }

    /* Not handled above: the time pane sees it, then the base class. */

    (void) m_perftime->key_press_event(ev);
    return Gtk::Window::on_key_press_event(ev);
}
/**
 *  Computes the roll length in ticks: the maximum trigger tick of the
 *  performance, rounded down to a multiple of m_ticks_per_bar, plus
 *  m_ppqn * m_page_factor.
 */

void
perfroll::init_before_show ()
{
    m_roll_length_ticks = perf().get_max_trigger();

    /* Drop the partial bar at the end. */

    m_roll_length_ticks =
        m_roll_length_ticks - (m_roll_length_ticks % (m_ticks_per_bar));

    /* Then append the extra page amount. */

    m_roll_length_ticks = m_roll_length_ticks + m_ppqn * m_page_factor;
}
/**
 *  Handles a mouse-button press in the names pane.
 *
 *  The clicked row is converted to a sequence number, which becomes the
 *  current sequence.  A left click on an active sequence toggles its song
 *  mute state.  If Shift is the only default modifier held, the song mute
 *  state of every other existing sequence is toggled instead and the
 *  clicked sequence is left alone.  A redraw is queued in both cases.
 *
 * \param ev
 *      The GDK button event.
 *
 * \return
 *      Always returns true.
 */

bool
perfnames::on_button_press_event (GdkEventButton * ev)
{
    int seqnum = convert_y(int(ev->y));
    current_seq(seqnum);

    /* Only left clicks on active sequences change anything. */

    if (! (SEQ64_CLICK_LEFT(ev->button)) || ! perf().is_active(seqnum))
        return true;

    /* Filter out caps lock, num lock, and the like. */

    guint modifiers = gtk_accelerator_get_default_mod_mask();
    if ((ev->state & modifiers) == SEQ64_SHIFT_MASK)
    {
        /*
         * Shift-click:  flip the mute state of all the other sequences.
         * Slots for which no sequence object exists are skipped.
         */

        for (int s = 0; s < m_sequence_max; ++s)
        {
            if (s == seqnum)
                continue;

            sequence * other = perf().get_sequence(s);
            if (not_nullptr(other))
                other->set_song_mute(! other->get_song_mute());
        }
    }
    else
    {
        sequence * clicked = perf().get_sequence(seqnum);
        clicked->set_song_mute(! clicked->get_song_mute());
    }
    enqueue_draw();
    return true;
}
int main (int argc, char** argv) { libMesh::init (argc, argv); { std::cout << "Running " << argv[0]; for (int i=1; i<argc; i++) std::cout << " " << argv[i]; std::cout << std::endl << std::endl; config.load(); config.print(); PerfLog perf("Main Program"); perf.start_event("program init"); Mesh mesh (config.dim); MeshData mesh_data(mesh); //mesh_data.activate(); mesh_data.enable_compatibility_mode(); mesh.read("tmp/in.xda",&mesh_data); mesh.find_neighbors(); //mesh_data.read("data.xta"); //mesh.print_info(); EquationSystems equation_systems (mesh,&mesh_data); LinearImplicitSystem & eigen_system = equation_systems.add_system<LinearImplicitSystem> ("Poisson"); //equation_systems.add_system<EigenSystem> ("Poisson"); equation_systems.get_system("Poisson").add_variable("u", FIRST); equation_systems.get_system("Poisson").attach_assemble_function (assemble_poisson); unsigned int nev = config.eigs; equation_systems.parameters.set<unsigned int>("eigenpairs") = nev; equation_systems.parameters.set<unsigned int>("basis vectors") = nev*3; // eigen_system.eigen_solver-> set_eigensolver_type(ARNOLDI); //eigen_system.eigen_solver-> set_eigensolver_type(SUBSPACE); //eigen_system.eigen_solver-> set_eigensolver_type(POWER); // eigen_system.eigen_solver-> set_eigensolver_type(LANCZOS); // eigen_system.set_eigenproblem_type(GHEP); // eigen_system.eigen_solver->set_position_of_spectrum(SMALLEST_MAGNITUDE); //eigen_system.eigen_solver->set_position_of_spectrum(LARGEST_MAGNITUDE); equation_systems.parameters.set<Real>("linear solver tolerance") = pow(TOLERANCE, 5./3.); equation_systems.parameters.set<unsigned int> ("linear solver maximum iterations") = 1000; equation_systems.init(); //equation_systems.print_info(); perf.stop_event("program init"); if (config.assemble) { assemble_poisson(equation_systems, "Poisson"); } if (!config.printlog) perf.clear(); } return libMesh::close(); }
/**
 *  Forwards a key press to perform::perfroll_key_event(), together with
 *  the current drop sequence.  If the key was handled, the background
 *  pixmap is refilled and a redraw is queued.
 *
 * \param ev
 *      The GDK key event.
 *
 * \return
 *      Returns the result of perform::perfroll_key_event().
 */

bool
perfroll::on_key_press_event (GdkEventKey * ev)
{
    keystroke k(ev->keyval, KEYSTROKE_PRESS, ev->state);
    const bool handled = perf().perfroll_key_event(k, m_drop_sequence);
    if (! handled)
        return handled;

    fill_background_pixmap();
    queue_draw();
    return handled;
}
void test_question_six(){ double performance; perf_t start,stop; int size = 1000000; double * A = (double*) malloc(sizeof(double)*size); double * B = (double*) malloc(sizeof(double)*size); matrix_init(A,size,1); matrix_init(B,size,1); int size_temp = 50; long flop; double res; while(size_temp <= size){ flop = 2*size_temp; // Performance d'une addition scalaire perf(&start); res = cblas_ddot(size_temp,A,1,B,1); perf(&stop); perf_diff(&start, &stop); performance = perf_mflops(&stop, flop); printf("%d %lf \n",size_temp, performance); size_temp = size_temp + 0.25 * size_temp; } free(A); free(B); }
// Dispatches a received node message to the handler matching its command
// and returns the member HTTP data object (m_sHttpData).
//
// a_rNodeData : the received message; its command selects the handler.
// a_board     : passed through to the handlers.
HttpData & NodeToVnfmEvent::
Receive(NodeData & a_rNodeData, DashBoard & a_board)
{
    gLog->DEBUG("%-24s| Receive", "NodeToVnfmEvent");

    const bool bError = a_rNodeData.IsError();
    if (bError)
    {
        gLog->DEBUG("%-24s| Received Msg is Error, message is going to discard",
                    "NodeToVnfmEvent");
        m_sHttpData.Clear();
        // NOTE(review): the log text says the message is discarded, yet
        // execution continues into the dispatch below -- confirm intent.
    }

    // PRA -> VNF messages are all responses.
    switch (a_rNodeData.GetCommand())
    {
    case CMD_VNF_PRA_READY:
        ready(a_rNodeData, a_board);
        break;

    case CMD_VNF_PRA_START:
        started(a_rNodeData, a_board);
        break;

    case CMD_VNF_PRA_STOP:
        stopped(a_rNodeData, a_board);
        break;

    case CMD_VNF_SUBSCRIBER:
        subscriber(a_rNodeData, a_board);
        break;

    case CMD_VNF_EVENT:
        event(a_rNodeData, a_board);
        break;

    case CMD_RSA_PERF_REPORT:
        perf(a_rNodeData, a_board);
        break;

    // From EventAPI - SendTps()
    case CMD_VNF_PERF_TPS:
        perfTps(a_rNodeData, a_board);
        break;

    case CMD_REGISTER_PROVIDER:
        m_pResourceAndTpsProvider->Register(a_rNodeData.GetBody());
        break;

    case CMD_VNF_PRA_INSTALL:
        // Nothing to do for this command.
        break;

    default:
        unknown(a_rNodeData, a_board);
        break;
    }

    return m_sHttpData;
}
/**
 *  Redraws the name of every sequence in the visible rows that the
 *  performance object reports as dirty via is_dirty_names().  Rows that
 *  map to a sequence number at or beyond m_sequence_max are skipped.
 */

void
perfnames::redraw_dirty_sequences ()
{
    const int last_row = m_window_y / m_names_y;
    for (int row = 0; row <= last_row; ++row)
    {
        const int seq = row + m_sequence_offset;
        if (seq >= m_sequence_max)
            continue;

        if (perf().is_dirty_names(seq))
            draw_sequence(seq);
    }
}
/**
 *  Sets the beat width.  Nothing happens unless the new value is positive
 *  and differs from the current one.  The value is shown in the
 *  beat-width entry field, stored, and the guides are recalculated.  If a
 *  previous non-zero value existed, the performance is flagged as modified.
 *
 * \param bw
 *      The new beat width.
 */

void
perfedit::set_beat_width (int bw)
{
    if (bw == m_bw || bw <= 0)
        return;

    char text[8];
    snprintf(text, sizeof text, "%d", bw);
    m_entry_bw->set_text(text);

    /*
     * A zero m_bw means we are still in construction; otherwise this is a
     * modification.
     */

    if (m_bw != 0)
        perf().modify();

    m_bw = bw;
    set_guides();
}
/**
 *  Sets the beats-per-bar value.  Nothing happens unless the new value is
 *  positive and differs from the current one.  The value is shown in the
 *  beats-per-bar entry field, stored, and the guides are recalculated.  If
 *  a previous non-zero value existed, the performance is flagged as
 *  modified.
 *
 * \param bpm
 *      The new beats-per-bar value.
 */

void
perfedit::set_beats_per_bar (int bpm)
{
    if (bpm == m_bpm || bpm <= 0)
        return;

    char text[8];
    snprintf(text, sizeof text, "%d", bpm);
    m_entry_bpm->set_text(text);

    /*
     * A zero m_bpm means we are still in construction; otherwise this is a
     * modification.
     */

    if (m_bpm != 0)
        perf().modify();

    m_bpm = bpm;
    set_guides();
}
void perfroll::draw_progress () { long tick = perf().get_tick(); long tick_offset = m_4bar_offset * m_ticks_per_bar; int progress_x = (tick - tick_offset) / m_perf_scale_x; int old_progress_x = (m_old_progress_ticks - tick_offset) / m_perf_scale_x; m_window->draw_drawable /* draw old */ ( m_gc, m_pixmap, old_progress_x, 0, old_progress_x, 0, 1, m_window_y ); m_gc->set_foreground(black()); m_window->draw_line(m_gc, progress_x, 0, progress_x, m_window_y); m_old_progress_ticks = tick; }
// === FUNCTION ============================================================ // Name: TPlot::GetHist1D // Description: // =========================================================================== TH1* TPlot::GetHist1D(std::string histname, std::string det, std::string algo) { if (histname.find("Response") != std::string::npos ) { std::stringstream ss; ss << algDir[algo] <<"/" << histname.substr(0, histname.find("_Response")); std::cout << "fsf " << ss.str() << std::endl; Perf2D perf((TH2D*)detFile[det]->Get(ss.str().c_str()), 3); perf.DoPlotEachBin(det, algo); return perf.GetPerf(histname); } if (histname.find("Resolution") != std::string::npos ) { std::stringstream ss; ss << algDir[algo] <<"/" << histname.substr(0, histname.find("_Resolution")); std::cout << "fsf " << ss.str() << std::endl; Perf2D perf((TH2D*)detFile[det]->Get(ss.str().c_str()), 4); /*perf.DoPlotEachBin(det, algo);*/ return perf.GetPerf(histname); } if (histname.find("_Eff") != std::string::npos ) { return GetEffHist(histname, det, algo); } if (list1D.find(histname) != list1D.end()) { std::stringstream ss; ss << algDir[algo] <<"/" << histname; return (TH1*)detFile[det]->Get(ss.str().c_str()); } //return true; } // ----- end of function TPlot::GetHist1D -----
/*
 * Entry point of the "perf" test program.
 *
 * Sets the default test file size to 10 MiB, lets the common argument
 * handler process the command line, checks the test file system, and then
 * runs perf().  Returns 1 when the argument handler reports that the test
 * should not run, 0 otherwise.
 */
int main(int argc, char *argv[])
{
	/* Set default test file size */
	tests_size_parameter = 10 * 1024 * 1024;

	/* Handle common arguments */
	if (!tests_get_args(argc, argv, perf_get_title(),
			    perf_get_description(), "z"))
		return 1;

	/* Change directory to the file system and check it is ok for testing */
	tests_check_test_file_system();

	/* Do the actual test */
	perf();
	return 0;
}
/*
 * Entry point of the OSS media file example program.
 *
 * argv[1] selects the operation: write, append, read, seek, error_code,
 * idr <arg>, perf [loops], or app <arg>.  "perf" uses argv[2] as the loop
 * count only when exactly one extra argument is given, otherwise 1000.
 *
 * Returns -1 when a required argument is missing, 0 otherwise (including
 * for an unsupported operation, as before).
 *
 * The library is initialised up front, so every exit path now calls
 * oss_media_destroy(); the early "usage" returns previously skipped it.
 */
int main(int argc, char *argv[])
{
    int rc = 0;
    const char *op;

    oss_media_init(AOS_LOG_INFO);

    if (argc < 2) {
        usage();
        oss_media_destroy();
        return -1;
    }
    op = argv[1];

    /* example of oss media file functions */
    if (strcmp("write", op) == 0) {
        write_file();
    } else if (strcmp("append", op) == 0) {
        append_file();
    } else if (strcmp("read", op) == 0) {
        read_file();
    } else if (strcmp("seek", op) == 0) {
        seek_file();
    } else if (strcmp("error_code", op) == 0) {
        error_code();
    } else if (strcmp("idr", op) == 0) {
        if (argc < 3) {
            usage();
            rc = -1;
        } else {
            idr(argv[2]);
        }
    } else if (strcmp("perf", op) == 0) {
        int loop = (argc == 3) ? atoi(argv[2]) : 1000;
        perf(loop);
    } else if (strcmp("app", op) == 0) {
        if (argc < 3) {
            usage();
            rc = -1;
        } else {
            camera_app(argv[2]);
        }
    } else {
        printf("Unsupport operation:%s\n", op);
        usage();
    }

    /* Single clean-up point for every path that got past the argc check. */
    oss_media_destroy();
    return rc;
}
/**
 * Shows details of the nearest airspace.
 *
 * If an airspace warning manager exists and holds warnings, the airspace
 * warnings dialog is shown modally and nothing else happens.  Otherwise
 * the airspace database is searched with an AirspaceSoonestSort, built
 * from the current aircraft state, a glide performance model, a limit of
 * 1800 and a visibility predicate.  If an airspace is found, its details
 * dialog is opened and the airspace popup message is acknowledged.
 *
 * @param misc unused
 */
void
InputEvents::eventNearestAirspaceDetails(gcc_unused const TCHAR *misc)
{
  const MoreData &basic = CommonInterface::Basic();
  const DerivedInfo &calculated = CommonInterface::Calculated();
  const ComputerSettings &settings_computer =
    CommonInterface::GetComputerSettings();

  ProtectedAirspaceWarningManager *airspace_warnings = GetAirspaceWarnings();
  const bool have_warnings =
    airspace_warnings != NULL && !airspace_warnings->warning_empty();
  if (have_warnings) {
    // Prevent the dialog from closing itself without active warning
    // This is relevant if there are only acknowledged airspaces in the list
    // AutoClose will be reset when the dialog is closed again by hand
    dlgAirspaceWarningsShowModal(*XCSoarInterface::main_window,
                                 *airspace_warnings);
    return;
  }

  const AircraftState aircraft_state = ToAircraftState(basic, calculated);
  AirspaceVisiblePredicate visible(settings_computer.airspace,
                                   CommonInterface::GetMapSettings().airspace,
                                   aircraft_state);

  // Use the task glide polar, with its MacCready value raised to at least
  // fixed_one.
  GlidePolar polar = settings_computer.polar.glide_polar_task;
  polar.SetMC(max(polar.GetMC(),fixed_one));

  AirspaceAircraftPerformanceGlide perf(polar);
  AirspaceSoonestSort ans(aircraft_state, perf, fixed(1800), visible);

  const AbstractAirspace *nearest = ans.find_nearest(airspace_database);
  if (nearest == NULL)
    return;

  dlgAirspaceDetails(*nearest, airspace_warnings);

  // clear previous warning if any
  XCSoarInterface::main_window->popup.Acknowledge(PopupMessage::MSG_AIRSPACE);

  // TODO code: No control via status data (ala DoStatusMEssage)
  // - can we change this?
  //  Message::AddMessage(5000, Message::MSG_AIRSPACE, text);
}
/**
 *  Repaints, into the pixmap, every visible row whose sequence the
 *  performance object reports as dirty via is_dirty_perf().  If at least
 *  one row was repainted, the whole pixmap is copied to the window.
 */

void
perfroll::redraw_dirty_sequences ()
{
    const int first_row = 0;
    const int last_row = m_window_y / m_names_y;
    bool pixmap_changed = false;
    for (int row = first_row; row <= last_row; ++row)
    {
        const int seq = row + m_sequence_offset;
        if (! perf().is_dirty_perf(seq))
            continue;

        draw_background_on(m_pixmap, seq);
        draw_sequence_on(m_pixmap, seq);
        pixmap_changed = true;
    }
    if (! pixmap_changed)
        return;

    m_window->draw_drawable
    (
        m_gc, m_pixmap, 0, 0, 0, 0, m_window_x, m_window_y
    );
}
bool perfedit::timeout () { m_perfroll->follow_progress(); /* keep up with progress */ m_perfroll->redraw_progress(); m_perfnames->redraw_dirty_sequences(); #ifdef SEQ64_STAZED_JACK_SUPPORT if (m_button_follow->get_active() != perf().get_follow_transport()) m_button_follow->set_active(perf().get_follow_transport()); if (perf().is_running()) m_button_jack->set_sensitive(false); else m_button_jack->set_sensitive(true); #endif m_button_undo->set_sensitive(perf().have_undo()); m_button_redo->set_sensitive(perf().have_redo()); /* * Do not enable this code, it makes the whole perfedit panel flicker. * Instead, one can set (for example) the sequence's "dirty mp" flag. * * m_perfroll->enqueue_draw(); */ #ifdef SEQ64_PAUSE_SUPPORT if (perf().is_running() != m_is_running) { m_is_running = perf().is_running(); set_image(m_is_running); } #endif return true; }
/**
 * Exercises airspace synchronisation and the airspace route solver.
 *
 * Steps, in order:
 *  1. Builds n_airspaces test airspaces around the map centre.
 *  2. Dumps a 100 x 100 grid of interpolated terrain heights to
 *     "results/terrain.txt" (one "lon lat height" line per sample).
 *  3. Synchronises a second Airspaces container against the first with
 *     different centres/ranges and checks, through ok(), that it shrinks
 *     and grows as expected; then runs scan_airspaces() on it.
 *  4. Runs the route solver NUM_SOL times, moving the destination 0.1
 *     degrees in latitude before each attempt, and reports each result
 *     through ok().
 *
 * @param n_airspaces number of airspaces handed to setup_airspaces()
 * @param map terrain map used for the centre, heights and the solver
 * @return always true; the individual checks are reported through ok()
 */
static bool test_route(const unsigned n_airspaces, const RasterMap& map)
{
  Airspaces airspaces;
  setup_airspaces(airspaces, map.GetMapCenter(), n_airspaces);

  {
    // Terrain dump: sample the map on a regular grid around the centre.
    std::ofstream fout("results/terrain.txt");

    unsigned nx = 100;
    unsigned ny = 100;
    GeoPoint origin(map.GetMapCenter());

    for (unsigned i = 0; i < nx; ++i) {
      for (unsigned j = 0; j < ny; ++j) {
        // fx and fy run from -1 to +1 across the grid
        fixed fx = (fixed)i / (nx - 1) * fixed(2.0) - fixed_one;
        fixed fy = (fixed)j / (ny - 1) * fixed(2.0) - fixed_one;
        GeoPoint x(origin.longitude + Angle::Degrees(fixed(0.2) + fixed(0.7) * fx),
                   origin.latitude + Angle::Degrees(fixed(0.9) * fy));
        short h = map.GetInterpolatedHeight(x);
        fout << x.longitude.Degrees() << " " << x.latitude.Degrees()
             << " " << h << "\n";
      }

      // blank line after each row of samples
      fout << "\n";
    }

    fout << "\n";
  }

  { // local scope, see what happens when we go out of scope
    // Start and destination are given as offsets from the map centre.
    GeoPoint p_start(Angle::Degrees(fixed(-0.3)), Angle::Degrees(fixed(0.0)));
    p_start += map.GetMapCenter();

    GeoPoint p_dest(Angle::Degrees(fixed(0.8)), Angle::Degrees(fixed(-0.7)));
    p_dest += map.GetMapCenter();

    // Both locations are placed 100 above the terrain height at that point.
    AGeoPoint loc_start(p_start, RoughAltitude(map.GetHeight(p_start) + 100));
    AGeoPoint loc_end(p_dest, RoughAltitude(map.GetHeight(p_dest) + 100));

    AircraftState state;
    GlidePolar glide_polar(fixed(0.1));
    AirspaceAircraftPerformanceGlide perf(glide_polar);

    GeoVector vec(loc_start, loc_end);
    // search range: 10000 plus half the start-to-destination distance
    fixed range = fixed(10000) + vec.distance / 2;

    state.location = loc_start;
    state.altitude = loc_start.altitude;

    {
      Airspaces as_route(airspaces, false);
      // dummy

      // real one, see if items changed
      as_route.synchronise_in_range(airspaces, vec.MidPoint(loc_start), range);
      int size_1 = as_route.size();
      if (verbose)
        printf("# route airspace size %d\n", size_1);

      // Same centre, but with a range of fixed_one: expect fewer airspaces.
      as_route.synchronise_in_range(airspaces, vec.MidPoint(loc_start), fixed_one);
      int size_2 = as_route.size();
      if (verbose)
        printf("# route airspace size %d\n", size_2);

      ok(size_2 < size_1, "shrink as", 0);

      // go back
      as_route.synchronise_in_range(airspaces, vec.MidPoint(loc_end), range);
      int size_3 = as_route.size();
      if (verbose)
        printf("# route airspace size %d\n", size_3);

      ok(size_3 >= size_2, "grow as", 0);

      // and again
      as_route.synchronise_in_range(airspaces, vec.MidPoint(loc_start), range);
      int size_4 = as_route.size();
      if (verbose)
        printf("# route airspace size %d\n", size_4);

      ok(size_4 >= size_3, "grow as", 0);

      scan_airspaces(state, as_route, perf, true, loc_end);
    }

    // try the solver
    SpeedVector wind(Angle::Degrees(fixed(0)), fixed(0.0));
    GlidePolar polar(fixed_one);

    GlideSettings settings;
    settings.SetDefaults();
    AirspaceRoute route(airspaces);
    route.UpdatePolar(settings, polar, polar, wind);
    route.SetTerrain(&map);
    RoutePlannerConfig config;
    config.mode = RoutePlannerConfig::Mode::BOTH;

    bool sol = false;
    for (int i = 0; i < NUM_SOL; i++) {
      // Move the destination a bit further on every attempt.
      loc_end.latitude += Angle::Degrees(fixed(0.1));
      loc_end.altitude = map.GetHeight(loc_end) + 100;
      route.Synchronise(airspaces, loc_start, loc_end);
      if (route.Solve(loc_start, loc_end, config)) {
        sol = true;
        if (verbose) {
          PrintHelper::print_route(route);
        }
      } else {
        if (verbose) {
          printf("# fail\n");
        }
        sol = false;
      }
      // one check per attempt, labelled with the attempt index
      char buffer[80];
      sprintf(buffer, "route %d solution", i);
      ok(sol, buffer, 0);
    }
  }

  return true;
}
// Benchmarks (and optionally verifies) the block-CRS matrix times
// multivector product with a Legendre triple-product tensor block.
//
// NOTE(review): TensorType, VectorScalar and MatrixScalar are not declared
// in this block -- presumably template parameters introduced by a template
// header that is not visible here; confirm.
//
// Parameters:
//   arg_var_degree  per-variable degrees used to build the tensor
//   nGrid           grid size per dimension; the "FEM" length is nGrid^3
//   iterCount       number of timed multiply iterations
//   check           when true, a reference result is computed on the host
//                   and the device result is compared against it
//
// Returns a vector of three values:
//   [0] fem_length * stoch_length
//   [1] seconds per multiply iteration
//   [2] 1.0e-9 * fem_graph_length * flops_per_block / seconds_per_iter
std::vector<double>
test_product_tensor_legendre(
  const std::vector<int> & arg_var_degree ,
  const int nGrid ,
  const int iterCount ,
  const bool check )
{
  typedef TensorType tensor_type ;
  typedef typename tensor_type::device_type device_type ;

  typedef KokkosArray::View< VectorScalar** ,
                             KokkosArray::LayoutLeft ,
                             device_type > vector_type ;

  typedef KokkosArray::BlockCrsMatrix< tensor_type , MatrixScalar , device_type > matrix_type ;
  typedef typename matrix_type::graph_type graph_type ;

  //------------------------------
  // Generate graph for "FEM" box structure:

  std::vector< std::vector<size_t> > fem_graph ;

  const size_t fem_length = nGrid * nGrid * nGrid ;
  const size_t fem_graph_length = unit_test::generate_fem_graph( nGrid , fem_graph );

  //------------------------------
  // Generate CRS block-tensor matrix:

  const std::vector<unsigned> var_degree( arg_var_degree.begin() , arg_var_degree.end() );

  const KokkosArray::TripleProductTensorLegendreCombinatorialEvaluation
    tensor( var_degree );

  const size_t stoch_length = tensor.bases_count();

  // Sparsity of the stochastic block: keep (i,j) when matrix_nonzero() says so.
  std::vector< std::vector< size_t > > stoch_graph( stoch_length );

  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
    for ( size_t j = 0 ; j < stoch_length ; ++j ) {
      if ( KokkosArray::matrix_nonzero(tensor,i,j) ) {
        stoch_graph[i].push_back(j);
      }
    }
  }

  //------------------------------
  // Generate input multivector:

  vector_type x = vector_type( "x" , stoch_length , fem_length );
  vector_type y = vector_type( "y" , stoch_length , fem_length );

  typename vector_type::HostMirror hx = KokkosArray::create_mirror( x );
  typename vector_type::HostMirror hy_result = KokkosArray::create_mirror( y );

  for ( size_t iColFEM = 0 ; iColFEM < fem_length ; ++iColFEM ) {
    for ( size_t iColStoch = 0 ; iColStoch < stoch_length ; ++iColStoch ) {
      hx(iColStoch,iColFEM) =
        generate_vector_coefficient( fem_length , stoch_length ,
                                     iColFEM , iColStoch );
    }}

  KokkosArray::deep_copy( x , hx );

  //------------------------------
  // Build the matrix: tensor block, FEM graph, and generated values.

  matrix_type matrix ;

  matrix.block = tensor_type( var_degree );
  matrix.graph = KokkosArray::create_crsarray<graph_type>( std::string("test crs graph") , fem_graph );

  // The block dimension must agree with the number of stochastic bases.
  if ( stoch_length != matrix.block.dimension() ) {
    throw std::runtime_error("test_crs_product_tensor_legendre matrix sizing error");
  }

  matrix.values = vector_type( "matrix" , stoch_length , fem_graph_length );

  typename vector_type::HostMirror hM = KokkosArray::create_mirror( matrix.values );

  // iEntryFEM runs over all graph entries, across rows.
  for ( size_t iRowFEM = 0 , iEntryFEM = 0 ; iRowFEM < fem_length ; ++iRowFEM ) {
    for ( size_t iRowEntryFEM = 0 ; iRowEntryFEM < fem_graph[iRowFEM].size() ; ++iRowEntryFEM , ++iEntryFEM ) {
      const size_t iColFEM = fem_graph[iRowFEM][iRowEntryFEM] ;

      for ( size_t k = 0 ; k < stoch_length ; ++k ) {
        hM(k,iEntryFEM) = generate_matrix_coefficient( fem_length , stoch_length , iRowFEM , iColFEM , k );
      }
    }
  }

  KokkosArray::deep_copy( matrix.values , hM );

  //------------------------------
  // Reference result on the host, stored in hy_result (only when checking).

  if (check) {
    for ( size_t iRowStoch = 0 ; iRowStoch < stoch_length ; ++iRowStoch ) {
      for ( size_t iRowFEM = 0 , iEntryFEM = 0 ; iRowFEM < fem_length ; ++iRowFEM ) {

        // Note: this scalar shadows the multivector `y` declared above.
        double y = 0 ;

        for ( size_t iRowEntryFEM = 0 ; iRowEntryFEM < fem_graph[ iRowFEM ].size() ; ++iRowEntryFEM , ++iEntryFEM ) {

          const size_t iColFEM = fem_graph[iRowFEM][iRowEntryFEM] ;

          for ( size_t iRowEntryStoch = 0 ; iRowEntryStoch < stoch_graph[iRowStoch].size() ; ++iRowEntryStoch ) {

            const size_t iColStoch = stoch_graph[iRowStoch][iRowEntryStoch];

            double value = 0 ;
            for ( unsigned k = 0 ; k < stoch_length ; ++k ) {

              const double A_fem_k = generate_matrix_coefficient( fem_length , stoch_length , iRowFEM , iColFEM , k );

              // Sanity check: the stored matrix value must match the generator.
              if ( 1.0e-6 < std::abs( hM(k,iEntryFEM) - A_fem_k ) ) {
                std::cout << "test_crs_product_tensor_legendre error: Matrix entry"
                          << "  A(" << k << ",(" << iRowFEM << "," << iColFEM << ")) = " << hM(k,iEntryFEM)
                          << " , error = " << hM(k,iEntryFEM) - A_fem_k
                          << std::endl ;
              }

              value += tensor(iRowStoch,iColStoch,k) * A_fem_k ;
            }

            y += value * hx( iColStoch , iColFEM );
          }
        }

        hy_result( iRowStoch , iRowFEM ) = y ;
      }
    }
  }

  //------------------------------
  // Timed region: iterCount multiplies, followed by a device fence.

  const KokkosArray::Impl::Multiply< matrix_type , vector_type , vector_type > op( matrix , x , y );

  KokkosArray::Impl::Timer clock ;
  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
    op.run();
  }
  device_type::fence();

  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
  const double flops_per_block = matrix.block.multiply_add_flops();
  // the 1.0e-9 factor scales flop/s to Gflop/s
  const double flops = 1.0e-9*fem_graph_length*flops_per_block / seconds_per_iter;

  //------------------------------
  // Verify result

  if (check)
  {
    // tighter tolerance when VectorScalar is double
    const double tol = KokkosArray::Impl::is_same<double,VectorScalar>::value ? 1.0e-13 : 1.0e-5 ;
    const size_t error_max = 10 ;   // print at most this many mismatches

    // hx is reused here to hold the computed result copied back from y
    KokkosArray::deep_copy( hx , y );

    size_t error_count = 0 ;

    for ( size_t iRowFEM = 0 ; iRowFEM < fem_length ; ++iRowFEM ) {
      for ( size_t iRowStoch = 0 ; iRowStoch < stoch_length ; ++iRowStoch ) {
        const double mag   = std::abs( hy_result(iRowStoch,iRowFEM) );
        const double error = std::abs( hx(iRowStoch,iRowFEM) - hy_result(iRowStoch,iRowFEM) );
        // an entry fails only if both absolute and relative error exceed tol
        if ( tol < error && tol < error / mag ) {
          if ( error_count < error_max ) {
            std::cout << "test_product_tensor_legendre error:"
                      << " y(" << iRowStoch << "," << iRowFEM << ") = " << hx(iRowStoch,iRowFEM)
                      << " , error = " << ( hx(iRowStoch,iRowFEM) - hy_result(iRowStoch,iRowFEM) )
                      << std::endl ;
          }
          ++error_count ;
        }
      }
    }

    if ( error_count ) {
      std::cout << "test_crs_product_tensor_legendre error_count = " << error_count << std::endl ;
    }
  }

  //------------------------------
  // Pack the result: problem size, seconds per iteration, Gflop/s.

  std::vector<double> perf(3) ;

  perf[0] = fem_length * stoch_length ;
  perf[1] = seconds_per_iter ;
  perf[2] = flops ;

  return perf ;
}