// TODO: create universal base class for PETSc solvers - so not to copypaste! // TODO: 1 universal code from TS solving?! (not to write it again and again!?) void E3PetscSolver::run(int steps, double time, bool use_step){ // printf("run started\n"); // fflush(stdout); static int run_cnt = 0; run_cnt++; int n_cores = 1; if(this->sconfig->has_n_cores()) n_cores = this->sconfig->n_cores(); std::ostringstream cmd_stream; // cmd_stream << "mpiexec -n "<< n_cores << " --host 192.168.0.101 ./Debug/ts3"; // cmd_stream << "mpiexec -n "<< n_cores << " --host 10.0.0.205 /home/dimalit/workspace/ts3/Debug/ts3"; cmd_stream << "mpiexec -n "<< n_cores << " ../ts3/Debug/ts3";// << " -info info.log"; std::string cmd = cmd_stream.str(); if(use_step) cmd += " use_step"; int rfd, wfd; child = rpc_call(cmd.c_str(), &rfd, &wfd); rf = fdopen(rfd, "rb"); wf = fdopen(wfd, "wb"); // int tmp = open("tmp", O_WRONLY | O_CREAT, 0664); // state->PrintDebugString(); pb::E3Model all; all.mutable_sconfig()->CopyFrom(*sconfig); all.mutable_pconfig()->CopyFrom(*pconfig); all.mutable_state()->CopyFrom(*state); int size = all.ByteSize(); int ok; ok = fwrite(&size, sizeof(size), 1, wf); assert(ok == 1); fflush(wf); all.SerializeToFileDescriptor(fileno(wf)); ok = fwrite(&steps, sizeof(steps), 1, wf); assert(ok == 1); ok = fwrite(&time, sizeof(time), 1, wf); assert(ok == 1); fflush(wf); if(!use_step){ // just final step bool res = read_results(); assert(res==true); // last waitpid(child, 0, 0); fclose(rf); rf = NULL; fclose(wf); wf = NULL; } }
/**
 * Requests one more step from the running child solver and reads its results.
 *
 * Sends the single byte 's' on the write stream, then calls read_results().
 *
 * @return true when read_results() succeeded; false when the session is over
 *         (streams already closed, child no longer running, or
 *         read_results() failed). On every false return both streams are
 *         closed and set to NULL.
 */
bool E3PetscSolver::step(){
	// Every path that ends the session sets rf/wf to NULL. Without this guard
	// a further call would reach fclose(NULL) below, which is undefined
	// behaviour.
	if(rf == NULL || wf == NULL)
		return false;

	// With WNOHANG, waitpid() returns 0 while the child is still running;
	// any other value means it has changed state or the call failed.
	if(waitpid(child, 0, WNOHANG)!=0){
		fclose(rf); rf = NULL;
		fclose(wf); wf = NULL;
		return false;
	}

	fputc('s', wf);
	fflush(wf);

	if(!read_results()){
		// TODO: will it ever run? (see waitpid above)
		waitpid(child, 0, 0);		// was before read - here for tests
		fclose(rf); rf = NULL;
		fclose(wf); wf = NULL;
		return false;
	}
	return true;
}
int thread_collect_results(const struct settings *settings, struct stats *total_stats, int (*print_results)(const struct settings *, const struct stats *, void *), void *data) { unsigned int i = 0; SOCKET s; assert( settings != NULL ); assert( total_stats != NULL ); s = ((struct remote_data*)data)->stats_socket; assert ( s != INVALID_SOCKET ); if ( settings->verbose ) printf("Collecting %u results\n", settings->servercores); for ( ; i < settings->servercores; i++ ) { struct stats stats; if ( read_results(s, &stats) != 0 ) { fprintf(stderr, "%s:%d read_results() error\n", __FILE__, __LINE__ ); return -1; } if ( print_results(settings, &stats, data) ) { fprintf(stderr, "%s:%d print_results() error\n", __FILE__, __LINE__ ); return -1; } // Now add the values to the total stats_add( total_stats, &stats ); } // Divide the duration by the # of CPUs used if ( settings->servercores > 1 ) total_stats->duration /= settings->servercores; return 0; }
static int do_measure_one_cpu(void *data) { pfmon_thread_desc_t *arg = (pfmon_thread_desc_t *)data; pfmon_sdesc_t sdesc_var; /* local pfmon task descriptor */ pfmon_sdesc_t *sdesc = &sdesc_var; pfmon_ctxid_t ctxid = -1; pid_t mytid = gettid(); unsigned int mycpu; int aggr, needs_order; int r, error; /* * POSIX threads: * The signal state of the new thread is initialised as follows: * - the signal mask is inherited from the creating thread. * - the set of signals pending for the new thread is empty. * * we want to let the master handle the global signals, therefore * we mask them here. */ setup_worker_signals(); mycpu = arg->cpu; aggr = options.opt_aggr; /* * some NPTL sanity checks */ if (mytid == master_tid) { warning("pfmon is not compiled/linked with the correct pthread library," "the program is linked with NPTL when it should not." "Check Makefile." "[pid=%d:tid=%d]\n", getpid(), mytid); goto error; } /* * we initialize our "simplified" sdesc */ memset(sdesc, 0, sizeof(*sdesc)); /* * just to make sure we have these fields initialized */ sdesc->type = PFMON_SDESC_ATTACH; sdesc->tid = mytid; sdesc->pid = getpid(); sdesc->cpu = mycpu; sdesc->id = arg->id; /* logical id */ DPRINT(("CPU%u: pid=%d tid=%d\n", mycpu, sdesc->pid, sdesc->tid)); pthread_setspecific(param_key, arg); if (options.online_cpus > 1) { r = pfmon_pin_self(mycpu); if (r == -1) { warning("[%d] cannot set affinity to CPU%u: %s\n", mytid, mycpu, strerror(errno)); goto error; } } r = pfmon_sys_setup_context(sdesc, arg->cpu, arg->ctx); if (r) goto error; ctxid = sdesc->ctxid; needs_order = aggr || sdesc->out_fp == stdout; DPRINT(("sdesc->id=%u needs_order=%d\n", sdesc->id, needs_order)); /* * indicate we have reach the starting point */ arg->thread_state = THREAD_RUN; if (session_state == SESSION_ABORTED) goto error; /* * wait for the start signal */ pthread_barrier_wait(&barrier.barrier); DPRINT(("CPU%u after barrier state=%d\n", mycpu, session_state)); if (session_state == SESSION_ABORTED) goto 
error; if (options.opt_dont_start == 0) { if (pfmon_start(ctxid, &error) == -1) goto error; vbprintf("CPU%u started monitoring\n", mycpu); } else { vbprintf("CPU%u pfmon does not start session\n", mycpu); } /* * interval is not possible when sampling */ if (options.interval != PFMON_NO_TIMEOUT) { struct timespec tm; tm.tv_sec = options.interval / 1000; tm.tv_nsec = (options.interval % 1000) * 1000000; for(;session_state == SESSION_RUN; ) { nanosleep(&tm, NULL); /* * we only check on stop to avoid printing too many messages */ if (pfmon_stop(ctxid, &error) == -1) warning("CPU%u could not stop monitoring, CPU may be offline, check results\n", mycpu); read_incremental_results(sdesc); show_incr_results(sdesc, needs_order); pfmon_start(ctxid, &error); } pthread_testcancel(); } else { if (options.opt_use_smpl) { for(;session_state == SESSION_RUN;) { pfarg_msg_t msg; r = read(sdesc->ctxid, &msg, sizeof(msg)); if (r ==-1) { /* * we have been interrupted by signal (likely), * go check session_state */ continue; } ovfl_cnts[mycpu]++; pthread_testcancel(); if (aggr) pthread_mutex_lock(&pfmon_sys_aggr_lock); r = pfmon_process_smpl_buf(sdesc, 0); if (r) vbprintf("CPU%-4u error processing buffer\n", mycpu); if (aggr) pthread_mutex_unlock(&pfmon_sys_aggr_lock); pthread_testcancel(); } } else { sigset_t myset; int sig; sigemptyset(&myset); sigaddset(&myset, SIGUSR1); for(;session_state == SESSION_RUN;) { sigwait(&myset, &sig); } } } if (pfmon_stop(ctxid, &error) == -1) warning("CPU%u could not stop monitoring, CPU may be offline, check results\n", mycpu); vbprintf("CPU%-4u stopped monitoring\n", mycpu); /* * read the final counts */ if (options.opt_use_smpl == 0 || options.opt_smpl_print_counts) { if (read_results(sdesc) == -1) { warning("CPU%u read_results error\n", mycpu); goto error; } } DPRINT(("CPU%u has read PMDS\n", mycpu)); /* * dump results */ if (options.opt_aggr) { pthread_mutex_lock(&pfmon_sys_aggr_lock); syswide_aggregate_results(sdesc); if (options.opt_use_smpl) 
pfmon_process_smpl_buf(sdesc, 1); pthread_mutex_unlock(&pfmon_sys_aggr_lock); } else { if (options.opt_use_smpl) pfmon_process_smpl_buf(sdesc, 1); /* * no final totals in interval printing mode */ if (options.interval == PFMON_NO_TIMEOUT) show_results(sdesc, needs_order); close_results(sdesc); } if (options.opt_use_smpl) { if (options.opt_aggr == 0) pfmon_close_sampling_output(sdesc, -1, mycpu); munmap(sdesc->csmpl.smpl_hdr, sdesc->csmpl.map_size); } close(sdesc->ctxid); arg->thread_state = THREAD_DONE; DPRINT(("CPU%u is done\n", mycpu)); pthread_exit((void *)(0)); /* NO RETURN */ error: if (sdesc->ctxid > -1) close(sdesc->ctxid); arg->thread_state = THREAD_ERROR; vbprintf("CPU%-4u session aborted\n", mycpu); if (options.opt_use_smpl) { if (options.opt_aggr == 0) pfmon_close_sampling_output(sdesc, -1, mycpu); munmap(sdesc->csmpl.smpl_hdr, sdesc->csmpl.map_size); } pthread_exit((void *)(~0UL)); /* NO RETURN */ }