/*
 * Write the current wall_time() stamp to `out`, wrapped in `prefix` and
 * `suffix`.
 *
 * When CHORD_PRINT_LONG_TIME is defined the full value is printed; otherwise
 * the value is shifted left and then right by 32 bits first, which keeps only
 * its low 32 bits.
 */
void print_current_time(FILE *out, char *prefix, char *suffix)
{
    uint64_t stamp = wall_time();

#ifndef CHORD_PRINT_LONG_TIME
    stamp = (stamp << 32) >> 32;
#endif

    fprintf(out, "%s%" PRIu64 "%s", prefix, stamp, suffix);
}
int main (int argc, char** argv) { /* main */ Particle* particle_array = (Particle*)NULL; Particle* particle_array2 = (Particle*)NULL; int timestep; int i; FILE *input_data = fopen(argv[1], "r"); Particle_input_arguments(input_data); particle_array = Particle_array_construct(number_of_particles); particle_array2 = Particle_array_construct(number_of_particles); Particle_array_initialize(particle_array, number_of_particles); // for (i = 0; i < number_of_particles; i++) { // particle_array2[i].mass = particle_array[i].mass; // } FILE * fileptr = fopen("nbody_out.xyz", "w"); Particle_array_output_xyz(fileptr, particle_array, number_of_particles); if (number_of_particles <= 1) return 0; double start = wall_time(); for (timestep = 1; timestep <= number_of_timesteps; timestep++) { if ((timestep % timesteps_between_outputs) == 0 ) fprintf(stderr, "Starting timestep #%d.\n", timestep); Particle_array_calculate_forces_cuda(particle_array, particle_array2, number_of_particles, time_interval ); /* swap arrays */ Particle * tmp = particle_array; particle_array = particle_array2; particle_array2 = tmp; } /* for timestep */ #pragma omp taskwait double end = wall_time (); printf("Time in seconds: %g s.\n", end - start); printf("Particles per second: %g \n", (number_of_particles*number_of_timesteps)/(end-start)); if ((number_of_timesteps % timesteps_between_outputs) != 0) { Particle_array_output_xyz(fileptr, particle_array, number_of_particles); } particle_array = Particle_array_destruct(particle_array, number_of_particles); if (fclose(fileptr) != 0) { fprintf(stderr, "ERROR: can't close the output file.\n"); exit(program_failure_code); } return program_success_code; } /* main */
// Reports when this screen next wants its timer fired.
//
// In the TYPING and DONE states `time` is set to the stored _next_update; in
// the LOADING state it is set to a default-constructed wall_time. Returns true
// in those three states and false (leaving `time` untouched) in any other.
bool LoadingScreen::next_timer(wall_time& time) {
    if (_state == TYPING || _state == DONE) {
        time = _next_update;
        return true;
    }
    if (_state == LOADING) {
        time = wall_time();
        return true;
    }
    return false;
}
// Called when the debriefing screen comes to the front: resets the count of
// typed characters and schedules the next update. While TYPING the update is
// one kTypingDelay after now(); in every other state _next_update is reset to
// a default-constructed wall_time.
void DebriefingScreen::become_front() {
    _typed_chars = 0;

    if (_state != TYPING) {
        _next_update = wall_time();
        return;
    }
    _next_update = now() + kTypingDelay;
}
// Closes the current timing interval and pops the "working on" stack.
//
// The time elapsed since last_wall_time is charged to the category in
// `working_on` (skipped while last_wall_time is negative). The category on
// top of was_working_on then becomes current, the remaining entries move up
// one slot, and the vacated bottom slot is filled with Other.
void fields::finished_working() {
  const double stamp = wall_time();
  if (last_wall_time >= 0) {
    times_spent[working_on] += stamp - last_wall_time;
  }
  last_wall_time = stamp;

  working_on = was_working_on[0];
  for (int slot = 1; slot < MEEP_TIMING_STACK_SZ; ++slot) {
    was_working_on[slot - 1] = was_working_on[slot];
  }
  was_working_on[MEEP_TIMING_STACK_SZ - 1] = Other;
}
// Closes the current timing interval and pushes a new "working on" category.
//
// The time elapsed since last_wall_time is charged to the category in
// `working_on` (skipped while last_wall_time is negative). The previous
// category is then pushed onto the was_working_on stack and `s` becomes the
// current one. The deepest stack entry is dropped when the stack is full.
//
// s: the category that subsequent time is charged to.
void fields::am_now_working_on(time_sink s) {
  double now = wall_time();
  if (last_wall_time >= 0) times_spent[working_on] += now - last_wall_time;
  last_wall_time = now;

  // Shift the stack down one slot, starting from the deepest entry. Copying
  // in ascending order would overwrite each slot before it is read and fill
  // the whole stack with was_working_on[0], so finished_working() would
  // restore the wrong category once nesting exceeds two levels.
  for (int i = MEEP_TIMING_STACK_SZ - 1; i > 0; --i)
    was_working_on[i] = was_working_on[i - 1];
  was_working_on[0] = working_on;
  working_on = s;
}
// Returns the difference between the current wall_time() reading and the
// reading stored in `t`, and stores the new reading in `t` so that successive
// calls measure consecutive intervals.
double wall_dtime(double &t) {
  const double previous = t;
  t = wall_time();
  return t - previous;
}
/*
 * Start (or restart) the named clock.
 *
 * If a clock called `name` already exists in the global `clocks` list, its
 * start time is reset to the current wall_time(). Otherwise a new clock is
 * allocated with a private copy of `name`, a zero total, and the current
 * start time, and is linked at the head of the list.
 *
 * If memory for a new clock cannot be allocated, a message is written to
 * stderr and the list is left unchanged.
 */
EXPORT void startClock(const char* name)
{
    struct clock *cp;
    size_t name_len;

    for (cp = clocks; cp != NULL; cp = cp->next) {
        if (strcmp(cp->name, name) == 0) {
            cp->startTime = wall_time();
            return;
        }
    }

    cp = (struct clock*)malloc(sizeof(struct clock));
    if (cp == NULL) {
        fprintf(stderr, "startClock: out of memory for clock '%s'\n", name);
        return;
    }

    name_len = strlen(name) + 1;    /* include the terminating NUL */
    cp->name = (char*) malloc(name_len);
    if (cp->name == NULL) {
        fprintf(stderr, "startClock: out of memory for clock '%s'\n", name);
        free(cp);
        return;
    }
    memcpy(cp->name, name, name_len);

    cp->totalTime = 0;
    cp->startTime = wall_time();
    cp->next = clocks;
    clocks = cp;
}
/*
 * Stop the named clock and report it.
 *
 * Looks `name` up in the global `clocks` list. If the clock exists and has a
 * non-zero start time, totalTime is set to the time elapsed since that start,
 * a line with the name and the elapsed value is printed to stdout, and the
 * start time is cleared. Unknown or not-running clocks are ignored.
 */
EXPORT void stopClock(const char* name)
{
    struct clock *entry;

    for (entry = clocks; entry != NULL; entry = entry->next) {
        if (strcmp(entry->name, name) == 0) {
            break;
        }
    }

    if (entry == NULL || !entry->startTime) {
        return;
    }

    entry->totalTime = (wall_time() - entry->startTime);
    printf("%-20s %ld micros\n", entry->name, entry->totalTime);
    entry->startTime = 0;
}
/*
 * Scalar-product benchmark.
 *
 * Usage: prog n r step
 *   n    - loop bound passed to skalar()
 *   r    - number of repetitions
 *   step - stride passed to skalar(); the vectors hold step * n doubles
 *
 * First times r calls of empty() as the overhead of the repetition loop, then
 * times r calls of skalar() and prints the result, the run times and the
 * achieved MFlop/s. Returns 0 on success and 1 on bad arguments or
 * allocation failure.
 */
int main(int argc, char *argv[])
{
    int i, j, n, r, step;
    double s = 0.0, time, ttime, *a, *b;
    size_t count;

    if (argc < 4) {
        fprintf(stderr, "Usage: %s n r step\n", (argc > 0) ? argv[0] : "skalar");
        return 1;
    }

    n = atoi(argv[1]);
    r = atoi(argv[2]);
    step = atoi(argv[3]);
    /* Non-positive values would give zero-sized arrays and divisions by zero
     * in the report below. */
    if (n <= 0 || r <= 0 || step <= 0) {
        fprintf(stderr, "n, r and step must all be positive integers\n");
        return 1;
    }

    /* Compute the element count in size_t so step * n cannot overflow int. */
    count = (size_t) step * (size_t) n;
    a = (double *) malloc(count * sizeof(double));
    b = (double *) malloc(count * sizeof(double));
    if (a == NULL || b == NULL) {
        fprintf(stderr, "out of memory\n");
        free(a);
        free(b);
        return 1;
    }

    /* The timer is read twice in a row before the overhead measurement. */
    time = wall_time();
    time = wall_time();
    for (i = 0; i < r; i++)
        empty();
    ttime = wall_time() - time;

    for (i = 0; i < n; i++) {
        a[i] = i;
        b[i] = 1.0 / (i + 1);
    }

    time = wall_time();
    for (j = 0; j < r; j++) {
        s = skalar(a, b, n, step);
    }
    time = wall_time() - time;

    printf("Skalarprodukt : %f\n", s);
    printf("Laufzeit : %f s\n", time);
    printf("Overhead : %g s\n", ttime);
    printf("Zeit pro Wdhlg : %g s\n", time / r);
    printf("Overhead : %g s\n", ttime / r);
    // printf("Rechenleistung : %6.1f MFlop/s\n", 2.0 * n * r * 1e-6 / (time - ttime));
    printf("Rechenleistung : %6.1f MFlop/s\n", 2.0 * (n/step+1) * r * 1e-6 / (time - ttime));

    free(a);
    free(b);
    return 0;
}
/***********************************************************************
 * cl_ctx_set_timer - set timer
 *
 * input:
 *   ctx  - context
 *   tv   - timeout after which the timer expires
 *   fun  - function to be invoked when the timer expires
 *   data - application data passed back to the application when the
 *          callback is invoked
 *
 * return:
 *   handle of the newly scheduled timer, or NULL if ctx or tv is NULL
 *   or the event could not be created
 *
 ************************************************************************/
cl_timer *cl_ctx_set_timer(cl_context *ctx, struct timeval *tv,
                           void (*fun)(), void *data)
{
    uint64_t when;
    Event *ev;

    if (ctx == NULL || tv == NULL)
        return NULL;

    /* Absolute expiry time: now plus the timeout. The multiplication is done
     * in 64 bits so large tv_sec values cannot overflow a narrower type. */
    when = wall_time();
    when = when + (uint64_t)UMILLION * (uint64_t)tv->tv_sec
                + (uint64_t)tv->tv_usec;

    ev = newEvent(fun, data, when);
    if (ev == NULL)
        return NULL;
    insertEvent(&ctx->timer_heap, ev);

    return (cl_timer *)ev;
}
void DebriefingScreen::fire_timer() { if (_state != TYPING) { throw Exception(format("DebriefingScreen::fire_timer() called but _state is {0}", _state)); } sys.sound.teletype(); wall_time now = antares::now(); while (_next_update <= now) { if (_typed_chars < _score->size()) { _next_update += kTypingDelay; ++_typed_chars; } else { _next_update = wall_time(); _state = DONE; break; } } }
void ObjectDataScreen::fire_timer() { wall_time now = antares::now(); if (_next_sound <= now) { sys.sound.teletype(); _next_sound += 3 * kTypingDelay; while (_next_sound <= now) { _next_sound += kTypingDelay; } } while (_next_update <= now) { if (_typed_chars < _text->size()) { _next_update += kTypingDelay; ++_typed_chars; } else { _next_update = wall_time(); _state = DONE; break; } } }
/*
 * ping_thread_entry - body of the thread that pings i3 servers.
 *
 * data: pointer to a PingThreadData carrying the server list, the URL the
 *       list is fetched from, and the ping start time.
 *
 * After opening the ping socket (ICMP or UDP, chosen at compile time) and
 * populating the server list, the function loops forever:
 *   - waits up to 10 ms for an echo reply and records its RTT;
 *   - sends the pings that have become due since the last send;
 *   - periodically picks new servers to ping;
 *   - periodically re-fetches the server list.
 * Socket-initialisation and select() failures (other than EINTR) abort the
 * process. The statements after the loop are not reachable from inside it.
 *
 * NOTE(review): the opening of the preprocessor conditional closed by the
 * #endif below is not visible in this view.
 */
unsigned int __stdcall ping_thread_entry(void *data)
#endif
{
    PingThreadData *pdata = (PingThreadData *)data;
    int maxfd, ret;
    fd_set all_rset, rset;
    struct timeval to;
    I3ServerList *list = pdata->list;
    char *url = pdata->url;
    uint64_t *ping_start_time = pdata->ping_start_time;
    int num_pings;
    I3ServerListNode *next_to_ping;
    uint64_t last_ping_time, curr_time;
    uint64_t last_add_new_i3servers, last_update_serverlist;

    FD_ZERO(&all_rset);
    FD_ZERO(&rset);

    /* socket init */
#ifdef ICMP_PING
    if (init_icmp_socket(&ping_sock) == -1)
        abort();
#else
    if (init_udp_socket(&ping_sock) == -1)
        abort();
#endif
    FD_SET(ping_sock, &all_rset);
    maxfd = ping_sock + 1;

    /* initial populate the list of i3 servers */
    update_i3_server_list(url, list, &next_to_ping);

    /* determine coordinates */
    init_coordinates(list);

    /* add some close-by servers from the list based on coordinates */
    change_ping_list(list, &next_to_ping, 1);

    /* eternal loop */
    last_ping_time = last_add_new_i3servers = last_update_serverlist = wall_time();
    set_status(ping_start_time, last_ping_time);
    for (;;) {
        /* select() modifies its arguments, so reload them every round. */
        rset = all_rset;
        to.tv_sec = 0;
        to.tv_usec = 10000;
        if ((ret = select(maxfd, &rset, NULL, NULL, &to)) < 0) {
            if (errno == EINTR)
                continue;
            else {
                perror("select");
                abort();
            }
        }

        /* message received on icmp socket */
        if (FD_ISSET(ping_sock, &rset)) {
            uint32_t addr;
            uint16_t port, seq;
            uint64_t rtt;
#ifdef ICMP_PING
            if (recv_echo_reply(ping_sock, &addr, &seq, &rtt)) {
#else
            if (recv_i3_echo_reply(ping_sock, &addr, &port, &seq, &rtt)) {
#endif
                update_ping_information(list, addr, seq, rtt);
            }
        }

        /* need to ping */
        curr_time = wall_time();
        if (list->num_ping_list > 0) {
            char status = get_status(ping_start_time, curr_time);
            /* Number of pings that have become due since the last send. */
            num_pings = (curr_time - last_ping_time)/
                (period_ping[status]/list->num_ping_list);
            if (num_pings > 0) {
                if (NULL == next_to_ping) {
                    /* NOTE(review): the message says "Aborting", but
                     * execution continues to send_npings() below. */
                    I3_PRINT_DEBUG0(I3_DEBUG_LEVEL_MINIMAL,
                            "No servers to ping. Aborting\n");
                }
                send_npings(ping_sock, list, &next_to_ping, num_pings);
                last_ping_time = curr_time;
            }
        }

        /* change the list of i3 servers */
        if (curr_time - last_add_new_i3servers >
                period_pick_new_server[get_status(ping_start_time, curr_time)]) {
            /* testing just the best server */
            uint32_t best_addr;
            uint16_t best_port;
            uint64_t best_rtt;
            struct in_addr ia;
            int required_k = 1;
            int ret = get_top_k(list, required_k, &best_addr, &best_port, &best_rtt);

            if (ret != required_k) {
                // We couldn't find the request k top nodes.
                I3_PRINT_INFO0 (
                        I3_INFO_LEVEL_WARNING,
                        "I3 Ping Thread: Unable to obtain top k nodes.\n"
                        );

                // Dilip: Feb 20, 2006. I don't think the following works.
                // TODO: Start
                // We set the last_add_new_servers to fool the thread
                // to wait for some time before trying again to get
                // the top k nodes.
                //last_add_new_i3servers = curr_time;
                // TODO: End

                // Sleep for some time before trying again.
                // This `continue` also skips the server-list refresh below
                // for the current round.
#           if defined (_WIN32)
                Sleep ( 25 ); // 25 milliseconds
#           else
                usleep(25 * 1000); // 25 milliseconds
#           endif
                continue;
            }

            ia.s_addr = htonl(best_addr);
            I3_PRINT_DEBUG3(I3_INFO_LEVEL_MINIMAL,
                    "Best node: %s:%d with RTT %Ld\n",
                    inet_ntoa(ia), best_port, best_rtt
                    );
            I3_PRINT_DEBUG0(I3_DEBUG_LEVEL_VERBOSE,
                    "Adding new servers to list\n");
            change_ping_list(list, &next_to_ping, 0);
            last_add_new_i3servers = curr_time;
        }

        /* update (wget) i3 server list */
        if (curr_time - last_update_serverlist > PERIOD_SERVERLIST_WGET) {
            I3_PRINT_DEBUG0(
                    I3_DEBUG_LEVEL_VERBOSE,
                    "Updating server list from server\n");
            update_i3_server_list(url, list, &next_to_ping);
            last_update_serverlist = curr_time;
        }
    }

#ifndef _WIN32
    pthread_exit(0);
#endif
    return 0;
}
/* BiCGSTAB(L) algorithm for the n-by-n problem Ax = b

   L      - number of r/u vectors per outer iteration (the "L" of BiCGSTAB(L))
   n      - problem size (length of x and b)
   x      - in: initial guess; out: computed solution
   A      - operator callback, invoked as A(in, out, Adata)
   Adata  - opaque pointer handed back to A
   b      - right-hand side
   tol    - iteration stops once norm2(r) <= tol * norm2(b)
            (norm2(b) is replaced by 1 when it is zero)
   iters  - in: maximum number of outer iterations;
            out: number of outer iterations performed
   work   - workspace of (2L+3)*n realnum values; if NULL, nothing is solved
            and the required workspace size is returned instead
   quiet  - suppresses the residual progress output when true

   Returns 0 on convergence, 1 if the iteration limit was reached, -1 if
   |rho| dropped below breaktol (breakdown), or the workspace size when
   work is NULL (in which case *iters is left untouched). */
ptrdiff_t bicgstabL(const int L, const size_t n, realnum *x, bicgstab_op A, void *Adata,
                    const realnum *b, const double tol, int *iters, realnum *work,
                    const bool quiet) {
  if (!work) return (2 * L + 3) * n; // required workspace

  // r[0..L] and u[0..L] are views into the caller-supplied workspace.
  prealnum *r = new prealnum[L + 1];
  prealnum *u = new prealnum[L + 1];
  for (int i = 0; i <= L; ++i) {
    r[i] = work + i * n;
    u[i] = work + (L + 1 + i) * n;
  }

  double bnrm = norm2(n, b);
  if (bnrm == 0.0) bnrm = 1.0;

  int iter = 0;
  double last_output_wall_time = wall_time();

  // Index 0 of gamma, gamma_p, gamma_pp and sigma is never accessed below.
  double *gamma = new double[L + 1];
  double *gamma_p = new double[L + 1];
  double *gamma_pp = new double[L + 1];

  double *tau = new double[L * L];
  double *sigma = new double[L + 1];

  int ierr = 0; // error code to return, if any
  const double breaktol = 1e-30;

  /**** FIXME: check for breakdown conditions(?) during iteration  ****/

  // rtilde = r[0] = b - Ax
  realnum *rtilde = work + (2 * L + 2) * n;
  A(x, r[0], Adata);
  for (size_t m = 0; m < n; ++m)
    rtilde[m] = r[0][m] = b[m] - r[0][m];

  { /* Sleipjen normalizes rtilde in his code; it seems to help slightly */
    // NOTE(review): no guard against norm2(rtilde) == 0 (initial guess
    // already exact), which would make s infinite.
    double s = 1.0 / norm2(n, rtilde);
    for (size_t m = 0; m < n; ++m)
      rtilde[m] *= s;
  }

  memset(u[0], 0, sizeof(realnum) * n); // u[0] = 0

  double rho = 1.0, alpha = 0, omega = 1;

  double resid;
  while ((resid = norm2(n, r[0])) > tol * bnrm) {
    ++iter;
    // Rate-limit progress output to one line per MEEP_MIN_OUTPUT_TIME.
    if (!quiet && wall_time() > last_output_wall_time + MEEP_MIN_OUTPUT_TIME) {
      master_printf("residual[%d] = %g\n", iter, resid / bnrm);
      last_output_wall_time = wall_time();
    }

    rho = -omega * rho;
    // L inner steps, each extending the r[] and u[] sequences by one vector.
    for (int j = 0; j < L; ++j) {
      if (fabs(rho) < breaktol) {
        ierr = -1;
        goto finish;
      }
      double rho1 = dot(n, r[j], rtilde);
      double beta = alpha * rho1 / rho;
      rho = rho1;
      for (int i = 0; i <= j; ++i)
        for (size_t m = 0; m < n; ++m)
          u[i][m] = r[i][m] - beta * u[i][m];
      A(u[j], u[j + 1], Adata);
      alpha = rho / dot(n, u[j + 1], rtilde);
      for (int i = 0; i <= j; ++i)
        xpay(n, r[i], -alpha, u[i + 1]);
      A(r[j], r[j + 1], Adata);
      xpay(n, x, alpha, u[0]);
    }

    // Orthogonalize r[1..L] against each other (coefficients stored in tau,
    // squared norms in sigma) and project r[0] onto them (gamma_p).
    for (int j = 1; j <= L; ++j) {
      for (int i = 1; i < j; ++i) {
        int ij = (j - 1) * L + (i - 1);
        tau[ij] = dot(n, r[j], r[i]) / sigma[i];
        xpay(n, r[j], -tau[ij], r[i]);
      }
      sigma[j] = dot(n, r[j], r[j]);
      gamma_p[j] = dot(n, r[0], r[j]) / sigma[j];
    }

    // Back-substitution with the tau coefficients to obtain gamma.
    omega = gamma[L] = gamma_p[L];
    for (int j = L - 1; j >= 1; --j) {
      gamma[j] = gamma_p[j];
      for (int i = j + 1; i <= L; ++i)
        gamma[j] -= tau[(i - 1) * L + (j - 1)] * gamma[i];
    }
    for (int j = 1; j < L; ++j) {
      gamma_pp[j] = gamma[j + 1];
      for (int i = j + 1; i < L; ++i)
        gamma_pp[j] += tau[(i - 1) * L + (j - 1)] * gamma[i + 1];
    }

    // Update the solution, the residual and the search direction.
    xpay(n, x, gamma[1], r[0]);
    xpay(n, r[0], -gamma_p[L], r[L]);
    xpay(n, u[0], -gamma[L], u[L]);

    for (int j = 1; j < L; ++j) { /* TODO: use blas DGEMV (for L > 2) */
      xpay(n, x, gamma_pp[j], r[j]);
      xpay(n, r[0], -gamma_p[j], r[j]);
      xpay(n, u[0], -gamma[j], u[j]);
    }

    if (iter == *iters) {
      ierr = 1;
      break;
    }
  }

  if (!quiet) master_printf("final residual = %g\n", norm2(n, r[0]) / bnrm);

// The breakdown path jumps here directly, skipping the final residual print.
finish:
  delete[] sigma;
  delete[] tau;
  delete[] gamma_pp;
  delete[] gamma_p;
  delete[] gamma;
  delete[] u;
  delete[] r;

  *iters = iter;
  return ierr;
}
/*---------------------------------------------------------------------------
 * (function: do_high_level_synthesis)
 *
 * Runs the complete high-level synthesis flow in order: optional hard-block
 * discovery (VPR6 builds), parsing to an AST, netlist creation and checking,
 * netlist optimization, partial mapping to the target device, and output.
 * Progress messages and the total elapsed wall-clock time are printed to
 * stdout.
 *-------------------------------------------------------------------------*/
void do_high_level_synthesis()
{
	/* Holds the start time here; turned into the elapsed time at the end. */
	double elaboration_time = wall_time();

	printf("--------------------------------------------------------------------\n");
	printf("High-level synthesis Begin\n");

	/* Perform any initialization routines here */
#ifdef VPR6
	find_hard_multipliers();
	find_hard_adders();
	//find_hard_adders_for_sub();
	register_hard_blocks();
#endif

	global_param_table_sc = sc_new_string_cache();

	/* parse to abstract syntax tree */
	printf("Parser starting - we'll create an abstract syntax tree. "
	       "Note this tree can be viewed using GraphViz (see documentation)\n");
	parse_to_ast();
	/* Note that the entry point for ast optimzations is done per module with the
	 * function void next_parsed_verilog_file(ast_node_t *file_items_list) */

	/* after the ast is made potentiatlly do tagging for downstream links to verilog */
	if (global_args.high_level_block != NULL)
	{
		add_tag_data();
	}

	/* Now that we have a parse tree (abstract syntax tree [ast]) of
	 * the Verilog we want to make into a netlist. */
	printf("Converting AST into a Netlist. "
	       "Note this netlist can be viewed using GraphViz (see documentation)\n");
	create_netlist();

	// Can't levelize yet since the large muxes can look like combinational loops when they're not
	check_netlist(verilog_netlist);

	/* point for all netlist optimizations. */
	printf("Performing Optimizations of the Netlist\n");
	netlist_optimizations_top(verilog_netlist);

	if (configuration.output_netlist_graphs )
	{
		/* Path is where we are */
		graphVizOutputNetlist(configuration.debug_output_path, "optimized", 1, verilog_netlist);
	}

	/* point where we convert netlist to FPGA or other hardware target compatible format */
	printf("Performing Partial Map to target device\n");
	partial_map_top(verilog_netlist);

#ifdef VPR5
	/* check for problems in the partial mapped netlist */
	printf("Check for liveness and combinational loops\n");
	levelize_and_check_for_combinational_loop_and_liveness(TRUE, verilog_netlist);
#endif

	/* point for outputs. This includes soft and hard mapping all structures to the
	 * target format. Some of these could be considred optimizations */
	printf("Outputting the netlist to the specified output format\n");
	output_top(verilog_netlist);

	elaboration_time = wall_time() - elaboration_time;

	printf("Successful High-level synthesis by Odin in ");
	print_time(elaboration_time);
	printf("\n");
	printf("--------------------------------------------------------------------\n");

	// FIXME: free contents?
	sc_free_string_cache(global_param_table_sc);
}
int main(int argc, char **argv) { void (*hook_print_SE)(const qnu*, const Lagr*, const nmpc&) = NULL; void (*hook_print_LG)(const qnu*, const Lagr*, const nmpc&, const float*) = NULL; void (*hook_print_SD)(const unsigned int*, const double*) = NULL; void (*hook_print_TR)(const unsigned int*, const double*) = NULL; double (*hook_exec_control_horiz)(qnu*, const nmpc&, robot*) = NULL; char errnote[256]; unsigned int sd_loop = 0, k = 0, current_tgt_no = 0; float tgtdist, grad_dot_grad = 0.; robot vme; double sd_loop_time, time_last_cmd_sent = 0, now; nmpc C; cl_opts clopts = { false }; parse_command_line(argc, argv, &vme, &clopts); parse_input_file(C, vme.conffile()); print_greeting(C); qnu* qu = (qnu*) calloc(C.N, sizeof(qnu)); Lagr* p = (Lagr*) calloc(C.N, sizeof(Lagr)); //cmd* cmd_horiz = (cmd*) calloc(C.C, sizeof(cmd)); float* grad = (float*) calloc(C.N + 1, sizeof(float)); float* last_grad = (float*) calloc(C.N + 1, sizeof(float)); double* time_to_tgt = (double*) calloc(C.ntgt, sizeof(float)); C.cur_tgt = C.tgt; C.control_step = 0; init_qu_and_p(qu, p, C); tgtdist = C.tgttol + 1; // Just to get us into the waypoint loop. C.horizon_loop = 0; /* * This next block of decisions sort out the hooks used to print output. * Rather than include the ifs in each loop, I'm using function pointers * which are set at runtime based on CL flags for verbosity. 
*/ if (clopts.selec_verbose) { hook_print_SE = &print_pathnerr; hook_print_LG = &print_LG; hook_print_SD = &print_SD; hook_print_TR = &print_TR; } else if (clopts.selec_verbose) { hook_print_SE = &empty_output_hook; hook_print_LG = &empty_output_hook; hook_print_SD = &empty_output_hook; hook_print_TR = &empty_output_hook; } else { if (clopts.selec_state_and_error_SE) hook_print_SE = &print_pathnerr; else hook_print_SE = &empty_output_hook; if (clopts.selec_lagrange_grad_LG) hook_print_LG = &print_LG; else hook_print_LG = &empty_output_hook; if (clopts.selec_SD_converged_SD) hook_print_SD = &print_SD; else hook_print_SD = &empty_output_hook; if (clopts.selec_target_reached_TR) hook_print_TR = &print_TR; else hook_print_TR = &empty_output_hook; } if (clopts.selec_sim) hook_exec_control_horiz = &exec_control_horiz_dummy; else hook_exec_control_horiz = &exec_control_horiz_vme; if (!clopts.selec_sim) { vme.tcp_connect(); vme.update_poshead(qu, C); } /* * Enter the loop which will take us through all waypoints. */ while (current_tgt_no < C.ntgt) { time_to_tgt[current_tgt_no] = -wall_time(); C.cur_tgt = &C.tgt[current_tgt_no * 2]; tgtdist = C.tgttol + 1; while (tgtdist > .1) { C.horizon_loop += 1; sd_loop = 0; sd_loop_time = -wall_time(); /* * SD loop. */ while (1) { sd_loop += 1; grad_dot_grad = 0.; /* * The core of the gradient decent is in the next few lines: */ tgtdist = predict_horizon(qu, p, C); get_gradient(qu, p, C, grad); for (k = 0; k < C.N; k++) { grad_dot_grad += grad[k] * last_grad[k]; } /* * Detect direction changes in the gradient by inspecting the * product <grad, last_grad>. If it is positive, then the * iterations are successfully stepping to the minimum, and we * can accelerate by increasing dg. If we overshoot (and the * product becomes negative), then backstep and drop dg to a * safe value. */ if (grad_dot_grad > 0) { C.dg *= 2; for (k = 0; k < C.N; ++k) { qu[k].Dth -= C.dg * grad[k]; } } else { C.dg = 0.1; // TODO: Adaptive. 
for (k = 0; k < C.N; ++k) { qu[k].Dth += C.dg * grad[k]; } } swap_fptr(&grad, &last_grad); if (last_grad[C.N] < .1) break; if (sd_loop >= MAX_SD_ITER) { sprintf(errnote, "Reached %d SD iterations. Stopping.", sd_loop); report_error(EXCEEDED_MAX_SD_ITER, errnote); } } hook_print_SE(qu, p, C); hook_print_LG(qu, p, C, grad); sd_loop_time += wall_time(); hook_print_SD(&sd_loop, &sd_loop_time); C.control_step += C.C; hook_exec_control_horiz(qu, C, &vme); for (k = 0; k < C.N - C.C - 1; ++k) { // cmd_horiz[k].v = qu[k].v; // cmd_horiz[k].Dth = qu[k].Dth; qu[k].v = qu[k + C.C].v; qu[k].Dth = qu[k + C.C].Dth; } if (C.control_step > MAX_NMPC_ITER * C.C) { sprintf(errnote, "Reached %d NMPC steps without reaching tgt. Stopping.", MAX_NMPC_ITER); report_error(TRAPPED_IN_NMPC_LOOP, errnote); } /* * The last thing we do is get the new position and heading for * the next SD calculation. */ // vme.update_poshead(qu); } time_to_tgt[current_tgt_no] += wall_time(); hook_print_TR(¤t_tgt_no, &time_to_tgt[current_tgt_no]); ++current_tgt_no; } return 0; }
// Minimizes `function` with the limited-memory BFGS method.
//
// function: the objective. Its current user state is the starting point, and
//           the final point is written back to it on exit.
// results:  receives the exit condition and accumulates per-phase timings
//           (startup, function evaluation, L-BFGS update, stopping criteria,
//           line search ("backtracking"), logging, total).
//
// A problem with zero scalars returns immediately with FUNCTION_TOLERANCE and
// no timing update. Progress is reported through log_function when it is set.
void LBFGSSolver::solve(const Function& function,
                        SolverResults* results) const
{
	double global_start_time = wall_time();

	// Dimension of problem.
	size_t n = function.get_number_of_scalars();

	if (n == 0) {
		results->exit_condition = SolverResults::FUNCTION_TOLERANCE;
		return;
	}

	// Current point, gradient and Hessian.
	double fval   = std::numeric_limits<double>::quiet_NaN();
	double fprev  = std::numeric_limits<double>::quiet_NaN();
	double normg0 = std::numeric_limits<double>::quiet_NaN();
	double normg  = std::numeric_limits<double>::quiet_NaN();
	double normdx = std::numeric_limits<double>::quiet_NaN();

	Eigen::VectorXd x, g;

	// Copy the user state to the current point.
	function.copy_user_to_global(&x);
	Eigen::VectorXd x2(n);

	// L-BFGS history.
	// s and y hold pointers into s_data and y_data so the history can be
	// rotated by moving pointers instead of copying vectors.
	std::vector<Eigen::VectorXd>  s_data(this->lbfgs_history_size),
	                              y_data(this->lbfgs_history_size);
	std::vector<Eigen::VectorXd*> s(this->lbfgs_history_size),
	                              y(this->lbfgs_history_size);
	for (int h = 0; h < this->lbfgs_history_size; ++h) {
		s_data[h].resize(function.get_number_of_scalars());
		s_data[h].setZero();
		y_data[h].resize(function.get_number_of_scalars());
		y_data[h].setZero();
		s[h] = &s_data[h];
		y[h] = &y_data[h];
	}

	Eigen::VectorXd rho(this->lbfgs_history_size);
	rho.setZero();

	Eigen::VectorXd alpha(this->lbfgs_history_size);
	alpha.setZero();
	Eigen::VectorXd q(n);
	Eigen::VectorXd r(n);

	// Needed from the previous iteration.
	Eigen::VectorXd x_prev(n), s_tmp(n), y_tmp(n);

	CheckExitConditionsCache exit_condition_cache;

	//
	// START MAIN ITERATION
	//
	results->startup_time   += wall_time() - global_start_time;
	results->exit_condition = SolverResults::INTERNAL_ERROR;
	int iter = 0;
	bool last_iteration_successful = true;
	int number_of_line_search_failures = 0;
	int number_of_restarts = 0;
	while (true) {

		//
		// Evaluate function and derivatives.
		//
		double start_time = wall_time();
		// y[0] should contain the difference between the gradient
		// in this iteration and the gradient from the previous.
		// Therefore, update y before and after evaluating the
		// function.
		if (iter > 0) {
			y_tmp = -g;
		}
		fval = function.evaluate(x, &g);

		// normg is the largest absolute component of the gradient.
		normg = std::max(g.maxCoeff(), -g.minCoeff());
		if (iter == 0) {
			normg0 = normg;
		}
		results->function_evaluation_time += wall_time() - start_time;

		//
		// Update history
		//
		start_time = wall_time();

		if (iter > 0 && last_iteration_successful) {
			s_tmp = x - x_prev;
			y_tmp += g;

			// The pair is only stored when s'y is sufficiently positive.
			double sTy = s_tmp.dot(y_tmp);
			if (sTy > 1e-16) {
				// Shift all pointers one step back, discarding the oldest one.
				Eigen::VectorXd* sh = s[this->lbfgs_history_size - 1];
				Eigen::VectorXd* yh = y[this->lbfgs_history_size - 1];
				for (int h = this->lbfgs_history_size - 1; h >= 1; --h) {
					s[h]   = s[h - 1];
					y[h]   = y[h - 1];
					rho[h] = rho[h - 1];
				}
				// Reuse the storage of the discarded data for the new data.
				s[0] = sh;
				y[0] = yh;

				*y[0] = y_tmp;
				*s[0] = s_tmp;
				rho[0] = 1.0 / sTy;
			}
		}

		results->lbfgs_update_time += wall_time() - start_time;

		//
		// Test stopping criteriea
		//
		// Leaving the loop from this section skips the
		// stopping_criteria_time update for the final iteration.
		start_time = wall_time();
		if (iter > 1 && this->check_exit_conditions(fval, fprev, normg,
		                                            normg0, x.norm(), normdx,
		                                            last_iteration_successful,
		                                            &exit_condition_cache, results)) {
			break;
		}
		if (iter >= this->maximum_iterations) {
			results->exit_condition = SolverResults::NO_CONVERGENCE;
			break;
		}

		if (this->callback_function) {
			CallbackInformation information;
			information.objective_value = fval;
			information.x = &x;
			information.g = &g;

			// A false return from the callback aborts the solve.
			if (!callback_function(information)) {
				results->exit_condition = SolverResults::USER_ABORT;
				break;
			}
		}

		results->stopping_criteria_time += wall_time() - start_time;

		//
		// Compute search direction via L-BGFS two-loop recursion.
		//
		start_time = wall_time();
		bool should_restart = false;

		double H0 = 1.0;
		if (iter > 0) {
			// If the gradient is identical two iterations in a row,
			// y will be the zero vector and H0 will be NaN. In this
			// case the line search will fail and L-BFGS will be restarted
			// with a steepest descent step.
			H0 = s[0]->dot(*y[0]) / y[0]->dot(*y[0]);

			// If isinf(H0) || isnan(H0)
			if (H0 ==  std::numeric_limits<double>::infinity() ||
			    H0 == -std::numeric_limits<double>::infinity() ||
			    H0 != H0) {
				should_restart = true;
			}
		}

		q = -g;

		for (int h = 0; h < this->lbfgs_history_size; ++h) {
			alpha[h] = rho[h] * s[h]->dot(q);
			q = q - alpha[h] * (*y[h]);
		}

		r = H0 * q;

		for (int h = this->lbfgs_history_size - 1; h >= 0; --h) {
			double beta = rho[h] * y[h]->dot(r);
			r = r + (*s[h]) * (alpha[h] - beta);
		}

		// If the function improves very little, the approximated Hessian
		// might be very bad. If this is the case, it is better to discard
		// the history once in a while. This allows the solver to correctly
		// solve some badly scaled problems.
		double restart_test = std::fabs(fval - fprev) /
		                      (std::fabs(fval) + std::fabs(fprev));
		if (iter > 0 && iter % 100 == 0 && restart_test
		                                   < this->lbfgs_restart_tolerance) {
			should_restart = true;
		}
		if (! last_iteration_successful) {
			should_restart = true;
		}

		if (should_restart) {
			if (this->log_function) {
				char str[1024];
				if (number_of_restarts <= 10) {
					std::sprintf(str, "Restarting: fval = %.3e, deltaf = %.3e, max|g_i| = %.3e, test = %.3e",
					             fval, std::fabs(fval - fprev), normg, restart_test);
					this->log_function(str);
				}
				if (number_of_restarts == 10) {
					this->log_function("NOTE: No more restarts will be reported.");
				}
				number_of_restarts++;
			}
			// Fall back to steepest descent and discard the history.
			r = -g;
			for (int h = 0; h < this->lbfgs_history_size; ++h) {
				(*s[h]).setZero();
				(*y[h]).setZero();
			}
			rho.setZero();
			alpha.setZero();
			// H0 is not used, but its value will be printed.
			H0 = std::numeric_limits<double>::quiet_NaN();
		}

		results->lbfgs_update_time += wall_time() - start_time;

		//
		// Perform line search.
		//
		start_time = wall_time();
		double start_alpha = 1.0;
		// In the first iteration, start with a much smaller step
		// length. (heuristic used by e.g. minFunc)
		if (iter == 0) {
			double sumabsg = 0.0;
			for (size_t i = 0; i < n; ++i) {
				sumabsg += std::fabs(g[i]);
			}
			start_alpha = std::min(1.0, 1.0 / sumabsg);
		}
		double alpha_step = this->perform_linesearch(function, x, fval, g,
		                                             r, &x2, start_alpha);

		// A non-positive step length is treated as a failed line search.
		if (alpha_step <= 0) {
			if (this->log_function) {
				this->log_function("Line search failed.");
				char str[1024];
				std::sprintf(str, "%4d %+.3e %9.3e %.3e %.3e %.3e %.3e",
					iter, fval, std::fabs(fval - fprev), normg, alpha_step, H0, rho[0]);
				this->log_function(str);
			}

			if (! last_iteration_successful || number_of_line_search_failures++ > 10) {
				// This happens quite seldom. Every time it has happened, the function
				// was actually converged to a solution.
				results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
				break;
			}

			last_iteration_successful = false;
		}
		else {
			// Record length of this step.
			normdx = alpha_step * r.norm();
			// Compute new point.
			x_prev = x;
			x = x + alpha_step * r;

			last_iteration_successful = true;
		}

		results->backtracking_time += wall_time() - start_time;

		//
		// Log the results of this iteration.
		//
		start_time = wall_time();

		// Log less often as the iteration count grows.
		int log_interval = 1;
		if (iter > 30) {
			log_interval = 10;
		}
		if (iter > 200) {
			log_interval = 100;
		}
		if (iter > 2000) {
			log_interval = 1000;
		}
		if (this->log_function && iter % log_interval == 0) {
			if (iter == 0) {
				this->log_function("Itr f deltaf max|g_i| alpha H0 rho");
			}

			this->log_function(
				to_string(
					std::setw(4), iter, " ",
					std::setw(10), std::setprecision(3), std::scientific, std::showpos, fval, std::noshowpos, " ",
					std::setw(9),  std::setprecision(3), std::scientific, std::fabs(fval - fprev), " ",
					std::setw(9),  std::setprecision(3), std::setprecision(3), std::scientific, normg, " ",
					std::setw(9),  std::setprecision(3), std::scientific, alpha_step, " ",
					std::setw(9),  std::setprecision(3), std::scientific, H0, " ",
					std::setw(9),  std::setprecision(3), std::scientific, rho[0]
				)
			);
		}
		results->log_time += wall_time() - start_time;

		fprev = fval;
		iter++;
	}

	function.copy_global_to_user(x);
	results->total_time += wall_time() - global_start_time;

	if (this->log_function) {
		char str[1024];
		std::sprintf(str, " end %+.3e %.3e", fval, normg);
		this->log_function(str);
	}
}
// Returns the wall_time value constructed from the stored _ticks.
wall_time now() const {
    wall_time current(_ticks);
    return current;
}
// Advances the simulation by one time step (t is incremented by 1).
//
// If the magnetic fields were synchronized on entry they are restored before
// stepping and re-synchronized afterwards, with the original synchronization
// count put back. Unless `quiet` is set, a progress line is printed at most
// once per MIN_OUTPUT_TIME of wall-clock time. The whole call is charged to
// the Stepping timing category.
void fields::step() {
  // however many times the fields have been synched, we want to restore now
  int save_synchronized_magnetic_fields = synchronized_magnetic_fields;
  if (synchronized_magnetic_fields) {
    synchronized_magnetic_fields = 1; // reset synchronization count
    restore_magnetic_fields();
  }

  am_now_working_on(Stepping);

  // On the very first step (t == 0), initialise the progress bookkeeping.
  if (!t) {
    last_step_output_wall_time = wall_time();
    last_step_output_t = t;
  }
  if (!quiet && wall_time() > last_step_output_wall_time + MIN_OUTPUT_TIME) {
    // Average seconds per step since the previous progress line.
    master_printf("on time step %d (time=%g), %g s/step\n", t, time(),
                  (wall_time() - last_step_output_wall_time) /
                  (t - last_step_output_t));
    if (save_synchronized_magnetic_fields)
      master_printf("  (doing expensive timestepping of synched fields)\n");
    last_step_output_wall_time = wall_time();
    last_step_output_t = t;
  }

  phase_material();

  // update cached conductivity-inverse array, if needed
  for (int i=0;i<num_chunks;i++) chunks[i]->s->update_condinv();

  calc_sources(time()); // for B sources
  step_db(B_stuff);
  step_source(B_stuff);
  step_boundaries(B_stuff);
  calc_sources(time() + 0.5*dt); // for integrated H sources
  update_eh(H_stuff);
  step_boundaries(WH_stuff);
  update_pols(H_stuff);
  step_boundaries(PH_stuff);
  step_boundaries(H_stuff);

  if (fluxes) fluxes->update_half();

  calc_sources(time() + 0.5*dt); // for D sources
  step_db(D_stuff);
  step_source(D_stuff);
  step_boundaries(D_stuff);
  calc_sources(time() + dt); // for integrated E sources
  update_eh(E_stuff);
  step_boundaries(WE_stuff);
  update_pols(E_stuff);
  step_boundaries(PE_stuff);
  step_boundaries(E_stuff);

  if (fluxes) fluxes->update();
  t += 1;
  update_dfts();
  finished_working();

  // re-synch magnetic fields if they were previously synchronized
  if (save_synchronized_magnetic_fields) {
    synchronize_magnetic_fields();
    synchronized_magnetic_fields = save_synchronized_magnetic_fields;
  }
}
// Minimizes `function` with the Nelder-Mead simplex method.
//
// function: the objective. Its current user state seeds the initial simplex,
//           and the best simplex point is written back to it on exit.
// results:  receives the exit condition and accumulates per-phase timings
//           (startup, function evaluation, stopping criteria, logging, total).
//
// A problem with zero scalars returns immediately with FUNCTION_TOLERANCE and
// no timing update. The code treats simplex[0] as the best point and
// simplex[n] as the worst after each std::sort (presumably SimplexPoint is
// ordered by value - confirm against its operator<).
void NelderMeadSolver::solve(const Function& function,
                             SolverResults* results) const
{
	double global_start_time = wall_time();

	// Dimension of problem.
	size_t n = function.get_number_of_scalars();

	if (n == 0) {
		results->exit_condition = SolverResults::FUNCTION_TOLERANCE;
		return;
	}

	// The Nelder-Mead simplex.
	std::vector<SimplexPoint> simplex(n + 1);

	// Copy the user state to the current point.
	Eigen::VectorXd x;
	function.copy_user_to_global(&x);

	initialize_simplex(function, x, &simplex);

	SimplexPoint mean_point;
	SimplexPoint reflection_point;
	SimplexPoint expansion_point;
	mean_point.x.resize(n);
	reflection_point.x.resize(n);
	expansion_point.x.resize(n);

	double fmin  = std::numeric_limits<double>::quiet_NaN();
	double fmax  = std::numeric_limits<double>::quiet_NaN();
	double fval  = std::numeric_limits<double>::quiet_NaN();
	double area  = std::numeric_limits<double>::quiet_NaN();
	double area0 = std::numeric_limits<double>::quiet_NaN();
	double length  = std::numeric_limits<double>::quiet_NaN();
	double length0 = std::numeric_limits<double>::quiet_NaN();

	Eigen::MatrixXd area_mat(n, n);

	//
	// START MAIN ITERATION
	//
	results->startup_time   += wall_time() - global_start_time;
	results->exit_condition = SolverResults::INTERNAL_ERROR;
	int iter = 0;
	int n_shrink_in_a_row = 0;
	while (true) {

		//
		// In each iteration, the worst point in the simplex
		// is replaced with a new one.
		//
		double start_time = wall_time();

		mean_point.x.setZero();
		fval = 0;
		// Compute the mean of the best n points.
		for (size_t i = 0; i < n; ++i) {
			mean_point.x += simplex[i].x;
			fval         += simplex[i].value;
		}
		fval         /= double(n);
		mean_point.x /= double(n);
		fmin = simplex[0].value;
		fmax = simplex[n].value;

		const char* iteration_type = "n/a";

		// Compute the reflexion point and evaluate it.
		reflection_point.x = 2.0 * mean_point.x - simplex[n].x;
		reflection_point.value = function.evaluate(reflection_point.x);

		bool is_shrink = false;
		if (simplex[0].value <= reflection_point.value &&
		    reflection_point.value < simplex[n - 1].value) {
			// Reflected point is neither better nor worst in the
			// new simplex.
			std::swap(reflection_point, simplex[n]);
			iteration_type = "Reflect 1";
		}
		else if (reflection_point.value < simplex[0].value) {
			// Reflected point is better than the current best; try
			// to go farther along this direction.

			// Compute expansion point.
			expansion_point.x = 3.0 * mean_point.x - 2.0 * simplex[n].x;
			expansion_point.value = function.evaluate(expansion_point.x);

			if (expansion_point.value < reflection_point.value) {
				std::swap(expansion_point, simplex[n]);
				iteration_type = "Expansion";
			}
			else {
				std::swap(reflection_point, simplex[n]);
				iteration_type = "Reflect 2";
			}
		}
		else {
			// Reflected point is still worse than x[n]; contract.
			bool success = false;

			if (simplex[n - 1].value <= reflection_point.value &&
			    reflection_point.value < simplex[n].value) {
				// Try to perform "outside" contraction.
				// expansion_point is reused to hold the contraction point.
				expansion_point.x = 1.5 * mean_point.x - 0.5 * simplex[n].x;
				expansion_point.value = function.evaluate(expansion_point.x);

				if (expansion_point.value <= reflection_point.value) {
					std::swap(expansion_point, simplex[n]);
					success = true;
					iteration_type = "Outside contraction";
				}
			}
			else {
				// Try to perform "inside" contraction.
				expansion_point.x = 0.5 * mean_point.x + 0.5 * simplex[n].x;
				expansion_point.value = function.evaluate(expansion_point.x);

				if (expansion_point.value < simplex[n].value) {
					std::swap(expansion_point, simplex[n]);
					success = true;
					iteration_type = "Inside contraction";
				}
			}

			if (! success) {
				// Neither outside nor inside contraction was acceptable;
				// shrink the simplex toward the best point.
				for (size_t i = 1; i < n + 1; ++i) {
					simplex[i].x = 0.5 * (simplex[0].x + simplex[i].x);
					simplex[i].value = function.evaluate(simplex[i].x);
					iteration_type = "Shrink";
					is_shrink = true;
				}
			}
		}

		std::sort(simplex.begin(), simplex.end());

		results->function_evaluation_time += wall_time() - start_time;

		//
		// Test stopping criteriea
		//
		// Leaving the loop from this section skips the
		// stopping_criteria_time update for the final iteration.
		start_time = wall_time();

		// Compute the area of the simplex.
		// "area" is |det| of the edge vectors from simplex[n];
		// "length" is the longest such edge.
		length = 0;
		for (size_t i = 0; i < n; ++i) {
			area_mat.col(i) = simplex[i].x - simplex[n].x;
			length = std::max(length, area_mat.col(i).norm());
		}
		area = std::abs(area_mat.determinant());
		if (iter == 0) {
			area0 = area;
			length0 = length;
		}

		if (area / area0 < this->area_tolerance) {
			results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
			break;
		}

		if (area == 0) {
			results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
			break;
		}

		if (length / length0 < this->length_tolerance) {
			results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
			break;
		}

		if (is_shrink) {
			n_shrink_in_a_row++;
		}
		else {
			n_shrink_in_a_row = 0;
		}
		// More than 50 consecutive shrink steps is treated as convergence.
		if (n_shrink_in_a_row > 50) {
			results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
			break;
		}

		if (iter >= this->maximum_iterations) {
			results->exit_condition = SolverResults::NO_CONVERGENCE;
			break;
		}

		if (this->callback_function) {
			CallbackInformation information;
			information.objective_value = simplex[0].value;
			information.x = &simplex[0].x;

			// A false return from the callback aborts the solve.
			if (!callback_function(information)) {
				results->exit_condition = SolverResults::USER_ABORT;
				break;
			}
		}

		results->stopping_criteria_time += wall_time() - start_time;

		//
		// Restarting
		//
		//if (area / area1 < 1e-10) {
		//	x = simplex[0].x;
		//	initialize_simplex(function, x, &simplex);
		//	area1 = area;
		//	if (this->log_function) {
		//		this->log_function("Restarted.");
		//	}
		//}

		//
		// Log the results of this iteration.
		//
		start_time = wall_time();

		// Log less often as the iteration count grows.
		int log_interval = 1;
		if (iter > 30) {
			log_interval = 10;
		}
		if (iter > 200) {
			log_interval = 100;
		}
		if (iter > 2000) {
			log_interval = 1000;
		}
		if (this->log_function && iter % log_interval == 0) {
			char str[1024];
			if (iter == 0) {
				this->log_function("Itr min(f) avg(f) max(f) area length type");
			}
			std::sprintf(str, "%6d %+.3e %+.3e %+.3e %.3e %.3e %s",
				iter, fmin, fval, fmax, area, length, iteration_type);
			this->log_function(str);
		}
		results->log_time += wall_time() - start_time;

		iter++;
	}

	// Return the best point as solution.
	function.copy_global_to_user(simplex[0].x);
	results->total_time += wall_time() - global_start_time;

	if (this->log_function) {
		char str[1024];
		std::sprintf(str, " end %+.3e %.3e %.3e", fval, area, length);
		this->log_function(str);
	}
}
void main(void) { // Malloc spaces for four matrix double *A = malloc(sizeof(double) * SIZE * SIZE); fill_matrix(A, SIZE); double *B = malloc(sizeof(double) * SIZE * SIZE); fill_matrix(B, SIZE); double *C = malloc(sizeof(double) * SIZE * SIZE); memset(C, 0, sizeof(double) * SIZE * SIZE); double *D = malloc(sizeof(double) * SIZE * SIZE); memset(D, 0, sizeof(double) * SIZE * SIZE); // struct to timing struct timeval begin, end; // test function gettimeofday(&begin, NULL); square_dgemm(SIZE, A, B, C); gettimeofday(&end, NULL); // niave multipily naive_multiply(A, B, D, SIZE); // validate result, if wrong, print four matrix for(int i=0; i<SIZE*SIZE; i++) { if(C[i] != D[i]) { printf("WRONG.\n"); for(int x=0; x<SIZE; x++) { for(int y=0; y<SIZE; y++) { printf("%f ", A[x*SIZE+y]); } printf("\n"); } printf("-----------\n"); for(int x=0; x<SIZE; x++) { for(int y=0; y<SIZE; y++) { printf("%f ", B[x*SIZE+y]); } printf("\n"); } printf("-----------\n"); for(int x=0; x<SIZE; x++) { for(int y=0; y<SIZE; y++) { printf("%f ", C[x*SIZE+y]); } printf("\n"); } printf("-----------\n"); for(int x=0; x<SIZE; x++) { for(int y=0; y<SIZE; y++) { printf("%f ", D[x*SIZE+y]); } printf("\n"); } return; } } printf("CORRECT.^_^\n"); printf("Single Round Time use: %ld usec.\n", (end.tv_sec-begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec)); /* Time a "sufficiently long" sequence of calls to reduce noise */ double Gflops_s, seconds = -1.0; double timeout = 0.1; // "sufficiently long" := at least 1/10 second. for (int n_iterations = 1; seconds < timeout; n_iterations *= 2) { /* Warm-up */ square_dgemm (SIZE, A, B, C); /* Benchmark n_iterations runs of square_dgemm */ seconds = -wall_time(); for (int it = 0; it < n_iterations; ++it) square_dgemm (SIZE, A, B, C); seconds += wall_time(); /* compute Mflop/s rate */ Gflops_s = 2.e-9 * n_iterations * SIZE * SIZE * SIZE / seconds; } printf ("Size: %d\tGflop/s: %.3g\n", SIZE, Gflops_s); }
// Command-line driver: searches for low-rank decompositions of the
// matrix-multiplication tensor for an m x k x n product.
//
// For each of `numSeeds` consecutive seeds it builds a random starting point
// (or reads one from --input), then runs the selected method ("als",
// "sparsify" or "round") until the iteration limit, the time limit, or the
// convergence tolerance is hit.  Results are printed to stdout and optionally
// written with write_output().  Returns 0 on completion; an unknown method
// goes through die().
int main(int argc, char* argv[]) {
  // Print help if necessary
  bool help = read_bool(argc, argv, "--help", false);
  if ((argc < 2) || (help)) {
    usage(argv);
    return 0;
  }

  // Use parameters struct for passing parameters to kernels efficiently
  parameters prm;

  // Parse inputs
  prm.matDims[0] = read_int(argc, argv, "--m", 2);
  prm.matDims[1] = read_int(argc, argv, "--k", 2);
  prm.matDims[2] = read_int(argc, argv, "--n", 2);
  prm.rank = read_int(argc, argv, "--rank", 7);
  prm.method = read_string(argc, argv, "--method", (char *)"als");
  int maxIters = read_int(argc, argv, "--maxiters", 1000);
  int maxSecs = read_int(argc, argv, "--maxsecs", 1000);
  double tol = read_double(argc, argv, "--tol", 1e-8);
  int printItn = read_int(argc, argv, "--printitn", 0);
  double printTol = read_double(argc, argv, "--printtol", 1.0);
  int seed = read_int(argc, argv, "--seed", 0);
  int numSeeds = read_int(argc, argv, "--numseeds", 1);
  bool verbose = read_bool(argc, argv, "--verbose", false);
  prm.rnd_maxVal = read_double(argc,argv,"--maxval",1.0);
  prm.rnd_pwrOfTwo = read_int(argc,argv,"--pwrof2",0);
  bool roundFinal = read_bool(argc, argv, "--rndfin",false);
  prm.alpha = read_double(argc,argv, "--alpha", 0.1);
  // A non-zero --M sets all three M values at once; otherwise each is read
  // individually with a default of -1.
  int M = read_int(argc,argv, "--M", 0);
  if (M) {
    prm.M[0] = M;
    prm.M[1] = M;
    prm.M[2] = M;
  } else {
    prm.M[0] = read_int(argc, argv, "--M0", -1);
    prm.M[1] = read_int(argc, argv, "--M1", -1);
    prm.M[2] = read_int(argc, argv, "--M2", -1);
  }
  char * infile = read_string(argc, argv, "--input", NULL);
  char * outfile = read_string(argc, argv, "--output", NULL);

  // Echo the effective parameters when running verbosely.
  if (verbose) {
    // Disable stdout buffering so progress appears immediately.
    setbuf(stdout, NULL);
    printf("\n\n---------------------------------------------------------\n");
    printf("PARAMETERS\n");
    printf("dimensions = %d %d %d\n",prm.matDims[0],prm.matDims[1],prm.matDims[2]);
    printf("rank = %d\n",prm.rank);
    printf("method = %s\n",prm.method);
    if (infile)
      printf("input = %s\n",infile);
    else {
      if (numSeeds == 1)
        printf("input = seed %d\n",seed);
      else
        printf("inputs = seeds %d-%d\n",seed,seed+numSeeds-1);
    }
    if (outfile)
      printf("output = %s\n",outfile);
    else
      printf("output = none\n");
    if (!strcmp(prm.method,"als")) {
      printf("tol = %1.2e\n",tol);
      printf("alpha = %1.2e\n",prm.alpha);
      // NOTE(review): the label "maval" looks like a typo for "maxval".
      printf("maval = %1.2e\n",prm.rnd_maxVal);
      printf("M's = (%d,%d,%d)\n",prm.M[0],prm.M[1],prm.M[2]);
      printf("maxiters = %d\n",maxIters);
      printf("maxsecs = %d\n",maxSecs);
      printf("printitn = %d\n",printItn);
      printf("printtol = %1.2e\n",printTol);
    }
    printf("---------------------------------------------------------\n");
  }

  // Initialize other variables
  int i, j, k, numIters, mkn, tidx[3];
  double err, errOld, errChange = 0.0, start_als, start_search, elapsed, threshold;

  // Compute tensor dimensions
  prm.dims[0] = prm.matDims[0]*prm.matDims[1];
  prm.dims[1] = prm.matDims[1]*prm.matDims[2];
  prm.dims[2] = prm.matDims[0]*prm.matDims[2];

  // Compute tensor's nnz, total number of entries, and Frobenius norm
  mkn = prm.matDims[0]*prm.matDims[1]*prm.matDims[2];
  prm.mkn2 = mkn*mkn;
  prm.xNorm = sqrt(mkn);

  // Compute number of columns in matricized tensors
  for (i = 0; i < 3; i++)
    prm.mtCols[i] = prm.mkn2 / prm.dims[i];

  // Construct three matricizations of matmul tensor
  // NOTE(review): none of the malloc/calloc results in this function are
  // checked for NULL before use.
  prm.X = (double**) malloc( 3 * sizeof(double*) );
  for (i = 0; i < 3; i++)
    prm.X[i] = (double*) calloc( prm.mkn2, sizeof(double) );
  // One entry of each matricization is set to 1 per (mm, kk, nn) triple; the
  // three arrays hold the same entries with a different index leading.
  for (int mm = 0; mm < prm.matDims[0]; mm++)
    for (int kk = 0; kk < prm.matDims[1]; kk++)
      for (int nn = 0; nn < prm.matDims[2]; nn++) {
        tidx[0] = mm + kk*prm.matDims[0];
        tidx[1] = kk + nn*prm.matDims[1];
        tidx[2] = mm + nn*prm.matDims[0];
        prm.X[0][tidx[0]+prm.dims[0]*(tidx[1]+prm.dims[1]*tidx[2])] = 1;
        prm.X[1][tidx[1]+prm.dims[1]*(tidx[0]+prm.dims[0]*tidx[2])] = 1;
        prm.X[2][tidx[2]+prm.dims[2]*(tidx[0]+prm.dims[0]*tidx[1])] = 1;
      }

  // Allocate factor weights and matrices: working, initial, and model
  prm.lambda = (double*) malloc( prm.rank * sizeof(double) );
  prm.U = (double**) malloc( 3 * sizeof(double*) );
  double** U0 = (double**) malloc( 3 * sizeof(double*) );
  prm.model = (double**) malloc( 3 * sizeof(double*) );
  for (i = 0; i < 3; i++) {
    // NOTE(review): U[i] is sized mkn2 while U0[i]/model[i] are sized
    // dims[i]*rank, and dims[i]*rank entries are later copied into U[i];
    // confirm dims[i]*rank can never exceed mkn2 for accepted inputs.
    prm.U[i] = (double*) calloc( prm.mkn2, sizeof(double) );
    U0[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) );
    prm.model[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) );
  }

  // Allocate coefficient matrix within ALS (Khatri-Rao product)
  int maxMatDim = prm.matDims[0];
  if (maxMatDim < prm.matDims[1]) maxMatDim = prm.matDims[1];
  if (maxMatDim < prm.matDims[2]) maxMatDim = prm.matDims[2];
  prm.A = (double*) malloc( maxMatDim*mkn*prm.rank * sizeof(double) );

  // Allocate workspaces
  prm.tau = (double*) malloc( mkn * sizeof(double) );
  prm.lwork = maxMatDim*mkn*prm.rank;
  prm.work = (double*) malloc( prm.lwork * sizeof(double) );
  prm.iwork = (int*) malloc( prm.mkn2 * sizeof(int) );

  // Allocate matrices for normal equations
  int maxDim = prm.dims[0];
  if (maxDim < prm.dims[1]) maxDim = prm.dims[1];
  if (maxDim < prm.dims[2]) maxDim = prm.dims[2];
  prm.NE_coeff = (double*) malloc( prm.rank*prm.rank * sizeof(double) );
  prm.NE_rhs = (double*) malloc( maxDim*prm.rank * sizeof(double) );
  prm.residual = (double*) malloc( prm.mkn2 * sizeof(double) );

  //--------------------------------------------------
  // Search Loop
  //--------------------------------------------------
  // mySeed advances by one per pass; statusCnt/status drive the coarse
  // progress messages printed for large seed sweeps.
  int mySeed = seed, numGoodSeeds = 0, statusCnt = 0, status = 1;
  start_search = wall_time();
  for (int seed_cnt = 0; seed_cnt < numSeeds; ++seed_cnt) {
    // Set starting point from random seed (match Matlab Tensor Toolbox)
    RandomMT cRMT(mySeed);
    for (i = 0; i < 3; i++)
      for (j = 0; j < prm.dims[i]; j++)
        for (k = 0; k < prm.rank; k++)
          U0[i][j+k*prm.dims[i]] = cRMT.genMatlabMT();
    for (i = 0; i < prm.rank; i++)
      prm.lambda[i] = 1.0;

    // Copy starting point
    for (i = 0; i < 3; i++)
      cblas_dcopy(prm.dims[i]*prm.rank,U0[i],1,prm.U[i],1);

    // read from file if input is given
    // (this happens on every pass, after the random starting point is set)
    if( infile )
      read_input( infile, prm );

    if (verbose) {
      printf("\nSTARTING POINT...\n");
      for (i = 0; i < 3; i++) {
        printf("Factor matrix %d:\n",i);
        print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]);
      }
      printf("\n");
    }

    //--------------------------------------------------
    // Main ALS Loop
    //--------------------------------------------------
    start_als = wall_time();
    err = 1.0;
    threshold = 1e-4;
    // Stops at maxIters iterations or once maxSecs seconds have elapsed.
    for (numIters = 0; numIters < maxIters && (wall_time()-start_als) < maxSecs; numIters++) {
      errOld = err;
      if (!strcmp(prm.method,"als")) {
        // Perform an iteration of ALS using NE with Smirnov's penalty term
        err = als( prm );
      } else if (!strcmp(prm.method,"sparsify")) {
        // print stats before sparsifying
        printf("Old residual: %1.2e\n",compute_residual(prm,2,true));
        printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
        // sparsify and return
        printf("\nSparsifying...\n\n");
        sparsify( prm );
        // Forces the loop condition to fail so this branch runs only once.
        numIters = maxIters;
        // print stats after sparsifying
        printf("New residual: %1.2e\n",compute_residual(prm,2,true));
        printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
      } else if (!strcmp(prm.method,"round")) {
        // print stats before rounding
        printf("Old residual: %1.2e\n",compute_residual(prm,2,true));
        printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
        // round and return
        for (i = 0; i < 3; i++) {
          capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal);
          rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo);
        }
        // Forces the loop condition to fail so this branch runs only once.
        numIters = maxIters;
        // print stats after rounding
        printf("New residual: %1.2e\n",compute_residual(prm,2,true));
        printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
      } else
        die("Invalid method\n");

      // Compute change in relative residual norm
      errChange = fabs(err - errOld);

      // Print info at current iteration
      if ((printItn > 0) && (((numIters + 1) % printItn) == 0)) {
        // print info
        printf ("Iter %d: residual = %1.5e change = %1.5e\n", numIters + 1, err, errChange);
      }

      // Check for convergence
      // (never on the first iteration, where errOld is the initial 1.0)
      if ( numIters > 0 && errChange < tol )
        break;
    }

    // If rounding, round final solution and re-compute residual
    if(roundFinal) {
      // normalize columns in A and B factors, put arbitrary weights into C
      normalize_model( prm, 2 );
      // cap large values and round to nearest power of 2
      for (i = 0; i < 3; i++) {
        capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal);
        rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo);
      }
      err = compute_residual(prm,0,true);
    }

    // Print status if searching over many seeds
    // (one message each time another numSeeds/10 seeds have been processed)
    statusCnt++;
    if (numSeeds > 1000 && statusCnt == numSeeds/10) {
      printf("...%d%% complete...\n",10*status);
      status++;
      statusCnt = 0;
    }

    // Print final info
    elapsed = wall_time() - start_als;
    if ((printItn > 0 || verbose) && !strcmp(prm.method,"als")) {
      if (infile)
        printf("\nInput %s ",infile);
      else
        printf("\nInitial seed %d ",mySeed);
      // NOTE(review): elapsed/numIters divides by zero when the loop body
      // never ran (e.g. --maxiters 0); the printed average is then inf/nan.
      printf("achieved residual %1.3e in %d iterations and %1.3e seconds\n \t final residual change: %1.3e\n \t average time per iteration: %1.3e s\n", err, numIters, elapsed, errChange, elapsed/numIters);
    }
    if (verbose) {
      printf("\nSOLUTION...\n");
      for (i = 0; i < 3; i++) {
        printf("Factor matrix %d:\n",i);
        if (roundFinal || !strcmp(prm.method,"round"))
          print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo);
        else
          print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]);
      }
      // A seed counts as "good" when its residual is below --printtol.
      if (err < printTol)
        numGoodSeeds++;
    } else if (err < printTol) {
      // Non-verbose runs only report seeds whose residual beats --printtol.
      numGoodSeeds++;
      printf("\n\n***************************************\n");
      if (infile)
        printf("Input %s: ",infile);
      else
        printf("Initial seed %d: ",mySeed);
      printf("after %d iterations, achieved residual %1.3e with final residual change of %1.3e\n", numIters, err, errChange);
      if (roundFinal) {
        for (i = 0; i < 3; i++) {
          printf("Factor matrix %d:\n",i);
          print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo);
        }
        // Total nonzero count across the three factor matrices.
        int count = 0;
        for (i = 0; i < 3; i++)
          count += nnz(prm.U[i],prm.dims[i]*prm.rank);
        printf("\ttotal nnz in solution: %d\n",count);
        printf("\tnaive adds/subs: %d\n",count - prm.dims[2] - 2*prm.rank);
      }
      printf("***************************************\n\n\n");
    }

    // write to output
    // (called once per seed with the same outfile)
    if( outfile )
      write_output( outfile, prm );

    mySeed++;
  }

  // Final report of processor statistics
  elapsed = wall_time()-start_search;

  // Print stats
  if (!strcmp(prm.method,"als")) {
    printf("\n\n------------------------------------------------------------\n");
    printf("Time elapsed: \t%1.1e\tseconds\n",elapsed);
    printf("Total number of seeds tried: \t%d\n",numSeeds);
    printf("Total number of good seeds: \t%d",numGoodSeeds);
    printf("\t(residual < %2.1e)\n",printTol);
    printf("------------------------------------------------------------\n");
  }

  // free allocated memory
  for (i = 0; i < 3; i++) {
    free( prm.X[i] );
    free( prm.U[i] );
    free( U0[i] );
    free( prm.model[i] );
  }
  free( prm.X );
  free( prm.U );
  free( U0 );
  free( prm.model );
  free( prm.lambda );
  free( prm.A );
  free( prm.NE_coeff );
  free( prm.NE_rhs );
  free( prm.residual );
  free( prm.tau );
  free( prm.work );
  free( prm.iwork );
  return 0;
}
void chord_main(char *conf_file, int parent_sock) { fd_set interesting, readable; int nfound, nfds; struct in_addr ia; char id[4*ID_LEN]; FILE *fp; int64_t stabilize_wait; struct timeval timeout; setprogname("chord"); srandom(getpid() ^ time(0)); memset(&srv, 0, sizeof(Server)); srv.to_fix_finger = NFINGERS-1; fp = fopen(conf_file, "r"); if (fp == NULL) eprintf("fopen(%s,\"r\") failed:", conf_file); if (fscanf(fp, "%hd", (short*)&srv.node.port) != 1) eprintf("Didn't find port in \"%s\"", conf_file); if (fscanf(fp, " %s\n", id) != 1) eprintf("Didn't find id in \"%s\"", conf_file); srv.node.id = atoid(id); /* Figure out one's own address somehow */ srv.node.addr = ntohl(get_addr()); ia.s_addr = htonl(srv.node.addr); fprintf(stderr, "Chord started.\n"); fprintf(stderr, "id="); print_id(stderr, &srv.node.id); fprintf(stderr, "\n"); fprintf(stderr, "ip=%s\n", inet_ntoa(ia)); fprintf(stderr, "port=%d\n", srv.node.port); initialize(&srv); srv_ref = &srv; join(&srv, fp); fclose(fp); FD_ZERO(&interesting); FD_SET(srv.in_sock, &interesting); FD_SET(parent_sock, &interesting); nfds = MAX(srv.in_sock, parent_sock) + 1; NumKeys = read_keys(ACCLIST_FILE, KeyArray, MAX_KEY_NUM); if (NumKeys == -1) { printf("Error opening file: %s\n", ACCLIST_FILE); } if (NumKeys == 0) { printf("No key found in %s\n", ACCLIST_FILE); } /* Loop on input */ for (;;) { readable = interesting; stabilize_wait = (int64_t)(srv.next_stabilize_us - wall_time()); stabilize_wait = MAX(stabilize_wait,0); timeout.tv_sec = stabilize_wait / 1000000UL; timeout.tv_usec = stabilize_wait % 1000000UL; nfound = select(nfds, &readable, NULL, NULL, &timeout); if (nfound < 0 && errno == EINTR) { continue; } if (nfound == 0) { stabilize_wait = (int64_t)(srv.next_stabilize_us - wall_time()); if( stabilize_wait <= 0 ) { stabilize( &srv ); } continue; } if (FD_ISSET(srv.in_sock, &readable)) { handle_packet(srv.in_sock); } else if (FD_ISSET(parent_sock, &readable)) { handle_packet(parent_sock); } else { assert(0); } } }
int main(int argc, char *argv[]) { const int length[] = {1, 10, 100, 1000, 10000, 100000, 1000000}; const int arrlength = sizeof(length)/sizeof(length[0]); const int r_max = 1000; // wiederholungen char *msg; int rank, r, i, name_length, size; MPI_Status status; double time, ttime; char name[MPI_MAX_PROCESSOR_NAME]; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); int destination[size]; // maps: rank => destination of messages MPI_Get_processor_name(name, &name_length); if(size == 2) { destination[0] = 1; destination[1] = 0; } else if(size == 4) { destination[0] = 3; destination[2] = 1; destination[3] = 0; destination[1] = 2; } else { MPI_Abort(MPI_COMM_WORLD,0); } if(rank == 0) printf("size is %d\n", size); printf("%s: got rank %d\n",name, rank); sleep(1); for (i = 0; i < arrlength; i++){ msg = (char *)malloc(length[i]); if(rank%2 == 0) { time = wall_time(); time = wall_time(); } for (r = 0; r < r_max; r++){ // printf("rank %d, %d, %d\n", rank, i, r); if(rank%2 == 0) { MPI_Send(msg, length[i], MPI_CHAR, destination[rank], 0, MPI_COMM_WORLD); MPI_Recv(msg, length[i], MPI_CHAR, destination[rank], 0, MPI_COMM_WORLD, &status); } else { MPI_Recv(msg, length[i], MPI_CHAR, destination[rank], 0, MPI_COMM_WORLD, &status); MPI_Send(msg, length[i], MPI_CHAR, destination[rank], 0, MPI_COMM_WORLD); } } if(rank%2 == 0) { time = wall_time() - time; printf("%s: Zeit um %7d Bytes 2mal zu übertragen: %g s\n",name, length[i], time / r_max); } free(msg); } MPI_Finalize(); }
/* To determine the coordinates of the local node initially * Ping a subset of nodes and determine coordinates */ void init_coordinates(I3ServerList *list) { int n = MIN(NUM_LANDMARKS_COORDINATE, list->num_newservers + list->num_ping_list); I3ServerListNode *node = list->list, *temp_node; uint64_t start_time = wall_time(); Coordinates_RTT coord_rtt[NUM_LANDMARKS_COORDINATE]; int num_landmarks = 0; int started_full_list = 0; struct in_addr ia; nw_skt_t tmp_ping_sock; #ifdef ICMP_PING if (init_icmp_socket(&tmp_ping_sock) == -1) abort(); #else if (init_udp_socket(&tmp_ping_sock) == -1) abort(); #endif // wait for responses and accumulate // cut and pasted from below while ((wall_time() - start_time < COORD_INIT_PING_WAIT_TIME) && (num_landmarks < n)) { fd_set rset; struct timeval to; int ret; FD_ZERO(&rset); if (!node && !started_full_list) { node = list-> full_list; started_full_list = 1; } if (node) { ia.s_addr = htonl(node->addr); I3_PRINT_DEBUG1(I3_DEBUG_LEVEL_VERBOSE, "Sending ICMP echo request to %s\n", inet_ntoa(ia)); #ifdef ICMP_PING send_echo_request(tmp_ping_sock, node->addr, 0); #else i3_echo_request(tmp_ping_sock, node->addr, node->port, 0); #endif node = node->next_list; } FD_SET(tmp_ping_sock, &rset); to.tv_sec = 0; to.tv_usec = 200000ULL; if ((ret = select(tmp_ping_sock+1, &rset, NULL, NULL, &to)) < 0) { int err = nw_error(); if (err == EINTR) continue; else { perror("select"); abort(); } } // message received on icmp socket if (FD_ISSET(tmp_ping_sock, &rset)) { uint32_t addr; uint16_t port, seq; uint64_t rtt; #ifdef ICMP_PING if (recv_echo_reply(tmp_ping_sock, &addr, &seq, &rtt)) { #else if (recv_i3_echo_reply(tmp_ping_sock, &addr, &port, &seq, &rtt)) { #endif temp_node = lookup_i3server(list, addr); assert(NULL != temp_node); coord_rtt[num_landmarks].coord = temp_node->coord; coord_rtt[num_landmarks].rtt = rtt; num_landmarks++; ia.s_addr = htonl(addr); I3_PRINT_DEBUG4(I3_DEBUG_LEVEL_VERBOSE, "Node: %s Coordinate: %.1f:%.1f RTT: %Ld\n", inet_ntoa(ia), 
temp_node->coord.latitude, temp_node->coord.longitude, rtt); } } } nw_close(tmp_ping_sock); // compute own coordinate compute_coordinates(num_landmarks, coord_rtt); } /* Update the coordinates of a node using ping information */ void update_coordinate(I3ServerList *list, I3ServerListNode *next_to_ping) { Coordinates_RTT coord_rtt[NUM_LANDMARKS_COORDINATE]; int count, num_landmarks = 0; I3ServerListNode *node; // n1 and n2: number of landmarks from ping_list and rest in // proportion to the number of nodes in those lists int i, n = MIN(NUM_LANDMARKS_COORDINATE, list->num_newservers + list->num_ping_list); int n1 = ((float)list->num_ping_list/ (list->num_newservers + list->num_ping_list)) * n; int n2 = n-n1; // add from ping list count = 0; for (i = 0, node = list->list; i < list->num_ping_list, count < n1; node = node->next_list, ++i) { if (node->n > 0) { coord_rtt[count].rtt = get_rtt_node(node); coord_rtt[count].coord = node->coord; count++; } } num_landmarks = count; // add from rest count = 0; for (i = 0, node = list->full_list; i < list->num_newservers, count < n2; node = node->next_list, ++i) { if (node->n > 0) { coord_rtt[num_landmarks + count].rtt = get_rtt_node(node); coord_rtt[num_landmarks + count].coord = node->coord; count++; } } num_landmarks += count; // recompute coordinates compute_coordinates(num_landmarks, coord_rtt); // repopulate ping list afresh change_ping_list(list, &next_to_ping, 1); }
void main(int argc, char** argv) { args::Parser parser(argv[0], "Plays a replay into a set of images and a log of sounds"); String replay_path(utf8::decode(argv[0])); parser.add_argument("replay", store(replay_path)).help("an Antares replay script").required(); Optional<String> output_dir; parser.add_argument("-o", "--output", store(output_dir)) .help("place output in this directory"); int interval = 60; int width = 640; int height = 480; bool text = false; bool smoke = false; parser.add_argument("-i", "--interval", store(interval)) .help("take one screenshot per this many ticks (default: 60)"); parser.add_argument("-w", "--width", store(width)).help("screen width (default: 640)"); parser.add_argument("-h", "--height", store(height)).help("screen height (default: 480)"); parser.add_argument("-t", "--text", store_const(text, true)).help("produce text output"); parser.add_argument("-s", "--smoke", store_const(smoke, true)).help("run as smoke text"); parser.add_argument("--help", help(parser, 0)).help("display this help screen"); String error; if (!parser.parse_args(argc - 1, argv + 1, error)) { print(io::err, format("{0}: {1}\n", parser.name(), error)); exit(1); } if (output_dir.has()) { makedirs(*output_dir, 0755); } Preferences preferences; preferences.play_music_in_game = true; NullPrefsDriver prefs(preferences); EventScheduler scheduler; scheduler.schedule_event(unique_ptr<Event>(new MouseMoveEvent(wall_time(), Point(320, 240)))); // TODO(sfiera): add recurring snapshots to OffscreenVideoDriver. 
for (int64_t i = 1; i < 72000; i += interval) { scheduler.schedule_snapshot(i); } unique_ptr<SoundDriver> sound; if (!smoke && output_dir.has()) { String out(format("{0}/sound.log", *output_dir)); sound.reset(new LogSoundDriver(out)); } else { sound.reset(new NullSoundDriver); } NullLedger ledger; MappedFile replay_file(replay_path); if (smoke) { TextVideoDriver video({width, height}, Optional<String>()); video.loop(new ReplayMaster(replay_file.data(), output_dir), scheduler); } else if (text) { TextVideoDriver video({width, height}, output_dir); video.loop(new ReplayMaster(replay_file.data(), output_dir), scheduler); } else { OffscreenVideoDriver video({width, height}, output_dir); video.loop(new ReplayMaster(replay_file.data(), output_dir), scheduler); } }
/* The benchmarking program */ int main (int argc, char **argv) { printf ("Description:\t%s\n\n", dgemm_desc); /* Test sizes should highlight performance dips at multiples of certain powers-of-two */ int test_sizes[] = /* Multiples-of-32, +/- 1. Currently commented. */ {31,32,33,63,64,65,95,96,97,127,128,129,159,160,161,191,192,193,223,224,225,255,256,257,287,288,289,319,320,321,351,352,353,383,384,385,415,416,417,447,448,449,479,480,481,511,512,513,543,544,545,575,576,577,607,608,609,639,640,641,671,672,673,703,704,705,735,736,737,767,768,769,799,800,801,831,832,833,863,864,865,895,896,897,927,928,929,959,960,961,991,992,993,1023,1024,1025}; /* A representative subset of the first list. Currently uncommented. */ //{ 31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257, // 319, 320, 321, 417, 479, 480, 511, 512, 639, 640, 767, 768, 769 }; int nsizes = sizeof(test_sizes)/sizeof(test_sizes[0]); /* assume last size is also the largest size */ int nmax = test_sizes[nsizes-1]; /* allocate memory for all problems */ double* buf = NULL; buf = (double*) malloc (3 * nmax * nmax * sizeof(double)); if (buf == NULL) die ("failed to allocate largest problem size"); double Mflops_s[nsizes],per[nsizes],aveper; /* For each test size */ for (int isize = 0; isize < sizeof(test_sizes)/sizeof(test_sizes[0]); ++isize) { for( int block_size = 3;block_size<200;block_size++) { /* Create and fill 3 random matrices A,B,C*/ int n = test_sizes[isize]; double* A = buf + 0; double* B = A + nmax*nmax; double* C = B + nmax*nmax; fill (A, n*n); fill (B, n*n); fill (C, n*n); /* Measure performance (in Gflops/s). */ /* Time a "sufficiently long" sequence of calls to reduce noise */ double Gflops_s, seconds = -1.0; double timeout = 0.1; // "sufficiently long" := at least 1/10 second. 
for (int n_iterations = 1; seconds < timeout; n_iterations *= 2) { /* Warm-up */ square_dgemm (block_size,n, A, B, C); /* Benchmark n_iterations runs of square_dgemm */ seconds = -wall_time(); for (int it = 0; it < n_iterations; ++it) square_dgemm (block_size,n, A, B, C); seconds += wall_time(); /* compute Gflop/s rate */ Gflops_s = 2.e-9 * n_iterations * n * n * n / seconds; } /* Storing Mflop rate and calculating percentage of peak */ Mflops_s[isize] = Gflops_s*1000; per[isize] = Gflops_s*100/MAX_SPEED; printf ("Size: %d\t Block Size: %d\t Mflop/s: %8g\tPercentage:%6.2lf\n", n, block_size,Mflops_s[isize],per[isize]); /* Ensure that error does not exceed the theoretical error bound. */ /* C := A * B, computed with square_dgemm */ memset (C, 0, n * n * sizeof(double)); square_dgemm (block_size,n, A, B, C); /* Do not explicitly check that A and B were unmodified on square_dgemm exit * - if they were, the following will most likely detect it: * C := C - A * B, computed with reference_dgemm */ reference_dgemm(n, -1., A, B, C); /* A := |A|, B := |B|, C := |C| */ absolute_value (A, n * n); absolute_value (B, n * n); absolute_value (C, n * n); /* C := |C| - 3 * e_mach * n * |A| * |B|, computed with reference_dgemm */ reference_dgemm (n, -3.*DBL_EPSILON*n, A, B, C); /* If any element in C is positive, then something went wrong in square_dgemm */ for (int i = 0; i < n * n; ++i) if (C[i] > 0) die("*** FAILURE *** Error in matrix multiply exceeds componentwise error bounds.\n" ); } } free (buf); return 0; }
/*--------------------------------------------------------------------------------------------- * (function: simulateNextWave) *-------------------------------------------------------------------------------------------*/ int OdinInterface::simulateNextWave() { if(!num_vectors){ fprintf(stderr, "No vectors to simulate.\n"); } else { double total_time = 0; double simulation_time = 0; num_cycles = num_vectors*2; num_waves = 1; tvector = 0; double wave_start_time = wall_time(); //create a new wave wave++; int cycle_offset = SIM_WAVE_LENGTH * wave; int wave_length = SIM_WAVE_LENGTH; // Assign vectors to lines, either by reading or generating them. // Every second cycle gets a new vector. for (cycle = cycle_offset; cycle < cycle_offset + wave_length; cycle++) { if (is_even_cycle(cycle)) { if (input_vector_file) { char buffer[BUFFER_MAX_SIZE]; if (!get_next_vector(in, buffer)) error_message(SIMULATION_ERROR, 0, -1, (char*)"Could not read next vector."); tvector = parse_test_vector(buffer); } else { tvector = generate_random_test_vector(input_lines, cycle, hold_high_index, hold_low_index); } } add_test_vector_to_lines(tvector, input_lines, cycle); if (!is_even_cycle(cycle)) free_test_vector(tvector); } // Record the input vectors we are using. write_wave_to_file(input_lines, in_out, cycle_offset, wave_length, 1); // Write ModelSim script. write_wave_to_modelsim_file(netlist, input_lines, modelsim_out, cycle_offset, wave_length); double simulation_start_time = wall_time(); // Perform simulation for (cycle = cycle_offset; cycle < cycle_offset + wave_length; cycle++) { if (cycle) { simulate_cycle(cycle, stgs); } else { // The first cycle produces the stages, and adds additional // lines as specified by the -p option. pin_names *p = parse_pin_name_list(global_args.sim_additional_pins); stgs = simulate_first_cycle(netlist, cycle, p, output_lines); free_pin_name_list(p); // Make sure the output lines are still OK after adding custom lines. 
if (!verify_lines(output_lines)) error_message(SIMULATION_ERROR, 0, -1, (char*)"Problem detected with the output lines after the first cycle."); } } simulation_time += wall_time() - simulation_start_time; // Write the result of this wave to the output vector file. write_wave_to_file(output_lines, out, cycle_offset, wave_length, output_edge); total_time += wall_time() - wave_start_time; // Print netlist-specific statistics. if (!cycle_offset) { print_netlist_stats(stgs, num_vectors); fflush(stdout); } // Print statistics. print_simulation_stats(stgs, num_vectors, total_time, simulation_time); myCycle = cycle; } return myCycle; }
/* Returns the current wall_time() reading converted to ulong. */
ulong get_current_time()
{
    const ulong now = (ulong)wall_time();
    return now;
}