Example #1
0
/*
 * Write the current wall_time() reading to `out`, framed by `prefix` and
 * `suffix`.
 *
 * With CHORD_PRINT_LONG_TIME defined the full value is printed; otherwise
 * the value is shifted left and then right by 32 bits first, which keeps
 * only the low 32 bits of a 64-bit unsigned reading.
 */
void print_current_time(FILE *out, char *prefix, char *suffix)
{
#ifdef CHORD_PRINT_LONG_TIME
	const uint64_t stamp = wall_time();
#else
	const uint64_t stamp = (wall_time() << 32) >> 32;
#endif

	fprintf(out, "%s%"PRIu64"%s", prefix, stamp, suffix);
}
Example #2
0
/*
 * N-body driver.
 *
 * Reads the run parameters from the file named by argv[1], builds two
 * particle arrays, writes the initial state to "nbody_out.xyz", then runs
 * number_of_timesteps force-calculation steps, swapping the two arrays after
 * each step.  Timing statistics are printed to stdout.
 *
 * Returns program_success_code on success (0 when there is at most one
 * particle); exits with program_failure_code when a file cannot be opened or
 * closed, or when no input file was given.
 */
int main (int argc, char** argv)
{ /* main */
    Particle* particle_array = (Particle*)NULL;
    Particle* particle_array2 = (Particle*)NULL;
    int       timestep;

    /* argv[1] is mandatory: refuse to run rather than hand NULL to fopen. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <input file>\n",
                (argc > 0) ? argv[0] : "nbody");
        exit(program_failure_code);
    }

    FILE *input_data = fopen(argv[1], "r");
    if (input_data == NULL) {
        fprintf(stderr, "ERROR: can't open the input file %s.\n", argv[1]);
        exit(program_failure_code);
    }
    /* NOTE(review): input_data is never closed here; confirm whether
     * Particle_input_arguments() takes ownership of the stream. */
    Particle_input_arguments(input_data);

    particle_array = Particle_array_construct(number_of_particles);
    particle_array2 = Particle_array_construct(number_of_particles);

    Particle_array_initialize(particle_array, number_of_particles);

    FILE * fileptr = fopen("nbody_out.xyz", "w");
    if (fileptr == NULL) {
        fprintf(stderr, "ERROR: can't open the output file.\n");
        exit(program_failure_code);
    }
    Particle_array_output_xyz(fileptr, particle_array, number_of_particles);

    if (number_of_particles <= 1) return 0;

    double start = wall_time();

    for (timestep = 1; timestep <= number_of_timesteps; timestep++) {

        if ((timestep % timesteps_between_outputs) == 0 ) fprintf(stderr, "Starting timestep #%d.\n", timestep);

        Particle_array_calculate_forces_cuda(particle_array, particle_array2, number_of_particles, time_interval );

        /* swap arrays: the freshly computed state becomes the input of the next step */
        Particle * tmp = particle_array;
        particle_array = particle_array2;
        particle_array2 = tmp;

    } /* for timestep */

    #pragma omp taskwait
    double end = wall_time ();

    printf("Time in seconds: %g s.\n", end - start);
    /* Multiply in floating point so a large particles*timesteps product
     * cannot overflow an integer type. */
    printf("Particles per second: %g \n",
           ((double)number_of_particles * (double)number_of_timesteps) / (end - start));

    if ((number_of_timesteps % timesteps_between_outputs) != 0) {
        Particle_array_output_xyz(fileptr, particle_array, number_of_particles);
    }

    /* Release both buffers; the original only released the first one. */
    particle_array = Particle_array_destruct(particle_array, number_of_particles);
    particle_array2 = Particle_array_destruct(particle_array2, number_of_particles);

    if (fclose(fileptr) != 0) {
        fprintf(stderr, "ERROR: can't close the output file.\n");
        exit(program_failure_code);
    }

    return program_success_code;

} /* main */
Example #3
0
// Reports when this screen next wants its timer fired.
//
// LOADING:        `time` receives a value-initialized wall_time.
// TYPING / DONE:  `time` receives the stored _next_update.
// Returns true in those three states and false for any other state
// (in which case `time` is left untouched).
bool LoadingScreen::next_timer(wall_time& time) {
    if (_state == LOADING) {
        time = wall_time();
        return true;
    }
    if ((_state == TYPING) || (_state == DONE)) {
        time = _next_update;
        return true;
    }
    return false;
}
Example #4
0
// Called when the screen comes to the front: restarts the typed-character
// count and schedules the next update.
void DebriefingScreen::become_front() {
    _typed_chars = 0;

    // Outside of TYPING the next update is a value-initialized wall_time.
    if (_state != TYPING) {
        _next_update = wall_time();
        return;
    }

    // While typing, the next update is one typing delay from now.
    _next_update = now() + kTypingDelay;
}
Example #5
0
// Pops the timing stack.
//
// The wall-clock time elapsed since last_wall_time is charged to the sink
// currently in working_on (skipped while last_wall_time is negative), then
// working_on is restored from the top of was_working_on, the remaining
// entries move one slot toward index 0, and the vacated last slot is set
// to Other.
void fields::finished_working() {
  const double current = wall_time();
  if (last_wall_time >= 0) {
    times_spent[working_on] += current - last_wall_time;
  }
  last_wall_time = current;

  working_on = was_working_on[0];
  for (int k = 1; k < MEEP_TIMING_STACK_SZ; ++k) {
    was_working_on[k - 1] = was_working_on[k];
  }
  was_working_on[MEEP_TIMING_STACK_SZ - 1] = Other;
}
Example #6
0
// Pushes a new activity onto the timing stack.
//
// The wall-clock time elapsed since last_wall_time is charged to the sink
// currently in working_on (skipped while last_wall_time is negative).  The
// current sink is then pushed onto was_working_on and `s` becomes the
// active sink.  The oldest entry (last slot) is dropped when the stack is
// full.
void fields::am_now_working_on(time_sink s) {
  double now = wall_time();
  if (last_wall_time >= 0)
    times_spent[working_on] += now - last_wall_time;
  last_wall_time = now;
  // Shift entries one slot deeper, starting from the deepest slot.  Walking
  // upward instead (w[i+1] = w[i] for ascending i) would overwrite every
  // slot with w[0] and lose all deeper history.
  for (int i = MEEP_TIMING_STACK_SZ - 1; i > 0; --i)
    was_working_on[i] = was_working_on[i - 1];
  was_working_on[0] = working_on;
  working_on = s;
}
Example #7
0
// Returns the difference between the current wall_time() reading and `t`,
// and updates `t` in place to the new reading so that consecutive calls
// yield successive intervals.
double
wall_dtime(double &t)
{
  const double previous = t;

  t = wall_time();

  return t - previous;
}
Example #8
0
/*
 * Start (or restart) the named clock.
 *
 * If a clock called `name` already exists in the global `clocks` list its
 * startTime is reset to the current wall_time().  Otherwise a new entry with
 * a private copy of `name` and totalTime 0 is created and pushed on the
 * front of the list.
 *
 * If memory for the new entry cannot be allocated the call does nothing
 * (no clock is registered) instead of dereferencing a NULL pointer.
 */
EXPORT void startClock(const char* name) 
{
        struct clock *cp;
        size_t name_size;

        for (cp = clocks; cp != NULL; cp = cp->next) {
                if (strcmp(cp->name,name) == 0) {
                        cp->startTime = wall_time();
                        return;
                }
        }

        cp = (struct clock*)malloc(sizeof(struct clock));
        if (cp == NULL)
                return;

        name_size = strlen(name) + 1;   /* include the terminating NUL */
        cp->name = (char*) malloc(name_size);
        if (cp->name == NULL) {
                free(cp);
                return;
        }
        memcpy(cp->name, name, name_size);

        cp->totalTime = 0;
        cp->startTime = wall_time();
        cp->next = clocks;
        clocks = cp;
}
Example #9
0
/*
 * Stop the named clock and report its elapsed time.
 *
 * Looks `name` up in the global `clocks` list.  If the clock exists and has
 * a non-zero startTime, totalTime is set to the time elapsed since
 * startTime, one line is printed to stdout, and startTime is cleared to 0.
 * Unknown or not-running clocks are ignored.
 */
EXPORT void stopClock(const char* name) 
{
        struct clock *cp;

        for (cp = clocks; cp != NULL; cp = cp->next) {
                if (strcmp(cp->name,name) == 0)
                        break;
        }

        if (cp == NULL || !cp->startTime)
                return;

        cp->totalTime = (wall_time() - cp->startTime);
        printf("%-20s %ld micros\n",cp->name,cp->totalTime);
        cp->startTime = 0;
}
Example #10
0
/*
 * Dot-product benchmark driver.
 *
 * Usage: prog <n> <r> <step>
 *   n    - vector length passed to skalar()
 *   r    - number of repetitions
 *   step - stride passed to skalar()
 *
 * Measures the overhead of r calls to empty(), then the time of r calls to
 * skalar(a, b, n, step), and prints the result, timings and an MFlop/s
 * estimate.  Returns 0 on success and EXIT_FAILURE on bad arguments or when
 * memory cannot be allocated.
 */
int main(int argc, char *argv[])
{
    int i, j, n, r, step;
    double s = 0.0, time, ttime, *a, *b;

    if (argc < 4) {
        fprintf(stderr, "Usage: %s <n> <r> <step>\n",
                (argc > 0) ? argv[0] : "skalar");
        return EXIT_FAILURE;
    }

    n = atoi(argv[1]);
    r = atoi(argv[2]);
    step = atoi(argv[3]);
    /* Non-positive values would give empty allocations, a division by zero
     * in n/step, or a meaningless per-repetition time. */
    if (n <= 0 || r <= 0 || step <= 0) {
        fprintf(stderr, "n, r and step must all be positive integers\n");
        return EXIT_FAILURE;
    }

    /* Widen before multiplying so step * n is not computed in int. */
    a = (double *) malloc((size_t) step * (size_t) n * sizeof(double));
    b = (double *) malloc((size_t) step * (size_t) n * sizeof(double));
    if (a == NULL || b == NULL) {
        fprintf(stderr, "out of memory\n");
        free(a);
        free(b);
        return EXIT_FAILURE;
    }

    /* wall_time() is read twice back-to-back, as in the original;
     * presumably the first call is a warm-up. */
    time = wall_time();
    time = wall_time();
    for (i = 0; i < r; i++)
        empty();
    ttime = wall_time() - time;

    for (i = 0; i < n; i++) {
        a[i] = i;
        b[i] = 1.0 / (i + 1);
    }

    time = wall_time();
    for (j = 0; j < r; j++) {
        s = skalar(a, b, n, step);
    }
    time = wall_time() - time;

    printf("Skalarprodukt  : %f\n", s);
    printf("Laufzeit       : %f s\n", time);
    printf("Overhead       : %g s\n", ttime);
    printf("Zeit pro Wdhlg : %g s\n", time / r);
    printf("Overhead       : %g s\n", ttime / r);
    printf("Rechenleistung : %6.1f MFlop/s\n", 2.0 * (n/step+1) * r * 1e-6 / (time - ttime));

    free(a);
    free(b);
    return 0;
}
Example #11
0
/***********************************************************************
 *  cl_ctx_set_timer - set timer
 *
 *  input:
 *    ctx  - context
 *    tv   - timeout after which the timer expires
 *    fun  - function to be invoked when the timer expires
 *    data - application data passed back to the application when the
 *           callback is invoked
 *
 *  return:
 *    handle of the newly scheduled timer, or NULL if ctx or tv is NULL
 *    or the timer event could not be created
 *
 ************************************************************************/
cl_timer *cl_ctx_set_timer(cl_context *ctx, struct timeval *tv, 
			   void (*fun)(), void *data)
{
  uint64_t when;
  Event *ev;

  /* tv is dereferenced below, so reject it the same way as a NULL ctx */
  if (ctx == NULL || tv == NULL)
    return NULL;

  /* absolute expiry = current wall_time() plus the relative timeout */
  when = wall_time();
  when = when + UMILLION*tv->tv_sec + tv->tv_usec;

  ev = newEvent(fun, data, when);
  if (ev == NULL)
    return NULL;	/* never insert a NULL event into the heap */

  insertEvent(&ctx->timer_heap, ev);

  return (cl_timer *)ev;
}
Example #12
0
void DebriefingScreen::fire_timer() {
    if (_state != TYPING) {
        throw Exception(format("DebriefingScreen::fire_timer() called but _state is {0}", _state));
    }
    sys.sound.teletype();
    wall_time now = antares::now();
    while (_next_update <= now) {
        if (_typed_chars < _score->size()) {
            _next_update += kTypingDelay;
            ++_typed_chars;
        } else {
            _next_update = wall_time();
            _state       = DONE;
            break;
        }
    }
}
Example #13
0
void ObjectDataScreen::fire_timer() {
    wall_time now = antares::now();
    if (_next_sound <= now) {
        sys.sound.teletype();
        _next_sound += 3 * kTypingDelay;
        while (_next_sound <= now) {
            _next_sound += kTypingDelay;
        }
    }
    while (_next_update <= now) {
        if (_typed_chars < _text->size()) {
            _next_update += kTypingDelay;
            ++_typed_chars;
        } else {
            _next_update = wall_time();
            _state       = DONE;
            break;
        }
    }
}
Example #14
0
/*
 * Entry point of the i3 ping thread.
 *
 * `data` points to a PingThreadData carrying the server list, the URL the
 * list is fetched from, and a pointer to the ping start time.
 *
 * After opening the ping socket (ICMP or UDP, selected by ICMP_PING) and
 * populating the server list, the function loops forever:
 *   - waits up to 10 ms for an echo reply and records its RTT,
 *   - sends the pings that are due according to period_ping[],
 *   - periodically changes the set of pinged servers,
 *   - periodically re-fetches the server list from `url`.
 *
 * ping_sock is not declared in this function; presumably it is a
 * file-scope variable.  The statements after the loop are never reached.
 */
unsigned int __stdcall ping_thread_entry(void *data)
#endif
{
    PingThreadData *pdata = (PingThreadData *)data;
    
    int maxfd, ret;
    fd_set all_rset, rset;
    struct timeval to;

    I3ServerList *list = pdata->list;
    char *url = pdata->url;
    uint64_t *ping_start_time = pdata->ping_start_time;
       
    int num_pings;
    I3ServerListNode *next_to_ping;
    uint64_t last_ping_time, curr_time;
    uint64_t last_add_new_i3servers, last_update_serverlist;
 
    FD_ZERO(&all_rset);
    FD_ZERO(&rset);

    /* socket init */
#ifdef ICMP_PING
    if (init_icmp_socket(&ping_sock) == -1)
	abort();
#else
    if (init_udp_socket(&ping_sock) == -1)
	abort();
#endif
    FD_SET(ping_sock, &all_rset);
    maxfd = ping_sock + 1;
    
    /* initial populate the list of i3 servers */
    update_i3_server_list(url, list, &next_to_ping);

    /* determine coordinates */
    init_coordinates(list);

    /* add some close-by servers from the list based on coordinates */
    change_ping_list(list, &next_to_ping, 1);
       
    /* eternal loop */
    last_ping_time = last_add_new_i3servers = last_update_serverlist = wall_time();
    set_status(ping_start_time, last_ping_time);
    for (;;) {
		/* select() modifies its fd_set and may modify the timeout, so
		 * both are re-initialised on every iteration (10 ms wait). */
		rset = all_rset;
        to.tv_sec = 0; to.tv_usec = 10000;
        if ((ret = select(maxfd, &rset, NULL, NULL, &to)) < 0) {
            if (errno == EINTR)
                continue;
            else {
                perror("select");
                abort();
            }
        }

		/* message received on the ping socket */
		if (FD_ISSET(ping_sock, &rset)) {
			uint32_t addr; uint16_t port, seq; uint64_t rtt;
#ifdef ICMP_PING
			if (recv_echo_reply(ping_sock, &addr, &seq, &rtt)) {
#else
			if (recv_i3_echo_reply(ping_sock, &addr, &port, &seq, &rtt)) {
#endif
				update_ping_information(list, addr, seq, rtt);
			}
		}

		/* need to ping */
		curr_time = wall_time();
		if (list->num_ping_list > 0) {
			char status = get_status(ping_start_time, curr_time);
			/* number of pings due = elapsed time / per-server interval.
			 * NOTE(review): the divisor is an integer quotient; if
			 * period_ping[status] < num_ping_list it is 0 and this divides
			 * by zero -- confirm the value ranges. */
			num_pings = (curr_time - last_ping_time)/
				(period_ping[status]/list->num_ping_list);
			if (num_pings > 0) {
				/* NOTE(review): the message says "Aborting" but execution
				 * continues into send_npings() with a NULL next_to_ping. */
				if (NULL == next_to_ping) {
					I3_PRINT_DEBUG0(I3_DEBUG_LEVEL_MINIMAL, 
							"No servers to ping. Aborting\n");
				}
				send_npings(ping_sock, list, &next_to_ping, num_pings);
				last_ping_time = curr_time;
			}
		}
	
		/* change the list of i3 servers */
		if (curr_time - last_add_new_i3servers >
					period_pick_new_server[get_status(ping_start_time, curr_time)]) {
			/* testing just the best server */
			uint32_t best_addr; uint16_t best_port; uint64_t best_rtt;
			struct in_addr ia;
			int required_k = 1;
			/* note: this `ret` shadows the function-level `ret` used for select() */
			int ret = get_top_k(list, required_k, &best_addr, &best_port, &best_rtt);
			
			if (ret != required_k) {
				// We couldn't find the request k top nodes.

				I3_PRINT_INFO0 (
						I3_INFO_LEVEL_WARNING,
						"I3 Ping Thread: Unable to obtain top k nodes.\n"
						);
				// Dilip: Feb 20, 2006.  I don't think the following works.
				// TODO: Start
				// We set the last_add_new_servers to fool the thread
				// to wait for some time before trying again to get
				// the top k nodes.
				//last_add_new_i3servers = curr_time;
				// TODO: End

				// Sleep for some time before trying again.
				// The `continue` below also skips the server-list refresh
				// at the bottom of the loop for this iteration.
#				if defined (_WIN32)
					Sleep ( 25 ); // 25 milliseconds
#				else
					usleep(25 * 1000); // 25 milliseconds
#				endif				
				continue;
			}

			ia.s_addr = htonl(best_addr);
			I3_PRINT_DEBUG3(I3_INFO_LEVEL_MINIMAL,
					"Best node: %s:%d with RTT %Ld\n", 
					inet_ntoa(ia), best_port, best_rtt
					);
	    
			I3_PRINT_DEBUG0(I3_DEBUG_LEVEL_VERBOSE, "Adding new servers to list\n");
			change_ping_list(list, &next_to_ping, 0);
			last_add_new_i3servers = curr_time;
		}
	
		/* update (wget) i3 server list */
		if (curr_time - last_update_serverlist > PERIOD_SERVERLIST_WGET) {
			I3_PRINT_DEBUG0(	I3_DEBUG_LEVEL_VERBOSE, 
								"Updating server list from server\n");
			update_i3_server_list(url, list, &next_to_ping);
			last_update_serverlist = curr_time;
		}
    }

    /* not reached: the loop above has no exit */
#ifndef _WIN32
    pthread_exit(0);
#endif
    return 0;
}
Example #15
0
/* BiCGSTAB(L) algorithm for the n-by-n problem Ax = b
 *
 * Parameters:
 *   L      - the "L" of BiCGSTAB(L); sizes the r/u vector families
 *   n      - problem dimension (length of x and b)
 *   x      - in: initial guess; updated in place with the iterate
 *   A      - operator callback, invoked as A(in, out, Adata)
 *   Adata  - opaque pointer forwarded to A
 *   b      - right-hand side
 *   tol    - iteration continues while ||r|| > tol * ||b||
 *            (||b|| is replaced by 1 when it is exactly 0)
 *   iters  - in: iteration count at which to give up; out: iterations done
 *   work   - workspace of (2L+3)*n realnums, or NULL to query that size
 *   quiet  - suppress residual output
 *
 * Returns:
 *   (2L+3)*n  when work is NULL (workspace query; nothing else is done)
 *   0         when the loop ended because the tolerance was met
 *   1         when the iteration count reached *iters
 *   -1        on breakdown (|rho| < breaktol)
 */
ptrdiff_t bicgstabL(const int L, const size_t n, realnum *x, bicgstab_op A, void *Adata,
                    const realnum *b, const double tol, int *iters, realnum *work,
                    const bool quiet) {
  if (!work) return (2 * L + 3) * n; // required workspace

  // Workspace layout: r[0..L] occupy the first (L+1)*n entries, u[0..L] the
  // next (L+1)*n, and rtilde (below) the final n.
  prealnum *r = new prealnum[L + 1];
  prealnum *u = new prealnum[L + 1];
  for (int i = 0; i <= L; ++i) {
    r[i] = work + i * n;
    u[i] = work + (L + 1 + i) * n;
  }

  double bnrm = norm2(n, b);
  if (bnrm == 0.0) bnrm = 1.0; // avoid dividing by zero in the relative residual

  int iter = 0;
  double last_output_wall_time = wall_time();

  double *gamma = new double[L + 1];
  double *gamma_p = new double[L + 1];
  double *gamma_pp = new double[L + 1];

  double *tau = new double[L * L];
  double *sigma = new double[L + 1];

  int ierr = 0; // error code to return, if any
  const double breaktol = 1e-30;

  /**** FIXME: check for breakdown conditions(?) during iteration  ****/

  // rtilde = r[0] = b - Ax
  realnum *rtilde = work + (2 * L + 2) * n;
  A(x, r[0], Adata);
  for (size_t m = 0; m < n; ++m)
    rtilde[m] = r[0][m] = b[m] - r[0][m];

  { /* Sleijpen normalizes rtilde in his code; it seems to help slightly */
    // NOTE(review): if the initial residual is exactly zero this divides by
    // zero; the main loop is then skipped only when tol * bnrm >= 0 holds.
    double s = 1.0 / norm2(n, rtilde);
    for (size_t m = 0; m < n; ++m)
      rtilde[m] *= s;
  }

  memset(u[0], 0, sizeof(realnum) * n); // u[0] = 0

  double rho = 1.0, alpha = 0, omega = 1;

  double resid;
  while ((resid = norm2(n, r[0])) > tol * bnrm) {
    ++iter;
    // Throttle progress output to at most once per MEEP_MIN_OUTPUT_TIME.
    if (!quiet && wall_time() > last_output_wall_time + MEEP_MIN_OUTPUT_TIME) {
      master_printf("residual[%d] = %g\n", iter, resid / bnrm);
      last_output_wall_time = wall_time();
    }

    // First part of the iteration: L steps building r[0..L] and u[0..L].
    rho = -omega * rho;
    for (int j = 0; j < L; ++j) {
      if (fabs(rho) < breaktol) {
        // Breakdown: jump straight to cleanup (skips the final-residual print).
        ierr = -1;
        goto finish;
      }
      double rho1 = dot(n, r[j], rtilde);
      double beta = alpha * rho1 / rho;
      rho = rho1;
      for (int i = 0; i <= j; ++i)
        for (size_t m = 0; m < n; ++m)
          u[i][m] = r[i][m] - beta * u[i][m];
      A(u[j], u[j + 1], Adata);
      alpha = rho / dot(n, u[j + 1], rtilde);
      for (int i = 0; i <= j; ++i)
        xpay(n, r[i], -alpha, u[i + 1]);
      A(r[j], r[j + 1], Adata);
      xpay(n, x, alpha, u[0]);
    }

    // Second part: orthogonalize r[1..L] against each other, recording the
    // coefficients in tau (row-major L-by-L, 1-based indices shifted by one)
    // and the squared norms in sigma.
    for (int j = 1; j <= L; ++j) {
      for (int i = 1; i < j; ++i) {
        int ij = (j - 1) * L + (i - 1);
        tau[ij] = dot(n, r[j], r[i]) / sigma[i];
        xpay(n, r[j], -tau[ij], r[i]);
      }
      sigma[j] = dot(n, r[j], r[j]);
      gamma_p[j] = dot(n, r[0], r[j]) / sigma[j];
    }

    // Back-substitution for gamma, then gamma_pp.
    omega = gamma[L] = gamma_p[L];
    for (int j = L - 1; j >= 1; --j) {
      gamma[j] = gamma_p[j];
      for (int i = j + 1; i <= L; ++i)
        gamma[j] -= tau[(i - 1) * L + (j - 1)] * gamma[i];
    }
    for (int j = 1; j < L; ++j) {
      gamma_pp[j] = gamma[j + 1];
      for (int i = j + 1; i < L; ++i)
        gamma_pp[j] += tau[(i - 1) * L + (j - 1)] * gamma[i + 1];
    }

    // Apply the computed coefficients to x, r[0] and u[0].
    xpay(n, x, gamma[1], r[0]);
    xpay(n, r[0], -gamma_p[L], r[L]);
    xpay(n, u[0], -gamma[L], u[L]);
    for (int j = 1; j < L; ++j) { /* TODO: use blas DGEMV (for L > 2) */
      xpay(n, x, gamma_pp[j], r[j]);
      xpay(n, r[0], -gamma_p[j], r[j]);
      xpay(n, u[0], -gamma[j], u[j]);
    }

    // Give up once the caller-supplied iteration count is reached.
    if (iter == *iters) {
      ierr = 1;
      break;
    }
  }

  if (!quiet) master_printf("final residual = %g\n", norm2(n, r[0]) / bnrm);

finish:
  // Only the pointer tables and coefficient arrays are owned here; the
  // vectors themselves live in the caller's workspace.
  delete[] sigma;
  delete[] tau;
  delete[] gamma_pp;
  delete[] gamma_p;
  delete[] gamma;
  delete[] u;
  delete[] r;

  *iters = iter;
  return ierr;
}
Example #16
0
/*---------------------------------------------------------------------------
 * (function: do_high_level_synthesis)
 *
 * Runs the whole front-end flow in order: initialization, parsing to an
 * AST, optional tagging, netlist creation and checking, netlist
 * optimization, partial mapping, and output.  The wall-clock time of the
 * whole flow is printed at the end.
 *-------------------------------------------------------------------------*/
void do_high_level_synthesis()
{
	double start_time = wall_time();
	double elaboration_time;

	printf("--------------------------------------------------------------------\n");
	printf("High-level synthesis Begin\n");

	/* Initialization routines */
	#ifdef VPR6
	find_hard_multipliers();
	find_hard_adders();
	//find_hard_adders_for_sub();
	register_hard_blocks();
	#endif
	global_param_table_sc = sc_new_string_cache();

	/* Step 1: parse the input into an abstract syntax tree.  AST
	 * optimizations are entered per module through
	 * void next_parsed_verilog_file(ast_node_t *file_items_list). */
	printf("Parser starting - we'll create an abstract syntax tree.  Note this tree can be viewed using GraphViz (see documentation)\n");
	parse_to_ast();

	/* Step 2: optionally tag the AST for downstream links to the Verilog */
	if (global_args.high_level_block != NULL)
	{
		add_tag_data();
	}

	/* Step 3: turn the AST into a netlist */
	printf("Converting AST into a Netlist. Note this netlist can be viewed using GraphViz (see documentation)\n");
	create_netlist();

	/* Levelizing is not possible yet: large muxes can look like
	 * combinational loops when they are not */
	check_netlist(verilog_netlist);

	/* Step 4: netlist optimizations */
	printf("Performing Optimizations of the Netlist\n");
	netlist_optimizations_top(verilog_netlist);

	if (configuration.output_netlist_graphs )
	{
		/* Dump the optimized netlist into the debug output path */
		graphVizOutputNetlist(configuration.debug_output_path, "optimized", 1, verilog_netlist);
	}

	/* Step 5: convert the netlist to a format compatible with the target
	 * device */
	printf("Performing Partial Map to target device\n");
	partial_map_top(verilog_netlist);

	#ifdef VPR5
	/* Look for problems in the partially mapped netlist */
	printf("Check for liveness and combinational loops\n");
	levelize_and_check_for_combinational_loop_and_liveness(TRUE, verilog_netlist);
	#endif

	/* Step 6: outputs, including soft and hard mapping of all structures
	 * to the target format */
	printf("Outputting the netlist to the specified output format\n");
	output_top(verilog_netlist);

	elaboration_time = wall_time() - start_time;

	printf("Successful High-level synthesis by Odin in ");
	print_time(elaboration_time);
	printf("\n");
	printf("--------------------------------------------------------------------\n");

	// FIXME: free contents?
	sc_free_string_cache(global_param_table_sc);
}
Example #17
0
/*
 * NMPC driver.
 *
 * Parses the command line and input file, allocates the horizon state (qu),
 * Lagrange multipliers (p) and gradient buffers, then steers through all
 * C.ntgt waypoints.  For each waypoint it repeats: run the steepest-descent
 * (SD) loop until the gradient norm stored in slot C.N drops below .1,
 * execute the control horizon, and shift the remaining controls forward.
 * Output is routed through function-pointer hooks chosen once from the
 * command-line verbosity flags.
 *
 * Returns 0 when all waypoints were processed.
 */
int main(int argc, char **argv)
{

	void
	(*hook_print_SE)(const qnu*, const Lagr*, const nmpc&) = NULL;
	void
	(*hook_print_LG)(const qnu*, const Lagr*, const nmpc&, const float*) = NULL;
	void
	(*hook_print_SD)(const unsigned int*, const double*) = NULL;
	void
	(*hook_print_TR)(const unsigned int*, const double*) = NULL;

	double
	(*hook_exec_control_horiz)(qnu*, const nmpc&, robot*) = NULL;

	char errnote[256];
	unsigned int sd_loop = 0, k = 0, current_tgt_no = 0;
	float tgtdist, grad_dot_grad = 0.;
	robot vme;

	double sd_loop_time;

	nmpc C;
	cl_opts clopts =
	{ false };
	parse_command_line(argc, argv, &vme, &clopts);
	parse_input_file(C, vme.conffile());
	print_greeting(C);

	qnu* qu = (qnu*) calloc(C.N, sizeof(qnu));
	Lagr* p = (Lagr*) calloc(C.N, sizeof(Lagr));
	// Slot C.N of each gradient buffer is read separately from the per-step
	// entries (see the SD loop exit test), hence C.N + 1 elements.
	float* grad = (float*) calloc(C.N + 1, sizeof(float));
	float* last_grad = (float*) calloc(C.N + 1, sizeof(float));
	// Element size must match the pointee type: these are doubles.  Sizing
	// with sizeof(float) under-allocates and writes past the buffer.
	double* time_to_tgt = (double*) calloc(C.ntgt, sizeof(double));

	C.cur_tgt = C.tgt;
	C.control_step = 0;

	init_qu_and_p(qu, p, C);

	tgtdist = C.tgttol + 1; // Just to get us into the waypoint loop.
	C.horizon_loop = 0;

	/*
	 * This next block of decisions sort out the hooks used to print output.
	 * Rather than include the ifs in each loop, I'm using function pointers
	 * which are set at runtime based on CL flags for verbosity.
	 */
	if (clopts.selec_verbose)
	{
		hook_print_SE = &print_pathnerr;
		hook_print_LG = &print_LG;
		hook_print_SD = &print_SD;
		hook_print_TR = &print_TR;
	}
	// NOTE(review): this repeats the condition above, so the branch can never
	// run.  It was presumably meant to test a "quiet" flag -- confirm against
	// cl_opts before changing it.
	else if (clopts.selec_verbose)
	{
		hook_print_SE = &empty_output_hook;
		hook_print_LG = &empty_output_hook;
		hook_print_SD = &empty_output_hook;
		hook_print_TR = &empty_output_hook;
	}
	else
	{
		if (clopts.selec_state_and_error_SE)
			hook_print_SE = &print_pathnerr;
		else
			hook_print_SE = &empty_output_hook;

		if (clopts.selec_lagrange_grad_LG)
			hook_print_LG = &print_LG;
		else
			hook_print_LG = &empty_output_hook;

		if (clopts.selec_SD_converged_SD)
			hook_print_SD = &print_SD;
		else
			hook_print_SD = &empty_output_hook;

		if (clopts.selec_target_reached_TR)
			hook_print_TR = &print_TR;
		else
			hook_print_TR = &empty_output_hook;
	}
	if (clopts.selec_sim)
		hook_exec_control_horiz = &exec_control_horiz_dummy;
	else
		hook_exec_control_horiz = &exec_control_horiz_vme;

	if (!clopts.selec_sim)
	{
		vme.tcp_connect();
		vme.update_poshead(qu, C);
	}

	/*
	 * Enter the loop which will take us through all waypoints.
	 */
	while (current_tgt_no < C.ntgt)
	{
		// Store -start now and add the end time later: the sum is the elapsed time.
		time_to_tgt[current_tgt_no] = -wall_time();
		C.cur_tgt = &C.tgt[current_tgt_no * 2];
		tgtdist = C.tgttol + 1;
		while (tgtdist > .1)
		{
			C.horizon_loop += 1;
			sd_loop = 0;
			sd_loop_time = -wall_time();
			/*
			 * SD loop.
			 */
			while (1)
			{
				sd_loop += 1;
				grad_dot_grad = 0.;
				/*
				 * The core of the gradient decent is in the next few lines:
				 */
				tgtdist = predict_horizon(qu, p, C);
				get_gradient(qu, p, C, grad);
				for (k = 0; k < C.N; k++)
				{
					grad_dot_grad += grad[k] * last_grad[k];
				}
				/*
				 * Detect direction changes in the gradient by inspecting the
				 * product <grad, last_grad>. If it is positive, then the
				 * iterations are successfully stepping to the minimum, and we
				 * can accelerate by increasing dg. If we overshoot (and the
				 * product becomes negative), then backstep and drop dg to a
				 * safe value.
				 */
				if (grad_dot_grad > 0)
				{
					C.dg *= 2;
					for (k = 0; k < C.N; ++k)
					{
						qu[k].Dth -= C.dg * grad[k];
					}
				}
				else
				{
					C.dg = 0.1; // TODO: Adaptive.
					for (k = 0; k < C.N; ++k)
					{
						qu[k].Dth += C.dg * grad[k];
					}
				}
				// After the swap, last_grad holds the gradient just computed.
				swap_fptr(&grad, &last_grad);
				if (last_grad[C.N] < .1)
					break;
				if (sd_loop >= MAX_SD_ITER)
				{
					sprintf(errnote, "Reached %d SD iterations. Stopping.",
							sd_loop);
					report_error(EXCEEDED_MAX_SD_ITER, errnote);
				}
			}
			hook_print_SE(qu, p, C);
			hook_print_LG(qu, p, C, grad);
			sd_loop_time += wall_time();
			hook_print_SD(&sd_loop, &sd_loop_time);
			C.control_step += C.C;

			hook_exec_control_horiz(qu, C, &vme);

			// Shift the not-yet-executed controls to the front of the horizon.
			for (k = 0; k < C.N - C.C - 1; ++k)
			{
				qu[k].v = qu[k + C.C].v;
				qu[k].Dth = qu[k + C.C].Dth;
			}

			if (C.control_step > MAX_NMPC_ITER * C.C)
			{
				sprintf(errnote,
						"Reached %d NMPC steps without reaching tgt. Stopping.",
						MAX_NMPC_ITER);
				report_error(TRAPPED_IN_NMPC_LOOP, errnote);
			}
			/*
			 * The last thing we do is get the new position and heading for
			 * the next SD calculation.
			 */
//			vme.update_poshead(qu);
		}
		time_to_tgt[current_tgt_no] += wall_time();
		hook_print_TR(&current_tgt_no, &time_to_tgt[current_tgt_no]);
		++current_tgt_no;
	}

	// Release the work arrays (grad/last_grad may have been swapped, but both
	// still point at the two original allocations).
	free(time_to_tgt);
	free(last_grad);
	free(grad);
	free(p);
	free(qu);

	return 0;
}
Example #18
0
// Minimizes `function` with the limited-memory BFGS method.
//
// The starting point is read from the user's variables
// (copy_user_to_global) and the final point is written back
// (copy_global_to_user).  Timing statistics and the exit condition are
// stored in `results`.
//
// Each iteration: evaluate f and its gradient, update the (s, y) history,
// test the stopping criteria, compute the search direction with the
// two-loop recursion (optionally restarting with steepest descent),
// perform a line search, and log.
//
// Exit conditions set here:
//   FUNCTION_TOLERANCE  - the function has no scalars (n == 0)
//   NO_CONVERGENCE      - iter reached maximum_iterations
//   USER_ABORT          - the callback returned false
//   GRADIENT_TOLERANCE  - the line search failed twice in a row, or more
//                         than the allowed number of times in total
//   (check_exit_conditions may set others through `results`)
void LBFGSSolver::solve(const Function& function,
                        SolverResults* results) const
{
	double global_start_time = wall_time();

	// Dimension of problem.
	size_t n = function.get_number_of_scalars();

	if (n == 0) {
		results->exit_condition = SolverResults::FUNCTION_TOLERANCE;
		return;
	}

	// Current point, gradient and Hessian.
	// NaN marks "not computed yet".
	double fval   = std::numeric_limits<double>::quiet_NaN();
	double fprev  = std::numeric_limits<double>::quiet_NaN();
	double normg0 = std::numeric_limits<double>::quiet_NaN();
	double normg  = std::numeric_limits<double>::quiet_NaN();
	double normdx = std::numeric_limits<double>::quiet_NaN();

	Eigen::VectorXd x, g;

	// Copy the user state to the current point.
	function.copy_user_to_global(&x);
	Eigen::VectorXd x2(n);

	// L-BFGS history.
	// s_data/y_data own the storage; s/y are pointer arrays that are rotated
	// so that index 0 always refers to the most recent pair.
	std::vector<Eigen::VectorXd>  s_data(this->lbfgs_history_size),
	                              y_data(this->lbfgs_history_size);
	std::vector<Eigen::VectorXd*> s(this->lbfgs_history_size),
	                              y(this->lbfgs_history_size);
	for (int h = 0; h < this->lbfgs_history_size; ++h) {
		s_data[h].resize(function.get_number_of_scalars());
		s_data[h].setZero();
		y_data[h].resize(function.get_number_of_scalars());
		y_data[h].setZero();
		s[h] = &s_data[h];
		y[h] = &y_data[h];
	}

	Eigen::VectorXd rho(this->lbfgs_history_size);
	rho.setZero();

	Eigen::VectorXd alpha(this->lbfgs_history_size);
	alpha.setZero();
	Eigen::VectorXd q(n);
	Eigen::VectorXd r(n);

	// Needed from the previous iteration.
	Eigen::VectorXd x_prev(n), s_tmp(n), y_tmp(n);

	CheckExitConditionsCache exit_condition_cache;

	//
	// START MAIN ITERATION
	//
	results->startup_time   += wall_time() - global_start_time;
	results->exit_condition = SolverResults::INTERNAL_ERROR;
	int iter = 0;
	bool last_iteration_successful = true;
	int number_of_line_search_failures = 0;
	int number_of_restarts = 0;
	while (true) {

		//
		// Evaluate function and derivatives.
		//
		double start_time = wall_time();
		// y[0] should contain the difference between the gradient
		// in this iteration and the gradient from the previous.
		// Therefore, update y before and after evaluating the
		// function.
		if (iter > 0) {
			y_tmp = -g;
		}
		fval = function.evaluate(x, &g);

		// Largest absolute gradient component (max-norm).
		normg = std::max(g.maxCoeff(), -g.minCoeff());
		if (iter == 0) {
			normg0 = normg;
		}
		results->function_evaluation_time += wall_time() - start_time;

		//
		// Update history
		//
		start_time = wall_time();

		if (iter > 0 && last_iteration_successful) {
			s_tmp = x - x_prev;
			y_tmp += g;

			// The pair is only stored when s'y is safely positive.
			double sTy = s_tmp.dot(y_tmp);
			if (sTy > 1e-16) {
				// Shift all pointers one step back, discarding the oldest one.
				Eigen::VectorXd* sh = s[this->lbfgs_history_size - 1];
				Eigen::VectorXd* yh = y[this->lbfgs_history_size - 1];
				for (int h = this->lbfgs_history_size - 1; h >= 1; --h) {
					s[h]   = s[h - 1];
					y[h]   = y[h - 1];
					rho[h] = rho[h - 1];
				}
				// Reuse the storage of the discarded data for the new data.
				s[0] = sh;
				y[0] = yh;

				*y[0] = y_tmp;
				*s[0] = s_tmp;
				rho[0] = 1.0 / sTy;
			}
		}

		results->lbfgs_update_time += wall_time() - start_time;

		//
		// Test stopping criteria
		//
		// NOTE(review): every `break` in this section leaves the loop before
		// stopping_criteria_time is updated for the final iteration.
		start_time = wall_time();
		if (iter > 1 && this->check_exit_conditions(fval, fprev, normg,
		                                            normg0, x.norm(), normdx,
		                                            last_iteration_successful, 
		                                            &exit_condition_cache, results)) {
			break;
		}
		if (iter >= this->maximum_iterations) {
			results->exit_condition = SolverResults::NO_CONVERGENCE;
			break;
		}

		if (this->callback_function) {
			CallbackInformation information;
			information.objective_value = fval;
			information.x = &x;
			information.g = &g;

			if (!callback_function(information)) {
				results->exit_condition = SolverResults::USER_ABORT;
				break;
			}
		}

		results->stopping_criteria_time += wall_time() - start_time;

		//
		// Compute search direction via L-BFGS two-loop recursion.
		//
		start_time = wall_time();
		bool should_restart = false;

		double H0 = 1.0;
		if (iter > 0) {
			// If the gradient is identical two iterations in a row,
			// y will be the zero vector and H0 will be NaN. In this
			// case the line search will fail and L-BFGS will be restarted
			// with a steepest descent step.
			H0 = s[0]->dot(*y[0]) / y[0]->dot(*y[0]);

			// If isinf(H0) || isnan(H0)
			if (H0 ==  std::numeric_limits<double>::infinity() ||
			    H0 == -std::numeric_limits<double>::infinity() ||
			    H0 != H0) {
				should_restart = true;
			}
		}

		q = -g;

		// First loop: newest to oldest history pair.
		for (int h = 0; h < this->lbfgs_history_size; ++h) {
			alpha[h] = rho[h] * s[h]->dot(q);
			q = q - alpha[h] * (*y[h]);
		}

		r = H0 * q;

		// Second loop: oldest to newest history pair.
		for (int h = this->lbfgs_history_size - 1; h >= 0; --h) {
			double beta = rho[h] * y[h]->dot(r);
			r = r + (*s[h]) * (alpha[h] - beta);
		}

		// If the function improves very little, the approximated Hessian
		// might be very bad. If this is the case, it is better to discard
		// the history once in a while. This allows the solver to correctly
		// solve some badly scaled problems.
		// (restart_test is NaN in the first iteration because fprev is NaN;
		// it is only acted on when iter > 0.)
		double restart_test = std::fabs(fval - fprev) /
		                      (std::fabs(fval) + std::fabs(fprev));
		if (iter > 0 && iter % 100 == 0 && restart_test
		                                   < this->lbfgs_restart_tolerance) {
			should_restart = true;
		}
		if (! last_iteration_successful) {
			should_restart = true;
		}

		if (should_restart) {
			// Note: number_of_restarts is only counted while a log function
			// is installed.
			if (this->log_function) {
				char str[1024];
				if (number_of_restarts <= 10) {
					std::sprintf(str, "Restarting: fval = %.3e, deltaf = %.3e, max|g_i| = %.3e, test = %.3e",
								 fval, std::fabs(fval - fprev), normg, restart_test);
					this->log_function(str);
				}
				if (number_of_restarts == 10) {
					this->log_function("NOTE: No more restarts will be reported.");
				}
				number_of_restarts++;
			}
			// Fall back to steepest descent and forget the history.
			r = -g;
			for (int h = 0; h < this->lbfgs_history_size; ++h) {
				(*s[h]).setZero();
				(*y[h]).setZero();
			}
			rho.setZero();
			alpha.setZero();
			// H0 is not used, but its value will be printed.
			H0 = std::numeric_limits<double>::quiet_NaN();
		}

		results->lbfgs_update_time += wall_time() - start_time;

		//
		// Perform line search.
		//
		start_time = wall_time();
		double start_alpha = 1.0;
		// In the first iteration, start with a much smaller step
		// length. (heuristic used by e.g. minFunc)
		if (iter == 0) {
			double sumabsg = 0.0;
			for (size_t i = 0; i < n; ++i) {
				sumabsg += std::fabs(g[i]);
			}
			start_alpha = std::min(1.0, 1.0 / sumabsg);
		}
		double alpha_step = this->perform_linesearch(function, x, fval, g,
		                                             r, &x2, start_alpha);

		// A non-positive step length is treated as a failed line search.
		if (alpha_step <= 0) {
			if (this->log_function) {
				this->log_function("Line search failed.");
				char str[1024];
				std::sprintf(str, "%4d %+.3e %9.3e %.3e %.3e %.3e %.3e",
					iter, fval, std::fabs(fval - fprev), normg, alpha_step, H0, rho[0]);
				this->log_function(str);
			}
			// NOTE(review): this break also skips the backtracking_time update.
			if (! last_iteration_successful || number_of_line_search_failures++ > 10) {
				// This happens quite seldom. Every time it has happened, the function
				// was actually converged to a solution.
				results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
				break;
			}

			last_iteration_successful = false;
		}
		else {
			// Record length of this step.
			normdx = alpha_step * r.norm();
			// Compute new point.
			x_prev = x;
			x = x + alpha_step * r;

			last_iteration_successful = true;
		}

		results->backtracking_time += wall_time() - start_time;

		//
		// Log the results of this iteration.
		//
		start_time = wall_time();

		// Log less often as the iteration count grows.
		int log_interval = 1;
		if (iter > 30) {
			log_interval = 10;
		}
		if (iter > 200) {
			log_interval = 100;
		}
		if (iter > 2000) {
			log_interval = 1000;
		}
		if (this->log_function && iter % log_interval == 0) {
			if (iter == 0) {
				this->log_function("Itr       f       deltaf   max|g_i|   alpha      H0       rho");
			}

			this->log_function(
				to_string(
					std::setw(4), iter, " ",
					std::setw(10), std::setprecision(3), std::scientific, std::showpos, fval, std::noshowpos, " ",
					std::setw(9),  std::setprecision(3), std::scientific, std::fabs(fval - fprev), " ",
					std::setw(9),  std::setprecision(3), std::setprecision(3), std::scientific, normg, " ",
					std::setw(9),  std::setprecision(3), std::scientific, alpha_step, " ",
					std::setw(9),  std::setprecision(3), std::scientific, H0, " ",
					std::setw(9),  std::setprecision(3), std::scientific, rho[0]
				)
			);
		}
		results->log_time += wall_time() - start_time;

		fprev = fval;
		iter++;
	}

	// Hand the final point back to the user's variables.
	function.copy_global_to_user(x);
	results->total_time += wall_time() - global_start_time;

	if (this->log_function) {
		char str[1024];
		std::sprintf(str, " end %+.3e           %.3e", fval, normg);
		this->log_function(str);
	}
}
Example #19
0
 // Returns a wall_time constructed from the stored _ticks value.
 wall_time now() const { return wall_time(_ticks); }
Example #20
0
// Advance the fields by one time step (t is incremented by 1).
//
// If the magnetic fields are currently synchronized they are restored before
// stepping and re-synchronized afterwards, with the original synchronization
// count put back. Unless `quiet` is set, a progress line with the average
// wall-clock seconds per step is printed whenever more than MIN_OUTPUT_TIME
// has passed since the previous progress line.
void fields::step() {
  // however many times the fields have been synched, we want to restore now
  int save_synchronized_magnetic_fields = synchronized_magnetic_fields;
  if (synchronized_magnetic_fields) {
    synchronized_magnetic_fields = 1; // reset synchronization count
    restore_magnetic_fields();
  } 

  am_now_working_on(Stepping);

  // On the very first step (t == 0), start the progress-report clock.
  if (!t) {
    last_step_output_wall_time = wall_time();
    last_step_output_t = t;
  }
  if (!quiet && wall_time() > last_step_output_wall_time + MIN_OUTPUT_TIME) {
    // Average over the steps taken since the last report.
    master_printf("on time step %d (time=%g), %g s/step\n", t, time(), 
		  (wall_time() - last_step_output_wall_time) / 
		  (t - last_step_output_t));
    if (save_synchronized_magnetic_fields)
      master_printf("  (doing expensive timestepping of synched fields)\n");
    last_step_output_wall_time = wall_time();
    last_step_output_t = t;
  }

  phase_material();
  
  // update cached conductivity-inverse array, if needed
  for (int i=0;i<num_chunks;i++) chunks[i]->s->update_condinv();

  // First half: B update, then H and its boundary exchanges.
  calc_sources(time()); // for B sources
  step_db(B_stuff);
  step_source(B_stuff);
  step_boundaries(B_stuff);
  calc_sources(time() + 0.5*dt); // for integrated H sources
  update_eh(H_stuff);
  step_boundaries(WH_stuff);
  update_pols(H_stuff);
  step_boundaries(PH_stuff);
  step_boundaries(H_stuff);

  if (fluxes) fluxes->update_half();

  // Second half: D update, then E and its boundary exchanges.
  calc_sources(time() + 0.5*dt); // for D sources
  step_db(D_stuff);
  step_source(D_stuff);
  step_boundaries(D_stuff);
  calc_sources(time() + dt); // for integrated E sources
  update_eh(E_stuff);
  step_boundaries(WE_stuff);
  update_pols(E_stuff);
  step_boundaries(PE_stuff);
  step_boundaries(E_stuff);

  if (fluxes) fluxes->update();
  t += 1;
  update_dfts();
  finished_working();

  // re-synch magnetic fields if they were previously synchronized
  if (save_synchronized_magnetic_fields) {
    synchronize_magnetic_fields();
    synchronized_magnetic_fields = save_synchronized_magnetic_fields;
  }
}
Example #21
0
// Runs the Nelder-Mead simplex iteration on `function`.
//
// The starting point is read with function.copy_user_to_global(); on exit the
// best simplex vertex (simplex[0]) is written back with
// function.copy_global_to_user().
//
// `results` receives the exit condition and has its timing fields
// (startup, function evaluation, stopping criteria, log, total) incremented.
// If the function has zero scalars the call returns immediately with
// FUNCTION_TOLERANCE and touches nothing else.
//
// The loop stops when the simplex area or longest edge has shrunk below the
// configured tolerance relative to its value at iteration 0, when the area is
// exactly zero, after more than 50 consecutive shrink steps, when
// maximum_iterations is reached, or when the callback returns false.
void NelderMeadSolver::solve(const Function& function,
                             SolverResults* results) const
{
	double global_start_time = wall_time();

	// Dimension of problem.
	size_t n = function.get_number_of_scalars();

	if (n == 0) {
		results->exit_condition = SolverResults::FUNCTION_TOLERANCE;
		return;
	}

	// The Nelder-Mead simplex.
	std::vector<SimplexPoint> simplex(n + 1);

	// Copy the user state to the current point.
	Eigen::VectorXd x;
	function.copy_user_to_global(&x);

	initialize_simplex(function, x, &simplex);

	// Scratch points, allocated once and reused (via swap) in every iteration.
	SimplexPoint mean_point;
	SimplexPoint reflection_point;
	SimplexPoint expansion_point;
	mean_point.x.resize(n);
	reflection_point.x.resize(n);
	expansion_point.x.resize(n);

	double fmin  = std::numeric_limits<double>::quiet_NaN();
	double fmax  = std::numeric_limits<double>::quiet_NaN();
	double fval  = std::numeric_limits<double>::quiet_NaN();
	double area  = std::numeric_limits<double>::quiet_NaN();
	double area0 = std::numeric_limits<double>::quiet_NaN();
	double length  = std::numeric_limits<double>::quiet_NaN();
	double length0 = std::numeric_limits<double>::quiet_NaN();

	Eigen::MatrixXd area_mat(n, n);

	//
	// START MAIN ITERATION
	//
	results->startup_time   += wall_time() - global_start_time;
	results->exit_condition = SolverResults::INTERNAL_ERROR;
	int iter = 0;
	int n_shrink_in_a_row = 0;
	while (true) {

		//
		// In each iteration, the worst point in the simplex
		// is replaced with a new one.
		//
		double start_time = wall_time();

		mean_point.x.setZero();
		fval = 0;
		// Compute the mean of the best n points.
		// (fval becomes the average function value over those same points.)
		for (size_t i = 0; i < n; ++i) {
			mean_point.x += simplex[i].x;
			fval         += simplex[i].value;
		}
		fval         /= double(n);
		mean_point.x /= double(n);
		fmin = simplex[0].value;
		fmax = simplex[n].value;

		const char* iteration_type = "n/a";

		// Compute the reflection point and evaluate it.
		reflection_point.x = 2.0 * mean_point.x - simplex[n].x;
		reflection_point.value = function.evaluate(reflection_point.x);

		bool is_shrink = false;
		if (simplex[0].value <= reflection_point.value &&
			reflection_point.value < simplex[n - 1].value) {
			// Reflected point is neither better nor worst in the
			// new simplex.
			std::swap(reflection_point, simplex[n]);
			iteration_type = "Reflect 1";
		}
		else if (reflection_point.value < simplex[0].value) {
			// Reflected point is better than the current best; try
			// to go farther along this direction.

			// Compute expansion point.
			expansion_point.x = 3.0 * mean_point.x - 2.0 * simplex[n].x;
			expansion_point.value = function.evaluate(expansion_point.x);

			if (expansion_point.value < reflection_point.value) {
				std::swap(expansion_point, simplex[n]);
				iteration_type = "Expansion";
			}
			else {
				std::swap(reflection_point, simplex[n]);
				iteration_type = "Reflect 2";
			}
		}
		else {
			// Reflected point is still worse than x[n]; contract.
			bool success = false;

			// Note: expansion_point is reused below as storage for the
			// contraction candidate.
			if (simplex[n - 1].value <= reflection_point.value &&
			    reflection_point.value < simplex[n].value) {
				// Try to perform "outside" contraction.
				expansion_point.x = 1.5 * mean_point.x - 0.5 * simplex[n].x;
				expansion_point.value = function.evaluate(expansion_point.x);

				if (expansion_point.value <= reflection_point.value) {
					std::swap(expansion_point, simplex[n]);
					success = true;
					iteration_type = "Outside contraction";
				}
			}
			else {
				// Try to perform "inside" contraction.
				expansion_point.x = 0.5 * mean_point.x + 0.5 * simplex[n].x;
				expansion_point.value = function.evaluate(expansion_point.x);

				if (expansion_point.value < simplex[n].value) {
					std::swap(expansion_point, simplex[n]);
					success = true;
					iteration_type = "Inside contraction";
				}
			}

			if (! success) {
				// Neither outside nor inside contraction was acceptable;
				// shrink the simplex toward the best point.
				// Every vertex except simplex[0] is moved halfway toward
				// simplex[0] and re-evaluated.
				for (size_t i = 1; i < n + 1; ++i) {
					simplex[i].x = 0.5 * (simplex[0].x + simplex[i].x);
					simplex[i].value = function.evaluate(simplex[i].x);
					iteration_type = "Shrink";
					is_shrink = true;
				}
			}
		}

		// Restore the ordering the code above relies on (simplex[0] best,
		// simplex[n] worst) -- presumably SimplexPoint compares by value;
		// its operator< is not visible here.
		std::sort(simplex.begin(), simplex.end());

		results->function_evaluation_time += wall_time() - start_time;

		//
		// Test stopping criteria
		//
		// NOTE(review): every `break` below leaves the loop before
		// stopping_criteria_time is updated for the final iteration.
		start_time = wall_time();
		
		// Compute the area of the simplex.
		// Columns of area_mat are the edge vectors from the worst vertex;
		// `length` is the longest such edge.
		length = 0;
		for (size_t i = 0; i < n; ++i) {
			area_mat.col(i) = simplex[i].x - simplex[n].x;
			length = std::max(length, area_mat.col(i).norm());
		}
		area = std::abs(area_mat.determinant());
		if (iter == 0) {
			// Reference values for the relative tolerances below.
			area0 = area;
			length0 = length;
		}

		if (area / area0 < this->area_tolerance) {
			results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
			break;
		}

		// Catches a degenerate simplex, including the case area0 == 0 where
		// the ratio above is NaN and the comparison is false.
		if (area == 0) {
			results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
			break;
		}

		if (length / length0 < this->length_tolerance) {
			results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
			break;
		}

		if (is_shrink) {
			n_shrink_in_a_row++;
		}
		else {
			n_shrink_in_a_row = 0;
		}
		if (n_shrink_in_a_row > 50) {
			results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
			break;
		}

		if (iter >= this->maximum_iterations) {
			results->exit_condition = SolverResults::NO_CONVERGENCE;
			break;
		}

		// Give the user a chance to inspect the current best point and abort.
		if (this->callback_function) {
			CallbackInformation information;
			information.objective_value = simplex[0].value;
			information.x = &simplex[0].x;

			if (!callback_function(information)) {
				results->exit_condition = SolverResults::USER_ABORT;
				break;
			}
		}
		results->stopping_criteria_time += wall_time() - start_time;

		//
		// Restarting
		//
		//if (area / area1 < 1e-10) {
		//	x = simplex[0].x;
		//	initialize_simplex(function, x, &simplex);
		//	area1 = area;
		//	if (this->log_function) {
		//		this->log_function("Restarted.");
		//	}
		//}

		//
		// Log the results of this iteration.
		//
		start_time = wall_time();

		// Log every iteration at first, then progressively less often.
		int log_interval = 1;
		if (iter > 30) {
			log_interval = 10;
		}
		if (iter > 200) {
			log_interval = 100;
		}
		if (iter > 2000) {
			log_interval = 1000;
		}
		if (this->log_function && iter % log_interval == 0) {
			char str[1024];
				if (iter == 0) {
					this->log_function("Itr     min(f)     avg(f)     max(f)    area    length   type");
				}
				std::sprintf(str, "%6d %+.3e %+.3e %+.3e %.3e %.3e %s",
					iter, fmin, fval, fmax, area, length, iteration_type);
			this->log_function(str);
		}
		results->log_time += wall_time() - start_time;

		iter++;
	}

	// Return the best point as solution.
	function.copy_global_to_user(simplex[0].x);
	results->total_time += wall_time() - global_start_time;

	if (this->log_function) {
		// Note: fval here is the average over the best n points computed at
		// the start of the last iteration, not simplex[0].value.
		char str[1024];
		std::sprintf(str, " end   %+.3e                       %.3e %.3e", fval, area, length);
		this->log_function(str);
	}
}
/* Print an n-by-n row-major matrix, one row per line, "%f " per entry. */
static void dump_square_matrix(const double *m, int n)
{
	for (int x = 0; x < n; x++) {
		for (int y = 0; y < n; y++) {
			printf("%f ", m[x*n + y]);
		}
		printf("\n");
	}
}

/*
 * Correctness check and micro-benchmark for square_dgemm.
 *
 * 1. Fills A and B via fill_matrix, zeroes C and D.
 * 2. Times one square_dgemm(SIZE, A, B, C) call with gettimeofday.
 * 3. Computes the reference product into D with naive_multiply and compares
 *    C and D element by element (exact comparison). On the first mismatch
 *    prints "WRONG." followed by A, B, C and D and exits with status 1.
 * 4. Otherwise repeatedly doubles the number of square_dgemm calls until a
 *    batch takes at least `timeout` seconds, then reports Gflop/s.
 *
 * Returns 0 on success, 1 on mismatch or allocation failure.
 */
int main(void) {

	const size_t bytes = sizeof(double) * SIZE * SIZE;
	int status = 0;

	// Allocate space for the four matrices
	double *A = malloc(bytes);
	double *B = malloc(bytes);
	double *C = malloc(bytes);
	double *D = malloc(bytes);
	if (A == NULL || B == NULL || C == NULL || D == NULL) {
		fprintf(stderr, "out of memory\n");
		free(A);
		free(B);
		free(C);
		free(D);
		return 1;
	}
	fill_matrix(A, SIZE);
	fill_matrix(B, SIZE);
	memset(C, 0, bytes);
	memset(D, 0, bytes);

	// timestamps around the single timed call
	struct timeval begin, end;

	// function under test
	gettimeofday(&begin, NULL);
	square_dgemm(SIZE, A, B, C);
	gettimeofday(&end, NULL);

	// naive reference multiply
	naive_multiply(A, B, D, SIZE);

	// validate result; on the first mismatch, print all four matrices
	for (int i = 0; i < SIZE*SIZE; i++) {
		if (C[i] != D[i]) {
			printf("WRONG.\n");
			dump_square_matrix(A, SIZE);
			printf("-----------\n");
			dump_square_matrix(B, SIZE);
			printf("-----------\n");
			dump_square_matrix(C, SIZE);
			printf("-----------\n");
			dump_square_matrix(D, SIZE);
			status = 1;
			break;
		}
	}

	if (status == 0) {
		printf("CORRECT.^_^\n");

		// tv_sec / tv_usec have implementation-defined integer types; convert
		// explicitly so the argument really matches %ld.
		long usec = (long)(end.tv_sec - begin.tv_sec) * 1000000L
		          + (long)(end.tv_usec - begin.tv_usec);
		printf("Single Round Time use: %ld usec.\n", usec);

		/* Time a "sufficiently long" sequence of calls to reduce noise */
		double Gflops_s = 0.0, seconds = -1.0;
		double timeout = 0.1; // "sufficiently long" := at least 1/10 second.
		for (int n_iterations = 1; seconds < timeout; n_iterations *= 2)
		{
			/* Warm-up */
			square_dgemm (SIZE, A, B, C);

			/* Benchmark n_iterations runs of square_dgemm */
			seconds = -wall_time();
			for (int it = 0; it < n_iterations; ++it)
				square_dgemm (SIZE, A, B, C);
			seconds += wall_time();

			/* compute Gflop/s rate: 2*SIZE^3 flops per call */
			Gflops_s = 2.e-9 * n_iterations * SIZE * SIZE * SIZE / seconds;
		}
		printf ("Size: %d\tGflop/s: %.3g\n", SIZE, Gflops_s);
	}

	free(A);
	free(B);
	free(C);
	free(D);
	return status;
}
// Command-line driver that searches for low-rank decompositions of the
// matrix-multiplication tensor.
//
// Flow, as visible in this function:
//   1. Parse options with read_int/read_double/read_bool/read_string and,
//      with --verbose, echo them.
//   2. Build the three matricizations prm.X[0..2] of the tensor for the
//      given (m,k,n) dimensions and allocate all work arrays.
//   3. For each of `numSeeds` consecutive seeds starting at `seed`: fill the
//      starting factors from RandomMT (optionally overwritten by --input),
//      then run the selected method ("als", "sparsify" or "round") until
//      maxIters, maxSecs or the residual change drops below `tol`.
//      Optionally round the final factors, print a report, and write the
//      result to --output.
//   4. Print summary statistics (for "als") and free everything.
//
// Prints usage and returns 0 when called without arguments or with --help.
// Returns 0 on normal completion; an unknown method goes through die().
int main(int argc, char* argv[])
{

	// Print help if necessary
	bool help = read_bool(argc, argv, "--help", false);
	if ((argc < 2) || (help)) {
		usage(argv);
		return 0;
	}

	// Use parameters struct for passing parameters to kernels efficiently
	parameters prm;

	// Parse inputs
	prm.matDims[0] = read_int(argc, argv, "--m", 2);
	prm.matDims[1] = read_int(argc, argv, "--k", 2);
	prm.matDims[2] = read_int(argc, argv, "--n", 2);
	prm.rank = read_int(argc, argv, "--rank", 7);
	prm.method = read_string(argc, argv, "--method", (char *)"als");
	int maxIters = read_int(argc, argv, "--maxiters", 1000);
	int maxSecs = read_int(argc, argv, "--maxsecs", 1000);
	double tol = read_double(argc, argv, "--tol", 1e-8);
	int printItn = read_int(argc, argv, "--printitn", 0);
	double printTol = read_double(argc, argv, "--printtol", 1.0);
	int seed = read_int(argc, argv, "--seed", 0);
	int numSeeds = read_int(argc, argv, "--numseeds", 1);
	bool verbose = read_bool(argc, argv, "--verbose", false);
	prm.rnd_maxVal = read_double(argc,argv,"--maxval",1.0);
	prm.rnd_pwrOfTwo = read_int(argc,argv,"--pwrof2",0);
	bool roundFinal = read_bool(argc, argv, "--rndfin",false);
	prm.alpha = read_double(argc,argv, "--alpha", 0.1);
	// A nonzero --M sets all three prm.M entries at once; otherwise each
	// is read individually and defaults to -1.
	int M = read_int(argc,argv, "--M", 0);
	if (M)
	{
		prm.M[0] = M;
		prm.M[1] = M;
		prm.M[2] = M;
	} else {	    
		prm.M[0] = read_int(argc, argv, "--M0", -1);
		prm.M[1] = read_int(argc, argv, "--M1", -1);
		prm.M[2] = read_int(argc, argv, "--M2", -1);
	}
	char * infile = read_string(argc, argv, "--input", NULL);
	char * outfile = read_string(argc, argv, "--output", NULL);

	if (verbose) {
		// Unbuffered stdout so progress shows up immediately.
		setbuf(stdout, NULL);
		printf("\n\n---------------------------------------------------------\n");
		printf("PARAMETERS\n");
		printf("dimensions = %d %d %d\n",prm.matDims[0],prm.matDims[1],prm.matDims[2]);
		printf("rank       = %d\n",prm.rank);
		printf("method     = %s\n",prm.method);
		if (infile)
			printf("input      = %s\n",infile);
		else
		{
			if (numSeeds == 1)
				printf("input      = seed %d\n",seed); 
			else
				printf("inputs     = seeds %d-%d\n",seed,seed+numSeeds-1);
		}
		if (outfile)
			printf("output     = %s\n",outfile);
		else
			printf("output     = none\n"); 
		if (!strcmp(prm.method,"als"))
		{
			printf("tol        = %1.2e\n",tol);
			printf("alpha      = %1.2e\n",prm.alpha);
			// NOTE(review): label reads "maval" but the value is rnd_maxVal (--maxval).
			printf("maval      = %1.2e\n",prm.rnd_maxVal);
			printf("M's        = (%d,%d,%d)\n",prm.M[0],prm.M[1],prm.M[2]);
			printf("maxiters   = %d\n",maxIters);
			printf("maxsecs    = %d\n",maxSecs);
			printf("printitn   = %d\n",printItn);
			printf("printtol   = %1.2e\n",printTol);
		}
		printf("---------------------------------------------------------\n");
	}

	// Initialize other variables
	int i, j, k, numIters, mkn, tidx[3];
	double err, errOld, errChange = 0.0, start_als, start_search, elapsed, threshold;

	// Compute tensor dimensions
	prm.dims[0] = prm.matDims[0]*prm.matDims[1];
	prm.dims[1] = prm.matDims[1]*prm.matDims[2];
	prm.dims[2] = prm.matDims[0]*prm.matDims[2];

	// Compute tensor's nnz, total number of entries, and Frobenius norm
	// NOTE(review): mkn is an int, so mkn*mkn is evaluated in int arithmetic
	// and can overflow for large dimensions.
	mkn = prm.matDims[0]*prm.matDims[1]*prm.matDims[2];
	prm.mkn2 = mkn*mkn;
	prm.xNorm = sqrt(mkn);

	// Compute number of columns in matricized tensors
	for (i = 0; i < 3; i++)
		prm.mtCols[i] = prm.mkn2 / prm.dims[i];

	// Construct three matricizations of matmul tensor
	// Each (mm,kk,nn) triple sets exactly one entry to 1 in every
	// matricization; all other entries stay 0 from calloc.
	prm.X = (double**) malloc( 3 * sizeof(double*) );
	for (i = 0; i < 3; i++)
		prm.X[i] = (double*) calloc( prm.mkn2, sizeof(double) );
	for (int mm = 0; mm < prm.matDims[0]; mm++)
		for (int kk = 0; kk < prm.matDims[1]; kk++)
			for (int nn = 0; nn < prm.matDims[2]; nn++)
			{
				tidx[0] = mm + kk*prm.matDims[0];
				tidx[1] = kk + nn*prm.matDims[1];
				tidx[2] = mm + nn*prm.matDims[0];
				prm.X[0][tidx[0]+prm.dims[0]*(tidx[1]+prm.dims[1]*tidx[2])] = 1;
				prm.X[1][tidx[1]+prm.dims[1]*(tidx[0]+prm.dims[0]*tidx[2])] = 1;
				prm.X[2][tidx[2]+prm.dims[2]*(tidx[0]+prm.dims[0]*tidx[1])] = 1;
			}

	// Allocate factor weights and matrices: working, initial, and model
	// NOTE(review): none of the allocations in this function are checked for NULL.
	prm.lambda = (double*) malloc( prm.rank * sizeof(double) );
	prm.U  = (double**) malloc( 3 * sizeof(double*) );
	double** U0 = (double**) malloc( 3 * sizeof(double*) );
	prm.model = (double**) malloc( 3 * sizeof(double*) );
	for (i = 0; i < 3; i++)
	{
		prm.U[i] =  (double*) calloc( prm.mkn2, sizeof(double) );
		U0[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) );
		prm.model[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) );
	}

	// Allocate coefficient matrix within ALS (Khatri-Rao product) 
	int maxMatDim = prm.matDims[0];
	if (maxMatDim < prm.matDims[1]) maxMatDim = prm.matDims[1];
	if (maxMatDim < prm.matDims[2]) maxMatDim = prm.matDims[2];
	prm.A = (double*) malloc( maxMatDim*mkn*prm.rank * sizeof(double) );

	// Allocate workspaces
	prm.tau = (double*) malloc( mkn * sizeof(double) );
	prm.lwork = maxMatDim*mkn*prm.rank;
	prm.work = (double*) malloc( prm.lwork * sizeof(double) );
	prm.iwork = (int*) malloc( prm.mkn2 * sizeof(int) );    

	// Allocate matrices for normal equations 
	int maxDim = prm.dims[0];
	if (maxDim < prm.dims[1]) maxDim = prm.dims[1];
	if (maxDim < prm.dims[2]) maxDim = prm.dims[2];
	prm.NE_coeff = (double*) malloc( prm.rank*prm.rank * sizeof(double) );
	prm.NE_rhs = (double*) malloc( maxDim*prm.rank * sizeof(double) );
	prm.residual = (double*) malloc( prm.mkn2 * sizeof(double) );

	//--------------------------------------------------
	// Search Loop
	//--------------------------------------------------
	// mySeed advances by one per pass; statusCnt/status drive the
	// "...N% complete..." progress lines.
	int mySeed = seed, numGoodSeeds = 0, statusCnt = 0, status = 1;
	start_search = wall_time(); 
	for (int seed_cnt = 0; seed_cnt < numSeeds; ++seed_cnt)
	{
		// Set starting point from random seed (match Matlab Tensor Toolbox)
		RandomMT cRMT(mySeed);
		for (i = 0; i < 3; i++)
			for (j = 0; j < prm.dims[i]; j++)
				for (k = 0; k < prm.rank; k++)
					U0[i][j+k*prm.dims[i]] = cRMT.genMatlabMT();
		for (i = 0; i < prm.rank; i++)
			prm.lambda[i] = 1.0;  

		// Copy starting point
		for (i = 0; i < 3; i++)
			cblas_dcopy(prm.dims[i]*prm.rank,U0[i],1,prm.U[i],1); 

		// read from file if input is given    
		// (called on every pass of the seed loop, after the random start is set)
		if( infile )
			read_input( infile, prm ); 

		if (verbose)
		{ 
			printf("\nSTARTING POINT...\n");
			for (i = 0; i < 3; i++)
			{
				printf("Factor matrix %d:\n",i);
				print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]);
			}
			printf("\n");
		}   

		//--------------------------------------------------
		// Main ALS Loop
		//--------------------------------------------------
		start_als = wall_time();
		err = 1.0; 
		threshold = 1e-4;
		for (numIters = 0; numIters < maxIters && (wall_time()-start_als) < maxSecs; numIters++)
		{
			errOld = err;

			if (!strcmp(prm.method,"als"))
			{
				// Perform an iteration of ALS using NE with Smirnov's penalty term
				err = als( prm );
			}
			else if (!strcmp(prm.method,"sparsify"))
			{   
				// print stats before sparsifying
				printf("Old residual: %1.2e\n",compute_residual(prm,2,true));
				printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );

				// sparsify and return
				printf("\nSparsifying...\n\n");
				sparsify( prm );
				// One-shot method: push the counter to the limit so the
				// for-condition fails if the loop is re-tested.
				numIters = maxIters;

				// print stats after sparsifying
				printf("New residual: %1.2e\n",compute_residual(prm,2,true));
				printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
			}
			else if (!strcmp(prm.method,"round"))
			{
				// print stats before rounding
				printf("Old residual: %1.2e\n",compute_residual(prm,2,true));
				printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
				// round and return
				for (i = 0; i < 3; i++)
				{
					capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal);
					rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo);
				}
				// One-shot method, same as "sparsify" above.
				numIters = maxIters;

				// print stats after rounding
				printf("New residual: %1.2e\n",compute_residual(prm,2,true));
				printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
			}
			else
				die("Invalid method\n");   

			// Compute change in relative residual norm
			errChange = fabs(err - errOld);          

			// Print info at current iteration
			if ((printItn > 0) && (((numIters + 1) % printItn) == 0))
			{                
				// print info                    
				printf ("Iter %d: residual = %1.5e change = %1.5e\n", numIters + 1, err, errChange);
			} 

			// Check for convergence 
			// (never on the first iteration, where errOld is just the initial 1.0)
			if ( numIters > 0 && errChange < tol )
				break;

		}

		// If rounding, round final solution and re-compute residual
		if(roundFinal)
		{
			// normalize columns in A and B factors, put arbitrary weights into C
			normalize_model( prm, 2 );

			// cap large values and round to nearest power of 2
			for (i = 0; i < 3; i++)
			{
				capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal);
				rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo);
			}

			err = compute_residual(prm,0,true);
		}    

		// Print status if searching over many seeds
		// (a line after every numSeeds/10 seeds, only when numSeeds > 1000)
		statusCnt++;
		if (numSeeds > 1000 && statusCnt == numSeeds/10)
		{
			printf("...%d%% complete...\n",10*status);
			status++;
			statusCnt = 0;
		}

		// Print final info
		elapsed = wall_time() - start_als;
		if ((printItn > 0 || verbose) && !strcmp(prm.method,"als"))
		{
			if (infile)
				printf("\nInput %s ",infile);
			else
				printf("\nInitial seed %d ",mySeed);
			// NOTE(review): numIters is 0 when the ALS loop body never ran
			// (e.g. maxIters <= 0), so elapsed/numIters divides by zero.
			printf("achieved residual %1.3e in %d iterations and %1.3e seconds\n \t final residual change: %1.3e\n \t average time per iteration: %1.3e s\n", err, numIters, elapsed, errChange, elapsed/numIters);
		}

		if (verbose)
		{
			printf("\nSOLUTION...\n");
			for (i = 0; i < 3; i++)
			{
				printf("Factor matrix %d:\n",i);
				if (roundFinal || !strcmp(prm.method,"round"))
					print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo);
				else
					print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]);
			}
			
			if (err < printTol)
				numGoodSeeds++;
		}
		else if (err < printTol)
		{
			// Non-verbose mode: only seeds below printTol are reported.
			numGoodSeeds++;

			printf("\n\n***************************************\n");
			if (infile)
				printf("Input %s: ",infile);
			else
				printf("Initial seed %d: ",mySeed);
			printf("after %d iterations, achieved residual %1.3e with final residual change of %1.3e\n", numIters, err, errChange);
			if (roundFinal)
			{

				for (i = 0; i < 3; i++)
				{
					printf("Factor matrix %d:\n",i);
					print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo);
				}

				int count = 0;
				for (i = 0; i < 3; i++)
					count += nnz(prm.U[i],prm.dims[i]*prm.rank);
				printf("\ttotal nnz in solution: %d\n",count);
				printf("\tnaive adds/subs:       %d\n",count - prm.dims[2] - 2*prm.rank);
			}
			printf("***************************************\n\n\n");
		}

		// write to output
		// (the same path is written on every seed pass)
		if( outfile )
			write_output( outfile, prm ); 

		mySeed++;
	}      

	// Final report of processor statistics
	elapsed = wall_time()-start_search;

	// Print stats
	if (!strcmp(prm.method,"als"))
	{
		printf("\n\n------------------------------------------------------------\n");
		printf("Time elapsed:                \t%1.1e\tseconds\n",elapsed);
		printf("Total number of seeds tried: \t%d\n",numSeeds);
		printf("Total number of good seeds:  \t%d",numGoodSeeds);
		printf("\t(residual < %2.1e)\n",printTol);   
		printf("------------------------------------------------------------\n");
	}


	// free allocated memory
	for (i = 0; i < 3; i++)
	{
		free( prm.X[i] );
		free( prm.U[i] );
		free( U0[i] );
		free( prm.model[i] );
	} 
	free( prm.X );
	free( prm.U );
	free( U0 );
	free( prm.model );
	free( prm.lambda );
	free( prm.A );
	free( prm.NE_coeff );
	free( prm.NE_rhs );
	free( prm.residual );
	free( prm.tau );
	free( prm.work );
	free( prm.iwork );

	return 0;

}
Example #24
0
File: chord.c Project: surki/hipl
/*
 * chord_main: start the Chord node and run its event loop (never returns).
 *
 * conf_file   - path of a text file whose first two fields are the port
 *               number and the node id; the still-open file is then handed
 *               to join() before being closed.
 * parent_sock - second descriptor watched alongside srv.in_sock; packets
 *               arriving on either are passed to handle_packet().
 *
 * The loop sleeps in select() until srv.next_stabilize_us and calls
 * stabilize() when select() times out and that deadline has passed.
 *
 * Failures while reading the configuration go through eprintf(), which this
 * function relies on being fatal (fp is used right after the fopen check).
 */
void
chord_main(char *conf_file, int parent_sock)
{
    fd_set interesting, readable;
    int nfound, nfds;
    struct in_addr ia;
    char id[4*ID_LEN];
    char id_fmt[32];
    FILE *fp;
    int64_t stabilize_wait;
    struct timeval timeout;

    setprogname("chord");
    srandom(getpid() ^ time(0));
    memset(&srv, 0, sizeof(Server));
    srv.to_fix_finger = NFINGERS-1;

    fp = fopen(conf_file, "r");
    if (fp == NULL)
        eprintf("fopen(%s,\"r\") failed:", conf_file);
    if (fscanf(fp, "%hd", (short*)&srv.node.port) != 1)
        eprintf("Didn't find port in \"%s\"", conf_file);

    /* Bound the %s conversion to the size of id so that an over-long token
     * in the configuration file cannot overflow the buffer. */
    snprintf(id_fmt, sizeof(id_fmt), " %%%lus\n",
             (unsigned long)(sizeof(id) - 1));
    if (fscanf(fp, id_fmt, id) != 1)
        eprintf("Didn't find id in \"%s\"", conf_file);
    srv.node.id = atoid(id);

    /* Figure out one's own address somehow */
    srv.node.addr = ntohl(get_addr());

    ia.s_addr = htonl(srv.node.addr);
    fprintf(stderr, "Chord started.\n");
    fprintf(stderr, "id="); print_id(stderr, &srv.node.id);
    fprintf(stderr, "\n");
    fprintf(stderr, "ip=%s\n", inet_ntoa(ia));
    fprintf(stderr, "port=%d\n", srv.node.port);

    initialize(&srv);
    srv_ref = &srv;
    join(&srv, fp);
    fclose(fp);

    FD_ZERO(&interesting);
    FD_SET(srv.in_sock, &interesting);
    FD_SET(parent_sock, &interesting);
    nfds = MAX(srv.in_sock, parent_sock) + 1;

    NumKeys = read_keys(ACCLIST_FILE, KeyArray, MAX_KEY_NUM);
    if (NumKeys == -1) {
      printf("Error opening file: %s\n", ACCLIST_FILE);
    }
    if (NumKeys == 0) {
      printf("No key found in %s\n", ACCLIST_FILE);
    }

    /* Loop on input */
    for (;;) {
        /* select() overwrites its fd_set and may modify the timeout, so
         * both are rebuilt on every pass. */
        readable = interesting;
        stabilize_wait = (int64_t)(srv.next_stabilize_us - wall_time());
        stabilize_wait = MAX(stabilize_wait,0);
        timeout.tv_sec = stabilize_wait / 1000000UL;
        timeout.tv_usec = stabilize_wait % 1000000UL;
        nfound = select(nfds, &readable, NULL, NULL, &timeout);
        if (nfound < 0) {
            if (errno == EINTR) {
                continue;
            }
            /* After a failed select() the descriptor sets are not updated,
             * so `readable` still has both sockets set and must not be
             * consulted. Treated as fatal, like the fopen failure above. */
            eprintf("select failed:");
        }
        if (nfound == 0) {
            /* Timed out: stabilize only if the deadline has really passed. */
            stabilize_wait = (int64_t)(srv.next_stabilize_us - wall_time());
            if( stabilize_wait <= 0 ) {
                stabilize( &srv );
            }
            continue;
        }
        /* Only one socket is serviced per pass; if both are readable the
         * other is picked up by the next select() call. */
        if (FD_ISSET(srv.in_sock, &readable)) {
            handle_packet(srv.in_sock);
        }
        else if (FD_ISSET(parent_sock, &readable)) {
            handle_packet(parent_sock);
        }
        else {
            assert(0);
        }
    }
}
Example #25
0
/*
 * MPI ping-pong benchmark.
 *
 * Runs with exactly 2 or 4 processes. Processes are paired through the
 * destination[] table (0<->1 for two processes; 0<->3 and 1<->2 for four).
 * For each message length, the even rank of a pair sends and then receives
 * the message r_max times while the odd rank receives and then sends; the
 * even rank prints the average wall-clock time per round trip.
 *
 * Any other process count, or an allocation failure, aborts the job with a
 * non-zero error code.
 */
int main(int argc, char *argv[])
{
  const int length[] = {1, 10, 100, 1000, 10000, 100000, 1000000};
  const int arrlength = sizeof(length)/sizeof(length[0]);
  const int r_max = 1000; // repetitions per message length
  char *msg;
  int rank, r, i, name_length, size;
  MPI_Status status;
  double elapsed = 0.0;
  char name[MPI_MAX_PROCESSOR_NAME];

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  int destination[size]; // maps: rank => destination of messages
  MPI_Get_processor_name(name, &name_length);

  if(size == 2) {
    destination[0] = 1;
    destination[1] = 0;
  } else if(size == 4) {
    destination[0] = 3;
    destination[2] = 1;
    destination[3] = 0;
    destination[1] = 2;
  } else {
    // destination[] would be left uninitialized; report and fail the job.
    if(rank == 0)
      fprintf(stderr, "this benchmark needs 2 or 4 processes, got %d\n", size);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  if(rank == 0) printf("size is %d\n", size);
  printf("%s: got rank %d\n",name, rank);
  sleep(1);

  for (i = 0; i < arrlength; i++){
    msg = (char *)malloc(length[i]);
    if(msg == NULL) {
      fprintf(stderr, "%s: cannot allocate %d bytes\n", name, length[i]);
      MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if(rank%2 == 0) {
      // The first reading is discarded (presumably a warm-up of the timer);
      // kept as in the original measurement procedure.
      elapsed = wall_time();
      elapsed = wall_time();
    }

    for (r = 0; r < r_max; r++){
      // Even ranks send first and odd ranks receive first, so the two
      // blocking calls of a pair always match up.
      if(rank%2 == 0) {
        MPI_Send(msg, length[i], MPI_CHAR, destination[rank], 0, MPI_COMM_WORLD);
        MPI_Recv(msg, length[i], MPI_CHAR, destination[rank], 0, MPI_COMM_WORLD, &status);
      } else {
        MPI_Recv(msg, length[i], MPI_CHAR, destination[rank], 0, MPI_COMM_WORLD, &status);
        MPI_Send(msg, length[i], MPI_CHAR, destination[rank], 0, MPI_COMM_WORLD);
      }
    }

    if(rank%2 == 0) {
      elapsed = wall_time() - elapsed;
      printf("%s: Zeit um %7d Bytes 2mal zu übertragen: %g s\n",name, length[i], elapsed / r_max);
    }

    free(msg);
  }

  MPI_Finalize();
  return 0;
}
Example #26
0
/* To determine the coordinates of the local node initially
 * Ping a subset of nodes and determine coordinates
 *
 * Opens a temporary ping socket (ICMP or UDP, depending on ICMP_PING), then
 * loops until either COORD_INIT_PING_WAIT_TIME has elapsed or replies from
 * n landmarks have been collected, where n is the smaller of
 * NUM_LANDMARKS_COORDINATE and the number of known servers. Each pass sends
 * one echo request to the next node (first list->list, then
 * list->full_list) and waits up to 200 ms for a reply. The collected
 * (coordinate, RTT) pairs are passed to compute_coordinates().
 *
 * Replies whose source address is not in `list` are ignored.
 * Aborts the process if the socket cannot be created or select() fails
 * with anything other than EINTR. */
void init_coordinates(I3ServerList *list)
{
    int n = MIN(NUM_LANDMARKS_COORDINATE, list->num_newservers + list->num_ping_list);
    I3ServerListNode *node = list->list, *temp_node;
    uint64_t start_time = wall_time();
    Coordinates_RTT coord_rtt[NUM_LANDMARKS_COORDINATE];
    int num_landmarks = 0; int started_full_list = 0;
    struct in_addr ia;
    nw_skt_t tmp_ping_sock;

#ifdef ICMP_PING
    if (init_icmp_socket(&tmp_ping_sock) == -1)
	abort();
#else
    if (init_udp_socket(&tmp_ping_sock) == -1)
	abort();
#endif

    // wait for responses and accumulate
    // cut and pasted from below
    while ((wall_time() - start_time < COORD_INIT_PING_WAIT_TIME) && 
	    (num_landmarks < n)) {
	fd_set rset;
	struct timeval to;
	int ret;

	FD_ZERO(&rset);

	// once the ping list is exhausted, continue with the full list (once)
	if (!node && !started_full_list) {
	    node = list-> full_list;
	    started_full_list = 1;
	}
	
	if (node) {
	    ia.s_addr = htonl(node->addr);
	    I3_PRINT_DEBUG1(I3_DEBUG_LEVEL_VERBOSE,
		    "Sending ICMP echo request to %s\n", inet_ntoa(ia));
#ifdef ICMP_PING
	    send_echo_request(tmp_ping_sock, node->addr, 0);
#else
	    i3_echo_request(tmp_ping_sock, node->addr, node->port, 0);
#endif
	    node = node->next_list;
	}

	FD_SET(tmp_ping_sock, &rset);
        to.tv_sec = 0; to.tv_usec = 200000ULL;
        if ((ret = select(tmp_ping_sock+1, &rset, NULL, NULL, &to)) < 0) {
	    int err = nw_error();
            if (err == EINTR)
                continue;
            else {
                perror("select");
                abort();
            }
        }

	// message received on icmp socket
	if (FD_ISSET(tmp_ping_sock, &rset)) {
	    uint32_t addr; uint16_t port, seq; uint64_t rtt;
#ifdef ICMP_PING
	    if (recv_echo_reply(tmp_ping_sock, &addr, &seq, &rtt)) {
#else
	    if (recv_i3_echo_reply(tmp_ping_sock, &addr, &port, &seq, &rtt)) {
#endif
		temp_node = lookup_i3server(list, addr);
		if (NULL == temp_node) {
		    // The source address comes off the network; a reply from
		    // a host that is not in the server list must not crash
		    // the process, so it is simply dropped.
		    continue;
		}

		coord_rtt[num_landmarks].coord = temp_node->coord;
		coord_rtt[num_landmarks].rtt = rtt;
		num_landmarks++;

		ia.s_addr = htonl(addr);
		// rtt is a uint64_t: print it through an explicit cast with a
		// standard conversion specifier.
		I3_PRINT_DEBUG4(I3_DEBUG_LEVEL_VERBOSE,
			"Node: %s Coordinate: %.1f:%.1f RTT: %llu\n",
			inet_ntoa(ia), temp_node->coord.latitude,
			temp_node->coord.longitude, (unsigned long long)rtt);
	    }
	}
    }
    nw_close(tmp_ping_sock);

    // compute own coordinate
    compute_coordinates(num_landmarks, coord_rtt);
}

/* Update the coordinates of a node using ping information
 *
 * Collects up to n = MIN(NUM_LANDMARKS_COORDINATE, total servers) landmarks:
 * n1 from list->list and n2 from list->full_list, split in proportion to
 * list->num_ping_list and list->num_newservers. Only nodes with n > 0 are
 * used. The (coordinate, RTT) pairs are passed to compute_coordinates(),
 * after which the ping list is rebuilt via change_ping_list().
 *
 * Each scan stops at the end of its list, after the advertised number of
 * nodes, or once enough landmarks have been found -- whichever comes first. */
void update_coordinate(I3ServerList *list, I3ServerListNode *next_to_ping)
{
    Coordinates_RTT coord_rtt[NUM_LANDMARKS_COORDINATE];
    int count, num_landmarks = 0;
    I3ServerListNode *node;

    // n1 and n2: number of landmarks from ping_list and rest in
    // proportion to the number of nodes in those lists
    int total = list->num_newservers + list->num_ping_list;
    int i, n = MIN(NUM_LANDMARKS_COORDINATE, total);
    // guard the ratio: with no servers at all it would be 0/0
    int n1 = (total > 0)
	    ? (int)(((float)list->num_ping_list / total) * n)
	    : 0;
    int n2 = n-n1;

    // add from ping list
    // All three conditions must hold; joining them with the comma operator
    // would test only the last one and could walk past the end of the list.
    count = 0;
    for (i = 0, node = list->list; 
	    node != NULL && i < list->num_ping_list && count < n1;
	    node = node->next_list, ++i) {
	if (node->n > 0) {
	    coord_rtt[count].rtt = get_rtt_node(node);
	    coord_rtt[count].coord = node->coord;
	    count++;
	}
    }
    num_landmarks = count;

    // add from rest
    count = 0;
    for (i = 0, node = list->full_list; 
	    node != NULL && i < list->num_newservers && count < n2; 
	    node = node->next_list, ++i) {
	if (node->n > 0) {
	    coord_rtt[num_landmarks + count].rtt = get_rtt_node(node);
	    coord_rtt[num_landmarks + count].coord = node->coord;
	    count++;
	}
    }
    num_landmarks += count;

    // recompute coordinates
    compute_coordinates(num_landmarks, coord_rtt);

    // repopulate ping list afresh
    change_ping_list(list, &next_to_ping, 1);
}
Example #27
0
// Replay tool entry point ("Plays a replay into a set of images and a log of
// sounds").
//
// Parses the command line, schedules a snapshot every `interval` ticks, picks
// a sound driver and then runs a ReplayMaster over the mapped replay file
// with one of three video drivers:
//   --smoke : TextVideoDriver with no output directory
//   --text  : TextVideoDriver writing to the output directory
//   default : OffscreenVideoDriver writing to the output directory
//
// Exits with status 1 if the arguments cannot be parsed or the interval is
// not positive.
void main(int argc, char** argv) {
    args::Parser parser(argv[0], "Plays a replay into a set of images and a log of sounds");

    // Initial value only; the required positional argument overwrites it.
    String replay_path(utf8::decode(argv[0]));
    parser.add_argument("replay", store(replay_path)).help("an Antares replay script").required();

    Optional<String> output_dir;
    parser.add_argument("-o", "--output", store(output_dir))
            .help("place output in this directory");

    int  interval = 60;
    int  width    = 640;
    int  height   = 480;
    bool text     = false;
    bool smoke    = false;
    parser.add_argument("-i", "--interval", store(interval))
            .help("take one screenshot per this many ticks (default: 60)");
    parser.add_argument("-w", "--width", store(width)).help("screen width (default: 640)");
    parser.add_argument("-h", "--height", store(height)).help("screen height (default: 480)");
    parser.add_argument("-t", "--text", store_const(text, true)).help("produce text output");
    parser.add_argument("-s", "--smoke", store_const(smoke, true)).help("run as smoke text");

    parser.add_argument("--help", help(parser, 0)).help("display this help screen");

    String error;
    if (!parser.parse_args(argc - 1, argv + 1, error)) {
        print(io::err, format("{0}: {1}\n", parser.name(), error));
        exit(1);
    }

    // The interval is the step of the snapshot loop below; a zero or negative
    // step would never reach the loop bound, so reject it up front.
    if (interval <= 0) {
        print(io::err,
              format("{0}: --interval must be a positive number of ticks\n", parser.name()));
        exit(1);
    }

    if (output_dir.has()) {
        makedirs(*output_dir, 0755);
    }

    Preferences preferences;
    preferences.play_music_in_game = true;
    // NOTE(review): prefs, sound and ledger are not referenced again in this
    // function; presumably their constructors install them as the active
    // drivers -- confirm before removing any of them.
    NullPrefsDriver prefs(preferences);

    EventScheduler scheduler;
    scheduler.schedule_event(unique_ptr<Event>(new MouseMoveEvent(wall_time(), Point(320, 240))));
    // TODO(sfiera): add recurring snapshots to OffscreenVideoDriver.
    for (int64_t i = 1; i < 72000; i += interval) {
        scheduler.schedule_snapshot(i);
    }

    // Sound is logged to <output_dir>/sound.log only for non-smoke runs that
    // have an output directory; otherwise it is discarded.
    unique_ptr<SoundDriver> sound;
    if (!smoke && output_dir.has()) {
        String out(format("{0}/sound.log", *output_dir));
        sound.reset(new LogSoundDriver(out));
    } else {
        sound.reset(new NullSoundDriver);
    }
    NullLedger ledger;

    MappedFile replay_file(replay_path);
    if (smoke) {
        TextVideoDriver video({width, height}, Optional<String>());
        video.loop(new ReplayMaster(replay_file.data(), output_dir), scheduler);
    } else if (text) {
        TextVideoDriver video({width, height}, output_dir);
        video.loop(new ReplayMaster(replay_file.data(), output_dir), scheduler);
    } else {
        OffscreenVideoDriver video({width, height}, output_dir);
        video.loop(new ReplayMaster(replay_file.data(), output_dir), scheduler);
    }
}
/* The benchmarking program.
 *
 * For every test size and every block size in [3, 200) it fills three
 * matrices, times square_dgemm over a doubling number of iterations until a
 * run takes at least 0.1 s, prints the achieved rate, and then checks the
 * result of square_dgemm against reference_dgemm using a componentwise
 * error bound.  Returns 0 on success; die() is called on allocation failure
 * or when the error bound is exceeded. */
int main (int argc, char **argv)
{
  printf ("Description:\t%s\n\n", dgemm_desc);

  /* Test sizes should highlight performance dips at multiples of certain powers-of-two */

  int test_sizes[] = 

  /* Multiples-of-32, +/- 1. Currently in use. */
   {31,32,33,63,64,65,95,96,97,127,128,129,159,160,161,191,192,193,223,224,225,255,256,257,287,288,289,319,320,321,351,352,353,383,384,385,415,416,417,447,448,449,479,480,481,511,512,513,543,544,545,575,576,577,607,608,609,639,640,641,671,672,673,703,704,705,735,736,737,767,768,769,799,800,801,831,832,833,863,864,865,895,896,897,927,928,929,959,960,961,991,992,993,1023,1024,1025}; 

  /* A representative subset of the first list. Currently commented out. */ 
  //{ 31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257,
  //  319, 320, 321, 417, 479, 480, 511, 512, 639, 640, 767, 768, 769 };

  int nsizes = sizeof(test_sizes)/sizeof(test_sizes[0]);

  /* Find the largest size instead of assuming it is the last entry, so the
   * buffer stays large enough if the list is edited or reordered. */
  int nmax = test_sizes[0];
  for (int k = 1; k < nsizes; ++k)
    if (test_sizes[k] > nmax)
      nmax = test_sizes[k];

  /* Element count of one nmax-by-nmax matrix, computed in size_t so the
   * product cannot overflow int for large sizes. */
  size_t max_elems = (size_t) nmax * (size_t) nmax;

  /* allocate memory for all problems: three matrices A, B, C */
  double* buf = NULL;
  buf = (double*) malloc (3 * max_elems * sizeof(double));
  if (buf == NULL) die ("failed to allocate largest problem size");

  /* Most recent rate and percentage of peak per size (overwritten for each
   * block size; only used for the printout below). */
  double Mflops_s[nsizes],per[nsizes];

  /* For each test size */
  for (int isize = 0; isize < nsizes; ++isize) {
    /* For each block size */
    for (int block_size = 3; block_size < 200; block_size++) {
      /* Create and fill 3 random matrices A,B,C*/
      int n = test_sizes[isize];

      double* A = buf + 0;
      double* B = A + max_elems;
      double* C = B + max_elems;

      fill (A, n*n);
      fill (B, n*n);
      fill (C, n*n);

      /* Measure performance (in Gflops/s). */

      /* Time a "sufficiently long" sequence of calls to reduce noise */
      double Gflops_s = 0.0, seconds = -1.0;
      double timeout = 0.1; // "sufficiently long" := at least 1/10 second.
      for (int n_iterations = 1; seconds < timeout; n_iterations *= 2) {
        /* Warm-up */
        square_dgemm (block_size,n, A, B, C);

        /* Benchmark n_iterations runs of square_dgemm */
        seconds = -wall_time();
        for (int it = 0; it < n_iterations; ++it)
          square_dgemm (block_size,n, A, B, C);
        seconds += wall_time();

        /*  compute Gflop/s rate */
        Gflops_s = 2.e-9 * n_iterations * n * n * n / seconds;
      }

      /* Storing Mflop rate and calculating percentage of peak */
      Mflops_s[isize] = Gflops_s*1000;
      per[isize] = Gflops_s*100/MAX_SPEED;

      printf ("Size: %d\t Block Size: %d\t Mflop/s: %8g\tPercentage:%6.2lf\n", n, block_size,Mflops_s[isize],per[isize]);

      /* Ensure that error does not exceed the theoretical error bound. */

      /* C := A * B, computed with square_dgemm */
      memset (C, 0, (size_t) n * (size_t) n * sizeof(double));
      square_dgemm (block_size,n, A, B, C);

      /* Do not explicitly check that A and B were unmodified on square_dgemm exit
       *  - if they were, the following will most likely detect it:   
       * C := C - A * B, computed with reference_dgemm */
      reference_dgemm(n, -1., A, B, C);

      /* A := |A|, B := |B|, C := |C| */
      absolute_value (A, n * n);
      absolute_value (B, n * n);
      absolute_value (C, n * n);

      /* C := |C| - 3 * e_mach * n * |A| * |B|, computed with reference_dgemm */ 
      reference_dgemm (n, -3.*DBL_EPSILON*n, A, B, C);

      /* If any element in C is positive, then something went wrong in square_dgemm */
      for (int i = 0; i < n * n; ++i)
        if (C[i] > 0)
          die("*** FAILURE *** Error in matrix multiply exceeds componentwise error bounds.\n" );

    }

  }

  free (buf);

  return 0;
}
Example #29
0
/*---------------------------------------------------------------------------------------------
 * (function: simulateNextWave)
 *-------------------------------------------------------------------------------------------*/
int OdinInterface::simulateNextWave()
{
    if(!num_vectors){
        fprintf(stderr, "No vectors to simulate.\n");
    } else {

        double total_time = 0;
        double simulation_time = 0;

        num_cycles = num_vectors*2;
        num_waves = 1;
        tvector = 0;


        double wave_start_time = wall_time();
        //create a new wave
        wave++;
        int cycle_offset = SIM_WAVE_LENGTH * wave;
        int wave_length  = SIM_WAVE_LENGTH;

        // Assign vectors to lines, either by reading or generating them.
        // Every second cycle gets a new vector.

        for (cycle = cycle_offset; cycle < cycle_offset + wave_length; cycle++)
        {
            if (is_even_cycle(cycle))
            {
                    if (input_vector_file)
                    {
                            char buffer[BUFFER_MAX_SIZE];

                            if (!get_next_vector(in, buffer))
                                    error_message(SIMULATION_ERROR, 0, -1, (char*)"Could not read next vector.");

                            tvector = parse_test_vector(buffer);
                    }
                    else
                    {
                            tvector = generate_random_test_vector(input_lines, cycle, hold_high_index, hold_low_index);
                    }
            }

            add_test_vector_to_lines(tvector, input_lines, cycle);

            if (!is_even_cycle(cycle))
                    free_test_vector(tvector);
        }

        // Record the input vectors we are using.
        write_wave_to_file(input_lines, in_out, cycle_offset, wave_length, 1);
        // Write ModelSim script.
        write_wave_to_modelsim_file(netlist, input_lines, modelsim_out, cycle_offset, wave_length);

        double simulation_start_time = wall_time();

        // Perform simulation
        for (cycle = cycle_offset; cycle < cycle_offset + wave_length; cycle++)
        {
            if (cycle)
            {
                    simulate_cycle(cycle, stgs);
            }
            else
            {
                    // The first cycle produces the stages, and adds additional
                    // lines as specified by the -p option.
                    pin_names *p = parse_pin_name_list(global_args.sim_additional_pins);
                    stgs = simulate_first_cycle(netlist, cycle, p, output_lines);
                    free_pin_name_list(p);
                    // Make sure the output lines are still OK after adding custom lines.
                    if (!verify_lines(output_lines))
                            error_message(SIMULATION_ERROR, 0, -1,
                                            (char*)"Problem detected with the output lines after the first cycle.");
            }
        }

        simulation_time += wall_time() - simulation_start_time;

        // Write the result of this wave to the output vector file.
        write_wave_to_file(output_lines, out, cycle_offset, wave_length, output_edge);

        total_time += wall_time() - wave_start_time;

        // Print netlist-specific statistics.
        if (!cycle_offset)
        {
                print_netlist_stats(stgs, num_vectors);
                fflush(stdout);
        }

        // Print statistics.
        print_simulation_stats(stgs, num_vectors, total_time, simulation_time);
        myCycle = cycle;
    }


    return myCycle;
}
Example #30
0
ulong get_current_time()
{
	return (ulong)wall_time();
}