Exemple #1
0
static void
hup_handler(int sig)
{
	route *newroutes;
	cluster *newclusters;
	int id;
	FILE *newfd;

	logout("caught SIGHUP...\n");
	if (relay_stderr != stderr) {
		/* try to re-open the file first, so we can still try and say
		 * something if that fails */
		if ((newfd = fopen(relay_logfile, "a")) == NULL) {
			logerr("not reopening logfiles: can't open '%s': %s\n",
					relay_logfile, strerror(errno));
		} else {
			logout("closing logfile\n");
			relay_can_log = 0;
			fclose(relay_stderr);
			relay_stdout = newfd;
			relay_stderr = newfd;
			relay_can_log = 1;
			logout("reopening logfile\n");
		}
	}
	logout("reloading config from '%s'...\n", config);

	if (router_readconfig(&newclusters, &newroutes,
				config, queuesize, batchsize) == 0)
	{
		logerr("failed to read configuration '%s', aborting reload\n",
				config);
		return;
	}
	router_optimise(&newroutes);

	logout("reloading worker");
	for (id = 1; id < 1 + workercnt; id++)
		dispatch_schedulereload(workers[id + 0], newroutes);
	for (id = 1; id < 1 + workercnt; id++) {
		while (!dispatch_reloadcomplete(workers[id + 0]))
			usleep((100 + (rand() % 200)) * 1000);  /* 100ms - 300ms */
		fprintf(relay_stdout, " %d", id + 1);
		fflush(relay_stdout);
	}
	fprintf(relay_stdout, "\n");

	logout("reloading collector\n");
	collector_schedulereload(newclusters);
	while (!collector_reloadcomplete())
		usleep((100 + (rand() % 200)) * 1000);  /* 100ms - 300ms */

	router_free(clusters, routes);

	routes = newroutes;
	clusters = newclusters;

	logout("SIGHUP handler complete\n");
}
Exemple #2
0
/* =============================================================================
 * main
 * =============================================================================
 */
MAIN(argc, argv)
{
    GOTO_REAL();

    /*
     * Initialization
     */
    parseArgs(argc, (char** const)argv);
    long numThread = global_params[PARAM_THREAD];
    SIM_GET_NUM_CPU(numThread);
    TM_STARTUP(numThread);
    P_MEMORY_STARTUP(numThread);
    thread_startup(numThread);
    maze_t* mazePtr = maze_alloc();
    assert(mazePtr);
    long numPathToRoute = maze_read(mazePtr, global_inputFile);
    router_t* routerPtr = router_alloc(global_params[PARAM_XCOST],
                                       global_params[PARAM_YCOST],
                                       global_params[PARAM_ZCOST],
                                       global_params[PARAM_BENDCOST]);
    assert(routerPtr);
    list_t* pathVectorListPtr = list_alloc(NULL);
    assert(pathVectorListPtr);

    /*
     * Run transactions
     */
    router_solve_arg_t routerArg = {routerPtr, mazePtr, pathVectorListPtr};
    TIMER_T startTime;
    TIMER_READ(startTime);
    GOTO_SIM();
#ifdef OTM
#pragma omp parallel
    {
        router_solve((void *)&routerArg);
    }
#else
    thread_start(router_solve, (void*)&routerArg);
#endif
    GOTO_REAL();
    TIMER_T stopTime;
    TIMER_READ(stopTime);

    long numPathRouted = 0;
    list_iter_t it;
    list_iter_reset(&it, pathVectorListPtr);
    while (list_iter_hasNext(&it, pathVectorListPtr)) {
        vector_t* pathVectorPtr = (vector_t*)list_iter_next(&it, pathVectorListPtr);
        numPathRouted += vector_getSize(pathVectorPtr);
    }
    printf("Paths routed    = %li\n", numPathRouted);
    printf("Elapsed time    = %f seconds\n", TIMER_DIFF_SECONDS(startTime, stopTime));

    /*
     * Check solution and clean up
     */
    assert(numPathRouted <= numPathToRoute);
    bool_t status = maze_checkPaths(mazePtr, pathVectorListPtr, global_doPrint);
    assert(status == TRUE);
    puts("Verification passed.");
    maze_free(mazePtr);
    router_free(routerPtr);

    TM_SHUTDOWN();
    P_MEMORY_SHUTDOWN();

    GOTO_SIM();

    thread_shutdown();


    MAIN_RETURN(0);
}
Exemple #3
0
/* =============================================================================
 * main
 * =============================================================================
 */
MAIN(argc, argv)
{
    /*
     * Initialization
     */
    parseArgs(argc, (char** const)argv);
    long numThread = global_params[PARAM_THREAD];
    SIM_GET_NUM_CPU(numThread);
    TM_STARTUP(numThread);
    P_MEMORY_STARTUP(numThread);
    thread_startup(numThread);
    maze_t* mazePtr = maze_alloc();
    assert(mazePtr);
    long numPathToRoute = maze_read(mazePtr, global_inputFile);
    router_t* routerPtr = router_alloc(global_params[PARAM_XCOST],
                                       global_params[PARAM_YCOST],
                                       global_params[PARAM_ZCOST],
                                       global_params[PARAM_BENDCOST]);
    assert(routerPtr);
    list_t* pathVectorListPtr = list_alloc(NULL);
    assert(pathVectorListPtr);

    /*
     * Run transactions
     */
    router_solve_arg_t routerArg = {routerPtr, mazePtr, pathVectorListPtr};
    // NB: Since ASF/PTLSim "REAL" is native execution, and since we are using
    //     wallclock time, we want to be sure we read time inside the
    //     simulator, or else we report native cycles spent on the benchmark
    //     instead of simulator cycles.
    GOTO_SIM();
    TIMER_T startTime;
    TIMER_READ(startTime);
#ifdef OTM
#pragma omp parallel
    {
        router_solve((void *)&routerArg);
    }
#else
    thread_start(router_solve, (void*)&routerArg);
#endif
    TIMER_T stopTime;
    TIMER_READ(stopTime);
    // NB: As above, timer reads must be done inside of the simulated region
    //     for PTLSim/ASF
    GOTO_REAL();

    long numPathRouted = 0;
    list_iter_t it;
    list_iter_reset(&it, pathVectorListPtr);
    while (list_iter_hasNext(&it, pathVectorListPtr)) {
        vector_t* pathVectorPtr = (vector_t*)list_iter_next(&it, pathVectorListPtr);
        numPathRouted += vector_getSize(pathVectorPtr);
    }
    printf("Paths routed    = %li\n", numPathRouted);
    printf("Elapsed time    = %f seconds\n", TIMER_DIFF_SECONDS(startTime, stopTime));

    /*
     * Check solution and clean up
     */
    assert(numPathRouted <= numPathToRoute);
    bool status = maze_checkPaths(mazePtr, pathVectorListPtr, global_doPrint);
    assert(status);
    puts("Verification passed.");
    maze_free(mazePtr);
    router_free(routerPtr);

    TM_SHUTDOWN();
    P_MEMORY_SHUTDOWN();

    thread_shutdown();


    MAIN_RETURN(0);
}
Exemple #4
0
int
main(int argc, char * const argv[])
{
	int stream_sock[] = {0, 0, 0};  /* tcp4, tcp6, UNIX */
	int stream_socklen = sizeof(stream_sock) / sizeof(stream_sock[0]);
	int dgram_sock[] = {0, 0};  /* udp4, udp6 */
	int dgram_socklen = sizeof(dgram_sock) / sizeof(dgram_sock[0]);
	char id;
	unsigned short listenport = 2003;
	int ch;
	size_t numaggregators;
	size_t numcomputes;
	server *internal_submission;
	char *listeninterface = NULL;
	server **servers;
	char *allowed_chars = NULL;
	int i;
	enum { SUB, CUM } smode = CUM;

	if (gethostname(relay_hostname, sizeof(relay_hostname)) < 0)
		snprintf(relay_hostname, sizeof(relay_hostname), "127.0.0.1");

	while ((ch = getopt(argc, argv, ":hvdmstf:i:l:p:w:b:q:S:c:H:")) != -1) {
		switch (ch) {
			case 'v':
				do_version();
				break;
			case 'd':
				if (mode == TEST) {
					mode = DEBUGTEST;
				} else {
					mode = DEBUG;
				}
				break;
			case 'm':
				smode = SUB;
				break;
			case 's':
				mode = SUBMISSION;
				break;
			case 't':
				if (mode == DEBUG) {
					mode = DEBUGTEST;
				} else {
					mode = TEST;
				}
				break;
			case 'f':
				config = optarg;
				break;
			case 'i':
				listeninterface = optarg;
				break;
			case 'l':
				relay_logfile = optarg;
				break;
			case 'p':
				listenport = (unsigned short)atoi(optarg);
				if (listenport == 0) {
					fprintf(stderr, "error: port needs to be a number >0\n");
					do_usage(1);
				}
				break;
			case 'w':
				workercnt = (char)atoi(optarg);
				if (workercnt <= 0) {
					fprintf(stderr, "error: workers needs to be a number >0\n");
					do_usage(1);
				}
				break;
			case 'b':
				batchsize = atoi(optarg);
				if (batchsize <= 0) {
					fprintf(stderr, "error: batch size needs to be a number >0\n");
					do_usage(1);
				}
				break;
			case 'q':
				queuesize = atoi(optarg);
				if (queuesize <= 0) {
					fprintf(stderr, "error: queue size needs to be a number >0\n");
					do_usage(1);
				}
				break;
			case 'S':
				collector_interval = atoi(optarg);
				if (collector_interval <= 0) {
					fprintf(stderr, "error: sending interval needs to be "
							"a number >0\n");
					do_usage(1);
				}
				break;
			case 'c':
				allowed_chars = optarg;
				break;
			case 'H':
				snprintf(relay_hostname, sizeof(relay_hostname), "%s", optarg);
				break;
			case '?':
			case ':':
				do_usage(1);
				break;
			case 'h':
			default:
				do_usage(0);
				break;
		}
	}
	if (optind == 1 || config == NULL)
		do_usage(1);


	/* seed randomiser for dispatcher and aggregator "splay" */
	srand(time(NULL));

	if (workercnt == 0)
		workercnt = mode == SUBMISSION ? 2 : get_cores();

	/* any_of failover maths need batchsize to be smaller than queuesize */
	if (batchsize > queuesize) {
		fprintf(stderr, "error: batchsize must be smaller than queuesize\n");
		exit(-1);
	}

	if (relay_logfile != NULL && mode != TEST && mode != DEBUGTEST) {
		FILE *f = fopen(relay_logfile, "a");
		if (f == NULL) {
			fprintf(stderr, "error: failed to open logfile '%s': %s\n",
					relay_logfile, strerror(errno));
			exit(-1);
		}
		relay_stdout = f;
		relay_stderr = f;
	} else {
		relay_stdout = stdout;
		relay_stderr = stderr;
	}
	relay_can_log = 1;

	logout("starting carbon-c-relay v%s (%s), pid=%d\n",
			VERSION, GIT_VERSION, getpid());
	fprintf(relay_stdout, "configuration:\n");
	fprintf(relay_stdout, "    relay hostname = %s\n", relay_hostname);
	fprintf(relay_stdout, "    listen port = %u\n", listenport);
	if (listeninterface != NULL)
		fprintf(relay_stdout, "    listen interface = %s\n", listeninterface);
	fprintf(relay_stdout, "    workers = %d\n", workercnt);
	fprintf(relay_stdout, "    send batch size = %d\n", batchsize);
	fprintf(relay_stdout, "    server queue size = %d\n", queuesize);
	fprintf(relay_stdout, "    statistics submission interval = %ds\n",
			collector_interval);
	if (allowed_chars != NULL)
		fprintf(relay_stdout, "    extra allowed characters = %s\n",
				allowed_chars);
	if (mode == DEBUG || mode == DEBUGTEST)
		fprintf(relay_stdout, "    debug = true\n");
	else if (mode == SUBMISSION)
		fprintf(relay_stdout, "    submission = true\n");
	fprintf(relay_stdout, "    routes configuration = %s\n", config);
	fprintf(relay_stdout, "\n");

	if (router_readconfig(&clusters, &routes,
				config, queuesize, batchsize) == 0)
	{
		logerr("failed to read configuration '%s'\n", config);
		return 1;
	}
	router_optimise(&routes);

	numaggregators = aggregator_numaggregators();
	numcomputes = aggregator_numcomputes();
#define dbg (mode == DEBUG || mode == DEBUGTEST ? 2 : 0)
	if (numaggregators > 10 && !dbg) {
		fprintf(relay_stdout, "parsed configuration follows:\n"
				"(%zd aggregations with %zd computations omitted "
				"for brevity)\n", numaggregators, numcomputes);
		router_printconfig(relay_stdout, 0, clusters, routes);
	} else {
		fprintf(relay_stdout, "parsed configuration follows:\n");
		router_printconfig(relay_stdout, 1 + dbg, clusters, routes);
	}
	fprintf(relay_stdout, "\n");

	/* shortcut for rule testing mode */
	if (mode == TEST || mode == DEBUGTEST) {
		char metricbuf[METRIC_BUFSIZ];
		char *p;

		fflush(relay_stdout);
		while (fgets(metricbuf, sizeof(metricbuf), stdin) != NULL) {
			if ((p = strchr(metricbuf, '\n')) != NULL)
				*p = '\0';
			router_test(metricbuf, routes);
		}

		exit(0);
	}

	if (signal(SIGINT, exit_handler) == SIG_ERR) {
		logerr("failed to create SIGINT handler: %s\n", strerror(errno));
		return 1;
	}
	if (signal(SIGTERM, exit_handler) == SIG_ERR) {
		logerr("failed to create SIGTERM handler: %s\n", strerror(errno));
		return 1;
	}
	if (signal(SIGQUIT, exit_handler) == SIG_ERR) {
		logerr("failed to create SIGQUIT handler: %s\n", strerror(errno));
		return 1;
	}
	if (signal(SIGHUP, hup_handler) == SIG_ERR) {
		logerr("failed to create SIGHUP handler: %s\n", strerror(errno));
		return 1;
	}
	if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
		logerr("failed to ignore SIGPIPE: %s\n", strerror(errno));
		return 1;
	}

	workers = malloc(sizeof(dispatcher *) * (1 + workercnt + 1));
	if (workers == NULL) {
		logerr("failed to allocate memory for workers\n");
		return 1;
	}

	if (bindlisten(stream_sock, &stream_socklen,
				dgram_sock, &dgram_socklen,
				listeninterface, listenport) < 0) {
		logerr("failed to bind on port %s:%d: %s\n",
				listeninterface == NULL ? "" : listeninterface,
				listenport, strerror(errno));
		return -1;
	}
	for (ch = 0; ch < stream_socklen; ch++) {
		if (dispatch_addlistener(stream_sock[ch]) != 0) {
			logerr("failed to add listener\n");
			return -1;
		}
	}
	for (ch = 0; ch < dgram_socklen; ch++) {
		if (dispatch_addlistener_udp(dgram_sock[ch]) != 0) {
			logerr("failed to listen to datagram socket\n");
			return -1;
		}
	}
	if ((workers[0] = dispatch_new_listener()) == NULL)
		logerr("failed to add listener\n");

	if (allowed_chars == NULL)
		allowed_chars = "-_:#";
	logout("starting %d workers\n", workercnt);
	for (id = 1; id < 1 + workercnt; id++) {
		workers[id + 0] = dispatch_new_connection(routes, allowed_chars);
		if (workers[id + 0] == NULL) {
			logerr("failed to add worker %d\n", id);
			break;
		}
	}
	workers[id + 0] = NULL;
	if (id < 1 + workercnt) {
		logerr("shutting down due to errors\n");
		keep_running = 0;
	}

	/* server used for delivering metrics produced inside the relay,
	 * that is collector (statistics) and aggregator (aggregations) */
	if ((internal_submission = server_new("internal", listenport, CON_PIPE,
					NULL, 3000 + (numcomputes * 3), batchsize)) == NULL)
	{
		logerr("failed to create internal submission queue, shutting down\n");
		keep_running = 0;
	}

	if (numaggregators > 0) {
		logout("starting aggregator\n");
		if (!aggregator_start(internal_submission)) {
			logerr("shutting down due to failure to start aggregator\n");
			keep_running = 0;
		}
	}

	logout("starting statistics collector\n");
	collector_start(&workers[1], clusters, internal_submission, smode == CUM);

	logout("startup sequence complete\n");

	/* workers do the work, just wait */
	while (keep_running)
		sleep(1);

	logout("shutting down...\n");
	/* make sure we don't accept anything new anymore */
	for (ch = 0; ch < stream_socklen; ch++)
		dispatch_removelistener(stream_sock[ch]);
	destroy_usock(listenport);
	logout("listeners for port %u closed\n", listenport);
	/* since workers will be freed, stop querying the structures */
	collector_stop();
	logout("collector stopped\n");
	if (numaggregators > 0) {
		aggregator_stop();
		logout("aggregator stopped\n");
	}
	server_shutdown(internal_submission);
	/* give a little time for whatever the collector/aggregator wrote,
	 * to be delivered by the dispatchers */
	usleep(500 * 1000);  /* 500ms */
	/* make sure we don't write to our servers any more */
	logout("stopped worker");
	for (id = 0; id < 1 + workercnt; id++)
		dispatch_stop(workers[id + 0]);
	for (id = 0; id < 1 + workercnt; id++) {
		dispatch_shutdown(workers[id + 0]);
		fprintf(relay_stdout, " %d", id + 1);
		fflush(relay_stdout);
	}
	fprintf(relay_stdout, "\n");
	router_shutdown();
	servers = router_getservers(clusters);
	logout("stopped server");
	for (i = 0; servers[i] != NULL; i++)
		server_stop(servers[i]);
	for (i = 0; servers[i] != NULL; i++) {
		server_shutdown(servers[i]);
		fprintf(relay_stdout, " %d", i + 1);
		fflush(relay_stdout);
	}
	fprintf(relay_stdout, "\n");
	logout("routing stopped\n");

	router_free(clusters, routes);
	free(workers);
	return 0;
}