static void hup_handler(int sig) { route *newroutes; cluster *newclusters; int id; FILE *newfd; logout("caught SIGHUP...\n"); if (relay_stderr != stderr) { /* try to re-open the file first, so we can still try and say * something if that fails */ if ((newfd = fopen(relay_logfile, "a")) == NULL) { logerr("not reopening logfiles: can't open '%s': %s\n", relay_logfile, strerror(errno)); } else { logout("closing logfile\n"); relay_can_log = 0; fclose(relay_stderr); relay_stdout = newfd; relay_stderr = newfd; relay_can_log = 1; logout("reopening logfile\n"); } } logout("reloading config from '%s'...\n", config); if (router_readconfig(&newclusters, &newroutes, config, queuesize, batchsize) == 0) { logerr("failed to read configuration '%s', aborting reload\n", config); return; } router_optimise(&newroutes); logout("reloading worker"); for (id = 1; id < 1 + workercnt; id++) dispatch_schedulereload(workers[id + 0], newroutes); for (id = 1; id < 1 + workercnt; id++) { while (!dispatch_reloadcomplete(workers[id + 0])) usleep((100 + (rand() % 200)) * 1000); /* 100ms - 300ms */ fprintf(relay_stdout, " %d", id + 1); fflush(relay_stdout); } fprintf(relay_stdout, "\n"); logout("reloading collector\n"); collector_schedulereload(newclusters); while (!collector_reloadcomplete()) usleep((100 + (rand() % 200)) * 1000); /* 100ms - 300ms */ router_free(clusters, routes); routes = newroutes; clusters = newclusters; logout("SIGHUP handler complete\n"); }
/* ============================================================================= * main * ============================================================================= */ MAIN(argc, argv) { GOTO_REAL(); /* * Initialization */ parseArgs(argc, (char** const)argv); long numThread = global_params[PARAM_THREAD]; SIM_GET_NUM_CPU(numThread); TM_STARTUP(numThread); P_MEMORY_STARTUP(numThread); thread_startup(numThread); maze_t* mazePtr = maze_alloc(); assert(mazePtr); long numPathToRoute = maze_read(mazePtr, global_inputFile); router_t* routerPtr = router_alloc(global_params[PARAM_XCOST], global_params[PARAM_YCOST], global_params[PARAM_ZCOST], global_params[PARAM_BENDCOST]); assert(routerPtr); list_t* pathVectorListPtr = list_alloc(NULL); assert(pathVectorListPtr); /* * Run transactions */ router_solve_arg_t routerArg = {routerPtr, mazePtr, pathVectorListPtr}; TIMER_T startTime; TIMER_READ(startTime); GOTO_SIM(); #ifdef OTM #pragma omp parallel { router_solve((void *)&routerArg); } #else thread_start(router_solve, (void*)&routerArg); #endif GOTO_REAL(); TIMER_T stopTime; TIMER_READ(stopTime); long numPathRouted = 0; list_iter_t it; list_iter_reset(&it, pathVectorListPtr); while (list_iter_hasNext(&it, pathVectorListPtr)) { vector_t* pathVectorPtr = (vector_t*)list_iter_next(&it, pathVectorListPtr); numPathRouted += vector_getSize(pathVectorPtr); } printf("Paths routed = %li\n", numPathRouted); printf("Elapsed time = %f seconds\n", TIMER_DIFF_SECONDS(startTime, stopTime)); /* * Check solution and clean up */ assert(numPathRouted <= numPathToRoute); bool_t status = maze_checkPaths(mazePtr, pathVectorListPtr, global_doPrint); assert(status == TRUE); puts("Verification passed."); maze_free(mazePtr); router_free(routerPtr); TM_SHUTDOWN(); P_MEMORY_SHUTDOWN(); GOTO_SIM(); thread_shutdown(); MAIN_RETURN(0); }
/* ============================================================================= * main * ============================================================================= */ MAIN(argc, argv) { /* * Initialization */ parseArgs(argc, (char** const)argv); long numThread = global_params[PARAM_THREAD]; SIM_GET_NUM_CPU(numThread); TM_STARTUP(numThread); P_MEMORY_STARTUP(numThread); thread_startup(numThread); maze_t* mazePtr = maze_alloc(); assert(mazePtr); long numPathToRoute = maze_read(mazePtr, global_inputFile); router_t* routerPtr = router_alloc(global_params[PARAM_XCOST], global_params[PARAM_YCOST], global_params[PARAM_ZCOST], global_params[PARAM_BENDCOST]); assert(routerPtr); list_t* pathVectorListPtr = list_alloc(NULL); assert(pathVectorListPtr); /* * Run transactions */ router_solve_arg_t routerArg = {routerPtr, mazePtr, pathVectorListPtr}; // NB: Since ASF/PTLSim "REAL" is native execution, and since we are using // wallclock time, we want to be sure we read time inside the // simulator, or else we report native cycles spent on the benchmark // instead of simulator cycles. GOTO_SIM(); TIMER_T startTime; TIMER_READ(startTime); #ifdef OTM #pragma omp parallel { router_solve((void *)&routerArg); } #else thread_start(router_solve, (void*)&routerArg); #endif TIMER_T stopTime; TIMER_READ(stopTime); // NB: As above, timer reads must be done inside of the simulated region // for PTLSim/ASF GOTO_REAL(); long numPathRouted = 0; list_iter_t it; list_iter_reset(&it, pathVectorListPtr); while (list_iter_hasNext(&it, pathVectorListPtr)) { vector_t* pathVectorPtr = (vector_t*)list_iter_next(&it, pathVectorListPtr); numPathRouted += vector_getSize(pathVectorPtr); } printf("Paths routed = %li\n", numPathRouted); printf("Elapsed time = %f seconds\n", TIMER_DIFF_SECONDS(startTime, stopTime)); /* * Check solution and clean up */ assert(numPathRouted <= numPathToRoute); bool status = maze_checkPaths(mazePtr, pathVectorListPtr, global_doPrint); assert(status); puts("Verification passed."); maze_free(mazePtr); router_free(routerPtr); TM_SHUTDOWN(); P_MEMORY_SHUTDOWN(); thread_shutdown(); MAIN_RETURN(0); }
int main(int argc, char * const argv[]) { int stream_sock[] = {0, 0, 0}; /* tcp4, tcp6, UNIX */ int stream_socklen = sizeof(stream_sock) / sizeof(stream_sock[0]); int dgram_sock[] = {0, 0}; /* udp4, udp6 */ int dgram_socklen = sizeof(dgram_sock) / sizeof(dgram_sock[0]); char id; unsigned short listenport = 2003; int ch; size_t numaggregators; size_t numcomputes; server *internal_submission; char *listeninterface = NULL; server **servers; char *allowed_chars = NULL; int i; enum { SUB, CUM } smode = CUM; if (gethostname(relay_hostname, sizeof(relay_hostname)) < 0) snprintf(relay_hostname, sizeof(relay_hostname), "127.0.0.1"); while ((ch = getopt(argc, argv, ":hvdmstf:i:l:p:w:b:q:S:c:H:")) != -1) { switch (ch) { case 'v': do_version(); break; case 'd': if (mode == TEST) { mode = DEBUGTEST; } else { mode = DEBUG; } break; case 'm': smode = SUB; break; case 's': mode = SUBMISSION; break; case 't': if (mode == DEBUG) { mode = DEBUGTEST; } else { mode = TEST; } break; case 'f': config = optarg; break; case 'i': listeninterface = optarg; break; case 'l': relay_logfile = optarg; break; case 'p': listenport = (unsigned short)atoi(optarg); if (listenport == 0) { fprintf(stderr, "error: port needs to be a number >0\n"); do_usage(1); } break; case 'w': workercnt = (char)atoi(optarg); if (workercnt <= 0) { fprintf(stderr, "error: workers needs to be a number >0\n"); do_usage(1); } break; case 'b': batchsize = atoi(optarg); if (batchsize <= 0) { fprintf(stderr, "error: batch size needs to be a number >0\n"); do_usage(1); } break; case 'q': queuesize = atoi(optarg); if (queuesize <= 0) { fprintf(stderr, "error: queue size needs to be a number >0\n"); do_usage(1); } break; case 'S': collector_interval = atoi(optarg); if (collector_interval <= 0) { fprintf(stderr, "error: sending interval needs to be " "a number >0\n"); do_usage(1); } break; case 'c': allowed_chars = optarg; break; case 'H': snprintf(relay_hostname, sizeof(relay_hostname), "%s", optarg); break; case '?': case ':': do_usage(1); break; case 'h': default: do_usage(0); break; } } if (optind == 1 || config == NULL) do_usage(1); /* seed randomiser for dispatcher and aggregator "splay" */ srand(time(NULL)); if (workercnt == 0) workercnt = mode == SUBMISSION ? 2 : get_cores(); /* any_of failover maths need batchsize to be smaller than queuesize */ if (batchsize > queuesize) { fprintf(stderr, "error: batchsize must be smaller than queuesize\n"); exit(-1); } if (relay_logfile != NULL && mode != TEST && mode != DEBUGTEST) { FILE *f = fopen(relay_logfile, "a"); if (f == NULL) { fprintf(stderr, "error: failed to open logfile '%s': %s\n", relay_logfile, strerror(errno)); exit(-1); } relay_stdout = f; relay_stderr = f; } else { relay_stdout = stdout; relay_stderr = stderr; } relay_can_log = 1; logout("starting carbon-c-relay v%s (%s), pid=%d\n", VERSION, GIT_VERSION, getpid()); fprintf(relay_stdout, "configuration:\n"); fprintf(relay_stdout, " relay hostname = %s\n", relay_hostname); fprintf(relay_stdout, " listen port = %u\n", listenport); if (listeninterface != NULL) fprintf(relay_stdout, " listen interface = %s\n", listeninterface); fprintf(relay_stdout, " workers = %d\n", workercnt); fprintf(relay_stdout, " send batch size = %d\n", batchsize); fprintf(relay_stdout, " server queue size = %d\n", queuesize); fprintf(relay_stdout, " statistics submission interval = %ds\n", collector_interval); if (allowed_chars != NULL) fprintf(relay_stdout, " extra allowed characters = %s\n", allowed_chars); if (mode == DEBUG || mode == DEBUGTEST) fprintf(relay_stdout, " debug = true\n"); else if (mode == SUBMISSION) fprintf(relay_stdout, " submission = true\n"); fprintf(relay_stdout, " routes configuration = %s\n", config); fprintf(relay_stdout, "\n"); if (router_readconfig(&clusters, &routes, config, queuesize, batchsize) == 0) { logerr("failed to read configuration '%s'\n", config); return 1; } router_optimise(&routes); numaggregators = aggregator_numaggregators(); numcomputes = aggregator_numcomputes(); #define dbg (mode == DEBUG || mode == DEBUGTEST ? 2 : 0) if (numaggregators > 10 && !dbg) { fprintf(relay_stdout, "parsed configuration follows:\n" "(%zd aggregations with %zd computations omitted " "for brevity)\n", numaggregators, numcomputes); router_printconfig(relay_stdout, 0, clusters, routes); } else { fprintf(relay_stdout, "parsed configuration follows:\n"); router_printconfig(relay_stdout, 1 + dbg, clusters, routes); } fprintf(relay_stdout, "\n"); /* shortcut for rule testing mode */ if (mode == TEST || mode == DEBUGTEST) { char metricbuf[METRIC_BUFSIZ]; char *p; fflush(relay_stdout); while (fgets(metricbuf, sizeof(metricbuf), stdin) != NULL) { if ((p = strchr(metricbuf, '\n')) != NULL) *p = '\0'; router_test(metricbuf, routes); } exit(0); } if (signal(SIGINT, exit_handler) == SIG_ERR) { logerr("failed to create SIGINT handler: %s\n", strerror(errno)); return 1; } if (signal(SIGTERM, exit_handler) == SIG_ERR) { logerr("failed to create SIGTERM handler: %s\n", strerror(errno)); return 1; } if (signal(SIGQUIT, exit_handler) == SIG_ERR) { logerr("failed to create SIGQUIT handler: %s\n", strerror(errno)); return 1; } if (signal(SIGHUP, hup_handler) == SIG_ERR) { logerr("failed to create SIGHUP handler: %s\n", strerror(errno)); return 1; } if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) { logerr("failed to ignore SIGPIPE: %s\n", strerror(errno)); return 1; } workers = malloc(sizeof(dispatcher *) * (1 + workercnt + 1)); if (workers == NULL) { logerr("failed to allocate memory for workers\n"); return 1; } if (bindlisten(stream_sock, &stream_socklen, dgram_sock, &dgram_socklen, listeninterface, listenport) < 0) { logerr("failed to bind on port %s:%d: %s\n", listeninterface == NULL ? "" : listeninterface, listenport, strerror(errno)); return -1; } for (ch = 0; ch < stream_socklen; ch++) { if (dispatch_addlistener(stream_sock[ch]) != 0) { logerr("failed to add listener\n"); return -1; } } for (ch = 0; ch < dgram_socklen; ch++) { if (dispatch_addlistener_udp(dgram_sock[ch]) != 0) { logerr("failed to listen to datagram socket\n"); return -1; } } if ((workers[0] = dispatch_new_listener()) == NULL) logerr("failed to add listener\n"); if (allowed_chars == NULL) allowed_chars = "-_:#"; logout("starting %d workers\n", workercnt); for (id = 1; id < 1 + workercnt; id++) { workers[id + 0] = dispatch_new_connection(routes, allowed_chars); if (workers[id + 0] == NULL) { logerr("failed to add worker %d\n", id); break; } } workers[id + 0] = NULL; if (id < 1 + workercnt) { logerr("shutting down due to errors\n"); keep_running = 0; } /* server used for delivering metrics produced inside the relay, * that is collector (statistics) and aggregator (aggregations) */ if ((internal_submission = server_new("internal", listenport, CON_PIPE, NULL, 3000 + (numcomputes * 3), batchsize)) == NULL) { logerr("failed to create internal submission queue, shutting down\n"); keep_running = 0; } if (numaggregators > 0) { logout("starting aggregator\n"); if (!aggregator_start(internal_submission)) { logerr("shutting down due to failure to start aggregator\n"); keep_running = 0; } } logout("starting statistics collector\n"); collector_start(&workers[1], clusters, internal_submission, smode == CUM); logout("startup sequence complete\n"); /* workers do the work, just wait */ while (keep_running) sleep(1); logout("shutting down...\n"); /* make sure we don't accept anything new anymore */ for (ch = 0; ch < stream_socklen; ch++) dispatch_removelistener(stream_sock[ch]); destroy_usock(listenport); logout("listeners for port %u closed\n", listenport); /* since workers will be freed, stop querying the structures */ collector_stop(); logout("collector stopped\n"); if (numaggregators > 0) { aggregator_stop(); logout("aggregator stopped\n"); } server_shutdown(internal_submission); /* give a little time for whatever the collector/aggregator wrote, * to be delivered by the dispatchers */ usleep(500 * 1000); /* 500ms */ /* make sure we don't write to our servers any more */ logout("stopped worker"); for (id = 0; id < 1 + workercnt; id++) dispatch_stop(workers[id + 0]); for (id = 0; id < 1 + workercnt; id++) { dispatch_shutdown(workers[id + 0]); fprintf(relay_stdout, " %d", id + 1); fflush(relay_stdout); } fprintf(relay_stdout, "\n"); router_shutdown(); servers = router_getservers(clusters); logout("stopped server"); for (i = 0; servers[i] != NULL; i++) server_stop(servers[i]); for (i = 0; servers[i] != NULL; i++) { server_shutdown(servers[i]); fprintf(relay_stdout, " %d", i + 1); fflush(relay_stdout); } fprintf(relay_stdout, "\n"); logout("routing stopped\n"); router_free(clusters, routes); free(workers); return 0; }