int main(int argc, char ** argv) { int i; int ret; int hw_threads; int sw_threads; int running_threads; int buffer_size; int slice_size; unsigned int *data, *copy; timing_t t_start, t_stop; ms_t t_generate; ms_t t_sort; ms_t t_merge; ms_t t_check; if ((argc < 4) || (argc > 4)) { print_help(); exit(1); } // we have exactly 3 arguments now... hw_threads = atoi(argv[1]); sw_threads = atoi(argv[2]); // Base unit is bytes. Use macros TO_WORDS, TO_PAGES and TO_BLOCKS for conversion. buffer_size = atoi(argv[3])*PAGE_SIZE*PAGES_PER_THREAD; slice_size = PAGE_SIZE*PAGES_PER_THREAD; running_threads = hw_threads + sw_threads; //int gettimeofday(struct timeval *tv, struct timezone *tz); // init mailboxes mbox_init(&mb_start,TO_BLOCKS(buffer_size)); mbox_init(&mb_stop ,TO_BLOCKS(buffer_size)); // init reconos and communication resources reconos_init(14,15); res[0].type = RECONOS_TYPE_MBOX; res[0].ptr = &mb_start; res[1].type = RECONOS_TYPE_MBOX; res[1].ptr = &mb_stop; printf("Creating %i hw-threads: ", hw_threads); fflush(stdout); for (i = 0; i < hw_threads; i++) { printf(" %i",i); fflush(stdout); reconos_hwt_setresources(&(hwt[i]),res,2); reconos_hwt_create(&(hwt[i]),i,NULL); } printf("\n"); // init software threads printf("Creating %i sw-threads: ",sw_threads); fflush(stdout); for (i = 0; i < sw_threads; i++) { printf(" %i",i); fflush(stdout); pthread_attr_init(&swt_attr[i]); pthread_create(&swt[i], &swt_attr[i], sort_thread, (void*)res); } printf("\n"); //print_mmu_stats(); // create pages and generate data t_start = gettime(); printf("malloc page aligned ...\n"); data = malloc_page_aligned(TO_PAGES(buffer_size)); copy = malloc_page_aligned(TO_PAGES(buffer_size)); printf("generate data ...\n"); generate_data( data, TO_WORDS(buffer_size)); memcpy(copy,data,TO_WORDS(buffer_size)*4); t_stop = gettime(); t_generate = calc_timediff_ms(t_start,t_stop); // print data of first page printf("Printing of generated data skipped. \n"); //print_data(data, TO_WORDS(buffer_size)); // Start sort threads t_start = gettime(); printf("Putting %i blocks into job queue: ", TO_BLOCKS(buffer_size)); fflush(stdout); for (i=0; i<TO_BLOCKS(buffer_size); i++) { printf(" %i",i); fflush(stdout); mbox_put(&mb_start,(unsigned int)data+(i*BLOCK_SIZE)); } printf("\n"); // Wait for results printf("Waiting for %i acknowledgements: ", TO_BLOCKS(buffer_size)); fflush(stdout); for (i=0; i<TO_BLOCKS(buffer_size); i++) { printf(" %i",i); fflush(stdout); ret = mbox_get(&mb_stop); } printf("\n"); t_stop = gettime(); t_sort = calc_timediff_ms(t_start,t_stop); // merge data t_start = gettime(); printf("Merging sorted data slices...\n"); unsigned int * temp = malloc_page_aligned(TO_PAGES(buffer_size)); //printf("Data buffer at address %p \n", (void*)data); //printf("Address of temporary merge buffer: %p\n", (void*)temp); //printf("Total size of data in bytes: %i\n",buffer_size); //printf("Size of a sorting block in bytes: %i\n",BLOCK_SIZE); data = recursive_merge( data, temp, TO_WORDS(buffer_size), TO_WORDS(BLOCK_SIZE), simple_merge ); t_stop = gettime(); t_merge = calc_timediff_ms(t_start,t_stop); // check data //data[0] = 6666; // manual fault t_start = gettime(); printf("Checking sorted data: ... "); fflush(stdout); ret = check_data( data, copy, TO_WORDS(buffer_size)); if (ret >= 0) { printf("failure at word index %i\n", -ret); printf("expected 0x%08X found 0x%08X\n",copy[ret],data[ret]); printf("dumping the first 2048 words:\n"); for(i = 0; i < 2048; i++) { printf("%08X ",data[i]); if((i % 8) == 7) printf("\n"); } } else { printf("success\n"); //print_data(data, TO_WORDS(buffer_size)); } t_stop = gettime(); t_check = calc_timediff_ms(t_start,t_stop); // terminate all threads printf("Sending terminate message to %i threads:", running_threads); fflush(stdout); for (i=0; i<running_threads; i++) { printf(" %i",i); fflush(stdout); mbox_put(&mb_start,UINT_MAX); } printf("\n"); printf("Waiting for termination...\n"); for (i=0; i<hw_threads; i++) { pthread_join(hwt[i].delegate,NULL); } for (i=0; i<sw_threads; i++) { pthread_join(swt[i],NULL); } printf("\n"); print_mmu_stats(); printf( "Running times (size: %d words, %d hw-threads, %d sw-threads):\n" "\tGenerate data: %lu ms\n" "\tSort data : %lu ms\n" "\tMerge data : %lu ms\n" "\tCheck data : %lu ms\n" "Total computation time (sort & merge): %lu ms\n", TO_WORDS(buffer_size), hw_threads, sw_threads, t_generate, t_sort, t_merge, t_check, t_sort + t_merge ); //free(data); // Memory Leak on variable data!!! return 0; }
int main(int argc, char ** argv) { int i; int ret; int hw_threads; int sw_threads; int running_threads; int simulation_steps; single_pendulum_simple_state_t *data, *expected; int success; int iteration; timing_t t_start, t_stop; ms_t t_generate; ms_t t_calculate; ms_t t_check; // parse options while (1) { int c; static struct option long_options[] = { { "help", no_argument, &only_print_help, 1 }, { "without-reconos", no_argument, &without_reconos, 1 }, { "without-memory", no_argument, &without_memory, 1 }, { "dont-flush", no_argument, &dont_flush, 1 }, { "iterations", required_argument, 0, 'n' }, { "iterations-in-thread", required_argument, 0, 'm' }, {0, 0, 0, 0} }; int option_index = 0; c = getopt_long (argc, argv, "n:m:h?", long_options, &option_index); if (c == -1) // end of options break; switch (c) { case 0: // flags are handled by getopt - nothing else to do break; case 'n': iterations = atoi(optarg); break; case 'm': iterations_in_thread = atoi(optarg); break; case 'h': case '?': only_print_help = 1; break; } } # ifdef WITHOUT_RECONOS without_reconos = 1; # endif verbose_progress = (iterations < 10); if (only_print_help || argc - optind > 2) { print_help(); exit(-1); } hw_threads = optind < argc ? atoi(argv[optind++]) : 1; sw_threads = optind < argc ? atoi(argv[optind++]) : 0; if (iterations < 1) { fprintf(stderr, "The number of iterations must be at least 1.\n"); exit(-1); } if (without_reconos && hw_threads > 0) { fprintf(stderr, "We cannot use hardware threads without reconOS!\n"); exit(-1); } if (without_memory && sw_threads > 0) { fprintf(stderr, "'--without-memory' only works with hardware threads!\n"); exit(-1); } if (sizeof(void*) > sizeof(uint32_t) && sw_threads + hw_threads > 1) { fprintf(stderr, "mboxes work with 4-byte values, so we have some trouble passing a " "pointer through them. We have to pass it in parts, but with more than one thread, " "the threads might get parts from different pointers. Therefore, you cannot use " "more than one thread on this platform.\n"); exit(-1); } if (iterations_in_thread > 1) { if (hw_threads > 0 && hw_threads + sw_threads > 1) { fprintf(stderr, "'--iterations-in-thread' can only be used with software threads " "or one hardware thread.\n"); exit(-1); } } running_threads = hw_threads + sw_threads; // We calculate one step per thread. simulation_steps = running_threads; //int gettimeofday(struct timeval *tv, struct timezone *tz); // init mailboxes mbox_init(&mb_start, simulation_steps); mbox_init(&mb_stop, simulation_steps); // init reconos and communication resources if (!without_reconos) reconos_init(); res[0].type = RECONOS_TYPE_MBOX; res[0].ptr = &mb_start; res[1].type = RECONOS_TYPE_MBOX; res[1].ptr = &mb_stop; printf("Creating %i hw-threads: ", hw_threads); fflush(stdout); for (i = 0; i < hw_threads; i++) { printf(" %i",i);fflush(stdout); reconos_hwt_setresources(&(hwt[i]),res,2); reconos_hwt_create(&(hwt[i]),i,NULL); } printf("\n"); // init software threads printf("Creating %i sw-threads: ",sw_threads); fflush(stdout); for (i = 0; i < sw_threads; i++) { printf(" %i",i);fflush(stdout); pthread_attr_init(&swt_attr[i]); pthread_create(&swt[i], &swt_attr[i], software_thread, (void*)res); } printf("\n"); //print_mmu_stats(); // create pages and generate data if (!without_memory) { t_start = gettime(); printf("malloc of %zu bytes...\n", BLOCK_SIZE * simulation_steps); data = malloc(BLOCK_SIZE * simulation_steps); expected = malloc(BLOCK_SIZE * simulation_steps); printf("generate data ...\n"); generate_data(data, expected, simulation_steps); t_stop = gettime(); t_generate = calc_timediff_ms(t_start, t_stop); printf("Printing of generated data skipped. \n"); //print_data(data, simulation_steps * sizeof(single_pendulum_simple_state_t) / sizeof(real_t)); } // Start sort threads t_start = gettime(); if (!verbose_progress) { printf("Calculating... "); fflush(stdout); } for (iteration=0; iteration<iterations; iteration++) { if (verbose_progress) { printf("Putting %i blocks into job queue: ", simulation_steps); fflush(stdout); } if (!without_reconos && !dont_flush) reconos_cache_flush(); for (i=0; i<simulation_steps; i++) { if (verbose_progress) { printf(" %i",i); fflush(stdout); } if (iterations_in_thread > 1 && hw_threads > 0) { // set iterations_in_thread for hw thread mbox_put(&mb_start, SET_ITERATIONS); mbox_put(&mb_start, iterations_in_thread); } mbox_put_pointer(&mb_start, (without_memory ? WITHOUT_MEMORY : &data[i])); } if (verbose_progress) printf("\n"); // Wait for results if (verbose_progress) { printf("Waiting for %i acknowledgements: ", simulation_steps); fflush(stdout); } for (i=0; i<simulation_steps; i++) { if (verbose_progress) { printf(" %i",i); fflush(stdout); } (void)mbox_get_pointer(&mb_stop); } if (verbose_progress) printf("\n"); } if (!verbose_progress) { printf("done\n"); fflush(stdout); } t_stop = gettime(); t_calculate = calc_timediff_ms(t_start,t_stop); if (!without_reconos) reconos_cache_flush(); // check data //data[0] = 6666; // manual fault if (!without_memory) { t_start = gettime(); printf("Checking data: ... "); fflush(stdout); ret = check_data(data, expected, simulation_steps); if (ret >= 0) { int job; printf("failure at word index %i\n", ret); printf("expected %5.3f found %5.3f\n", ((real_t*)expected)[ret], ((real_t*)data)[ret]); job = ret / BLOCK_SIZE; printf("dumping job %d:\n", job); printf(" expected:\t"); print_data (expected[job].all, REALS_PER_BLOCK); printf(" actual: \t"); print_data (data [job].all, REALS_PER_BLOCK); printf(" expected:\t"); print_reals(expected[job].all, REALS_PER_BLOCK); printf(" actual: \t"); print_reals(data [job].all, REALS_PER_BLOCK); success = 0; } else { printf("success\n"); //print_data(data, TO_WORDS(buffer_size)); success = 1; } t_stop = gettime(); t_check = calc_timediff_ms(t_start,t_stop); } // terminate all threads printf("Sending terminate message to %i threads:", running_threads); fflush(stdout); for (i=0; i<running_threads; i++) { printf(" %i",i);fflush(stdout); mbox_put_pointer(&mb_start, THREAD_EXIT); } printf("\n"); printf("Waiting for termination...\n"); for (i=0; i<hw_threads; i++) { pthread_join(hwt[i].delegate,NULL); } for (i=0; i<sw_threads; i++) { pthread_join(swt[i],NULL); } printf("\n"); if (!without_reconos) print_mmu_stats(); if (!without_memory) { printf( "Running times (size: %d jobs, %d hw-threads, %d sw-threads):\n" "\tGenerate data: %lu ms\n" "\tCalculation : %lu ms\n" "\tCheck data : %lu ms\n", simulation_steps, hw_threads, sw_threads, t_generate, t_calculate, t_check); } else { printf( "Running times (size: %d jobs, %d hw-threads, %d sw-threads):\n" "\tCalculation : %lu ms\n", simulation_steps, hw_threads, sw_threads, t_calculate); } free(data); free(expected); if (!without_reconos) reconos_cleanup(); if (success) return 0; else return 1; }