hthread_attr_t create_attr() { hthread_attr_t attr; hthread_attr_init (&attr); //For portability reasons when changing to pthreads hthread_attr_setdetachstate (&attr, HTHREAD_CREATE_JOINABLE); return attr; }
/*
 * Thread body: spawns the child thread described by 'arg' (a struct
 * testdata carrying the target thread handle, attribute object, and
 * start routine), then joins it and exits with the JOIN STATUS (not the
 * child's own exit value) as this thread's result.
 */
void * testThread ( void * arg )
{
    int retVal;
    struct testdata * data = (struct testdata *) arg;

    // Create the child using the caller-supplied attribute object.
    hthread_create( &data->thread, data->attr, data->function, NULL );

    // The attribute is flipped to DETACHED only *after* creation.
    // NOTE(review): changing an attr after create should not affect the
    // already-created (joinable) thread -- this looks like a deliberate
    // test that the join below still succeeds; confirm intent.
    hthread_attr_setdetachstate( data->attr, HTHREAD_CREATE_DETACHED );

    // Propagate the join's return code as this thread's exit value.
    retVal = hthread_join( data->thread, NULL );
    hthread_exit( (void *) retVal );

    // Presumably unreached if hthread_exit behaves like pthread_exit
    // (does not return) -- kept to satisfy the function's return type.
    return NULL;
}
/*-------------------------------------------------------------------*/
/*
 * Initialize *pat with the standard HTHREAD_STACK_SIZE stack and the
 * requested detach 'state'.  'location' is accepted for call-site
 * symmetry/diagnostics but is unused here.
 *
 * Returns 0 on success, or the first non-zero code returned by the
 * underlying attribute calls (later steps are skipped on failure).
 */
static INLINE int hthread_init_thread_attr( ATTR* pat, int state, const char* location )
{
    int rc = hthread_attr_init( pat );

    UNREFERENCED( location );

    if (rc == 0)
        rc = hthread_attr_setstacksize( pat, HTHREAD_STACK_SIZE );
    if (rc == 0)
        rc = hthread_attr_setdetachstate( pat, state );

    return rc;
}
int main() { unsigned int i = 0; int retVal; // Allocate NUM_THREADS threads hthread_t * tid = (hthread_t *) malloc(sizeof(hthread_t) * NUM_THREADS); hthread_attr_t * attr = (hthread_attr_t *) malloc(sizeof(hthread_attr_t) * NUM_AVAILABLE_HETERO_CPUS); assert(tid); assert(attr); // Set up attributes for a hardware thread for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); } unsigned int failed = 0; // Create hardware threads first for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { // Create thread -- Assuming that thread manager will give us // a TID = 2 every time since we are creating & joining 1 thread // at a time. if (microblaze_create( &tid[i], &attr[i], foo_thread_FUNC_ID, (void *) 2, i) ) { failed = 1; PRINT_ERROR(THREAD_HARDWARE_CREATE_FAILED); } if (hthread_join( tid[i], (void *) &retVal ) ) { failed = 1; PRINT_ERROR(THREAD_HARDWARE_JOIN_FAILED); } // Make sure the return value is equal to base_array[i] if (base_array[i] != ((unsigned int) retVal - HT_CMD_HWTI_COMMAND)) { failed = 1; PRINT_ERROR(THREAD_HARDWARE_INCORRECT_RETURN); } } // Create all threads as software threads for (i = 0; i < NUM_THREADS; i++) { // Create threads if (hthread_create( &tid[i], NULL, foo_thread, (void *) 2 )) { failed = 1; PRINT_ERROR(THREAD_SOFTWARE_CREATE_FAILED); } } // Now join on all software threads we just created for (i = 0; i < NUM_THREADS; i++) { // Join on thread if (hthread_join(tid[i], (void *) &retVal )) { failed = 1; PRINT_ERROR(THREAD_SOFTWARE_JOIN_FAILED); } } // Create NUM_THREADS threads // ----> Create hardware threads first for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { // Create threads if (microblaze_create( &tid[i], &attr[i], foo2_thread_FUNC_ID, (void *) i, i) ) { failed = 1; PRINT_ERROR(THREAD_HARDWARE_CREATE_FAILED); } } // ----> The remaining are software threads for (i = NUM_AVAILABLE_HETERO_CPUS; i < NUM_THREADS; i++) { // Create threads if 
(hthread_create( &tid[i], NULL, foo2_thread, (void *) i )) { failed = 1; PRINT_ERROR(THREAD_SOFTWARE_CREATE_FAILED); } } // Try to create more here --SHOULD FAIL!!! for (i = 0; i < NUM_THREADS; i++) { // If it does not fail if (hthread_create( &tid[i], NULL, foo2_thread, (void *) i ) == SUCCESS ) { failed = 1; PRINT_ERROR(THREAD_SOFTWARE_ERROR_FAILED); } } // Clean up- Join on the threads. for (i = 0; i < NUM_THREADS; i++) { // If it fails if (hthread_join(tid[i], (void *) &retVal )) { failed = 1; PRINT_ERROR(FINAL_JOIN_ERROR); } } // Test dynamic_create_smart #ifdef SPLIT_BRAM // Test microblaze_create_DMA and dyanmic_create_smart_DMA #endif if (failed) { PRINT_ERROR(TEST_FAILED); } else PRINT_ERROR(TEST_PASSED); free(tid); free(attr); return TEST_PASSED; }
/*
 * "combined2" kernel benchmark driver.
 *
 * Spawns 78 dynamically-placed hardware threads (pi, histogram,
 * distance, matrix-multiply, find-max, mandelbrot) in a fixed hand-made
 * order, waits until all slaves are idle, and reports timing.
 *
 * NOTE(review): 'tid' and 'attr' (NUM_THREADS entries) are not declared
 * in this function -- presumably file-scope globals defined elsewhere in
 * this file; verify.  'thread_arg' is heap-allocated and never freed
 * (process exits immediately, so likely intentional).
 */
int main() {
    printf("--- combined2 Kernel benchmark ---\n");
    printf("Number of Slave processors: %d\n", NUM_AVAILABLE_HETERO_CPUS);
#ifdef OPCODE_FLAGGING
    printf("-->Opcode flagging ENABLED\n");
#else
    printf("-->Opcode flagging DISABLED\n");
#endif
    // Initialize various host tables once.
    init_host_tables();

    // Create Queue software thread
    hthread_t queue_tid;
    hthread_attr_t queue_attr;
    hthread_attr_init(&queue_attr);
    if (thread_create(&queue_tid, &queue_attr,queue_thread_FUNC_ID,(void *) &exec_time[0], SOFTWARE_THREAD,0)){
        printf("Error creating Queue thread\n");
        while(1);   // spin forever on fatal setup error (embedded idiom)
    }

    // Reset
    create_overhead = 0;
    Huint i = 0;

    // PI: per-thread accumulator + iteration count.
    pi_t thread_data[PI_NUM_THREADS];
    for (i = 0; i < PI_NUM_THREADS; i++) {
        thread_data[i].pi = 0;
        thread_data[i].MaxIterations = PI_MAX_ITERATIONS;
    }

    // HISTOGRAM
    // Thread attribute structures
    histogram_t * thread_arg = (histogram_t *) malloc(sizeof(histogram_t) * HISTOGRAM_NUM_THREADS);
    assert (thread_arg != NULL);
    // Array Structures
    int my_array[HISTOGRAM_NUM_THREADS][ARR_SIZE];
    int my_hist[HISTOGRAM_NUM_THREADS][NUM_BINS];
    int num_ops = 0, j = 0;;
    // Initialize histograms
    for (j = 0; j < HISTOGRAM_NUM_THREADS; j++) {
        int i;  // shadows the outer 'i' intentionally for this loop
        for (i = 0; i < NUM_BINS; i++)
            my_hist[j][i] = 0;
        // NOTE(review): '%' binds tighter than '+', so with num_ops == 0
        // this is just my_array[j][i] = i.  If (i+num_ops) % MOD_VAL was
        // intended, parentheses are missing -- confirm against the
        // histogram kernel's expectations.
        for (i = 0; i < ARR_SIZE; i++)
            my_array[j][i] = i+num_ops % MOD_VAL;
    }
    // Initialize thread argument
    for (j = 0; j < HISTOGRAM_NUM_THREADS; j++) {
        thread_arg[j].array = (int *)&my_array[j][0];
        thread_arg[j].hist = (int *)&my_hist[j][0];
        thread_arg[j].max_value = MOD_VAL - 1;
        thread_arg[j].min_value = 0;
    }

    // -------- DISTANCE --------------- //
    // Thread attribute structures
    distance_t distance_arg[DISTANCE_NUM_THREADS];
    float vals_x0[DISTANCE_ARR_LENGTH];
    float vals_x1[DISTANCE_ARR_LENGTH];
    float vals_y0[DISTANCE_ARR_LENGTH];
    float vals_y1[DISTANCE_ARR_LENGTH];
    float vals_ds[DISTANCE_ARR_LENGTH];
    for (j = 0; j < DISTANCE_ARR_LENGTH; j++) {
        vals_x0[j] = (float) DISTANCE_ARR_LENGTH - j;
        vals_y0[j] = (float) DISTANCE_ARR_LENGTH - j;
        vals_x1[j] = (float) j + 1;
        vals_y1[j] = (float) DISTANCE_ARR_LENGTH - j + 1;
    }
    // Initialize thread arguments: split the arrays into equal slices.
    int num_items = DISTANCE_ARR_LENGTH/DISTANCE_NUM_THREADS;
    int extra_items = DISTANCE_ARR_LENGTH - (num_items*DISTANCE_NUM_THREADS);
    for ( j= 0; j < DISTANCE_NUM_THREADS; j++) {
        distance_arg[j].x0s = &vals_x0[j*(num_items)];
        distance_arg[j].y0s = &vals_y0[j*(num_items)];
        distance_arg[j].x1s = &vals_x1[j*(num_items)];
        distance_arg[j].y1s = &vals_y1[j*(num_items)];
        distance_arg[j].distances = &vals_ds[j*(num_items)];
        distance_arg[j].length = num_items;
    }
    // Add in extra items for the last thread if needed
    distance_arg[j-1].length += extra_items;

    // Matrix Multiply: A[i][j] = B[i][j] = i+j, C zeroed.
    matrix_t matrix_arg[MATRIX_NUM_THREADS];
    int n;
    for (n = 0; n < MATRIX_NUM_THREADS; n++) {
        for (i = 0; i < MATRIX_A_ROW; i++) {
            for (j = 0; j < MATRIX_A_COL; j++) {
                matrix_arg[n].matrixA[i][j] = i + j;
                matrix_arg[n].matrixB[i][j] = i + j;
                matrix_arg[n].matrixC[i][j] = 0;
            }
        }
    }

    // -------- Find MAx-------------- //
    max_t findmax_arg[FINDMAX_NUM_THREADS];
    for (i = 0; i < FINDMAX_NUM_THREADS; i++) {
        findmax_arg[i].length = FINDMAX_LENGTH;
        findmax_arg[i].shift_amount = sizeof(findmax_arg[i].A[0]);
        for (j = 0; j < FINDMAX_LENGTH; j++) {
            findmax_arg[i].A[j] = (int) (rand() % FINDMAX_LENGTH);
            findmax_arg[i].B[j] = (int) (rand() % FINDMAX_LENGTH);
            findmax_arg[i].result[j] = 0;
        }
    }

    // Set all threads to detached (completion is detected via the slave
    // free-count below, not via joins).
    for(i = 0; i < NUM_THREADS; i++) {
        hthread_attr_init(&attr[i]);
        hthread_attr_setdetachstate(&attr[i], HTHREAD_CREATE_DETACHED);
    }

    hthread_time_t start = hthread_time_get();

    // Fixed, hand-shuffled workload mix of 78 dynamically-placed threads.
    // NOTE(review): many histogram threads share thread_arg[0] (same
    // input/output buffers) -- presumably intentional for a throughput
    // benchmark whose results are not checked; confirm.
    thread_create( &tid[0 ], &attr[0 ], distance_thread_FUNC_ID, (void *) &distance_arg[2], DYNAMIC_HW, 0);
    thread_create( &tid[1 ], &attr[1 ], pi_thread_FUNC_ID, (void *) &thread_data[10], DYNAMIC_HW, 0);
    thread_create( &tid[2 ], &attr[2 ], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[6], DYNAMIC_HW, 0);
    thread_create( &tid[3 ], &attr[3 ], distance_thread_FUNC_ID, (void *) &distance_arg[0], DYNAMIC_HW, 0);
    thread_create( &tid[4 ], &attr[4 ], distance_thread_FUNC_ID, (void *) &distance_arg[6], DYNAMIC_HW, 0);
    thread_create( &tid[5 ], &attr[5 ], pi_thread_FUNC_ID, (void *) &thread_data[4], DYNAMIC_HW, 0);
    thread_create( &tid[6 ], &attr[6 ], find_max_thread_FUNC_ID, (void *) &findmax_arg[9], DYNAMIC_HW, 0);
    thread_create( &tid[7 ], &attr[7 ], pi_thread_FUNC_ID, (void *) &thread_data[12], DYNAMIC_HW, 0);
    thread_create( &tid[8 ], &attr[8 ], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[0], DYNAMIC_HW, 0);
    thread_create( &tid[9 ], &attr[9 ], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[10], &attr[10], find_max_thread_FUNC_ID, (void *) &findmax_arg[8], DYNAMIC_HW, 0);
    thread_create( &tid[11], &attr[11], pi_thread_FUNC_ID, (void *) &thread_data[2], DYNAMIC_HW, 0);
    thread_create( &tid[12], &attr[12], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[13], &attr[13], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
    thread_create( &tid[14], &attr[14], distance_thread_FUNC_ID, (void *) &distance_arg[7], DYNAMIC_HW, 0);
    thread_create( &tid[15], &attr[15], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[16], &attr[16], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[17], DYNAMIC_HW, 0);
    thread_create( &tid[17], &attr[17], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
    thread_create( &tid[18], &attr[18], find_max_thread_FUNC_ID, (void *) &findmax_arg[4], DYNAMIC_HW, 0);
    thread_create( &tid[19], &attr[19], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[11], DYNAMIC_HW, 0);
    thread_create( &tid[20], &attr[20], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[8], DYNAMIC_HW, 0);
    thread_create( &tid[21], &attr[21], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[13], DYNAMIC_HW, 0);
    thread_create( &tid[22], &attr[22], pi_thread_FUNC_ID, (void *) &thread_data[6], DYNAMIC_HW, 0);
    thread_create( &tid[23], &attr[23], distance_thread_FUNC_ID, (void *) &distance_arg[4], DYNAMIC_HW, 0);
    thread_create( &tid[24], &attr[24], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[25], &attr[25], distance_thread_FUNC_ID, (void *) &distance_arg[1], DYNAMIC_HW, 0);
    thread_create( &tid[26], &attr[26], pi_thread_FUNC_ID, (void *) &thread_data[11], DYNAMIC_HW, 0);
    thread_create( &tid[27], &attr[27], find_max_thread_FUNC_ID, (void *) &findmax_arg[3], DYNAMIC_HW, 0);
    thread_create( &tid[28], &attr[28], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[15], DYNAMIC_HW, 0);
    thread_create( &tid[29], &attr[29], distance_thread_FUNC_ID, (void *) &distance_arg[8], DYNAMIC_HW, 0);
    thread_create( &tid[30], &attr[30], pi_thread_FUNC_ID, (void *) &thread_data[9], DYNAMIC_HW, 0);
    thread_create( &tid[31], &attr[31], find_max_thread_FUNC_ID, (void *) &findmax_arg[10], DYNAMIC_HW, 0);
    thread_create( &tid[32], &attr[32], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
    thread_create( &tid[33], &attr[33], pi_thread_FUNC_ID, (void *) &thread_data[1], DYNAMIC_HW, 0);
    thread_create( &tid[34], &attr[34], histogram_thread_FUNC_ID, (void*)(&thread_arg[1]),DYNAMIC_HW,0 );
    thread_create( &tid[35], &attr[35], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[36], &attr[36], distance_thread_FUNC_ID, (void *) &distance_arg[5], DYNAMIC_HW, 0);
    thread_create( &tid[37], &attr[37], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
    thread_create( &tid[38], &attr[38], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
    thread_create( &tid[39], &attr[39], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[40], &attr[40], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[41], &attr[41], pi_thread_FUNC_ID, (void *) &thread_data[0], DYNAMIC_HW, 0);
    thread_create( &tid[42], &attr[42], pi_thread_FUNC_ID, (void *) &thread_data[14], DYNAMIC_HW, 0);
    thread_create( &tid[43], &attr[43], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[12], DYNAMIC_HW, 0);
    thread_create( &tid[44], &attr[44], histogram_thread_FUNC_ID, (void*)(&thread_arg[2]),DYNAMIC_HW,0 );
    thread_create( &tid[45], &attr[45], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[3], DYNAMIC_HW, 0);
    thread_create( &tid[46], &attr[46], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[47], &attr[47], distance_thread_FUNC_ID, (void *) &distance_arg[3], DYNAMIC_HW, 0);
    thread_create( &tid[48], &attr[48], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[2], DYNAMIC_HW, 0);
    thread_create( &tid[49], &attr[49], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[16], DYNAMIC_HW, 0);
    thread_create( &tid[50], &attr[50], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[7], DYNAMIC_HW, 0);
    thread_create( &tid[51], &attr[51], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[5], DYNAMIC_HW, 0);
    thread_create( &tid[52], &attr[52], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[53], &attr[53], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[14], DYNAMIC_HW, 0);
    thread_create( &tid[54], &attr[54], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[10], DYNAMIC_HW, 0);
    thread_create( &tid[55], &attr[55], pi_thread_FUNC_ID, (void *) &thread_data[13], DYNAMIC_HW, 0);
    thread_create( &tid[56], &attr[56], find_max_thread_FUNC_ID, (void *) &findmax_arg[12], DYNAMIC_HW, 0);
    thread_create( &tid[57], &attr[57], pi_thread_FUNC_ID, (void *) &thread_data[8], DYNAMIC_HW, 0);
    thread_create( &tid[58], &attr[58], find_max_thread_FUNC_ID, (void *) &findmax_arg[5], DYNAMIC_HW, 0);
    thread_create( &tid[59], &attr[59], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[60], &attr[60], find_max_thread_FUNC_ID, (void *) &findmax_arg[1], DYNAMIC_HW, 0);
    thread_create( &tid[61], &attr[61], find_max_thread_FUNC_ID, (void *) &findmax_arg[0], DYNAMIC_HW, 0);
    thread_create( &tid[62], &attr[62], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[1], DYNAMIC_HW, 0);
    thread_create( &tid[63], &attr[63], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
    thread_create( &tid[64], &attr[64], pi_thread_FUNC_ID, (void *) &thread_data[7], DYNAMIC_HW, 0);
    thread_create( &tid[65], &attr[65], find_max_thread_FUNC_ID, (void *) &findmax_arg[11], DYNAMIC_HW, 0);
    thread_create( &tid[66], &attr[66], find_max_thread_FUNC_ID, (void *) &findmax_arg[6], DYNAMIC_HW, 0);
    thread_create( &tid[67], &attr[67], find_max_thread_FUNC_ID, (void *) &findmax_arg[2], DYNAMIC_HW, 0);
    thread_create( &tid[68], &attr[68], pi_thread_FUNC_ID, (void *) &thread_data[5], DYNAMIC_HW, 0);
    thread_create( &tid[69], &attr[69], histogram_thread_FUNC_ID, (void*)(&thread_arg[1]),DYNAMIC_HW,0 );
    thread_create( &tid[70], &attr[70], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[71], &attr[71], find_max_thread_FUNC_ID, (void *) &findmax_arg[7], DYNAMIC_HW, 0);
    thread_create( &tid[72], &attr[72], pi_thread_FUNC_ID, (void *) &thread_data[3], DYNAMIC_HW, 0);
    thread_create( &tid[73], &attr[73], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
    thread_create( &tid[74], &attr[74], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
    thread_create( &tid[75], &attr[75], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[9], DYNAMIC_HW, 0);
    thread_create( &tid[76], &attr[76], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[4], DYNAMIC_HW, 0);
    thread_create( &tid[77], &attr[77], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);

    // Wait until all threads are finished (all slaves free and the
    // pending-creation queue drained).
    while(get_num_free_slaves() < NUM_AVAILABLE_HETERO_CPUS || thread_entries != 0) {
        if (thread_entries != 0)
            hthread_yield();
    }
    hthread_time_t stop = hthread_time_get();

    printf("---------------------------\n");
    hthread_time_t diff;
    hthread_time_diff(diff, stop, start);
    printf("Total Execution Time: %.2f ms\n", hthread_time_msec(diff));
    printf("Total Execution Time: %.2f us\n", hthread_time_usec(diff));

#if 0
    // Grab the total number of calls statistic.
    printf("Total number of thread_create (DYNAMIC) calls: %d\n", total_calls);
    printf("---------------------------------------------------\n");
    printf("Perfect Ratio: %03d / %03d = %0.2f\n", perfect_match_counter, total_calls, perfect_match_counter / (1.0f * total_calls));
    printf("Best Ratio: %03d / %03d = %0.2f\n", best_match_counter, total_calls, best_match_counter / (1.0f * total_calls));
    printf("Better Ratio: %03d / %03d = %0.2f\n", better_match_counter, total_calls, better_match_counter / (1.0f * total_calls));
    printf("Possible Ratio: %03d / %03d = %0.2f\n", possible_match_counter, total_calls, possible_match_counter / (1.0f * total_calls));
    perfect_match_counter = 0;
    best_match_counter = 0;
    better_match_counter = 0;
    possible_match_counter = 0;
    Huint hw_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint sw_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint pr_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint total_hw_count = 0;
    Huint total_sw_count = 0;
    Huint total_pr_count = 0;
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        hw_counter[i] = _hwti_get_accelerator_hw_counter(hwti_array[i]);
        sw_counter[i] = _hwti_get_accelerator_sw_counter(hwti_array[i]);
        pr_counter[i] = _hwti_get_accelerator_pr_counter(hwti_array[i]);
        total_hw_count += hw_counter[i];
        total_sw_count += sw_counter[i];
        total_pr_count += pr_counter[i];
        // Manually Reset
        _hwti_set_accelerator_hw_counter(hwti_array[i], 0);
        _hwti_set_accelerator_sw_counter(hwti_array[i], 0);
        _hwti_set_accelerator_pr_counter(hwti_array[i], 0);
    }
    printf("Total HW Counter: %d\n", total_hw_count);
    printf("Total SW Counter: %d\n", total_sw_count);
    printf("Total PR Counter: %d\n", total_pr_count);
    printf("-----------------------\n");
    if (total_hw_count) // if total_hw_count != 0
        printf("Total PR Counter / HW Counter = %f\n", total_pr_count / (1.0 *total_hw_count));
    printf("Total PR Counter / HW+SW Counter = %f\n", total_pr_count / (1.0 *(total_hw_count+total_sw_count)));
#endif

    printf("Total OS overhead (thread_create) = %f msec\n", hthread_time_msec(create_overhead));

#if 0
    hthread_time_t software_time = 0;
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        volatile hthread_time_t * temp = (hthread_time_t *) (hwti_array[i] + 0x100);
        printf("%d: Software Execution = %f msec\n",i, hthread_time_msec(*temp));
        software_time += *temp;
    }
    printf("Total Software Execution = %f msec\n", hthread_time_msec(software_time));
#endif

    // Display thread times
    for (i = 0; i < NUM_THREADS; i++) {
        // Determine which slave ran this thread based on address
        Huint base = attr[i].hardware_addr - HT_HWTI_COMMAND_OFFSET;
        Huint slave_num = (base & 0x00FF0000) >> 16;
        printf("Execution time (TID : %d, Slave : %d, HW ADDRESS = 0x%08x)\n", tid[i], slave_num, attr[i].hardware_addr);
    }

    printf("--- Done ---\n");
    return 0;
}
/*
 * Host-side regression driver: runs each compiled-in kernel test
 * (PR, SORT, CRC, MatrixMul, VectorSub/Add/Mul/Div) for NUM_TRIALS
 * trials against all NUM_AVAILABLE_HETERO_CPUS slaves.
 *
 * Thread placement is selected at compile time:
 *   - HARDWARE_THREAD undefined -> SOFTWARE_THREAD
 *   - HARDWARE_THREAD + DYNAMIC -> DYNAMIC_HW
 *   - HARDWARE_THREAD only      -> STATIC_HW0 + slave index
 * (The TEST_PR section has no DYNAMIC branch.)
 *
 * Fatal setup errors spin forever (while(1)) -- embedded idiom.
 * NOTE(review): 'child' and 'attr' are heap-allocated and never freed;
 * harmless since main() returns to process exit, but worth confirming.
 */
int main(){
    printf("HOST: START\n");
    // Initialize various host tables once.
    init_host_tables();

    int i = 0;
    unsigned int j = 0, h,k;
    int ret[NUM_AVAILABLE_HETERO_CPUS];     // per-slave join results
    Hint * ptr;
    Data3 input3[NUM_AVAILABLE_HETERO_CPUS]; // shared by the Vector* tests

    printf("HOST: Creating thread & attribute structures\n");
    hthread_t * child = (hthread_t *) malloc(sizeof(hthread_t) * NUM_AVAILABLE_HETERO_CPUS);
    hthread_attr_t * attr = (hthread_attr_t *) malloc(sizeof(hthread_attr_t) * NUM_AVAILABLE_HETERO_CPUS);
    assert (child != NULL);
    assert (attr != NULL);

#ifdef TEST_PR
    printf("------------------------------------\n");
    printf("HOST: Testing PR\n");
    // Set up attributes for a hardware thread
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        hthread_attr_init(&attr[i]);
        hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
    }
    // Creating threads (arg is the trial count, smuggled through void*)
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        if (thread_create (&child[i], &attr[i],test_PR_thread_FUNC_ID, (void *)(NUM_TRIALS),
#ifndef HARDWARE_THREAD
                SOFTWARE_THREAD,
#else
                STATIC_HW0 + i,
#endif
                0)) {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
        }
    }
    // Joining threads
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        // Join on child thread
        if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
        }
        if (ret[i] != SUCCESS)
            printf("Thread %02d Failed: %d\n",i, ret[i]);
    }
    printf("HOST: Done\n");
#endif

#ifdef USER_SORT
    int list[NUM_AVAILABLE_HETERO_CPUS][LIST_LENGTH];
    printf("------------------------------------\n");
    printf("HOST: Testing SORT\n");
    // initialized the list (descending, so any correct sort must reorder)
    for (j = 0; j < NUM_TRIALS; j++) {
        for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
            for (i = 0; i < LIST_LENGTH; i++) {
                //list[h][i] = rand() % 1000;
                list[h][i] = LIST_LENGTH-i;
            }
        }
#if 0
        printf("Printing original lists\n");
        for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
            printf("List[%d]: ", h);
            for (i = 0; i < LIST_LENGTH; i++) {
                printf("..%d", list[h][i]);
            }
            printf("\n");
        }
#endif
        // Set up attributes for a hardware thread
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            hthread_attr_init(&attr[i]);
            hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
        }
        // Creating threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            if (thread_create (&child[i], &attr[i],sort_thread_FUNC_ID, (void *)(&list[i][0]),
#ifndef HARDWARE_THREAD
                    SOFTWARE_THREAD,
#elif DYNAMIC
                    DYNAMIC_HW,
#else
                    STATIC_HW0 + i,
#endif
                    0)) {
                printf("hthread_create error on HW THREAD %d\n", i);
                while(1);
            }
        }
        // Joining threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            // Join on child thread
            if( hthread_join(child[i], (void *) &ret[i])) {
                printf("Error joining child thread\n");
                while(1);
            }
            //printf("Thread %02d Result = %d\n",i,ret[i]);
        }
        // Check results: each list must be non-decreasing.
        //printf("Now checking the lists\n");
        for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
            //printf("List[%d]: ", h);
            //for (i = LIST_LENGTH - (LIST_LENGTH-1); i < LIST_LENGTH; i++) {
            for (i = 0; i < LIST_LENGTH-1; i++) {
                //printf("..%d", list[h][i]);
                if (list[h][i] > list[h][i+1]) {
                    printf("*");
                    printf("[TRIAL %d, Slave %d] Sort failed!\n", j, h);
                    i = LIST_LENGTH;   // bail out of the inner scan
                }
            }
            // Print last element
            //printf("..%d", list[h][i]);
            //printf("\n");
        }
    }
    printf("HOST: Done\n");
#endif

#ifdef USER_CRC
    printf("------------------------------------\n");
    printf("HOST: Testing CRC\n");
    Hint * input;
    Hint * check, index = 0;   // note: 'index' is a Hint, not a pointer
    for (j = 0; j < NUM_TRIALS; j++) {
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            input = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            check = (Hint *) malloc(ARRAY_SIZE * sizeof(Hint));
            assert(input != NULL);
            assert(check != NULL);
            // Initializing the data (mirrored into 'check')
            ptr = input;
            for(index = 0; index < ARRAY_SIZE; index++) {
                *ptr = (rand() % 1000)*8;
                *(check+index) = *ptr;
                ptr++;
            }
            // Generating the CRC of that data on the host for reference
            if (poly_crc(check, ARRAY_SIZE)) {
                printf("Host failed to generate CRC check of data\n");
                while(1);
            }
            // Set up attributes for a hardware thread
            hthread_attr_init(&attr[i]);
            hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
            // Creating threads
            if (thread_create (&child[i], &attr[i], crc_thread_FUNC_ID, (void *)input,
#ifndef HARDWARE_THREAD
                    SOFTWARE_THREAD,
#elif DYNAMIC
                    DYNAMIC_HW,
#else
                    STATIC_HW0 + i,
#endif
                    0)) {
                printf("hthread_create error on HW THREAD %d\n", i);
                while(1);
            }
            // Join on child thread
            int status;
            status = hthread_join(child[i], (void *) &ret[i]);
            if (status) {
                printf("Error joining child thread: %d\n", status);
                while(1);
            }
            //printf("Thread %02d Result = %d\n",i,ret[i]);
            // For CRC Results: slave's in-place result must match host's
            for ( h = 0; h < ARRAY_SIZE; h++) {
                if (*(input+h) != *(check+h) ) {
                    printf("[TRIAL %d, Slave %d] CRC failed!\n", j, i);
                    h = ARRAY_SIZE;   // bail out of the comparison
                }
            }
            // Release memory
            free(input);
            free(check);
        }
    }
    printf("HOST: Done\n");
#endif

#ifdef USER_MATRIXMUL
    printf("------------------------------------\n");
    printf("HOST: Testing MatrixMul\n");
    data package[NUM_AVAILABLE_HETERO_CPUS];
    for (j = 0; j < NUM_TRIALS; j++) {
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            package[i].dataA = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint));
            package[i].dataB = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint));
            package[i].dataC = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint));
            assert(package[i].dataA != NULL);
            assert(package[i].dataB != NULL);
            assert(package[i].dataC != NULL);
            // A[k][l] = k, B[k][l] = l, C zeroed.
            Huint l;
            for (k = 0; k < MATRIX_SIZE; k++) {
                for (l = 0; l < MATRIX_SIZE; l++) {
                    package[i].dataA[k*MATRIX_SIZE + l] = k;
                    package[i].dataB[k*MATRIX_SIZE + l] = l;
                    package[i].dataC[k*MATRIX_SIZE + l] = 0;
                }
            }
            package[i].size = MATRIX_SIZE;
            unsigned int row, col;
#if 0
            printf("Original Matrix A: 0x%08x\n", package[i].dataA);
            for (row=0 ; row < MATRIX_SIZE; row++) {
                for (col=0 ; col < MATRIX_SIZE; col++) {
                    printf("%02d ", package[i].dataA[row*MATRIX_SIZE+col]);
                }
                printf("\n");
            }
            printf("Original Matrix B: 0x%08x\n", package[i].dataB);
            for (row=0 ; row < MATRIX_SIZE; row++) {
                for (col=0 ; col < MATRIX_SIZE; col++) {
                    printf("%02d ", package[i].dataB[row*MATRIX_SIZE+col]);
                }
                printf("\n");
            }
            printf("Original Matrix C: 0x%08x\n", package[i].dataC);
            for (row=0 ; row < MATRIX_SIZE; row++) {
                for (col=0 ; col < MATRIX_SIZE; col++) {
                    printf("%02d ", package[i].dataC[row*MATRIX_SIZE+col]);
                }
                printf("\n");
            }
#endif
            // Set up attributes for a hardware thread
            hthread_attr_init(&attr[i]);
            hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
            // Creating threads
            if (thread_create (&child[i], &attr[i], matrix_multiply_thread_FUNC_ID, (void *)(&package[i]),
#ifndef HARDWARE_THREAD
                    SOFTWARE_THREAD,
#elif DYNAMIC
                    DYNAMIC_HW,
#else
                    STATIC_HW0 + i,
#endif
                    0)) {
                printf("hthread_create error on HW THREAD %d\n", i);
                while(1);
            }
            // Join on child thread
            if( hthread_join(child[i], (void *) &ret[i])) {
                printf("Error joining child thread\n");
                while(1);
            }
            if (ret[i] != SUCCESS)
                printf("Return value for thread indicates an error!\n");
#if 0
            printf("New Matrix C:\n");
            for (row=0 ; row < MATRIX_SIZE; row++) {
                for (col=0 ; col < MATRIX_SIZE; col++) {
                    printf("%02d ", package[i].dataC[row*MATRIX_SIZE+col]);
                }
                printf("\n");
            }
#endif
            // Check results against a host-side reference multiply.
            Hint temp[MATRIX_SIZE][MATRIX_SIZE];
            poly_matrix_mul(package[i].dataA, package[i].dataB, &temp, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE);
            int r, c;
            for (r=0 ; r < MATRIX_SIZE; r++) {
                for (c=0 ; c < MATRIX_SIZE; c++) {
                    if ( temp[r][c] != package[i].dataC[r*MATRIX_SIZE + c]) {
                        printf("[TRIAL %d, Slave %d] Matrix Mul failed!\n", j, i);
                        r = c = MATRIX_SIZE;   // bail out of both loops
                    }
                }
            }
            // Release memory
            free(package[i].dataA);
            free(package[i].dataB);
            free(package[i].dataC);
        }
    }
    printf("HOST: Done\n");
#endif

#ifdef USER_VECTORSUB
    printf("------------------------------------\n");
    printf("HOST: Testing VectorSub\n");
    for (j = 0; j < NUM_TRIALS; j++) {
        // Allocate and randomize operand vectors; startAddr3 is the output.
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
            input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
            input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;
            for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/}
            for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/}
        }
        // Set up attributes for a hardware thread
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            hthread_attr_init(&attr[i]);
            hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
        }
        // Creating threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            if (thread_create (&child[i], &attr[i], vector_sub_thread_FUNC_ID, (void *)(&input3[i]),
#ifndef HARDWARE_THREAD
                    SOFTWARE_THREAD,
#elif DYNAMIC
                    DYNAMIC_HW,
#else
                    STATIC_HW0 + i,
#endif
                    0)) {
                printf("hthread_create error on HW THREAD %d\n", i);
                while(1);
            }
        }
        // Joining threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            // Join on child thread
            if( hthread_join(child[i], (void *) &ret[i])) {
                printf("Error joining child thread\n");
                while(1);
            }
        }
        // Check results: out[h] == a[h] - b[h]
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            for (h=0 ; h < ARRAY_SIZE; h++) {
                if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] - input3[i].startAddr2[h])) {
                    printf("[TRIAL %d, Slave %d] Vector Sub failed!\n", j, i);
                    h = ARRAY_SIZE;
                }
            }
            // Release memory
            free(input3[i].startAddr1);
            free(input3[i].startAddr2);
            free(input3[i].startAddr3);
        }
    }
    printf("HOST: Done\n");
#endif

#ifdef USER_VECTORADD
    printf("------------------------------------\n");
    printf("HOST: Testing VectorAdd\n");
    for (j = 0; j < NUM_TRIALS; j++) {
        // Allocate and randomize operand vectors; startAddr3 is the output.
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
            input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
            input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;
            for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/}
            for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/}
        }
        // Set up attributes for a hardware thread
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            hthread_attr_init(&attr[i]);
            hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
        }
        // Creating threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            if (thread_create (&child[i], &attr[i], vector_add_thread_FUNC_ID, (void *)(&input3[i]),
#ifndef HARDWARE_THREAD
                    SOFTWARE_THREAD,
#elif DYNAMIC
                    DYNAMIC_HW,
#else
                    STATIC_HW0 + i,
#endif
                    0)) {
                printf("hthread_create error on HW THREAD %d\n", i);
                while(1);
            }
        }
        // Joining threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            // Join on child thread
            if( hthread_join(child[i], (void *) &ret[i])) {
                printf("Error joining child thread\n");
                while(1);
            }
        }
        // Check results: out[h] == a[h] + b[h]
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            for (h=0 ; h < ARRAY_SIZE; h++) {
                if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] + input3[i].startAddr2[h])) {
                    printf("[TRIAL %d, Slave %d] Vector Add failed!\n", j, i);
                    h = ARRAY_SIZE;
                }
            }
            // Release memory
            free(input3[i].startAddr1);
            free(input3[i].startAddr2);
            free(input3[i].startAddr3);
        }
    }
    printf("HOST: Done\n");
#endif

#ifdef USER_VECTORMUL
    printf("------------------------------------\n");
    printf("HOST: Testing VectorMultiply\n");
    for (j = 0; j < NUM_TRIALS; j++) {
        // Allocate and randomize operand vectors; startAddr3 is the output.
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
            input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
            input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;
            for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/}
            for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/}
        }
        // Set up attributes for a hardware thread
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            hthread_attr_init(&attr[i]);
            hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
        }
        // Creating threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            if (thread_create (&child[i], &attr[i], vector_multiply_thread_FUNC_ID, (void *)(&input3[i]),
#ifndef HARDWARE_THREAD
                    SOFTWARE_THREAD,
#elif DYNAMIC
                    DYNAMIC_HW,
#else
                    STATIC_HW0 + i,
#endif
                    0)) {
                printf("hthread_create error on HW THREAD %d\n", i);
                while(1);
            }
        }
        // Joining threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            // Join on child thread
            if( hthread_join(child[i], (void *) &ret[i])) {
                printf("Error joining child thread\n");
                while(1);
            }
        }
        // Check results: out[h] == a[h] * b[h]
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            for (h=0 ; h < ARRAY_SIZE; h++) {
                if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] * input3[i].startAddr2[h])) {
                    printf("[TRIAL %d, Slave %d] Vector Multiply failed!\n", j, i);
                    h = ARRAY_SIZE;
                }
            }
            // Release memory
            free(input3[i].startAddr1);
            free(input3[i].startAddr2);
            free(input3[i].startAddr3);
        }
    }
    printf("HOST: Done\n");
#endif

#ifdef USER_VECTORDIV
    printf("------------------------------------\n");
    printf("HOST: Testing VectorDivide\n");
    for (j = 0; j < NUM_TRIALS; j++) {
        // Allocate and randomize operand vectors; startAddr3 is the output.
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
            input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
            input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
            input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;
            for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/}
            // Second operand offset by +1 so it is never zero.
            for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) (rand() % 1000) + 1; /* printf( " %i \n",*ptr );*/}
        }
        // Set up attributes for a hardware thread
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            hthread_attr_init(&attr[i]);
            hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
        }
        // Creating threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            if (thread_create (&child[i], &attr[i], vector_divide_thread_FUNC_ID, (void *)(&input3[i]),
#ifndef HARDWARE_THREAD
                    SOFTWARE_THREAD,
#elif DYNAMIC
                    DYNAMIC_HW,
#else
                    STATIC_HW0 + i,
#endif
                    0)) {
                printf("hthread_create error on HW THREAD %d\n", i);
                while(1);
            }
        }
        // Joining threads
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            // Join on child thread
            if( hthread_join(child[i], (void *) &ret[i])) {
                printf("Error joining child thread\n");
                while(1);
            }
        }
        // Check results: out[h] == a[h] / b[h]
        for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
            for (h=0 ; h < ARRAY_SIZE; h++) {
                // The divisor was generated to be a non-zero number above.
                // Hence, no need to check for / 0
                if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] / input3[i].startAddr2[h])) {
                    printf("[TRIAL %d, Slave %d] Vector Divide failed!\n", j, i);
                    h = ARRAY_SIZE;
                }
            }
            // Release memory
            free(input3[i].startAddr1);
            free(input3[i].startAddr2);
            free(input3[i].startAddr3);
        }
    }
    printf("HOST: Done\n");
#endif

    printf("END\n");
    return 0;
}