int main(){ printf("HOST: START\n"); // Initialize various host tables once. init_host_tables(); int i = 0; unsigned int j = 0, h,k; int ret[NUM_AVAILABLE_HETERO_CPUS]; Hint * ptr; Data3 input3[NUM_AVAILABLE_HETERO_CPUS]; printf("HOST: Creating thread & attribute structures\n"); hthread_t * child = (hthread_t *) malloc(sizeof(hthread_t) * NUM_AVAILABLE_HETERO_CPUS); hthread_attr_t * attr = (hthread_attr_t *) malloc(sizeof(hthread_attr_t) * NUM_AVAILABLE_HETERO_CPUS); assert (child != NULL); assert (attr != NULL); #ifdef TEST_PR printf("------------------------------------\n"); printf("HOST: Testing PR\n"); // Set up attributes for a hardware thread for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); } // Creating threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { if (thread_create (&child[i], &attr[i],test_PR_thread_FUNC_ID, (void *)(NUM_TRIALS), #ifndef HARDWARE_THREAD SOFTWARE_THREAD, #else STATIC_HW0 + i, #endif 0)) { printf("hthread_create error on HW THREAD %d\n", i); while(1); } } // Joining threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { // Join on child thread if( hthread_join(child[i], (void *) &ret[i])) { printf("Error joining child thread\n"); while(1); } if (ret[i] != SUCCESS) printf("Thread %02d Failed: %d\n",i, ret[i]); } printf("HOST: Done\n"); #endif #ifdef USER_SORT int list[NUM_AVAILABLE_HETERO_CPUS][LIST_LENGTH]; printf("------------------------------------\n"); printf("HOST: Testing SORT\n"); // initialized the list for (j = 0; j < NUM_TRIALS; j++) { for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) { for (i = 0; i < LIST_LENGTH; i++) { //list[h][i] = rand() % 1000; list[h][i] = LIST_LENGTH-i; } } #if 0 printf("Printing original lists\n"); for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) { printf("List[%d]: ", h); for (i = 0; i < LIST_LENGTH; i++) { printf("..%d", list[h][i]); } printf("\n"); } #endif // Set up attributes for a hardware thread for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); } // Creating threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { if (thread_create (&child[i], &attr[i],sort_thread_FUNC_ID, (void *)(&list[i][0]), #ifndef HARDWARE_THREAD SOFTWARE_THREAD, #elif DYNAMIC DYNAMIC_HW, #else STATIC_HW0 + i, #endif 0)) { printf("hthread_create error on HW THREAD %d\n", i); while(1); } } // Joining threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { // Join on child thread if( hthread_join(child[i], (void *) &ret[i])) { printf("Error joining child thread\n"); while(1); } //printf("Thread %02d Result = %d\n",i,ret[i]); } // Check results //printf("Now checking the lists\n"); for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) { //printf("List[%d]: ", h); //for (i = LIST_LENGTH - (LIST_LENGTH-1); i < LIST_LENGTH; i++) { for (i = 0; i < LIST_LENGTH-1; i++) { //printf("..%d", list[h][i]); if (list[h][i] > list[h][i+1]) { printf("*"); printf("[TRIAL %d, Slave %d] Sort failed!\n", j, h); i = LIST_LENGTH; } } // Print last element //printf("..%d", list[h][i]); //printf("\n"); } } printf("HOST: Done\n"); #endif #ifdef USER_CRC printf("------------------------------------\n"); printf("HOST: Testing CRC\n"); Hint * input; Hint * check, index = 0; for (j = 0; j < NUM_TRIALS; j++) { for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { input = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); check = (Hint *) malloc(ARRAY_SIZE * sizeof(Hint)); assert(input != NULL); assert(check != NULL); // Initializing the data ptr = input; for(index = 0; index < ARRAY_SIZE; index++) { *ptr = (rand() % 1000)*8; *(check+index) = *ptr; ptr++; } // Generating the CRC of that data if (poly_crc(check, ARRAY_SIZE)) { printf("Host failed to generate CRC check of data\n"); while(1); } // Set up attributes for a hardware thread hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); // Creating threads if (thread_create (&child[i], &attr[i], crc_thread_FUNC_ID, (void *)input, #ifndef HARDWARE_THREAD SOFTWARE_THREAD, #elif DYNAMIC DYNAMIC_HW, #else STATIC_HW0 + i, #endif 0)) { printf("hthread_create error on HW THREAD %d\n", i); while(1); } // Join on child thread int status; status = hthread_join(child[i], (void *) &ret[i]); if (status) { printf("Error joining child thread: %d\n", status); while(1); } //printf("Thread %02d Result = %d\n",i,ret[i]); // For CRC Results for ( h = 0; h < ARRAY_SIZE; h++) { if (*(input+h) != *(check+h) ) { printf("[TRIAL %d, Slave %d] CRC failed!\n", j, i); h = ARRAY_SIZE; } } // Release memory free(input); free(check); } } printf("HOST: Done\n"); #endif #ifdef USER_MATRIXMUL printf("------------------------------------\n"); printf("HOST: Testing MatrixMul\n"); data package[NUM_AVAILABLE_HETERO_CPUS]; for (j = 0; j < NUM_TRIALS; j++) { for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { package[i].dataA = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint)); package[i].dataB = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint)); package[i].dataC = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint)); assert(package[i].dataA != NULL); assert(package[i].dataB != NULL); assert(package[i].dataC != NULL); Huint l; for (k = 0; k < MATRIX_SIZE; k++) { for (l = 0; l < MATRIX_SIZE; l++) { package[i].dataA[k*MATRIX_SIZE + l] = k; package[i].dataB[k*MATRIX_SIZE + l] = l; package[i].dataC[k*MATRIX_SIZE + l] = 0; } } package[i].size = MATRIX_SIZE; unsigned int row, col; #if 0 printf("Original Matrix A: 0x%08x\n", package[i].dataA); for (row=0 ; row < MATRIX_SIZE; row++) { for (col=0 ; col < MATRIX_SIZE; col++) { printf("%02d ", package[i].dataA[row*MATRIX_SIZE+col]); } printf("\n"); } printf("Original Matrix B: 0x%08x\n", package[i].dataB); for (row=0 ; row < MATRIX_SIZE; row++) { for (col=0 ; col < MATRIX_SIZE; col++) { printf("%02d ", package[i].dataB[row*MATRIX_SIZE+col]); } printf("\n"); } printf("Original Matrix C: 0x%08x\n", package[i].dataC); for (row=0 ; row < MATRIX_SIZE; row++) { for (col=0 ; col < MATRIX_SIZE; col++) { printf("%02d ", package[i].dataC[row*MATRIX_SIZE+col]); } printf("\n"); } #endif // Set up attributes for a hardware thread hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); // Creating threads if (thread_create (&child[i], &attr[i], matrix_multiply_thread_FUNC_ID, (void *)(&package[i]), #ifndef HARDWARE_THREAD SOFTWARE_THREAD, #elif DYNAMIC DYNAMIC_HW, #else STATIC_HW0 + i, #endif 0)) { printf("hthread_create error on HW THREAD %d\n", i); while(1); } // Join on child thread if( hthread_join(child[i], (void *) &ret[i])) { printf("Error joining child thread\n"); while(1); } if (ret[i] != SUCCESS) printf("Return value for thread indicates an error!\n"); #if 0 printf("New Matrix C:\n"); for (row=0 ; row < MATRIX_SIZE; row++) { for (col=0 ; col < MATRIX_SIZE; col++) { printf("%02d ", package[i].dataC[row*MATRIX_SIZE+col]); } printf("\n"); } #endif // Check results Hint temp[MATRIX_SIZE][MATRIX_SIZE]; poly_matrix_mul(package[i].dataA, package[i].dataB, &temp, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE); int r, c; for (r=0 ; r < MATRIX_SIZE; r++) { for (c=0 ; c < MATRIX_SIZE; c++) { if ( temp[r][c] != package[i].dataC[r*MATRIX_SIZE + c]) { printf("[TRIAL %d, Slave %d] Matrix Mul failed!\n", j, i); r = c = MATRIX_SIZE; } } } // Release memory free(package[i].dataA); free(package[i].dataB); free(package[i].dataC); } } printf("HOST: Done\n"); #endif #ifdef USER_VECTORSUB printf("------------------------------------\n"); printf("HOST: Testing VectorSub\n"); for (j = 0; j < NUM_TRIALS; j++) { for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1; input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1; input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1; for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/} for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/} } // Set up attributes for a hardware thread for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); } // Creating threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { if (thread_create (&child[i], &attr[i], vector_sub_thread_FUNC_ID, (void *)(&input3[i]), #ifndef HARDWARE_THREAD SOFTWARE_THREAD, #elif DYNAMIC DYNAMIC_HW, #else STATIC_HW0 + i, #endif 0)) { printf("hthread_create error on HW THREAD %d\n", i); while(1); } } // Joining threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { // Join on child thread if( hthread_join(child[i], (void *) &ret[i])) { printf("Error joining child thread\n"); while(1); } } // Check results for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { for (h=0 ; h < ARRAY_SIZE; h++) { if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] - input3[i].startAddr2[h])) { printf("[TRIAL %d, Slave %d] Vector Sub failed!\n", j, i); h = ARRAY_SIZE; } } // Release memory free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3); } } printf("HOST: Done\n"); #endif #ifdef USER_VECTORADD printf("------------------------------------\n"); printf("HOST: Testing VectorAdd\n"); for (j = 0; j < NUM_TRIALS; j++) { for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1; input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1; input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1; for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/} for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/} } // Set up attributes for a hardware thread for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); } // Creating threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { if (thread_create (&child[i], &attr[i], vector_add_thread_FUNC_ID, (void *)(&input3[i]), #ifndef HARDWARE_THREAD SOFTWARE_THREAD, #elif DYNAMIC DYNAMIC_HW, #else STATIC_HW0 + i, #endif 0)) { printf("hthread_create error on HW THREAD %d\n", i); while(1); } } // Joining threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { // Join on child thread if( hthread_join(child[i], (void *) &ret[i])) { printf("Error joining child thread\n"); while(1); } } // Check results for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { for (h=0 ; h < ARRAY_SIZE; h++) { if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] + input3[i].startAddr2[h])) { printf("[TRIAL %d, Slave %d] Vector Add failed!\n", j, i); h = ARRAY_SIZE; } } // Release memory free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3); } } printf("HOST: Done\n"); #endif #ifdef USER_VECTORMUL printf("------------------------------------\n"); printf("HOST: Testing VectorMultiply\n"); for (j = 0; j < NUM_TRIALS; j++) { for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1; input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1; input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1; for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/} for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/} } // Set up attributes for a hardware thread for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); } // Creating threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { if (thread_create (&child[i], &attr[i], vector_multiply_thread_FUNC_ID, (void *)(&input3[i]), #ifndef HARDWARE_THREAD SOFTWARE_THREAD, #elif DYNAMIC DYNAMIC_HW, #else STATIC_HW0 + i, #endif 0)) { printf("hthread_create error on HW THREAD %d\n", i); while(1); } } // Joining threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { // Join on child thread if( hthread_join(child[i], (void *) &ret[i])) { printf("Error joining child thread\n"); while(1); } } // Check results for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { for (h=0 ; h < ARRAY_SIZE; h++) { if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] * input3[i].startAddr2[h])) { printf("[TRIAL %d, Slave %d] Vector Multiply failed!\n", j, i); h = ARRAY_SIZE; } } // Release memory free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3); } } printf("HOST: Done\n"); #endif #ifdef USER_VECTORDIV printf("------------------------------------\n"); printf("HOST: Testing VectorDivide\n"); for (j = 0; j < NUM_TRIALS; j++) { for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1; input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1; input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1; for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000; /* printf( " %i \n",*ptr );*/} for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) (rand() % 1000) + 1; /* printf( " %i \n",*ptr );*/} } // Set up attributes for a hardware thread for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { hthread_attr_init(&attr[i]); hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE); } // Creating threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { if (thread_create (&child[i], &attr[i], vector_divide_thread_FUNC_ID, (void *)(&input3[i]), #ifndef HARDWARE_THREAD SOFTWARE_THREAD, #elif DYNAMIC DYNAMIC_HW, #else STATIC_HW0 + i, #endif 0)) { printf("hthread_create error on HW THREAD %d\n", i); while(1); } } // Joining threads for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { // Join on child thread if( hthread_join(child[i], (void *) &ret[i])) { printf("Error joining child thread\n"); while(1); } } // Check results for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { for (h=0 ; h < ARRAY_SIZE; h++) { // dividend was generated to be a non-zero number above. Hence, no need to check for / 0 if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] / input3[i].startAddr2[h])) { printf("[TRIAL %d, Slave %d] Vector Divide failed!\n", j, i); h = ARRAY_SIZE; } } // Release memory free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3); } } printf("HOST: Done\n"); #endif printf("END\n"); return 0; }
int main() { hthread_time_t time_create, time_start, time_stop, diff; // Thread attribute structures Huint sta[NUM_THREADS]; void* retval[NUM_THREADS]; hthread_t tid[NUM_THREADS]; hthread_attr_t attr[NUM_THREADS]; targ_t targ[NUM_THREADS]; int my_dct_matrix[BLOCK_SIZE][BLOCK_SIZE] = { {23170, 23170, 23170, 23170, 23170, 23170, 23170, 23170 }, {32138, 27246, 18205, 6393, -6393, -18205, -27246, -32138 }, {30274, 12540, -12540, -30274, -30274, -12540, 12540, 30274 }, {27246, -6393, -32138, -18205, 18205, 32138, 6393, -27246 }, {23170, -23170, -23170, 23170, 23170, -23170, -23170, 23170 }, {18205, -32138, 6393, 27246, -27246, -6393, 32138, -18205 }, {12540, -30274, 30274, -12540, -12540, 30274, -30274, 12540 }, {6393 , -18205, 27246, -32138, 32138, -27246, 18205, -6393 } }; int my_dct_matrix_trans[BLOCK_SIZE][BLOCK_SIZE] = { {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0} }; int my_temp[BLOCK_SIZE][BLOCK_SIZE]; int my_input[BLOCK_SIZE][BLOCK_SIZE]; int my_intermediate_dct[BLOCK_SIZE][BLOCK_SIZE]; int my_intermediate_idct[BLOCK_SIZE][BLOCK_SIZE]; int my_output[BLOCK_SIZE][BLOCK_SIZE]; int my_idct_output[BLOCK_SIZE][BLOCK_SIZE]; int my_scale_factor = 16; int i,j; for (j = 0; j < NUM_THREADS; j++) { // Initialize the attributes for the threads hthread_attr_init( &attr[j] ); } // Calculate transpose of dct_matrix initialize_dct_matrix(my_dct_matrix, my_dct_matrix_trans); // Initialize input for (i = 0; i < BLOCK_SIZE; i++) { for (j = 0; j < BLOCK_SIZE; j++) { my_input[i][j] = i+j; } } // Fill in thread arguments for an initial DCT run targ[0].scale_factor = my_scale_factor; targ[0].input = my_input; targ[0].intermediate = my_intermediate_dct; targ[0].output = my_temp; targ[0].coeff_matrix = my_dct_matrix; targ[0].coeff_matrix_trans = my_dct_matrix_trans; dct_thread(&targ[0]); printf("\r\nOriginal Input:\r\n"); print_matrix(my_input); printf("\r\nOriginal DCT:\r\n"); print_matrix(my_temp); printf("**************************************************\r\n"); // Fill in thread arguments targ[0].scale_factor = my_scale_factor; targ[0].input = my_input; targ[0].intermediate = my_intermediate_dct; targ[0].output = my_output; targ[0].coeff_matrix = my_dct_matrix; targ[0].coeff_matrix_trans = my_dct_matrix_trans; targ[1].scale_factor = my_scale_factor; targ[1].input = my_temp; targ[1].intermediate = my_intermediate_idct; targ[1].output = my_idct_output; targ[1].coeff_matrix = my_dct_matrix; targ[1].coeff_matrix_trans = my_dct_matrix_trans; // Start timing thread create time_create = hthread_time_get(); // Peform DCT sta[0] = thread_create(&tid[0], &attr[0], dct_thread_FUNC_ID, (void *) &targ[0], STATIC_HW0, 0); // Peform IDCT sta[1] = thread_create(&tid[1], &attr[1], idct_thread_FUNC_ID, (void *) &targ[1], STATIC_HW1, 0); // Allow created threads to begin running and start timer time_start = hthread_time_get(); // Wait for threads to complete hthread_join(tid[0],&retval[0]); hthread_join(tid[1],&retval[1]); // Stop timer time_stop = hthread_time_get(); printf("\r\nDCT (retval = 0x%08x):\r\n",(unsigned int)retval[0]); print_matrix(my_output); printf("\r\nIDCT (retval = 0x%08x):\r\n",(unsigned int)retval[1]); print_matrix(my_idct_output); printf("*********************************\n"); printf("Create time = %llu\n",time_create); printf("Start time = %llu\n",time_start); printf("Stop time = %llu\n",time_stop); printf("*********************************\n"); hthread_time_diff(diff,time_start, time_create); printf("Creation time (|Start - Create|) usec = %f\n",hthread_time_usec(diff)); hthread_time_diff(diff,time_stop, time_start); printf("Elapsed time (|Stop - Start|) usec = %f\n",hthread_time_usec(diff)); hthread_time_diff(diff,time_stop, time_create); printf("Total time (|Create - Stop|) usec = %f\n",hthread_time_usec(diff)); hthread_time_t * slave_time = (hthread_time_t *) (attr[0].hardware_addr - HT_CMD_HWTI_COMMAND + HT_CMD_VHWTI_EXEC_TIME); printf("Time reported by slave nano kernel #0 = %f usec\n", hthread_time_usec(*slave_time)); slave_time = (hthread_time_t *) (attr[1].hardware_addr - HT_CMD_HWTI_COMMAND + HT_CMD_VHWTI_EXEC_TIME); printf("Time reported by slave nano kernel #1 = %f usec\n", hthread_time_usec(*slave_time)); return 0; }
int run_tests() { // Timer variables xps_timer_t timer; int time_create, time_start, time_unlock, time_stop; // Mutex hthread_mutex_t * mutex = (hthread_mutex_t*)malloc( sizeof(hthread_mutex_t) ); hthread_mutex_init( mutex, NULL ); float min; // Thread attribute structures Huint sta[NUM_THREADS]; void* retval[NUM_THREADS]; hthread_t tid[NUM_THREADS]; hthread_attr_t attr[NUM_THREADS]; targ_t thread_arg[NUM_THREADS]; // Setup Cache XCache_DisableDCache(); XCache_EnableICache(0xc0000801); // Create timer xps_timer_create(&timer, (int*)0x20400000); // Start timer xps_timer_start(&timer); // ************************************************************************************* extern unsigned char intermediate[]; extern unsigned int min_handle_offset; unsigned int min_handle = (min_handle_offset) + (unsigned int)(&intermediate); // ************************************************************************************* printf("Code start address = 0x%08x\n", (unsigned int)&intermediate); int i = 0; float main_array[ARRAY_LENGTH]; printf("Addr of array = 0x%08x\n",(unsigned int)&main_array[0]); for (i = 0; i < ARRAY_LENGTH; i++) { main_array[i] = (i+2)*3.14f; } int num_items = ARRAY_LENGTH/NUM_THREADS; int extra_items = ARRAY_LENGTH - (num_items*NUM_THREADS); float * start_addr = &main_array[0]; for (i = 0; i < NUM_THREADS; i++) { // Initialize the attributes for the hardware threads hthread_attr_init( &attr[i] ); hthread_attr_sethardware( &attr[i], (void*)base_array[i] ); // Initialize thread arguments thread_arg[i].num_items = num_items; thread_arg[i].data_ptr = start_addr; thread_arg[i].min_mutex = mutex; thread_arg[i].min = &min; start_addr+=num_items; } // Add in extra items for the last thread if needed thread_arg[i-1].num_items += extra_items; int num_ops = 0; for( num_ops = 0; num_ops < 2; num_ops = num_ops + 1) { printf("******* Round %d ********\n",num_ops); #ifdef USE_MB_THREAD printf("**** MB-based Threads ****\n"); #else printf("**** PPC-based Threads ****\n"); #endif min = 9999999; // Lock mutex before hand so that timing will not include thread creation time hthread_mutex_lock(mutex); // Start timing thread create time_create = xps_timer_read_counter(&timer); for (i = 0; i < NUM_THREADS; i++) { // Create the worker threads #ifdef USE_MB_THREAD // Create MB Thread sta[i] = hthread_create( &tid[i], &attr[i], (void*)(min_handle), (void*)(&thread_arg[i]) ); #else // Create SW Thread sta[i] = hthread_create( &tid[i], NULL, min_thread, (void*)(&thread_arg[i]) ); #endif } // Allow created threads to begin running and start timer time_start = xps_timer_read_counter(&timer); hthread_mutex_unlock(mutex); time_unlock = xps_timer_read_counter(&timer); // Wait for the threads to exit //printf( "Waiting for thread(s) to complete... \n" ); for (i = 0; i < NUM_THREADS; i++) { hthread_join( tid[i], &retval[i] ); } time_stop = xps_timer_read_counter(&timer); // Display results printf("Min = %f\n",min); for (i = 0; i < NUM_THREADS; i++) { printf("TID = 0x%08x, status = 0x%08x, retval = 0x%08x\n",tid[i],sta[i],(Huint)retval[i]); } printf("*********************************\n"); printf("Create time = %u\n",time_create); printf("Start time = %u\n",time_start); printf("Unlock time = %u\n",time_unlock); printf("Stop time = %u\n",time_stop); printf("*********************************\n"); printf("Creation time (|Start - Create|) = %u\n",time_start - time_create); printf("Unlock time (|Unlock - Start|) = %u\n",time_unlock - time_start); printf("Elapsed time (|Stop - Start|) = %u\n",time_stop - time_start); } hthread_mutex_destroy( mutex ); free( mutex ); // Clean up the attribute structures for (i = 0; i < NUM_THREADS; i++) { hthread_attr_destroy( &attr[i] ); } printf ("-- Complete --\n"); // Return from main return 0; }