/*
 * Build a thread attribute object and return it by value.
 *
 * The object is initialised with hthread_attr_init() and then explicitly
 * marked HTHREAD_CREATE_JOINABLE.
 */
hthread_attr_t create_attr() {
  hthread_attr_t joinable_attr;

  hthread_attr_init(&joinable_attr);

  /* Joinable is requested explicitly to stay portable if this code is
     moved over to pthreads. */
  hthread_attr_setdetachstate(&joinable_attr, HTHREAD_CREATE_JOINABLE);

  return joinable_attr;
}
Esempio n. 2
0
/*
 * Thread body used by the test.
 *
 * arg points to a struct testdata holding the child thread handle, the
 * attribute object and the function the child runs.  The child is created,
 * the attribute object is switched to detached afterwards, the child is
 * joined, and the join status becomes this thread's exit value.
 */
void * testThread ( void * arg ) {
	struct testdata * td = (struct testdata *) arg;
	int join_status;

	/* Create the child with the attributes as they currently stand. */
	hthread_create( &td->thread, td->attr, td->function, NULL );

	/* Only now is the attribute object changed to detached. */
	hthread_attr_setdetachstate( td->attr, HTHREAD_CREATE_DETACHED );

	join_status = hthread_join( td->thread, NULL );

	/* Report the join result through the thread's exit value. */
	hthread_exit( (void *) join_status );
	return NULL;
}
Esempio n. 3
0
/*-------------------------------------------------------------------*/
/*
 * Initialise the thread attribute object `pat`, give it the stack size
 * HTHREAD_STACK_SIZE and apply the requested detach `state`.
 *
 * The steps run in that order and stop at the first one that returns a
 * non-zero code; that code is returned.  Returns 0 when all steps succeed.
 * `location` is accepted but not used.
 */
static INLINE int hthread_init_thread_attr( ATTR* pat, int state,
                                            const char* location )
{
    int rc;

    UNREFERENCED( location );

    rc = hthread_attr_init( pat );
    if (rc != 0)
        return rc;

    rc = hthread_attr_setstacksize( pat, HTHREAD_STACK_SIZE );
    if (rc != 0)
        return rc;

    return hthread_attr_setdetachstate( pat, state );
}
/*
 * Test driver for thread creation and joining.
 *
 * Phases, in order:
 *   1. create and immediately join one hardware thread per slave CPU and
 *      check its return value against base_array[];
 *   2. create NUM_THREADS software threads, then join them all;
 *   3. create hardware threads plus enough software threads to reach
 *      NUM_THREADS, then try NUM_THREADS more creations which are expected
 *      to fail;
 *   4. join on every entry of tid[].
 *
 * Each failed check is reported with PRINT_ERROR and latches `failed`.
 *
 * Returns TEST_PASSED if no check failed, TEST_FAILED otherwise.
 */
int main() {
    unsigned int i = 0;
    unsigned int failed = 0;
    // The join result is received in a pointer-sized slot: passing the
    // address of an int lets hthread_join() write past it on platforms
    // where a pointer is wider than an int.
    void * retVal = NULL;

    // Allocate NUM_THREADS thread ids and one attribute per slave CPU
    hthread_t * tid = (hthread_t *) malloc(sizeof(hthread_t) * NUM_THREADS);
    hthread_attr_t * attr = (hthread_attr_t *) malloc(sizeof(hthread_attr_t) * NUM_AVAILABLE_HETERO_CPUS);

    assert(tid);
    assert(attr);

    // Set up attributes for a hardware thread
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        hthread_attr_init(&attr[i]);
        hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
    }

    // Create hardware threads first
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        // Create thread -- Assuming that thread manager will give us
        // a TID = 2 every time since we are creating & joining 1 thread
        // at a time.
        if (microblaze_create( &tid[i], &attr[i], foo_thread_FUNC_ID, (void *) 2, i) ) {
            failed = 1;
            PRINT_ERROR(THREAD_HARDWARE_CREATE_FAILED);
        }
        if (hthread_join( tid[i], &retVal ) ) {
            failed = 1;
            PRINT_ERROR(THREAD_HARDWARE_JOIN_FAILED);
        }
        // Make sure the return value is equal to base_array[i]
        if (base_array[i] != ((unsigned int) (size_t) retVal - HT_CMD_HWTI_COMMAND)) {
            failed = 1;
            PRINT_ERROR(THREAD_HARDWARE_INCORRECT_RETURN);
        }
    }

    // Create all threads as software threads
    for (i = 0; i < NUM_THREADS; i++) {
        if (hthread_create( &tid[i], NULL, foo_thread, (void *) 2 )) {
            failed = 1;
            PRINT_ERROR(THREAD_SOFTWARE_CREATE_FAILED);
        }
    }
    // Now join on all software threads we just created
    for (i = 0; i < NUM_THREADS; i++) {
        if (hthread_join(tid[i], &retVal )) {
            failed = 1;
            PRINT_ERROR(THREAD_SOFTWARE_JOIN_FAILED);
        }
    }

    // Create NUM_THREADS threads
    // ----> Create hardware threads first
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        if (microblaze_create( &tid[i], &attr[i], foo2_thread_FUNC_ID, (void *) i, i) ) {
            failed = 1;
            PRINT_ERROR(THREAD_HARDWARE_CREATE_FAILED);
        }
    }

    // ----> The remaining are software threads
    for (i = NUM_AVAILABLE_HETERO_CPUS; i < NUM_THREADS; i++) {
        if (hthread_create( &tid[i], NULL, foo2_thread, (void *) i )) {
            failed = 1;
            PRINT_ERROR(THREAD_SOFTWARE_CREATE_FAILED);
        }
    }

    // Try to create more here --SHOULD FAIL!!!
    for (i = 0; i < NUM_THREADS; i++) {
        // A successful creation here is the error case
        if (hthread_create( &tid[i], NULL, foo2_thread, (void *) i ) == SUCCESS ) {
            failed = 1;
            PRINT_ERROR(THREAD_SOFTWARE_ERROR_FAILED);
        }
    }

    // Clean up- Join on the threads.
    for (i = 0; i < NUM_THREADS; i++) {
        if (hthread_join(tid[i], &retVal )) {
            failed = 1;
            PRINT_ERROR(FINAL_JOIN_ERROR);
        }
    }

    // Test dynamic_create_smart


#ifdef SPLIT_BRAM
    // Test microblaze_create_DMA and dyanmic_create_smart_DMA

#endif

    if (failed) {
        PRINT_ERROR(TEST_FAILED);
    }
    else {
        PRINT_ERROR(TEST_PASSED);
    }

    free(tid);
    free(attr);

    // Propagate the outcome instead of unconditionally reporting success.
    return failed ? TEST_FAILED : TEST_PASSED;
}
Esempio n. 5
0
int main() {
   
   printf("--- combined2 Kernel benchmark ---\n"); 
   printf("Number of Slave processors: %d\n", NUM_AVAILABLE_HETERO_CPUS);
#ifdef OPCODE_FLAGGING
   printf("-->Opcode flagging ENABLED\n");
#else
   printf("-->Opcode flagging DISABLED\n");
#endif
   // Initialize various host tables once.
   init_host_tables();
   
   // Create Queue software thread
   hthread_t queue_tid;
   hthread_attr_t queue_attr;
   hthread_attr_init(&queue_attr);
   if (thread_create(&queue_tid, &queue_attr,queue_thread_FUNC_ID,(void *) &exec_time[0], SOFTWARE_THREAD,0)){
      printf("Error creating Queue thread\n");
      while(1);
   }
   // Reset
   create_overhead = 0;

   Huint i = 0;
   // PI
   pi_t thread_data[PI_NUM_THREADS];
   for (i = 0; i < PI_NUM_THREADS; i++) {
      thread_data[i].pi = 0;
      thread_data[i].MaxIterations = PI_MAX_ITERATIONS;
   }

   // HISTOGRAM
   // Thread attribute structures
   histogram_t * thread_arg = (histogram_t *) malloc(sizeof(histogram_t) * HISTOGRAM_NUM_THREADS);
   assert (thread_arg != NULL);

   // Array Structures
   int my_array[HISTOGRAM_NUM_THREADS][ARR_SIZE];
   int my_hist[HISTOGRAM_NUM_THREADS][NUM_BINS];

   int num_ops = 0, j = 0;;

   // Initialize histograms
   for (j = 0; j < HISTOGRAM_NUM_THREADS; j++) {
      int i;
      for (i = 0; i < NUM_BINS; i++)
         my_hist[j][i] = 0;
      for (i = 0; i < ARR_SIZE; i++) 
         my_array[j][i] = i+num_ops % MOD_VAL;
   }

   // Initialize thread argument
   for (j = 0; j < HISTOGRAM_NUM_THREADS; j++) 
   {
      thread_arg[j].array = (int *)&my_array[j][0];
      thread_arg[j].hist = (int *)&my_hist[j][0];
      thread_arg[j].max_value = MOD_VAL - 1;
      thread_arg[j].min_value = 0;
   }

   // -------- DISTANCE --------------- //
    // Thread attribute structures
    distance_t distance_arg[DISTANCE_NUM_THREADS];

    float vals_x0[DISTANCE_ARR_LENGTH];
    float vals_x1[DISTANCE_ARR_LENGTH];

    float vals_y0[DISTANCE_ARR_LENGTH];
    float vals_y1[DISTANCE_ARR_LENGTH];

    float vals_ds[DISTANCE_ARR_LENGTH];
    for (j = 0; j < DISTANCE_ARR_LENGTH; j++)
    {
        vals_x0[j] = (float) DISTANCE_ARR_LENGTH - j;
        vals_y0[j] = (float) DISTANCE_ARR_LENGTH - j;

        vals_x1[j] = (float) j + 1;
        vals_y1[j] = (float) DISTANCE_ARR_LENGTH - j + 1;
    }

    // Initialize thread arguments
    int num_items = DISTANCE_ARR_LENGTH/DISTANCE_NUM_THREADS;
    int extra_items = DISTANCE_ARR_LENGTH - (num_items*DISTANCE_NUM_THREADS);
    for ( j= 0; j < DISTANCE_NUM_THREADS; j++)
    {
       distance_arg[j].x0s = &vals_x0[j*(num_items)];
       distance_arg[j].y0s = &vals_y0[j*(num_items)];
       distance_arg[j].x1s = &vals_x1[j*(num_items)];
       distance_arg[j].y1s = &vals_y1[j*(num_items)];
       distance_arg[j].distances = &vals_ds[j*(num_items)];
       distance_arg[j].length = num_items;
    }
    // Add in extra items for the last thread if needed
    distance_arg[j-1].length += extra_items;

    // Matrix Multiply
    matrix_t matrix_arg[MATRIX_NUM_THREADS];
    int n;
    for (n = 0; n < MATRIX_NUM_THREADS; n++) {
      for (i = 0; i < MATRIX_A_ROW; i++) {
	      for (j = 0; j < MATRIX_A_COL; j++) {
	         matrix_arg[n].matrixA[i][j] = i + j;
	         matrix_arg[n].matrixB[i][j] = i + j;
	         matrix_arg[n].matrixC[i][j] = 0;
	      }
      }
    }

    // -------- Find MAx-------------- //
    max_t findmax_arg[FINDMAX_NUM_THREADS];
    for (i = 0; i < FINDMAX_NUM_THREADS; i++) {
       findmax_arg[i].length = FINDMAX_LENGTH;
       findmax_arg[i].shift_amount = sizeof(findmax_arg[i].A[0]);
       for (j = 0; j < FINDMAX_LENGTH; j++) {
          findmax_arg[i].A[j] = (int) (rand() % FINDMAX_LENGTH);
          findmax_arg[i].B[j] = (int) (rand() % FINDMAX_LENGTH);
          findmax_arg[i].result[j] = 0;
       }
    }

   // Set all threads to detached   
   for(i = 0; i < NUM_THREADS; i++) {
		hthread_attr_init(&attr[i]);
		hthread_attr_setdetachstate(&attr[i], HTHREAD_CREATE_DETACHED);
	}

   hthread_time_t start = hthread_time_get();

   thread_create( &tid[0 ], &attr[0 ], distance_thread_FUNC_ID, (void *) &distance_arg[2], DYNAMIC_HW, 0);
   thread_create( &tid[1 ], &attr[1 ], pi_thread_FUNC_ID, (void *) &thread_data[10], DYNAMIC_HW, 0);
   thread_create( &tid[2 ], &attr[2 ], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[6], DYNAMIC_HW, 0);
   thread_create( &tid[3 ], &attr[3 ], distance_thread_FUNC_ID, (void *) &distance_arg[0], DYNAMIC_HW, 0);
   thread_create( &tid[4 ], &attr[4 ], distance_thread_FUNC_ID, (void *) &distance_arg[6], DYNAMIC_HW, 0);
   thread_create( &tid[5 ], &attr[5 ], pi_thread_FUNC_ID, (void *) &thread_data[4], DYNAMIC_HW, 0);
   thread_create( &tid[6 ], &attr[6 ], find_max_thread_FUNC_ID, (void *) &findmax_arg[9], DYNAMIC_HW, 0);
   thread_create( &tid[7 ], &attr[7 ], pi_thread_FUNC_ID, (void *) &thread_data[12], DYNAMIC_HW, 0);
   thread_create( &tid[8 ], &attr[8 ], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[0], DYNAMIC_HW, 0);
   thread_create( &tid[9 ], &attr[9 ], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[10], &attr[10], find_max_thread_FUNC_ID, (void *) &findmax_arg[8], DYNAMIC_HW, 0);
   thread_create( &tid[11], &attr[11], pi_thread_FUNC_ID, (void *) &thread_data[2], DYNAMIC_HW, 0);
   thread_create( &tid[12], &attr[12], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[13], &attr[13], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[14], &attr[14], distance_thread_FUNC_ID, (void *) &distance_arg[7], DYNAMIC_HW, 0);
   thread_create( &tid[15], &attr[15], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[16], &attr[16], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[17], DYNAMIC_HW, 0);
   thread_create( &tid[17], &attr[17], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[18], &attr[18], find_max_thread_FUNC_ID, (void *) &findmax_arg[4], DYNAMIC_HW, 0);
   thread_create( &tid[19], &attr[19], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[11], DYNAMIC_HW, 0);
   thread_create( &tid[20], &attr[20], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[8], DYNAMIC_HW, 0);
   thread_create( &tid[21], &attr[21], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[13], DYNAMIC_HW, 0);
   thread_create( &tid[22], &attr[22], pi_thread_FUNC_ID, (void *) &thread_data[6], DYNAMIC_HW, 0);
   thread_create( &tid[23], &attr[23], distance_thread_FUNC_ID, (void *) &distance_arg[4], DYNAMIC_HW, 0);
   thread_create( &tid[24], &attr[24], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[25], &attr[25], distance_thread_FUNC_ID, (void *) &distance_arg[1], DYNAMIC_HW, 0);
   thread_create( &tid[26], &attr[26], pi_thread_FUNC_ID, (void *) &thread_data[11], DYNAMIC_HW, 0);
   thread_create( &tid[27], &attr[27], find_max_thread_FUNC_ID, (void *) &findmax_arg[3], DYNAMIC_HW, 0);
   thread_create( &tid[28], &attr[28], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[15], DYNAMIC_HW, 0);
   thread_create( &tid[29], &attr[29], distance_thread_FUNC_ID, (void *) &distance_arg[8], DYNAMIC_HW, 0);
   thread_create( &tid[30], &attr[30], pi_thread_FUNC_ID, (void *) &thread_data[9], DYNAMIC_HW, 0);
   thread_create( &tid[31], &attr[31], find_max_thread_FUNC_ID, (void *) &findmax_arg[10], DYNAMIC_HW, 0);
   thread_create( &tid[32], &attr[32], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[33], &attr[33], pi_thread_FUNC_ID, (void *) &thread_data[1], DYNAMIC_HW, 0);
   thread_create( &tid[34], &attr[34], histogram_thread_FUNC_ID, (void*)(&thread_arg[1]),DYNAMIC_HW,0 );
   thread_create( &tid[35], &attr[35], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[36], &attr[36], distance_thread_FUNC_ID, (void *) &distance_arg[5], DYNAMIC_HW, 0);
   thread_create( &tid[37], &attr[37], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[38], &attr[38], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[39], &attr[39], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[40], &attr[40], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[41], &attr[41], pi_thread_FUNC_ID, (void *) &thread_data[0], DYNAMIC_HW, 0);
   thread_create( &tid[42], &attr[42], pi_thread_FUNC_ID, (void *) &thread_data[14], DYNAMIC_HW, 0);
   thread_create( &tid[43], &attr[43], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[12], DYNAMIC_HW, 0);
   thread_create( &tid[44], &attr[44], histogram_thread_FUNC_ID, (void*)(&thread_arg[2]),DYNAMIC_HW,0 );
   thread_create( &tid[45], &attr[45], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[3], DYNAMIC_HW, 0);
   thread_create( &tid[46], &attr[46], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[47], &attr[47], distance_thread_FUNC_ID, (void *) &distance_arg[3], DYNAMIC_HW, 0);
   thread_create( &tid[48], &attr[48], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[2], DYNAMIC_HW, 0);
   thread_create( &tid[49], &attr[49], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[16], DYNAMIC_HW, 0);
   thread_create( &tid[50], &attr[50], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[7], DYNAMIC_HW, 0);
   thread_create( &tid[51], &attr[51], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[5], DYNAMIC_HW, 0);
   thread_create( &tid[52], &attr[52], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[53], &attr[53], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[14], DYNAMIC_HW, 0);
   thread_create( &tid[54], &attr[54], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[10], DYNAMIC_HW, 0);
   thread_create( &tid[55], &attr[55], pi_thread_FUNC_ID, (void *) &thread_data[13], DYNAMIC_HW, 0);
   thread_create( &tid[56], &attr[56], find_max_thread_FUNC_ID, (void *) &findmax_arg[12], DYNAMIC_HW, 0);
   thread_create( &tid[57], &attr[57], pi_thread_FUNC_ID, (void *) &thread_data[8], DYNAMIC_HW, 0);
   thread_create( &tid[58], &attr[58], find_max_thread_FUNC_ID, (void *) &findmax_arg[5], DYNAMIC_HW, 0);
   thread_create( &tid[59], &attr[59], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[60], &attr[60], find_max_thread_FUNC_ID, (void *) &findmax_arg[1], DYNAMIC_HW, 0);
   thread_create( &tid[61], &attr[61], find_max_thread_FUNC_ID, (void *) &findmax_arg[0], DYNAMIC_HW, 0);
   thread_create( &tid[62], &attr[62], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[1], DYNAMIC_HW, 0);
   thread_create( &tid[63], &attr[63], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[64], &attr[64], pi_thread_FUNC_ID, (void *) &thread_data[7], DYNAMIC_HW, 0);
   thread_create( &tid[65], &attr[65], find_max_thread_FUNC_ID, (void *) &findmax_arg[11], DYNAMIC_HW, 0);
   thread_create( &tid[66], &attr[66], find_max_thread_FUNC_ID, (void *) &findmax_arg[6], DYNAMIC_HW, 0);
   thread_create( &tid[67], &attr[67], find_max_thread_FUNC_ID, (void *) &findmax_arg[2], DYNAMIC_HW, 0);
   thread_create( &tid[68], &attr[68], pi_thread_FUNC_ID, (void *) &thread_data[5], DYNAMIC_HW, 0);
   thread_create( &tid[69], &attr[69], histogram_thread_FUNC_ID, (void*)(&thread_arg[1]),DYNAMIC_HW,0 );
   thread_create( &tid[70], &attr[70], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[71], &attr[71], find_max_thread_FUNC_ID, (void *) &findmax_arg[7], DYNAMIC_HW, 0);
   thread_create( &tid[72], &attr[72], pi_thread_FUNC_ID, (void *) &thread_data[3], DYNAMIC_HW, 0);
   thread_create( &tid[73], &attr[73], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[74], &attr[74], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[75], &attr[75], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[9], DYNAMIC_HW, 0);
   thread_create( &tid[76], &attr[76], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[4], DYNAMIC_HW, 0);
   thread_create( &tid[77], &attr[77], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);


   // Wait until all threads are finished 
	while(get_num_free_slaves() < NUM_AVAILABLE_HETERO_CPUS || thread_entries != 0) {
      if (thread_entries != 0)
         hthread_yield();
   }
   
   hthread_time_t stop = hthread_time_get();

	printf("---------------------------\n");
	hthread_time_t diff;
	hthread_time_diff(diff, stop, start);
	printf("Total Execution Time: %.2f ms\n", hthread_time_msec(diff));
	printf("Total Execution Time: %.2f us\n", hthread_time_usec(diff));
    
#if 0
    // Grab the total number of calls statistic.
    printf("Total number of thread_create (DYNAMIC) calls: %d\n", total_calls);
    printf("---------------------------------------------------\n");
    printf("Perfect Ratio:  %03d / %03d = %0.2f\n", perfect_match_counter, total_calls, perfect_match_counter / (1.0f * total_calls));
    printf("Best Ratio:     %03d / %03d = %0.2f\n", best_match_counter, total_calls, best_match_counter / (1.0f * total_calls));
    printf("Better Ratio:   %03d / %03d = %0.2f\n", better_match_counter, total_calls, better_match_counter / (1.0f * total_calls));
    printf("Possible Ratio: %03d / %03d = %0.2f\n", possible_match_counter, total_calls, possible_match_counter / (1.0f * total_calls));

    perfect_match_counter = 0;
    best_match_counter = 0;
    better_match_counter = 0;
    possible_match_counter = 0;

    Huint hw_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint sw_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint pr_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint total_hw_count = 0;
    Huint total_sw_count = 0;
    Huint total_pr_count = 0;

    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        hw_counter[i] = _hwti_get_accelerator_hw_counter(hwti_array[i]);
        sw_counter[i] = _hwti_get_accelerator_sw_counter(hwti_array[i]);
        pr_counter[i] = _hwti_get_accelerator_pr_counter(hwti_array[i]);

        total_hw_count += hw_counter[i];
        total_sw_count += sw_counter[i];
        total_pr_count += pr_counter[i];

        // Manually Reset
        _hwti_set_accelerator_hw_counter(hwti_array[i], 0);
        _hwti_set_accelerator_sw_counter(hwti_array[i], 0);
        _hwti_set_accelerator_pr_counter(hwti_array[i], 0);
    }

    printf("Total HW Counter: %d\n", total_hw_count);
    printf("Total SW Counter: %d\n", total_sw_count);
    printf("Total PR Counter: %d\n", total_pr_count);
    printf("-----------------------\n");
    if (total_hw_count)     // if total_hw_count != 0
        printf("Total PR Counter / HW Counter = %f\n", total_pr_count / (1.0 *total_hw_count));
    printf("Total PR Counter / HW+SW Counter = %f\n", total_pr_count / (1.0 *(total_hw_count+total_sw_count)));
#endif
    printf("Total OS overhead (thread_create) = %f msec\n", hthread_time_msec(create_overhead));
#if 0
    hthread_time_t software_time = 0;
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
       volatile hthread_time_t * temp = (hthread_time_t *) (hwti_array[i] + 0x100);
       printf("%d: Software Execution = %f msec\n",i, hthread_time_msec(*temp));
       software_time += *temp;
    }
    printf("Total Software Execution = %f msec\n", hthread_time_msec(software_time));
#endif
   // Display thread times
   for (i = 0; i < NUM_THREADS; i++) { 
      // Determine which slave ran this thread based on address
      Huint base = attr[i].hardware_addr - HT_HWTI_COMMAND_OFFSET;
      Huint slave_num = (base & 0x00FF0000) >> 16;
      printf("Execution time (TID : %d, Slave : %d, HW ADDRESS = 0x%08x)\n", tid[i], slave_num, attr[i].hardware_addr);
   }
  

   printf("--- Done ---\n");

   return 0;
}
Esempio n. 6
0
int main(){

   printf("HOST: START\n");
   // Initialize various host tables once.
   init_host_tables();

   int i = 0; unsigned int j = 0, h,k;
   int ret[NUM_AVAILABLE_HETERO_CPUS];
   Hint * ptr;
	Data3 input3[NUM_AVAILABLE_HETERO_CPUS];

   printf("HOST: Creating thread & attribute structures\n");
   hthread_t * child = (hthread_t *) malloc(sizeof(hthread_t) * NUM_AVAILABLE_HETERO_CPUS);
   hthread_attr_t * attr = (hthread_attr_t *) malloc(sizeof(hthread_attr_t) * NUM_AVAILABLE_HETERO_CPUS);
   assert (child != NULL);
   assert (attr != NULL);

#ifdef TEST_PR
   printf("------------------------------------\n");
   printf("HOST: Testing PR\n");
      
   // Set up attributes for a hardware thread
   for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { 
      hthread_attr_init(&attr[i]);
      hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
   }

   // Creating threads
   for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
      if (thread_create (&child[i], &attr[i],test_PR_thread_FUNC_ID, (void *)(NUM_TRIALS),
                     #ifndef HARDWARE_THREAD
                       SOFTWARE_THREAD,
                     #else
                       STATIC_HW0 + i,
                     #endif
                       0))
      {
         printf("hthread_create error on HW THREAD %d\n", i);
         while(1);
      }
   }

   // Joining threads
   for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
      // Join on child thread
      if( hthread_join(child[i], (void *) &ret[i])) {
         printf("Error joining child thread\n");
         while(1);
      }
      if (ret[i] != SUCCESS)
         printf("Thread %02d Failed:  %d\n",i, ret[i]);
   }
   printf("HOST: Done\n");
#endif

    
#ifdef USER_SORT
   int list[NUM_AVAILABLE_HETERO_CPUS][LIST_LENGTH];
   printf("------------------------------------\n");
   printf("HOST: Testing SORT\n");
   // initialized the list 
   for (j = 0; j < NUM_TRIALS; j++) {

      for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
         for (i = 0; i < LIST_LENGTH; i++) {
            //list[h][i] = rand() % 1000;
            list[h][i] = LIST_LENGTH-i;
         }
      }

      #if 0 
      printf("Printing original lists\n");
      for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
         printf("List[%d]: ", h);
         for (i = 0; i < LIST_LENGTH; i++) {
            printf("..%d", list[h][i]);
         }
         printf("\n");
      }
      #endif

      // Set up attributes for a hardware thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { 
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Creating threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i],sort_thread_FUNC_ID, (void *)(&list[i][0]),
                     #ifndef HARDWARE_THREAD
                       SOFTWARE_THREAD,
                     #elif DYNAMIC
                       DYNAMIC_HW,
                     #else
                       STATIC_HW0 + i,
                     #endif
                       0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
       }

      // Joining threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         // Join on child thread
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
         //printf("Thread %02d Result = %d\n",i,ret[i]);
      }

      // Check results
      //printf("Now checking the lists\n");
      for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
         //printf("List[%d]: ", h);
         //for (i = LIST_LENGTH - (LIST_LENGTH-1); i < LIST_LENGTH; i++) {
         for (i = 0; i < LIST_LENGTH-1; i++) {
            //printf("..%d", list[h][i]);
            if (list[h][i] > list[h][i+1]) {
               printf("*");
               printf("[TRIAL %d, Slave %d] Sort failed!\n", j, h);
               i = LIST_LENGTH;
            }
         }
         // Print last element
         //printf("..%d", list[h][i]);
         //printf("\n");
      }
   }
   printf("HOST: Done\n");
#endif


#ifdef USER_CRC
   printf("------------------------------------\n");
   printf("HOST: Testing CRC\n");
   
   Hint * input;
   Hint * check, index = 0;
   for (j = 0; j < NUM_TRIALS; j++) {
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         check = (Hint *) malloc(ARRAY_SIZE * sizeof(Hint)); 
         assert(input != NULL);
         assert(check != NULL);

         // Initializing the data
         ptr = input;
         for(index = 0; index < ARRAY_SIZE; index++) {
            *ptr = (rand() % 1000)*8;	
            *(check+index) = *ptr;
            ptr++;
         }
     
         // Generating the CRC of that data
         if (poly_crc(check, ARRAY_SIZE)) {
            printf("Host failed to generate CRC check of data\n");
            while(1);
         }
         
         // Set up attributes for a hardware thread
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);


         // Creating threads
         if (thread_create (&child[i], &attr[i], crc_thread_FUNC_ID, (void *)input,
                        #ifndef HARDWARE_THREAD
                           SOFTWARE_THREAD,
                        #elif DYNAMIC
                           DYNAMIC_HW,
                        #else
                           STATIC_HW0 + i,
                        #endif
                        0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      
         // Join on child thread
         int status;
         status = hthread_join(child[i], (void *) &ret[i]); 
         if (status) {
            printf("Error joining child thread: %d\n", status);
            while(1);
         }
         //printf("Thread %02d Result = %d\n",i,ret[i]);

         // For CRC Results
         for ( h = 0; h < ARRAY_SIZE; h++) {
            if (*(input+h) != *(check+h) )  {
               printf("[TRIAL %d, Slave %d] CRC failed!\n", j, i);
               h = ARRAY_SIZE;
            }
         } 
         // Release memory
         free(input);
         free(check);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_MATRIXMUL
   printf("------------------------------------\n");
   printf("HOST: Testing MatrixMul\n");

   data package[NUM_AVAILABLE_HETERO_CPUS];
   for (j = 0; j < NUM_TRIALS; j++) {
   
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         package[i].dataA = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint)); 	
         package[i].dataB = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint)); 	
         package[i].dataC = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint));
         assert(package[i].dataA != NULL);   
         assert(package[i].dataB != NULL);   
         assert(package[i].dataC != NULL);
         Huint l;   
         for (k = 0; k < MATRIX_SIZE; k++) {
            for (l = 0; l < MATRIX_SIZE; l++) {
               package[i].dataA[k*MATRIX_SIZE + l] = k;
               package[i].dataB[k*MATRIX_SIZE + l] = l;
               package[i].dataC[k*MATRIX_SIZE + l] = 0;
            }
         }
         package[i].size = MATRIX_SIZE;
         unsigned int row, col;
#if 0
         printf("Original Matrix A: 0x%08x\n", package[i].dataA);
         for (row=0 ; row < MATRIX_SIZE; row++) {
            for (col=0 ; col < MATRIX_SIZE; col++) {
               printf("%02d ", package[i].dataA[row*MATRIX_SIZE+col]);
            }
            printf("\n");
         }
         printf("Original Matrix B: 0x%08x\n", package[i].dataB);
         for (row=0 ; row < MATRIX_SIZE; row++) {
            for (col=0 ; col < MATRIX_SIZE; col++) {
               printf("%02d ", package[i].dataB[row*MATRIX_SIZE+col]);
            }
            printf("\n");
         }

         printf("Original Matrix C: 0x%08x\n", package[i].dataC);
         for (row=0 ; row < MATRIX_SIZE; row++) {
            for (col=0 ; col < MATRIX_SIZE; col++) {
               printf("%02d ", package[i].dataC[row*MATRIX_SIZE+col]);
            }
            printf("\n");
         }
#endif    

         // Set up attributes for a hardware thread
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);

         // Creating threads
         if (thread_create (&child[i], &attr[i], matrix_multiply_thread_FUNC_ID, (void *)(&package[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      
         // Join on child thread
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
         if (ret[i] != SUCCESS)
            printf("Return value for thread indicates an error!\n");
         #if 0 
         printf("New Matrix C:\n");
         for (row=0 ; row < MATRIX_SIZE; row++) {
            for (col=0 ; col < MATRIX_SIZE; col++) {
               printf("%02d ", package[i].dataC[row*MATRIX_SIZE+col]);
            }
            printf("\n");
         }
         #endif
         // Check results
         Hint temp[MATRIX_SIZE][MATRIX_SIZE];
         poly_matrix_mul(package[i].dataA, package[i].dataB, &temp, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE);
         int r, c;
         for (r=0 ; r < MATRIX_SIZE; r++) {
            for (c=0 ; c < MATRIX_SIZE; c++) {
               if ( temp[r][c] != package[i].dataC[r*MATRIX_SIZE + c])  {
                  printf("[TRIAL %d, Slave %d] Matrix Mul failed!\n", j, i);
                  r = c = MATRIX_SIZE;
               }
            }
         }
         
         // Release memory
         free(package[i].dataA); 
         free(package[i].dataB); 
         free(package[i].dataC);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_VECTORSUB
   printf("------------------------------------\n");
   printf("HOST: Testing VectorSub\n");
   
   for (j = 0; j < NUM_TRIALS; j++) {
   
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
         input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
         input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;
         
         for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/} 
         for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/}
      }    
      
      // Set up attributes for a hardware thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { 
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Creating threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i], vector_sub_thread_FUNC_ID, (void *)(&input3[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }
      
      // Joining threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         // Join on child thread
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
      }
      
      // Check results
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         for (h=0 ; h < ARRAY_SIZE; h++) {
            if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] - input3[i].startAddr2[h]))  {
               printf("[TRIAL %d, Slave %d] Vector Sub failed!\n", j, i);
               h = ARRAY_SIZE;
            }
         }
         
         // Release memory
         free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_VECTORADD
   printf("------------------------------------\n");
   printf("HOST: Testing VectorAdd\n");
   
   for (j = 0; j < NUM_TRIALS; j++) {
   
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
         input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
         input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;
         
         for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/} 
         for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/}
      }    
      
      // Set up attributes for a hardware thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { 
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Creating threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i], vector_add_thread_FUNC_ID, (void *)(&input3[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }
      
      // Joining threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         // Join on child thread
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
      }
      
      // Check results
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         for (h=0 ; h < ARRAY_SIZE; h++) {
            if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] + input3[i].startAddr2[h]))  {
               printf("[TRIAL %d, Slave %d] Vector Add failed!\n", j, i);
               h = ARRAY_SIZE;
            }
         }
         
         // Release memory
         free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_VECTORMUL
   printf("------------------------------------\n");
   printf("HOST: Testing VectorMultiply\n");
   
   for (j = 0; j < NUM_TRIALS; j++) {
   
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
         input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
         input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;
         
         for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/} 
         for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/}
      }    
      
      // Set up attributes for a hardware thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { 
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Creating threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i], vector_multiply_thread_FUNC_ID, (void *)(&input3[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }
      
      // Joining threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         // Join on child thread
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
      }
      
      // Check results
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         for (h=0 ; h < ARRAY_SIZE; h++) {
            if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] * input3[i].startAddr2[h]))  {
               printf("[TRIAL %d, Slave %d] Vector Multiply failed!\n", j, i);
               h = ARRAY_SIZE;
            }
         }
         
         // Release memory
         free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_VECTORDIV
   printf("------------------------------------\n");
   printf("HOST: Testing VectorDivide\n");
   
   for (j = 0; j < NUM_TRIALS; j++) {
   
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
         input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
         input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint)); 	
         input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;
         
         for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/} 
         for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) (rand() % 1000) + 1;	/* printf( " %i \n",*ptr );*/}
      }    
      
      // Set up attributes for a hardware thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) { 
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Creating threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i], vector_divide_thread_FUNC_ID, (void *)(&input3[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }
      
      // Joining threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         // Join on child thread
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
      }
      
      // Check results
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         for (h=0 ; h < ARRAY_SIZE; h++) {
            // The divisor (startAddr2) was generated above as (rand() % 1000) + 1, so it is never zero. Hence, no need to check for / 0
            if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] / input3[i].startAddr2[h]))  {
               printf("[TRIAL %d, Slave %d] Vector Divide failed!\n", j, i);
               h = ARRAY_SIZE;
            }
         }
         
         // Release memory
         free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3);
      }
   }
   printf("HOST: Done\n");
#endif
        
   printf("END\n");
   return 0;
}