Exemplo n.º 1
0
/*
 * crc - run the software CRC over a list, one BRAM_SIZE-sized chunk at a time.
 *
 * list_ptr : start of the data; it is indexed here in units of Huint.
 * size     : number of Huint elements to process.
 * done     : not used by this function.
 *
 * Returns SUCCESS if every sw_crc() call returned SUCCESS (including the
 * case where size is 0 and no chunk is processed). Otherwise returns the
 * first non-SUCCESS value from sw_crc(); later chunks are then skipped.
 *
 * Side effect: the accelerator software counter reached through the VHWTI
 * base is read, incremented by one, and written back on every call.
 */
Hint crc (void * list_ptr, Huint size, Huint * done) {

    // 'done' is deliberately left alone: slave accelerator calls do not use it.

    // Fetch the VHWTI base from the PVRs.
    // NOTE(review): the variable is passed by address here; confirm that
    // this matches the way getpvr is defined for this build.
    Huint vhwti_base = 0;
    getpvr(1,&vhwti_base);

    // Account for one more software execution.
    Huint sw_counter = _hwti_get_accelerator_sw_counter( vhwti_base );
    _hwti_set_accelerator_sw_counter( vhwti_base, ++sw_counter);

    // Number of chunks needed: whole BRAM_SIZE blocks, plus one more
    // when a partial block is left over.
    Huint chunk_count = size / BRAM_SIZE;
    if ((size % BRAM_SIZE) != 0) {
        chunk_count++;
    }

    Hint status = SUCCESS;
    Huint chunk = 0;
    while (chunk < chunk_count) {

        // Only a chunk that would run past the end of the list is
        // shortened; every other chunk is a full BRAM_SIZE.
        Huint chunk_len = 0;
        if (size < (BRAM_SIZE*(chunk+1))) {
            chunk_len = size-(BRAM_SIZE*chunk);
        } else {
            chunk_len = BRAM_SIZE;
        }

        // Start of this chunk, counted in Huint elements from list_ptr.
        Huint * chunk_ptr = (Huint *)list_ptr + BRAM_SIZE*chunk;

        // Run crc in software; give up on the first failure.
        status = sw_crc((void *) chunk_ptr, chunk_len);
        if (status != SUCCESS) {
            return status;
        }

        chunk++;
    }

    return status;
}
Exemplo n.º 2
0
// -------------------------------------------------------------- //
//     Initialization routine for all polymorphic functions       //
// -------------------------------------------------------------- //
// Asks useHW(acc, size) whether the accelerator should be used and
// increments the matching accelerator counter (hardware or software)
// found through the VHWTI base read from the PVRs.
//
//   acc  : passed through unchanged to useHW().
//   size : passed through unchanged to useHW().
//
// Returns the decision produced by useHW().
Hbool poly_init(Hint acc, Huint size) {

   // Fetch the VHWTI base from the PVRs.
   Huint vhwti_base = 0;
   getpvr(1,vhwti_base);

   // Should this call go to the accelerator?
   Hbool go_hw = useHW(acc,size);

   if (!go_hw) {
       // Software path: bump the SW counter and report the decision.
       Huint sw_count = _hwti_get_accelerator_sw_counter( vhwti_base );
       _hwti_set_accelerator_sw_counter( vhwti_base, ++sw_count);
       return go_hw;
   }

   // Hardware path: bump the HW counter.
   Huint hw_count = _hwti_get_accelerator_hw_counter( vhwti_base );
   _hwti_set_accelerator_hw_counter( vhwti_base, ++hw_count);

   return go_hw;
}
Exemplo n.º 3
0
int main() {
   
   printf("--- combined2 Kernel benchmark ---\n"); 
   printf("Number of Slave processors: %d\n", NUM_AVAILABLE_HETERO_CPUS);
#ifdef OPCODE_FLAGGING
   printf("-->Opcode flagging ENABLED\n");
#else
   printf("-->Opcode flagging DISABLED\n");
#endif
   // Initialize various host tables once.
   init_host_tables();
   
   // Create Queue software thread
   hthread_t queue_tid;
   hthread_attr_t queue_attr;
   hthread_attr_init(&queue_attr);
   if (thread_create(&queue_tid, &queue_attr,queue_thread_FUNC_ID,(void *) &exec_time[0], SOFTWARE_THREAD,0)){
      printf("Error creating Queue thread\n");
      while(1);
   }
   // Reset
   create_overhead = 0;

   Huint i = 0;
   // PI
   pi_t thread_data[PI_NUM_THREADS];
   for (i = 0; i < PI_NUM_THREADS; i++) {
      thread_data[i].pi = 0;
      thread_data[i].MaxIterations = PI_MAX_ITERATIONS;
   }

   // HISTOGRAM
   // Thread attribute structures
   histogram_t * thread_arg = (histogram_t *) malloc(sizeof(histogram_t) * HISTOGRAM_NUM_THREADS);
   assert (thread_arg != NULL);

   // Array Structures
   int my_array[HISTOGRAM_NUM_THREADS][ARR_SIZE];
   int my_hist[HISTOGRAM_NUM_THREADS][NUM_BINS];

   int num_ops = 0, j = 0;;

   // Initialize histograms
   for (j = 0; j < HISTOGRAM_NUM_THREADS; j++) {
      int i;
      for (i = 0; i < NUM_BINS; i++)
         my_hist[j][i] = 0;
      for (i = 0; i < ARR_SIZE; i++) 
         my_array[j][i] = i+num_ops % MOD_VAL;
   }

   // Initialize thread argument
   for (j = 0; j < HISTOGRAM_NUM_THREADS; j++) 
   {
      thread_arg[j].array = (int *)&my_array[j][0];
      thread_arg[j].hist = (int *)&my_hist[j][0];
      thread_arg[j].max_value = MOD_VAL - 1;
      thread_arg[j].min_value = 0;
   }

   // -------- DISTANCE --------------- //
    // Thread attribute structures
    distance_t distance_arg[DISTANCE_NUM_THREADS];

    float vals_x0[DISTANCE_ARR_LENGTH];
    float vals_x1[DISTANCE_ARR_LENGTH];

    float vals_y0[DISTANCE_ARR_LENGTH];
    float vals_y1[DISTANCE_ARR_LENGTH];

    float vals_ds[DISTANCE_ARR_LENGTH];
    for (j = 0; j < DISTANCE_ARR_LENGTH; j++)
    {
        vals_x0[j] = (float) DISTANCE_ARR_LENGTH - j;
        vals_y0[j] = (float) DISTANCE_ARR_LENGTH - j;

        vals_x1[j] = (float) j + 1;
        vals_y1[j] = (float) DISTANCE_ARR_LENGTH - j + 1;
    }

    // Initialize thread arguments
    int num_items = DISTANCE_ARR_LENGTH/DISTANCE_NUM_THREADS;
    int extra_items = DISTANCE_ARR_LENGTH - (num_items*DISTANCE_NUM_THREADS);
    for ( j= 0; j < DISTANCE_NUM_THREADS; j++)
    {
       distance_arg[j].x0s = &vals_x0[j*(num_items)];
       distance_arg[j].y0s = &vals_y0[j*(num_items)];
       distance_arg[j].x1s = &vals_x1[j*(num_items)];
       distance_arg[j].y1s = &vals_y1[j*(num_items)];
       distance_arg[j].distances = &vals_ds[j*(num_items)];
       distance_arg[j].length = num_items;
    }
    // Add in extra items for the last thread if needed
    distance_arg[j-1].length += extra_items;

    // Matrix Multiply
    matrix_t matrix_arg[MATRIX_NUM_THREADS];
    int n;
    for (n = 0; n < MATRIX_NUM_THREADS; n++) {
      for (i = 0; i < MATRIX_A_ROW; i++) {
	      for (j = 0; j < MATRIX_A_COL; j++) {
	         matrix_arg[n].matrixA[i][j] = i + j;
	         matrix_arg[n].matrixB[i][j] = i + j;
	         matrix_arg[n].matrixC[i][j] = 0;
	      }
      }
    }

    // -------- Find MAx-------------- //
    max_t findmax_arg[FINDMAX_NUM_THREADS];
    for (i = 0; i < FINDMAX_NUM_THREADS; i++) {
       findmax_arg[i].length = FINDMAX_LENGTH;
       findmax_arg[i].shift_amount = sizeof(findmax_arg[i].A[0]);
       for (j = 0; j < FINDMAX_LENGTH; j++) {
          findmax_arg[i].A[j] = (int) (rand() % FINDMAX_LENGTH);
          findmax_arg[i].B[j] = (int) (rand() % FINDMAX_LENGTH);
          findmax_arg[i].result[j] = 0;
       }
    }

   // Set all threads to detached   
   for(i = 0; i < NUM_THREADS; i++) {
		hthread_attr_init(&attr[i]);
		hthread_attr_setdetachstate(&attr[i], HTHREAD_CREATE_DETACHED);
	}

   hthread_time_t start = hthread_time_get();

   thread_create( &tid[0 ], &attr[0 ], distance_thread_FUNC_ID, (void *) &distance_arg[2], DYNAMIC_HW, 0);
   thread_create( &tid[1 ], &attr[1 ], pi_thread_FUNC_ID, (void *) &thread_data[10], DYNAMIC_HW, 0);
   thread_create( &tid[2 ], &attr[2 ], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[6], DYNAMIC_HW, 0);
   thread_create( &tid[3 ], &attr[3 ], distance_thread_FUNC_ID, (void *) &distance_arg[0], DYNAMIC_HW, 0);
   thread_create( &tid[4 ], &attr[4 ], distance_thread_FUNC_ID, (void *) &distance_arg[6], DYNAMIC_HW, 0);
   thread_create( &tid[5 ], &attr[5 ], pi_thread_FUNC_ID, (void *) &thread_data[4], DYNAMIC_HW, 0);
   thread_create( &tid[6 ], &attr[6 ], find_max_thread_FUNC_ID, (void *) &findmax_arg[9], DYNAMIC_HW, 0);
   thread_create( &tid[7 ], &attr[7 ], pi_thread_FUNC_ID, (void *) &thread_data[12], DYNAMIC_HW, 0);
   thread_create( &tid[8 ], &attr[8 ], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[0], DYNAMIC_HW, 0);
   thread_create( &tid[9 ], &attr[9 ], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[10], &attr[10], find_max_thread_FUNC_ID, (void *) &findmax_arg[8], DYNAMIC_HW, 0);
   thread_create( &tid[11], &attr[11], pi_thread_FUNC_ID, (void *) &thread_data[2], DYNAMIC_HW, 0);
   thread_create( &tid[12], &attr[12], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[13], &attr[13], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[14], &attr[14], distance_thread_FUNC_ID, (void *) &distance_arg[7], DYNAMIC_HW, 0);
   thread_create( &tid[15], &attr[15], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[16], &attr[16], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[17], DYNAMIC_HW, 0);
   thread_create( &tid[17], &attr[17], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[18], &attr[18], find_max_thread_FUNC_ID, (void *) &findmax_arg[4], DYNAMIC_HW, 0);
   thread_create( &tid[19], &attr[19], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[11], DYNAMIC_HW, 0);
   thread_create( &tid[20], &attr[20], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[8], DYNAMIC_HW, 0);
   thread_create( &tid[21], &attr[21], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[13], DYNAMIC_HW, 0);
   thread_create( &tid[22], &attr[22], pi_thread_FUNC_ID, (void *) &thread_data[6], DYNAMIC_HW, 0);
   thread_create( &tid[23], &attr[23], distance_thread_FUNC_ID, (void *) &distance_arg[4], DYNAMIC_HW, 0);
   thread_create( &tid[24], &attr[24], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[25], &attr[25], distance_thread_FUNC_ID, (void *) &distance_arg[1], DYNAMIC_HW, 0);
   thread_create( &tid[26], &attr[26], pi_thread_FUNC_ID, (void *) &thread_data[11], DYNAMIC_HW, 0);
   thread_create( &tid[27], &attr[27], find_max_thread_FUNC_ID, (void *) &findmax_arg[3], DYNAMIC_HW, 0);
   thread_create( &tid[28], &attr[28], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[15], DYNAMIC_HW, 0);
   thread_create( &tid[29], &attr[29], distance_thread_FUNC_ID, (void *) &distance_arg[8], DYNAMIC_HW, 0);
   thread_create( &tid[30], &attr[30], pi_thread_FUNC_ID, (void *) &thread_data[9], DYNAMIC_HW, 0);
   thread_create( &tid[31], &attr[31], find_max_thread_FUNC_ID, (void *) &findmax_arg[10], DYNAMIC_HW, 0);
   thread_create( &tid[32], &attr[32], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[33], &attr[33], pi_thread_FUNC_ID, (void *) &thread_data[1], DYNAMIC_HW, 0);
   thread_create( &tid[34], &attr[34], histogram_thread_FUNC_ID, (void*)(&thread_arg[1]),DYNAMIC_HW,0 );
   thread_create( &tid[35], &attr[35], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[36], &attr[36], distance_thread_FUNC_ID, (void *) &distance_arg[5], DYNAMIC_HW, 0);
   thread_create( &tid[37], &attr[37], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[38], &attr[38], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[39], &attr[39], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[40], &attr[40], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[41], &attr[41], pi_thread_FUNC_ID, (void *) &thread_data[0], DYNAMIC_HW, 0);
   thread_create( &tid[42], &attr[42], pi_thread_FUNC_ID, (void *) &thread_data[14], DYNAMIC_HW, 0);
   thread_create( &tid[43], &attr[43], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[12], DYNAMIC_HW, 0);
   thread_create( &tid[44], &attr[44], histogram_thread_FUNC_ID, (void*)(&thread_arg[2]),DYNAMIC_HW,0 );
   thread_create( &tid[45], &attr[45], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[3], DYNAMIC_HW, 0);
   thread_create( &tid[46], &attr[46], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[47], &attr[47], distance_thread_FUNC_ID, (void *) &distance_arg[3], DYNAMIC_HW, 0);
   thread_create( &tid[48], &attr[48], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[2], DYNAMIC_HW, 0);
   thread_create( &tid[49], &attr[49], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[16], DYNAMIC_HW, 0);
   thread_create( &tid[50], &attr[50], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[7], DYNAMIC_HW, 0);
   thread_create( &tid[51], &attr[51], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[5], DYNAMIC_HW, 0);
   thread_create( &tid[52], &attr[52], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[53], &attr[53], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[14], DYNAMIC_HW, 0);
   thread_create( &tid[54], &attr[54], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[10], DYNAMIC_HW, 0);
   thread_create( &tid[55], &attr[55], pi_thread_FUNC_ID, (void *) &thread_data[13], DYNAMIC_HW, 0);
   thread_create( &tid[56], &attr[56], find_max_thread_FUNC_ID, (void *) &findmax_arg[12], DYNAMIC_HW, 0);
   thread_create( &tid[57], &attr[57], pi_thread_FUNC_ID, (void *) &thread_data[8], DYNAMIC_HW, 0);
   thread_create( &tid[58], &attr[58], find_max_thread_FUNC_ID, (void *) &findmax_arg[5], DYNAMIC_HW, 0);
   thread_create( &tid[59], &attr[59], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[60], &attr[60], find_max_thread_FUNC_ID, (void *) &findmax_arg[1], DYNAMIC_HW, 0);
   thread_create( &tid[61], &attr[61], find_max_thread_FUNC_ID, (void *) &findmax_arg[0], DYNAMIC_HW, 0);
   thread_create( &tid[62], &attr[62], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[1], DYNAMIC_HW, 0);
   thread_create( &tid[63], &attr[63], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[64], &attr[64], pi_thread_FUNC_ID, (void *) &thread_data[7], DYNAMIC_HW, 0);
   thread_create( &tid[65], &attr[65], find_max_thread_FUNC_ID, (void *) &findmax_arg[11], DYNAMIC_HW, 0);
   thread_create( &tid[66], &attr[66], find_max_thread_FUNC_ID, (void *) &findmax_arg[6], DYNAMIC_HW, 0);
   thread_create( &tid[67], &attr[67], find_max_thread_FUNC_ID, (void *) &findmax_arg[2], DYNAMIC_HW, 0);
   thread_create( &tid[68], &attr[68], pi_thread_FUNC_ID, (void *) &thread_data[5], DYNAMIC_HW, 0);
   thread_create( &tid[69], &attr[69], histogram_thread_FUNC_ID, (void*)(&thread_arg[1]),DYNAMIC_HW,0 );
   thread_create( &tid[70], &attr[70], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[71], &attr[71], find_max_thread_FUNC_ID, (void *) &findmax_arg[7], DYNAMIC_HW, 0);
   thread_create( &tid[72], &attr[72], pi_thread_FUNC_ID, (void *) &thread_data[3], DYNAMIC_HW, 0);
   thread_create( &tid[73], &attr[73], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);
   thread_create( &tid[74], &attr[74], histogram_thread_FUNC_ID, (void*)(&thread_arg[0]),DYNAMIC_HW,0 );
   thread_create( &tid[75], &attr[75], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[9], DYNAMIC_HW, 0);
   thread_create( &tid[76], &attr[76], matrix_mult_thread_FUNC_ID, (void *) &matrix_arg[4], DYNAMIC_HW, 0);
   thread_create( &tid[77], &attr[77], mandel_thread_FUNC_ID, (void *) MANDEL_MAX_ITERATIONS, DYNAMIC_HW, 0);


   // Wait until all threads are finished 
	while(get_num_free_slaves() < NUM_AVAILABLE_HETERO_CPUS || thread_entries != 0) {
      if (thread_entries != 0)
         hthread_yield();
   }
   
   hthread_time_t stop = hthread_time_get();

	printf("---------------------------\n");
	hthread_time_t diff;
	hthread_time_diff(diff, stop, start);
	printf("Total Execution Time: %.2f ms\n", hthread_time_msec(diff));
	printf("Total Execution Time: %.2f us\n", hthread_time_usec(diff));
    
#if 0
    // Grab the total number of calls statistic.
    printf("Total number of thread_create (DYNAMIC) calls: %d\n", total_calls);
    printf("---------------------------------------------------\n");
    printf("Perfect Ratio:  %03d / %03d = %0.2f\n", perfect_match_counter, total_calls, perfect_match_counter / (1.0f * total_calls));
    printf("Best Ratio:     %03d / %03d = %0.2f\n", best_match_counter, total_calls, best_match_counter / (1.0f * total_calls));
    printf("Better Ratio:   %03d / %03d = %0.2f\n", better_match_counter, total_calls, better_match_counter / (1.0f * total_calls));
    printf("Possible Ratio: %03d / %03d = %0.2f\n", possible_match_counter, total_calls, possible_match_counter / (1.0f * total_calls));

    perfect_match_counter = 0;
    best_match_counter = 0;
    better_match_counter = 0;
    possible_match_counter = 0;

    Huint hw_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint sw_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint pr_counter[NUM_AVAILABLE_HETERO_CPUS];
    Huint total_hw_count = 0;
    Huint total_sw_count = 0;
    Huint total_pr_count = 0;

    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
        hw_counter[i] = _hwti_get_accelerator_hw_counter(hwti_array[i]);
        sw_counter[i] = _hwti_get_accelerator_sw_counter(hwti_array[i]);
        pr_counter[i] = _hwti_get_accelerator_pr_counter(hwti_array[i]);

        total_hw_count += hw_counter[i];
        total_sw_count += sw_counter[i];
        total_pr_count += pr_counter[i];

        // Manually Reset
        _hwti_set_accelerator_hw_counter(hwti_array[i], 0);
        _hwti_set_accelerator_sw_counter(hwti_array[i], 0);
        _hwti_set_accelerator_pr_counter(hwti_array[i], 0);
    }

    printf("Total HW Counter: %d\n", total_hw_count);
    printf("Total SW Counter: %d\n", total_sw_count);
    printf("Total PR Counter: %d\n", total_pr_count);
    printf("-----------------------\n");
    if (total_hw_count)     // if total_hw_count != 0
        printf("Total PR Counter / HW Counter = %f\n", total_pr_count / (1.0 *total_hw_count));
    printf("Total PR Counter / HW+SW Counter = %f\n", total_pr_count / (1.0 *(total_hw_count+total_sw_count)));
#endif
    printf("Total OS overhead (thread_create) = %f msec\n", hthread_time_msec(create_overhead));
#if 0
    hthread_time_t software_time = 0;
    for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
       volatile hthread_time_t * temp = (hthread_time_t *) (hwti_array[i] + 0x100);
       printf("%d: Software Execution = %f msec\n",i, hthread_time_msec(*temp));
       software_time += *temp;
    }
    printf("Total Software Execution = %f msec\n", hthread_time_msec(software_time));
#endif
   // Display thread times
   for (i = 0; i < NUM_THREADS; i++) { 
      // Determine which slave ran this thread based on address
      Huint base = attr[i].hardware_addr - HT_HWTI_COMMAND_OFFSET;
      Huint slave_num = (base & 0x00FF0000) >> 16;
      printf("Execution time (TID : %d, Slave : %d, HW ADDRESS = 0x%08x)\n", tid[i], slave_num, attr[i].hardware_addr);
   }
  

   printf("--- Done ---\n");

   return 0;
}