예제 #1
0
/*
 * Host-side test driver.
 *
 * For every test selected at compile time (TEST_PR, USER_SORT, USER_CRC,
 * USER_MATRIXMUL, USER_VECTORSUB, USER_VECTORADD, USER_VECTORMUL,
 * USER_VECTORDIV) this creates worker threads through thread_create(),
 * joins them and then verifies the outcome on the host.  A wrong result is
 * reported with printf(); a failed thread create/join prints a message and
 * spins forever.
 *
 * The target of each thread_create() call is chosen by the preprocessor:
 * SOFTWARE_THREAD when HARDWARE_THREAD is not defined, otherwise DYNAMIC_HW
 * when DYNAMIC evaluates non-zero (not offered in the TEST_PR section),
 * otherwise STATIC_HW0 + i.
 *
 * Returns 0.
 */
int main(){

   printf("HOST: START\n");
   // Initialize various host tables once.
   init_host_tables();

   int i = 0;
   unsigned int j = 0, h, k;
   int ret[NUM_AVAILABLE_HETERO_CPUS];
   Hint * ptr;
   Data3 input3[NUM_AVAILABLE_HETERO_CPUS];

   printf("HOST: Creating thread & attribute structures\n");
   hthread_t * child = (hthread_t *) malloc(sizeof(hthread_t) * NUM_AVAILABLE_HETERO_CPUS);
   hthread_attr_t * attr = (hthread_attr_t *) malloc(sizeof(hthread_attr_t) * NUM_AVAILABLE_HETERO_CPUS);
   assert (child != NULL);
   assert (attr != NULL);

#ifdef TEST_PR
   printf("------------------------------------\n");
   printf("HOST: Testing PR\n");

   // Set up attributes for every thread
   for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
      hthread_attr_init(&attr[i]);
      hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
   }

   // Create all threads; NUM_TRIALS is handed over by value in the argument pointer
   for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
      if (thread_create (&child[i], &attr[i],test_PR_thread_FUNC_ID, (void *)(NUM_TRIALS),
                     #ifndef HARDWARE_THREAD
                       SOFTWARE_THREAD,
                     #else
                       STATIC_HW0 + i,
                     #endif
                       0))
      {
         printf("hthread_create error on HW THREAD %d\n", i);
         while(1);
      }
   }

   // Join all threads and report every one that did not return SUCCESS
   for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
      if( hthread_join(child[i], (void *) &ret[i])) {
         printf("Error joining child thread\n");
         while(1);
      }
      if (ret[i] != SUCCESS)
         printf("Thread %02d Failed:  %d\n",i, ret[i]);
   }
   printf("HOST: Done\n");
#endif


#ifdef USER_SORT
   int list[NUM_AVAILABLE_HETERO_CPUS][LIST_LENGTH];
   printf("------------------------------------\n");
   printf("HOST: Testing SORT\n");
   for (j = 0; j < NUM_TRIALS; j++) {

      // Initialize every list in descending order (LIST_LENGTH down to 1)
      for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
         for (i = 0; i < LIST_LENGTH; i++) {
            //list[h][i] = rand() % 1000;
            list[h][i] = LIST_LENGTH-i;
         }
      }

      #if 0 
      printf("Printing original lists\n");
      for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
         printf("List[%d]: ", h);
         for (i = 0; i < LIST_LENGTH; i++) {
            printf("..%d", list[h][i]);
         }
         printf("\n");
      }
      #endif

      // Set up attributes for every thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Create one thread per list
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i],sort_thread_FUNC_ID, (void *)(&list[i][0]),
                     #ifndef HARDWARE_THREAD
                       SOFTWARE_THREAD,
                     #elif DYNAMIC
                       DYNAMIC_HW,
                     #else
                       STATIC_HW0 + i,
                     #endif
                       0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }

      // Join all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
         //printf("Thread %02d Result = %d\n",i,ret[i]);
      }

      // Check results: every list must now be in non-decreasing order
      for (h = 0; h < NUM_AVAILABLE_HETERO_CPUS; h++) {
         for (i = 0; i < LIST_LENGTH-1; i++) {
            if (list[h][i] > list[h][i+1]) {
               printf("*");
               printf("[TRIAL %u, Slave %u] Sort failed!\n", j, h);
               break;   // one report per list is enough
            }
         }
      }
   }
   printf("HOST: Done\n");
#endif


#ifdef USER_CRC
   printf("------------------------------------\n");
   printf("HOST: Testing CRC\n");

   Hint * input;
   Hint * check, index = 0;
   for (j = 0; j < NUM_TRIALS; j++) {
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         check = (Hint *) malloc(ARRAY_SIZE * sizeof(Hint)); 
         assert(input != NULL);
         assert(check != NULL);

         // Fill `input` with random data and keep an identical copy in `check`
         ptr = input;
         for(index = 0; index < ARRAY_SIZE; index++) {
            *ptr = (rand() % 1000)*8;
            *(check+index) = *ptr;
            ptr++;
         }

         // Host-side reference: run poly_crc() over the copy
         if (poly_crc(check, ARRAY_SIZE)) {
            printf("Host failed to generate CRC check of data\n");
            while(1);
         }

         // Set up attributes for this thread
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);

         // Create the thread that processes `input`
         if (thread_create (&child[i], &attr[i], crc_thread_FUNC_ID, (void *)input,
                        #ifndef HARDWARE_THREAD
                           SOFTWARE_THREAD,
                        #elif DYNAMIC
                           DYNAMIC_HW,
                        #else
                           STATIC_HW0 + i,
                        #endif
                        0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }

         // Join on child thread (threads run one at a time in this test)
         int status;
         status = hthread_join(child[i], (void *) &ret[i]); 
         if (status) {
            printf("Error joining child thread: %d\n", status);
            while(1);
         }
         //printf("Thread %02d Result = %d\n",i,ret[i]);

         // The thread's buffer must match the host reference element by element
         for ( h = 0; h < ARRAY_SIZE; h++) {
            if (*(input+h) != *(check+h) )  {
               printf("[TRIAL %u, Slave %d] CRC failed!\n", j, i);
               break;
            }
         } 
         // Release memory
         free(input);
         free(check);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_MATRIXMUL
   printf("------------------------------------\n");
   printf("HOST: Testing MatrixMul\n");

   data package[NUM_AVAILABLE_HETERO_CPUS];
   for (j = 0; j < NUM_TRIALS; j++) {

      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         package[i].dataA = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint));
         package[i].dataB = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint));
         package[i].dataC = (Hint*) malloc(MATRIX_SIZE * MATRIX_SIZE * sizeof(Hint));
         assert(package[i].dataA != NULL);   
         assert(package[i].dataB != NULL);   
         assert(package[i].dataC != NULL);
         // A holds the row index, B the column index, C starts out as zero
         Huint l;   
         for (k = 0; k < MATRIX_SIZE; k++) {
            for (l = 0; l < MATRIX_SIZE; l++) {
               package[i].dataA[k*MATRIX_SIZE + l] = k;
               package[i].dataB[k*MATRIX_SIZE + l] = l;
               package[i].dataC[k*MATRIX_SIZE + l] = 0;
            }
         }
         package[i].size = MATRIX_SIZE;
         unsigned int row, col;   // only used by the disabled debug dumps below
#if 0
         printf("Original Matrix A: 0x%08x\n", package[i].dataA);
         for (row=0 ; row < MATRIX_SIZE; row++) {
            for (col=0 ; col < MATRIX_SIZE; col++) {
               printf("%02d ", package[i].dataA[row*MATRIX_SIZE+col]);
            }
            printf("\n");
         }
         printf("Original Matrix B: 0x%08x\n", package[i].dataB);
         for (row=0 ; row < MATRIX_SIZE; row++) {
            for (col=0 ; col < MATRIX_SIZE; col++) {
               printf("%02d ", package[i].dataB[row*MATRIX_SIZE+col]);
            }
            printf("\n");
         }

         printf("Original Matrix C: 0x%08x\n", package[i].dataC);
         for (row=0 ; row < MATRIX_SIZE; row++) {
            for (col=0 ; col < MATRIX_SIZE; col++) {
               printf("%02d ", package[i].dataC[row*MATRIX_SIZE+col]);
            }
            printf("\n");
         }
#endif    

         // Set up attributes for this thread
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);

         // Create the thread that works on package[i]
         if (thread_create (&child[i], &attr[i], matrix_multiply_thread_FUNC_ID, (void *)(&package[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }

         // Join on child thread (threads run one at a time in this test)
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
         if (ret[i] != SUCCESS)
            printf("Return value for thread indicates an error!\n");
         #if 0 
         printf("New Matrix C:\n");
         for (row=0 ; row < MATRIX_SIZE; row++) {
            for (col=0 ; col < MATRIX_SIZE; col++) {
               printf("%02d ", package[i].dataC[row*MATRIX_SIZE+col]);
            }
            printf("\n");
         }
         #endif
         // Check results against poly_matrix_mul() run on the host
         Hint temp[MATRIX_SIZE][MATRIX_SIZE];
         poly_matrix_mul(package[i].dataA, package[i].dataB, &temp, MATRIX_SIZE, MATRIX_SIZE, MATRIX_SIZE);
         int r, c;
         for (r=0 ; r < MATRIX_SIZE; r++) {
            for (c=0 ; c < MATRIX_SIZE; c++) {
               if ( temp[r][c] != package[i].dataC[r*MATRIX_SIZE + c])  {
                  printf("[TRIAL %u, Slave %d] Matrix Mul failed!\n", j, i);
                  // Push both indices past the end to leave both loops
                  r = c = MATRIX_SIZE;
               }
            }
         }

         // Release memory
         free(package[i].dataA); 
         free(package[i].dataB); 
         free(package[i].dataC);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_VECTORSUB
   printf("------------------------------------\n");
   printf("HOST: Testing VectorSub\n");

   for (j = 0; j < NUM_TRIALS; j++) {

      // Allocate three vectors per thread and fill the two operands
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         // Check before any pointer arithmetic is done on the results
         assert(input3[i].startAddr1 != NULL);
         assert(input3[i].startAddr2 != NULL);
         assert(input3[i].startAddr3 != NULL);
         // End addresses point at the last element (inclusive)
         input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
         input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
         input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;

         for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/} 
         for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/}
      }    

      // Set up attributes for every thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Create all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i], vector_sub_thread_FUNC_ID, (void *)(&input3[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }

      // Join all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
      }

      // Check results: vector3 must equal vector1 - vector2
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         for (h=0 ; h < ARRAY_SIZE; h++) {
            if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] - input3[i].startAddr2[h]))  {
               printf("[TRIAL %u, Slave %d] Vector Sub failed!\n", j, i);
               break;
            }
         }

         // Release memory
         free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_VECTORADD
   printf("------------------------------------\n");
   printf("HOST: Testing VectorAdd\n");

   for (j = 0; j < NUM_TRIALS; j++) {

      // Allocate three vectors per thread and fill the two operands
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         // Check before any pointer arithmetic is done on the results
         assert(input3[i].startAddr1 != NULL);
         assert(input3[i].startAddr2 != NULL);
         assert(input3[i].startAddr3 != NULL);
         // End addresses point at the last element (inclusive)
         input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
         input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
         input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;

         for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/} 
         for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/}
      }    

      // Set up attributes for every thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Create all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i], vector_add_thread_FUNC_ID, (void *)(&input3[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }

      // Join all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
      }

      // Check results: vector3 must equal vector1 + vector2
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         for (h=0 ; h < ARRAY_SIZE; h++) {
            if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] + input3[i].startAddr2[h]))  {
               printf("[TRIAL %u, Slave %d] Vector Add failed!\n", j, i);
               break;
            }
         }

         // Release memory
         free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_VECTORMUL
   printf("------------------------------------\n");
   printf("HOST: Testing VectorMultiply\n");

   for (j = 0; j < NUM_TRIALS; j++) {

      // Allocate three vectors per thread and fill the two operands
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         // Check before any pointer arithmetic is done on the results
         assert(input3[i].startAddr1 != NULL);
         assert(input3[i].startAddr2 != NULL);
         assert(input3[i].startAddr3 != NULL);
         // End addresses point at the last element (inclusive)
         input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
         input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
         input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;

         for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/} 
         for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/}
      }    

      // Set up attributes for every thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Create all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i], vector_multiply_thread_FUNC_ID, (void *)(&input3[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }

      // Join all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
      }

      // Check results: vector3 must equal vector1 * vector2
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         for (h=0 ; h < ARRAY_SIZE; h++) {
            if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] * input3[i].startAddr2[h]))  {
               printf("[TRIAL %u, Slave %d] Vector Multiply failed!\n", j, i);
               break;
            }
         }

         // Release memory
         free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3);
      }
   }
   printf("HOST: Done\n");
#endif

#ifdef USER_VECTORDIV
   printf("------------------------------------\n");
   printf("HOST: Testing VectorDivide\n");

   for (j = 0; j < NUM_TRIALS; j++) {

      // Allocate three vectors per thread and fill the two operands
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         input3[i].startAddr1 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         input3[i].startAddr2 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         input3[i].startAddr3 = (Hint*) malloc(ARRAY_SIZE * sizeof(Hint));
         // Check before any pointer arithmetic is done on the results
         assert(input3[i].startAddr1 != NULL);
         assert(input3[i].startAddr2 != NULL);
         assert(input3[i].startAddr3 != NULL);
         // End addresses point at the last element (inclusive)
         input3[i].endAddr1 = input3[i].startAddr1 + ARRAY_SIZE - 1;
         input3[i].endAddr2 = input3[i].startAddr2 + ARRAY_SIZE - 1;
         input3[i].endAddr3 = input3[i].startAddr3 + ARRAY_SIZE - 1;

         for( ptr = input3[i].startAddr1; ptr <= input3[i].endAddr1; ptr++ ){*ptr = (Hint) rand() % 1000;	/* printf( " %i \n",*ptr );*/} 
         // Divisors are drawn from 1..1000 so they are never zero
         for( ptr = input3[i].startAddr2; ptr <= input3[i].endAddr2; ptr++ ){*ptr = (Hint) (rand() % 1000) + 1;	/* printf( " %i \n",*ptr );*/}
      }    

      // Set up attributes for every thread
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         hthread_attr_init(&attr[i]);
         hthread_attr_setdetachstate( &attr[i], HTHREAD_CREATE_JOINABLE);
      }

      // Create all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if (thread_create (&child[i], &attr[i], vector_divide_thread_FUNC_ID, (void *)(&input3[i]),
                           #ifndef HARDWARE_THREAD
                              SOFTWARE_THREAD,
                           #elif DYNAMIC
                              DYNAMIC_HW,
                           #else
                              STATIC_HW0 + i,
                           #endif
                           0)) 
         {
            printf("hthread_create error on HW THREAD %d\n", i);
            while(1);
         }
      }

      // Join all threads
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         if( hthread_join(child[i], (void *) &ret[i])) {
            printf("Error joining child thread\n");
            while(1);
         }
      }

      // Check results: vector3 must equal vector1 / vector2
      for (i = 0; i < NUM_AVAILABLE_HETERO_CPUS; i++) {
         for (h=0 ; h < ARRAY_SIZE; h++) {
            // The divisor was generated to be non-zero above, so no check for / 0 is needed
            if ( (input3[i].startAddr3[h]) != (input3[i].startAddr1[h] / input3[i].startAddr2[h]))  {
               printf("[TRIAL %u, Slave %d] Vector Divide failed!\n", j, i);
               break;
            }
         }

         // Release memory
         free(input3[i].startAddr1); free(input3[i].startAddr2); free(input3[i].startAddr3);
      }
   }
   printf("HOST: Done\n");
#endif

   // Every thread has been joined by now, so the handle/attribute arrays can go
   free(child);
   free(attr);

   printf("END\n");
   return 0;
}
예제 #2
0
/*
 * DCT / IDCT timing demo.
 *
 * Calls dct_thread() directly on the host to produce a reference result in
 * my_temp, then creates two threads with thread_create() (dct_thread_FUNC_ID
 * on STATIC_HW0 working on my_input, idct_thread_FUNC_ID on STATIC_HW1
 * working on my_temp), joins them, and prints the result matrices together
 * with the timestamps taken around creation and completion.
 *
 * Returns 0 on success, 1 when creating or joining a thread failed.
 */
int main()
{
    hthread_time_t time_create, time_start, time_stop, diff;

    // Per-thread bookkeeping
    Huint           sta[NUM_THREADS];
    void*           retval[NUM_THREADS];
    hthread_t       tid[NUM_THREADS];
    hthread_attr_t  attr[NUM_THREADS];
    targ_t          targ[NUM_THREADS];

    int my_dct_matrix[BLOCK_SIZE][BLOCK_SIZE] = {
        {23170,    23170,    23170,    23170,    23170,    23170,    23170,  23170  }, 
        {32138,    27246,    18205,     6393,    -6393,   -18205,   -27246,  -32138 },
        {30274,    12540,   -12540,   -30274,   -30274,   -12540,    12540,  30274  },
        {27246,    -6393,   -32138,   -18205,    18205,    32138,     6393,  -27246 },
        {23170,   -23170,   -23170,    23170,    23170,   -23170,   -23170,  23170  },
        {18205,   -32138,     6393,    27246,   -27246,   -6393,     32138,  -18205 },
        {12540,   -30274,    30274,   -12540,   -12540,    30274,   -30274,  12540  },
        {6393 ,   -18205,    27246,   -32138,    32138,   -27246,    18205,  -6393  }
     };

    // Starts out all zero; filled in by initialize_dct_matrix() below
    int my_dct_matrix_trans[BLOCK_SIZE][BLOCK_SIZE] = {
        {0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0},
        {0, 0, 0, 0, 0, 0, 0, 0}
     };

    int my_temp[BLOCK_SIZE][BLOCK_SIZE];
    int my_input[BLOCK_SIZE][BLOCK_SIZE];
    int my_intermediate_dct[BLOCK_SIZE][BLOCK_SIZE];
    int my_intermediate_idct[BLOCK_SIZE][BLOCK_SIZE];
    int my_output[BLOCK_SIZE][BLOCK_SIZE];
    int my_idct_output[BLOCK_SIZE][BLOCK_SIZE];
    int my_scale_factor = 16;

    int i,j;

    // Initialize the attributes for the threads
    for (j = 0; j < NUM_THREADS; j++)
    {
        hthread_attr_init( &attr[j] );
    }

    // Calculate transpose of dct_matrix
    initialize_dct_matrix(my_dct_matrix, my_dct_matrix_trans);

    // Initialize input: element (i, j) holds i + j
    for (i = 0; i < BLOCK_SIZE; i++)
    {
        for (j = 0; j < BLOCK_SIZE; j++)
        {
            my_input[i][j] = i+j;
        }
    }

    // Fill in thread arguments for an initial DCT run on the host;
    // its output (my_temp) later serves as the IDCT thread's input
    targ[0].scale_factor = my_scale_factor;
    targ[0].input = my_input;
    targ[0].intermediate = my_intermediate_dct;
    targ[0].output = my_temp;
    targ[0].coeff_matrix = my_dct_matrix;
    targ[0].coeff_matrix_trans = my_dct_matrix_trans;
    dct_thread(&targ[0]);

    printf("\r\nOriginal Input:\r\n");
    print_matrix(my_input);

    printf("\r\nOriginal DCT:\r\n");
    print_matrix(my_temp);

    printf("**************************************************\r\n");

    // Fill in thread arguments for the two created threads
    targ[0].scale_factor = my_scale_factor;
    targ[0].input = my_input;
    targ[0].intermediate = my_intermediate_dct;
    targ[0].output = my_output;
    targ[0].coeff_matrix = my_dct_matrix;
    targ[0].coeff_matrix_trans = my_dct_matrix_trans;

    targ[1].scale_factor = my_scale_factor;
    targ[1].input = my_temp;
    targ[1].intermediate = my_intermediate_idct;
    targ[1].output = my_idct_output;
    targ[1].coeff_matrix = my_dct_matrix;
    targ[1].coeff_matrix_trans = my_dct_matrix_trans;

    // Start timing thread create
    time_create = hthread_time_get();

    // Perform DCT
    sta[0] =  thread_create(&tid[0], &attr[0], dct_thread_FUNC_ID, (void *) &targ[0], STATIC_HW0, 0);

    // Perform IDCT
    sta[1] =  thread_create(&tid[1], &attr[1], idct_thread_FUNC_ID, (void *) &targ[1], STATIC_HW1, 0);

    // Allow created threads to begin running and start timer
    time_start = hthread_time_get();

    // Wait for threads to complete.  Only threads that were actually created
    // are joined; the status is evaluated after the timer has been stopped.
    int join_sta[2] = { 0, 0 };
    if (sta[0] == 0)
    {
        join_sta[0] = hthread_join(tid[0],&retval[0]);
    }
    if (sta[1] == 0)
    {
        join_sta[1] = hthread_join(tid[1],&retval[1]);
    }

    // Stop timer
    time_stop = hthread_time_get();

    // Without both threads there are no results worth printing
    if (sta[0] != 0 || sta[1] != 0)
    {
        printf("thread_create failed (status = 0x%08x, 0x%08x)\r\n",(unsigned int)sta[0],(unsigned int)sta[1]);
        return 1;
    }
    if (join_sta[0] != 0 || join_sta[1] != 0)
    {
        printf("hthread_join failed (status = %d, %d)\r\n",join_sta[0],join_sta[1]);
        return 1;
    }

    printf("\r\nDCT (retval = 0x%08x):\r\n",(unsigned int)retval[0]);
    print_matrix(my_output);

    printf("\r\nIDCT (retval = 0x%08x):\r\n",(unsigned int)retval[1]);
    print_matrix(my_idct_output);

    printf("*********************************\n");
    printf("Create time  = %llu\n",time_create);
    printf("Start time   = %llu\n",time_start);
    printf("Stop time    = %llu\n",time_stop);
    printf("*********************************\n");
    hthread_time_diff(diff,time_start, time_create);
    printf("Creation time (|Start - Create|) usec = %f\n",hthread_time_usec(diff));
    hthread_time_diff(diff,time_stop, time_start);
    printf("Elapsed time  (|Stop  - Start|)  usec = %f\n",hthread_time_usec(diff));
    hthread_time_diff(diff,time_stop, time_create);
    printf("Total time    (|Create - Stop|)  usec = %f\n",hthread_time_usec(diff));

    // Read a time value at an address derived from each thread's hardware_addr attribute
    hthread_time_t * slave_time = (hthread_time_t *) (attr[0].hardware_addr - HT_CMD_HWTI_COMMAND + HT_CMD_VHWTI_EXEC_TIME);
    printf("Time reported by slave nano kernel #0 = %f usec\n", hthread_time_usec(*slave_time));
    slave_time = (hthread_time_t *) (attr[1].hardware_addr - HT_CMD_HWTI_COMMAND + HT_CMD_VHWTI_EXEC_TIME);
    printf("Time reported by slave nano kernel #1 = %f usec\n", hthread_time_usec(*slave_time));
    return 0;
}
예제 #3
0
/*
 * Minimum-search benchmark.
 *
 * Fills a float array of ARRAY_LENGTH elements, splits it into NUM_THREADS
 * slices (the last slice also takes the remainder) and, for two rounds,
 * creates one thread per slice with hthread_create().  The mutex shared with
 * the threads through thread_arg[].min_mutex is held while the threads are
 * created and released afterwards; timer readings are taken before creation,
 * before and after the unlock, and after all threads were joined.
 *
 * With USE_MB_THREAD defined the threads are created with the per-thread
 * attributes and the entry address min_handle; otherwise min_thread is run
 * with default attributes.
 *
 * Returns 0 on success, 1 when the mutex could not be allocated.
 */
int run_tests()
{
    // Timer variables.  Unsigned so that they match the %u conversions below
    // and so that the differences stay well defined if the counter wraps.
    xps_timer_t timer;
    unsigned int time_create, time_start, time_unlock, time_stop;

    // Mutex
    hthread_mutex_t * mutex          = (hthread_mutex_t*)malloc( sizeof(hthread_mutex_t) );
    if (mutex == NULL)
    {
        printf("Failed to allocate mutex\n");
        return 1;
    }
    hthread_mutex_init( mutex, NULL );

    // Shared result slot; every thread receives its address in thread_arg[].min
    float min;

    // Thread attribute structures
    Huint           sta[NUM_THREADS];
    void*           retval[NUM_THREADS];
    hthread_t       tid[NUM_THREADS];
    hthread_attr_t  attr[NUM_THREADS];
    targ_t thread_arg[NUM_THREADS];

    // Setup Cache
    XCache_DisableDCache();
    XCache_EnableICache(0xc0000801);

    // Create timer
    xps_timer_create(&timer, (int*)0x20400000);

    // Start timer
    xps_timer_start(&timer);

// *************************************************************************************    
    extern unsigned char intermediate[];

    // Thread entry address used for USE_MB_THREAD: address of `intermediate` plus an offset
    extern unsigned int min_handle_offset;
    unsigned int min_handle = (min_handle_offset) + (unsigned int)(&intermediate);


// *************************************************************************************    
    printf("Code start address = 0x%08x\n", (unsigned int)&intermediate);

    int i = 0;
    float main_array[ARRAY_LENGTH];
    printf("Addr of array = 0x%08x\n",(unsigned int)&main_array[0]);
    for (i = 0; i < ARRAY_LENGTH; i++)
    {
        main_array[i] = (i+2)*3.14f;
    }

    // Hand every thread an equally sized, contiguous slice of the array
    int num_items = ARRAY_LENGTH/NUM_THREADS;
    int extra_items = ARRAY_LENGTH - (num_items*NUM_THREADS);
    float * start_addr = &main_array[0];
    for (i = 0; i < NUM_THREADS; i++)
    {
        // Initialize the attributes for the hardware threads
        hthread_attr_init( &attr[i] );
        hthread_attr_sethardware( &attr[i], (void*)base_array[i] );

        // Initialize thread arguments
        thread_arg[i].num_items = num_items;
        thread_arg[i].data_ptr = start_addr;
        thread_arg[i].min_mutex = mutex;
        thread_arg[i].min  = &min;
        start_addr+=num_items;
    }
    // Add in extra items for the last thread if needed
    thread_arg[NUM_THREADS-1].num_items += extra_items;

    int num_ops = 0;
    for( num_ops = 0; num_ops < 2; num_ops = num_ops + 1)
    { 

        printf("******* Round %d ********\n",num_ops);
#ifdef USE_MB_THREAD
        printf("**** MB-based Threads ****\n");
#else
        printf("**** PPC-based Threads ****\n");
#endif
        // Start value for the search, larger than anything stored in main_array
        // for small arrays -- NOTE(review): confirm it exceeds (ARRAY_LENGTH+1)*3.14
        min = 9999999;

        // Lock mutex before hand so that timing will not include thread creation time
        hthread_mutex_lock(mutex);

        // Start timing thread create
        time_create = xps_timer_read_counter(&timer);

        for (i = 0; i < NUM_THREADS; i++)
        {
            // Create the worker threads
#ifdef USE_MB_THREAD

            // Create MB Thread
            sta[i] = hthread_create( &tid[i], &attr[i], (void*)(min_handle), (void*)(&thread_arg[i]) );
#else
            // Create SW Thread
            sta[i] = hthread_create( &tid[i], NULL, min_thread, (void*)(&thread_arg[i]) );
#endif
        }

        // Allow created threads to begin running and start timer
        time_start = xps_timer_read_counter(&timer);
        hthread_mutex_unlock(mutex);
        time_unlock = xps_timer_read_counter(&timer);

        // Wait for the threads to exit
        for (i = 0; i < NUM_THREADS; i++)
        {
            if (hthread_join( tid[i], &retval[i] ))
            {
                printf("Error joining child thread\n");
            }
        }

        time_stop = xps_timer_read_counter(&timer);

        // Display results
        printf("Min = %f\n",min);
        for (i = 0; i < NUM_THREADS; i++)
        {
            printf("TID = 0x%08x, status = 0x%08x, retval = 0x%08x\n",tid[i],sta[i],(Huint)retval[i]);
        }
        printf("*********************************\n");
        printf("Create time  = %u\n",time_create);
        printf("Start time   = %u\n",time_start);
        printf("Unlock time  = %u\n",time_unlock);
        printf("Stop time    = %u\n",time_stop);
        printf("*********************************\n");
        printf("Creation time (|Start - Create|) = %u\n",time_start - time_create);
        printf("Unlock time (|Unlock - Start|)   = %u\n",time_unlock - time_start);
        printf("Elapsed time  (|Stop - Start|)   = %u\n",time_stop - time_start);

    }

    hthread_mutex_destroy( mutex );
    free( mutex );

    // Clean up the attribute structures
    for (i = 0; i < NUM_THREADS; i++)
    {
        hthread_attr_destroy( &attr[i] );
    }
    printf ("-- Complete --\n");

    // Return from main
    return 0;
}