/* write 'bytes' of bytes from *host_a to SPU local storage area. */ void cellspu_memwrite( void *lsa, const void *host_a, size_t bytes ) { #ifdef DEBUG_CELLSPU_DRIVER printf("cellspu: write %d bytes from %x (host) to %x (device)\n", bytes, host_a,lsa); #endif void *mmap_base=spe_ls_area_get( spe_context ); memcpy( (void*)(mmap_base+(int)lsa), (const void*)host_a, bytes); }
void pocl_cellspu_read (void *data, void *host_ptr, const void *device_ptr, size_t cb) { chunk_info_t *chunk = (chunk_info_t*)device_ptr; assert( chunk->is_allocated && "cellspu: writing to an ullacoated memory?"); #ifdef DEBUG_CELLSPU_DRIVER printf("cellspu: read %d bytes to %x (host) from %x (device)\n", cb, host_ptr,chunk->start_address); #endif void *mmap_base=spe_ls_area_get( spe_context ); memcpy( host_ptr, mmap_base+(chunk->start_address), cb); }
/* Creates SPE trheads, parameters: * 1. SPE_trheads, number of SPE threads to be created */ void _create_threads(int SPE_threads){ int i,j,rc; ppu_pthread_data_t data[MAX_NUM_SPUS]; // If the number SPE threads is larger than the number of SPEs if (SPE_threads>NUM_SPE){ printf("Error: Too many SPEs requested (%d). Only %d are available.\n",SPE_threads,NUM_SPE); exit(0); } /* Forking SPE threads */ for(i=0; i<SPE_threads; i++){ /* Create context */ if ((data[i].spuid = spe_context_create (SPE_MAP_PS, NULL)) == NULL) { fprintf (stderr, "Failed spe_context_create(errno=%d strerror=%s)\n", errno, strerror(errno)); exit (1); } /* Load program */ if ((rc = spe_program_load (data[i].spuid, &diskio_spu)) != 0) { fprintf (stderr, "Failed spe_program_load(errno=%d strerror=%s)\n", errno, strerror(errno)); exit (1); } /* Create thread */ if ((rc = pthread_create (&data[i].pthread, NULL, &ppu_pthread_function, &data[i])) != 0) { fprintf (stderr, "Failed pthread_create(errno=%d strerror=%s)\n", errno, strerror(errno)); exit (1); } /* Get the direct problem state addresses */ mfc_ps_area[i] = spe_ps_area_get(data[i].spuid, SPE_MFC_COMMAND_AREA); if (mfc_ps_area[i] == NULL) { fprintf (stderr, "Failed spe_ps_area_get - MFC_COMMAND (errno=%d strerror=%s)\n", errno, strerror(errno)); exit (1); } mbox_ps_area[i] = spe_ps_area_get(data[i].spuid, SPE_CONTROL_AREA); if (mbox_ps_area[i] == NULL) { fprintf (stderr, "Failed spe_ps_area_get - CONTROL (errno=%d strerror=%s)\n", errno, strerror(errno)); exit (1); } sig_notify_ps_area[i] = spe_ps_area_get(data[i].spuid, SPE_SIG_NOTIFY_1_AREA); if (sig_notify_ps_area[i] == NULL) { fprintf (stderr, "Failed spe_ps_area_get - SIG_NOTIFY (errno=%d strerror=%s)\n", errno, strerror(errno)); exit (1); } mssync_ps_area[i] = spe_ps_area_get(data[i].spuid, SPE_MSSYNC_AREA); if (mssync_ps_area[i] == NULL) { fprintf (stderr, "Failed spe_ps_area_get - MSSYNC (errno=%d strerror=%s)\n", errno, strerror(errno)); exit (1); } send_mail(i,(unsigned int) SPE_threads); send_mail(i,i); } /* Getting the LS addresses of all SPE threads */ for(i=0; i<SPE_threads; i++) ls_addr[i] = (unsigned long long) spe_ls_area_get(data[i].spuid); /* Getting the addresses of the communication parameters of the SPE threads */ for(i=0; i<SPE_threads; i++){ Pass[i] = _spe_out_mbox_read(mbox_ps_area[i]); Pass[i] += ls_addr[i]; signal[i] = _spe_out_mbox_read(mbox_ps_area[i]); signal[i] += ls_addr[i]; } // Distribute the pass locations to all spes for(i=0; i<SPE_threads; i++) { for(j=0; j<SPE_threads; j++) { ((struct pass *)Pass[i])->freeBuffLocs[j] = (unsigned long long)&((struct pass *)Pass[j])->freebuffer; ((struct pass *)Pass[i])->canSendLocs[j] = (unsigned long long)&((struct pass *)Pass[j])->cansend; ((struct pass *)Pass[i])->speLocs[j] = ls_addr[j]; } } }
/** * @brief Classifies a set of test points using a set of training points. * * @param k The number of k nearest neighbours. * @param test_points The set of test points. * @param training_points The set of training points. * * @return An array of calculated labels for the set of test points. * The element at the first position represents the calculated * label of the first test points. */ unsigned char *classify(int k, Points<unsigned char, unsigned char> &test_points, Points<unsigned char, unsigned char> &training_points) { time_t start_time, end_time; time(&start_time); cb.k = k; cb.values_size = training_points.getVSize(); cb.label_size = training_points.getLSize(); cb.training_dimension = training_points.getDimension(); cb.training_count = training_points.getCount(); cb.training_data_size = training_points.getCount() * training_points.getVSize(); cb.training_points_per_transfer = TRAINING_VALUES_MAX_SIZE / training_points.getVSize(); cb.test_dimension = test_points.getDimension(); cb.test_count = test_points.getCount(); cb.test_data_size = test_points.getCount() * test_points.getVSize(); cb.test_points_per_transfer = TEST_VALUES_MAX_SIZE / test_points.getVSize(); cb.ea_training_points = (uint64_t) training_points.getValues(0); cb.ea_training_labels = (uint64_t) training_points.getLabel(0); cb.ea_test_points = (uint64_t) test_points.getValues(0); cb.ea_test_labels = (uint64_t) test_points.getLabel(0); Points<unsigned char, unsigned char> test_points_results(test_points.getCount(), test_points.getDimension()); cb.ea_test_labels_calculated = (uint64_t) ((char *) test_points_results.getLabel(0)); cb.num_spes = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); if (cb.num_spes > MAX_NUM_SPES) { cb.num_spes = MAX_NUM_SPES; } #ifdef PRINT printf("PPE:\t Num spes = %d\n", cb.num_spes); #endif uint32_t num; printf("PPE:\t Start calculating\n"); fflush(stdout); // create SPE context and load SPE program into the SPE context for (num=0; num<cb.num_spes; num++) { if ((data[num].spe_ctx = spe_context_create(SPE_MAP_PS |SPE_CFG_SIGNOTIFY1_OR|SPE_CFG_SIGNOTIFY2_OR, NULL))==NULL) { perror("Failed creating context"); exit(1); } if (spe_program_load(data[num].spe_ctx, &cellknn_spu)) { perror("Failed loading program"); exit(1); } } // create SPE pthreads for (num=0; num<cb.num_spes; num++) { if (pthread_create(&data[num].pthread, NULL, &spu_pthread, &data[num])) { perror("Failed creating thread"); exit(1); } } // map SPE's MFC problem state to main storage (get effective address) for (num=0; num<cb.num_spes; num++) { if ((cb.spu_mfc_ctl[num] = (uint64_t)spe_ps_area_get(data[num].spe_ctx, SPE_CONTROL_AREA))==0) { perror("Failed mapping MFC control area"); exit(1); } if ((cb.spu_ls[num] = (uint64_t)spe_ls_area_get(data[num].spe_ctx))==0) { perror("Failed mapping SPU local store"); exit(1); } if ((cb.spu_sig1[num] = (uint64_t)spe_ps_area_get(data[num].spe_ctx, SPE_SIG_NOTIFY_1_AREA))==0) { perror("Failed mapping Signal1 area"); exit(1); } if ((cb.spu_sig2[num] = (uint64_t)spe_ps_area_get(data[num].spe_ctx, SPE_SIG_NOTIFY_2_AREA))==0) { perror("Failed mapping Signal2 area"); exit(1); } } // send each SPE its number using BLOCKING mailbox write for (num=0; num<cb.num_spes; num++) { // write 1 entry to in_mailbox - we don't know if we have availalbe space so use blocking // cb parameter have to be loaded after receiving local id!!! spe_in_mbox_write(data[num].spe_ctx, (uint32_t*)&num, 1, SPE_MBOX_ALL_BLOCKING); } // wait for all SPEs to complete for (num=0; num<cb.num_spes; num++) { // wait for all the SPE pthread to complete if (pthread_join(data[num].pthread, NULL)) { perror("Failed joining thread"); exit(1); } // destroy the SPE contexts if (spe_context_destroy(data[num].spe_ctx)) { perror("Failed spe_context_destroy"); exit(1); } } time(&end_time); double difference = difftime(end_time, start_time); printf("It took %.2lf seconds to calculate %d test points and %d training points\n", difference, cb.test_count, cb.training_count); // We have to create a new array, since the Points object is destroyed after this block. // This array has to be freed somewhere outside this function. unsigned char *result = (unsigned char *) malloc(test_points.getCount() * sizeof(unsigned char)); for (int i = 0; i < test_points.getCount(); i++) { result[i] = test_points_results.getLabel(i)[0]; } return result; }