// Free everything - should only be called when the run is over void run_kill() { // now when the threads check kill, the answer will be yes manipulateKill(1); manipulateNumDevicesFinished(-1, 0); // Wakeup GPU threads so they can free their resources pthread_mutex_lock(mutex); myCVsignal = 0; pthread_cond_broadcast(cv_GPUThreads); pthread_cond_wait(cv_GPUThreads, mutex); pthread_mutex_unlock(mutex); // Once the GPU threads are done, free shared resources and return free(nengoDataArray); free(deviceForEnsemble); free(deviceForNetworkArray); free(sharedInput); pthread_mutex_destroy(mutex); pthread_cond_destroy(cv_GPUThreads); pthread_cond_destroy(cv_JNI); free(mutex); free(cv_GPUThreads); free(cv_JNI); }
// Called by the function setup. By the time this is called, the NengoGPUData structure // for each device should have all its static data set (but not yet loaded onto a device, // since it should't have access to a device yet). This function initializes the // synchronization primitives and creates a new thread for each GPU in use void run_start() { if(do_print) printf("NengoGPU: RUN_START\n"); manipulateReset(-1); manipulateKill(-1); manipulateNumDevicesFinished(0, 0); myCVsignal = 0; pthread_t* current_thread = (pthread_t*) malloc(sizeof(pthread_t)); if(!current_thread) { printf("bad malloc\n"); exit(EXIT_FAILURE); } // Initialize the mutex and condition variable. Must be done // before we create the threads since the threads use them. mutex = (pthread_mutex_t*) malloc(sizeof(pthread_mutex_t)); cv_GPUThreads = (pthread_cond_t*) malloc(sizeof(pthread_cond_t)); cv_JNI = (pthread_cond_t*) malloc(sizeof(pthread_cond_t)); if(!mutex || !cv_GPUThreads || !cv_JNI) { printf("bad malloc\n"); exit(EXIT_FAILURE); } pthread_mutex_init(mutex, NULL); pthread_cond_init(cv_GPUThreads, NULL); pthread_cond_init(cv_JNI, NULL); NengoGPUData* current_data; // Start the node-processing threads. Their starting function is start_GPU_thread. int i = 0; for(;i < num_devices; i++) { current_data = nengo_data_array[i]; pthread_create(current_thread, NULL, &start_GPU_thread, (void*)current_data); } // Wait for the threads to do their initializing (signalled by // myCVSignal == num_devices), then return. pthread_mutex_lock(mutex); while(myCVsignal < num_devices) { pthread_cond_wait(cv_JNI, mutex); } myCVsignal = 0; pthread_cond_broadcast(cv_GPUThreads); pthread_mutex_unlock(mutex); free(current_thread); sched_yield(); }
// Called once per GPU device per simulation run. This is the entry point for each processing thread.
//
// arg:     the NengoGPUData structure this thread is to process (passed as void* by pthread_create).
// returns: always NULL.
//
// Behaviour:
//   1. Acquire the device and move the simulation data onto it.
//   2. Report "initialized" by incrementing myCVsignal, then block until released.
//   3. Wait until we get the signal to step (from nativeStep in NengoGPU_JNI.c).
//   4. Process the NengoGPUData structure for one step with run_NEFEnsembles, report completion, wait again.
//   5. Eventually manipulateKill(0) returns true, meaning the run is finished; the thread
//      breaks out of the loop, frees its resources and reports that it is done.
//
// NOTE(review): the pthread_cond_wait calls on cv_GPUThreads below are not wrapped in a
// predicate loop, so a spurious wakeup would let the thread proceed early - confirm
// whether this is acceptable for the platforms in use.
void* start_GPU_thread(void* arg)
{
  NengoGPUData* nengoData = (NengoGPUData*) arg;

  jint numDevicesFinished;

  printf("GPU Thread %d: about to acquire device\n", nengoData->device);
  initGPUDevice(nengoData->device);
  printf("GPU Thread %d: done acquiring device\n", nengoData->device);

  printf("GPU Thread %d: about to move simulation data to device\n", nengoData->device);
  moveToDeviceNengoGPUData(nengoData);
  printf("GPU Thread %d: done moving simulation data to device\n", nengoData->device);

  // Signal to the parent thread that initialization is complete, then wait for the other
  // threads to finish initialization. Only the thread that brings the counter up to
  // numDevices wakes the parent (which waits on cv_JNI).
  pthread_mutex_lock(mutex);
  myCVsignal++;
  if(myCVsignal == numDevices)
  {
    pthread_cond_broadcast(cv_JNI);
  }
  pthread_cond_wait(cv_GPUThreads, mutex);
  pthread_mutex_unlock(mutex);

  // Wait for the signal to step. If that signal has already come, then myCVsignal == 1.
  // In that case, we don't wait (if we did, we'd wait forever).
  pthread_mutex_lock(mutex);
  if(myCVsignal == 0)
  {
    pthread_cond_wait(cv_GPUThreads, mutex);
  }
  pthread_mutex_unlock(mutex);

  // The main loop for the processing threads. The thread is either processing nodes on
  // the GPU or it is waiting for the call to step.
  while(!manipulateKill(0))
  {
    // startTime / endTime are not defined in this function; presumably set by the
    // stepping code before it wakes the threads - TODO confirm.
    run_NEFEnsembles(nengoData, startTime, endTime);

    // Signal that this device is finished processing for the step.
    // (Mode 2 presumably increments and returns the finished-device count - verify
    // against manipulateNumDevicesFinished.)
    numDevicesFinished = manipulateNumDevicesFinished(2, 0);

    pthread_mutex_lock(mutex);
    // Wakeup the main thread if all devices are finished running.
    // (Mode 3 presumably resets the count for the next step - verify.)
    if(numDevicesFinished == numDevices)
    {
      pthread_cond_broadcast(cv_JNI);
      manipulateNumDevicesFinished(3, 0);
    }
    // Wait for call from main thread to step
    pthread_cond_wait(cv_GPUThreads, mutex);
    pthread_mutex_unlock(mutex);
  }

  // Should only get here after run_kill has been called
  freeNengoGPUData(nengoData);
  shutdownGPUDevice();

  // If this is the last thread to finish, we wake up the main thread (run_kill waits on
  // cv_GPUThreads); it has to free some things before we finish.
  pthread_mutex_lock(mutex);
  myCVsignal++;
  if(myCVsignal == numDevices)
  {
    pthread_cond_broadcast(cv_GPUThreads);
  }
  pthread_mutex_unlock(mutex);
  return NULL;
}