/*
 * Tears down the CUDA-related state attached to `state`.
 *
 * Sequence:
 *   1. shut down the random and BLAS subsystems;
 *   2. free the blasState, rngState and deviceProperties buffers;
 *   3. for each of state->numDevices devices, make it current, destroy the
 *      streams at indices 1..numUserStreams and free that device's stream
 *      array;
 *   4. free the outer streamsPerDevice array and make the device that was
 *      current on entry current again.
 *
 * Every CUDA runtime call is wrapped in THCudaCheck.
 */
void THCudaShutdown(THCState* state)
{
  int savedDevice = -1;
  int deviceIdx;
  int streamIdx;

  /* Subsystem shutdown comes first, while their state is still allocated. */
  THCRandom_shutdown(state);
  THCudaBlas_shutdown(state);

  free(state->blasState);
  free(state->rngState);
  free(state->deviceProperties);

  /* The loop below switches devices, so remember where we started. */
  THCudaCheck(cudaGetDevice(&savedDevice));

  deviceIdx = 0;
  while (deviceIdx < state->numDevices) {
    THCudaCheck(cudaSetDevice(deviceIdx));

    /* Slot 0 is the default stream and is not destroyed here; only the
       Torch-defined streams in slots 1..numUserStreams are. */
    streamIdx = 1;
    while (streamIdx <= state->numUserStreams) {
      THCudaCheck(
          cudaStreamDestroy(state->streamsPerDevice[deviceIdx][streamIdx]));
      ++streamIdx;
    }

    free(state->streamsPerDevice[deviceIdx]);
    ++deviceIdx;
  }

  free(state->streamsPerDevice);

  /* Put the caller back on the device it was using. */
  THCudaCheck(cudaSetDevice(savedDevice));
}
/*
 * Shuts down the random-number and BLAS subsystems attached to `state`
 * and then frees the blasState and rngState buffers.
 *
 * Both shutdown routines are called BEFORE the corresponding buffer is
 * freed. THCudaBlas_shutdown is handed `state` and presumably reaches its
 * handles through state->blasState (TODO confirm against its definition);
 * freeing that buffer first would leave it with a dangling pointer.
 */
void THCudaShutdown(THCState* state)
{
  /* Let each subsystem release its own resources while its state buffer
     is still valid. */
  THCRandom_shutdown(state);
  THCudaBlas_shutdown(state);

  /* Only now is it safe to drop the buffers themselves. */
  free(state->blasState);
  free(state->rngState);
}
/*
 * Releases the resources reachable from `state`.
 *
 * Sequence:
 *   1. shut down the random subsystem and free the rngState,
 *      cudaHostAllocator and deviceProperties buffers;
 *   2. free the p2pAccessEnabled table (one row per device, then the table);
 *   3. for every device reported by cudaGetDeviceCount, make it current and
 *      release its user streams, user cuBLAS handles and per-stream scratch
 *      space, followed by the arrays that held them;
 *   4. free resourcesPerDevice, shut down the device allocator, free the
 *      two thread-local slots and make the device that was current on entry
 *      current again.
 *
 * CUDA runtime calls are wrapped in THCudaCheck and cuBLAS calls in
 * THCublasCheck.
 */
void THCudaShutdown(THCState* state)
{
  int numDevices = 0;
  int savedDevice = -1;
  int deviceIdx;
  int streamIdx;
  int handleIdx;

  THCRandom_shutdown(state);

  free(state->rngState);
  free(state->cudaHostAllocator);
  free(state->deviceProperties);

  /* Remember the current device; the per-device loop changes it. */
  THCudaCheck(cudaGetDevice(&savedDevice));
  THCudaCheck(cudaGetDeviceCount(&numDevices));

  /* Peer-to-peer access table: rows first, then the outer array. */
  deviceIdx = 0;
  while (deviceIdx < numDevices) {
    free(state->p2pAccessEnabled[deviceIdx]);
    ++deviceIdx;
  }
  free(state->p2pAccessEnabled);

  /* Per-device resources. */
  deviceIdx = 0;
  while (deviceIdx < numDevices) {
    THCudaCheck(cudaSetDevice(deviceIdx));

    /* Torch-defined streams live at indices 1..numUserStreams; index 0 is
       the default stream and is not destroyed. */
    streamIdx = 1;
    while (streamIdx <= state->numUserStreams) {
      THCudaCheck(cudaStreamDestroy(
          THCState_getDeviceStream(state, deviceIdx, streamIdx)));
      ++streamIdx;
    }

    /* Torch-defined BLAS handles are likewise 1-based (0 is NULL, for
       consistency with the streams API). */
    handleIdx = 1;
    while (handleIdx <= state->numUserBlasHandles) {
      THCublasCheck(cublasDestroy(
          THCState_getDeviceBlasHandle(state, deviceIdx, handleIdx)));
      ++handleIdx;
    }

    /* Scratch space exists for the default stream too, so this walk starts
       at index 0. */
    streamIdx = 0;
    while (streamIdx <= state->numUserStreams) {
      THCudaCheck(THCudaFree(
          state,
          THCState_getDeviceScratchSpace(state, deviceIdx, streamIdx)));
      ++streamIdx;
    }

    /* The contents are gone; now drop the arrays that held them. */
    free(state->resourcesPerDevice[deviceIdx].streams);
    free(state->resourcesPerDevice[deviceIdx].blasHandles);
    free(state->resourcesPerDevice[deviceIdx].devScratchSpacePerStream);

    ++deviceIdx;
  }
  free(state->resourcesPerDevice);

  /* Runs after every THCudaFree call above. */
  state->cudaDeviceAllocator.shutdown(state->cudaDeviceAllocator.state);

  THCThreadLocal_free(state->currentPerDeviceStream);
  THCThreadLocal_free(state->currentPerDeviceBlasHandle);

  /* Put the caller back on the device it was using. */
  THCudaCheck(cudaSetDevice(savedDevice));
}
/*
 * Releases the resources reachable from `state`.
 *
 * Sequence:
 *   1. shut down the random subsystem and free the rngState and
 *      deviceProperties buffers;
 *   2. free the p2pAccessEnabled table (one row per device, then the table);
 *   3. for every device reported by cudaGetDeviceCount, make it current,
 *      destroy its cuBLAS and cuSPARSE handles, free the handle arrays, and
 *      release the calling thread's current stream together with the
 *      thread-local slot that stored it;
 *   4. free resourcesPerDevice, empty the device allocator's cache if it
 *      has an emptyCache hook, and empty the caching host allocator's cache
 *      if that is the host allocator in use;
 *   5. free the currentStreams array and the BLAS-handle thread-local, then
 *      make the device that was current on entry current again.
 *
 * CUDA runtime, cuBLAS and cuSPARSE calls are wrapped in THCudaCheck,
 * THCublasCheck and THCusparseCheck respectively.
 */
void THCudaShutdown(THCState* state)
{
  int numDevices = 0;
  int savedDevice = -1;
  int deviceIdx;
  int handleIdx;

  THCRandom_shutdown(state);

  free(state->rngState);
  free(state->deviceProperties);

  /* Remember the current device; the per-device loop changes it. */
  THCudaCheck(cudaGetDevice(&savedDevice));
  THCudaCheck(cudaGetDeviceCount(&numDevices));

  /* Peer-to-peer access table: rows first, then the outer array. */
  deviceIdx = 0;
  while (deviceIdx < numDevices) {
    free(state->p2pAccessEnabled[deviceIdx]);
    ++deviceIdx;
  }
  free(state->p2pAccessEnabled);

  /* Per-device resources. */
  deviceIdx = 0;
  while (deviceIdx < numDevices) {
    THCCudaResourcesPerDevice* perDevice;
    THCStream* threadStream;

    THCudaCheck(cudaSetDevice(deviceIdx));
    perDevice = state->resourcesPerDevice + deviceIdx;

    /* User-defined BLAS handles. */
    handleIdx = 0;
    while (handleIdx < perDevice->numBlasHandles) {
      THCublasCheck(cublasDestroy(perDevice->blasHandles[handleIdx]));
      ++handleIdx;
    }

    /* User-defined sparse handles. */
    handleIdx = 0;
    while (handleIdx < perDevice->numSparseHandles) {
      THCusparseCheck(cusparseDestroy(perDevice->sparseHandles[handleIdx]));
      ++handleIdx;
    }

    free(perDevice->blasHandles);
    free(perDevice->sparseHandles);

    /* Release the stream stored in this device's thread-local slot, then
       the slot itself. */
    threadStream =
        (THCStream*)THCThreadLocal_get(state->currentStreams[deviceIdx]);
    THCStream_free(threadStream);
    THCThreadLocal_free(state->currentStreams[deviceIdx]);

    ++deviceIdx;
  }
  free(state->resourcesPerDevice);

  /* emptyCache is optional on the device allocator; call it only if set. */
  if (state->cudaDeviceAllocator->emptyCache) {
    state->cudaDeviceAllocator->emptyCache(state->cudaDeviceAllocator->state);
  }

  /* The host cache is emptied only when the caching host allocator is the
     one installed on this state. */
  if (state->cudaHostAllocator == &THCCachingHostAllocator) {
    THCCachingHostAllocator_emptyCache();
  }

  free(state->currentStreams);
  THCThreadLocal_free(state->currentPerDeviceBlasHandle);

  /* Put the caller back on the device it was using. */
  THCudaCheck(cudaSetDevice(savedDevice));
}