// Renders hand keypoints onto `outputData` through the GPU code path.
//
// outputData:    frame buffer; its raw pointer is handed to the CPU<->GPU transfer helpers.
// handKeypoints: two keypoint arrays. Both are uploaded back to back into `pGpuHand` and
//                rendered in a single call covering `2 * people` hands.
//
// Keypoints are only drawn when the selected element (`spElementToRender`) is 0 and the first
// array reports at least one person. The GPU->CPU helper is invoked in every case.
// When built with `CPU_ONLY`, the function only reports an error.
// Any std::exception is caught and forwarded to `error(...)`.
void HandRenderer::renderHandGpu(Array<float>& outputData, const std::array<Array<float>, 2>& handKeypoints)
{
    try
    {
        #ifndef CPU_ONLY
            // Snapshot of the element currently selected for rendering
            const auto selectedElement = spElementToRender->load();
            const auto& firstHands = handKeypoints[0];
            const auto& secondHands = handKeypoints[1];
            const auto peopleCount = firstHands.getSize(0);
            const bool drawKeypoints = (selectedElement == 0 && peopleCount > 0);
            if (drawKeypoints)
            {
                cpuToGpuMemoryIfNotCopiedYet(outputData.getPtr());
                // Sizes are taken from the first array only.
                // NOTE(review): assumes the second array has the same dimensions - confirm with caller.
                const auto valuesPerPerson = firstHands.getSize(1) * firstHands.getSize(2);
                const auto valuesPerSide = peopleCount * valuesPerPerson;
                const auto bytesPerSide = valuesPerSide * sizeof(float);
                // Upload both arrays contiguously: first array, then second array
                cudaMemcpy(pGpuHand, firstHands.getConstPtr(), bytesPerSide, cudaMemcpyHostToDevice);
                cudaMemcpy(pGpuHand + valuesPerSide, secondHands.getConstPtr(), bytesPerSide, cudaMemcpyHostToDevice);
                // One render call for all hands of all people
                renderHandKeypointsGpu(*spGpuMemoryPtr, mFrameSize, pGpuHand, 2 * peopleCount, mRenderThreshold);
                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            }
            // Bring the frame back to host memory if this is the last renderer
            gpuToCpuMemoryIfLastRenderer(outputData.getPtr());
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
        #else
            // Built without GPU support
            error("GPU rendering not available if `CPU_ONLY` is set.", __LINE__, __FUNCTION__, __FILE__);
            UNUSED(outputData);
            UNUSED(handKeypoints);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
std::pair<int, std::string> PoseGpuRenderer::renderPose(Array<float>& outputData, const Array<float>& poseKeypoints, const float scaleInputToOutput, const float scaleNetToOutput) { try { // Security checks if (outputData.empty()) error("Empty Array<float> outputData.", __LINE__, __FUNCTION__, __FILE__); // GPU rendering const auto elementRendered = spElementToRender->load(); std::string elementRenderedName; #ifdef USE_CUDA const auto numberPeople = poseKeypoints.getSize(0); if (numberPeople > 0 || elementRendered != 0 || !mBlendOriginalFrame) { cpuToGpuMemoryIfNotCopiedYet(outputData.getPtr(), outputData.getVolume()); cudaCheck(__LINE__, __FUNCTION__, __FILE__); const auto numberBodyParts = getPoseNumberBodyParts(mPoseModel); const auto numberBodyPartsPlusBkg = numberBodyParts+1; const auto numberBodyPAFChannels = getPosePartPairs(mPoseModel).size(); const Point<int> frameSize{outputData.getSize(1), outputData.getSize(0)}; // Draw poseKeypoints if (elementRendered == 0) { // Rescale keypoints to output size auto poseKeypointsRescaled = poseKeypoints.clone(); scaleKeypoints(poseKeypointsRescaled, scaleInputToOutput); // Render keypoints if (!poseKeypoints.empty()) cudaMemcpy(pGpuPose, poseKeypointsRescaled.getConstPtr(), numberPeople * numberBodyParts * 3 * sizeof(float), cudaMemcpyHostToDevice); renderPoseKeypointsGpu(*spGpuMemory, mPoseModel, numberPeople, frameSize, pGpuPose, mRenderThreshold, mShowGooglyEyes, mBlendOriginalFrame, getAlphaKeypoint()); } else { // If resized to input resolution: Replace scaleNetToOutput * scaleInputToOutput by // scaleInputToOutput, and comment the security checks. 
// Security checks if (scaleNetToOutput == -1.f) error("Non valid scaleNetToOutput.", __LINE__, __FUNCTION__, __FILE__); // Parameters const auto& heatMapSizes = spPoseExtractorNet->getHeatMapSize(); const Point<int> heatMapSize{heatMapSizes[3], heatMapSizes[2]}; const auto lastPAFChannel = numberBodyPartsPlusBkg+2+numberBodyPAFChannels/2; // Add all heatmaps if (elementRendered == 2) // if (elementRendered == numberBodyPartsPlusBkg+1) { elementRenderedName = "Heatmaps"; renderPoseHeatMapsGpu(*spGpuMemory, mPoseModel, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(), heatMapSize, scaleNetToOutput * scaleInputToOutput, (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f)); } // Draw PAFs (Part Affinity Fields) else if (elementRendered == 3) // else if (elementRendered == numberBodyPartsPlusBkg+2) { elementRenderedName = "PAFs (Part Affinity Fields)"; renderPosePAFsGpu(*spGpuMemory, mPoseModel, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(), heatMapSize, scaleNetToOutput * scaleInputToOutput, (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f)); } // Draw specific body part or background else if (elementRendered <= numberBodyPartsPlusBkg+2) { const auto realElementRendered = (elementRendered == 1 ? numberBodyParts : elementRendered - 4); elementRenderedName = mPartIndexToName.at(realElementRendered); renderPoseHeatMapGpu(*spGpuMemory, mPoseModel, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(), heatMapSize, scaleNetToOutput * scaleInputToOutput, realElementRendered, (mBlendOriginalFrame ? 
getAlphaHeatMap() : 1.f)); } // Draw affinity between 2 body parts else if (elementRendered <= lastPAFChannel) { const auto affinityPart = (elementRendered-numberBodyPartsPlusBkg-3)*2; const auto affinityPartMapped = numberBodyPartsPlusBkg + getPoseMapIndex(mPoseModel).at(affinityPart); elementRenderedName = mPartIndexToName.at(affinityPartMapped); elementRenderedName = elementRenderedName.substr(0, elementRenderedName.find("(")); renderPosePAFGpu(*spGpuMemory, mPoseModel, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(), heatMapSize, scaleNetToOutput * scaleInputToOutput, affinityPartMapped, (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f)); } // Draw neck-part distance channel else { if (mPoseModel != PoseModel::BODY_25D) error("Neck-part distance channel only for BODY_25D.", __LINE__, __FUNCTION__, __FILE__); const auto distancePart = (elementRendered - lastPAFChannel - 1); const auto distancePartMapped = numberBodyPartsPlusBkg + numberBodyPAFChannels + distancePart; elementRenderedName = mPartIndexToName.at(distancePartMapped); renderPoseDistance(*spGpuMemory, mPoseModel, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(), heatMapSize, scaleNetToOutput * scaleInputToOutput, distancePartMapped, (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f)); } } } // GPU memory to CPU if last renderer gpuToCpuMemoryIfLastRenderer(outputData.getPtr(), outputData.getVolume()); cudaCheck(__LINE__, __FUNCTION__, __FILE__); #else UNUSED(outputData); UNUSED(poseKeypoints); UNUSED(scaleInputToOutput); UNUSED(scaleNetToOutput); error("OpenPose must be compiled with the `USE_CUDA` macro definitions in order to run this" " functionality. You can alternatively use CPU rendering (flag `--render_pose 1`).", __LINE__, __FUNCTION__, __FILE__); #endif // Return result return std::make_pair(elementRendered, elementRenderedName); } catch (const std::exception& e) { error(e.what(), __LINE__, __FUNCTION__, __FILE__); return std::make_pair(-1, ""); } }