// Overlays runtime information on an already-rendered output frame: fps (top-right), the
// currently rendered element name or help hint (top-left), per-person tracking ids, the frame
// number (bottom-left) and the people counter (bottom-right).
// @param cvOutputData        Frame to draw on, modified in place. Must not be empty.
// @param numberPeople        Number of people detected in this frame.
// @param id                  Producer id of the frame, fed to the fps estimator.
// @param elementRenderedName Name of the element currently rendered (empty for default view).
// @param frameNumber         Frame index shown on screen.
// @param poseIds             Per-person tracking ids (may be empty).
// @param poseKeypoints       Pose keypoints used to anchor each person id on screen.
void GuiInfoAdder::addInfo(cv::Mat& cvOutputData, const int numberPeople, const unsigned long long id,
                           const std::string& elementRenderedName, const unsigned long long frameNumber,
                           const Array<long long>& poseIds, const Array<float>& poseKeypoints)
{
    try
    {
        // Sanity check
        if (cvOutputData.empty())
            error("Wrong input element (empty cvOutputData).", __LINE__, __FUNCTION__, __FILE__);
        // Size - text border margin proportional to the biggest frame dimension (2.5%)
        const auto borderMargin = intRound(fastMax(cvOutputData.cols, cvOutputData.rows) * 0.025);
        // Update fps estimation with the id of the current frame
        updateFps(mLastId, mFps, mFpsCounter, mFpsQueue, id, mNumberGpus);
        // Fps or s/gpu
        char charArrayAux[15];
        std::snprintf(charArrayAux, 15, "%4.1f fps", mFps);
        // Recording inverse: sec/gpu
        // std::snprintf(charArrayAux, 15, "%4.2f s/gpu", (mFps != 0. ? mNumberGpus/mFps : 0.));
        putTextOnCvMat(cvOutputData, charArrayAux,
                       {intRound(cvOutputData.cols - borderMargin), borderMargin},
                       WHITE_SCALAR, true, cvOutputData.cols);
        // Part to show
        // Allowing some buffer when changing the part to show (if >= 2 GPUs)
        // I.e. one GPU might return a previous part after the other GPU returns the new desired part, it looks
        // like a mini-bug on screen
        // Difference between Titan X (~110 ms) & 1050 Ti (~290ms)
        if (mNumberGpus == 1 || (elementRenderedName != mLastElementRenderedName
                                 && mLastElementRenderedCounter > 4))
        {
            mLastElementRenderedName = elementRenderedName;
            mLastElementRenderedCounter = 0;
        }
        // Clamp before incrementing so the counter can never overflow (signed overflow would be UB)
        mLastElementRenderedCounter = fastMin(mLastElementRenderedCounter, std::numeric_limits<int>::max() - 5);
        mLastElementRenderedCounter++;
        // Add each person ID
        addPeopleIds(cvOutputData, poseIds, poseKeypoints, borderMargin);
        // OpenPose name as well as help or part to show
        putTextOnCvMat(cvOutputData, "OpenPose - " + (!mLastElementRenderedName.empty()
                           ? mLastElementRenderedName : (mGuiEnabled ? "'h' for help" : "")),
                       {borderMargin, borderMargin}, WHITE_SCALAR, false, cvOutputData.cols);
        // Frame number (bottom-left corner)
        putTextOnCvMat(cvOutputData, "Frame: " + std::to_string(frameNumber),
                       {borderMargin, (int)(cvOutputData.rows - borderMargin)}, WHITE_SCALAR, false,
                       cvOutputData.cols);
        // Number people (bottom-right corner)
        putTextOnCvMat(cvOutputData, "People: " + std::to_string(numberPeople),
                       {(int)(cvOutputData.cols - borderMargin), (int)(cvOutputData.rows - borderMargin)},
                       WHITE_SCALAR, true, cvOutputData.cols);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// Draws the tracking id of every tracked person (id > -1) on the output frame. The id label is
// anchored near keypoint 0 when visible, otherwise near keypoint 1; if neither keypoint passes
// the visibility threshold, nothing is drawn for that person.
void addPeopleIds(cv::Mat& cvOutputData, const Array<long long>& poseIds, const Array<float>& poseKeypoints,
                  const int borderMargin)
{
    try
    {
        // No ids to render
        if (poseIds.empty())
            return;
        const auto scoreThreshold = 0.05f;
        // Number of floats per person in poseKeypoints
        const auto personStride = poseKeypoints.getSize(1)*poseKeypoints.getSize(2);
        for (auto person = 0u ; person < poseIds.getVolume() ; person++)
        {
            // Skip untracked people
            if (!(poseIds[person] > -1))
                continue;
            const auto firstPartIndex = person * personStride;
            const auto secondPartIndex = firstPartIndex + poseKeypoints.getSize(2);
            const auto firstVisible = poseKeypoints[firstPartIndex+2] > scoreThreshold;
            const auto secondVisible = poseKeypoints[secondPartIndex+2] > scoreThreshold;
            if (!firstVisible && !secondVisible)
                continue;
            const auto x0 = intRound(poseKeypoints[firstPartIndex]);
            const auto y0 = intRound(poseKeypoints[firstPartIndex+1]);
            const auto x1 = intRound(poseKeypoints[secondPartIndex]);
            const auto y1 = intRound(poseKeypoints[secondPartIndex+1]);
            int textX;
            int textY;
            if (firstVisible && secondVisible)
            {
                // Offset proportional to the distance between the two keypoints
                const auto offset = intRound(0.15f * std::sqrt((x0-x1)*(x0-x1) + (y0-y1)*(y0-y1)));
                textX = x0 + 3*offset;
                textY = y0 - 3*offset;
            }
            else if (firstVisible)
            {
                textX = x0 + intRound(0.25f*borderMargin);
                textY = y0 - intRound(0.25f*borderMargin);
            }
            else // secondVisible
            {
                textX = x1 + intRound(0.25f*borderMargin);
                textY = y1 - intRound(0.5f*borderMargin);
            }
            putTextOnCvMat(cvOutputData, std::to_string(poseIds[person]), {textX, textY}, WHITE_SCALAR,
                           false, cvOutputData.cols);
        }
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// checks if _probability_ occurred in _randnum_ probability-checking can be done by seeing if // _randnum_ <= _probability_ * _max_number_. However, this means if we check for 1/6, and then check for // 1/3, since (_max_)(1/6) is encompassed in (_max_)(1/3), this can lead to unexpected results. Therefore, // the region used in calculation is subtracted from _randnum_, so that it may be called again without // having to account for these side-effects. (if the probability was hit, we can assume they won't be // checking for more probabilities) static bool probability_hit(unsigned int& randnum, const double probability, const unsigned long randMax) { const unsigned long border = intRound(randMax * probability); if(randnum <= border) return true; randnum -= border; return false; }
// Constructor: caches the output resolution and GUI configuration used by addInfo().
// @param outputSize Resolution of the rendered output frame.
// @param numberGpus Number of GPUs producing frames (used for fps and element-change buffering).
// @param guiEnabled Whether the GUI is shown (enables the "'h' for help" hint).
GuiInfoAdder::GuiInfoAdder(const Point<int>& outputSize, const int numberGpus, const bool guiEnabled) :
    mOutputSize{outputSize},
    // Text border margin proportional to the biggest output dimension (2.5%)
    mBorderMargin{intRound(fastMax(mOutputSize.x, mOutputSize.y) * 0.025)},
    mNumberGpus{numberGpus},
    mGuiEnabled{guiEnabled},
    mFpsCounter{0u},
    // Start at max so the very first rendered element passes the `> 4` buffering check in addInfo()
    mLastElementRenderedCounter{std::numeric_limits<int>::max()},
    // Sentinel meaning "no frame processed yet"
    mLastId{std::numeric_limits<unsigned long long>::max()}
{
}
// Overlays the fps (top-left), the rendered-element name or help hint (top-right), the frame id
// (bottom-left) and the people counter (bottom-right) on the rendered output frame.
void GuiInfoAdder::addInfo(cv::Mat& cvOutputData, const Array<float>& poseKeypoints, const unsigned long long id,
                           const std::string& elementRenderedName)
{
    try
    {
        // Security checks
        if (cvOutputData.empty())
            error("Wrong input element (empty cvOutputData).", __LINE__, __FUNCTION__, __FILE__);
        // Update fps estimation with the id of the current frame
        updateFps(mLastId, mFps, mFpsCounter, mFpsQueue, id, mNumberGpus);
        // Text color
        const cv::Scalar white{255, 255, 255};
        // Fps or s/gpu
        char fpsText[15];
        std::snprintf(fpsText, 15, "%4.1f fps", mFps);
        // Recording inverse: sec/gpu
        // std::snprintf(fpsText, 15, "%4.2f s/gpu", (mFps != 0. ? mNumberGpus/mFps : 0.));
        putTextOnCvMat(cvOutputData, fpsText, {mBorderMargin,mBorderMargin}, white, false);
        // Part to show - buffered when >= 2 GPUs so a frame returned out of order by another GPU
        // does not briefly flicker back to the previous element on screen
        // (difference between Titan X (~110 ms) & 1050 Ti (~290ms))
        const auto elementChanged = elementRenderedName != mLastElementRenderedName;
        if (mNumberGpus == 1 || (elementChanged && mLastElementRenderedCounter > 4))
        {
            mLastElementRenderedName = elementRenderedName;
            mLastElementRenderedCounter = 0;
        }
        // Keep the counter bounded so the increment below cannot overflow
        mLastElementRenderedCounter = fastMin(mLastElementRenderedCounter, std::numeric_limits<int>::max() - 5);
        mLastElementRenderedCounter++;
        // Display element to display or help
        std::string message;
        if (!mLastElementRenderedName.empty())
            message = mLastElementRenderedName;
        else if (mGuiEnabled)
            message = "'h' for help";
        if (!message.empty())
            putTextOnCvMat(cvOutputData, message, {intRound(mOutputSize.x - mBorderMargin), mBorderMargin},
                           white, true);
        // Frame number
        putTextOnCvMat(cvOutputData, "Frame " + std::to_string(id),
                       {mBorderMargin, (int)(mOutputSize.y - mBorderMargin)}, white, false);
        // Number people
        putTextOnCvMat(cvOutputData, std::to_string(poseKeypoints.getSize(0)) + " people",
                       {(int)(mOutputSize.x - mBorderMargin), (int)(mOutputSize.y - mBorderMargin)},
                       white, true);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// Copies the face heat maps of one person from heatMapsGpuPtr into heatMaps and rescales them
// in place according to heatMapScaleMode.
// @param heatMaps         Destination array; this person's block starts at person*FACE_NUMBER_PARTS*channelOffset.
// @param person           Index of the person whose heat maps are updated.
// @param heatMapScaleMode Target range: PlusMinusOne -> [-1,1], UnsignedChar -> [0,255],
//                         otherwise values are only truncated back to their original range.
// @param heatMapsGpuPtr   Source buffer (device memory when USE_CUDA, host memory otherwise).
void updateFaceHeatMapsForPerson(Array<float>& heatMaps, const int person, const ScaleMode heatMapScaleMode,
                                 const float* heatMapsGpuPtr)
{
    try
    {
        // Volume of one person's face heat maps
        const auto channelOffset = heatMaps.getVolume(2, 3);
        const auto volumeBodyParts = FACE_NUMBER_PARTS * channelOffset;
        // Note: removed dead local `totalOffset`, which was written but never read.
        auto* heatMapsPtr = &heatMaps.getPtr()[person*volumeBodyParts];
        // Copy face parts
        #ifdef USE_CUDA
            cudaMemcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float), cudaMemcpyDeviceToHost);
        #else
            //std::memcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float));
            std::copy(heatMapsGpuPtr, heatMapsGpuPtr + volumeBodyParts, heatMapsPtr);
        #endif
        // Change from [0,1] to [-1,1]
        if (heatMapScaleMode == ScaleMode::PlusMinusOne)
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = fastTruncate(heatMapsPtr[i]) * 2.f - 1.f;
        // [0, 255]
        else if (heatMapScaleMode == ScaleMode::UnsignedChar)
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = (float)intRound(fastTruncate(heatMapsPtr[i]) * 255.f);
        // Avoid values outside original range
        else
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = fastTruncate(heatMapsPtr[i]);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// Draws the keypoints of every detected person (circles) and the limb connections between them
// (lines) on the CPU, over a float image stored as 3 planar channels.
// @param keypoints    Keypoints to render, indexed as [people x parts x (x, y, score)].
// @param pairs        Flat list of keypoint index pairs defining the limbs to connect.
// @param colors       Flat R,G,B color triplet list, indexed per keypoint/limb modulo its size.
//                     Fix: now passed by const reference (was by value), avoiding a full vector
//                     copy on every call; call sites are unaffected.
// @param thicknessCircleRatio         Circle thickness relative to the image size.
// @param thicknessLineRatioWRTCircle  Line thickness relative to the circle thickness.
// @param poseScales   Per-part scale multipliers, indexed modulo its size.
// @param threshold    Minimum keypoint score required to draw it.
void renderKeypointsCpu(Array<float>& frameArray, const Array<float>& keypoints,
                        const std::vector<unsigned int>& pairs, const std::vector<float>& colors,
                        const float thicknessCircleRatio, const float thicknessLineRatioWRTCircle,
                        const std::vector<float>& poseScales, const float threshold)
{
    try
    {
        if (!frameArray.empty())
        {
            // Array<float> --> cv::Mat
            auto frame = frameArray.getCvMat();
            // Security check: expects a planar 3-channel float image ([3 x height x width])
            if (frame.dims != 3 || frame.size[0] != 3)
                error(errorMessage, __LINE__, __FUNCTION__, __FILE__);
            // Get frame channels
            const auto width = frame.size[2];
            const auto height = frame.size[1];
            const auto area = width * height;
            // Plane-to-plane offset in uchar units (cv::Mat::data is uchar*)
            const auto channelOffset = area * sizeof(float) / sizeof(uchar);
            cv::Mat frameB(height, width, CV_32FC1, &frame.data[0]);
            cv::Mat frameG(height, width, CV_32FC1, &frame.data[channelOffset]);
            cv::Mat frameR(height, width, CV_32FC1, &frame.data[2 * channelOffset]);
            // Parameters
            const auto lineType = 8;
            const auto shift = 0;
            const auto numberColors = colors.size();
            const auto numberScales = poseScales.size();
            const auto thresholdRectangle = 0.1f;
            const auto numberKeypoints = keypoints.getSize(1);
            // Keypoints
            for (auto person = 0 ; person < keypoints.getSize(0) ; person++)
            {
                const auto personRectangle = getKeypointsRectangle(keypoints, person, thresholdRectangle);
                if (personRectangle.area() > 0)
                {
                    const auto ratioAreas = fastMin(1.f, fastMax(personRectangle.width/(float)width,
                                                                 personRectangle.height/(float)height));
                    // Size-dependent variables
                    const auto thicknessRatio = fastMax(intRound(std::sqrt(area) * thicknessCircleRatio
                                                                 * ratioAreas), 2);
                    // Negative thickness in cv::circle means that a filled circle is to be drawn.
                    // NOTE(review): the outer fastMax(1, ...) prevents -1 from ever being returned, so
                    // small people get thickness 1 instead of a filled circle — confirm intended.
                    const auto thicknessCircle = fastMax(1, (ratioAreas > 0.05f ? thicknessRatio : -1));
                    const auto thicknessLine = fastMax(1, intRound(thicknessRatio * thicknessLineRatioWRTCircle));
                    const auto radius = thicknessRatio / 2;
                    // Draw lines
                    for (auto pair = 0u ; pair < pairs.size() ; pair+=2)
                    {
                        const auto index1 = (person * numberKeypoints + pairs[pair]) * keypoints.getSize(2);
                        const auto index2 = (person * numberKeypoints + pairs[pair+1]) * keypoints.getSize(2);
                        // Only draw the limb if both endpoints pass the score threshold
                        if (keypoints[index1+2] > threshold && keypoints[index2+2] > threshold)
                        {
                            const auto thicknessLineScaled = thicknessLine * poseScales[pairs[pair+1] % numberScales];
                            const auto colorIndex = pairs[pair+1]*3; // Before: colorIndex = pair/2*3;
                            const cv::Scalar color{colors[colorIndex % numberColors],
                                                   colors[(colorIndex+1) % numberColors],
                                                   colors[(colorIndex+2) % numberColors]};
                            const cv::Point keypoint1{intRound(keypoints[index1]), intRound(keypoints[index1+1])};
                            const cv::Point keypoint2{intRound(keypoints[index2]), intRound(keypoints[index2+1])};
                            // color is (R,G,B): component 0 goes to the R plane, etc.
                            cv::line(frameR, keypoint1, keypoint2, color[0], thicknessLineScaled, lineType, shift);
                            cv::line(frameG, keypoint1, keypoint2, color[1], thicknessLineScaled, lineType, shift);
                            cv::line(frameB, keypoint1, keypoint2, color[2], thicknessLineScaled, lineType, shift);
                        }
                    }
                    // Draw circles
                    for (auto part = 0 ; part < numberKeypoints ; part++)
                    {
                        const auto faceIndex = (person * numberKeypoints + part) * keypoints.getSize(2);
                        if (keypoints[faceIndex+2] > threshold)
                        {
                            const auto radiusScaled = radius * poseScales[part % numberScales];
                            const auto thicknessCircleScaled = thicknessCircle * poseScales[part % numberScales];
                            const auto colorIndex = part*3;
                            const cv::Scalar color{colors[colorIndex % numberColors],
                                                   colors[(colorIndex+1) % numberColors],
                                                   colors[(colorIndex+2) % numberColors]};
                            const cv::Point center{intRound(keypoints[faceIndex]), intRound(keypoints[faceIndex+1])};
                            cv::circle(frameR, center, radiusScaled, color[0], thicknessCircleScaled, lineType, shift);
                            cv::circle(frameG, center, radiusScaled, color[1], thicknessCircleScaled, lineType, shift);
                            cv::circle(frameB, center, radiusScaled, color[2], thicknessCircleScaled, lineType, shift);
                        }
                    }
                }
            }
        }
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
//! Computes the SIFT descriptor for keypoint \f$(x,y,\sigma,\theta)\f$.
SIFTDescriptor operator()(float x, float y, float sigma, float theta,
                          const Image<Vector2f>& gradPolar) const
{
    const float pi = static_cast<float>(M_PI);
    /* The oriented keypoint is denoted by $k = (x,y,\sigma,\theta)$.
       SIFT describes keypoint $k$ in a similarity-invariant manner.
       To do so, we consider a square image patch which:
       - is centered in $(x,y)$
       - has an orientation angle $\theta$ w.r.t. the image frame coordinates:
         => to ensure rotation invariance
       - has a side length proportional to the scale $\sigma$:
         => to ensure scale invariance
       This square patch is denoted by $P(x,y,\sigma,\theta) = P(k)$.

       The square patch $P(x,y,\sigma,\theta)$ is itself divided into NxN smaller
       square patches $(P_{i,j})_{1 \leq i \leq N, 1 \leq j \leq N}$.
       Notice that we omit the variables $(x,y,\sigma,\theta)$ which the patches
       $P_{i,j}$ actually depend on.

       $N$ corresponds to the template argument 'int N' which should be 4 as
       stated in the paper [Lowe, IJCV 2004]).

       In the image, each small square patch $P_{i,j}$ has a side length $l$
       proportional to the scale $\sigma$ of the keypoint, i.e.,
       $l = \lambda \sigma$. */
    const float lambda = bin_scale_unit_length_;
    const float l = lambda*sigma;
    /* It is important to note that $\lambda$ is some 'universal' constant used
       for all SIFT descriptors to ensure the scale-invariance of the
       descriptor. */

    /* Now in each image square patch $P_{i,j}$, we build a histogram of gradient
       orientations $\mathbf{h}_{i,j} \in \mathbb{R}^d$, which quantizes the
       gradient orientations into $O$ principal orientations.
       $O$ corresponds to the template argument 'int O'.

       Let us initialize the SIFT descriptor consisting of the NxN histograms
       $\mathbf{h}_{i,j}$, each in $\mathbf{R}^O$ as follows. */
    SIFTDescriptor h(SIFTDescriptor::Zero());
    /* In the rescaled and oriented coordinate frame bound to the patch $P(k)$,
       - keypoint $k$ is located at (0,0)
       - centers $C_{i,j}$ of patch $P_{i,j}$ are located at
         $[ -(N+1)/2 + i, -(N+1)/2 + j ]$

       For example for $N=4$, they are at:
         (-1.5,-1.5) (-0.5,-1.5) (0.5,-1.5) (1.5,-1.5)
         (-1.5,-0.5) (-0.5,-0.5) (0.5,-0.5) (1.5,-0.5)
         (-1.5, 0.5) (-0.5, 0.5) (0.5, 0.5) (1.5, 0.5)
         (-1.5, 1.5) (-0.5, 1.5) (0.5, 1.5) (1.5, 1.5)

       Gradients in $[x_i-1, x_i+1] \times [y_i-1, y_i+1]$ contributes to
       histogram $\mathbf{h}_{i,j}$, namely gradients in the square patch
       $Q_{i,j}$
       - centered in $C_{i,j}$ as square patch $P_{i,j}$,
       - with side length $2$.
       That is because we want to do trilinear interpolation in order to make
       SIFT robust to small shift in rotation, translation.

       Therefore, to compute the SIFT descriptor we need to scan all the pixels
       on a larger circular image patch with radius $r$: */
    const float r = sqrt(2.f) * l * (N+1)/2.f;
    /* In the above formula, notice:
       - the factor $\sqrt{2}$ because diagonal corners of the furthest patches
         $P_{i,j}$ from the center $(x,y)$ must be in the circular patch.
       - the factor $(N+1)/2$ because we have to include the gradients in larger
         patches $Q_{i,j}$ for each $P_{i,j}$.
       It is recommended to make a drawing to convince oneself. */

    // To build the SIFT descriptor, we do the following procedure:
    // - we work in the image reference frame;
    // - we scan in the convolved image $G_\sigma$ the position $(x+u, y+v)$
    //   where $(u,v) \in [-r,r]^2$;
    // - we retrieve its coordinates in the oriented frame of the patch
    //   $P(x,y,\sigma,\theta)$ with inverse transform $T = 1/l R_\theta^T$
    Matrix2f T;
    T << cos(theta), sin(theta),
        -sin(theta), cos(theta);
    T /= l;
    // Loop to perform interpolation
    const int rounded_r = intRound(r);
    // NOTE(review): rounded_x/rounded_y store intRound results but are declared float, and are
    // later used as pixel coordinates for gradPolar(...). Presumably Image::operator() converts
    // them — confirm, or consider declaring them int.
    const float rounded_x = intRound(x);
    const float rounded_y = intRound(y);
    for (int v = -rounded_r; v <= rounded_r; ++v)
    {
        for (int u = -rounded_r; u <= rounded_r; ++u)
        {
            // Compute the coordinates in the rescaled and oriented coordinate
            // frame bound to patch $P(k)$.
            Vector2f pos( T*Vector2f(u,v) );
            // subpixel correction?
            /*pos.x() -= (x - rounded_x);
            pos.y() -= (y - rounded_y);*/
            // Skip samples falling outside the image bounds.
            if ( rounded_x+u < 0 || rounded_x+u >= gradPolar.width() ||
                 rounded_y+v < 0 || rounded_y+v >= gradPolar.height() )
                continue;
            // Compute the Gaussian weight which gives more emphasis to gradient
            // closer to the center.
            float weight = exp(-pos.squaredNorm()/(2.f*pow(N/2.f, 2)));
            // Gradient magnitude, and orientation relative to the keypoint
            // orientation, wrapped to [0, 2*pi) then rescaled to O bins.
            float mag = gradPolar(rounded_x+u, rounded_y+v)(0);
            float ori = gradPolar(rounded_x+u, rounded_y+v)(1) - theta;
            ori = ori < 0.f ? ori+2.f*pi : ori;
            ori *= float(O)/(2.f*pi);
            // The coordinate frame is centered in the patch center, thus:
            // $(x,y)$ is in $[-(N+1)/2, (N+1)/2]^2$.
            //
            // Change the coordinate frame so that $(x,y)$ is in $[-1, N]^2$.
            // Thus, translate by $[ (N-1)/2, (N-1)/2 ]$.
            pos.array() += N/2.f - 0.5f;
            if (pos.minCoeff() <= -1.f || pos.maxCoeff() >= static_cast<float>(N))
                continue;
            // In the translated coordinate frame, note that for $N=4$ the
            // centers are now located at:
            // (0,0) (1,0) (2,0) (3,0)
            // (0,1) (1,1) (2,1) (3,1)
            // (0,2) (1,2) (2,2) (3,2)
            // (0,3) (1,3) (2,3) (3,3)
            //
            // Update the SIFT descriptor using trilinear interpolation.
            accumulate(h, pos, ori, weight, mag);
        }
    }
    h.normalize();
    // Scale to integer-like range and cap every bin at 255.
    h = (h * 512.f).cwiseMin(Matrix<float, Dim, 1>::Ones()*255.f);
    return h;
}