Point vanishPointDecide(bool& skyVpt_turnoff,const Mat& img,int index,const vector<Point> & skyVpts, const vector<Vec4i> & hlines,const vector<vector<Point2f> > & trjs, bool vFrmEchCue,vector<Point> &vCues) { Point rslt; //static =false;//notice, this is a in-function static variable, though I dislike this a lot if(!skyVpt_turnoff) { maxChange(index,skyVpts,skyVpt_turnoff); } vector<tuple<double,double,double> > linesOfTrjs(trjs[0].size());//this is dirty and quick [0] tjsTolns(linesOfTrjs,trjs); vector<double> trjsLenth(trjs[0].size()); for (int i = 0; i < trjsLenth.size(); i++) { trjsLenth[i]=norm(trjs[0][i]-trjs[trjs.size()-1][i]); } vector<bool> trjlbs(trjs[0].size(),true); vector<bool> hlnlbs(hlines.size(),true); for (int i = 0; i < trjs[0].size(); i++)//dirty and quick { if((get<2>(linesOfTrjs[i])<trajectoryIsLine_threshold)||(trjsLenth[i]<trajectoryLength_constrain)) trjlbs[i]=false; } vector<vector<int> > hlineTrjCorres(hlines.size(),vector<int>(TrjNumOfEachLine)); vector<pair<double,double> > abln_hlines(hlines.size()); for (int i = 0; i < hlines.size(); i++) { double x=hlines[i][2]-hlines[i][0]; double y=hlines[i][3]-hlines[i][1]; int x1(hlines[i][0]),y1(hlines[i][1]);//hline[0],hline[1] // double d1=norm(pt-pttype(x1,y1)); int x2(hlines[i][2]),y2(hlines[i][3]); // double d2=norm(pt-pttype(x2,y2)); double t1=x2-x1; if (abs(t1)<0.001) t1= t1>0?0.001:-0.001; double a=(double)(y2-y1)/t1; double b=y1-a*x1; abln_hlines[i].first=a; abln_hlines[i].second=b; if(abs(x)<0.001) x=0.001; double r=abs(y/x); if((r>houghXYRatio_consgtrain)||(r<1.0/houghXYRatio_consgtrain)) hlnlbs[i]=false; HlineTrajCorr(trjs[0],hlines[i],hlineTrjCorres[i]); double maxdis=0.0; for (int j = 0; j < hlineTrjCorres[i].size(); j++) { auto dis=trjsLenth[hlineTrjCorres[i][j]]; if(dis>maxdis) maxdis=dis; } if (maxdis<10.0) hlnlbs[i]=false; } vector<Point> candidates; if(vFrmEchCue) { vCues.resize(3); vCues[0]=skyVpts[index]; } if (!skyVpt_turnoff) { int middlex=skyVpts[index].x; int middley=skyVpts[index].y; for (int x=middlex-halfSearchSizeX/10;x<middlex+halfSearchSizeX/10;x+=firstSearchStepx/3) { for (int y = middley-halfSearchSizeY/10; y < middley+halfSearchSizeY/10; y+=firstSearchStepy/3) { candidates.push_back(Point(x,y)); } } } else { int middlex=img.cols/2; int middley=img.rows/2; for (int x=middlex-halfSearchSizeX;x<middlex+halfSearchSizeX;x+=firstSearchStepx) { for (int y = middley-halfSearchSizeY; y < middley+halfSearchSizeY; y+=firstSearchStepy) { candidates.push_back(Point(x,y)); } } } vector<double> candidateConfidence(candidates.size(),0.0); double largeConfi=0.0; int largindex=0; double largeConfi_motion=0.0; double largeConfi_structure=0.0; for (int i = 0; i < candidates.size(); i++) { for (int j = 0; j < linesOfTrjs.size(); j++) { if(trjlbs[j]) { if(pointToLineDis(get<0>(linesOfTrjs[j]),get<1>(linesOfTrjs[j]),candidates[i])<candidateSelectThres_traj) candidateConfidence[i]+=traj_weight; } } if(vFrmEchCue) { if (candidateConfidence[i]>largeConfi_structure) { largeConfi_structure=candidateConfidence[i]; vCues[1]=candidates[i]; } } double tem_Confidence_structure=0; for (int j = 0; j < abln_hlines.size(); j++) { if(hlnlbs[j]) { if(pointToLineDis(abln_hlines[j].first,abln_hlines[j].second,candidates[i])<candidateSelectThres_hline) candidateConfidence[i]+=hline_weight; } } if(vFrmEchCue) { if((candidateConfidence[i]-tem_Confidence_structure)>largeConfi_motion) { largeConfi_motion=candidateConfidence[i]-tem_Confidence_structure; vCues[2]=candidates[i]; } } if (candidateConfidence[i]>largeConfi) { largeConfi=candidateConfidence[i]; largindex=i; } } #ifdef presentationMode_on Mat copy=img.clone(); drawPoint(copy,skyVpts[index]); drawHlines(copy,hlines,hlnlbs); drawTrajs(copy,trjs,trjlbs); imshow("rslt",copy); copy=img.clone(); drawHlines(copy,hlines,hlnlbs,false); drawTrajs(copy,trjs,trjlbs,false); drawPoint(copy,candidates[largindex]); imshow("newrslt",copy); #endif return candidates[largindex]; }
bool f_stabilizer::proc(){ ch_image * pgryin = dynamic_cast<ch_image*>(m_chin[0]); if(pgryin == NULL) return false; ch_image * pclrin = dynamic_cast<ch_image*>(m_chin[1]); if(pclrin == NULL) return false; ch_image * pgryout = dynamic_cast<ch_image*>(m_chout[0]); if(pgryout == NULL) return false; ch_image * pclrout = dynamic_cast<ch_image*>(m_chout[1]); if(pclrout == NULL) return false; long long tgry; Mat gry = pgryin->get_img(tgry); if(gry.empty()) return true; long long tclr; Mat clr = pclrin->get_img(tclr); if(clr.empty()) return true; if(m_bthrough){ pgryout->set_img(gry, tgry); pclrout->set_img(clr, tclr); return true; } if(m_bclr) { long long timg; Mat img = pgryin->get_img(timg); pgryout->set_img(img, timg); m_num_conv_frms = m_num_frms = 0; m_bWinit = false; m_bclr = false; return true; } if(tgry != tclr) return true; // getting new gray image to be stabilized //Mat & gry = m_pgryin->get_img(); // roi saturation if(m_roi.x < 0 || m_roi.x > gry.cols) m_roi.x = 0; if(m_roi.y < 0 || m_roi.y > gry.rows) m_roi.y = 0; if(m_roi.width <= 0 || (m_roi.x + m_roi.width) >= gry.cols) m_roi.width = gry.cols - m_roi.x; if(m_roi.height <= 0 || (m_roi.y + m_roi.height) >= gry.rows) m_roi.height = gry.rows - m_roi.y; // making image pyramid of a new image buildPyramid(gry, m_pyrimg[0], m_num_pyr_level); // making a template and its pyramid if(m_refimg.rows != gry.rows && m_refimg.cols != gry.cols ) m_refimg = gry.clone(); if(m_bmask && m_Tmask.size() != m_num_pyr_level) buildPyramid(m_mask, m_Tmask, m_num_pyr_level); // making image pyramid of the template image sampled from ROI // in the previous image stabilized with warp parameters m_W buildPyramid(m_refimg(m_roi), m_pyrimg[1], m_num_pyr_level); if(!m_bWinit){ init(); m_bWinit = true; } // calculating new warp. The new warp contains the previous warp. // Warp function basically is ROI of Wnew(Original Image) = ROI of Wold(Previous Image) Mat Wnew; if(m_bmask) Wnew = m_core.calc_warp(m_pyrimg[1] /* previous image template*/, m_Tmask /* Mask image. Pixels with value zero is ignored in the error calculation.*/, m_pyrimg[0] /* image the warp to be calculated */, m_roi /* roi for template sampling */, m_W /* previous warp */); else Wnew = m_core.calc_warp(m_pyrimg[1] /* previous image */, m_pyrimg[0] /* image the warp to be calculated */, m_roi/* roi for template sampling */, m_W /* previous warp */); // convergence check if(m_core.is_conv()) m_num_conv_frms++; else // if not, the previous warp parameters are used. m_W.copyTo(Wnew); m_num_frms++; // Motion parameters are estimated as the moving average of Wnew Mat clrout, gryout; switch(m_core.get_wt()){ case EWT_AFN: case EWT_SIM: case EWT_RGD: { double theta0 = asin(m_M.at<double>(1, 0)); double theta1 = asin(Wnew.at<double>(1, 0)); // we want to cancel the rotation. but still need to be canceled slightly. // because the alignment errors cause unintended rotation. // Very small (m_alpha[0] + m_beta) sets the camear's average rotation as the stable rotation theta0 = /*(1 - m_alpha[0]) * theta0 + */(m_alpha[0] + m_beta) * theta1; double s0, c0; s0 = sin(theta0); m_M.at<double>(0, 1) = -s0; m_M.at<double>(1, 0) = s0; c0 = cos(theta0); m_M.at<double>(0, 0) = c0; m_M.at<double>(1, 1) = c0; } case EWT_TRN: // usually we dont want to cancel the horizontal motion. The coefficient m_alpha[2] + m_beta should nearly be 1.0 m_M.at<double>(0, 2) = /*(1 - m_alpha[2]) * m_M.at<double>(0, 2) +*/ (m_alpha[2] + m_beta) * Wnew.at<double>(0, 2); m_M.at<double>(1, 2) = /*(1 - m_alpha[5]) * m_M.at<double>(1, 2) +*/ (m_alpha[5] + m_beta) * Wnew.at<double>(1, 2); // Here m_M is the true motion it should not be canceled invertAffineTransform(m_M, m_iM); // Subtract m_M by multiplying Wnew and m_iM the inverse of m_M synth_afn(Wnew, m_iM, m_W); // Applying resulting warp function for both color and gray scale image warpAffine(clr, clrout, m_W, Size(clr.cols, clr.rows) , INTER_LINEAR | WARP_INVERSE_MAP); warpAffine(gry, gryout, m_W, Size(gry.cols, gry.rows) , INTER_LINEAR | WARP_INVERSE_MAP); break; case EWT_HMG: m_iM = m_M.inv(); m_W = Wnew * m_iM; warpPerspective(clr, clrout, m_W, Size(clr.cols, clr.rows) , INTER_LINEAR | WARP_INVERSE_MAP); warpPerspective(gry, gryout, m_W, Size(gry.cols, gry.rows) , INTER_LINEAR | WARP_INVERSE_MAP); break; } m_refimg = gryout; pgryout->set_img(gryout, tgry); pclrout->set_img(clrout, tclr); // output image if(!m_disp_inf) return true; cv::rectangle(clrout, m_roi, CV_RGB(255, 0, 0), 2); sprintf(m_str, "plv: %d, max_itr: %d, exit_th: %f, w: %s", m_num_pyr_level, m_core.get_num_itrs(), m_core.get_itr_exit_th(), (m_core.get_weight() ? "yes":"no")); cv::putText(clrout, m_str, Point(10, 20), FONT_HERSHEY_SIMPLEX, 0.5, CV_RGB(0, 200, 100)); sprintf(m_str, "wt : %s ipt: %s ", get_warp_type_name(m_core.get_wt()), get_interpol_type_name(m_core.get_interpol_type())); cv::putText(clrout, m_str, Point(10, 40), FONT_HERSHEY_SIMPLEX, 0.5, CV_RGB(0, 200, 100)); Size sz = m_core.get_tmpl_sblk_sz(); sprintf(m_str, "r : %s th: %f sbsz: (%d, %d)", (m_core.get_robust() ? "yes":"no"), m_core.get_robust_th(), sz.width, sz.height); cv::putText(clrout, m_str, Point(10, 60), FONT_HERSHEY_SIMPLEX, 0.5, CV_RGB(0, 200, 100)); sprintf(m_str, "Convergence ratio = %f ( %d / %d )", (double) m_num_conv_frms/ (double) m_num_frms, m_num_conv_frms, m_num_frms); cv::putText(clrout, m_str, Point(10, 80), FONT_HERSHEY_SIMPLEX, 0.5, CV_RGB(0, 200, 100)); int num_lvs = m_core.get_lv(); for(int ilv = 0; ilv < num_lvs; ilv++){ sprintf(m_str, "lv: %d, itr: %d, delta: %f pix_rat: %f, blk_rat: %f", ilv, m_core.get_num_itrs(ilv), m_core.get_delta(ilv), m_core.get_rjct_pix(ilv), m_core.get_rjct_blk(ilv)); cv::putText(clrout, m_str, Point(10, 100 + ilv * 20 ), FONT_HERSHEY_SIMPLEX, 0.5, CV_RGB(0, 200, 100)); } return true; }
//============================================================================= // Assumes that source image exists and numDownPyrs > 1, no ROIs for either // image, and both images have the same depth and number of channels bool CFastMatchTemplate::FastMatchTemplate( const Mat& source, const Mat& target, vector<Point>* foundPointsList, vector<double>* confidencesList, int matchPercentage, bool findMultipleTargets, int numMaxima, int numDownPyrs, int searchExpansion ) { // make sure that the template image is smaller than the source if(target.size().width > source.size().width || target.size().height > source.size().height) { printf( "\nSource image must be larger than target image.\n" ); return false; } if(source.depth() != target.depth()) { printf( "\nSource image and target image must have same depth.\n" ); return false; } if(source.channels() != target.channels()) { printf("%d %d\n",source.channels() , target.channels()); printf("\nSource image and target image must have same number of channels.\n" ); return false; } Size sourceSize = source.size(); Size targetSize = target.size(); // create copies of the images to modify Mat copyOfSource = source.clone(); Mat copyOfTarget = target.clone(); // down pyramid the images for(int ii = 0; ii < numDownPyrs; ii++) { // start with the source image sourceSize.width = (sourceSize.width + 1) / 2; sourceSize.height = (sourceSize.height + 1) / 2; Mat smallSource(sourceSize, source.type()); pyrDown(copyOfSource, smallSource); // prepare for next loop, if any copyOfSource = smallSource.clone(); // next, do the target targetSize.width = (targetSize.width + 1) / 2; targetSize.height = (targetSize.height + 1) / 2; Mat smallTarget(targetSize, target.type()); pyrDown(copyOfTarget, smallTarget); // prepare for next loop, if any copyOfTarget = smallTarget.clone(); } // perform the match on the shrunken images Size smallTargetSize = copyOfTarget.size(); Size smallSourceSize = copyOfSource.size(); Size resultSize; resultSize.width = smallSourceSize.width - smallTargetSize.width + 1; resultSize.height = smallSourceSize.height - smallTargetSize.height + 1; Mat result(resultSize, CV_32FC1); matchTemplate(copyOfSource, copyOfTarget, result, CV_TM_CCOEFF_NORMED); // find the top match locations Point* locations = NULL; MultipleMaxLoc(result, &locations, numMaxima); // search the large images at the returned locations sourceSize = source.size(); targetSize = target.size(); // create a copy of the source in order to adjust its ROI for searching for(int currMax = 0; currMax < numMaxima; currMax++) { // transform the point to its corresponding point in the larger image locations[currMax].x *= (int)pow(2.0f, numDownPyrs); locations[currMax].y *= (int)pow(2.0f, numDownPyrs); locations[currMax].x += targetSize.width / 2; locations[currMax].y += targetSize.height / 2; const Point& searchPoint = locations[currMax]; // if we are searching for multiple targets and we have found a target or // multiple targets, we don't want to search in the same location(s) again if(findMultipleTargets && !foundPointsList->empty()) { bool thisTargetFound = false; int numPoints = foundPointsList->size(); for(int currPoint = 0; currPoint < numPoints; currPoint++) { const Point& foundPoint = (*foundPointsList)[currPoint]; if(abs(searchPoint.x - foundPoint.x) <= searchExpansion * 2 && abs(searchPoint.y - foundPoint.y) <= searchExpansion * 2) { thisTargetFound = true; break; } } // if the current target has been found, continue onto the next point if(thisTargetFound) { continue; } } // set the source image's ROI to slightly larger than the target image, // centred at the current point Rect searchRoi; searchRoi.x = searchPoint.x - (target.size().width) / 2 - searchExpansion; searchRoi.y = searchPoint.y - (target.size().height) / 2 - searchExpansion; searchRoi.width = target.size().width + searchExpansion * 2; searchRoi.height = target.size().height + searchExpansion * 2; // make sure ROI doesn't extend outside of image if(searchRoi.x < 0) { searchRoi.x = 0; } if(searchRoi.y < 0) { searchRoi.y = 0; } if((searchRoi.x + searchRoi.width) > (sourceSize.width - 1)) { int numPixelsOver = (searchRoi.x + searchRoi.width) - (sourceSize.width - 1); searchRoi.width -= numPixelsOver; } if((searchRoi.y + searchRoi.height) > (sourceSize.height - 1)) { int numPixelsOver = (searchRoi.y + searchRoi.height) - (sourceSize.height - 1); searchRoi.height -= numPixelsOver; } Mat searchImage = Mat(source, searchRoi); // perform the search on the large images resultSize.width = searchRoi.width - target.size().width + 1; resultSize.height = searchRoi.height - target.size().height + 1; result = Mat(resultSize, CV_32FC1); matchTemplate(searchImage, target, result, CV_TM_CCOEFF_NORMED); // find the best match location double minValue, maxValue; Point minLoc, maxLoc; minMaxLoc(result, &minValue, &maxValue, &minLoc, &maxLoc); maxValue *= 100; // transform point back to original image maxLoc.x += searchRoi.x + target.size().width / 2; maxLoc.y += searchRoi.y + target.size().height / 2; if(maxValue >= matchPercentage) { // add the point to the list foundPointsList->push_back(maxLoc); confidencesList->push_back(maxValue); // if we are only looking for a single target, we have found it, so we // can return if(!findMultipleTargets) { break; } } } // if(foundPointsList->empty()) // { // printf( "\nTarget was not found to required confidence of %d.\n", // matchPercentage ); // } delete [] locations; return true; }
void callback( const sensor_msgs::ImageConstPtr& imgmsg, const humans_msgs::HumansConstPtr& kinect ) { int okao_i = 0; int no_okao_i = 0; humans_msgs::Humans okao_human, no_okao_human; cv_bridge::CvImagePtr cv_ptr; try { cv_ptr = cv_bridge::toCvCopy(imgmsg, sensor_msgs::image_encodings::BGR8); for(int i = 0; i < kinect->num; i++) { //POS head2d, head3d, neck2d; Point top, bottom; geometry_msgs::Point head2d, neck2d;//, top, bottom; head2d.x = kinect->human[i].body.joints[HEAD].position_color_space.x; head2d.y = kinect->human[i].body.joints[HEAD].position_color_space.y; neck2d.x = kinect->human[i].body.joints[SPINE_S].position_color_space.x; neck2d.y = kinect->human[i].body.joints[SPINE_S].position_color_space.y; double diff_w = fabs(head2d.y-neck2d.y); double diff_h = fabs(head2d.y-neck2d.y); top.x = head2d.x - diff_w; top.y = head2d.y - diff_h; bottom.x = head2d.x + diff_w; bottom.y = head2d.y + diff_h; /* cout << "cut (" << cut.x << "," << cut.y << ")"<<endl; if( cut.x < 0 ) cut.x = 0; else if( cut.x >= (cv_ptr->image.cols - diff_w*2) ) cut.x = cv_ptr->image.cols-diff_w*2-1; if( cut.y < 0 ) cut.y = 0; else if( cut.y >= (cv_ptr->image.rows - diff_h*2) ) cut.y = cv_ptr->image.rows-diff_h*2-1; */ top.x = max(0, top.x); top.y = max(0, top.y); bottom.x = min(cv_ptr->image.cols-1, bottom.x); bottom.y = min(cv_ptr->image.rows-1, bottom.y); if (( top.x > bottom.x || top.y > bottom.y)||( top.x == bottom.x || top.y == bottom.y)) continue; /* cout << "(" << top.x << "," << top.y << ")" << "-" << "(" << bottom.x << "," << bottom.y << ")"<<endl; cout << "diff:" << "(" << diff_w << "," << diff_h<< ")" << endl; cout << "image:" << "(" << cv_ptr->image.cols << "," << cv_ptr->image.rows << ")" << endl; */ Mat cutRgbImage; try { cutRgbImage = Mat(cv_ptr->image, cv::Rect(top, bottom)); } catch(cv_bridge::Exception& e) { ROS_ERROR("cv_bridge exception: %s",e.what()); } Mat rgbImage = cutRgbImage.clone(); if( rgbImage.cols > 1280 ) { cv::resize( rgbImage, rgbImage, cv::Size(1280, cutRgbImage.rows*1280/cutRgbImage.cols) ); } if( rgbImage.rows > 1024 ) { cv::resize( rgbImage, rgbImage, cv::Size(cutRgbImage.cols*1024/cutRgbImage.rows , 1024) ); } //rgbImage = cutRgbImage; Mat grayImage; cv::cvtColor(rgbImage,grayImage,CV_BGR2GRAY); //test //cv::rectangle( cv_ptr->image, top, // bottom, // cv::Scalar(0,200,0), 5, 8); try { cv::Mat img = grayImage; std::vector<unsigned char> buf(img.data, img.data + img.cols * img.rows * img.channels()); std::vector<int> encodeParam(2); encodeParam[0] = CV_IMWRITE_PNG_COMPRESSION; encodeParam[1] = 3; cv::imencode(".png", img, buf, encodeParam); picojson::object p; p.insert(std::make_pair("mode", picojson::value(std::string("FaceRecognition")))); p.insert(std::make_pair("format",picojson::value(std::string("PNG")))); p.insert(std::make_pair("width",picojson::value((double)img.cols))); p.insert(std::make_pair("height",picojson::value((double)img.rows))); p.insert(std::make_pair("depth",picojson::value((double)1))); picojson::value para = picojson::value(p); std::string param = para.serialize().c_str(); // リクエストメッセージの作成 OkaoServer::RequestMessage reqMsg; reqMsg.img = buf; reqMsg.param = param; // 送信 OkaoServer::sendRequestMessage(*responder, reqMsg); // 受信 OkaoServer::ReplyMessage repMsg; OkaoServer::recvReplyMessage(*responder, &repMsg); //std::cout << "repMsg.okao: " << repMsg.okao << std::endl; //std::ofstream ofs("/home/yhirai/okao_data.txt", std::ios::out | std::ios::app ); //ofs << repMsg.okao << endl; const char* json = repMsg.okao.c_str(); picojson::value v; std::string err; picojson::parse(v,json,json + strlen(json),&err); if(err.empty()) { humans_msgs::Face face_msg; humans_msgs::Body body_msg; bool p_ok = false; JsonToMsg::face(v, &face_msg, top.x, top.y, &p_ok); MsgToMsg::bodyToBody(kinect->human[i].body, &body_msg); if( p_ok ) { ++okao_i; humans_msgs::Human h; h.body = body_msg; h.face.persons.resize( OKAO ); h.face = face_msg; okao_human.human.push_back( h ); cv::Point lt(face_msg.position.lt.x, face_msg.position.lt.y); cv::Point rb(face_msg.position.rb.x, face_msg.position.rb.y); cv::Point rb_out = rb; cv::Scalar red(0,0,200); cv::Scalar green(0,200,0); cv::rectangle( cv_ptr->image, top, bottom, red, 5, 8); cv::putText( cv_ptr->image, face_msg.persons[0].name, bottom, FONT_HERSHEY_SIMPLEX, 3, red, 2, CV_AA); ros::ServiceClient client = nh_.serviceClient< okao_client::OkaoStack>("stack_add"); okao_client::OkaoStack stack; //stack.request.rule = "add"; stack.request.person = face_msg.persons[0]; sensor_msgs::Image output; cv::Mat outcutImage; cv::resize(rgbImage, outcutImage, cv::Size(128,128)); output.height = outcutImage.rows; output.width = outcutImage.cols; output.encoding = idToEncoding( outcutImage.type() ); output.step = outcutImage.cols * outcutImage.elemSize(); output.data.assign(outcutImage.data, outcutImage.data + size_t(outcutImage.rows*output.step)); stack.request.image = output; if ( !client.call(stack) ) cout << "service missing!" << endl; } else { ++no_okao_i; humans_msgs::Human h; h.body = body_msg; no_okao_human.human.push_back( h ); } } } catch(const zmq::error_t& e) { std::cout << e.what() << std::endl; return ; } } //パブリッシュ okao_human.num = okao_i; no_okao_human.num = no_okao_i; okao_human.header.stamp = no_okao_human.header.stamp = ros::Time::now(); okao_human.header.frame_id = kinect->header.frame_id; no_okao_human.header.frame_id = kinect->header.frame_id; discovery_pub_.publish(okao_human); undiscovered_pub_.publish(no_okao_human); image_pub_.publish(cv_ptr->toImageMsg()); //cv::imshow(OPENCV_WINDOW, cv_ptr->image); //cv::waitKey(1); } catch(cv_bridge::Exception& e) { ROS_ERROR("cv_bridge exception: %s",e.what()); } }
int main(int argc, char** argv) { Mat currentFrame; Mat current; Mat grayFrame; Mat gaussianblurFrame; Mat undistorted; Size frameSz; Mat dst; Mat dst2; Mat dst3; Mat dst4; Mat dst5; Mat dst6; Mat frame; VideoCapture cap(0); cap.set(CV_CAP_PROP_FRAME_WIDTH, 640); cap.set(CV_CAP_PROP_FRAME_HEIGHT, 480); ROBOT_VISUAL_S Robot_Visual; int fileid; /**串口资源初始化**/ fileid = SerialPort_open("/dev/ttyAMA3", 0); if (fileid < 0) { return fileid; } cap >> frame; const int IMAGE_HEIGHT = 480; Mat huitu; huitu.create(Size(640, 640), CV_8UC3); frameSz.height = 640; frameSz.width = 480; Mat ImgUndistort = frame.clone(); Mat CM = Mat(3, 3, CV_32FC1); Mat D; FileStorage fs2("left.yml", FileStorage::READ); fs2["cameraMatrix"] >> CM; fs2["distortionCoefficients"] >> D; fs2.release(); float * laserDotArr = new float[frameSz.height]; grayFrame.create(Size(frameSz.width, frameSz.height), CV_8UC1); undistorted.create(Size(frameSz.width, frameSz.height), CV_8UC3); MeasureResult result = { 0, 0, 0 }; char buf[1024]; unsigned int count = 0; unsigned int cycle_count = 0; while (true) { cap >> currentFrame; undistort(currentFrame, ImgUndistort, CM, D); transpose(ImgUndistort, dst); flip(dst, dst2, 1); transpose(huitu, dst4); flip(dst4, dst5, 1); cvtColor(dst2, grayFrame, COLOR_BGR2GRAY); GaussianBlur(grayFrame, gaussianblurFrame, Size(3, 3), 0, 0); for (int y = 0; y < frameSz.height; ++y) { laserDotArr[y] = findLaserCenterByCol(gaussianblurFrame.ptr<uchar>(y), frameSz.width); } for (int y = 0; y < frameSz.height; ++y) { if (laserDotArr[y] != -1) { circle(dst2, cvPoint(laserDotArr[y], y), 2, Scalar(0, 255, 255)); calcDistanceByPos(laserDotArr[y], y, IMAGE_HEIGHT, &result); if ((y+1)%TRANSMIT_FREQ != 0) { count = sprintf(buf, "%-d ", y); count += sprintf(buf + count, "%-d\n", (unsigned short)result.distance); } if ((0 == (y+1)%TRANSMIT_FREQ) && (0 == cycle_count%5)) { count += sprintf(buf + count, "%-d ", y); count += sprintf(buf + count, "%-d\n", (unsigned short)result.distance); printf("%s", buf); Robot_Visual_Data_Write(fileid, buf, count); count = 0; memset((void*)buf, '\0', sizeof(buf)); cycle_count = 0; } //printf("%d %.2f\n", y, result.distance); circle(dst5, cvPoint(result.distance, y), 2, Scalar(255, 255, 255)); char txtMsg[200]; if (y == frameSz.height / 2) { sprintf(txtMsg, "y=%.1f dist=%.2f", laserDotArr[y], result.distance); putText(dst2, txtMsg, cvPoint(laserDotArr[y], y), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(200, 200, 200), 1, 8, false); } } } cycle_count++; while(1){}; transpose(dst2, dst); flip(dst, dst3, 0); transpose(dst5, dst4); flip(dst4, dst6, 0); //imshow("dst3", dst3); //imshow("dst6", dst6); waitKey(1); } }
void captureTheFace(int picNum, Mat frame, VideoCapture cap, Mat *theFacePtr){ vector<Mat> images; vector<int> labels; // Read in the data (fails if no valid input filename is given, but you'll get an error message): try { read_csv("libs/faceLearn.csv", images, labels); } catch (cv::Exception& e) { cerr << "Error opening file \"" << "\". Reason: " << e.msg << endl; // nothing more we can do exit(1); } // Get the height from the first image. We'll need this // later in code to reshape the images to their original // size AND we need to reshape incoming faces to this size: int im_width = 92; int im_height = 112; if(images.size()){ im_width = images[0].cols; im_height = images[0].rows; } CascadeClassifier haar_cascade; haar_cascade.load(face_cascade_name); std::cout << "Press SPACE to take picture number " << picNum << "..." << endl; Mat original, gray, norm, float_gray, blur, num, den; for(;;) { cap >> frame; // Clone the current frame original = frame.clone(); // Convert the current frame to grayscale and perform illumination normalization cvtColor(original, gray, CV_BGR2GRAY); // Find the faces in the frame: vector< Rect_<int> > faces; haar_cascade.detectMultiScale(gray, faces); if(faces.size()){ // Process face: Rect face_i = faces[0]; // Crop the face from the image: Mat face = gray(face_i); cv::resize(face, face, Size(im_width, im_height), 1.0, 1.0, INTER_CUBIC); // convert to floating-point image //face.convertTo(float_gray, CV_32F, 1.0/255.0); // numerator = img - gauss_blur(img) //cv::GaussianBlur(float_gray, blur, Size(0,0), 2, 2); //num = float_gray - blur; // denominator = sqrt(gauss_blur(img^2)) //cv::GaussianBlur(num.mul(num), blur, Size(0,0), 20, 20); //cv::pow(blur, 0.5, den); // output = numerator / denominator //norm = num / den; // normalize output into [0,1] //cv::normalize(norm, norm, 0.0, 1.0, NORM_MINMAX, -1); *theFacePtr = face; // First of all draw a green rectangle around the detected face: rectangle(original, face_i, CV_RGB(0, 255,0), 1); imshow("Face", face); } // Show the result: imshow("NewFaceCapture", original); // And display it: char key = (char) waitKey(32); // Exit this loop on if(key == 32) break; } }
//============================================================================== void face_detector:: train(ft_data &data, const string fname, const Mat &ref, const bool mirror, const bool visi, const float frac, const float scaleFactor, const int minNeighbours, const Size minSize) { detector.load(fname.c_str()); detector_fname = fname; reference = ref.clone(); vector<float> xoffset(0),yoffset(0),zoffset(0); for(int i = 0; i < data.n_images(); i++){ Mat im = data.get_image(i,0); if(im.empty())continue; vector<Point2f> p = data.get_points(i,false); int n = p.size(); Mat pt = Mat(p).reshape(1,2*n); vector<Rect> faces; Mat eqIm; equalizeHist(im,eqIm); detector.detectMultiScale(eqIm,faces,scaleFactor,minNeighbours,0 |CV_HAAR_FIND_BIGGEST_OBJECT |CV_HAAR_SCALE_IMAGE,minSize); if(faces.size() >= 1){ if(visi){ Mat I; cvtColor(im,I,CV_GRAY2RGB); for(int i = 0; i < n; i++)circle(I,p[i],1,CV_RGB(0,255,0),2,CV_AA); rectangle(I,faces[0].tl(),faces[0].br(),CV_RGB(255,0,0),3); imshow("face detector training",I); waitKey(10); } //check if enough points are in detected rectangle if(this->enough_bounded_points(pt,faces[0],frac)){ Point2f center = this->center_of_mass(pt); float w = faces[0].width; xoffset.push_back((center.x - (faces[0].x+0.5*faces[0].width ))/w); yoffset.push_back((center.y - (faces[0].y+0.5*faces[0].height))/w); zoffset.push_back(this->calc_scale(pt)/w); } } if(mirror){ im = data.get_image(i,1); if(im.empty())continue; p = data.get_points(i,true); pt = Mat(p).reshape(1,2*n); equalizeHist(im,eqIm); detector.detectMultiScale(eqIm,faces,scaleFactor,minNeighbours,0 |CV_HAAR_FIND_BIGGEST_OBJECT |CV_HAAR_SCALE_IMAGE,minSize); if(faces.size() >= 1){ if(visi){ Mat I; cvtColor(im,I,CV_GRAY2RGB); for(int i = 0; i < n; i++)circle(I,p[i],1,CV_RGB(0,255,0),2,CV_AA); rectangle(I,faces[0].tl(),faces[0].br(),CV_RGB(255,0,0),3); imshow("face detector training",I); waitKey(10); } //check if enough points are in detected rectangle if(this->enough_bounded_points(pt,faces[0],frac)){ Point2f center = this->center_of_mass(pt); float w = faces[0].width; xoffset.push_back((center.x - (faces[0].x+0.5*faces[0].width ))/w); yoffset.push_back((center.y - (faces[0].y+0.5*faces[0].height))/w); zoffset.push_back(this->calc_scale(pt)/w); } } } } //choose median value Mat X = Mat(xoffset),Xsort,Y = Mat(yoffset),Ysort,Z = Mat(zoffset),Zsort; cv::sort(X,Xsort,CV_SORT_EVERY_COLUMN|CV_SORT_ASCENDING); int nx = Xsort.rows; cv::sort(Y,Ysort,CV_SORT_EVERY_COLUMN|CV_SORT_ASCENDING); int ny = Ysort.rows; cv::sort(Z,Zsort,CV_SORT_EVERY_COLUMN|CV_SORT_ASCENDING); int nz = Zsort.rows; detector_offset = Vec3f(Xsort.fl(nx/2),Ysort.fl(ny/2),Zsort.fl(nz/2)); return; }
Mat skinDetector::cannySegmentation(Mat img0, int minPixelSize) { // Segments items in gray image (img0) // minPixelSize= // -1, returns largest region only // pixels, threshold for removing smaller regions, with less than minPixelSize pixels // 0, returns all detected segments // LB: Zero pad image to remove edge effects when getting regions.... int padPixels=20; // Rect border added at start... Rect tempRect; tempRect.x=padPixels; tempRect.y=padPixels; tempRect.width=img0.cols; tempRect.height=img0.rows; Mat img1 = Mat::zeros(img0.rows+(padPixels*2), img0.cols+(padPixels*2), CV_8UC1); img0.copyTo(img1(tempRect)); // apply your filter Canny(img1, img1, 100, 200, 3); //100, 200, 3); // find the contours vector< vector<Point> > contours; findContours(img1, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE); // Mask for segmented regiond Mat mask = Mat::zeros(img1.rows, img1.cols, CV_8UC1); vector<double> areas(contours.size()); if (minPixelSize==-1) { // Case of taking largest region for(int i = 0; i < contours.size(); i++) areas[i] = contourArea(Mat(contours[i])); double max; Point maxPosition; minMaxLoc(Mat(areas),0,&max,0,&maxPosition); drawContours(mask, contours, maxPosition.y, Scalar(1), CV_FILLED); } else { // Case for using minimum pixel size for (int i = 0; i < contours.size(); i++) { if (contourArea(Mat(contours[i]))>minPixelSize) drawContours(mask, contours, i, Scalar(1), CV_FILLED); } } // normalize so imwrite(...)/imshow(...) shows the mask correctly! normalize(mask.clone(), mask, 0.0, 255.0, CV_MINMAX, CV_8UC1); Mat returnMask; returnMask=mask(tempRect); // show the images if (verboseOutput) imshow("Canny Skin: Img in", img0); if (verboseOutput) imshow("Canny Skin: Mask", returnMask); if (verboseOutput) imshow("Canny Skin: Output", img1); return returnMask; }
void ProcAbsPos::handleStart(ProjAcq& pAcq, AbsPos2D<float> const& pos) const { Acq* acq = pAcq.getAcq(); Mat rgb = acq->getMat(BGR); #ifdef WRITE_IMAGES imwrite("rgb.png", rgb); #endif #ifdef COMP_HSV Mat im_hsv = acq->getMat(HSV); #ifdef HSV_TO_HGRAY for (Mat_<Vec3b>::iterator it = im_hsv.begin<Vec3b>(); it != im_hsv.end<Vec3b>(); it++) { (*it)[1] = (*it)[0]; (*it)[2] = (*it)[0]; } #elif defined(HSV_TO_VGRAY) for (Mat_<Vec3b>::iterator it = im_hsv.begin<Vec3b>(); it != im_hsv.end<Vec3b>(); it++) { (*it)[0] = (*it)[2]; (*it)[1] = (*it)[2]; } #endif #ifdef WRITE_IMAGES imwrite("hsv.png", im_hsv); #endif /* WRITE_IMAGES */ #ifdef SHOW_IMAGES imshow("hsv", im_hsv); #endif /* SHOW_IMAGES */ #endif /* COMP_HSV */ #ifdef COMP_SIMULATED // simulate acquisition Mat _im3 = getSimulatedAt(pAcq, pos); string _bn("_im3-sa"); #ifdef WRITE_IMAGES imwrite(_bn + ".png", _im3); #endif /* WRITE_IMAGES */ #ifdef SHOW_IMAGES imshow(bn, _im3); #endif /* SHOW_IMAGES */ #ifdef COMP_HSV // show simulated acquisition in hsv Mat im3_hsv = im3.clone(); cvtColor(im3, im3_hsv, COLOR_BGR2HSV); #ifdef HSV_TO_HGRAY for (Mat_<Vec3b>::iterator it = im3_hsv.begin<Vec3b>(); it != im3_hsv.end<Vec3b>(); it++) { (*it)[1] = (*it)[0]; (*it)[2] = (*it)[0]; } #elif defined(HSV_TO_VGRAY) for(Mat_<Vec3b>::iterator it = im3_hsv.begin<Vec3b>(); it != im3_hsv.end<Vec3b>(); it++) { (*it)[0] = (*it)[2]; (*it)[1] = (*it)[2]; } #endif #ifdef WRITE_IMAGES imwrite(bn + "_hsv.png", im3_hsv); #endif /* WRITE_IMAGES */ #ifdef SHOW_IMAGES imshow(bn + "_hsv", im3_hsv); #endif /* SHOW_IMAGES */ Mat diff_hsv = im_hsv - im3_hsv; #ifdef WRITE_IMAGES imwrite(bn + "_diff_hsv.png", diff_hsv); #endif /* WRITE_IMAGES */ #ifdef SHOW_IMAGES imshow(bn + "_diff_hsv", diff_hsv); #endif /* SHOW_IMAGES */ #endif /* COMP_HSV */ #ifdef COMP_TESTPOINTS Mat _im3_tp = getTestPointsAt(pAcq, pos.getTransform().getReverse()); _bn = "_im3_tp-sa"; #ifdef WRITE_IMAGES imwrite(_bn + ".png", _im3_tp); #endif /* WRITE_IMAGES */ #ifdef SHOW_IMAGES imshow(bn, im3_tp); #endif /* SHOW_IMAGES */ #endif /* COMP_TESTPOINTS */ #endif /* COMP_SIMULATED */ #ifdef COMP_TESTPOINTS Mat _rgb_tp = rgb.clone(); addTestPointsAtTo(_rgb_tp, pAcq, pos.getTransform().getReverse()); _bn = "_im_tp-sa"; #ifdef WRITE_IMAGES imwrite(_bn + ".png", _rgb_tp); #endif /* WRITE_IMAGES */ #ifdef SHOW_IMAGES imshow(bn, rgb_tp); #endif /* SHOW_IMAGES */ #endif /* COMP_TESTPOINTS */ }
void detectFrontFaces(Mat front_body_roi, vector<Rect>boundRectFront, size_t f, Mat image1, Mat frame1_gray, Ptr<FaceRecognizer> modelFront) { vector<Rect> frontalFaces; //Detected object(s) float searchScaleFactor = 1.1; //How many sizes to search int minNeighbors = 4; //Reliability vs many faces int flags = 0 | CASCADE_SCALE_IMAGE; //Search for many faces Size minFeatureSize(30, 30); //Smallest face size frontFaceDetector.detectMultiScale(front_body_roi, frontalFaces, searchScaleFactor, minNeighbors, flags, minFeatureSize); if(frontalFaces.size() != 0){ //If faces are detected isFrontFaceDetected = true; for(size_t i = 0; i < 1; i++) { int faces_y1 = frontalFaces[i].y + boundRectFront[f].y; int faces_y2 = frontalFaces[i].y + frontalFaces[i].height + boundRectFront[f].y; Point f1(frontalFaces[i].x + boundRectFront[f].x, faces_y1); Point f2(frontalFaces[i].x + frontalFaces[i].width + boundRectFront[f].x, faces_y2); rectangle(image1, f1, f2, Scalar(0,0,255), 2, 8, 0); //~ cout << "asdf: " << frontalFaces[i].x + frontalFaces[i].width + boundRectFront[f].x << endl; Rect ROI(f1, f2); Mat faceROI = frame1_gray(ROI); Mat face_scaled; if(!faceROI.empty()) { frontFrameCounter++; if(faceROI.cols != 50 && faceROI.rows!= 50){ resize(faceROI.clone(), face_scaled, Size(50,50)); } else{ face_scaled = faceROI.clone(); } equalizeHist(face_scaled, face_scaled); imshow("Detected front face", face_scaled); if(frontFrameCounter == 1){ time_t currentTime; struct tm *localTime; time( ¤tTime ); // Get the current time localTime = localtime( ¤tTime ); // Convert the current time to the local time int Hour = localTime->tm_hour; int Min = localTime->tm_min; int Sec = localTime->tm_sec; stringstream ss; ss << "Time: " << Hour << ":" << Min << ":" << Sec; timeStamp = ss.str(); ss.str(""); } recognizeFrontFaces(face_scaled, modelFront); } } } else{ frontFrameCounter = 0; int array[10] = { a_1, b_1, c_1, d_1, e_1, f_1, g_1, h_1, i_1, j_1 }; for(int i=0;i<10;i++) { if(array[i]> maxLabel1) maxLabel1=array[i]; } } }
// // エッジ抽出実行 // int OillineDetector2::Execute(Mat &src, double *dist, double *gl_theta, int *step, Mat &result_img, bool debug){ // 初期化 *dist = *gl_theta = 0.0; // 前回と同じ画像であれば、前回の結果を返す if( is_same_image( src, _last_img ) ){ *dist = _last_dist; *gl_theta = _last_gl_theta; result_img = _last_result_img.clone(); return _last_exec_ret; } // source image int w = src.cols, h = src.rows; // Red Channel vector<Mat> bgr; split( src, bgr); Mat red_img = bgr[2]; // 前処理 -----> // ガンマ補正 Mat gamma_img = auto_gamma_correction(red_img, _gamma_base, _gamma_scale); // ガウシアンフィルタ Mat gauss; cv::GaussianBlur(gamma_img, gauss, Size( _gaussian_window, _gaussian_window), _gaussian_sigma, _gaussian_sigma); // <----前処理終了 Mat gray = gauss; // // 各縦ラインのピーク取得 // vector< vector<int> > peaks; for( int x=_peak_margin; x<gray.cols-_peak_margin; x+=_peak_interval ){ vector<int> pks = find_vertical_peaks( gray, x, _peak_isolate_interval, _peak_count, _peak_min ); peaks.push_back(pks); } // // ピークをチェーン化する // vector< vector<Point> > chains; chains = make_chains( peaks, _chain_max_interval, _peak_margin, _peak_interval ); // // ぐちゃぐちゃしたチェーンを削除 // remove_noisy_chains( chains, _chain_noisy_th ); // // のっぺりとしたピークを削除(線っぽいピークだけに絞る) // remove_flat_peaks( chains, gray, _flat_peak_th ); // // 短いチェーンを削除 // for( int i=0; i<chains.size(); i++ ){ if(chains[i].size() < _chain_min_size ){ chains.erase( chains.begin() + i ); i--; } } // // 平均輝度の高いものに絞る // focus_top_chains( chains, gray, _max_slit_cnt ); // // 抜けた箇所を線形補間する // vector< vector<double> > intplChains; intplChains = interpolate_chains( chains ); // ここまでのchain 画像作成 Mat chain_img = Mat::zeros( h, w, CV_8UC3); for( int i=0; i<chains.size(); i++ ) polylines( chain_img, chains[i], false, Scalar(215,176,255) ); // 平滑化で細かい振動を除去 vector< vector<double> > smoothChains; for( int i=0; i<intplChains.size(); i++ ){ vector<double> chain = smoothing( intplChains[i], _smooth_win_size ); smoothChains.push_back(chain); } // 両サイドの段差を調べる int l_step = is_sidestep( chains, smoothChains, true, chain_img); int r_step = is_sidestep( chains, smoothChains, false, chain_img); *step = l_step + 2*r_step; // // 曲率を計算 // vector< vector<double> > curves; for( int i=0; i<intplChains.size(); i++ ){ vector<double> curve = calc_curvature( smoothChains[i], _curve_interval ); curves.push_back(curve); } // // 最大&最小曲率を求める // vector<int> curve_max_indexes; vector<int> curve_min_indexes; for( int i=0; i<curves.size(); i++ ){ int max = 0, min=0; for( int j=0; j<curves[i].size(); j++ ){ if(curves[i][j] > curves[i][max]) max = j; if(curves[i][j] < curves[i][min]) min = j; } curve_max_indexes.push_back(max); curve_min_indexes.push_back(min); } // // 最大曲率位置(=上向きエッジ点)を求める // vector<Point2d> up_edge_points; for( int i=0; i<curve_max_indexes.size(); i++ ) { double curvature = curves[i][curve_max_indexes[i]]; if( curvature > _th_curve_max ){ double x = (double)(chains[i][0].x + curve_max_indexes[i]); double y = intplChains[i][curve_max_indexes[i]]; Point2d p(x,y); up_edge_points.push_back(p); } } // // 最小曲率位置(=下向きエッジ点)を求める // vector<Point2d> down_edge_points; for( int i=0; i<curve_min_indexes.size(); i++ ) { double curvature = curves[i][curve_min_indexes[i]]; if( fabs(curvature) > _th_curve_max ){ double x = (double)(chains[i][0].x + curve_min_indexes[i]); double y = intplChains[i][curve_min_indexes[i]]; Point2d p(x,y); down_edge_points.push_back(p); } } // // 俯瞰画像の座標に変換 // vector<Point2d> up_bird_points = to_bird_coordinate(up_edge_points); vector<Point2d> down_bird_points = to_bird_coordinate(down_edge_points); // // 俯瞰画像上の直線(rhoとtheta)を求める // double rho, theta; bool success_line = get_rho_theta( down_bird_points, &rho, &theta); // // 実空間上のrhoとthetaを求める // double gl_rho; if( success_line ) real_world_value(rho, theta, &gl_rho, gl_theta, dist); // // 前回の検出との差が許容内か調べる // bool accept_dif = is_acceptable_difference( _last_line, Vec2d(gl_rho, *gl_theta), _th_rho_mm, _th_theta_deg*CV_PI/180.0 ); // // 俯瞰画像に結果描画 // // 原画像 -> 校正画像 -> 俯瞰画像 作成 Mat calib_img; cameara_calibrate(src, calib_img); Mat bird_img; make_birdimg(calib_img, bird_img); // 描画色選定 Scalar result_color; if( !success_line ) result_color = Scalar(0,0,255); //Red else if( !accept_dif ) result_color = Scalar(255,64,64); // Blue else result_color = Scalar(0,255,0); // エッジ線描画 vector<Vec2d> ln; ln.push_back(Vec2d(rho, theta)); if( success_line ) bird_img = draw_lines(bird_img, ln, result_color); // 検出エッジ点描画 for( int i=0; i<down_bird_points.size(); i++ ){ int x = cvRound( down_bird_points[i].x ); int y = cvRound( down_bird_points[i].y ); circle( bird_img, Point(x,y), 5, result_color ); } // 検出エッジ点描画(補助側) for( int i=0; i<up_bird_points.size(); i++ ){ int x = cvRound( up_bird_points[i].x ); int y = cvRound( up_bird_points[i].y ); circle( bird_img, Point(x,y), 3, result_color ); } // // 結果画像作成 // Mat empty; stackImages(empty); if( debug ){ stackImages(src); stackImages(red_img); stackImages(chain_img); } result_img = stackImages(bird_img); // // 実空間上で直線と点との平均距離(溶接幅)を算出する(チェック用) // if( success_line && up_bird_points.size() > 0 ) _last_weld_width = average_distance_points_and_line( rho, theta, up_bird_points ); else _last_weld_width = -1.0; // // 終了 // // 最後の処理結果を取っておく _last_img = src.clone(); _last_dist = *dist; _last_gl_theta = *gl_theta; _last_line = Vec2d(gl_rho, *gl_theta); _last_result_img = result_img.clone(); // 失敗終了 if( !success_line || !accept_dif) return (_last_exec_ret = 0); // 成功終了 return (_last_exec_ret = 1); }
int main(int argc, char** argv) { //VL_PRINT ("Hello world!") ; try { TCLAP::CmdLine cmd("EmbAttSpotter tester", ' ', "0.1"); TCLAP::SwitchArg evalF("e","eval","Evaluate against dataset", cmd, false); TCLAP::SwitchArg evalSubF("s","evalSubwordSpotting","Evaluate subword spotting results", cmd, false); TCLAP::SwitchArg evalSubCombF("a","evalSubwordSpottingCombine","Evaluate subword spotting results when combining exemplars", cmd, false); TCLAP::ValueArg<int> primeSubwordArg("p","primeSubword","save the cca att for the given string length", false, -1,"int"); cmd.add( primeSubwordArg ); TCLAP::ValueArg<string> modelArg("l","location","load/save prefix", false,"model/evalGW","string"); cmd.add( modelArg ); TCLAP::SwitchArg testF("t","test","Run unit tests", cmd, false); TCLAP::ValueArg<int> imageArg("i","image","show visual result", false,-1,"int"); cmd.add( imageArg ); TCLAP::ValueArg<int> image2Arg("c","image2","compare images", false,-1,"int"); cmd.add( image2Arg ); TCLAP::ValueArg<string> compare1Arg("1","compare1","1st image for comparison", false,"","string"); cmd.add( compare1Arg ); TCLAP::ValueArg<string> compare2Arg("2","compare2","2nd image for comparison", false,"","string"); cmd.add( compare2Arg ); TCLAP::SwitchArg retrainAttReprTrF("6","attReprTr","Retrain attReprTr", cmd, false); TCLAP::SwitchArg retrainEmbeddingF("5","embedding","Retrain embedding", cmd, false); TCLAP::ValueArg<string> trainFileArg("9","trainfile","training file: *.gtp for 'docimagefile lx ty rx by gt', *.txt for 'imagefile gt'", false,"test/queries_train.gtp","string"); cmd.add( trainFileArg ); TCLAP::ValueArg<string> testFileArg("8","testfile","testing file: *.gtp for 'docimagefile lx ty rx by gt', *.txt for 'imagefile gt'", false,"test/queries_test.gtp","string"); cmd.add( testFileArg ); TCLAP::ValueArg<string> exemplarFileArg("7","exemplars","exemplar file: *.gtp for 'docimagefile lx ty rx by gt', *.txt for 'imagefile gt'", false,"test/exemplars.txt","string"); cmd.add( exemplarFileArg ); //TCLAP::ValueArg<string> exemplarLocArg("7","exemplar_locations","exemplar locations file", false,"test/exemplars.txt","string"); //cmd.add( exemplarLocArg ); TCLAP::ValueArg<string> imageDirArg("d","images","directory containing images", false,"/home/brian/intel_index/brian_handwriting/EmbeddedAtt_Almazan/datasets/GW/images/","string"); cmd.add( imageDirArg ); TCLAP::ValueArg<string> exemplarDirArg("x","exemplarsDir","directory containing exemplar images", false,"/home/brian/intel_index/data/gw_20p_wannot/bigrams_clean_deslant/","string"); cmd.add( exemplarDirArg ); TCLAP::ValueArg<string> fullFileArg("f","full","do a full sliding window subword spotting on this image", false,"","string"); cmd.add( fullFileArg ); TCLAP::ValueArg<float> hyarg("y","hybridalpha","hybrid alpha, 0 text only, 1 image only", false,-1,"float"); cmd.add( hyarg ); TCLAP::ValueArg<string> charSegCSVArg("3","charSeg","char seg csv file: 'word,pageId,x1,y1,x2,y2,char1start,char1end,char2start,...'", false,"","string"); cmd.add( charSegCSVArg ); cmd.parse( argc, argv ); if ( testF.getValue() ) { EmbAttSpotter spotter("testing",false,true,1); spotter.test(); } if ( evalF.getValue() ) { EmbAttSpotter spotter(modelArg.getValue()); GWDataset train(trainFileArg.getValue(),imageDirArg.getValue()); GWDataset test(testFileArg.getValue(),imageDirArg.getValue()); spotter.setTraining_dataset(&train); if ( retrainAttReprTrF.getValue() ) spotter.attReprTr(true); if ( retrainEmbeddingF.getValue() ) spotter.embedding(true); spotter.eval(&test); } if (primeSubwordArg.getValue()>=0) { EmbAttSpotter spotter(modelArg.getValue()); GWDataset train(trainFileArg.getValue(),imageDirArg.getValue()); GWDataset test(testFileArg.getValue(),imageDirArg.getValue()); spotter.setTraining_dataset(&train); spotter.setCorpus_dataset(&test); spotter.primeSubwordSpotting(primeSubwordArg.getValue()); } if ( evalSubF.getValue() || evalSubCombF.getValue()) { EmbAttSpotter spotter(modelArg.getValue()); GWDataset train(trainFileArg.getValue(),imageDirArg.getValue()); GWDataset test(testFileArg.getValue(),imageDirArg.getValue()); GWDataset exemplars(exemplarFileArg.getValue(),exemplarDirArg.getValue()); spotter.setTraining_dataset(&train); if ( retrainAttReprTrF.getValue() ) spotter.attReprTr(true); if ( retrainEmbeddingF.getValue() ) spotter.embedding(true); if (charSegCSVArg.getValue().length()>0) { vector< vector<int> > corpusXLetterStartBoundsRel; vector< vector<int> > corpusXLetterEndBoundsRel; ifstream in (charSegCSVArg.getValue()); string line; //getline(in,line);//header while (getline(in,line)) { string s; std::stringstream ss(line); getline(ss,s,','); getline(ss,s,','); getline(ss,s,',');//x1 int x1=stoi(s); getline(ss,s,','); getline(ss,s,',');//x2 getline(ss,s,','); vector<int> lettersStartRel, lettersEndRel; while (getline(ss,s,',')) { lettersStartRel.push_back(stoi(s)-x1); getline(ss,s,','); lettersEndRel.push_back(stoi(s)-x1); //getline(ss,s,',');//conf } corpusXLetterStartBoundsRel.push_back(lettersStartRel); corpusXLetterEndBoundsRel.push_back(lettersEndRel); } in.close(); spotter.evalSubwordSpottingWithCharBounds(&exemplars, &test, &corpusXLetterStartBoundsRel, &corpusXLetterEndBoundsRel, hyarg.getValue()); } else if ( evalSubF.getValue() ) spotter.evalSubwordSpotting(&exemplars, &test, hyarg.getValue()); else spotter.evalSubwordSpottingCombine(&exemplars, &test, hyarg.getValue()); } if ( imageArg.getValue()>=0 ) { EmbAttSpotter spotter(modelArg.getValue()); GWDataset train(trainFileArg.getValue(),imageDirArg.getValue()); GWDataset test(testFileArg.getValue(),imageDirArg.getValue()); spotter.setTraining_dataset(&train); spotter.setCorpus_dataset(&test); int ex=imageArg.getValue(); if ( image2Arg.getValue()>=0 ) { int ex2= image2Arg.getValue(); double score = spotter.compare(test.image(ex),test.image(ex2)); cout <<"score: "<<score<<endl; } else { cout <<"test word "<<test.labels()[ex]<<endl; imshow("test word",test.image(ex)); vector<float> scores = spotter.spot(test.image(ex), "", 1); multimap<float, int> ranked; for (int i=0; i<scores.size(); i++) ranked.emplace(scores[i],i); auto iter = ranked.end(); for (int i=1; i<=5; i++) { iter--; cout<<"I rank "<<i<<" is "<<iter->second<<" with score "<<iter->first<<endl; imshow("I "+to_string(i),test.image(iter->second)); } scores = spotter.spot(test.image(ex), test.labels()[ex], 0); ranked.clear(); for (int i=0; i<scores.size(); i++) ranked.emplace(scores[i],i); iter = ranked.end(); for (int i=1; i<=5; i++) { iter--; cout<<"T rank "<<i<<" is "<<iter->second<<" with score "<<iter->first<<endl; imshow("T "+to_string(i),test.image(iter->second)); } waitKey(); } } if (compare1Arg.getValue().length()>0 && compare2Arg.getValue().length()>0) { EmbAttSpotter spotter(modelArg.getValue()); Mat im1 = imread(compare1Arg.getValue(),CV_LOAD_IMAGE_GRAYSCALE); Mat im2 = imread(compare2Arg.getValue(),CV_LOAD_IMAGE_GRAYSCALE); //im1 = GWDataset::preprocess(im1); //im2 = GWDataset::preprocess(im2); cout<<spotter.compare(im1,im2)<<endl;; } if (fullFileArg.getValue().length()>0) { assert(compare1Arg.getValue().length()>0); EmbAttSpotter spotter(modelArg.getValue()); StrideDataset im(fullFileArg.getValue()); spotter.setCorpus_dataset_fullSub(&im); Mat ex = imread(compare1Arg.getValue(),CV_LOAD_IMAGE_GRAYSCALE); clock_t start = clock(); time_t start2 = time(0); vector< SubwordSpottingResult > res = spotter.subwordSpot_full(ex,"",1); clock_t end = clock(); time_t end2 = time(0); double time = (double) (end-start) / CLOCKS_PER_SEC; double time2 = difftime(end2, start2) * 1000.0; cout<<"Took "<<time<<" secs."<<endl; cout<<"Took "<<time2<<" secs."<<endl; Mat img = imread(fullFileArg.getValue()); Mat orig = img.clone(); int top=100; float maxS = res[0].score; float minS = res[top].score; for (int i=0; i<top; i++) { float s = 1-((res[i].score-minS)/(maxS-minS)); //cout <<res[i].score<<": "<<s<<endl; for (int x=res[i].startX; x<=res[i].endX; x++) for (int y=res[i].imIdx*3; y<res[i].imIdx*3 +65; y++) { Vec3b& p = img.at<Vec3b>(y,x); Vec3b o = orig.at<Vec3b>(y,x); p[0] = min(p[0],(unsigned char)(o[0]*s)); } } imwrite("spotting.png",img); imshow("spotting",img); waitKey(); } } catch (TCLAP::ArgException &e) // catch any exceptions { std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl; } }
Mat BilateralFilter(const Mat& ImRef, const Mat& ImMsk, const Mat& ImSrc, const int wndSZ, double sig_sp, const double sig_clr) { // filter signal must be 1 channel CV_Assert(ImSrc.type() == CV_32FC1 ); // range parameter is half window size sig_sp = wndSZ / 2.0f; int H = ImRef.rows; int W = ImRef.cols; int H_WD = wndSZ / 2; Mat ImDst = ImSrc.clone(); if( 1 == ImRef.channels() ) { for( int y = 0; y < H; y ++ ) { uchar * pmsk = (uchar *)(ImMsk.ptr<uchar>( y )); //imMsk float * psrc = (float *)(ImSrc.ptr<float>( y )); //imSrc; float * pdst = (float *)(ImDst.ptr<float>( y )); //imDst double * pref = (double *)(ImRef.ptr<double>)( y ); //imRef for( int x = 0; x < W; x ++ ) { if(pmsk[x] == 0) continue; double sum = 0.0f; double sumWgt = 0.0f; for( int wy = - H_WD; wy <= H_WD; wy ++ ) { int qy = y + wy; if( qy < 0 ) { qy += H; } if( qy >= H ) { qy -= H; } uchar * qmsk = (uchar *)(ImMsk.ptr<uchar>( qy )); float * qsrc = (float *)(ImSrc.ptr<float>( qy )); //imSrc double * qref = (double *)(ImRef.ptr<double>( qy )); //imRef for( int wx = - H_WD; wx <= H_WD; wx ++ ) { int qx = x + wx; if( qx < 0 ) { qx += W; } if( qx >= W ) { qx -= W; } if( qmsk[x] == 0) continue; double spDis = wx * wx + wy * wy; double clrDis = fabs( qref[ qx ] - pref[ x ] ); double wgt = exp( - spDis / ( sig_sp * sig_sp ) - clrDis * clrDis / ( sig_clr * sig_clr ) ); sum += wgt * qsrc[ qx ]; sumWgt += wgt; } } pdst[ x ] = sum / sumWgt; } } } else if( 3 == ImRef.channels() ) { for( int y = 0; y < H; y ++ ) { uchar * pmsk = (uchar *)(ImMsk.ptr<uchar>(y)); float * psrc = (float *)(ImSrc.ptr<float>(y)); float * pdst = (float *)(ImDst.ptr<float>(y)); double * pref = (double *)(ImRef.ptr<double>(y)); for( int x = 0; x < W; x ++ ) { if(pmsk[x] == 0) continue; double * pClr = pref + 3 * x; double sum = 0.0f; double sumWgt = 0.0f; for( int wy = - H_WD; wy <= H_WD; wy ++ ) { int qy = y + wy; if( qy < 0 ) { qy += H; } if( qy >= H ) { qy -= H; } uchar * qmsk = (uchar *)(ImMsk.ptr<uchar>(qy)); float * qsrc = (float *)(ImSrc.ptr<float>(qy)); double * qref = (double *)(ImRef.ptr<double>(qy)); for( int wx = - H_WD; wx <= H_WD; wx ++ ) { int qx = x + wx; if( qx < 0 ) { qx += W; } if( qx >= W ) { qx -= W; } if(qmsk[qx] == 0) continue; double * qClr = qref + 3 * qx; double spDis = wx * wx + wy * wy; double clrDis = 0.0f; for( int c = 0; c < 3; c++ ) { clrDis += fabs( pClr[ c ] - qClr[ c ] ); } clrDis *= 0.333333333; double wgt = exp( - spDis / ( sig_sp * sig_sp ) - clrDis * clrDis / ( sig_clr * sig_clr ) ); sum += wgt * qsrc[ qx ]; sumWgt += wgt; } } pdst[ x ] = sum / sumWgt; } } } return ImDst; }
tResult cParkingDirection::ProcessInput(IMediaSample* pSample) { // VideoInput RETURN_IF_POINTER_NULL(pSample); cObjectPtr<IMediaSample> pNewRGBSample; const tVoid* l_pSrcBuffer; if (IS_OK(pSample->Lock(&l_pSrcBuffer))) { Mat image; image = Mat(m_sInputFormat.nHeight,m_sInputFormat.nWidth,CV_8UC1,(tVoid*)l_pSrcBuffer,m_sInputFormat.nBytesPerLine); Mat transformedImage = image(Rect(0,480,200,PROJECTED_IMAGE_HEIGTH)).clone(); Mat sobeledImage = image(Rect(0,480+250,200,PROJECTED_IMAGE_HEIGTH)).clone(); Mat groundPlane = image(Rect(0,480+2*250,200,PROJECTED_IMAGE_HEIGTH)).clone(); pSample->Unlock(l_pSrcBuffer); //create an output image for debugging Mat generalOutputImage(300,800,CV_8UC3,Scalar(0,0,0)); Mat transformedCol; cvtColor(transformedImage,transformedCol,CV_GRAY2BGR); /* tUInt32 stamp = 0; SendSignalValueMessage(&m_oDistanceOutputPin,cModel.getDistToCrossing(),stamp);*/ pModel.cameraUpdate(transformedImage,sobeledImage,groundPlane); tFloat32 parallel = pModel.certaintyParallel / (pModel.certaintyParallel+pModel.certaintyCross); tFloat32 cross = pModel.certaintyCross / (pModel.certaintyParallel+pModel.certaintyCross); //LOG_INFO(cString::Format("ParallelC: %f, CrossC: %f",parallel,cross)); if(parallel > cross) SendSignalValueMessage(&m_oDirectionOutputPin, 2, 0); else if(cross > parallel) SendSignalValueMessage(&m_oDirectionOutputPin, 1, 0); else SendSignalValueMessage(&m_oDirectionOutputPin, 0, 0); #ifdef PAINT_OUTPUT Point2d carCenter(100,0); Mat ipmColor = transformedCol.clone(); pModel.paintDebugInfo(ipmColor); circle(ipmColor,Point(3,ipmColor.rows-1-80),2,Scalar(0,255,255),-1); circle(ipmColor,Point(3,ipmColor.rows-1),2,Scalar(0,255,255),-1); circle(ipmColor,Point(ipmColor.cols-3,ipmColor.rows-1),2,Scalar(0,255,255),-1); circle(ipmColor,Point(ipmColor.cols-3,ipmColor.rows-1-80),2,Scalar(0,255,255),-1); circle(ipmColor,Point(126,50),2,Scalar(0,255,255),-1); circle(ipmColor,Point(74,50),2,Scalar(0,255,255),-1); ipmColor.copyTo(generalOutputImage(Rect(0,0,ipmColor.cols,ipmColor.rows))); #endif #ifdef PAINT_OUTPUT cObjectPtr<IMediaSample> pNewRGBSample; if (IS_OK(AllocMediaSample(&pNewRGBSample))) { tTimeStamp tmStreamTime = _clock ? _clock->GetStreamTime() : adtf_util::cHighResTimer::GetTime(); pNewRGBSample->Update(tmStreamTime, generalOutputImage.data, tInt32(3*800*300), 0); RETURN_IF_FAILED(m_oVideoOutputPin.Transmit(pNewRGBSample)); } #endif } RETURN_NOERROR; }
int main( int argc, const char* argv[] ) { cv::Mat sourceImage = cv::imread(fileName4); namedWindow("Original Image", WINDOW_AUTOSIZE); // Create a window for display. imshow("Original Image", sourceImage); // Show our image inside it. cvtColor(sourceImage, sourceImage, CV_BGR2GRAY); Mat img = sourceImage.clone(); Mat tmp, dst; IplImage ipl = sourceImage; cv::Mat m = cv::cvarrToMat(&ipl); // default additional arguments: don't copy data. cvSmooth(&ipl, &ipl, CV_GAUSSIAN, 9, 9, 0); cv::threshold(m, dst, 50, 255, cv::THRESH_BINARY | cv::THRESH_OTSU); namedWindow("Binarized2", CV_WINDOW_AUTOSIZE); //create a window with the name "Binarized" imshow("Binarized2", dst); //cv::threshold(img, img, 50, 255, cv::THRESH_BINARY | cv::THRESH_OTSU); CvMat cvmat = m; cvAdaptiveThreshold(&cvmat, &cvmat, 255, CV_ADAPTIVE_THRESH_GAUSSIAN_C, CV_THRESH_BINARY, 13, 1); localThreshold::binarisation(img, 41, 56); Mat binImg = img.clone(); ideka::binOptimisation(img); namedWindow( "Binarised Image", WINDOW_AUTOSIZE ); // Create a window for display. imshow( "Binarised Image", binImg ); // Show our image inside it. namedWindow( "Optimised Image", WINDOW_AUTOSIZE ); // Create a window for display. imshow( "Optimised Image", img ); // Show our image inside it. // Create markers image /*cv::Mat markers(m.size(), CV_8U, cv::Scalar(-1)); //Rect(topleftcornerX, topleftcornerY, width, height); //top rectangle markers(Rect(0, 0, m.cols, 10)) = Scalar::all(1); //bottom rectangle markers(Rect(0, m.rows - 10, m.cols, 10)) = Scalar::all(1); //left rectangle markers(Rect(0, 0, 10, m.rows)) = Scalar::all(1); //right rectangle markers(Rect(m.cols - 10, 0, 10, m.rows)) = Scalar::all(1); //centre rectangle int centreW = m.cols / 3; int centreH = m.rows / 3; markers(Rect((m.cols / 2) - (centreW / 2), (m.rows / 2) - (centreH / 2), centreW, centreH)) = Scalar::all(3); markers.convertTo(markers, CV_BGR2GRAY); imshow("markers", markers); //Create watershed segmentation object Mat dest1; WatershedSegmenter segmenter; segmenter.setMarkers(markers); cv::Mat wshedMask = segmenter.process(sourceImage); cv::Mat mask; convertScaleAbs(wshedMask, mask, 1, 0); double thresh = threshold(mask, mask, 1, 255, THRESH_BINARY); bitwise_and(m, m, dest1, mask); dest1.convertTo(dest1, CV_8U); imshow("final_result", dest1);*/ //skeletionizing cv::bitwise_not(img, img); //Inverse for bit-operations GuoHall::thinning(img); cv::bitwise_not(img, img); namedWindow( "Skeletenised Image", WINDOW_AUTOSIZE ); // Create a window for display. imshow( "Skeletenised Image", img ); // Show our image inside it. //Minutiae-Extraction vector<Minutiae> minutiae; crossingNumber::getMinutiae(img, minutiae, 30); cout<<"Number of Minutiae Results : " << minutiae.size() << endl; //Visualisation Mat minutImg = img.clone(); cvtColor(img, minutImg, CV_GRAY2RGB); for(std::vector<Minutiae>::size_type i = 0; i<minutiae.size(); i++){ //add an transparent square at each minutiae-location int squareSize = 5; //has to be uneven Mat roi = minutImg(Rect(minutiae[i].getLocX()-squareSize/2, minutiae[i].getLocY()-squareSize/2, squareSize, squareSize)); double alpha = 0.3; if(minutiae[i].getType() == Minutiae::Type::RIDGEENDING){ Mat color(roi.size(), CV_8UC3, cv::Scalar(255,0,0)); //blue square for ridgeending addWeighted(color, alpha, roi, 1.0 - alpha , 0.0, roi); }else if(minutiae[i].getType() == Minutiae::Type::BIFURCATION){ Mat color(roi.size(), CV_8UC3, cv::Scalar(0,0,255)); //red square for bifurcation addWeighted(color, alpha, roi, 1.0 - alpha , 0.0, roi); } } namedWindow( "Minutie", WINDOW_AUTOSIZE ); // Create a window for display. imshow( "Minutie", minutImg ); // Show our image inside it. //Minutiae-filtering Filter::filterMinutiae(minutiae); cout<<"Number minutiae after filtering : " << minutiae.size() << endl; Mat minutImg2 = img.clone(); cvtColor(img, minutImg2, CV_GRAY2RGB); for(std::vector<Minutiae>::size_type i = 0; i<minutiae.size(); i++){ //add an transparent square at each minutiae-location int squareSize = 5; //has to be uneven Mat roi = minutImg2(Rect(minutiae[i].getLocX()-squareSize/2, minutiae[i].getLocY()-squareSize/2, squareSize, squareSize)); double alpha = 0.3; if(minutiae[i].getType() == Minutiae::Type::RIDGEENDING){ Mat color(roi.size(), CV_8UC3, cv::Scalar(255,0,0)); //blue square for ridgeending addWeighted(color, alpha, roi, 1.0 - alpha , 0.0, roi); }else if(minutiae[i].getType() == Minutiae::Type::BIFURCATION){ Mat color(roi.size(), CV_8UC3, cv::Scalar(0,0,255)); //red square for bifurcation addWeighted(color, alpha, roi, 1.0 - alpha , 0.0, roi); } } namedWindow( "Filtered Minutiae", WINDOW_AUTOSIZE ); // Create a window for display. imshow( "Filtered Minutiae", minutImg2 ); // Show our image inside it. time_t t = time(0); //get time now struct tm * now = localtime(&t); string time = std::to_string(now->tm_year) + 1900 + '-' + std::to_string(now->tm_mon + 1) + '-' + std::to_string(now->tm_mday); imwrite("foo2.png", minutImg2); //save minutiae with unique time for matching waitKey(0); // Wait for a keystroke in the window return 0; }
bool ht_fl_estimate(headtracker_t& ctx, Mat& frame, const Rect roi, Mat& rvec_, Mat& tvec_) { int bbox[4]; bbox[0] = roi.x; bbox[1] = roi.y; #if 0 bbox[2] = max(roi.width, roi.height) + roi.x; bbox[3] = max(roi.width, roi.height) + roi.y; if (bbox[0] + bbox[2] > frame.cols) bbox[2] = bbox[3] = frame.cols - bbox[0]; if (bbox[1] + bbox[1] > frame.rows) bbox[2] = bbox[3] = frame.rows - bbox[1]; #else bbox[2] = roi.width + roi.x; bbox[3] = roi.height + roi.y; #endif IplImage c_image = frame; double landmarks[fl_count * 2]; if (flandmark_detect(&c_image, bbox, ctx.flandmark_model, landmarks)) return false; Point2f left_eye_right = Point2f( landmarks[2 * fl_left_eye_int], landmarks[2 * fl_left_eye_int + 1]); Point2f left_eye_left = Point2f( landmarks[2 * fl_left_eye_ext], landmarks[2 * fl_left_eye_ext + 1]); Point2f right_eye_left = Point2f( landmarks[2 * fl_right_eye_int], landmarks[2 * fl_right_eye_int + 1]); Point2f right_eye_right = Point2f( landmarks[2 * fl_right_eye_ext], landmarks[2 * fl_right_eye_ext + 1]); Point2f nose = Point2f(landmarks[2 * fl_nose], landmarks[2 * fl_nose + 1]); Point2f mouth_left = Point2f( landmarks[2 * fl_mouth_left], landmarks[2 * fl_mouth_left + 1]); Point2f mouth_right = Point2f( landmarks[2 * fl_mouth_right], landmarks[2 * fl_mouth_right + 1]); vector<Point2f> image_points(7); vector<Point3f> object_points(7); object_points[0] = Point3d(-0.03387, -0.03985, 0.14169); object_points[1] = Point3d(0.03387, -0.03985, 0.14169); object_points[2] = Point3d(-0.08307, -0.04124, 0.1327); object_points[3] = Point3d(0.08307, -0.04124, 0.1327); object_points[5] = Point3d(-0.04472, 0.08171, 0.15877); object_points[6] = Point3d(0.04472, 0.08171, 0.15877); object_points[4] = Point3d(0, 0.0335, 0.19386); for (unsigned i = 0; i < object_points.size(); i++) { object_points[i].x *= 100; object_points[i].y *= 100; object_points[i].z *= 100; } image_points[0] = left_eye_right; image_points[1] = right_eye_left; image_points[2] = left_eye_left; image_points[3] = right_eye_right; image_points[5] = mouth_left; image_points[6] = mouth_right; image_points[4] = nose; Mat intrinsics = Mat::eye(3, 3, CV_32FC1); intrinsics.at<float> (0, 0) = ctx.focal_length_w; intrinsics.at<float> (1, 1) = ctx.focal_length_h; intrinsics.at<float> (0, 2) = ctx.grayscale.cols/2; intrinsics.at<float> (1, 2) = ctx.grayscale.rows/2; Mat dist_coeffs = Mat::zeros(5, 1, CV_32FC1); Mat rvec = Mat::zeros(3, 1, CV_64FC1); Mat tvec = Mat::zeros(3, 1, CV_64FC1); for (int i = 0; i < 5; i++) dist_coeffs.at<float>(i) = ctx.config.dist_coeffs[i]; rvec.at<double> (0, 0) = 1.0; tvec.at<double> (0, 0) = 1.0; tvec.at<double> (1, 0) = 1.0; if (ctx.has_pose) { rvec = ctx.rvec.clone(); tvec = ctx.tvec.clone(); } if (ctx.has_pose) { if (!solvePnP(object_points, image_points, intrinsics, dist_coeffs, rvec, tvec, true, HT_PNP_TYPE)) return false; } else { if (!solvePnP(object_points, image_points, intrinsics, dist_coeffs, rvec, tvec, false, cv::SOLVEPNP_EPNP)) return false; if (!solvePnP(object_points, image_points, intrinsics, dist_coeffs, rvec, tvec, true, HT_PNP_TYPE)) return false; } if (ctx.has_pose) { vector<Point2f> image_points2; projectPoints(object_points, ctx.rvec, ctx.tvec, intrinsics, dist_coeffs, image_points2); Scalar color(0, 0, 255); float mult = ctx.color.cols / (float)ctx.grayscale.cols; Scalar color2(255, 255, 255); float error = 0; static const float error_weights[7] = { 0.1, 0.1, 1, 1, 1., 0.2, 0.2, }; for (unsigned i = 0; i < image_points.size(); i++) { float tmp1 = image_points[i].x - image_points2[i].x; float tmp2 = image_points[i].y - image_points2[i].y; error += error_weights[i] * (tmp1 * tmp1 + tmp2 * tmp2); } error /= ctx.zoom_ratio; const float max_error = 45.; if (error > max_error) return false; if (ctx.config.debug) { for (unsigned i = 0; i < image_points.size(); i++) { line(ctx.color, image_points[i] * mult , image_points2[i] * mult, color, 7); circle(ctx.color, image_points[i] * mult, 5, color2, -1); } fprintf(stderr, "flandmark error %f\n", error); fflush(stderr); } } rvec_ = rvec.clone(); tvec_ = tvec.clone(); return true; }
void computePoseDifference(Mat img1, Mat img2, CommandArgs args, Mat k, Mat& dist_coefficients, double& worldScale, Mat& R, Mat& t, Mat& img_matches) { cout << "%===============================================%" << endl; Mat camera_matrix = k.clone(); if (args.resize_factor > 1) { resize(img1, img1, Size(img1.cols / args.resize_factor, img1.rows / args.resize_factor)); // make smaller for performance and displayablity resize(img2, img2, Size(img2.cols / args.resize_factor, img2.rows / args.resize_factor)); // scale matrix down according to changed resolution camera_matrix = camera_matrix / args.resize_factor; camera_matrix.at<double>(2,2) = 1; } Mat K1, K2; K1 = K2 = camera_matrix; if (img1.rows > img1.cols) // it is assumed the camera has been calibrated in landscape mode, so undistortion must also be performed in landscape orientation, or the camera matrix must be modified (fx,fy and cx,cy need to be exchanged) { swap(K1.at<double>(0,0), K1.at<double>(1,1)); swap(K1.at<double>(0,2), K1.at<double>(1,2)); } if (img2.rows > img2.cols) { swap(K2.at<double>(0,0), K2.at<double>(1,1)); swap(K2.at<double>(0,2), K2.at<double>(1,2)); } // Feature detection + extraction vector<KeyPoint> KeyPoints_1, KeyPoints_2; Mat descriptors_1, descriptors_2; Ptr<Feature2D> feat_detector; if (args.detector == DETECTOR_KAZE) { feat_detector = AKAZE::create(args.detector_data.upright ? AKAZE::DESCRIPTOR_MLDB_UPRIGHT : AKAZE::DESCRIPTOR_MLDB, args.detector_data.descriptor_size, args.detector_data.descriptor_channels, args.detector_data.threshold, args.detector_data.nOctaves, args.detector_data.nOctaveLayersAkaze); } else if (args.detector == DETECTOR_SURF) { feat_detector = xfeatures2d::SURF::create(args.detector_data.minHessian, args.detector_data.nOctaves, args.detector_data.nOctaveLayersAkaze, args.detector_data.extended, args.detector_data.upright); } else if (args.detector == DETECTOR_SIFT) { feat_detector = xfeatures2d::SIFT::create(args.detector_data.nFeatures, args.detector_data.nOctaveLayersSift, args.detector_data.contrastThreshold, args.detector_data.sigma); } feat_detector->detectAndCompute(img1, noArray(), KeyPoints_1, descriptors_1); feat_detector->detectAndCompute(img2, noArray(), KeyPoints_2, descriptors_2); cout << "Number of feature points (img1, img2): " << "(" << KeyPoints_1.size() << ", " << KeyPoints_2.size() << ")" << endl; // Find correspondences BFMatcher matcher; vector<DMatch> matches; if (args.use_ratio_test) { if (args.detector == DETECTOR_KAZE) matcher = BFMatcher(NORM_HAMMING, false); else matcher = BFMatcher(NORM_L2, false); vector<vector<DMatch>> match_candidates; const float ratio = args.ratio; matcher.knnMatch(descriptors_1, descriptors_2, match_candidates, 2); for (int i = 0; i < match_candidates.size(); i++) if (match_candidates[i][0].distance < ratio * match_candidates[i][1].distance) matches.push_back(match_candidates[i][0]); cout << "Number of matches passing ratio test: " << matches.size() << endl; } else { if (args.detector == DETECTOR_KAZE) matcher = BFMatcher(NORM_HAMMING, true); else matcher = BFMatcher(NORM_L2, true); matcher.match(descriptors_1, descriptors_2, matches); cout << "Number of matching feature points: " << matches.size() << endl; } // Convert correspondences to vectors vector<Point2f>imgpts1,imgpts2; for(unsigned int i = 0; i < matches.size(); i++) { imgpts1.push_back(KeyPoints_1[matches[i].queryIdx].pt); imgpts2.push_back(KeyPoints_2[matches[i].trainIdx].pt); } Mat mask; // inlier mask if (args.undistort) { undistortPoints(imgpts1, imgpts1, K1, dist_coefficients, noArray(), K1); undistortPoints(imgpts2, imgpts2, K2, dist_coefficients, noArray(), K2); } double focal = camera_matrix.at<double>(0,0); Point2d principalPoint(camera_matrix.at<double>(0,2),camera_matrix.at<double>(1,2)); Mat E = findEssentialMat(imgpts1, imgpts2, focal, principalPoint, RANSAC, 0.999, 1, mask); /* Mat F = camera_matrix.t().inv() * E * camera_matrix.inv(); */ Mat F = findFundamentalMat(imgpts1, imgpts2, CV_FM_RANSAC); correctMatches(F, imgpts1, imgpts2, imgpts1, imgpts2); cout << "Reprojection error:\n " << computeReprojectionError(imgpts1, imgpts2, mask, F) << endl; int inliers = recoverPose(E, imgpts1, imgpts2, R, t, focal, principalPoint, mask); cout << "Matches used for pose recovery:\n " << inliers << endl; /* Mat R1, R2, ProjMat1, ProjMat2, Q; */ /* stereoRectify(camera_matrix, dist_coefficients, camera_matrix, dist_coefficients, img1.size(), R, t, R1, R2, ProjMat1, ProjMat2, Q); */ /* cout << "P1=" << ProjMat1 << endl; */ /* cout << "P2=" << ProjMat2 << endl; */ /* cout << "Q=" << Q << endl; */ Mat mtxR, mtxQ; Mat Qx, Qy, Qz; Vec3d angles = RQDecomp3x3(R, mtxR, mtxQ, Qx, Qy, Qz); /* cout << "Qx:\n " << Qx << endl; */ /* cout << "Qy:\n " << Qy << endl; */ /* cout << "Qz:\n " << Qz << endl; */ cout << "Translation:\n " << t.t() << endl; cout << "Euler angles [x y z] in degrees:\n " << angles.t() << endl; if (args.epilines) { drawEpilines(Mat(imgpts1), 1, F, img2); drawEpilines(Mat(imgpts2), 2, F, img1); } drawMatches(img1, KeyPoints_1, img2, KeyPoints_2, // draw only inliers given by mask matches, img_matches, Scalar::all(-1), Scalar::all(-1), mask); vector<Point2f> imgpts1_masked, imgpts2_masked; for (int i = 0; i < imgpts1.size(); i++) { if (mask.at<uchar>(i,0) == 1) { imgpts1_masked.push_back(imgpts1[i]); imgpts2_masked.push_back(imgpts2[i]); } } Mat pnts4D; Mat P1 = camera_matrix * Mat::eye(3, 4, CV_64FC1), P2; Mat p2[2] = { R, t }; hconcat(p2, 2, P2); P2 = camera_matrix * P2; #define USE_OPENCV_TRIANGULATION #ifndef USE_OPENCV_TRIANGULATION // strangely, both methods yield identical results vector<Point3d> homogPoints1, homogPoints2; for (int i = 0; i < imgpts1_masked.size(); i++) { Point2f currentPoint1 = imgpts1_masked[i]; homogPoints1.push_back(Point3d(currentPoint1.x, currentPoint1.y, 1)); Point2f currentPoint2 = imgpts2_masked[i]; homogPoints2.push_back(Point3d(currentPoint2.x, currentPoint2.y, 1)); } Mat dehomogenized(imgpts1_masked.size(), 3, CV_64FC1); for (int i = 0; i < imgpts1_masked.size(); i++) { Mat_<double> triangulatedPoint = IterativeLinearLSTriangulation(homogPoints1[i], P1, homogPoints2[i], P2); Mat r = triangulatedPoint.t(); r.colRange(0,3).copyTo(dehomogenized.row(i)); // directly assigning to dehomogenized.row(i) compiles but does nothing, wtf? } #else triangulatePoints(P1, P2, imgpts1_masked, imgpts2_masked, pnts4D); pnts4D = pnts4D.t(); Mat dehomogenized; convertPointsFromHomogeneous(pnts4D, dehomogenized); dehomogenized = dehomogenized.reshape(1); // instead of 3 channels and 1 col, we want 1 channel and 3 cols #endif double mDist = 0; int n = 0; int pos = 0, neg = 0; /* Write ply file header */ ofstream ply_file("points.ply", ios_base::trunc); ply_file << "ply\n" "format ascii 1.0\n" "element vertex " << dehomogenized.rows << "\n" "property float x\n" "property float y\n" "property float z\n" "property uchar red\n" "property uchar green\n" "property uchar blue\n" "end_header" << endl; Mat_<double> row; for (int i = 0; i < dehomogenized.rows; i++) { row = dehomogenized.row(i); double d = row(2); if (d > 0) { pos++; mDist += norm(row); n++; /* float startx=imgpts1_masked[i].x - 1, starty=imgpts1_masked[i].y - 1, endx=imgpts1_masked[i].x + 1, endy=imgpts1_masked[i].y + 1; */ /* cout << "startx,endx = " << startx << "," << endx << endl; */ /* cout << "starty,endy = " << starty << "," << endy << endl; */ Vec3b rgb = img1.at<Vec3b>(imgpts1_masked[i].x, imgpts1_masked[i].y); ply_file << row(0) << " " << row(1) << " " << row(2) << " " << (int)rgb[2] << " " << (int)rgb[1] << " " << (int)rgb[0] << "\n"; } else { neg++; ply_file << 0 << " " << 0 << " " << 0 << " " << 0 << " " << 0 << " " << 0 << "\n"; } } ply_file.close(); mDist /= n; worldScale = mDist; cout << "Mean distance of " << n << " points to camera:\n " << mDist << " (dehomogenized)" << endl; cout << "pos=" << pos << ", neg=" << neg << endl; /* char filename[100]; */ /* sprintf(filename, "mat_1%d", i+1); */ /* Ptr<Formatter> formatter = Formatter::get(Formatter::FMT_CSV); */ /* Ptr<Formatted> formatted = formatter->format(dehomogenized); */ /* ofstream file(filename, ios_base::trunc); */ /* file << formatted << endl; */ /* Removed until cmake has been fathomed */ /* vector< Point3d > points3D; */ /* vector< vector< Point2d > > pointsImg; */ /* int NPOINTS=dehomogenized.rows; // number of 3d points */ /* int NCAMS=2; // number of cameras */ /* points3D.resize(NPOINTS); */ /* for (int i = 0; i < NPOINTS; i++) */ /* { */ /* points3D[i] = Point3d(dehomogenized.at<double>(i,0), */ /* dehomogenized.at<double>(i,1), */ /* dehomogenized.at<double>(i,2) */ /* ); */ /* } */ /* // fill image projections */ /* vector<vector<int> > visibility(2, vector<int>(NPOINTS, 1)); */ /* vector<Mat> camera_matrices(2, camera_matrix); */ /* vector<Mat> Rs(2); */ /* Rodrigues(Mat::eye(3, 3, CV_64FC1), Rs[0]); */ /* Rodrigues(R, Rs[0]); */ /* vector<Mat> Ts = { Mat::zeros(3,1, CV_64FC1), t }; */ /* vector<Mat> dist_coefficientss(2, dist_coefficients); */ /* pointsImg.resize(NCAMS); */ /* for(int i=0; i<NCAMS; i++) pointsImg[i].resize(NPOINTS); */ /* for (int i = 0; i < NPOINTS; i++) */ /* { */ /* pointsImg[0][i] = Point2d(imgpts1_masked[i].x, imgpts1_masked[i].y); */ /* pointsImg[1][i] = Point2d(imgpts2_masked[i].x, imgpts2_masked[i].y); */ /* } */ /* cvsba::Sba sba; */ /* sba.run(points3D, pointsImg, visibility, camera_matrices, Rs, Ts, dist_coefficientss); */ /* cout<<"Initial error="<<sba.getInitialReprjError()<<". "<< */ /* "Final error="<<sba.getFinalReprjError()<<endl; */ cout << "%===============================================%" << endl; }
void on_trackbar(int, void *) { Mat noise_image = add_noise(noise_level, input_image.clone()); vector_median_filter(noise_image.clone()); }
void addgui(Mat image) { if (lux() < 0.1) { Mat gs_rgb(image.size(), CV_8UC1); cvtColor(image, gs_rgb, CV_BGR2GRAY); cvtColor(gs_rgb, image, CV_GRAY2RGB); } Scalar guiColor(100,255,55); Point2f guiRT((image.cols>W)*W+W, 0); Point2f guiLT(guiRT.x - W, guiRT.y); Point2f guiLD(guiRT.x - W, guiRT.y+H); Mat tMat = image.clone(); rectangle(tMat, guiLT, guiLT + Point2f(160, 725), Scalar(0,0,0), -1, CV_AA); rectangle(tMat, guiRT, guiRT + Point2f(-330, 160), Scalar(0,0,0), -1, CV_AA); rectangle(tMat, guiRT, guiRT + Point2f(-165, 390), Scalar(0,0,0), -1, CV_AA); rectangle(tMat, guiLD+Point2f(0, -20), guiLD + Point2f(W, -60), Scalar(0,0,0), -1, CV_AA); addWeighted(image, 0.6, tMat, 0.4, 0.0, image); //yaw { line(image, Point(0,H - H/20), Point(W*3,H - H/20), guiColor, 1, CV_AA); for(int32_t i = -100; i <= 100; i = i+10) { double x = W/fov*i + guiLT.x + W*0.5; ostringstream temp; temp<<i; putTextCorr(image, temp.str(), Point(x, H-H/20+20), FONT_HERSHEY_DUPLEX, 0.4, guiColor, 1, CV_AA, Point2f(0.5, 0)); } for(int32_t i = -100; i <= 100; i = i+1) { double lh = 3; if (i%5 == 0) lh*=1.7; if (i%10 == 0) lh*=1.5; double x = W/fov*i + guiLT.x + W*0.5; line(image, Point(x, H-H/20+lh), Point(x,H-H/20),guiColor, 1, CV_AA); } } //pitch, roll { getRotImg("img/right.png", image, guiRT - Point2f(255, -75), pitch*180/PI); getRotImg("img/back.png", image, guiRT - Point2f(85, -75), roll*180/PI); addRule(image, guiRT - Point2f(160, -30), guiRT - Point2f(160, -140), pitch*180/PI, guiColor); addRule(image, guiRT - Point2f(140, -160), guiRT - Point2f(30, -160), roll*180/PI, guiColor); } //servos for (int32_t i = 0; i < 3; i++) { Point2f refp = guiRT - Point2f(140, -240-60*i); addRule(image, refp, refp - Point2f(-110, 0), getServo(2-i), guiColor, 0, 180); ostringstream temp; temp << "Servo " << int32_t(2-i); putText(image, temp.str(), refp - Point2f(4, 16), FONT_HERSHEY_DUPLEX, 0.35, guiColor, 1, CV_AA); } //dist/mlx/lux { circle(image, guiLT + Point2f(W/2,H/2), 40, guiColor, 1, CV_AA); ostringstream temp; temp << round(mlxTemp()*100)*0.01 << "'C"; putTextCorr(image, temp.str(), guiLT + Point2f(W/2,H/2 - 47), FONT_HERSHEY_DUPLEX, 0.4, guiColor, 1, CV_AA, Point2f(0.5,0.5)); temp.str(""); double d = dist(); if (d < 250) temp << round(d)*0.01 << "m"; else temp << ">2.5m"; putTextCorr(image, temp.str(), guiLT + Point2f(W/2,H/2 + 51), FONT_HERSHEY_DUPLEX, 0.4, guiColor, 1, CV_AA, Point2f(0.5,0.5)); temp.str(""); temp << round(lux()) << "Lux"; putTextCorr(image, temp.str(), guiLT + Point2f(W/2+44,H/2), FONT_HERSHEY_DUPLEX, 0.4, guiColor, 1, CV_AA, Point2f(0.0,0.5)); } //bat { Point2f refp = guiLT + Point2f(40,30); const double b_w = 75, b_h = 20; rectangle(image, refp, refp + Point2f(b_w, b_h), guiColor, 1, CV_AA); ostringstream temp; temp << vbat() << "v/" << cbat(Temp) << "%"; putTextCorr(image, temp.str(), refp + Point2f(b_w/2, b_h/2+2), FONT_HERSHEY_DUPLEX, 0.4, guiColor, 1, CV_AA, Point2f(0.5,0.5)); refp += Point2f(b_w,b_h/3); rectangle(image, refp, refp + Point2f(b_w*0.05, b_h/3), guiColor, 1, CV_AA); } //gps { Point2f refp = guiLT + Point2f(40,80); ostringstream temp; temp << "SAT:" << Satellites << " ALT:" << Altitude << "m"; putTextCorr(image, temp.str(), refp, FONT_HERSHEY_DUPLEX, 0.4, guiColor, 1, CV_AA, Point2f(0.0,0.5)); } //temp //hum //pre //mag { int32_t ya = 150; addGraph(image, guiLT + Point2f(40, 150+ya*0), "Hum", guiColor, &table[0][0], t_counter); addGraph(image, guiLT + Point2f(40, 150+ya*1), "Tmp", guiColor, &table[1][0], t_counter); addGraph(image, guiLT + Point2f(40, 150+ya*2), "Prs", guiColor, &table[2][0], t_counter); addGraph(image, guiLT + Point2f(40, 150+ya*3), "Mag", guiColor, &table[3][0], t_counter); } //time //pos }
void Escena::procesar( Mat &frame ) { Mat matImagenActual = frame.clone(); // PROCESAMIENTO // Declaramos un vector para guardar las caras detectadas std::vector<Rect> vectorCaras; vectorCaras.clear(); // Detectamos las caras en toda la extStream.reimagen completa clasificadorCara.detectMultiScale( matImagenActual, vectorCaras, 1.1, 2, 0|CASCADE_SCALE_IMAGE, Size( 100,100 ) ); for ( unsigned int i = 0; i < vectorCaras.size(); i++ ) { // Dibujamos el rectangulo de la cara //rectangle( matImagenActual, vectorCaras.at(i), Scalar( 0, 255, 0 ), linesWidth ); // Declaramos y dibujamos un contenedor para el ojo izquierdo Rect rectParaOjosIzquierdos( vectorCaras.at(i).x + vectorCaras.at(i).width/2, vectorCaras.at(i).y + vectorCaras.at(i).height*MARGEN_VERTICAL_CONTENEDOR_OJOS/100, vectorCaras.at(i).width*ANCHO_CONTENEDOR_OJO/100, vectorCaras.at(i).height*ALTO_CONTENEDOR_OJO/100); // rectangle( matImagenActual, rectParaOjosIzquierdos, Scalar( 0, 0, 200 ), linesWidth ); // Declaramos y dibujamos un contenedor para el ojo derecho Rect rectParaOjosDerechos( vectorCaras.at(i).x + vectorCaras.at(i).width*MARGEN_LATERAL_CONTENEDOR_OJOS/100, vectorCaras.at(i).y + vectorCaras.at(i).height*MARGEN_VERTICAL_CONTENEDOR_OJOS/100, vectorCaras.at(i).width*ANCHO_CONTENEDOR_OJO/100, vectorCaras.at(i).height*ALTO_CONTENEDOR_OJO/100); // rectangle( matImagenActual, rectParaOjosDerechos, Scalar( 0, 0, 200 ), linesWidth ); // Declaramos un vector para guardar los ojos izquierdos detectados std::vector<Rect> vectorOjosIzquierdos; vectorOjosIzquierdos.clear(); // Detectamos ojos izquierdos en el mat designado por el rectOjosIzquierdos Mat matParaOjosIzquierdos( matImagenActual, rectParaOjosIzquierdos ); clasificadorOjoIzquierdo.detectMultiScale( matParaOjosIzquierdos, vectorOjosIzquierdos, 1.1, 2, 0|CASCADE_SCALE_IMAGE, Size( 30, 30 ) ); Rect ojoIzquierdoLocalAlContenedor; if ( vectorOjosIzquierdos.size() > 0 ) { // Descartamos los ojos izquierdos que no sean el mas grande ojoIzquierdoLocalAlContenedor = rectanguloMasGrande( vectorOjosIzquierdos ); Rect rectanguloOjoIzquierdoDesplazado( rectParaOjosIzquierdos.x + ojoIzquierdoLocalAlContenedor.x, rectParaOjosIzquierdos.y + ojoIzquierdoLocalAlContenedor.y, ojoIzquierdoLocalAlContenedor.width, ojoIzquierdoLocalAlContenedor.height ); rectangle( matImagenActual, rectanguloOjoIzquierdoDesplazado, Scalar( 255, 255, 255 ), linesWidth ); } // Declaramos un vector para guardar los ojos derechos detectados std::vector<Rect> vectorOjosDerechos; vectorOjosDerechos.clear(); // Detectamos ojos derechos en el mat designado por el rectOjosDerechos Mat matParaOjosDerechos( matImagenActual, rectParaOjosDerechos ); clasificadorOjoDerecho.detectMultiScale( matParaOjosDerechos, vectorOjosDerechos, 1.1, 2, 0|CASCADE_SCALE_IMAGE, Size( 30, 30 ) ); Rect ojoDerechoLocalAlContenedor; if ( vectorOjosDerechos.size() > 0 ) { // Descartamos los ojos derechos que no sean el mas grande ojoDerechoLocalAlContenedor = rectanguloMasGrande( vectorOjosDerechos ); Rect rectanguloOjoDerechoDesplazado( rectParaOjosDerechos.x + ojoDerechoLocalAlContenedor.x, rectParaOjosDerechos.y + ojoDerechoLocalAlContenedor.y, ojoDerechoLocalAlContenedor.width, ojoDerechoLocalAlContenedor.height ); rectangle( matImagenActual, rectanguloOjoDerechoDesplazado, Scalar( 255, 255, 255 ), linesWidth ); } //Creo la matriz en la que voy a meter los posibles circulos //metodo para deteccion de parpadeo int threshold_value = 0; int threshold_type = 3;; int const max_value = 255; int const max_type = 4; int const max_BINARY_value = 255; //Mat src, src_gray, dst; std::vector<Vec3f> storageCir; Mat img; int dp = 2; float minDist = 300.0 ; int param1 = 32 ; int param2 = 60; int minRadius = 10; int maxRadius = 22; //GaussianBlur( img, img, Size(7,7), 2, 2 ); cvtColor( matParaOjosDerechos, img, CV_BGR2GRAY ); threshold( img, img, threshold_value, max_BINARY_value,threshold_type ); HoughCircles(img, storageCir, CV_HOUGH_GRADIENT , dp, minDist, param1, param2, minRadius, maxRadius); if (storageCir.empty()) { //qDebug()<<"ojo cerrado"; //storageCir.clear(); cant++; } else{ cant=0; //qDebug()<<"ojo abierto"; //storageCir.clear(); } storageCir.clear(); if (cant==3){ qDebug()<<"parpadeo"; cant=0; } if ( vectorOjosIzquierdos.size() > 0 && vectorOjosDerechos.size() > 0 ) { // A estos puntos les llamo centros globales, pero son los centros desplazados (1/4 y 3/4) Point centroOjoIzquierdoGlobal( rectParaOjosIzquierdos.x + ojoIzquierdoLocalAlContenedor.x + ojoIzquierdoLocalAlContenedor.width*3/4, rectParaOjosIzquierdos.y + ojoIzquierdoLocalAlContenedor.y + ojoIzquierdoLocalAlContenedor.height/2 ); Point centroOjoDerechoGlobal( rectParaOjosDerechos.x + ojoDerechoLocalAlContenedor.x + ojoDerechoLocalAlContenedor.width*1/4, rectParaOjosDerechos.y + ojoDerechoLocalAlContenedor.y + ojoDerechoLocalAlContenedor.height/2 ); // qDebug()<<cv::Point ('centroOjoDerechoGlobal'); //QDebug(centroOjoDerechoGlobal); //QDebug(centroOjoIzquierdoGlobal); line( matImagenActual, centroOjoIzquierdoGlobal, centroOjoDerechoGlobal, Scalar( 100, 50, 255 ), linesWidth ); float anguloEntreOjos = anguloEntre ( centroOjoDerechoGlobal, centroOjoIzquierdoGlobal ); float largoLineaOjoBoca = vectorCaras.at(i).height/9*6; float baseTrianguloOjoBoca = sin( anguloEntreOjos ) * largoLineaOjoBoca; if ( centroOjoDerechoGlobal.y < centroOjoIzquierdoGlobal.y ) baseTrianguloOjoBoca = -baseTrianguloOjoBoca; float alturaTrianguloOjoBoca = cos( anguloEntreOjos ) * largoLineaOjoBoca; Point finLineaOjoIzquierdo( centroOjoIzquierdoGlobal.x + baseTrianguloOjoBoca, centroOjoIzquierdoGlobal.y + alturaTrianguloOjoBoca ); Point finLineaOjoDerecho( centroOjoDerechoGlobal.x + baseTrianguloOjoBoca, centroOjoDerechoGlobal.y + alturaTrianguloOjoBoca ); line( matImagenActual, centroOjoIzquierdoGlobal, finLineaOjoIzquierdo, Scalar( 150, 20, 255 ), linesWidth ); line( matImagenActual, centroOjoDerechoGlobal, finLineaOjoDerecho, Scalar( 150, 20, 255 ), linesWidth ); // Saco dos promedios consecutivos Point bocaSupIzq( (finLineaOjoIzquierdo.x + centroOjoIzquierdoGlobal.x)/2, ( finLineaOjoIzquierdo.y + centroOjoIzquierdoGlobal.y)/2 ); Point bocaSupDer( (finLineaOjoDerecho.x + centroOjoDerechoGlobal.x)/2, ( finLineaOjoDerecho.y + centroOjoDerechoGlobal.y)/2 ); line( matImagenActual, bocaSupIzq, bocaSupDer, Scalar( 150, 20, 255 ), linesWidth ); line( matImagenActual, finLineaOjoIzquierdo, finLineaOjoDerecho, Scalar( 150, 20, 255 ), linesWidth ); //Point bocaInfIzq( (finLineaOjoIzquierdo.x + centroOjoIzquierdoGlobal.x)/2, (finLineaOjoIzquierdo.y + centroOjoIzquierdoGlobal.y)/2 ); //Point bocaInfDer( (finLineaOjoIzquierdo.x + centroOjoIzquierdoGlobal.x)/2, (finLineaOjoIzquierdo.y + centroOjoIzquierdoGlobal.y)/2 ); } // Estiramos un poco la cara y la dibujamos vectorCaras.at(i).height *= 1.2; rectangle( matImagenActual, vectorCaras.at(i), Scalar( 0, 0, 0 ), linesWidth ); } // FIN PROCESAMIENTO frame = matImagenActual.clone(); }
/* Member function * retuns the detected gesture ID * Inputs : frame -> current frame to analyse * faceRegion -> current face Region */ airGestType airGest::analyseGesture( Mat frame ) { airGestType gest = GEST_INVALID; resize( frame, frame, Size( OPTFLW_FRAME_WIDTH, OPTFLW_FRAME_HEIGHT ), 0, 0, INTER_AREA ); //Prepare canvas to draw intermediate result //canvas = frame.clone(); canvas = Mat( OPTFLW_FRAME_HEIGHT, OPTFLW_FRAME_WIDTH, CV_8UC3, Scalar( 0, 0, 0 ) ); //accCanvas = Mat( OPTFLW_FRAME_HEIGHT, OPTFLW_FRAME_WIDTH, CV_8UC3, Scalar( 0, 0, 0 ) ); Mat grayFrame; //Convert to gray scale if( frame.channels() == 3 ) { cvtColor( frame, grayFrame, CV_BGR2GRAY ); } else if( frame.channels() == 4 ) { cvtColor( frame, grayFrame, CV_BGRA2GRAY ); } else { //already gray grayFrame = frame.clone(); } if( currState != AIRGEST_ACTIVE ) { //unless active, return with an invalid code prevFrame = grayFrame.clone(); return gest; } //Here, airGest is active currFrame = grayFrame; //~ std::cout << "[airGest::analyseGesture] calculating optical flow...."; calcOpticalFlowFarneback( prevFrame, // first 8-bit single channel input image currFrame, // Second image of the same size and same type as prevgray flowMap, // computed flow image tha has the same size as pregray and type CV_32FC2 0.5, // pryScale, 0.5 means classical pyramid 3, // number of pyramid layers including initial image 20, // winSize 3, // number of iterations the algorithm does at each pyramid level 5, // Size of the pixel neighborhood used to find polynomial expansion in each pixel 1.1, // standard deviation of Guassian OPTFLOW_FARNEBACK_GAUSSIAN ); // //~ std::cout << "[COMPLETED]\n"; //~ std::cout << "-> boxFilter"; boxFilter( flowMap, flowMap , -1, BLUR_KERNEL_SIZE ); //~ std::cout << "-> drawFlowMap"; drawFlowMap(); //~ std::cout << "-> filterFlow"; filterFlow(); //imshow( "Current flow", canvas ); //imshow( "Accumulated Flow", accCanvas ); //copy current frame to prev for using next time prevFrame = currFrame.clone(); gest = ( decision == -1.00 )? GEST_PREV: ( decision == +1.00 )? GEST_NEXT: GEST_INVALID; return gest; }
//Función principal int main(int argc, char* argv[]) { char nombre_video[50]; //Iniacializa los "substractores de background" substractorMOG2 = createBackgroundSubtractorMOG2(); //MOG2 //Carga el video en la variable capture VideoCapture capture("Video_003.avi"); //Flag en caso de haber un error al abrir el video if (!capture.isOpened()){ printf("No fue posible abrir el archivo de video"); getchar(); exit(EXIT_FAILURE); } //Toma la primera imagen y permite dibujar la línea para realizar el conteo //Capturamos el primer frame if (!capture.read(frame_actual)) { printf("No fue posible leer el frame"); getchar(); exit(EXIT_FAILURE); } printf("Dibuje la línea para el conteo y presione la barra espaciadora para continuar."); fflush(stdout); while ((char)keyboard != 32){ cvSetMouseCallback("Frame", mouseHandler, NULL); Mat img1 = frame_actual.clone(); line(img1, point1, point2, CV_RGB(255, 0, 0), 2, 8, 0); imshow("Frame", img1); keyboard = waitKey(30); } // Variables para detección Mat im_detecciones; vector<cv::KeyPoint> personas; vector<cv::Point2f> prevPersonas; Point2f persona; int i = 0; //Contador para recorrer las personas detectadas int contador = 0; // Contador de cruces por la línea // Preparar los parámetros para el detector cv::SimpleBlobDetector::Params params; params.minDistBetweenBlobs = 1.0f; params.filterByInertia = false; params.filterByConvexity = false; params.filterByColor = false; params.filterByCircularity = false; params.filterByArea = true; params.minArea = 100.0f; params.maxArea = 2500.0f; // Preparar el detector con los parámetros definidos cv::Ptr<cv::SimpleBlobDetector> detector = cv::SimpleBlobDetector::create(params); //Flag para acabar el programa: mientras que no se presione tecla ESC while ((char)keyboard != 27){ //Capturamos el primer frame, si hubo error acaba la ejecuci�n if (!capture.read(frame_actual)) { printf("No fue posible leer el frame"); getchar(); exit(EXIT_FAILURE); } // cada frame es usado para calcular la m�scara de foreground y para actualizar el background substractorMOG2->apply(frame_actual, mascaraMOG2); // aplico el algoritmo MOG2 y obtengo la m�scara de foreground // Tratamiento de la máscara de foreground, para eliminar pequeños objetos // y llenar pequeños huecos. (Closing y opening)) dilate(mascaraMOG2, mascaraMOG2, getStructuringElement(MORPH_ELLIPSE, Size(2, 2)) ); erode(mascaraMOG2, mascaraMOG2, getStructuringElement(MORPH_ELLIPSE, Size(2, 2)) ); erode(mascaraMOG2, mascaraMOG2, getStructuringElement(MORPH_ELLIPSE, Size(2, 2)) ); dilate(mascaraMOG2, mascaraMOG2, getStructuringElement(MORPH_ELLIPSE, Size(2, 2)) ); //muestra el frame actual y su máscara line(frame_actual, point1, point2, CV_RGB(255, 0, 0), 2, 8, 0); imshow("Frame", frame_actual); //imshow("Mascara Foreground MOG2", mascaraMOG2); imshow("Mascara Foreground MOG2 Mejorada", mascaraMOG2); // Detección de BLOBs // https://www.learnopencv.com/blob-detection-using-opencv-python-c/ cv::KeyPoint::convert(personas,prevPersonas); detector->detect(mascaraMOG2, personas); drawKeypoints(frame_actual, personas, im_detecciones, Scalar(0,0,255), DrawMatchesFlags::DRAW_RICH_KEYPOINTS ); // Detección de desplazamiento de personas if(prevPersonas.size() >= personas.size()){ i=0; for(std::vector<cv::KeyPoint>::iterator blobIterator = personas.begin(); blobIterator != personas.end(); blobIterator++){ persona = Point2f(blobIterator->pt.x, blobIterator->pt.y); // Determino la longitud y no tomo en cuenta aquellas muy grandes. double longitud = cv::norm(persona-prevPersonas[i]); if (longitud < 10){ line(im_detecciones, persona, prevPersonas[i], CV_RGB(0, 255, 0), 1, 8, 0); // Contar los cruces de línea if (intersecta(persona,prevPersonas[i],point1,point2)){ contador++; printf("%d\n",contador); } } i++; } } // Mostrar personas y desplazamiento imshow("keypoints", im_detecciones ); //get the input from the keyboard keyboard = waitKey(30); } //delete capture object capture.release(); //destroy GUI windows destroyAllWindows(); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { #ifdef WIN32 //_CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF ); // dump leaks at return //_CrtSetBreakAlloc(287); #endif string verboseLevelConsole; path inputFilename; path configFilename; path inputLandmarks; string landmarkType; path outputPath("."); try { po::options_description desc("Allowed options"); desc.add_options() ("help,h", "produce help message") ("verbose,v", po::value<string>(&verboseLevelConsole)->implicit_value("DEBUG")->default_value("INFO","show messages with INFO loglevel or below."), "specify the verbosity of the console output: PANIC, ERROR, WARN, INFO, DEBUG or TRACE") ("config,c", po::value<path>(&configFilename)->required(), "path to a config (.cfg) file") ("input,i", po::value<path>(&inputFilename)->required(), "input filename") ("landmarks,l", po::value<path>(&inputLandmarks)->required(), "input landmarks") ("landmark-type,t", po::value<string>(&landmarkType)->required(), "specify the type of landmarks: ibug") ; po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(desc).run(), vm); // style(po::command_line_style::unix_style | po::command_line_style::allow_long_disguise) if (vm.count("help")) { cout << "Usage: fitter [options]\n"; cout << desc; return EXIT_SUCCESS; } po::notify(vm); } catch (po::error& e) { cout << "Error while parsing command-line arguments: " << e.what() << endl; cout << "Use --help to display a list of options." << endl; return EXIT_SUCCESS; } LogLevel logLevel; if(boost::iequals(verboseLevelConsole, "PANIC")) logLevel = LogLevel::Panic; else if(boost::iequals(verboseLevelConsole, "ERROR")) logLevel = LogLevel::Error; else if(boost::iequals(verboseLevelConsole, "WARN")) logLevel = LogLevel::Warn; else if(boost::iequals(verboseLevelConsole, "INFO")) logLevel = LogLevel::Info; else if(boost::iequals(verboseLevelConsole, "DEBUG")) logLevel = LogLevel::Debug; else if(boost::iequals(verboseLevelConsole, "TRACE")) logLevel = LogLevel::Trace; else { cout << "Error: Invalid LogLevel." << endl; return EXIT_FAILURE; } Loggers->getLogger("morphablemodel").addAppender(make_shared<logging::ConsoleAppender>(logLevel)); Loggers->getLogger("render").addAppender(make_shared<logging::ConsoleAppender>(logLevel)); Loggers->getLogger("fitter").addAppender(make_shared<logging::ConsoleAppender>(logLevel)); Logger appLogger = Loggers->getLogger("fitter"); appLogger.debug("Verbose level for console output: " + logging::logLevelToString(logLevel)); appLogger.debug("Using config: " + configFilename.string()); // Load the image shared_ptr<ImageSource> imageSource; try { imageSource = make_shared<FileImageSource>(inputFilename.string()); } catch(const std::runtime_error& e) { appLogger.error(e.what()); return EXIT_FAILURE; } // Load the ground truth shared_ptr<LabeledImageSource> labeledImageSource; shared_ptr<NamedLandmarkSource> landmarkSource; shared_ptr<LandmarkFormatParser> landmarkFormatParser; if(boost::iequals(landmarkType, "ibug")) { landmarkFormatParser = make_shared<IbugLandmarkFormatParser>(); landmarkSource = make_shared<DefaultNamedLandmarkSource>(vector<path>{inputLandmarks}, landmarkFormatParser); } else if (boost::iequals(landmarkType, "did")) { landmarkFormatParser = make_shared<DidLandmarkFormatParser>(); landmarkSource = make_shared<DefaultNamedLandmarkSource>(vector<path>{inputLandmarks}, landmarkFormatParser); } else { cout << "Error: Invalid ground truth type." << endl; return EXIT_FAILURE; } labeledImageSource = make_shared<NamedLabeledImageSource>(imageSource, landmarkSource); // Load the config file ptree pt; try { boost::property_tree::info_parser::read_info(configFilename.string(), pt); } catch(const boost::property_tree::ptree_error& error) { appLogger.error(error.what()); return EXIT_FAILURE; } // Load the Morphable Model morphablemodel::MorphableModel morphableModel; try { morphableModel = morphablemodel::MorphableModel::load(pt.get_child("morphableModel")); } catch (const boost::property_tree::ptree_error& error) { appLogger.error(error.what()); return EXIT_FAILURE; } catch (const std::runtime_error& error) { appLogger.error(error.what()); return EXIT_FAILURE; } //const string windowName = "win"; // Create the output directory if it doesn't exist yet if (!boost::filesystem::exists(outputPath)) { boost::filesystem::create_directory(outputPath); } std::chrono::time_point<std::chrono::system_clock> start, end; Mat img; morphablemodel::OpenCVCameraEstimation epnpCameraEstimation(morphableModel); // todo: this can all go to only init once morphablemodel::AffineCameraEstimation affineCameraEstimation(morphableModel); vector<imageio::ModelLandmark> landmarks; labeledImageSource->next(); start = std::chrono::system_clock::now(); appLogger.info("Starting to process " + labeledImageSource->getName().string()); img = labeledImageSource->getImage(); LandmarkCollection lms = labeledImageSource->getLandmarks(); vector<shared_ptr<Landmark>> lmsv = lms.getLandmarks(); landmarks.clear(); Mat landmarksImage = img.clone(); // blue rect = the used landmarks for (const auto& lm : lmsv) { lm->draw(landmarksImage); //if (lm->getName() == "right.eye.corner_outer" || lm->getName() == "right.eye.corner_inner" || lm->getName() == "left.eye.corner_outer" || lm->getName() == "left.eye.corner_inner" || lm->getName() == "center.nose.tip" || lm->getName() == "right.lips.corner" || lm->getName() == "left.lips.corner") { landmarks.emplace_back(imageio::ModelLandmark(lm->getName(), lm->getPosition2D())); cv::rectangle(landmarksImage, cv::Point(cvRound(lm->getX() - 2.0f), cvRound(lm->getY() - 2.0f)), cv::Point(cvRound(lm->getX() + 2.0f), cvRound(lm->getY() + 2.0f)), cv::Scalar(255, 0, 0)); //} } // Start affine camera estimation (Aldrian paper) Mat affineCamLandmarksProjectionImage = landmarksImage.clone(); // the affine LMs are currently not used (don't know how to render without z-vals) Mat affineCam = affineCameraEstimation.estimate(landmarks); for (const auto& lm : landmarks) { Vec3f tmp = morphableModel.getShapeModel().getMeanAtPoint(lm.getName()); Mat p(4, 1, CV_32FC1); p.at<float>(0, 0) = tmp[0]; p.at<float>(1, 0) = tmp[1]; p.at<float>(2, 0) = tmp[2]; p.at<float>(3, 0) = 1; Mat p2d = affineCam * p; Point2f pp(p2d.at<float>(0, 0), p2d.at<float>(1, 0)); // Todo: check cv::circle(affineCamLandmarksProjectionImage, pp, 4.0f, Scalar(0.0f, 255.0f, 0.0f)); } // End Affine est. // Estimate the shape coefficients vector<float> fittedCoeffs; { // $\hat{V} \in R^{3N\times m-1}$, subselect the rows of the eigenvector matrix $V$ associated with the $N$ feature points // And we insert a row of zeros after every third row, resulting in matrix $\hat{V}_h \in R^{4N\times m-1}$: Mat V_hat_h = Mat::zeros(4 * landmarks.size(), morphableModel.getShapeModel().getNumberOfPrincipalComponents(), CV_32FC1); int rowIndex = 0; for (const auto& lm : landmarks) { Mat basisRows = morphableModel.getShapeModel().getPcaBasis(lm.getName()); // getPcaBasis should return the not-normalized basis I think basisRows.copyTo(V_hat_h.rowRange(rowIndex, rowIndex + 3)); rowIndex += 4; // replace 3 rows and skip the 4th one, it has all zeros } // Form a block diagonal matrix $P \in R^{3N\times 4N}$ in which the camera matrix C (P_Affine, affineCam) is placed on the diagonal: Mat P = Mat::zeros(3 * landmarks.size(), 4 * landmarks.size(), CV_32FC1); for (int i = 0; i < landmarks.size(); ++i) { Mat submatrixToReplace = P.colRange(4 * i, (4 * i) + 4).rowRange(3 * i, (3 * i) + 3); affineCam.copyTo(submatrixToReplace); } // The variances: We set the 3D and 2D variances to one static value for now. $sigma^2_2D = sqrt(1) + sqrt(3)^2 = 4$ float sigma_2D = std::sqrt(4); Mat Sigma = Mat::zeros(3 * landmarks.size(), 3 * landmarks.size(), CV_32FC1); for (int i = 0; i < 3 * landmarks.size(); ++i) { Sigma.at<float>(i, i) = 1.0f / sigma_2D; } Mat Omega = Sigma.t() * Sigma; // The landmarks in matrix notation (in homogeneous coordinates), $3N\times 1$ Mat y = Mat::ones(3 * landmarks.size(), 1, CV_32FC1); for (int i = 0; i < landmarks.size(); ++i) { y.at<float>(3 * i, 0) = landmarks[i].getX(); y.at<float>((3 * i) + 1, 0) = landmarks[i].getY(); // the position (3*i)+2 stays 1 (homogeneous coordinate) } // The mean, with an added homogeneous coordinate (x_1, y_1, z_1, 1, x_2, ...)^t Mat v_bar = Mat::ones(4 * landmarks.size(), 1, CV_32FC1); for (int i = 0; i < landmarks.size(); ++i) { Vec3f modelMean = morphableModel.getShapeModel().getMeanAtPoint(landmarks[i].getName()); v_bar.at<float>(4 * i, 0) = modelMean[0]; v_bar.at<float>((4 * i) + 1, 0) = modelMean[1]; v_bar.at<float>((4 * i) + 2, 0) = modelMean[2]; // the position (4*i)+3 stays 1 (homogeneous coordinate) } // Bring into standard regularised quadratic form with diagonal distance matrix Omega Mat A = P * V_hat_h; Mat b = P * v_bar - y; //Mat c_s; // The x, we solve for this! (the variance-normalized shape parameter vector, $c_s = [a_1/sigma_{s,1} , ..., a_m-1/sigma_{s,m-1}]^t$ float lambda = 0.1f; // lambdaIn; //0.01f; // The weight of the regularisation int numShapePc = morphableModel.getShapeModel().getNumberOfPrincipalComponents(); Mat AtOmegaA = A.t() * Omega * A; Mat AtOmegaAReg = AtOmegaA + lambda * Mat::eye(numShapePc, numShapePc, CV_32FC1); Mat AtOmegaARegInv = AtOmegaAReg.inv(/*cv::DECOMP_SVD*/); // check if invertible. Use Eigen? Regularisation? (see SDM). Maybe we need pseudo-inverse. Mat AtOmegatb = A.t() * Omega.t() * b; Mat c_s = -AtOmegaARegInv * AtOmegatb; fittedCoeffs = vector<float>(c_s); } // End estimate the shape coefficients //std::shared_ptr<render::Mesh> meanMesh = std::make_shared<render::Mesh>(morphableModel.getMean()); //render::Mesh::writeObj(*meanMesh.get(), "C:\\Users\\Patrik\\Documents\\GitHub\\mean.obj"); //const float aspect = (float)img.cols / (float)img.rows; // 640/480 //render::Camera camera(Vec3f(0.0f, 0.0f, 0.0f), /*horizontalAngle*/0.0f*(CV_PI / 180.0f), /*verticalAngle*/0.0f*(CV_PI / 180.0f), render::Frustum(-1.0f*aspect, 1.0f*aspect, -1.0f, 1.0f, /*zNear*/-0.1f, /*zFar*/-100.0f)); /*render::SoftwareRenderer r(img.cols, img.rows, camera); // 640, 480 r.perspectiveDivision = render::SoftwareRenderer::PerspectiveDivision::None; r.doClippingInNDC = false; r.directToScreenTransform = true; r.doWindowTransform = false; r.setObjectToScreenTransform(shapemodels::AffineCameraEstimation::calculateFullMatrix(affineCam)); r.draw(meshToDraw, nullptr); Mat buff = r.getImage();*/ /* std::ofstream myfile; path coeffsFilename = outputPath / labeledImageSource->getName().stem(); myfile.open(coeffsFilename.string() + ".txt"); for (int i = 0; i < fittedCoeffs.size(); ++i) { myfile << fittedCoeffs[i] * std::sqrt(morphableModel.getShapeModel().getEigenvalue(i)) << std::endl; } myfile.close(); */ //std::shared_ptr<render::Mesh> meshToDraw = std::make_shared<render::Mesh>(morphableModel.drawSample(fittedCoeffs, vector<float>(morphableModel.getColorModel().getNumberOfPrincipalComponents(), 0.0f))); //render::Mesh::writeObj(*meshToDraw.get(), "C:\\Users\\Patrik\\Documents\\GitHub\\fittedMesh.obj"); //r.resetBuffers(); //r.draw(meshToDraw, nullptr); // TODO: REPROJECT THE POINTS FROM THE C_S MODEL HERE AND SEE IF THE LMS REALLY GO FURTHER OUT OR JUST THE REST OF THE MESH //cv::imshow(windowName, img); //cv::waitKey(5); end = std::chrono::system_clock::now(); int elapsed_mseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); appLogger.info("Finished processing. Elapsed time: " + lexical_cast<string>(elapsed_mseconds)+"ms.\n"); return 0; }
/** @function main */ int main( int argc, char** argv ) { int start = 90; int end = 70; char* result_path = argv[1]; int result_size = 2700; int result_center_x = 500; int result_center_y = 1700; //x-form accumulator; Mat Acc = (Mat_<float>(3, 3) << 1, 0, result_center_x, 0, 1, result_center_y, 0, 0, 1); Mat result; int ctr = 10; for(int img_num = start; img_num > end; img_num--) { std::stringstream ss; ss << img_num; std::stringstream ss1; ss1 << (img_num - 1); std::string target_num(ss.str()); std::string src_num(ss1.str()); string src_filename = std::string("flattened_input_15/") + src_num + std::string(".png"); string target_filename = std::string("flattened_input_15/") + target_num + std::string(".png"); cout<<"Target: "<<target_filename<<endl; cout<<"Source: "<<src_filename<<endl; src = imread( src_filename, 1 ); target = imread ( target_filename, 1); //copy first target into result if(img_num == start) { result = Mat::zeros(result_size, result_size, target.type()); warpPerspective( target, result, Acc, result.size() ); } //get transformation of source Mat xform = getTransfromation(src, target); //accumulate transformation Acc = Acc * xform; //save target Mat tempresult = result.clone(); warpPerspective( src, result, Acc, result.size() ); result = result + tempresult; imwrite( src_num+result_path, result ); } imwrite( result_path, result ); return(0); }
bool BowVocabulary::computeVocabulary(Mat& vocabularyOut, const string& vocabularyImgsList, bool outputAnalyzedImages, bool useOnlyTargetRegions) { if (loadVocabulary(vocabularyOut)) { return true; } _bowTrainer->clear(); ifstream imgsList(vocabularyImgsList); if (imgsList.is_open()) { vector<string> fileNames; string filename; while (getline(imgsList, filename)) { fileNames.push_back(filename); } int numberOfFiles = fileNames.size(); cout << " -> Building vocabulary with " << numberOfFiles << " images..." << endl; PerformanceTimer performanceTimer; performanceTimer.start(); int descriptorsOriginalMatrixType = CV_32FC1; //#pragma omp parallel for schedule(dynamic) for (int i = 0; i < numberOfFiles; ++i) { Mat imagePreprocessed; string imageFilename = IMGS_DIRECTORY + fileNames[i] + IMAGE_TOKEN; if (_imagePreprocessor->loadAndPreprocessImage(imageFilename, imagePreprocessed, CV_LOAD_IMAGE_GRAYSCALE, false)) { Mat outputImage; if (outputAnalyzedImages) { outputImage = imagePreprocessed.clone(); } if (useOnlyTargetRegions) { vector<Mat> masks; ImageUtils::retriveTargetsMasks(IMGS_DIRECTORY + fileNames[i], masks); for (size_t maskIndex = 0; maskIndex < masks.size(); ++maskIndex) { vector<KeyPoint> keypoints; Mat targetMask = masks[maskIndex]; _featureDetector->detect(imagePreprocessed, keypoints, targetMask); //_featureDetector->detect(imagePreprocessed, keypoints, masks[maskIndex]); if (keypoints.size() > 3) { Mat descriptors; _descriptorExtractor->compute(imagePreprocessed, keypoints, descriptors); descriptorsOriginalMatrixType = descriptors.type(); descriptors.convertTo(descriptors, CV_32FC1); if (descriptors.rows > 0) { //#pragma omp critical _bowTrainer->add(descriptors); } if (outputAnalyzedImages) { cv::drawKeypoints(outputImage, keypoints, outputImage); } } } } else { vector<KeyPoint> keypoints; _featureDetector->detect(imagePreprocessed, keypoints); if (keypoints.size() > 3) { Mat descriptors; _descriptorExtractor->compute(imagePreprocessed, keypoints, descriptors); descriptorsOriginalMatrixType = descriptors.type(); descriptors.convertTo(descriptors, CV_32FC1); if (descriptors.rows > 0) { //#pragma omp critical _bowTrainer->add(descriptors); } if (outputAnalyzedImages) { cv::drawKeypoints(outputImage, keypoints, outputImage); } } } if (outputAnalyzedImages) { stringstream imageOutputFilename; imageOutputFilename << VOCABULARY_BUILD_OUTPUT_DIRECTORY << fileNames[i] << FILENAME_SEPARATOR << _vocabularyFilename << IMAGE_OUTPUT_EXTENSION; imwrite(imageOutputFilename.str(), outputImage); } } } vocabularyOut = _bowTrainer->cluster(); saveVocabulary(vocabularyOut); vocabularyOut.convertTo(vocabularyOut, descriptorsOriginalMatrixType); _bowImgDescriptorExtractor->setVocabulary(vocabularyOut); cout << " -> Finished building vocabulary with " << vocabularyOut.rows << " word size in " << performanceTimer.getElapsedTimeFormated() << "\n" << endl; _bowTrainer->clear(); return true; } return false; }
int main() { int b=1,x=0,y=0,c=0,c1=0,a=0,w=0,N=0,L=0; // CvCapture* capture = cvCaptureFromCAM( 0 ); CvCapture* capture = cvCreateFileCapture("rtsp://10.0.0.90:8086"); cvSetCaptureProperty(capture, CV_CAP_PROP_FRAME_WIDTH, 320); cvSetCaptureProperty(capture, CV_CAP_PROP_FRAME_HEIGHT, 240); cvSetCaptureProperty(capture, CV_CAP_PROP_FPS, 10); CvSize size = cvSize( (int)cvGetCaptureProperty( capture, CV_CAP_PROP_FRAME_WIDTH), (int)cvGetCaptureProperty( capture, CV_CAP_PROP_FRAME_HEIGHT) ); CvVideoWriter *writer = cvCreateVideoWriter("out.mjpg",CV_FOURCC('M','J','P','G'),10,size); vector<Point> corners; Point2f p[4],q[4]; overlay = Mat::zeros(319,239,CV_8UC3); storage = cvCreateMemStorage(0); cvClearMemStorage( storage ); cascade = (CvHaarClassifierCascade*)cvLoad( cascade_name, 0, 0, 0 ); pic = cv::imread("N1.png"); // flip(pic,pic,0); // flip(pic,pic,1); while(1) { c=c1=0; img = cvQueryFrame( capture ); if ( !img ) { printf("Connect Camera.....\n"); break; } if ( L == 3 ) { for( y=0; y<10; y++ ) { L=0; img = cvQueryFrame( capture ); // cvShowImage( "Frame", img ); cvWaitKey(33); } } Mat src(img); if(!img)break; face = cvHaarDetectObjects( img, cascade, storage,1.1, 2, CV_HAAR_DO_CANNY_PRUNING,cvSize(40, 40)); for( i = 0; i < (face ? face->total : 0); i++ ) { CvRect* r = (CvRect*)cvGetSeqElem( face, i ); found =10; pt1.x = r->x*scale+50 ; pt2.x = (r->x+r->width)*scale+50; pt1.y = r->y*scale ; pt2.y = (r->y+r->height)*scale ; //cvRectangle( img, pt1, pt2, CV_RGB(0,0,0), 3, 8, 0 ); } /************************************Next************************************/ pt7.x=20;pt7.y=10; cvInitFont( &font, CV_FONT_HERSHEY_COMPLEX, 0.3,0.3 ); cvPutText( img, "Next", pt7, &font, CV_RGB(255,255,0)); cvRectangle( img, cvPoint(0,0),cvPoint(65,20), CV_RGB(0,255,0), 2, 8, 0 ); /************************************prev************************************/ pt7.x=280;pt7.y=14; cvInitFont( &font, CV_FONT_HERSHEY_COMPLEX, 0.4,0.4 ); cvPutText( img, "Prev", pt7, &font, CV_RGB(255,255,0)); cvRectangle( img, cvPoint(270,0),cvPoint(320,20), CV_RGB(0,255,0), 2, 8, 0 ); pt7.x=96;pt7.y=14; cvInitFont( &font, CV_FONT_HERSHEY_COMPLEX, 0.4,0.4 ); cvPutText( img, "Augment Reality", pt7, &font, CV_RGB(255,255,0)); pt3.x=65;pt3.y=20;pt4.x=270;pt4.y=20;//hori cvLine( img, pt3, pt4, CV_RGB(0,255,255), 2, 8, 0 );//bottom-hor Mat hsv; cvtColor(src, hsv, CV_BGR2HSV); Mat bw; inRange(hsv, Scalar(0, 50, 170, 0), Scalar(10, 180, 256, 0), bw);//red vector<vector<Point> > contours; findContours(bw.clone(), contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE); Mat dst = Mat::zeros(src.size(), src.type()); Mat dst1 = Mat::zeros(src.size(), src.type()); drawContours(dst, contours, -1, Scalar::all(255), CV_FILLED); unsigned int v1=0,v2=0,v3=0,v4=0,v5=0,v6=0,v7=0,v8=0; if ( found > 5 ) { for( y = 8; y < 13; y++ ) { for( x = 8; x < 55; x++ ) { a=saturate_cast<uchar>(( dst.at<Vec3b>(y,x)[0] )); if ( (a > 230 ) ) { c =c+1; c1 =0; } } } for( y = 8; y < 13; y++ ) { for( x = 275; x < 317; x++ ) { w=saturate_cast<uchar>(( dst.at<Vec3b>(y,x)[0] )); if ( (w > 230 ) ) { c1 = c1 + 1; c = 0; } } } prev: if( ( c > 7 ) && ( b == 0 ) ) { pic = cv::imread("N2.png"); flip(pic,pic,0); flip(pic,pic,1); b=b+1;c=0;L=3; goto Next; } if( ( c > 7 ) && ( b == 1 ) ) { pic = cv::imread("N3.png"); flip(pic,pic,0); flip(pic,pic,1); if (N==9){N=0;L=3;c=0;goto Next;} b=b+1;c=0;L=3; goto Next; } if( ( c > 7 ) && ( b == 2 ) ) { pic = cv::imread("N4.png"); flip(pic,pic,0); flip(pic,pic,1); if (N==9){N=0;L=3;b=b-1;goto Next;} c=0;L=3; goto Next; } if( ( c1 > 7 ) ) { b=b-1; c1=0; //printf("Prev.............\n"); c=10; N=9; goto prev; } Next: found=0; frame=img; ///printf("argument...........\n"); q[0].x= 0; q[0].y= 0; q[1].x= 150; q[1].y= 0; q[2].x= 150; q[2].y= 180; q[3].x= 0; //center q[3].y= 180; v1=pt1.y,v2=pt1.x,v3=pt2.y,v4=pt1.x,v5=pt2.y,v6=pt2.x,v7=pt1.y,v8=pt2.x; for(i=0;i<corners.size();i++)//find the size of contour { cout << "# of corners points: " << corners.size() << endl ; } p[0].x= v1;// pt1.x; p[0].y= v2;//pt1.y; p[1].x= v3;//pt2.x; p[1].y= v4;//pt1.y; p[2].x= v5;//pt2.x; p[2].y= v6;//pt2.y; p[3].x= v7;//pt1.x; p[3].y= v8;//pt2.y; perspMat = getPerspectiveTransform(q,p); warpPerspective(pic,overlay,perspMat,Size(frame.cols,frame.rows)); merge(frame,overlay,frame); imshow("Frame",frame); } //for ( j=0; j<=100;j++) //{ //cvWriteFrame(writer,img); //} // cvWriteFrame(writer,img); //change 14-11-2014 // cvSaveImage("out.jpg", img); cvWriteFrame(writer, img); cvWaitKey( 33); } cvReleaseCapture( &capture ); //cvReleaseVideoWriter( &writer ); //cvReleaseVideoWriter( &writer ); return 0; }
int main(int argc, char *argv[]) { #ifdef WIN32 //_CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF ); // dump leaks at return //_CrtSetBreakAlloc(287); #endif string verboseLevelConsole; int deviceId, kinectId; bool useCamera = false, useKinect = false; bool useFileList = false; bool useImgs = false; bool useDirectory = false; bool useLandmarkFiles = false; vector<path> inputPaths; path inputFilelist; path inputDirectory; vector<path> inputFilenames; path configFilename; shared_ptr<ImageSource> imageSource; path landmarksDir; // TODO: Make more dynamic wrt landmark format. a) What about the loading-flags (1_Per_Folder etc) we have? b) Expose those flags to cmdline? c) Make a LmSourceLoader and he knows about a LM_TYPE (each corresponds to a Parser/Loader class?) string landmarkType; path sdmModelFile; path faceDetectorFilename; bool trackingMode; try { po::options_description desc("Allowed options"); desc.add_options() ("help,h", "produce help message") ("verbose,v", po::value<string>(&verboseLevelConsole)->implicit_value("DEBUG")->default_value("INFO","show messages with INFO loglevel or below."), "specify the verbosity of the console output: PANIC, ERROR, WARN, INFO, DEBUG or TRACE") ("config,c", po::value<path>(&configFilename)->required(), "path to a config (.cfg) file") ("input,i", po::value<vector<path>>(&inputPaths)/*->required()*/, "input from one or more files, a directory, or a .lst/.txt-file containing a list of images") ("device,d", po::value<int>(&deviceId)->implicit_value(0), "A camera device ID for use with the OpenCV camera driver") ("kinect,k", po::value<int>(&kinectId)->implicit_value(0), "Windows only: Use a Kinect as camera. Optionally specify a device ID.") ("model,m", po::value<path>(&sdmModelFile)->required(), "A SDM model file to load.") ("face-detector,f", po::value<path>(&faceDetectorFilename)->required(), "Path to an XML CascadeClassifier from OpenCV.") ("landmarks,l", po::value<path>(&landmarksDir), "load landmark files from the given folder") ("landmark-type,t", po::value<string>(&landmarkType), "specify the type of landmarks to load: ibug") ("tracking-mode,r", po::value<bool>(&trackingMode)->default_value(false)->implicit_value(true), "If on, V&J will be run to initialize the model only and after the model lost tracking. If off, V&J will be run on every frame/image.") ; po::positional_options_description p; p.add("input", -1); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm); po::notify(vm); if (vm.count("help")) { cout << "Usage: fitter [options]\n"; cout << desc; return EXIT_SUCCESS; } if (vm.count("landmarks")) { useLandmarkFiles = true; if (!vm.count("landmark-type")) { cout << "You have specified to use landmark files. Please also specify the type of the landmarks to load via --landmark-type or -t." << endl; return EXIT_SUCCESS; } } } catch(std::exception& e) { cout << e.what() << endl; return EXIT_FAILURE; } loglevel logLevel; if(boost::iequals(verboseLevelConsole, "PANIC")) logLevel = loglevel::PANIC; else if(boost::iequals(verboseLevelConsole, "ERROR")) logLevel = loglevel::ERROR; else if(boost::iequals(verboseLevelConsole, "WARN")) logLevel = loglevel::WARN; else if(boost::iequals(verboseLevelConsole, "INFO")) logLevel = loglevel::INFO; else if(boost::iequals(verboseLevelConsole, "DEBUG")) logLevel = loglevel::DEBUG; else if(boost::iequals(verboseLevelConsole, "TRACE")) logLevel = loglevel::TRACE; else { cout << "Error: Invalid loglevel." << endl; return EXIT_FAILURE; } Loggers->getLogger("shapemodels").addAppender(make_shared<logging::ConsoleAppender>(logLevel)); Loggers->getLogger("render").addAppender(make_shared<logging::ConsoleAppender>(logLevel)); Loggers->getLogger("sdmTracking").addAppender(make_shared<logging::ConsoleAppender>(logLevel)); Logger appLogger = Loggers->getLogger("sdmTracking"); appLogger.debug("Verbose level for console output: " + logging::loglevelToString(logLevel)); appLogger.debug("Using config: " + configFilename.string()); if (inputPaths.size() > 1) { // We assume the user has given several, valid images useImgs = true; inputFilenames = inputPaths; } else if (inputPaths.size() == 1) { // We assume the user has given either an image, directory, or a .lst-file if (inputPaths[0].extension().string() == ".lst" || inputPaths[0].extension().string() == ".txt") { // check for .lst or .txt first useFileList = true; inputFilelist = inputPaths.front(); } else if (boost::filesystem::is_directory(inputPaths[0])) { // check if it's a directory useDirectory = true; inputDirectory = inputPaths.front(); } else { // it must be an image useImgs = true; inputFilenames = inputPaths; } } else { // todo see HeadTracking.cpp useCamera = true; //appLogger.error("Please either specify one or several files, a directory, or a .lst-file containing a list of images to run the program!"); //return EXIT_FAILURE; } if (useFileList==true) { appLogger.info("Using file-list as input: " + inputFilelist.string()); shared_ptr<ImageSource> fileListImgSrc; // TODO VS2013 change to unique_ptr, rest below also try { fileListImgSrc = make_shared<FileListImageSource>(inputFilelist.string()); } catch(const std::runtime_error& e) { appLogger.error(e.what()); return EXIT_FAILURE; } imageSource = fileListImgSrc; } if (useImgs==true) { //imageSource = make_shared<FileImageSource>(inputFilenames); //imageSource = make_shared<RepeatingFileImageSource>("C:\\Users\\Patrik\\GitHub\\data\\firstrun\\ws_8.png"); appLogger.info("Using input images: "); vector<string> inputFilenamesStrings; // Hack until we use vector<path> (?) for (const auto& fn : inputFilenames) { appLogger.info(fn.string()); inputFilenamesStrings.push_back(fn.string()); } shared_ptr<ImageSource> fileImgSrc; try { fileImgSrc = make_shared<FileImageSource>(inputFilenamesStrings); } catch(const std::runtime_error& e) { appLogger.error(e.what()); return EXIT_FAILURE; } imageSource = fileImgSrc; } if (useDirectory==true) { appLogger.info("Using input images from directory: " + inputDirectory.string()); try { imageSource = make_shared<DirectoryImageSource>(inputDirectory.string()); } catch(const std::runtime_error& e) { appLogger.error(e.what()); return EXIT_FAILURE; } } if (useCamera) { imageSource = make_shared<CameraImageSource>(deviceId); } // Load the ground truth // Either a) use if/else for imageSource or labeledImageSource, or b) use an EmptyLandmarkSoure shared_ptr<LabeledImageSource> labeledImageSource; shared_ptr<NamedLandmarkSource> landmarkSource; if (useLandmarkFiles) { vector<path> groundtruthDirs; groundtruthDirs.push_back(landmarksDir); // Todo: Make cmdline use a vector<path> shared_ptr<LandmarkFormatParser> landmarkFormatParser; if(boost::iequals(landmarkType, "lst")) { //landmarkFormatParser = make_shared<LstLandmarkFormatParser>(); //landmarkSource = make_shared<DefaultNamedLandmarkSource>(LandmarkFileGatherer::gather(imageSource, string(), GatherMethod::SEPARATE_FILES, groundtruthDirs), landmarkFormatParser); } else if(boost::iequals(landmarkType, "ibug")) { landmarkFormatParser = make_shared<IbugLandmarkFormatParser>(); landmarkSource = make_shared<DefaultNamedLandmarkSource>(LandmarkFileGatherer::gather(imageSource, ".pts", GatherMethod::ONE_FILE_PER_IMAGE_SAME_DIR, groundtruthDirs), landmarkFormatParser); } else { cout << "Error: Invalid ground truth type." << endl; return EXIT_FAILURE; } } else { landmarkSource = make_shared<EmptyLandmarkSource>(); } labeledImageSource = make_shared<NamedLabeledImageSource>(imageSource, landmarkSource); ptree pt; try { boost::property_tree::info_parser::read_info(configFilename.string(), pt); } catch(const boost::property_tree::ptree_error& error) { appLogger.error(error.what()); return EXIT_FAILURE; } std::chrono::time_point<std::chrono::system_clock> start, end; Mat img; const string windowName = "win"; vector<imageio::ModelLandmark> landmarks; cv::namedWindow(windowName); SdmLandmarkModel lmModel = SdmLandmarkModel::load(sdmModelFile); SdmLandmarkModelFitting modelFitter(lmModel); // faceDetectorFilename: e.g. opencv\\sources\\data\\haarcascades\\haarcascade_frontalface_alt2.xml cv::CascadeClassifier faceCascade; if (!faceCascade.load(faceDetectorFilename.string())) { cout << "Error loading face detection model." << endl; return EXIT_FAILURE; } bool runRigidAlign = true; std::ofstream resultsFile("C:\\Users\\Patrik\\Documents\\GitHub\\sdm_lfpw_tr_68lm_10s_5c_RESULTS.txt"); vector<string> comparisonLandmarks({ "9", "31", "37", "40", "43", "46", "49", "55", "63", "67" }); while(labeledImageSource->next()) { start = std::chrono::system_clock::now(); appLogger.info("Starting to process " + labeledImageSource->getName().string()); img = labeledImageSource->getImage(); Mat landmarksImage = img.clone(); LandmarkCollection groundtruth = labeledImageSource->getLandmarks(); vector<shared_ptr<Landmark>> lmv = groundtruth.getLandmarks(); for (const auto& l : lmv) { cv::circle(landmarksImage, l->getPoint2D(), 3, Scalar(255.0f, 0.0f, 0.0f)); } // iBug 68 points. No eye-centers. Calculate them: cv::Point2f reye_c = (groundtruth.getLandmark("37")->getPosition2D() + groundtruth.getLandmark("40")->getPosition2D()) / 2.0f; cv::Point2f leye_c = (groundtruth.getLandmark("43")->getPosition2D() + groundtruth.getLandmark("46")->getPosition2D()) / 2.0f; cv::circle(landmarksImage, reye_c, 3, Scalar(255.0f, 0.0f, 127.0f)); cv::circle(landmarksImage, leye_c, 3, Scalar(255.0f, 0.0f, 127.0f)); cv::Scalar interEyeDistance = cv::norm(Vec2f(reye_c), Vec2f(leye_c), cv::NORM_L2); Mat imgGray; cvtColor(img, imgGray, cv::COLOR_BGR2GRAY); vector<cv::Rect> faces; float score, notFace = 0.5; // face detection faceCascade.detectMultiScale(img, faces, 1.2, 2, 0, cv::Size(50, 50)); //faces.push_back({ 172, 199, 278, 278 }); if (faces.empty()) { runRigidAlign = true; cv::imshow(windowName, landmarksImage); cv::waitKey(5); continue; } for (const auto& f : faces) { cv::rectangle(landmarksImage, f, cv::Scalar(0.0f, 0.0f, 255.0f)); } // Check if the face corresponds to the ground-truth: Mat gtLmsRowX(1, lmv.size(), CV_32FC1); Mat gtLmsRowY(1, lmv.size(), CV_32FC1); int idx = 0; for (const auto& l : lmv) { gtLmsRowX.at<float>(idx) = l->getX(); gtLmsRowY.at<float>(idx) = l->getY(); ++idx; } double minWidth, maxWidth, minHeight, maxHeight; cv::minMaxIdx(gtLmsRowX, &minWidth, &maxWidth); cv::minMaxIdx(gtLmsRowY, &minHeight, &maxHeight); float cx = cv::mean(gtLmsRowX)[0]; float cy = cv::mean(gtLmsRowY)[0] - 30.0f; // do this in relation to the IED, not absolute pixel values if (std::abs(cx - (faces[0].x+faces[0].width/2.0f)) > 30.0f || std::abs(cy - (faces[0].y+faces[0].height/2.0f)) > 30.0f) { //cv::imshow(windowName, landmarksImage); //cv::waitKey(); continue; } Mat modelShape = lmModel.getMeanShape(); //if (runRigidAlign) { modelShape = modelFitter.alignRigid(modelShape, faces[0]); //runRigidAlign = false; //} // print the mean initialization for (int i = 0; i < lmModel.getNumLandmarks(); ++i) { cv::circle(landmarksImage, Point2f(modelShape.at<float>(i, 0), modelShape.at<float>(i + lmModel.getNumLandmarks(), 0)), 3, Scalar(255.0f, 0.0f, 255.0f)); } modelShape = modelFitter.optimize(modelShape, imgGray); for (int i = 0; i < lmModel.getNumLandmarks(); ++i) { cv::circle(landmarksImage, Point2f(modelShape.at<float>(i, 0), modelShape.at<float>(i + lmModel.getNumLandmarks(), 0)), 3, Scalar(0.0f, 255.0f, 0.0f)); } imwrite("C:\\Users\\Patrik\\Documents\\GitHub\\out_sdm_lms\\" + labeledImageSource->getName().filename().string(), landmarksImage); resultsFile << "# " << labeledImageSource->getName() << std::endl; for (const auto& lmId : comparisonLandmarks) { shared_ptr<Landmark> gtlm = groundtruth.getLandmark(lmId); // Todo: Handle case when LM not found cv::Point2f gt = gtlm->getPoint2D(); cv::Point2f det = lmModel.getLandmarkAsPoint(lmId, modelShape); float dx = (gt.x - det.x); float dy = (gt.y - det.y); float diff = std::sqrt(dx*dx + dy*dy); diff = diff / interEyeDistance[0]; // normalize by the IED resultsFile << diff << " # " << lmId << std::endl; } end = std::chrono::system_clock::now(); int elapsed_mseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count(); appLogger.info("Finished processing. Elapsed time: " + lexical_cast<string>(elapsed_mseconds) + "ms.\n"); //cv::imshow(windowName, landmarksImage); //cv::waitKey(5); } resultsFile.close(); return 0; }
//! sets the img as background for grid display void GridDisplay::setImage(Mat img) { this->img = img.clone(); this->setImageSize(this->img.size()); }
/** * Grow the edges along with directon of gradient */ Mat RobustTextDetection::growEdges(Mat& image, Mat& edges ) { CV_Assert( edges.type() == CV_8UC1 ); Mat grad_x, grad_y; Sobel( image, grad_x, CV_32FC1, 1, 0 ); Sobel( image, grad_y, CV_32FC1, 0, 1 ); Mat grad_mag, grad_dir; cartToPolar( grad_x, grad_y, grad_mag, grad_dir, true ); /* Convert the angle into predefined 3x3 neighbor locations | 2 | 3 | 4 | | 1 | 0 | 5 | | 8 | 7 | 6 | */ for( int y = 0; y < grad_dir.rows; y++ ) { float * grad_ptr = grad_dir.ptr<float>(y); for( int x = 0; x < grad_dir.cols; x++ ) { if( grad_ptr[x] != 0 ) grad_ptr[x] = toBin( grad_ptr[x] ); } } grad_dir.convertTo( grad_dir, CV_8UC1 ); /* Perform region growing based on the gradient direction */ Mat result = edges.clone(); uchar * prev_ptr = result.ptr<uchar>(0); uchar * curr_ptr = result.ptr<uchar>(1); for( int y = 1; y < edges.rows - 1; y++ ) { uchar * edge_ptr = edges.ptr<uchar>(y); uchar * grad_ptr = grad_dir.ptr<uchar>(y); uchar * next_ptr = result.ptr<uchar>(y + 1); for( int x = 1; x < edges.cols - 1; x++ ) { /* Only consider the contours */ if( edge_ptr[x] != 0 ) { /* .. there should be a better way .... */ switch( grad_ptr[x] ) { case 1: curr_ptr[x-1] = 255; break; case 2: prev_ptr[x-1] = 255; break; case 3: prev_ptr[x ] = 255; break; case 4: prev_ptr[x+1] = 255; break; case 5: curr_ptr[x ] = 255; break; case 6: next_ptr[x+1] = 255; break; case 7: next_ptr[x ] = 255; break; case 8: next_ptr[x-1] = 255; break; default: break; } } } prev_ptr = curr_ptr; curr_ptr = next_ptr; } return result; }
int main(int argc, char *argv[]) { #ifdef WIN32 //_CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF ); // dump leaks at return //_CrtSetBreakAlloc(287); #endif string verboseLevelConsole; path outputFilename; path configFilename; try { po::options_description desc("Allowed options"); desc.add_options() ("help,h", "produce help message") ("verbose,v", po::value<string>(&verboseLevelConsole)->implicit_value("DEBUG")->default_value("INFO","show messages with INFO loglevel or below."), "specify the verbosity of the console output: PANIC, ERROR, WARN, INFO, DEBUG or TRACE") ("config,c", po::value<path>(&configFilename)->required(), "input config") ("output,o", po::value<path>(&outputFilename)->required(), "output filename") ; po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(desc).run(), vm); po::notify(vm); if (vm.count("help")) { cout << "Usage: sdmTraining [options]\n"; cout << desc; return EXIT_SUCCESS; } } catch(std::exception& e) { cout << e.what() << endl; return EXIT_FAILURE; } LogLevel logLevel; if(boost::iequals(verboseLevelConsole, "PANIC")) logLevel = LogLevel::Panic; else if(boost::iequals(verboseLevelConsole, "ERROR")) logLevel = LogLevel::Error; else if(boost::iequals(verboseLevelConsole, "WARN")) logLevel = LogLevel::Warn; else if(boost::iequals(verboseLevelConsole, "INFO")) logLevel = LogLevel::Info; else if(boost::iequals(verboseLevelConsole, "DEBUG")) logLevel = LogLevel::Debug; else if(boost::iequals(verboseLevelConsole, "TRACE")) logLevel = LogLevel::Trace; else { cout << "Error: Invalid LogLevel." << endl; return EXIT_FAILURE; } Loggers->getLogger("superviseddescentmodel").addAppender(make_shared<logging::ConsoleAppender>(logLevel)); Loggers->getLogger("sdmTraining").addAppender(make_shared<logging::ConsoleAppender>(logLevel)); Logger appLogger = Loggers->getLogger("sdmTraining"); appLogger.debug("Verbose level for console output: " + logging::logLevelToString(logLevel)); struct Dataset { string databaseName; path images; path groundtruth; string landmarkType; map<string, string> landmarkMappings; // from model (lhs) to thisDb (rhs) shared_ptr<LabeledImageSource> labeledImageSource; }; string modelLandmarkType; std::vector<string> modelLandmarks; vector<Dataset> trainingDatasets; int numSamplesPerImage; // How many Monte Carlo samples to generate per training image, in addition to the original image. Default: 10 int numCascadeSteps; // How many cascade steps to learn? (i.e. how many regressors) vector<string> descriptorTypes; vector<shared_ptr<DescriptorExtractor>> descriptorExtractors; LandmarkBasedSupervisedDescentTraining::Regularisation regularisation; // Read the stuff from the config: ptree pt; try { read_info(configFilename.string(), pt); } catch (const boost::property_tree::ptree_error& error) { appLogger.error(string("Error reading the config file: ") + error.what()); return EXIT_FAILURE; } try { // Get stuff from the parameters subtree ptree ptParameters = pt.get_child("parameters"); numSamplesPerImage = ptParameters.get<int>("numSamplesPerImage", 10); numCascadeSteps = ptParameters.get<int>("numCascadeSteps", 5); regularisation.factor = ptParameters.get<float>("regularisationFactor", 0.5f); regularisation.regulariseAffineComponent = ptParameters.get<bool>("regulariseAffineComponent", false); regularisation.regulariseWithEigenvalueThreshold = ptParameters.get<bool>("regulariseWithEigenvalueThreshold", false); // Read the 'featureDescriptors' sub-tree: ptree ptFeatureDescriptors = ptParameters.get_child("featureDescriptors"); for (const auto& kv : ptFeatureDescriptors) { string descriptorType = kv.second.get<string>("descriptorType"); string descriptorPostprocessing = kv.second.get<string>("descriptorPostprocessing", "none"); string descriptorParameters = kv.second.get<string>("descriptorParameters", ""); if (descriptorType == "OpenCVSift") { // Todo: make a load method in each descriptor shared_ptr<DescriptorExtractor> sift = std::make_shared<SiftDescriptorExtractor>(); descriptorExtractors.push_back(sift); } else if (descriptorType == "vlhog-dt") { vector<string> params; boost::split(params, descriptorParameters, boost::is_any_of(" ")); if (params.size() != 6) { throw std::logic_error("descriptorParameters must contain numCells, cellSize and numBins."); } int numCells = boost::lexical_cast<int>(params[1]); int cellSize = boost::lexical_cast<int>(params[3]); int numBins = boost::lexical_cast<int>(params[5]); shared_ptr<DescriptorExtractor> vlhogDt = std::make_shared<VlHogDescriptorExtractor>(VlHogDescriptorExtractor::VlHogType::DalalTriggs, numCells, cellSize, numBins); descriptorExtractors.push_back(vlhogDt); } else if (descriptorType == "vlhog-uoctti") { vector<string> params; boost::split(params, descriptorParameters, boost::is_any_of(" ")); if (params.size() != 6) { throw std::logic_error("descriptorParameters must contain numCells, cellSize and numBins."); } int numCells = boost::lexical_cast<int>(params[1]); int cellSize = boost::lexical_cast<int>(params[3]); int numBins = boost::lexical_cast<int>(params[5]); shared_ptr<DescriptorExtractor> vlhogUoctti = std::make_shared<VlHogDescriptorExtractor>(VlHogDescriptorExtractor::VlHogType::Uoctti, numCells, cellSize, numBins); descriptorExtractors.push_back(vlhogUoctti); } else { throw std::logic_error("descriptorType does not match 'OpenCVSift', 'vlhog-dt' or 'vlhog-uoctti'."); } descriptorTypes.push_back(descriptorType); } // Get stuff from the modelLandmarks subtree: ptree ptModelLandmarks = pt.get_child("modelLandmarks"); modelLandmarkType = ptModelLandmarks.get<string>("landmarkType"); appLogger.debug("Type of the model landmarks: " + modelLandmarkType); string modelLandmarksUsage = ptModelLandmarks.get<string>("landmarks"); if (modelLandmarksUsage.empty()) { // value is empty, meaning it's a node and the user should specify a list of 'landmarks' ptree ptModelLandmarksList = ptModelLandmarks.get_child("landmarks"); for (const auto& kv : ptModelLandmarksList) { modelLandmarks.push_back(kv.first); } appLogger.debug("Loaded a list of " + lexical_cast<string>(modelLandmarks.size()) + " landmarks to train the model."); } else if (modelLandmarksUsage == "all") { throw std::logic_error("Using 'all' modelLandmarks is not implemented yet - specify a list for now."); } else { throw std::logic_error("Error reading the models 'landmarks' key, should either provide a node with a list of landmarks or specify 'all'."); } // Get stuff from the trainingData subtree: ptree ptTrainingData = pt.get_child("trainingData"); for (const auto& kv : ptTrainingData) { // For each database: appLogger.debug("Using database '" + kv.first + "' for training:"); Dataset dataset; dataset.databaseName = kv.first; dataset.images = kv.second.get<path>("images"); dataset.groundtruth = kv.second.get<path>("groundtruth"); dataset.landmarkType = kv.second.get<string>("landmarkType"); string landmarkMappingsUsage = kv.second.get<string>("landmarkMappings"); if (landmarkMappingsUsage.empty()) { // value is empty, meaning it's a node and the user should specify a list of landmarkMappings ptree ptLandmarkMappings = kv.second.get_child("landmarkMappings"); for (const auto& mapping : ptLandmarkMappings) { dataset.landmarkMappings.insert(make_pair(mapping.first, mapping.second.get_value<string>())); } appLogger.debug("Loaded a list of " + lexical_cast<string>(dataset.landmarkMappings.size()) + " landmark mappings."); if (dataset.landmarkMappings.size() < modelLandmarks.size()) { throw std::logic_error("Error reading the landmark mappings, there are less mappings given than the number of landmarks that should be used to train the model."); } } else if (landmarkMappingsUsage == "none") { // generate identity mappings for (const auto& lm : modelLandmarks) { dataset.landmarkMappings.insert(make_pair(lm, lm)); } appLogger.debug("Generated a list of " + lexical_cast<string>(dataset.landmarkMappings.size()) + " identity landmark mappings."); } else { throw std::logic_error("Error reading the landmark mappings, should either provide list of mappings or specify 'none'."); } trainingDatasets.push_back(dataset); } } catch (const boost::property_tree::ptree_error& error) { appLogger.error("Parsing config: " + string(error.what())); return EXIT_FAILURE; } catch (const std::logic_error& e) { appLogger.error("Parsing config: " + string(e.what())); return EXIT_FAILURE; } // Read in the image sources: for (auto& d : trainingDatasets) { // Load the images: shared_ptr<ImageSource> imageSource; // We assume the user has given either a directory or a .lst/.txt-file if (d.images.extension().string() == ".lst" || d.images.extension().string() == ".txt") { // check for .lst or .txt first appLogger.info("Using file-list as input: " + d.images.string()); shared_ptr<ImageSource> fileListImgSrc; // TODO VS2013 change to unique_ptr, rest below also try { fileListImgSrc = make_shared<FileListImageSource>(d.images.string()); } catch (const std::runtime_error& e) { appLogger.error(e.what()); return EXIT_FAILURE; } imageSource = fileListImgSrc; } else if (boost::filesystem::is_directory(d.images)) { appLogger.info("Using input images from directory: " + d.images.string()); try { imageSource = make_shared<DirectoryImageSource>(d.images.string()); } catch (const std::runtime_error& e) { appLogger.error(e.what()); return EXIT_FAILURE; } } else { appLogger.error("The path given is neither a directory nor a .lst/.txt-file containing a list of images: " + d.images.string()); return EXIT_FAILURE; } // Load the ground truth shared_ptr<NamedLandmarkSource> landmarkSource; vector<path> groundtruthDirs = { d.groundtruth }; shared_ptr<LandmarkFormatParser> landmarkFormatParser; if (boost::iequals(d.landmarkType, "ibug")) { landmarkFormatParser = make_shared<IbugLandmarkFormatParser>(); landmarkSource = make_shared<DefaultNamedLandmarkSource>(LandmarkFileGatherer::gather(imageSource, ".pts", GatherMethod::ONE_FILE_PER_IMAGE_SAME_DIR, groundtruthDirs), landmarkFormatParser); } else { cout << "Error: Invalid ground truth type." << endl; return EXIT_FAILURE; } d.labeledImageSource = make_shared<NamedLabeledImageSource>(imageSource, landmarkSource); } std::chrono::time_point<std::chrono::system_clock> start, end; string faceDetectionModel("C:\\opencv\\opencv_2.4.8_prebuilt\\sources\\data\\haarcascades\\haarcascade_frontalface_alt2.xml"); // sgd: "../models/haarcascade_frontalface_alt2.xml" cv::CascadeClassifier faceCascade; if (!faceCascade.load(faceDetectionModel)) { cout << "Error loading face detection model." << endl; return EXIT_FAILURE; } // App-config: FilterByFaceDetection filterByFaceDetection = FilterByFaceDetection::VIOLAJONES; // Add a switch "use_GT_as_detectionResults" ? // Our data: using cv::Rect; vector<Mat> trainingImages; vector<Mat> trainingGroundtruthLandmarks; vector<Rect> trainingFaceboxes; start = std::chrono::system_clock::now(); appLogger.info("Reading data, doing V&J if enabled..."); for (const auto& d : trainingDatasets) { if (filterByFaceDetection == FilterByFaceDetection::NONE) { // 1. Use all the images for training while (d.labeledImageSource->next()) { Mat landmarks(1, 2 * modelLandmarks.size(), CV_32FC1); int currentLandmark = 0; for (const auto ml : modelLandmarks) { try { string lmIdInDb = d.landmarkMappings.at(ml); shared_ptr<Landmark> lm = d.labeledImageSource->getLandmarks().getLandmark(lmIdInDb); landmarks.at<float>(0, currentLandmark) = lm->getX(); landmarks.at<float>(0, currentLandmark + modelLandmarks.size()) = lm->getY(); } catch (std::out_of_range& e) { appLogger.error(e.what()); // mapping failed return EXIT_FAILURE; } catch (std::invalid_argument& e) { appLogger.error(e.what()); // lm not in db return EXIT_FAILURE; } ++currentLandmark; } Mat img = d.labeledImageSource->getImage(); trainingImages.push_back(img); trainingGroundtruthLandmarks.push_back(landmarks); } } else if (filterByFaceDetection == FilterByFaceDetection::VIOLAJONES) { // 1. First, check on which faces the face-detection succeeds. Then only use these images for training. // "succeeds" means all ground-truth landmarks are inside the face-box. This is reasonable for a face- // detector like OCV V&J which has a big face-box, but for others, another method is necessary. while (d.labeledImageSource->next()) { Mat img = d.labeledImageSource->getImage(); vector<cv::Rect> detectedFaces; faceCascade.detectMultiScale(img, detectedFaces, 1.2, 2, 0, cv::Size(50, 50)); if (detectedFaces.empty()) { continue; } Mat output = img.clone(); for (const auto& f : detectedFaces) { cv::rectangle(output, f, cv::Scalar(0.0f, 0.0f, 255.0f)); } // check if the detected face is a valid one: // i.e. for now, if the ground-truth landmarks 37 (reye_oc), 46 (leye_oc) and 58 (mouth_ll_c) are inside the face-box // (should add: _and_ the face-box is not bigger than IED*2 or something) vector<shared_ptr<Landmark>> allLandmarks = d.labeledImageSource->getLandmarks().getLandmarks(); bool skipImage = false; for (const auto& lm : allLandmarks) { if (lm->getName() == "37" || lm->getName() == "46" || lm->getName() == "58") { if (!detectedFaces[0].contains(lm->getPoint2D())) { skipImage = true; break; // if any LM is not inside, skip this training image // Note: improvement: if the first face-box doesn't work, try the other ones } } } if (skipImage) { continue; } // We're using the image: Mat landmarks(1, 2 * modelLandmarks.size(), CV_32FC1); int currentLandmark = 0; for (const auto ml : modelLandmarks) { try { string lmIdInDb = d.landmarkMappings.at(ml); shared_ptr<Landmark> lm = d.labeledImageSource->getLandmarks().getLandmark(lmIdInDb); landmarks.at<float>(0, currentLandmark) = lm->getX(); landmarks.at<float>(0, currentLandmark + modelLandmarks.size()) = lm->getY(); } catch (std::out_of_range& e) { appLogger.error(e.what()); // mapping failed return EXIT_FAILURE; } catch (std::invalid_argument& e) { appLogger.error(e.what()); // lm not in db return EXIT_FAILURE; } ++currentLandmark; } trainingImages.push_back(img); trainingGroundtruthLandmarks.push_back(landmarks); trainingFaceboxes.push_back(detectedFaces[0]); } } } end = std::chrono::system_clock::now(); int elapsed_mseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); appLogger.debug("Finished after " + lexical_cast<string>(elapsed_mseconds)+"ms."); LandmarkBasedSupervisedDescentTraining tr; tr.setNumSamplesPerImage(numSamplesPerImage); tr.setNumCascadeSteps(numCascadeSteps); tr.setRegularisation(regularisation); tr.setAlignGroundtruth(LandmarkBasedSupervisedDescentTraining::AlignGroundtruth::NONE); // TODO Read from config! tr.setMeanNormalization(LandmarkBasedSupervisedDescentTraining::MeanNormalization::UNIT_SUM_SQUARED_NORMS); // TODO Read from config! SdmLandmarkModel model = tr.train(trainingImages, trainingGroundtruthLandmarks, trainingFaceboxes, modelLandmarks, descriptorTypes, descriptorExtractors); std::time_t currentTime_t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); string currentTime = std::ctime(¤tTime_t); model.save(outputFilename, "Trained on " + currentTime.substr(0, currentTime.length() - 1)); appLogger.info("Finished training. Saved model to " + outputFilename.string() + "."); return 0; }