// Resize the input image and then re-compute Sobel image etc void DetectionScanner::ResizeImage() { image.Resize(sobel,ratio); image.Swap(sobel); image.Sobel(sobel,false,false); ComputeCT(sobel,ct); }
// Projects every point in `points` onto the image plane and keeps, per pixel,
// the closest point (a z-buffer).
//
//  indexImage  [out] resized to _imageRows x _imageCols; each pixel holds the
//                    index (into `points`) of the closest point that projects
//                    there, or -1 when no point does.
//  depthImage  [out] resized likewise; each pixel holds the depth of that
//                    closest point, or std::numeric_limits<float>::max().
//  points      [in]  points to project; may be empty.
//
// Points are skipped when _project() rejects them, when their depth lies
// outside [_minDistance, _maxDistance], or when they fall outside the image.
void PinholePointProjector::project(IntImage &indexImage, DepthImage &depthImage, const PointVector &points) const
{
    assert(_imageRows && _imageCols && "PinholePointProjector: _imageRows and _imageCols are zero");

    indexImage.create(_imageRows, _imageCols);
    depthImage.create(_imageRows, _imageCols);
    depthImage.setTo(cv::Scalar(std::numeric_limits<float>::max()));
    indexImage.setTo(cv::Scalar(-1));

    // Pixels are addressed directly through the images instead of through
    // stack-allocated arrays of row pointers: variable-length arrays are not
    // standard C++ and can overflow the stack for tall images. Indexing
    // `points[i]` inside the loop also avoids forming &points[0] on an empty
    // vector.
    for(size_t i = 0; i < points.size(); i++) {
        int x, y;
        float d;
        if(!_project(x, y, d, points[i]) ||
           d < _minDistance || d > _maxDistance ||
           x < 0 || x >= indexImage.cols ||
           y < 0 || y >= indexImage.rows)
            continue;

        float &otherDistance = depthImage(y, x);
        int &otherIndex = indexImage(y, x);
        // Keep the nearer of the stored point and the new one.
        if(!otherDistance || otherDistance > d) {
            otherDistance = d;
            otherIndex = static_cast<int>(i);
        }
    }
}
// combine the (xdiv-1)*(ydiv-1) integral images into a single one void DetectionScanner::InitIntegralImages(const int stepsize) { if(cascade->nodes[0]->type!=NodeDetector::LINEAR) return; // No need to prepare integral images const int hd = height/xdiv*2-2; const int wd = width/ydiv*2-2; scores.Create(ct.nrow,ct.ncol); scores.Zero(cascade->nodes[0]->thresh/hd/wd); double* linearweights = cascade->nodes[0]->classifier.buf; for(int i=0; i<xdiv-EXT; i++) { const int xoffset = height/xdiv*i; for(int j=0; j<ydiv-EXT; j++) { const int yoffset = width/ydiv*j; for(int x=2; x<ct.nrow-2-xoffset; x++) { int* ctp = ct.p[x+xoffset]+yoffset; double* tempp = scores.p[x]; for(int y=2; y<ct.ncol-2-yoffset; y++) tempp[y] += linearweights[ctp[y]]; } linearweights += baseflength; } } scores.CalcIntegralImageInPlace(); for(int i=2; i<ct.nrow-2-height; i+=stepsize) { double* p1 = scores.p[i]; double* p2 = scores.p[i+hd]; for(int j=2; j<ct.ncol-2-width; j+=stepsize) p1[j] += (p2[j+wd] - p2[j] - p1[j+wd]); } }
// Scans the image stored in `filename` at several scales and copies every
// sx-by-sy window accepted by all `count` cascade stages into
// trainset[pointer], advancing `pointer` each time.
//
//  filename  image to load and scan.
//  pointer   [in/out] next free slot in `trainset`; the function returns as
//            soon as it reaches `totalcount`.
//
// Returns silently when the loaded image has a non-positive height or width.
// NOTE(review): `rect` is never used and `ratio` is updated but never read
// in this function.
void CascadeClassifier::ApplyOriginalSizeForInputBoosting(const CString filename,int& pointer) const
{
	IntImage procface;
	IntImage image,square;
	REAL sq,ex,value;
	int result;
	CRect rect;
	REAL ratio;

	procface.Load(filename);
	if(procface.height <=0 || procface.width<=0) return;
	ratio = 1.0;
	// Reciprocal of the padded window area, used to turn window sums into means.
	REAL paddedsize = REAL(1)/REAL((sx+1)*(sy+1));
	// Keep shrinking the image while a window still fits in both dimensions.
	while((procface.height>sx+1) && (procface.width>sy+1))
	{
		// `image` receives the integral image, `square` the integral of squares.
		procface.CalcSquareAndIntegral(square,image);
		// Slide the window with the stride configured for this bootstrap level.
		for(int i=0,size_x=image.height-sx; i<size_x; i+=bootstrap_increment[bootstrap_level])
			for(int j=0,size_y=image.width-sy; j<size_y; j+=bootstrap_increment[bootstrap_level])
			{
				// Window sum from four integral-image corners.
				ex = image.data[i+sx][j+sy]+image.data[i][j]-image.data[i+sx][j]-image.data[i][j+sy];
				if(ex<mean_min || ex>mean_max) continue;
				// Window sum of squares, same four-corner scheme.
				sq = square.data[i+sx][j+sy]+square.data[i][j]-square.data[i+sx][j]-square.data[i][j+sy];
				if(sq<sq_min) continue;
				// sq becomes E[x^2] - E[x]^2, then its square root (or 1.0 when zero).
				ex *= paddedsize;
				ex = ex * ex;
				sq *= paddedsize;
				sq = sq - ex;
				ASSERT(sq>=0);
				if(sq>0) sq = sqrt(sq); else sq = 1.0;
				if(sq<var_min) continue;
				// Run the cascade; the first stage below its threshold rejects the window.
				result = 1;
				for(int k=0; k<count; k++)
				{
					value = 0.0;
					for(int t=0,size=ac[k].count; t<size; t++)
						value += (ac[k].alphas[t]*ac[k].scs[t].Apply(ac[k].scs[t].GetOneFeatureTranslation(image.data+i,j)/sq));
					if(value<ac[k].thresh)
					{
						result = 0;
						break;
					}
				}
				if(result==1)
				{
					// Store the window as an integral image re-based to the
					// window's own top-left corner.
					for(int k=1; k<=sx; k++)
						for(int t=1; t<=sy; t++)
							trainset[pointer].data[k][t]=image.data[i+k][j+t]-image.data[i+k][j]-image.data[i][j+t]+image.data[i][j];
					pointer++;
					if(pointer==totalcount) return;
				}
			}
		// Move to the next scale using the ratio configured for this level.
		ratio = ratio * bootstrap_resizeratio[bootstrap_level];
		procface.Resize(image,bootstrap_resizeratio[bootstrap_level]);
		SwapIntImage(procface,image);
	}
}
// Loads `count` template frames for one action instance from disk and
// converts each into a +1 / -1 float mask.
//
//  actionName  base name of the template files.
//  actionType  stored unchanged in the returned instance.
//  id          instance id; also part of the file number.
//  count       number of consecutive frames to load.
//
// Frame i is read from "<prefix><actionName>_<10000 + 100*id + i>.png".
// Pixels of channel 0 above 127 become -1 and add one to `tmass`; all other
// pixels become +1. The caller owns the returned instance, its frame vector
// and the frames in it.
//
// NOTE(review): the template directory is a hard-coded absolute path, and the
// result of cvLoadImage is not checked before use.
ActionInstance* loadTemplateByName(std::string actionName, int actionType, int id, int count)
{
    std::cout << "loading " << actionName << " " << id << std::endl;

    // Alternative template locations used during development:
    //   "data/"
    //   "/Users/admin/GITHUB/MPC/our_templates/"
    //   "/home/ubuntu/project/MPC/temp_subs/ActionRecDemoV3/data/"
    //   "/Users/priyankakulkarni/Documents/Project/MPC/ActionRecDemoV3/palm_fist_templates/"
    //   "/home/ubuntu/project/MPC/palm_fist_templates/"
    std::string totalPrefix = "/Users/priyankakulkarni/Documents/Project/MPC/ActionRecDemoV3/breakout_templates_resized/" + actionName;

    ActionInstance* instance = new ActionInstance;
    instance->tData = new std::vector<FloatImage*>;
    instance->id = id;
    instance->actionType = actionType;
    instance->tmass = 0.0f;
    instance->dist = 0.0f;

    for(int frame = 0; frame < count; ++frame)
    {
        const int fileNumber = 10000 + (100 * id) + frame;
        std::string fname = totalPrefix + "_" + IntToString(fileNumber) + ".png";
        std::cout << "Filename: " << fname << std::endl;

        IntImage* loaded = new IntImage(cvLoadImage(fname.c_str()));
        int* channel0 = loaded->getChannel(0);
        FloatImage* mask = new FloatImage(loaded->width(), loaded->height());

        const int pixelCount = mask->width * mask->height;
        for(int p = 0; p < pixelCount; ++p)
        {
            const bool bright = channel0[p] > 127;
            mask->data[p] = bright ? -1.0f : 1.0f;
            if(bright)
                instance->tmass += 1.0f;
        }

        instance->tData->push_back(mask);
        delete loaded;
    }

    std::cout << "Template had mass of " << instance->tmass << std::endl;
    return instance;
}
// Bilinearly resamples this image into `result`, scaling both dimensions by
// `ratio` (the destination size is truncated to whole pixels).
//
// Notes kept from the original author (Jianxin Wu):
//  1. Float-to-int conversion in C truncates towards zero, i.e. it floors
//     positive numbers.
//  2. Only ratio < 1 is used in this application and all values involved are
//     positive, so 0 <= x <= height-1 and 0 <= y <= width-1 and no further
//     boundary checks are made. Take care when ratio >= 1.
void IntImage::Resize(IntImage &result, REAL ratio) const
{
    result.SetSize(CSize(int(m_iHeight*ratio),int(m_iWidth*ratio)));

    // From here on `ratio` maps destination coordinates back to the source.
    ratio = 1/ratio;

    const int dstRows = result.m_iHeight;
    const int dstCols = result.m_iWidth;
    for(int row=0; row<dstRows; row++)
    {
        // The vertical sample position depends only on the destination row.
        REAL y = row*ratio;
        int y0 = (int)(y);
        if (y0 == m_iHeight-1) y0--;   // keep y0+1 inside the source image
        y = y - y0;

        for(int col=0; col<dstCols; col++)
        {
            REAL x = col*ratio;
            int x0 = (int)(x);
            if (x0 == m_iWidth-1) x0--; // keep x0+1 inside the source image
            x = x - x0;

            // Interpolate along x on the two bracketing rows, then along y.
            const REAL fx0 = m_Data[y0][x0] + x*(m_Data[y0][x0+1]-m_Data[y0][x0]);
            const REAL fx1 = m_Data[y0+1][x0] + x*(m_Data[y0+1][x0+1]-m_Data[y0+1][x0]);
            result.m_Data[row][col] = fx0 + y*(fx1-fx0);
        }
    }
}
// Back-projects every pixel of `depthImage` into a 3D point with an
// identity-covariance Gaussian.
//
//  points      [out] the back-projected points, in row-major pixel order.
//  gaussians   [out] one Gaussian per output point, centred on the point.
//  indexImage  [out] same size as depthImage; each pixel holds the index of
//                    its point in `points`, or -1 when _unProject() rejected
//                    the pixel.
//  depthImage  [in]  must have non-zero dimensions (asserted).
void CylindricalPointProjector::unProject(PointVector &points, Gaussian3fVector &gaussians, IntImage &indexImage, const DepthImage &depthImage) const
{
    assert(depthImage.rows > 0 && depthImage.cols > 0 && "CylindricalPointProjector: Depth image has zero dimensions");

    // Reserve room for the worst case (every pixel valid); trimmed at the end.
    points.resize(depthImage.rows * depthImage.cols);
    gaussians.resize(depthImage.rows * depthImage.cols);
    indexImage.create(depthImage.rows, depthImage.cols);

    int count = 0;
    for(int r = 0; r < depthImage.rows; r++) {
        for(int c = 0; c < depthImage.cols; c++) {
            // Slot `count` is reused until a pixel back-projects successfully.
            if(!_unProject(points[count], c, r, depthImage(r, c))) {
                indexImage(r, c) = -1;
                continue;
            }
            Eigen::Matrix3f cov = Eigen::Matrix3f::Identity();
            gaussians[count] = Gaussian3f(points[count].head<3>(), cov);
            indexImage(r, c) = count;
            count++;
        }
    }

    points.resize(count);
    gaussians.resize(count);
}
// Back-projects every pixel of `depthImage` into a 3D point.
//
//  points      [out] the back-projected points, in row-major pixel order.
//  indexImage  [out] same size as depthImage; each pixel holds the index of
//                    its point in `points`, or -1 when _unProject() rejected
//                    the pixel.
//  depthImage  [in]  depth values to back-project.
//
// Throws a `const char*` message when the projector's configured image size
// or the depth image itself has a zero dimension.
void SphericalPointProjector::unProject(PointVector &points, IntImage &indexImage, const DepthImage &depthImage) const
{
    // The original guard only looked at _imageRows/_imageCols although the
    // message, and all the code below, concern the depth image. The depth
    // image is now checked as well, so an empty one can no longer reach
    // &points[0] on an empty vector.
    if(_imageRows == 0 || _imageCols == 0 || depthImage.rows <= 0 || depthImage.cols <= 0) {
        throw "SphericalPointProjector: Depth image has zero dimensions";
    }

    // Reserve room for the worst case (every pixel valid); trimmed at the end.
    points.resize(depthImage.rows * depthImage.cols);
    int count = 0;
    indexImage.create(depthImage.rows, depthImage.cols);

    Point *point = &points[0];
    for(int r = 0; r < depthImage.rows; r++) {
        const float *f = &depthImage(r, 0);
        int *i = &indexImage(r, 0);
        for(int c = 0; c < depthImage.cols; c++, f++, i++) {
            // `point` only advances past slots that were successfully filled.
            if(!_unProject(*point, c, r, *f)) {
                *i = -1;
                continue;
            }
            point++;
            *i = count;
            count++;
        }
    }

    points.resize(count);
}
// Reads one training sample from `is` into `image`.
//
// Layout consumed from the stream: a label line (0 or 1), a "height width"
// line (asserted to equal sx and sy), then raw bytes read row by row, and a
// final end-of-line.
//
// The image is resized to (height+1) x (width+1); row 0 and column 0 are set
// to zero and the pixel bytes fill the remaining cells.
// NOTE(review): the loop bounds rely on SetSize() updating image.height and
// image.width to the padded size — confirm against IntImage::SetSize.
void ReadOneTrainingSample(ifstream& is,IntImage& image)
{
    char buf[256];
    ASSERT(sx<=256 && sy<=256);   // a row of pixels must fit in `buf`

    is>>image.label;
    is.ignore(256,'\n');
    ASSERT( (image.label == 0) || (image.label == 1) );

    is>>image.height>>image.width;
    is.ignore(256,'\n');
    ASSERT(image.height==sx);
    ASSERT(image.width==sy);

    image.SetSize(CSize(image.height+1,image.width+1));

    // Zero the padding column and row.
    for(int row=0; row<image.height; row++)
        image.data[row][0] = 0;
    for(int col=0; col<image.width; col++)
        image.data[0][col] = 0;

    for(int row=1; row<image.height; row++)
    {
        is.read(buf,image.width-1);
        for(int col=1; col<image.width; col++)
        {
            // Go through unsigned char so that bytes above 127 stay positive.
            const unsigned char raw = static_cast<unsigned char>(buf[col-1]);
            image.data[row][col] = REAL(int(raw));
            ASSERT(image.data[row][col]>=0 && image.data[row][col] <= 255);
        }
    }
    is.ignore(256,'\n');
}
// Renders frame `f` of a template as a 3-channel image for display.
//
//  tData  template frames; the output takes its size from frame 0.
//  f      index of the frame to render (not bounds-checked here).
//
// Values are scaled so that the largest magnitude maps to 255. Positive
// values go to channel 1 (green), non-positive values go to channel 0 (red)
// with their sign flipped, and channel 2 is cleared. An all-zero frame
// renders as a black image. The caller owns the returned image.
IntImage* renderTemplateFrame(std::vector<FloatImage*>* tData, int f)
{
    int twidth = (*tData)[0]->width;
    int theight = (*tData)[0]->height;
    IntImage* ret = new IntImage(twidth, theight, 3);
    ret->fill(0);

    // first, figure out the maximum magnitude
    float maxval = 0.0f;
    float* curfdata = ((*tData)[f])->data;
    for(int p = 0; p < twidth * theight; ++p)
    {
        // Use fabs for the stored value as well as for the comparison: plain
        // abs() can resolve to the integer overload and truncate the float.
        const float magnitude = static_cast<float>(fabs(curfdata[p]));
        if(magnitude > maxval)
            maxval = magnitude;
    }
    std::cout << "maxval: " << maxval << std::endl;

    // now, figure out the multiplier; an all-zero frame would otherwise
    // divide by zero and feed NaN into the int conversion below
    float mult = (maxval > 0.0f) ? (255.0f / maxval) : 0.0f;

    int* rchan = ret->getChannel(0);
    int* gchan = ret->getChannel(1);
    int* bchan = ret->getChannel(2);

    // now scale every value and route it to its channel
    for(int p = 0; p < twidth * theight; ++p)
    {
        int sval = (int)(curfdata[p] * mult);
        if(sval > 0)
        {
            // put into green channel
            gchan[p] = sval;
        }
        else
        {
            // sval <= 0: put into red channel
            rchan[p] = -sval;
        }
        bchan[p] = 0;
    }
    return ret;
}
// Computes the integral image of this image and of its squared pixels.
//
//  square  [out] (m_iHeight+1) x (m_iWidth+1); square.m_Data[i][j] is the sum
//                of squared pixels over rows [0, i) and columns [0, j).
//  image   [out] same layout, holding the sum of the pixels themselves.
//
// Row 0 and column 0 of both outputs are zero.
void IntImage::CalcSquareAndIntegral(IntImage& square, IntImage& image) const
{
	REAL partialsum,partialsum2;

	square.SetSize(CSize(m_iHeight+1,m_iWidth+1));
	image.SetSize(CSize(m_iHeight+1,m_iWidth+1));

	// Zero the first row, which is exactly m_iWidth+1 elements. The original
	// bound (i<=m_iWidth+1) wrote one element further: harmless when a second
	// row exists (its first cell is zeroed again below) but out of bounds for
	// an image of height 0.
	for(int i=0;i<=m_iWidth;i++)
		square.m_Buf[i]=image.m_Buf[i]=0;

	for(int i=1;i<=m_iHeight;i++)
	{
		// Running sums along the current source row.
		partialsum = partialsum2 = 0;
		square.m_Data[i][0] = 0;
		image.m_Data[i][0] = 0;
		for(int j=1;j<=m_iWidth;j++)
		{
			partialsum += (m_Data[i-1][j-1]*m_Data[i-1][j-1]);
			partialsum2 += m_Data[i-1][j-1];
			// Value above plus everything to the left on this row.
			square.m_Data[i][j] = square.m_Data[i-1][j] + partialsum;
			image.m_Data[i][j] = image.m_Data[i-1][j] + partialsum2;
		}
	}
}
// Computes the integral image of this image and of its squared pixels.
//
//  square  [out] (height+1) x (width+1); square.data[i][j] is the sum of
//                squared pixels over rows [0, i) and columns [0, j).
//  image   [out] same layout, holding the sum of the pixels themselves.
//
// Row 0 and column 0 of both outputs are zero.
void IntImage::CalcSquareAndIntegral(IntImage& square, IntImage& image) const
{
	REAL partialsum,partialsum2;

	square.SetSize(MSize(height+1,width+1));
	image.SetSize(MSize(height+1,width+1));

	// Zero the first row, which is exactly width+1 elements. The original
	// bound (i<=width+1) wrote one element further: harmless when a second
	// row exists (its first cell is zeroed again below) but out of bounds for
	// an image of height 0.
	for(int i=0; i<=width; i++)
		square.buf[i]=image.buf[i]=0;

	for(int i=1; i<=height; i++)
	{
		// Running sums along the current source row.
		partialsum = partialsum2 = 0;
		square.data[i][0] = 0;
		image.data[i][0] = 0;
		for(int j=1; j<=width; j++)
		{
			partialsum += (data[i-1][j-1]*data[i-1][j-1]);
			partialsum2 += data[i-1][j-1];
			// Value above plus everything to the left on this row.
			square.data[i][j] = square.data[i-1][j] + partialsum;
			image.data[i][j] = image.data[i-1][j] + partialsum2;
		}
	}
}
// Fills `intervalImage` with the value _projectInterval() returns for every
// pixel of `depthImage`.
//
//  intervalImage  [out] resized to the depth image's dimensions.
//  depthImage     [in]  must have non-zero dimensions (asserted).
//  worldRadius    forwarded unchanged to _projectInterval().
void PinholePointProjector::projectIntervals(IntImage &intervalImage, const DepthImage &depthImage, const float worldRadius) const
{
    assert(depthImage.rows > 0 && depthImage.cols > 0 && "PinholePointProjector: Depth image has zero dimensions");

    intervalImage.create(depthImage.rows, depthImage.cols);
    for(int r = 0; r < depthImage.rows; r++) {
        for(int c = 0; c < depthImage.cols; c++) {
            intervalImage(r, c) = _projectInterval(r, c, depthImage(r, c), worldRadius);
        }
    }
}
// Computes the Sobel gradient of this image into `result`.
//
//  result     [out] nrow x ncol; the one-pixel border holds no gradient.
//  useSqrt    true: store the gradient magnitude; false: store its square.
//  normalize  true: rescale so the interior spans 0..255. This implies
//             useSqrt, since normalizing squared magnitudes is rarely wanted.
//
// For now this is the straightforward, unoptimized implementation.
// An empty image returns right after `result` has been created.
void IntImage<T>::Sobel(IntImage<REAL>& result,const bool useSqrt,const bool normalize)
{
    result.Create(nrow,ncol);
    // Nothing to compute; also avoids indexing row nrow-1 of an empty image.
    if(nrow<1 || ncol<1) return;

    // Zero the border; only interior pixels have a full 3x3 neighbourhood.
    for(int i=0; i<nrow; i++) result.p[i][0] = result.p[i][ncol-1] = 0;
    std::fill(result.p[0],result.p[0]+ncol,0.0);
    std::fill(result.p[nrow-1],result.p[nrow-1]+ncol,0.0);

    for(int i=1; i<nrow-1; i++)
    {
        T* p1 = p[i-1];
        T* p2 = p[i];
        T* p3 = p[i+1];
        REAL* pr = result.p[i];
        for(int j=1; j<ncol-1; j++)
        {
            REAL gx = p1[j-1] - p1[j+1] + 2*(p2[j-1] - p2[j+1]) + p3[j-1] - p3[j+1];
            REAL gy = p1[j-1] - p3[j-1] + 2*(p1[j] - p3[j]) + p1[j+1] - p3[j+1];
            pr[j] = gx*gx+gy*gy;
        }
    }

    if(useSqrt || normalize) // if we want to normalize the result image, we'd better use the true Sobel gradient
        for(int i=1; i<nrow-1; i++)
            for(int j=1; j<ncol-1; j++)
                result.p[i][j] = sqrt(result.p[i][j]);

    if(normalize)
    {
        REAL minv = 1e20, maxv = -minv;
        for(int i=1; i<nrow-1; i++)
        {
            for(int j=1; j<ncol-1; j++)
            {
                // Two independent tests: with `else if`, a value that lowered
                // the minimum was never considered for the maximum, so maxv
                // could stay at its sentinel or end up too small.
                const REAL v = result.p[i][j];
                if(v<minv) minv = v;
                if(v>maxv) maxv = v;
            }
        }
        // Give the border the minimum so that it maps to 0 after rescaling.
        for(int i=0; i<nrow; i++) result.p[i][0] = result.p[i][ncol-1] = minv;
        for(int i=0; i<ncol; i++) result.p[0][i] = result.p[nrow-1][i] = minv;
        // A constant interior (or no interior) has no range to stretch; map
        // everything to 0 rather than dividing by zero.
        REAL s = (maxv>minv) ? 255.0/(maxv-minv) : 0;
        for(int i=0; i<nrow*ncol; i++) result.buf[i] = (result.buf[i]-minv)*s;
    }
}
// Bilinearly resamples this image into `result` at exactly height x width.
//
//  result  [out] resized to height x width and filled.
//  height  destination row count, must be > 0 (asserted).
//  width   destination column count, must be > 0 (asserted).
//
// The horizontal sample positions are the same for every row, so they are
// computed once up front.
void IntImage<T>::Resize(IntImage<T>& result,const int height,const int width) const
{
    assert(height>0 && width>0);
    result.SetSize(height,width);

    const REAL rowStep = nrow*1.0/height;
    const REAL colStep = ncol*1.0/width;

    // Per destination column: left source column and fractional offset.
    REAL* colFrac = new REAL[result.ncol];
    assert(colFrac!=NULL);
    int* colBase = new int[result.ncol];
    assert(colBase!=NULL);
    for(int c=0; c<width; c++)
    {
        const REAL pos = c*colStep;
        int base = (int)pos;
        if(base==ncol-1) base--;    // keep base+1 inside the source image
        colBase[c] = base;
        colFrac[c] = pos - base;
    }

    for(int r=0; r<height; r++)
    {
        REAL rowFrac = r*rowStep;
        int rowBase = (int)rowFrac;
        if(rowBase==nrow-1) rowBase--;  // keep rowBase+1 inside the source image
        rowFrac -= rowBase;

        T* out = result.p[r];
        const T* upper = p[rowBase];
        const T* lower = p[rowBase+1];
        for(int c=0; c<width; c++)
        {
            const int y0 = colBase[c];
            const REAL y = colFrac[c];
            // Interpolate along the row on both source rows, then between them.
            const REAL fx0 = REAL(upper[y0] + y*(upper[y0+1]-upper[y0]));
            const REAL fx1 = REAL(lower[y0] + y*(lower[y0+1]-lower[y0]));
            out[c] = T(fx0 + rowFrac*(fx1-fx0));
        }
    }

    delete[] colFrac;
    delete[] colBase;
}
void PinholePointProjector::unProject(PointVector &points, Gaussian3fVector &gaussians, IntImage &indexImage, const DepthImage &depthImage) const { assert(depthImage.rows > 0 && depthImage.cols > 0 && "PinholePointProjector: Depth image has zero dimensions"); points.resize(depthImage.rows * depthImage.cols); gaussians.resize(depthImage.rows * depthImage.cols); indexImage.create(depthImage.rows, depthImage.cols); int count = 0; Point *point = &points[0]; Gaussian3f *gaussian = &gaussians[0]; float fB = _baseline * _cameraMatrix(0, 0); Eigen::Matrix3f J; for(int r = 0; r < depthImage.rows; r++) { const float *f = &depthImage(r, 0); int *i = &indexImage(r, 0); for(int c = 0; c < depthImage.cols; c++, f++, i++) { if(!_unProject(*point, c, r, *f)) { *i = -1; continue; } float z = *f; float zVariation = (_alpha * z * z) / (fB + z * _alpha); J << z, 0, (float)r, 0, z, (float)c, 0, 0, 1; J = _iK * J; Diagonal3f imageCovariance(3.0f, 3.0f, zVariation); Eigen::Matrix3f cov = J * imageCovariance * J.transpose(); *gaussian = Gaussian3f(point->head<3>(), cov); gaussian++; point++; *i = count; count++; } } points.resize(count); gaussians.resize(count); }
// Back-projects every pixel of `depthImage` into a 3D point.
//
//  points      [out] the back-projected points, in row-major pixel order.
//  indexImage  [out] same size as depthImage; each pixel holds the index of
//                    its point in `points`, or -1 when _unProject() rejected
//                    the pixel.
//  depthImage  [in]  must have non-zero dimensions (asserted).
void PinholePointProjector::unProject(PointVector &points, IntImage &indexImage, const DepthImage &depthImage) const
{
    assert(depthImage.rows > 0 && depthImage.cols > 0 && "PinholePointProjector: Depth image has zero dimensions");

    // Reserve room for the worst case (every pixel valid); trimmed at the end.
    points.resize(depthImage.rows * depthImage.cols);
    indexImage.create(depthImage.rows, depthImage.cols);

    int count = 0;
    for(int r = 0; r < depthImage.rows; r++) {
        for(int c = 0; c < depthImage.cols; c++) {
            // Slot `count` is reused until a pixel back-projects successfully.
            if(!_unProject(points[count], c, r, depthImage(r, c))) {
                indexImage(r, c) = -1;
                continue;
            }
            indexImage(r, c) = count;
            count++;
        }
    }

    points.resize(count);
}
// compute the Sobel image "ct" from "original" void ComputeCT(IntImage<double>& original,IntImage<int>& ct) { ct.Create(original.nrow,original.ncol); for(int i=2; i<original.nrow-2; i++) { double* p1 = original.p[i-1]; double* p2 = original.p[i]; double* p3 = original.p[i+1]; int* ctp = ct.p[i]; for(int j=2; j<original.ncol-2; j++) { int index = 0; if(p2[j]<=p1[j-1]) index += 0x80; if(p2[j]<=p1[j]) index += 0x40; if(p2[j]<=p1[j+1]) index += 0x20; if(p2[j]<=p2[j-1]) index += 0x10; if(p2[j]<=p2[j+1]) index += 0x08; if(p2[j]<=p3[j-1]) index += 0x04; if(p2[j]<=p3[j]) index += 0x02; if(p2[j]<=p3[j+1]) index ++; ctp[j] = index; } } }
// Runs the cascade over `original` at multiple scales, collects every
// accepted sx-by-sy window as a rectangle in original-image coordinates,
// post-processes the rectangles and draws them onto `original`.
//
//  original  [in/out] image to scan; DrawResults() is called on it at the end.
//  filename  only referenced by the commented-out Save() call at the bottom.
//
// Side effect: adds the number of raw detections (before post-processing) to
// total_fp.
void CascadeClassifier::ApplyOriginalSize(IntImage& original,const CString filename)
{
	IntImage procface;
	IntImage image,square;
	REAL sq,ex,value;
	int result;
	CRect rect;
	REAL ratio;
	vector<CRect> results;

	procface.Copy(original);
	ratio = 1.0;
	results.clear();
	// Reciprocal of the padded window area, used to turn window sums into means.
	REAL paddedsize = REAL(1)/REAL((sx+1)*(sy+1));
	// Keep shrinking the image while a window still fits in both dimensions.
	while((procface.height>sx+1) && (procface.width>sy+1))
	{
		// `image` receives the integral image, `square` the integral of squares.
		procface.CalcSquareAndIntegral(square,image);
		for(int i=0,size_x=image.height-sx; i<size_x; i+=1)
			for(int j=0,size_y=image.width-sy; j<size_y; j+=1)
			{
				// Window sum from four integral-image corners.
				ex = image.data[i+sx][j+sy]+image.data[i][j]-image.data[i+sx][j]-image.data[i][j+sy];
				if(ex<mean_min || ex>mean_max) continue;
				// Window sum of squares, same four-corner scheme.
				sq = square.data[i+sx][j+sy]+square.data[i][j]-square.data[i+sx][j]-square.data[i][j+sy];
				if(sq<sq_min) continue;
				// sq becomes E[x^2] - E[x]^2, then its square root (or 1.0 when zero).
				ex *= paddedsize;
				ex = ex * ex;
				sq *= paddedsize;
				sq = sq - ex;
				ASSERT(sq>=0);
				if(sq>0) sq = sqrt(sq); else sq = 1.0;
				if(sq<var_min) continue;
				// Run the cascade; the first stage below its threshold rejects the window.
				result = 1;
				for(int k=0; k<count; k++)
				{
					value = 0.0;
					for(int t=0,size=ac[k].count; t<size; t++)
					{
						// Feature value of weak classifier t, evaluated inline
						// from the integral image rows starting at row i.
						REAL f1 = 0;
						REAL** p = image.data + i;
						SimpleClassifier& s = ac[k].scs[t];

						// One formula per feature type (0..4), each a weighted
						// combination of integral-image samples at the
						// classifier's x/y coordinates.
						switch(s.type)
						{
							case 0:
								f1 = p[s.x1][j+s.y3] - p[s.x1][j+s.y1] + p[s.x3][j+s.y3] - p[s.x3][j+s.y1] + REAL(2)*(p[s.x2][j+s.y1] - p[s.x2][j+s.y3]);
								break;
							case 1:
								f1 = p[s.x3][j+s.y1] + p[s.x3][j+s.y3] - p[s.x1][j+s.y1] - p[s.x1][j+s.y3] + REAL(2)*(p[s.x1][j+s.y2] - p[s.x3][j+s.y2]);
								break;
							case 2:
								f1 = p[s.x1][j+s.y1] - p[s.x1][j+s.y3] + p[s.x4][j+s.y3] - p[s.x4][j+s.y1] + REAL(3)*(p[s.x2][j+s.y3] - p[s.x2][j+s.y1] + p[s.x3][j+s.y1] - p[s.x3][j+s.y3]);
								break;
							case 3:
								f1 = p[s.x1][j+s.y1] - p[s.x1][j+s.y4] + p[s.x3][j+s.y4] - p[s.x3][j+s.y1] + REAL(3)*(p[s.x3][j+s.y2] - p[s.x3][j+s.y3] + p[s.x1][j+s.y3] - p[s.x1][j+s.y2]);
								break;
							case 4:
								f1 = p[s.x1][j+s.y1] + p[s.x1][j+s.y3] + p[s.x3][j+s.y1] + p[s.x3][j+s.y3] - REAL(2)*(p[s.x2][j+s.y1] + p[s.x2][j+s.y3] + p[s.x1][j+s.y2] + p[s.x3][j+s.y2]) + REAL(4)*p[s.x2][j+s.y2];
								break;
							default:
								// Outside DEBUG builds the compiler is told this
								// branch cannot be reached.
#ifndef DEBUG
								__assume(0);
#else
								;
#endif
						}
						// The weak classifier votes alpha when the feature lies
						// on the side of the variance-scaled threshold selected
						// by its parity.
						if(s.parity!=0)
							if(f1<sq*s.thresh)
								value += ac[k].alphas[t];
							else
								;
						else
							if(f1>=sq*s.thresh)
								value += ac[k].alphas[t];
							else
								;
					}
					if(value<ac[k].thresh)
					{
						result = 0;
						break;
					}
				}
				if(result!=0)
				{
					// Map the window back to the coordinates of the unscaled image.
					const REAL r = 1.0/ratio;
					rect.left = (LONG)(j*r);
					rect.top = (LONG)(i*r);
					rect.right = (LONG)((j+sy)*r);
					rect.bottom = (LONG)((i+sx)*r);
					results.push_back(rect);
				}
			}
		// Next pyramid level: shrink to 80% of the current size.
		ratio = ratio * REAL(0.8);
		procface.Resize(image,REAL(0.8));
		SwapIntImage(procface,image);
	}

	total_fp += results.size();

	PostProcess(results,2);
	PostProcess(results,0);
	DrawResults(original,results);
	// original.Save(filename+"_result.JPG");
}
int main(int argc,char* argv[]) { std::cout << "usage:" << std::endl; std::cout << argv[0] << " <video_file>" << std::endl << std::endl; std::cout << "keys:" << std::endl; std::cout << "space : toggle using simple post-process (NMS, non-maximal suppression)" << std::endl; std::cout << "0 : waits to process next frame until a key pressed" << std::endl; std::cout << "1 : doesn't wait to process next frame" << std::endl; std::cout << "2 : resize frames 1/2" << std::endl; std::cout << "3 : don't resize frames" << std::endl; std::cout << "4 : resize frames 1/4" << std::endl; if (argc < 2) return 0; cv::Mat src; cv::VideoCapture capture( argv[1] ); LoadCascade(scanner); std::cout<<"Detectors loaded."<<std::endl; int key = 0; int wait_time = 1; float fx = 1; bool rect_organization = true; IntImage<double> original; while( key != 27 ) { capture >> src; if( src.empty() ) break; if (fx < 1) { cv::resize(src, src, cv::Size(), fx, fx); } original.Load( src ); std::vector<CRect> results; scanner.FastScan(original, results, 2); if(rect_organization) { PostProcess(results,2); PostProcess(results,0); RemoveCoveredRectangles(results); } for(size_t i = 0; i < results.size(); i++) { cv::rectangle(src, cvPoint(results[i].left,results[i].top),cvPoint(results[i].right,results[i].bottom),cv::Scalar(0,255,0),2 ); } cv::imshow("result",src); key = cv::waitKey( wait_time ); if (key == 32) rect_organization = !rect_organization; if (key == 48) wait_time = 0; if (key == 49) wait_time = 1; if (key == 50) fx = 0.5; if (key == 51) fx = 1; if (key == 52) fx = 0.25; } cv::waitKey(); return 0; }
// Refills the non-face part of the global training set
// (gTrainSet[gFaceCount .. gTotalCount)) with samples that the current
// cascade still accepts, then writes the whole training set to
// gTrainset_Filename.
//
//  discard  when true, none of the existing non-face samples are kept.
//
// Returns true when the training set was filled (either from the existing
// samples alone, or after harvesting from the bootstrap images and saving).
// Returns false when all bootstrap images were consumed at the last
// bootstrap level without filling the set.
const bool BoostingInputFiles(const bool discard)
{
	int i,pointer,index;
	IntImage im;
	ofstream of;

	im.SetSize(CSize(gSx+1,gSy+1));
	gCascade->LoadDefaultCascade();

	// Stage 1: keep the existing samples that the cascade still accepts,
	// compacting them to the front of the non-face range.
	pointer=gFaceCount;
	for(i=gFaceCount;i<gTotalCount;i++)
	{
		if(discard) break;
		if(gCascade->ApplyImagePatch(gTrainSet[i])!=0)
		{
			if(pointer!=i) SwapIntImage(gTrainSet[i],gTrainSet[pointer]);
			pointer++;
			if(pointer==gTotalCount) break;
		}
	}
	// NOTE(review): this early return skips the save step below.
	if(pointer==gTotalCount) return true;

	// Stage 2: harvest new samples from the bootstrap images until the set is full.
	index = 0;
	while(pointer<gTotalCount)
	{
		if(index==gBootstrapSize)
		{
			// Every bootstrap image has been visited at this level.
			if(bootstrap_level==max_bootstrap_level-1)
				return false;
			else
			{
				// Move to the next level and start over. Resetting `pointer`
				// to gFaceCount means samples collected so far get overwritten.
				bootstrap_level++;
				for(i=0;i<gMaxNumFiles;i++) gFileUsed[i] = 0;
				index=0;
				pointer=gFaceCount;
			}
		}
		if(gFileUsed[index]==1)
		{
			index++;
			continue;
		}
		gCascade->ApplyOriginalSizeForInputBoosting(gBootstrap_Filenames[index],pointer);
		gFileUsed[index]=1;
		index++;
	}

	// Stage 3: replace every sample by its second-order finite difference,
	// which recovers per-pixel values from an integral image.
	for(i=0;i<gTotalCount;i++)
	{
		int k,t;
		memcpy(im.m_Buf,gTrainSet[i].m_Buf,(gSx+1)*(gSy+1)*sizeof(im.m_Buf[0]));
		for(k=0;k<=gSy;k++) gTrainSet[i].m_Data[0][k] = 0;
		for(k=0;k<=gSx;k++) gTrainSet[i].m_Data[k][0] = 0;
		for(k=1;k<=gSx;k++)
			for(t=1;t<=gSy;t++)
				gTrainSet[i].m_Data[k][t] = im.m_Data[k][t]-im.m_Data[k-1][t]-im.m_Data[k][t-1]+im.m_Data[k-1][t-1];
	}

	// Stage 4: write the training set. For each sample: a label line, a
	// "gSx gSy" line, then gSx*gSy raw bytes followed by a newline.
	of.open(gTrainset_Filename,ios_base::out | ios_base::binary);
	of<<gTotalCount<<endl;
	unsigned char* writebuf;
	writebuf = new unsigned char[gSx*gSy];
	ASSERT(writebuf!=NULL);
	for(i=0;i<gTotalCount;i++)
	{
		of<<gTrainSet[i].m_iLabel<<endl;
		of<<gSx<<" "<<gSy<<endl;
		// Skip row 0 and column 0, which were zeroed above.
		for(int k=0;k<gSx;k++)
			for(int t=0;t<gSy;t++)
				writebuf[k*gSy+t] = (unsigned char)((int)gTrainSet[i].m_Data[k+1][t+1]);
		of.write((char*)writebuf,gSx*gSy);
		of<<endl;
	}
	delete[] writebuf; writebuf=NULL;
	of.close();

	// Stage 5: convert the samples back in place for training.
	for(i=0;i<gTotalCount;i++)
		gTrainSet[i].CalculateVarianceAndIntegralImageInPlace();
	// Sanity pass over the non-face samples; the result is currently ignored
	// because the warning below is commented out.
	for(i=gFaceCount;i<gTotalCount;i++)
	{
		if(gCascade->ApplyImagePatch(gTrainSet[i])==0)
			; //AfxMessageBox("Something is wrong?");
	}
	return true;
}
// initialization -- compute the Census Tranform image for CENTRIST void DetectionScanner::InitImage(IntImage<double>& original) { image = original; image.Sobel(sobel,false,false); ComputeCT(sobel,ct); }
// Per-client processing loop: takes frames from client_info->mydeque,
// segments them, matches the segmentation against a library of action
// templates with a k-nearest-neighbour vote, and reports the winning action
// back over the client's socket.
//
//  client_info  supplies the socket descriptor (connectfd) and the frame
//               queue (mydeque) used below.
//
// Returns 0 once the ESC key is seen by cvWaitKey.
//
// NOTE(review): nothing allocated with `new` or cvCreateImage in this
// function is released before it returns.
//int main(int argc, char* argv[])
int processVideo(client_info_t *client_info)
{
	//Assign the socket descriptor--Subbu
	int sockfd = client_info->connectfd;

	int cameraid; // = 1;
	int writeFCount = 0;
	//if (argc==2) cameraid=1;
	//else cameraid =0;
	cameraid = 0;
	if (!cameraid) {
		std::cout<<" thread processing in progress";
		//return 0;
	}

	// define processing and display resolutions
	int process_width = 180;
	int process_height = 144;
	int display_width = 320;
	int display_height = 240;

	// Search rectangle: searchX/searchY are passed to the matcher, and all
	// four values are used to draw the rectangle on the display image.
	int searchX = 52;
	int searchY = 0;
	int searchW = 80;
	int searchH = 140;

	bool normalizing = true;
	int actionFrames = 10;

	// Action table: name, enabled flag, number of template instances and the
	// key code sent when the action fires (a non-positive code sends nothing).
	int numActions = 4;
	ActionType* actionTypes = new ActionType[numActions];
	actionTypes[0].actionName = "up";
	actionTypes[0].actionEnabled = true;
	actionTypes[0].count = 15;
	actionTypes[0].sendKey = 1049;
	actionTypes[1].actionName = "left";
	actionTypes[1].actionEnabled = true;
	actionTypes[1].count = 15;
	actionTypes[1].sendKey = 1062;
	actionTypes[2].actionName = "right";
	actionTypes[2].actionEnabled = true;
	actionTypes[2].count = 15;
	actionTypes[2].sendKey = 1064;
	actionTypes[3].actionName = "idle";
	actionTypes[3].actionEnabled = true;
	actionTypes[3].count = 15;
	actionTypes[3].sendKey = -1;

	// Voting parameters: the knearestk closest instances vote, and an action
	// needs at least winval votes to win.
	int* voteArray = new int[numActions];
	int knearestk = 5;
	int winval = 3;

	int numSFrames = 5;
	int sframeMajority = 3;
	int* sframes = new int[numSFrames];
	int sframePos = 0;

	int minRepeatTime = 10;
	int repeatTimout = 0;
	int timeoutPeriod = 5;

	// Per-action cool-down counters, decremented every frame; an action can
	// only be sent again once its counter is back at or below zero.
	int* timeoutArray = new int[numActions];
	for(int i = 0; i < numActions; ++i)
		timeoutArray[i] = 0;
	int timeoutPeriodArray[4] = {12, 5, 5, 5};

	std::vector<ActionInstance*> actionInstances;

	// load the template library
	// NOTE(review): an identical loading loop follows the commented-out block
	// below, so every instance is added to actionInstances twice — confirm
	// whether that is intended.
	std::cout << "Loading action library...\n";
	for(int at = 0; at < numActions; ++at)
	{
		for(int i = 0; i < actionTypes[at].count; ++i)
		{
			ActionInstance* curInstance = loadTemplateByName(actionTypes[at].actionName, at, i+1, actionFrames);
			actionInstances.push_back(curInstance);
		}
	}

	/*
	// load the template library by threads-Subbu
	//int at = client_info->actionType;
	std::cout<<"\n Action type is "<<at<<std::endl;
	for(int i = 0; i < actionTypes[at].count; ++i)
	{
		ActionInstance* curInstance = loadTemplateByName(actionTypes[at].actionName, at, i+1, actionFrames);
		actionInstances.push_back(curInstance);
	}
	*/

	std::cout << "Loading action library...\n";
	for(int at = 0; at < numActions; ++at)
	{
		for(int i = 0; i < actionTypes[at].count; ++i)
		{
			ActionInstance* curInstance = loadTemplateByName(actionTypes[at].actionName, at, i+1, actionFrames);
			actionInstances.push_back(curInstance);
		}
	}
	std::cout << "Done loading action library; loaded " << actionInstances.size() << " instances.\n";
	//cvWaitKey(1000);

	// Show frame 2 of the first loaded template in its own window.
	IntImage* timg = renderTemplateFrame(actionInstances[0]->tData, 2);
	cvNamedWindow( "Template", CV_WINDOW_AUTOSIZE );
	IplImage* timgipl = timg->getIplImage();
	cvShowImage("Template", timgipl);
	cvWaitKey(10);

	LinearShapeMatch* lsm = new LinearShapeMatch(process_width, process_height, actionFrames);
	lsm->setFill(1.0f, true);

	// get access to webcam
	// (frames are actually taken from the client queue in the loop below; the
	// cvQueryFrame call on this capture is commented out)
	CvCapture* srcVideoCapture = cvCaptureFromCAM( cameraid );

	// NOTE(review): both operands are int, so these are integer divisions
	// (320/180 and 240/144 both give 1) before conversion to float.
	float scale_x = display_width / process_width;
	float scale_y = display_height / process_height;

	// initialize some buffers
	IntImage* src_img = new IntImage(display_width, display_height, 3);
	IntImage* src_segmentation = new IntImage(process_width, process_height, 3);
	IplImage* rawFrame;
	IplImage* resizedPFrame = cvCreateImage(cvSize(process_width, process_height), IPL_DEPTH_8U, 3);
	IplImage* resizedSrcFrame = cvCreateImage(cvSize(display_width, display_height), IPL_DEPTH_8U, 3);
	IplImage* finalDisplayFrame = cvCreateImage(cvSize(display_width, display_height), IPL_DEPTH_8U, 3);
	IplImage* display_temp = cvCreateImage(cvSize(process_width, process_height), IPL_DEPTH_8U, 3);
	IntImage* src_r_img = new IntImage(process_width, process_height, 3);
	IntImage* dest_img = new IntImage(process_width, process_height, 3);
	IntImage* seg_img = new IntImage(process_width, process_height, 3);
	//IntImage* bground_img = new IntImage(process_width, process_height, 3);

	// clear out the color buffers
	src_img->fill(0);

	// the probability threshold for merging segments during the segmentation
	// step; larger -> smaller segments
	float seg_prob = 0.99f;

	// initialize the segmenter and do an initial segmentation just to provide
	// the shape units with some valid segmentation to start with
	int* tempSeg;
	int* tempSegSizes;
	StatMerge* statMerge = new StatMerge(process_width, process_height);
	tempSeg = statMerge->doSegmentation(src_r_img->getChannel(0), src_r_img->getChannel(1), src_r_img->getChannel(2), seg_prob);
	tempSegSizes = statMerge->getSizeArray();
	src_segmentation->copyChannel(tempSeg, 0);
	src_segmentation->copyChannel(tempSegSizes, 1);

	cvNamedWindow( "Source Video", CV_WINDOW_AUTOSIZE );
	cvNamedWindow( "Template Locations", CV_WINDOW_AUTOSIZE );

	std::cout << "About to enter main loop.\n";

	// prepare some timing stuff
	std::list<clock_t> clocktimes(TIME_CALC_FRAMES);
	clock_t curTime, frontTime;
	float avg_delay, fps;
	int last_delay = 0;
	float desired_delay = 1.0f / 12.0f;
	cout<<endl<<"preparing timing stuff";

	/*
	clock_t cst = clock();
	std::cout << "cst: " << cst << std::endl;
	cvWaitKey(10000);
	clock_t cnd = clock();
	std::cout << "cnd: " << cnd << std::endl;
	int clockFactor = cnd - cst;
	std::cout << "Clock factor: " << clockFactor << std::endl;
	cvWaitKey();
	*/
	int clockFactor = 1;

	clock_t prevTime = clock();
	curTime = clock();

	// enter main loop-- this runs the display as fast as possible
	while(1)
	{
		cout<<endl<<"Entered main loop";

		// deal with timing stuff
		prevTime = curTime;
		curTime = clock();
		frontTime = clocktimes.front();
		clocktimes.pop_front();
		clocktimes.push_back(curTime);
		// NOTE(review): the delay uses only the previous frame's timestamp,
		// divided by TIME_CALC_FRAMES; frontTime is read but not used.
		avg_delay = (float)(curTime - prevTime) / (float)(TIME_CALC_FRAMES * clockFactor); // * CLOCKS_PER_SEC);
		fps = 1.0f / avg_delay;
		std::cout << "avg_delay: " << avg_delay << std::endl;
		std::cout << "fps: " << fps << std::endl;

		/*
		// grab a frame
		rawFrame = cvQueryFrame(srcVideoCapture);
		cvResize(rawFrame, resizedSrcFrame);
		cvResize(rawFrame, resizedPFrame);
		*/

		//Popping from the queue --Subbu
		// NOTE(review): back()/pop_back() are called without checking that
		// the queue is non-empty, and no locking is visible here.
		cout<<endl<<"manipulaintg dequeue";
		std::cout<<"dequeue size is " <<client_info->mydeque.size();
		Mat mat_img = client_info->mydeque.back();
		client_info->mydeque.pop_back();
		cout<<endl<<"dequeue manipulation done";

		//IplImage ipl_img = mat_img.operator IplImage();
		IplImage ipl_img = mat_img;
		rawFrame = &ipl_img;
		cvResize(rawFrame, resizedSrcFrame);
		cvResize(rawFrame, resizedPFrame);
		//resizedSrcFrame = &ipl_img;
		//resizedPFrame = &ipl_img;

		cout<<endl<<"showing image";
		cvShowImage("Source Video", resizedSrcFrame);
		cvWaitKey(10);

		src_r_img->copy(resizedPFrame, true);
		dest_img->copy(resizedPFrame, true);
		src_img->copy(resizedSrcFrame, true);

		// segment it
		tempSeg = statMerge->doSegmentation(src_r_img->getChannel(0), src_r_img->getChannel(1), src_r_img->getChannel(2), seg_prob);
		tempSegSizes = statMerge->getSizeArray();
		src_segmentation->copyChannel(tempSeg, 0);
		src_segmentation->copyChannel(tempSegSizes, 1);
		statMerge->doLinesOnly(seg_img->getChannel(0), seg_img->getChannel(1), seg_img->getChannel(2));

		// give the segmentation to the matcher
		lsm->pushFrame(src_segmentation);

		float bestDist = 1000000.0f;
		cout<<endl<<"compare with instances "<<actionInstances.size();

		// run through every instance and compare it...
		for(int inst = 0; inst < actionInstances.size(); ++inst)
		{
			ActionInstance* curInstance = actionInstances[inst];
			// Instances of disabled actions keep this large sentinel distance.
			float curDist = 1000000.0f;
			if(actionTypes[curInstance->actionType].actionEnabled)
			{
				float curVal = lsm->doSinglePointMatching(searchX, searchY, curInstance->tData);
				if(normalizing)
					curDist = (curInstance->tmass + curVal) / (curInstance->tmass);
				else
					curDist = (curInstance->tmass + curVal);
			}
			if(curDist < bestDist)
				bestDist = curDist;
			curInstance->dist = curDist;
		}
		cout<<endl<<"comparison done";

		// sort instances according to distance..
		std::sort(actionInstances.begin(), actionInstances.end(), actionInstanceSorter);

		for(int i = 0; i < numActions; ++i)
			voteArray[i] = 0;

		// get the k nearest to vote
		for(int i = 0; i < knearestk; ++i)
			voteArray[actionInstances[i]->actionType] += 1;

		std::string bestAction = "ambiguous";
		bool ambiguous = true;
		float bestDist2 = actionInstances[0]->dist;
		int bestActionID = -1;
		// The first action with at least winval votes wins; otherwise the
		// result stays "ambiguous".
		for(int i = 0; i < numActions; ++i)
		{
			if(voteArray[i] >= winval)
			{
				bestAction = actionTypes[i].actionName;
				bestActionID = i;
				ambiguous = false;
				break;
			}
		}

		/*
		sframePos = (sframePos + 1) % numSFrames;
		sframes[sframePos] = bestActionID;
		*/

		int sendActionID = -1;
		std::string sendActionName = "none";

		// count actions
		/*
		for(int i = 0; i < numActions; ++i)
			voteArray[i] = 0;
		for(int i = 0; i < numSFrames; ++i)
			if(sframes[i] != -1)
				voteArray[sframes[i]] += 1;
		for(int i = 0; i < numActions; ++i)
		{
			if(voteArray[i] >= sframeMajority)
			{
				sendActionID = i;
				sendActionName = actionTypes[sendActionID].actionName;
				break;
			}
		}*/

		// Only send the winning action when its cool-down has expired, then
		// restart that action's cool-down.
		if(bestActionID != -1 && timeoutArray[bestActionID] <= 0)
		{
			sendActionID = bestActionID;
			sendActionName = actionTypes[sendActionID].actionName;
			//timeoutArray[bestActionID] = timeoutPeriod;
			timeoutArray[bestActionID] = timeoutPeriodArray[bestActionID];
		}
		for(int i = 0; i < numActions; ++i)
			timeoutArray[i] -= 1;

		// get the best one...
		//ActionInstance* best = actionInstances[0];
		//std::string bestAction = actionTypes[best->actionType].actionName;
		//std::cout << "Best action: " << bestAction << " " << best->id << " with a distance of " << best->dist << std::endl;
		//std::cout << "Real best dist: " << bestDist << std::endl;
		std::cout << "Best action: " << bestAction << std::endl;
		std::cout << "Send action: " << sendActionName << std::endl;
		std::cout << "Best distance: " << bestDist2 << std::endl;

		//send the action information back to the client-Subbu
		// NOTE(review): the flags argument is NULL rather than 0, and the
		// return value of send() is not checked.
		send(sockfd, bestAction.c_str(), bestAction.size(), NULL);

		if(sendActionID >= 0)
		{
			int keyToSend = actionTypes[sendActionID].sendKey;
			if(keyToSend > 0)
			{
				std::string sendString = IntToString(keyToSend) + "\n";
				// NOTE(review): writes to the hard-coded file descriptor 3.
				write(3, sendString.c_str(), sendString.size());
			}
		}

		drawRectangle(dest_img, searchX, searchY, searchW, searchH, 255, 255, 255);
		dest_img->getIplImage(display_temp);
		cvShowImage("Template Locations", display_temp);
		//copy3ItoU(seg_r, seg_b, seg_g, segmentation_temp);
		//cvShowImage("Segmentation", segmentation_temp);

		// do background subtraction step (TODO!)

		// Note: it is safe to overwrite tempSeg and tempSegSizes since they are
		// 'renewed' every time the segmentation is done (they aren't used to
		// warm start the segmenter or anything)

		// pad out the frame with a delay if we are going too fast:
		// the idea here is that if our delay last frame was less than desired,
		// increase the delay a bit; if our delay was too large, decrease it
		// (the computed delayTime is currently unused by the wait below, which
		// always waits 1 ms)
		int delayTime = last_delay;
		if(avg_delay < desired_delay)
			delayTime += (int)((desired_delay - avg_delay) * 100);
		else
		{
			delayTime -= (int)((avg_delay - desired_delay) * 100);
		}
		if(delayTime < 1)
			delayTime = 1;
		last_delay = delayTime;

		//int keyPressed = cvWaitKey(delayTime);
		int keyPressed = cvWaitKey(1);
		std::cout << "key : " << (keyPressed & 255) << std::endl;
		if( (keyPressed & 255) == 27 ) // esc key
		{
			// quit
			break;
		}
	}

	return 0;
}