// Scans e forward from index i and returns the first index idx < w-3 for
// which isLocalMax(e, idx) is non-zero and cpjMaxDeviation(e+idx-2, 5) is
// greater than 6. Returns w when no such index exists.
// NOTE(review): the deviation helper is handed a pointer two samples before
// idx with length 5 - presumably the 5-sample window centred on idx; the
// caller must guarantee that range is readable (TODO confirm for i < 2).
int cpjNextLocalMaxPosi (float *e, int i, int w)
{
    int idx = i;
    while (idx < w - 3) {
        // Cheap test first; the deviation is only evaluated for local maxima.
        if (isLocalMax(e, idx)) {
            if (cpjMaxDeviation(e + idx - 2, 5) > 6) {
                return idx;
            }
        }
        ++idx;
    }
    // Nothing qualified: report the sentinel value w.
    return w;
}
// TO DO--------------------------------------------------------------------- // Loop through the harrisImage to threshold and compute the local maxima in a neighborhood // srcImage: image with Harris values // destImage: Assign 1 to a pixel if it is above a threshold and is the local maximum in 3x3 window, 0 otherwise. // You'll need to find a good threshold to use. void computeLocalMaxima(CFloatImage &srcImage,CByteImage &destImage) { int width = srcImage.Shape().width; int height = srcImage.Shape().height; double mean, stdDev; double sum = 0; double squareSum = 0; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { float pixel = srcImage.Pixel(x, y, 0); if (!(pixel >= 0 || pixel < 0)) { auto error = "TRUE"; } sum += srcImage.Pixel(x, y, 0); } } mean = sum / (float)(width * height); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { squareSum += pow((srcImage.Pixel(x, y, 0) - mean), 2.); } } stdDev = sqrt(squareSum / (float)(width * height - 1)); int count = 0; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { unsigned char *pixel = &destImage.Pixel(x, y, 0); if (srcImage.Pixel(x, y, 0) >= 3.*stdDev + mean && isLocalMax(srcImage, x, y)) { count++; *pixel = 1; } else { *pixel = 0; } } } }
// Extremum test across three DoG layers at pixel (x, y).
//
// The three-argument isLocalMax overload is first consulted on cdog alone.
// When it reports 1, the result is 1 only if cdog[y][x] is strictly greater
// than both pdog[y][x] and ndog[y][x]; when it reports -1, the result is -1
// only if cdog[y][x] is strictly smaller than both. Every other case yields 0.
// pdog and ndog are only read when the in-layer test reported 1 or -1.
int CDog::isLocalMax(const vector<vector<float> >& pdog,
                     const vector<vector<float> >& cdog,
                     const vector<vector<float> >& ndog,
                     const int x, const int y) {
  const int inLayer = isLocalMax(cdog, x, y);

  switch (inLayer) {
  case 1: {
    // Candidate maximum: must also dominate both neighbouring layers.
    const bool dominates =
      pdog[y][x] < cdog[y][x] && ndog[y][x] < cdog[y][x];
    return dominates ? 1 : 0;
  }
  case -1: {
    // Candidate minimum: must also lie below both neighbouring layers.
    const bool dominated =
      cdog[y][x] < pdog[y][x] && cdog[y][x] < ndog[y][x];
    return dominated ? -1 : 0;
  }
  default:
    return 0;
  }
}
//recognize the fundamental frequency and amplitude of the voice and transfer them to midi note and velocity //return false if the voice is recognized as silence, return true otherwise int voice2midi(int sampleRate,int numSamples, float *data, float *freq, int *velocity) { if(!isPowerOfTwo(numSamples))numSamples=reduceToPowerOfTwo(numSamples); move2zero(numSamples,data); float aa=absoluteAverage(numSamples,data); if(aa<SOUND_THRESHOLD) { *velocity = 0; return false; } float *realOut=new float[numSamples]; float *imagOut=new float[numSamples]; float *realOut2=new float[numSamples]; float *imagOut2=new float[numSamples]; for(int i=0;i<numSamples;i++) data[i] *= (1 + cos(2*PI*i/numSamples - PI)/2); fft_float(numSamples,false,data,0,realOut,imagOut); for(int i=0;i<numSamples;i++) realOut[i]=realOut[i]*realOut[i]+imagOut[i]*imagOut[i]; fft_float(numSamples,true,realOut,0,realOut2,imagOut2); float maxValue=-999999; int maxPosition=-1; for(int i=1;i<numSamples/2;i++) { if(isLocalMax(numSamples,realOut2,i)) if(maxValue<realOut2[i]) { maxValue=realOut2[i]; maxPosition=i; //break; } } float timePerFrame=(float)numSamples/sampleRate; float T=timePerFrame*maxPosition/numSamples; float f=1/T; //printf("%d,%f,%f\n",maxPosition,T,f); //cout<<maxPosition<<endl; *freq=f; *velocity=amplitude2midi(aa); delete []realOut; delete []imagOut; delete []realOut2; delete []imagOut2; return true; }
void CDog::run(const vector<uchar>& image, const vector<uchar>& mask, const vector<uchar>& edge, const int width, const int height, const int gspeedup, const float firstscale, // 1.4f const float lastscale, // 4.0f multiset<CKeypoint2D>& result) { cout << "\tDoG running..." ; m_width = width; m_height = height; m_firstScale = firstscale; m_lastScale = lastscale; init(image, mask, edge); const int factor = 2; const int maxPointsGrid = factor * factor; const int gridsize = gspeedup * factor; const int w = (m_width + gridsize - 1) / gridsize; const int h = (m_height + gridsize - 1) / gridsize; vector<vector<multiset<CKeypoint2D> > > resultgrids; resultgrids.resize(h); for (int y = 0; y < h; ++y) resultgrids[y].resize(w); const float scalestep = pow(2.0f, 1/2.0f); const int steps = max(4, (int)ceil(log(m_lastScale / m_firstScale) / log(scalestep))); vector<vector<float> > pdog, cdog, ndog, cres, nres; setResponse(m_firstScale, cres); setResponse(m_firstScale * scalestep, nres); setDOG(cres, nres, cdog); cres.swap(nres); setResponse(m_firstScale * scalestep * scalestep, nres); setDOG(cres, nres, ndog); vector<vector<uchar> > alreadydetected; alreadydetected.resize(m_height); for (int y = 0; y < m_height; ++y) { alreadydetected[y].resize(m_width); for (int x = 0; x < m_width; ++x) { alreadydetected[y][x] = (uchar)0; } } for (int i = 2; i <= steps - 1; ++i) { const float cscale = m_firstScale * pow(scalestep, i + 1); cres.swap(nres); setResponse(cscale, nres); pdog.swap(cdog); cdog.swap(ndog); setDOG(cres, nres, ndog); const int margin = (int)ceil(2 * cscale); // now 3 response maps are ready for (int y = margin; y < m_height - margin; ++y) { for (int x = margin; x < m_width - margin; ++x) { if (alreadydetected[y][x]) continue; if (cdog[y][x] == 0.0) continue; //if (isCloseBoundary(x, y, margin)) //continue; // check local maximum if (isLocalMax(pdog, cdog, ndog, x, y) && notOnEdge(cdog, x, y)) { const int x0 = min(x / gridsize, w - 1); const int y0 = min(y / gridsize, h 
- 1); alreadydetected[y][x] = 1; CKeypoint2D p; p.icoord = cv::Vec2f(x, y); p.response = fabs(cdog[y][x]); p.type = 1; resultgrids[y0][x0].insert(p); if (maxPointsGrid < (int)resultgrids[y0][x0].size()) resultgrids[y0][x0].erase(resultgrids[y0][x0].begin()); } } } } for (int y = 0; y < h; ++y) for (int x = 0; x < w; ++x) { //const float threshold = setThreshold(resultgrids[y][x]); std::multiset<CKeypoint2D>::iterator begin = resultgrids[y][x].begin(); std::multiset<CKeypoint2D>::iterator end = resultgrids[y][x].end(); while (begin != end) { //if(threshold <= begin->response) result.insert(*begin); ++begin; } } cout << "DoG features: " << (int)result.size() << "." << endl ; }