TEST(DSPSingle, TestConvolve) {
    float out[121];
    float in1[5] = { 0.0, 0.5, 1.0, 0.5, 0.0 };
    float in2[6] = { 0.5, 0.5, 0.5, 1.0, 1.0, 1.0 };
    // >> conv([0 0.5 1 0.5 0], [0.5 0.5 0.5 1.0 1.0 1.0])
    float res[10] = { 0.0, 0.25, 0.75, 1.0, 1.25, 1.75, 2.0, 1.5, 0.5, 0.0 };

    Convolve(in1, 5, in2, 6, out);

    for (unsigned i = 0; i < 10; ++i) {
        ASSERT_FLOAT_EQ(res[i], out[i]);
    }
}
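For reference, the full-length convolution this test expects (matching MATLAB's `conv`, hence the 5 + 6 - 1 = 10 output samples) follows directly from the definition. A minimal sketch, assuming the `(input, length, kernel, length, output)` signature used at the call site above (exact parameter types are an assumption):

#include <stddef.h>

/* Naive full convolution: out has len1 + len2 - 1 samples,
   out[n] = sum over k of in1[k] * in2[n - k]. */
void ConvolveRef1D(const float *in1, size_t len1,
                   const float *in2, size_t len2, float *out)
{
    for (size_t n = 0; n < len1 + len2 - 1; ++n) {
        float acc = 0.0f;
        for (size_t k = 0; k < len1; ++k) {
            if (n >= k && n - k < len2)   /* stay inside in2[] */
                acc += in1[k] * in2[n - k];
        }
        out[n] = acc;
    }
}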
void ImageProximityFFT::SqrDistance(Image& Source, Image& Template, Image& Dest)
{
    CheckFloat(Dest);
    CheckSameNbChannels(Source, Template);
    CheckSameNbChannels(Source, Dest);
    CheckSameSize(Source, Dest);

    // Verify image size
    if (Template.Width() > Source.Width() || Template.Height() > Source.Height())
        throw cl::Error(CL_IMAGE_FORMAT_NOT_SUPPORTED,
                        "The template image must be smaller than the source image.");

    // Verify image types
    if (!SameType(Source, Template))
        throw cl::Error(CL_IMAGE_FORMAT_MISMATCH,
                        "The source image and the template image must be of the same type.");

    PrepareFor(Source, Template);

    m_integral.SqrIntegral(Source, *m_image_sqsums);

    double templ_sqsum[4] = {0};
    m_statistics.SumSqr(Template, templ_sqsum);

    Convolve(Source, Template, Dest);   // Computes the cross-correlation using the FFT

    MatchSquareDiff(Template.Width(), Template.Height(), *m_image_sqsums, templ_sqsum, Dest);
}
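The decomposition behind `SqrDistance` is the standard expansion of the squared difference. With the source window at offset $(u,v)$ and template $T$, the first term comes from the squared integral image (`SqrIntegral`), the second from `SumSqr`, and the cross term from the FFT cross-correlation, which `MatchSquareDiff` then combines:

$$\sum_{x,y}\bigl(S(x{+}u,\,y{+}v)-T(x,y)\bigr)^2 \;=\; \sum_{x,y} S(x{+}u,\,y{+}v)^2 \;+\; \sum_{x,y} T(x,y)^2 \;-\; 2\sum_{x,y} S(x{+}u,\,y{+}v)\,T(x,y)$$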
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst,
                       CFloatImage x_kernel, CFloatImage y_kernel,
                       float scale, float offset,
                       int decimate, int interpolate)
{
    // Allocate the result, if necessary
    CShape dShape = src.Shape();
    if (decimate > 1)
    {
        dShape.width  = (dShape.width  + decimate-1) / decimate;
        dShape.height = (dShape.height + decimate-1) / decimate;
    }
    dst.ReAllocate(dShape, false);

    // Allocate the intermediate images
    CImageOf<T> tmpImg1(src.Shape());
    CImageOf<T> tmpImg2(src.Shape());

    // Create a proper vertical convolution kernel
    CFloatImage v_kernel(1, y_kernel.Shape().width, 1);
    for (int k = 0; k < y_kernel.Shape().width; k++)
        v_kernel.Pixel(0, k, 0) = y_kernel.Pixel(k, 0, 0);
    v_kernel.origin[1] = y_kernel.origin[0];

    // Perform the two convolutions
    Convolve(src, tmpImg1, x_kernel, 1.0f, 0.0f);
    Convolve(tmpImg1, tmpImg2, v_kernel, scale, offset);

    // Downsample or copy
    for (int y = 0; y < dShape.height; y++)
    {
        T* sPtr = &tmpImg2.Pixel(0, y * decimate, 0);
        T* dPtr = &dst.Pixel(0, y, 0);
        int nB  = dShape.nBands;
        for (int x = 0; x < dShape.width; x++)
        {
            for (int b = 0; b < nB; b++)
                dPtr[b] = sPtr[b];
            sPtr += decimate * nB;
            dPtr += nB;
        }
    }

    interpolate++;   // to get rid of "unused parameter" warning
}
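All the `ConvolveSeparable` variants in this collection rely on the separability identity: a 2-D kernel that is the outer product of a row kernel $k_x$ and a column kernel $k_y$ can be applied as two 1-D passes, which is exactly what the pair of `Convolve` calls implements:

$$\bigl(k_y\,k_x^{\top}\bigr) * I \;=\; k_y * \bigl(k_x * I\bigr),$$

reducing the per-pixel cost from $O(w_x w_y)$ multiplies to $O(w_x + w_y)$.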
void GetHarrisComponents(CFloatImage &srcImage, CFloatImage &A, CFloatImage &B, CFloatImage &C,
                         CFloatImage *partialX, CFloatImage *partialY)
{
    int w = srcImage.Shape().width;
    int h = srcImage.Shape().height;

    CFloatImage *partialXPtr;
    CFloatImage *partialYPtr;
    bool ownsPartials = false;   // true when the partials are allocated here

    if (partialX != nullptr && partialY != nullptr)
    {
        partialXPtr = partialX;
        partialYPtr = partialY;
    }
    else
    {
        partialXPtr = new CFloatImage(srcImage.Shape());
        partialYPtr = new CFloatImage(srcImage.Shape());
        ownsPartials = true;
    }

    CFloatImage partialXX(srcImage.Shape());
    CFloatImage partialYY(srcImage.Shape());
    CFloatImage partialXY(srcImage.Shape());
    CFloatImage gaussianImage = GetImageFromMatrix((float *)gaussian5x5Float, 5, 5);

    Convolve(srcImage, *partialXPtr, ConvolveKernel_SobelX);
    Convolve(srcImage, *partialYPtr, ConvolveKernel_SobelY);

    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            float *xxPixel = &partialXX.Pixel(x, y, 0);
            float *yyPixel = &partialYY.Pixel(x, y, 0);
            float *xyPixel = &partialXY.Pixel(x, y, 0);
            // The 1/8 factor accounts for the scaling inherent in Sobel
            // filtering (here it is cancelled by the factor of 8)
            *xxPixel = pow((double)(1./8. * 8. * partialXPtr->Pixel(x, y, 0)), 2.);
            *yyPixel = pow((double)(1./8. * 8. * partialYPtr->Pixel(x, y, 0)), 2.);
            *xyPixel = pow(1./8. * 8., 2.) * partialXPtr->Pixel(x, y, 0) * partialYPtr->Pixel(x, y, 0);
        }
    }

    Convolve(partialXX, A, gaussianImage);
    Convolve(partialXY, B, gaussianImage);
    Convolve(partialYY, C, gaussianImage);

    // Free the scratch derivative images when the caller did not supply them
    if (ownsPartials)
    {
        delete partialXPtr;
        delete partialYPtr;
    }
}
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst,
                       CFloatImage x_kernel, CFloatImage y_kernel,
                       int subsample)
{
    // Allocate the result, if necessary
    CShape dShape = src.Shape();
    if (subsample > 1)
    {
        dShape.width  = (dShape.width  + subsample-1) / subsample;
        dShape.height = (dShape.height + subsample-1) / subsample;
    }
    dst.ReAllocate(dShape, false);

    // Allocate the intermediate images
    CImageOf<T> tmpImg1(src.Shape());
    CImageOf<T> tmpImg2(src.Shape());

    // Create a proper vertical convolution kernel
    CFloatImage v_kernel(1, y_kernel.Shape().width, 1);
    for (int k = 0; k < y_kernel.Shape().width; k++)
        v_kernel.Pixel(0, k, 0) = y_kernel.Pixel(k, 0, 0);
    v_kernel.origin[1] = y_kernel.origin[0];

    // Perform the two convolutions
    Convolve(src, tmpImg1, x_kernel);
    Convolve(tmpImg1, tmpImg2, v_kernel);

    // Downsample or copy
    for (int y = 0; y < dShape.height; y++)
    {
        T* sPtr = &tmpImg2.Pixel(0, y * subsample, 0);
        T* dPtr = &dst.Pixel(0, y, 0);
        int nB  = dShape.nBands;
        for (int x = 0; x < dShape.width; x++)
        {
            for (int b = 0; b < nB; b++)
                dPtr[b] = sPtr[b];
            sPtr += subsample * nB;
            dPtr += nB;
        }
    }
}
bool LarsonSekaninaInstance::ExecuteOn( View& view )
{
    AutoViewLock lock( view );

    ImageVariant image = view.Image();
    if ( image.IsComplexSample() )
        return false;

    StandardStatus status;
    image.SetStatusCallback( &status );

    Console().EnableAbort();

    ImageVariant sharpImg;
    sharpImg.CreateFloatImage( (image.BitsPerSample() > 32) ? image.BitsPerSample() : 32 );
    sharpImg.AllocateImage( image->Width(), image->Height(), 1, ColorSpace::Gray );

    if ( useLuminance && image->IsColor() )
    {
        ImageVariant L;
        image.GetLightness( L );

        Convolve( L, sharpImg, interpolation, radiusDiff, angleDiff, center, 0 );
        ApplyFilter( L, sharpImg, amount, threshold, deringing, rangeLow, rangeHigh, false, 0, highPass );

        image.SetLightness( L );
    }
    else
    {
        for ( int c = 0, n = image->NumberOfNominalChannels(); c < n; ++c )
        {
            image->SelectChannel( c );
            if ( n > 1 )
                Console().WriteLn( "<end><cbr>Processing channel #" + String( c ) );

            Convolve( image, sharpImg, interpolation, radiusDiff, angleDiff, center, c );
            ApplyFilter( image, sharpImg, amount, threshold, deringing, rangeLow, rangeHigh, disableExtension, c, highPass );
        }
    }

    return true;
}
static void Apply( GenericImage<P>& image, const FFTConvolution& F )
{
    Rect r = image.SelectedRectangle();

    // Build and cache the transformed response function on first use
    if ( F.m_h.IsNull() )
        if ( !F.m_filter.IsNull() )
            F.m_h = Initialize( *F.m_filter, r.Width(), r.Height(),
                                F.IsParallelProcessingEnabled(), F.MaxProcessors() );
        else
            F.m_h = Initialize( F.m_image, r.Width(), r.Height(),
                                F.IsParallelProcessingEnabled(), F.MaxProcessors() );

    Convolve( image, *F.m_h, F.IsParallelProcessingEnabled(), F.MaxProcessors() );
}
LPyramid::LPyramid(const float *image, unsigned int width, unsigned int height) :
    Width(width),
    Height(height)
{
    // Make the Laplacian pyramid by successively
    // copying the earlier levels and blurring them
    for (unsigned int i = 0; i < MAX_PYR_LEVELS; i++)
    {
        if (i == 0)
        {
            Levels[i] = Copy(image);
        }
        else
        {
            Levels[i] = new float[Width * Height];
            Convolve(Levels[i], Levels[i - 1]);
        }
    }
}
void Image::Sharpen()
{
    int* filt = new int[9];   // 3x3 sharpening filter

    filt[0] = -1;  filt[1] = -2;  filt[2] = -1;
    filt[3] = -2;  filt[4] = 19;  filt[5] = -2;
    filt[6] = -1;  filt[7] = -2;  filt[8] = -1;

    int norm = 7;   // kernel weights sum to 19 - 12 = 7
    int n = 3;

    Convolve(filt, n, norm, false);

    delete [] filt;   // Convolve reads, but does not keep, the kernel
}
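The `Convolve(filt, n, norm, absval)` member used by `Sharpen` and `Blur` is not shown in this collection. A plausible free-function sketch of the contract the call sites assume (integer kernel, result divided by `norm`, optional absolute value, clamping to [0, 255]); the buffer layout and border handling are assumptions, not the original implementation:

#include <algorithm>

// Hypothetical sketch of the integer-kernel convolution assumed by
// Sharpen()/Blur(): n x n kernel `filt`, result divided by `norm`,
// optional absolute value, clamped to the 8-bit range. `src`/`dst`
// are row-major 8-bit grayscale buffers of size width x height.
void ConvolveInt(const unsigned char* src, unsigned char* dst,
                 int width, int height,
                 const int* filt, int n, int norm, bool absval)
{
    int half = n / 2;
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int sum = 0;
            for (int j = 0; j < n; j++) {
                for (int i = 0; i < n; i++) {
                    // clamp sample coordinates at the image border
                    int sx = std::min(std::max(x + i - half, 0), width - 1);
                    int sy = std::min(std::max(y + j - half, 0), height - 1);
                    sum += filt[j * n + i] * src[sy * width + sx];
                }
            }
            sum /= norm;
            if (absval && sum < 0) sum = -sum;
            dst[y * width + x] = (unsigned char)std::min(std::max(sum, 0), 255);
        }
    }
}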
void ImageProximityFFT::CrossCorr(Image& Source, Image& Template, Image& Dest)
{
    CheckFloat(Dest);
    CheckSameNbChannels(Source, Template);
    CheckSameNbChannels(Source, Dest);
    CheckSameSize(Source, Dest);

    // Verify image size
    if (Template.Width() > Source.Width() || Template.Height() > Source.Height())
        throw cl::Error(CL_IMAGE_FORMAT_NOT_SUPPORTED,
                        "The template image must be smaller than the source image.");

    // Verify image types
    if (!SameType(Source, Template))
        throw cl::Error(CL_IMAGE_FORMAT_MISMATCH,
                        "The source image and the template image must be of the same type.");

    PrepareFor(Source, Template);

    Convolve(Source, Template, Dest);   // Computes the cross-correlation using the FFT
}
void Image::Blur(int n)
{
    /* Your Work Here (Section 3.4.1) */
    double sig = floor(n / (double) 2) / 2;   // sigma = floor(n/2) / 2
    int mp = int(n / 2);                      // kernel midpoint

    // Sample the 1-D Gaussian f(u) at integer offsets -mp..mp
    double* filtU = new double[n];
    for (int i = -mp; i <= mp; i++) {
        filtU[i + mp] = (1 / (sqrt(2 * M_PI) * sig)) * exp(-(i*i) / (2 * sig*sig));
    }

    // Build the separable n x n integer kernel, scaled so the smallest
    // (corner) weight maps to 1, and accumulate the normalization factor
    double norm = 0.0;
    int* filt = new int[n*n];
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            int temp = (int) ((filtU[i] * filtU[j]) / (filtU[0] * filtU[0]));
            filt[i + j*n] = temp;
            norm += temp;
        }
    }

    Convolve(filt, n, (int) norm, false);

    delete [] filtU;
    delete [] filt;
}
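The filter `Blur` samples is the usual 1-D Gaussian, with the code's choice of width $\sigma = \lfloor n/2 \rfloor / 2$; the 2-D kernel is its outer product, quantized relative to the corner weight so the smallest entry becomes 1:

$$f(u) = \frac{1}{\sqrt{2\pi}\,\sigma}\, e^{-u^2/(2\sigma^2)}, \qquad \mathrm{filt}[i][j] = \left\lfloor \frac{f(u_i)\,f(u_j)}{f(u_0)^2} \right\rfloor$$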
void subsample(Feature* f, int imgSize, CFloatImage gaussianImage)
{
    // Blur the feature window, then keep every other row and column
    CFloatImage img = featureToImage(*f, imgSize, imgSize);
    CFloatImage blurredImg(img.Shape());
    Convolve(img, blurredImg, gaussianImage);
    featuresFromImage(f, blurredImg, imgSize, imgSize);

    // Erase the even rows/columns, halving each dimension
    int count = 0;
    for (int y = 0; y < imgSize; y++) {
        for (int x = 0; x < imgSize; x++) {
            if (x % 2 == 0 || y % 2 == 0) {
                f->data.erase(f->data.begin() + count);
            } else {
                count++;
            }
        }
    }
}
/*************************************************************************
 *
 *  Function:   cl_ltp
 *  Purpose:    closed-loop fractional pitch search
 *
 **************************************************************************
 */
void cl_ltp (
    clLtpState *clSt,    /* i/o : State struct                              */
    tonStabState *tonSt, /* i/o : State struct                              */
    enum Mode mode,      /* i   : coder mode                                */
    Word16 frameOffset,  /* i   : Offset to subframe                        */
    Word16 T_op[],       /* i   : Open loop pitch lags                      */
    Word16 *h1,          /* i   : Impulse response vector               Q12 */
    Word16 *exc,         /* i/o : Excitation vector                      Q0 */
    Word16 res2[],       /* i/o : Long term prediction residual          Q0 */
    Word16 xn[],         /* i   : Target vector for pitch search         Q0 */
    Word16 lsp_flag,     /* i   : LSP resonance flag                        */
    Word16 xn2[],        /* o   : Target vector for codebook search      Q0 */
    Word16 y1[],         /* o   : Filtered adaptive excitation           Q0 */
    Word16 *T0,          /* o   : Pitch delay (integer part)                */
    Word16 *T0_frac,     /* o   : Pitch delay (fractional part)             */
    Word16 *gain_pit,    /* o   : Pitch gain                            Q14 */
    Word16 g_coeff[],    /* o   : Correlations between xn, y1, & y2         */
    Word16 **anap,       /* o   : Analysis parameters                       */
    Word16 *gp_limit     /* o   : pitch gain limit                          */
)
{
    Word16 i;
    Word16 index;
    Word32 L_temp;     /* temporary variable */
    Word16 resu3;      /* flag for upsample resolution */
    Word16 gpc_flag;

    /*----------------------------------------------------------------------*
     *                 Closed-loop fractional pitch search                  *
     *----------------------------------------------------------------------*/
    *T0 = Pitch_fr(&clSt->pitchSt, mode, T_op, exc, xn, h1,
                   L_SUBFR, frameOffset, T0_frac, &resu3, &index);

    *(*anap)++ = index;

    /*-----------------------------------------------------------------*
     *   - find unity gain pitch excitation (adaptive codebook entry)  *
     *     with fractional interpolation.                              *
     *   - find filtered pitch exc. y1[] = exc[] convolved with h1[]   *
     *   - compute pitch gain and limit between 0 and 1.2              *
     *   - update target vector for codebook search                    *
     *   - find LTP residual.                                          *
     *-----------------------------------------------------------------*/

    Pred_lt_3or6(exc, *T0, *T0_frac, L_SUBFR, resu3);

    Convolve(exc, h1, y1, L_SUBFR);

    /* gain_pit is Q14 for all modes */
    *gain_pit = G_pitch(mode, xn, y1, g_coeff, L_SUBFR);

    /* check if the pitch gain should be limited due to resonance in the LPC filter */
    gpc_flag = 0;
    *gp_limit = MAX_16;
    if (lsp_flag != 0 && *gain_pit > GP_CLIP)
    {
        gpc_flag = check_gp_clipping(tonSt, *gain_pit);
    }

    /* special for the MR475 and MR515 modes: limit the gain to 0.85 to */
    /* cope with bit errors in the decoder in a better way.             */
    if (mode == MR475 || mode == MR515)
    {
        if (*gain_pit > 13926)
        {
            *gain_pit = 13926;   /* 0.85 in Q14 */
        }

        if (gpc_flag != 0)
        {
            *gp_limit = GP_CLIP;
        }
    }
    else
    {
        if (gpc_flag != 0)
        {
            *gp_limit = GP_CLIP;
            *gain_pit = GP_CLIP;
        }

        /* For MR122, gain_pit is quantized here and not in gainQuant */
        if (mode == MR122)
        {
            *(*anap)++ = q_gain_pitch(MR122, *gp_limit, gain_pit, NULL, NULL);
        }
    }

    /* update target vector and evaluate LTP residual */
    for (i = 0; i < L_SUBFR; i++)
    {
        L_temp = ((Word32)y1[i] * *gain_pit) >> 14;
        xn2[i] = xn[i] - (Word16)L_temp;

        L_temp = ((Word32)exc[i] * *gain_pit) >> 14;
        res2[i] -= (Word16)L_temp;
    }
}
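The `Convolve(exc, h1, y1, L_SUBFR)` calls in these CELP coders compute the zero-state response of the weighted synthesis filter: a causal convolution truncated to the subframe length, $y[n] = \sum_{i=0}^{n} x[i]\,h[n-i]$. A plain-C sketch of that operation (the codec versions use saturating Q-format arithmetic such as `L_mac`, which this sketch deliberately ignores):

/* Causal convolution truncated to L samples, as used for y1[] = exc[] * h1[].
   Fixed-point scaling (h in Q12, saturation) is omitted for clarity. */
void ConvolveCausal(const float x[], const float h[], float y[], int L)
{
    for (int n = 0; n < L; n++) {
        float s = 0.0f;
        for (int i = 0; i <= n; i++)
            s += x[i] * h[n - i];
        y[n] = s;
    }
}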
void LinearFilter::Invoke()
{
    Convolve();
    //Display(1);
}
/***************************************************************************
 *  FUNCTION:  cod_amr
 *
 *  PURPOSE:   Main encoder routine.
 *
 *  DESCRIPTION: This function is called every 20 ms speech frame,
 *       operating on the newly read 160 speech samples. It performs the
 *       principal encoding functions to produce the set of encoded
 *       parameters which include the LSP, adaptive codebook, and fixed
 *       codebook quantization indices (addresses and gains).
 *
 *  INPUTS:
 *       No input arguments are passed to this function. However, before
 *       calling this function, 160 new speech samples should be copied to
 *       the vector new_speech[]. This is a global pointer which is declared
 *       in this file (it points to the end of the speech buffer minus 160).
 *
 *  OUTPUTS:
 *       ana[]:   vector of analysis parameters.
 *       synth[]: Local synthesis speech (for debugging purposes)
 *
 ***************************************************************************/
int cod_amr(
    cod_amrState *st,          /* i/o : State struct                   */
    enum Mode mode,            /* i   : AMR mode                       */
    Word16 new_speech[],       /* i   : speech input (L_FRAME)         */
    Word16 ana[],              /* o   : Analysis parameters            */
    enum Mode *usedMode,       /* o   : used mode                      */
    Word16 synth[]             /* o   : Local synthesis                */
)
{
    /* LPC coefficients */
    Word16 A_t[(MP1) * 4];     /* A(z) unquantized for the 4 subframes */
    Word16 Aq_t[(MP1) * 4];    /* A(z) quantized for the 4 subframes   */
    Word16 *A, *Aq;            /* Pointer on A_t and Aq_t              */
    Word16 lsp_new[M];

    /* Other vectors */
    Word16 xn[L_SUBFR];        /* Target vector for pitch search       */
    Word16 xn2[L_SUBFR];       /* Target vector for codebook search    */
    Word16 code[L_SUBFR];      /* Fixed codebook excitation            */
    Word16 y1[L_SUBFR];        /* Filtered adaptive excitation         */
    Word16 y2[L_SUBFR];        /* Filtered fixed codebook excitation   */
    Word16 gCoeff[6];          /* Correlations between xn, y1, & y2    */
    Word16 res[L_SUBFR];       /* Short term (LPC) prediction residual */
    Word16 res2[L_SUBFR];      /* Long term (LTP) prediction residual  */

    /* Vector and scalars needed for the MR475 */
    Word16 xn_sf0[L_SUBFR];    /* Target vector for pitch search       */
    Word16 y2_sf0[L_SUBFR];    /* Filtered codebook innovation         */
    Word16 code_sf0[L_SUBFR];  /* Fixed codebook excitation            */
    Word16 h1_sf0[L_SUBFR];    /* The impulse response of sf0          */
    Word16 mem_syn_save[M];    /* Filter memory                        */
    Word16 mem_w0_save[M];     /* Filter memory                        */
    Word16 mem_err_save[M];    /* Filter memory                        */
    Word16 sharp_save;         /* Sharpening                           */
    Word16 evenSubfr;          /* Even subframe indicator              */
    Word16 T0_sf0 = 0;         /* Integer pitch lag of sf0             */
    Word16 T0_frac_sf0 = 0;    /* Fractional pitch lag of sf0          */
    Word16 i_subfr_sf0 = 0;    /* Position in exc[] for sf0            */
    Word16 gain_pit_sf0;       /* Quantized pitch gain for sf0         */
    Word16 gain_code_sf0;      /* Quantized codebook gain for sf0      */

    /* Scalars */
    Word16 i_subfr, subfrNr;
    Word16 T_op[L_FRAME/L_FRAME_BY2];
    Word16 T0, T0_frac;
    Word16 gain_pit, gain_code;

    /* Flags */
    Word16 lsp_flag = 0;       /* indicates resonance in LPC filter    */
    Word16 gp_limit;           /* pitch gain limit value               */
    Word16 vad_flag;           /* VAD decision flag                    */
    Word16 compute_sid_flag;   /* SID analysis flag                    */

    Copy(new_speech, st->new_speech, L_FRAME);

    *usedMode = mode;
    move16 ();

    /* DTX processing */
    if (st->dtx)
    {   /* no test() call since this if is only in simulation env */
        /* Find VAD decision */
#ifdef  VAD2
        vad_flag = vad2 (st->new_speech, st->vadSt);
        vad_flag = vad2 (st->new_speech+80, st->vadSt) || vad_flag;
        logic16();
#else
        vad_flag = vad1(st->vadSt, st->new_speech);
#endif
        fwc ();   /* function worst case */

        /* NB! usedMode may change here */
        compute_sid_flag = tx_dtx_handler(st->dtx_encSt, vad_flag, usedMode);
    }
    else
    {
        compute_sid_flag = 0;
        move16 ();
    }

    /*------------------------------------------------------------------------*
     *  - Perform LPC analysis:                                               *
     *       * autocorrelation + lag windowing                                *
     *       * Levinson-Durbin algorithm to find a[]                          *
     *       * convert a[] to lsp[]                                           *
     *       * quantize and code the LSPs                                     *
     *       * find the interpolated LSPs and convert to a[] for all          *
     *         subframes (both quantized and unquantized)                     *
     *------------------------------------------------------------------------*/

    /* LP analysis */
    lpc(st->lpcSt, mode, st->p_window, st->p_window_12k2, A_t);
    fwc ();   /* function worst case */

    /* From A(z) to lsp. LSP quantization and interpolation */
    lsp(st->lspSt, mode, *usedMode, A_t, Aq_t, lsp_new, &ana);
    fwc ();   /* function worst case */

    /* Buffer lsp's and energy */
    dtx_buffer(st->dtx_encSt, lsp_new, st->new_speech);

    /* Check if in DTX mode */
    test();
    if (sub(*usedMode, MRDTX) == 0)
    {
        dtx_enc(st->dtx_encSt, compute_sid_flag, st->lspSt->qSt,
                st->gainQuantSt->gc_predSt, &ana);

        Set_zero(st->old_exc, PIT_MAX + L_INTERPOL);
        Set_zero(st->mem_w0, M);
        Set_zero(st->mem_err, M);
        Set_zero(st->zero, L_SUBFR);
        Set_zero(st->hvec, L_SUBFR);   /* set to zero "h1[-L_SUBFR..-1]" */

        /* Reset lsp states */
        lsp_reset(st->lspSt);
        Copy(lsp_new, st->lspSt->lsp_old, M);
        Copy(lsp_new, st->lspSt->lsp_old_q, M);

        /* Reset clLtp states */
        cl_ltp_reset(st->clLtpSt);
        st->sharp = SHARPMIN;
        move16 ();
    }
    else
    {
        /* check resonance in the filter */
        lsp_flag = check_lsp(st->tonStabSt, st->lspSt->lsp_old);
        move16 ();
    }

    /*----------------------------------------------------------------------*
     *  - Find the weighted input speech w_sp[] for the whole speech frame  *
     *  - Find the open-loop pitch delay for first 2 subframes              *
     *  - Set the range for searching closed-loop pitch in 1st subframe     *
     *  - Find the open-loop pitch delay for last 2 subframes               *
     *----------------------------------------------------------------------*/

#ifdef VAD2
    if (st->dtx)
    {   /* no test() call since this if is only in simulation env */
        st->vadSt->L_Rmax = 0;
        move32 ();
        st->vadSt->L_R0 = 0;
        move32 ();
    }
#endif

    for (subfrNr = 0, i_subfr = 0;
         subfrNr < L_FRAME/L_FRAME_BY2;
         subfrNr++, i_subfr += L_FRAME_BY2)
    {
        /* Pre-processing on 80 samples */
        pre_big(mode, gamma1, gamma1_12k2, gamma2, A_t, i_subfr, st->speech,
                st->mem_w, st->wsp);

        test ();
        test ();
        if ((sub(mode, MR475) != 0) && (sub(mode, MR515) != 0))
        {
            /* Find open loop pitch lag for two subframes */
            ol_ltp(st->pitchOLWghtSt, st->vadSt, mode, &st->wsp[i_subfr],
                   &T_op[subfrNr], st->old_lags, st->ol_gain_flg, subfrNr,
                   st->dtx);
        }
    }
    fwc ();   /* function worst case */

    test ();
    test ();
    if ((sub(mode, MR475) == 0) || (sub(mode, MR515) == 0))
    {
        /* Find open loop pitch lag for ONE FRAME ONLY */
        /* search on 160 samples */
        ol_ltp(st->pitchOLWghtSt, st->vadSt, mode, &st->wsp[0], &T_op[0],
               st->old_lags, st->ol_gain_flg, 1, st->dtx);
        T_op[1] = T_op[0];
        move16 ();
    }
    fwc ();   /* function worst case */

#ifdef VAD2
    if (st->dtx)
    {   /* no test() call since this if is only in simulation env */
        LTP_flag_update(st->vadSt, mode);
    }
#endif

#ifndef VAD2
    /* run VAD pitch detection */
    if (st->dtx)
    {   /* no test() call since this if is only in simulation env */
        vad_pitch_detection(st->vadSt, T_op);
    }
#endif
    fwc ();   /* function worst case */

    if (sub(*usedMode, MRDTX) == 0)
    {
        goto the_end;
    }

    /*------------------------------------------------------------------------*
     *          Loop for every subframe in the analysis frame                 *
     *------------------------------------------------------------------------*
     *  To find the pitch and innovation parameters. The subframe size is     *
     *  L_SUBFR and the loop is repeated L_FRAME/L_SUBFR times.               *
     *     - find the weighted LPC coefficients                               *
     *     - find the LPC residual signal res[]                               *
     *     - compute the target signal for pitch search                       *
     *     - compute impulse response of weighted synthesis filter (h1[])     *
     *     - find the closed-loop pitch parameters                            *
     *     - encode the pitch delay                                           *
     *     - update the impulse response h1[] by including fixed-gain pitch   *
     *     - find target vector for codebook search                           *
     *     - codebook search                                                  *
     *     - encode codebook address                                          *
     *     - VQ of pitch and codebook gains                                   *
     *     - find synthesis speech                                            *
     *     - update states of weighting filter                                *
     *------------------------------------------------------------------------*/

    A = A_t;      /* pointer to interpolated LPC parameters           */
    Aq = Aq_t;    /* pointer to interpolated quantized LPC parameters */

    evenSubfr = 0;
    move16 ();
    subfrNr = -1;
    move16 ();
    for (i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
    {
        subfrNr = add(subfrNr, 1);
        evenSubfr = sub(1, evenSubfr);

        /* Save states for the MR475 mode */
        test();
        test();
        if ((evenSubfr != 0) && (sub(*usedMode, MR475) == 0))
        {
            Copy(st->mem_syn, mem_syn_save, M);
            Copy(st->mem_w0, mem_w0_save, M);
            Copy(st->mem_err, mem_err_save, M);
            sharp_save = st->sharp;
        }

        /*-----------------------------------------------------------------*
         *  - Preprocessing of subframe                                    *
         *-----------------------------------------------------------------*/
        test();
        if (sub(*usedMode, MR475) != 0)
        {
            subframePreProc(*usedMode, gamma1, gamma1_12k2, gamma2,
                            A, Aq, &st->speech[i_subfr],
                            st->mem_err, st->mem_w0, st->zero,
                            st->ai_zero, &st->exc[i_subfr],
                            st->h1, xn, res, st->error);
        }
        else
        {   /* MR475 */
            subframePreProc(*usedMode, gamma1, gamma1_12k2, gamma2,
                            A, Aq, &st->speech[i_subfr],
                            st->mem_err, mem_w0_save, st->zero,
                            st->ai_zero, &st->exc[i_subfr],
                            st->h1, xn, res, st->error);

            /* save impulse response (modified in cbsearch) */
            test ();
            if (evenSubfr != 0)
            {
                Copy (st->h1, h1_sf0, L_SUBFR);
            }
        }

        /* copy the LP residual (res2 is modified in the CL LTP search) */
        Copy (res, res2, L_SUBFR);
        fwc ();   /* function worst case */

        /*-----------------------------------------------------------------*
         *  - Closed-loop LTP search                                       *
         *-----------------------------------------------------------------*/
        cl_ltp(st->clLtpSt, st->tonStabSt, *usedMode, i_subfr, T_op, st->h1,
               &st->exc[i_subfr], res2, xn, lsp_flag, xn2, y1,
               &T0, &T0_frac, &gain_pit, gCoeff, &ana,
               &gp_limit);

        /* update LTP lag history */
        move16 ();
        test();
        test ();
        if ((subfrNr == 0) && (st->ol_gain_flg[0] > 0))
        {
            st->old_lags[1] = T0;
            move16 ();
        }

        move16 ();
        test();
        test ();
        if ((sub(subfrNr, 3) == 0) && (st->ol_gain_flg[1] > 0))
        {
            st->old_lags[0] = T0;
            move16 ();
        }
        fwc ();   /* function worst case */

        /*-----------------------------------------------------------------*
         *  - Innovative codebook search (find index and gain)             *
         *-----------------------------------------------------------------*/
        cbsearch(xn2, st->h1, T0, st->sharp, gain_pit, res2,
                 code, y2, &ana, *usedMode, subfrNr);
        fwc ();   /* function worst case */

        /*------------------------------------------------------*
         *  - Quantization of gains.                            *
         *------------------------------------------------------*/
        gainQuant(st->gainQuantSt, *usedMode, res, &st->exc[i_subfr],
                  code, xn, xn2, y1, y2, gCoeff, evenSubfr, gp_limit,
                  &gain_pit_sf0, &gain_code_sf0,
                  &gain_pit, &gain_code, &ana);
        fwc ();   /* function worst case */

        /* update gain history */
        update_gp_clipping(st->tonStabSt, gain_pit);

        test();
        if (sub(*usedMode, MR475) != 0)
        {
            /* Subframe Post-Processing */
            subframePostProc(st->speech, *usedMode, i_subfr, gain_pit,
                             gain_code, Aq, synth, xn, code, y1, y2,
                             st->mem_syn, st->mem_err, st->mem_w0,
                             st->exc, &st->sharp);
        }
        else
        {
            test();
            if (evenSubfr != 0)
            {
                i_subfr_sf0 = i_subfr;
                move16 ();
                Copy(xn, xn_sf0, L_SUBFR);
                Copy(y2, y2_sf0, L_SUBFR);
                Copy(code, code_sf0, L_SUBFR);
                T0_sf0 = T0;
                move16 ();
                T0_frac_sf0 = T0_frac;
                move16 ();

                /* Subframe Post-Processing */
                subframePostProc(st->speech, *usedMode, i_subfr, gain_pit,
                                 gain_code, Aq, synth, xn, code, y1, y2,
                                 mem_syn_save, st->mem_err, mem_w0_save,
                                 st->exc, &st->sharp);
                st->sharp = sharp_save;
                move16();
            }
            else
            {
                /* update both subframes for the MR475 */

                /* Restore states for the MR475 mode */
                Copy(mem_err_save, st->mem_err, M);

                /* re-build excitation for sf 0 */
                Pred_lt_3or6(&st->exc[i_subfr_sf0], T0_sf0, T0_frac_sf0,
                             L_SUBFR, 1);
                Convolve(&st->exc[i_subfr_sf0], h1_sf0, y1, L_SUBFR);

                Aq -= MP1;
                subframePostProc(st->speech, *usedMode, i_subfr_sf0,
                                 gain_pit_sf0, gain_code_sf0, Aq,
                                 synth, xn_sf0, code_sf0, y1, y2_sf0,
                                 st->mem_syn, st->mem_err, st->mem_w0, st->exc,
                                 &sharp_save);   /* overwrites sharp_save */
                Aq += MP1;

                /* re-run pre-processing to get xn right (needed by postproc) */
                /* (this also reconstructs the unsharpened h1 for sf 1)       */
                subframePreProc(*usedMode, gamma1, gamma1_12k2, gamma2,
                                A, Aq, &st->speech[i_subfr],
                                st->mem_err, st->mem_w0, st->zero,
                                st->ai_zero, &st->exc[i_subfr],
                                st->h1, xn, res, st->error);

                /* re-build excitation sf 1 (changed if lag < L_SUBFR) */
                Pred_lt_3or6(&st->exc[i_subfr], T0, T0_frac, L_SUBFR, 1);
                Convolve(&st->exc[i_subfr], st->h1, y1, L_SUBFR);

                subframePostProc(st->speech, *usedMode, i_subfr, gain_pit,
                                 gain_code, Aq, synth, xn, code, y1, y2,
                                 st->mem_syn, st->mem_err, st->mem_w0,
                                 st->exc, &st->sharp);
            }
        }
        fwc ();   /* function worst case */

        A += MP1;    /* interpolated LPC parameters for next subframe */
        Aq += MP1;
    }

    Copy(&st->old_exc[L_FRAME], &st->old_exc[0], PIT_MAX + L_INTERPOL);

the_end:

    /*--------------------------------------------------*
     * Update signal for next frame.                    *
     *--------------------------------------------------*/
    Copy(&st->old_wsp[L_FRAME], &st->old_wsp[0], PIT_MAX);
    Copy(&st->old_speech[L_FRAME], &st->old_speech[0], L_TOTAL - L_FRAME);
    fwc ();   /* function worst case */

    return 0;
}
bool PixPair::Load(
    const char *apath, const char *bpath,
    int order, int bDoG, int r1, int r2, FILE* flog )
{
    printf( "\n---- Image loading ----\n" );

    clock_t t0 = StartTiming();

/* ----------------------------- */
/* Load and sanity check rasters */
/* ----------------------------- */

    uint8   *aras, *bras;
    uint32  wa, ha, wb, hb;
    int     ok = false;

    aras = Raster8FromAny( apath, wa, ha, flog );
    bras = Raster8FromAny( bpath, wb, hb, flog );

    if( !aras || !bras ) {
        fprintf( flog, "PixPair: Picture load failure.\n" );
        goto exit;
    }

    if( wa != wb || ha != hb ) {
        fprintf( flog, "PixPair: Nonmatching picture dimensions.\n" );
        goto exit;
    }

    ok  = true;
    wf  = wa;
    hf  = ha;
    ws  = wa;
    hs  = ha;
    scl = 1;

/* -------------- */
/* Resin removal? */
/* -------------- */

    //if( dbgCor ) {
    //  ZeroResin( "bloba.tif", aras );
    //  ZeroResin( "blobb.tif", bras );
    //}
    //else {
    //  ZeroResin( NULL, aras );
    //  ZeroResin( NULL, bras );
    //}

    //StopTiming( flog, "Resin removal", t0 );

/* ------- */
/* Flatten */
/* ------- */

    LegPolyFlatten( _avf, aras, wf, hf, order );
    RasterFree( aras );

    LegPolyFlatten( _bvf, bras, wf, hf, order );
    RasterFree( bras );

    avs_vfy = avs_aln = avf_vfy = avf_aln = &_avf;
    bvs_vfy = bvs_aln = bvf_vfy = bvf_aln = &_bvf;

/* ------------- */
/* Apply filters */
/* ------------- */

    //{
    //  vector<CD> kfft;
    //  double     K[] = {
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    //
    //  Convolve( _avf, _avf, wf, hf, K, 11, 11, true, true, kfft );
    //  Normalize( _avf );
    //  Convolve( _bvf, _bvf, wf, hf, K, 11, 11, true, true, kfft );
    //  Normalize( _bvf );
    //}

#if 0
    {
        vector<CD> kfft;
        double     K[] = {
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};

        Convolve( _avfflt, _avf, wf, hf, K, 11, 11, true, true, kfft );
        Normalize( _avfflt );
        Convolve( _bvfflt, _bvf, wf, hf, K, 11, 11, true, true, kfft );
        Normalize( _bvfflt );

        avs_aln = avf_aln = &_avfflt;
        bvs_aln = bvf_aln = &_bvfflt;

        bDoG = true;
    }
#endif

    if( bDoG ) {

        vector<double> DoG;
        vector<CD>     kfft;
        int            dim = MakeDoGKernel( DoG, r1, r2, flog );

        Convolve( _avfflt, _avf, wf, hf, &DoG[0], dim, dim, true, true, kfft );
        Normalize( _avfflt );

        Convolve( _bvfflt, _bvf, wf, hf, &DoG[0], dim, dim, true, true, kfft );
        Normalize( _bvfflt );

        avs_aln = avf_aln = &_avfflt;
        bvs_aln = bvf_aln = &_bvfflt;
    }

/* --------------------- */
/* Downsample all images */
/* --------------------- */

    if( ws > 2048 || hs >= 2048 ) {

        do {
            ws  /= 2;
            hs  /= 2;
            scl *= 2;
        } while( ws > 2048 || hs > 2048 );

        fprintf( flog, "PixPair: Scaling by %d\n", scl );

        if( ws * scl != wf || hs * scl != hf ) {
            fprintf( flog, "PixPair: Dimensions not multiple of scale!\n" );
            goto exit;
        }

        Downsample( _avs, _avf );
        Downsample( _bvs, _bvf );

        avs_vfy = avs_aln = &_avs;
        bvs_vfy = bvs_aln = &_bvs;

        if( bDoG ) {

            if( _avfflt.size() ) {
                Downsample( _avsflt, _avfflt );
                avs_aln = &_avsflt;
            }

            if( _bvfflt.size() ) {
                Downsample( _bvsflt, _bvfflt );
                bvs_aln = &_bvsflt;
            }
        }
    }
    else
        fprintf( flog, "PixPair: Using image scale=1.\n" );

/* ------------------------------ */
/* Write DoG images for debugging */
/* ------------------------------ */

#if 0
    if( bDoG ) {
        VectorDblToTif8( "DoGa.tif", avs_aln, ws, hs );
        VectorDblToTif8( "DoGb.tif", bvs_aln, ws, hs );
    }
#endif

/* -------- */
/* Clean up */
/* -------- */

exit:
    if( aras )
        RasterFree( aras );

    if( bras )
        RasterFree( bras );

    StopTiming( flog, "Image conditioning", t0 );

    return ok;
}
Feature HOGFeatureExtractor::operator()(const CByteImage& img_) const
{
    /******** BEGIN TODO ********/
    // Compute the Histogram of Oriented Gradients feature
    // Steps are:
    // 1) Compute gradients in x and y directions. We provide the
    //    derivative kernel proposed in the paper in _kernelDx and
    //    _kernelDy.
    // 2) Compute gradient magnitude and orientation
    // 3) Add the contribution of each pixel to the HOG cells whose
    //    support overlaps with the pixel. Each cell has a support of
    //    size _cellSize and each histogram has _nAngularBins.
    // 4) Normalize the HOG for each cell. One simple strategy that is
    //    also used in the SIFT descriptor is to first threshold
    //    the bin values so that no bin value is larger than some
    //    threshold (we leave it up to you to find this value) and
    //    then re-normalize the histogram so that it has norm 1. A more
    //    elaborate normalization scheme is proposed in Dalal & Triggs'
    //    paper, but we leave that as extra credit.
    //
    // Useful functions:
    // convertRGB2GrayImage, TypeConvert, WarpGlobal, Convolve

    int xCells = ceil(1.*img_.Shape().width / _cellSize);
    int yCells = ceil(1.*img_.Shape().height / _cellSize);

    CFloatImage HOGHist(xCells, yCells, _nAngularBins);
    HOGHist.ClearPixels();

    CByteImage gray(img_.Shape());
    CFloatImage grayF(img_.Shape().width, img_.Shape().height, 1);

    convertRGB2GrayImage(img_, gray);
    TypeConvert(gray, grayF);

    CFloatImage diffX(img_.Shape()), diffY(img_.Shape());
    Convolve(grayF, diffX, _kernelDx);
    Convolve(grayF, diffY, _kernelDy);

    CFloatImage grad(grayF.Shape()), grad2(grayF.Shape());
    CFloatImage angl(grayF.Shape()), angl2(grayF.Shape());

    for (int y = 0; y < grayF.Shape().height; y++) {
        for (int x = 0; x < grayF.Shape().width; x++) {
            // squared gradient magnitude
            grad2.Pixel(x,y,0) = (diffX.Pixel(x,y,0) * diffX.Pixel(x,y,0) +
                                  diffY.Pixel(x,y,0) * diffY.Pixel(x,y,0));
            // gradient orientation in (-pi/2, pi/2]
            angl2.Pixel(x,y,0) = atan(diffY.Pixel(x,y,0) / diffX.Pixel(x,y,0));
        }
    }

    // Bilinear filter
    ConvolveSeparable(grad2, grad, ConvolveKernel_121, ConvolveKernel_121, 1);
    ConvolveSeparable(angl2, angl, ConvolveKernel_121, ConvolveKernel_121, 1);

    //WriteFile(diffX, "angle.tga");
    //WriteFile(diffY, "angleG.tga");

    for (int y = 0; y < grayF.Shape().height; y++) {
        for (int x = 0; x < grayF.Shape().width; x++) {
            // Fit into the bins
            int a = angl.Pixel(x,y,0) / 3.14 * (_nAngularBins) + _nAngularBins/2;
            if (a >= _nAngularBins) a = _nAngularBins - 1;   // guard the +pi/2 edge
            if (a < 0) a = 0;

            // Histogram
            HOGHist.Pixel(floor(1.*x / _cellSize),
                          floor(1.*y / _cellSize), a) += grad.Pixel(x,y,0);
        }
    }

    // Normalization
    float threshold = 0.7;
    for (int y = 0; y < yCells; y++) {
        for (int x = 0; x < xCells; x++) {
            float total = 0;
            for (int a = 0; a < _nAngularBins; a++) {
                if (HOGHist.Pixel(x,y,a) > threshold)
                    HOGHist.Pixel(x,y,a) = threshold;
                // Sum for normalization
                total += HOGHist.Pixel(x,y,a);
            }
            for (int a = 0; a < _nAngularBins; a++) {
                HOGHist.Pixel(x,y,a) /= total;
            }
        }
    }

    return HOGHist;
    /******** END TODO ********/
}
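An alternative formulation for step 2 that avoids the division-by-zero edge case of `atan(dy/dx)` is to use `atan2` and fold the angle into the unsigned range [0, pi), which is common for HOG. A minimal sketch, with `n` standing for the extractor's `_nAngularBins`:

#include <cmath>

// Map a gradient (gx, gy) to an unsigned-orientation bin in [0, n).
// Unsigned gradients identify theta with theta + pi.
int OrientationBin(float gx, float gy, int n)
{
    const float kPi = 3.14159265358979f;
    float theta = std::atan2(gy, gx);     // range (-pi, pi]
    if (theta < 0.0f) theta += kPi;       // fold into [0, pi)
    int bin = (int)(theta / kPi * n);
    return (bin >= n) ? n - 1 : bin;      // guard the theta == pi edge
}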
CFloatImage SupportVectorMachine::predictSlidingWindow(const Feature& feat) const
{
    CFloatImage score(CShape(feat.Shape().width, feat.Shape().height, 1));
    score.ClearPixels();

    /******** BEGIN TODO ********/
    // Sliding window prediction.
    //
    // In this project we are using a linear SVM. This means that
    // its classification function is very simple, consisting of a
    // dot product of the feature vector with a set of weights learned
    // during training, followed by a subtraction of a bias term
    //
    //          pred <- dot(feat, weights) - bias term
    //
    // Now this is very simple to compute when we are dealing with
    // cropped images: our computed features have the same dimensions
    // as the SVM weights. Things get a little more tricky when you
    // want to evaluate this function over all possible subwindows of
    // a larger feature, one that we would get by running our feature
    // extraction on an entire image.
    //
    // Here you will evaluate the above expression by breaking
    // the dot product into a series of convolutions (remember that
    // a convolution can be thought of as a pointwise dot product with
    // the convolution kernel), each one with a different band.
    //
    // Convolve each band of the SVM weights with the corresponding
    // band in feat, and add the resulting score image. The final
    // step is to subtract the SVM bias term given by this->getBiasTerm().
    //
    // Hint: you might need to set the origin for the convolution kernel
    // in order to get the result from convolution to be correctly centered
    //
    // Useful functions:
    // Convolve, BandSelect, this->getWeights(), this->getBiasTerm()

    Feature weights = getWeights();

    for (int b = 0; b < feat.Shape().nBands; b++) {
        CFloatImage currentBandWeights  = CFloatImage(weights.Shape().width, weights.Shape().height, 1);
        CFloatImage currentBandFeatures = CFloatImage(feat.Shape().width, feat.Shape().height, 1);
        CFloatImage convolved = CFloatImage(CShape(feat.Shape().width, feat.Shape().height, 1));
        CFloatImage final(CShape(feat.Shape().width, feat.Shape().height, 1));

        BandSelect(weights, currentBandWeights, b, 0);
        BandSelect(feat, currentBandFeatures, b, 0);
        currentBandWeights.origin[0] = weights.origin[0];
        currentBandWeights.origin[1] = weights.origin[1];

        Convolve(feat, convolved, currentBandWeights);
        BandSelect(convolved, final, b, 0);

        try {
            score += final;
        } catch (CError err) {
            printf("OH NOES: the final chapter!");
        }
    }
    score -= getBiasTerm();
    /******** END TODO ********/

    return score;
}
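The identity both sliding-window implementations (this one and the `response` variant further below) rely on: the per-window dot product splits into one 2-D correlation per feature band, evaluated at every window position,

$$\mathrm{score}(u,v) \;=\; \sum_{b}\,\sum_{x,y} \mathrm{feat}_b(u{+}x,\,v{+}y)\,w_b(x,y) \;-\; \mathrm{bias},$$

so each inner sum is computable as a single `Convolve` of band $b$ against kernel $w_b$ (up to the kernel-origin convention, which is why the origin must be set to center the result).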
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst,
                       CFloatImage x_kernel, CFloatImage y_kernel,
                       float scale, float offset,
                       int decimate, int interpolate)
{
    // Allocate the result, if necessary
    CShape dShape = src.Shape();
    if (decimate > 1)
    {
        dShape.width  = (dShape.width  + decimate-1) / decimate;
        dShape.height = (dShape.height + decimate-1) / decimate;
    }
    dst.ReAllocate(dShape, false);

    // Allocate the intermediate images
    CImageOf<T> tmpImg1(src.Shape());
    //CImageOf<T> tmpImgCUDA(src.Shape());
    CImageOf<T> tmpImg2(src.Shape());

    // Create a proper vertical convolution kernel
    CFloatImage v_kernel(1, y_kernel.Shape().width, 1);
    for (int k = 0; k < y_kernel.Shape().width; k++)
        v_kernel.Pixel(0, k, 0) = y_kernel.Pixel(k, 0, 0);
    v_kernel.origin[1] = y_kernel.origin[0];

#ifdef RUN_ON_GPU
    // Modifications for integrating CUDA kernels
    BinomialFilterType type;
    profilingTimer->startTimer();

    // CUDA convolve: map the kernel width to the matching binomial filter
    switch (x_kernel.Shape().width)
    {
        case 3:
            type = BINOMIAL6126;
            break;
        case 5:
            type = BINOMIAL14641;
            break;
        default:
            // Unsupported kernel case
            throw CError("Unknown convolution kernel");
            assert(false);
    }

    // Skip the extra copy if decimation is not required
    if (decimate != 1)
        CudaConvolveXY(src, tmpImg2, type);
    else
        CudaConvolveXY(src, dst, type);
    printf("\nGPU convolution time = %f ms\n", profilingTimer->stopAndGetTimerValue());
#else
    profilingTimer->startTimer();
    //VerifyComputedData(&tmpImg2.Pixel(0, 0, 0), &tmpImgCUDA.Pixel(0, 0, 0), 7003904);

    // Perform the two convolutions
    Convolve(src, tmpImg1, x_kernel, 1.0f, 0.0f);
    Convolve(tmpImg1, tmpImg2, v_kernel, scale, offset);
    printf("\nCPU convolution time = %f ms\n", profilingTimer->stopAndGetTimerValue());
#endif

    profilingTimer->startTimer();
    // Downsample or copy; skip the decimate-and-recopy when not required
#ifdef RUN_ON_GPU
    if (decimate != 1)
    {
#endif
    for (int y = 0; y < dShape.height; y++)
    {
        T* sPtr = &tmpImg2.Pixel(0, y * decimate, 0);
        T* dPtr = &dst.Pixel(0, y, 0);
        int nB  = dShape.nBands;
        for (int x = 0; x < dShape.width; x++)
        {
            for (int b = 0; b < nB; b++)
                dPtr[b] = sPtr[b];
            sPtr += decimate * nB;
            dPtr += nB;
        }
    }
#ifdef RUN_ON_GPU
    }
#endif
    printf("\nDecimate/Recopy took = %f ms\n", profilingTimer->stopAndGetTimerValue());
}
void Coder_ld8h(
    Word16 ana[],   /* (o) : analysis parameters                                   */
    Word16 rate     /* (i) : rate selector/frame: =0 6.4 kbps, =1 8 kbps, =2 11.8 kbps */
)
{
    /* LPC analysis */
    Word16 r_l_fwd[MP1], r_h_fwd[MP1];   /* Autocorrelations low and hi (forward) */
    Word32 r_bwd[M_BWDP1];               /* Autocorrelations (backward)           */
    Word16 r_l_bwd[M_BWDP1];             /* Autocorrelations low (backward)       */
    Word16 r_h_bwd[M_BWDP1];             /* Autocorrelations high (backward)      */
    Word16 rc_fwd[M];                    /* Reflection coefficients: forward analysis  */
    Word16 rc_bwd[M_BWD];                /* Reflection coefficients: backward analysis */
    Word16 A_t_fwd[MP1*2];               /* A(z) forward unquantized for the 2 subframes */
    Word16 A_t_fwd_q[MP1*2];             /* A(z) forward quantized for the 2 subframes   */
    Word16 A_t_bwd[2*M_BWDP1];           /* A(z) backward for the 2 subframes            */
    Word16 *Aq;                          /* A(z) "quantized" for the 2 subframes   */
    Word16 *Ap;                          /* A(z) "unquantized" for the 2 subframes */
    Word16 *pAp, *pAq;
    Word16 Ap1[M_BWDP1];                 /* A(z) with spectral expansion */
    Word16 Ap2[M_BWDP1];                 /* A(z) with spectral expansion */
    Word16 lsp_new[M], lsp_new_q[M];     /* LSPs at 2nd subframe          */
    Word16 lsf_int[M];                   /* Interpolated LSF 1st subframe */
    Word16 lsf_new[M];
    Word16 lp_mode;                      /* Backward / Forward indication mode */
    Word16 m_ap, m_aq, i_gamma;
    Word16 code_lsp[2];

    /* Other vectors */
    Word16 h1[L_SUBFR];                  /* Impulse response h1[]               */
    Word16 xn[L_SUBFR];                  /* Target vector for pitch search      */
    Word16 xn2[L_SUBFR];                 /* Target vector for codebook search   */
    Word16 code[L_SUBFR];                /* Fixed codebook excitation           */
    Word16 y1[L_SUBFR];                  /* Filtered adaptive excitation        */
    Word16 y2[L_SUBFR];                  /* Filtered fixed codebook excitation  */
    Word16 g_coeff[4];                   /* Correlations between xn & y1        */
    Word16 res2[L_SUBFR];                /* residual after long term prediction */
    Word16 g_coeff_cs[5];
    Word16 exp_g_coeff_cs[5];            /* Correlations between xn, y1, & y2:
                                            <y1,y1>, -2<xn,y1>, <y2,y2>, -2<xn,y2>, 2<y1,y2> */

    /* Scalars */
    Word16 i, j, k, i_subfr;
    Word16 T_op, T0, T0_min, T0_max, T0_frac;
    Word16 gain_pit, gain_code, index;
    Word16 taming, pit_sharp;
    Word16 sat_filter;
    Word32 L_temp;
    Word16 freq_cur[M];
    Word16 temp;

    /*------------------------------------------------------------------------*
     *  - Perform LPC analysis:                                               *
     *       * autocorrelation + lag windowing                                *
     *       * Levinson-Durbin algorithm to find a[]                          *
     *       * convert a[] to lsp[]                                           *
     *       * quantize and code the LSPs                                     *
     *       * find the interpolated LSPs and convert to a[] for the 2        *
     *         subframes (both quantized and unquantized)                     *
     *------------------------------------------------------------------------*/

    /* ------------------- */
    /* LP Forward analysis */
    /* ------------------- */
    Autocorr(p_window, M, r_h_fwd, r_l_fwd);             /* Autocorrelations */
    Lag_window(M, r_h_fwd, r_l_fwd);                     /* Lag windowing    */
    Levinsone(M, r_h_fwd, r_l_fwd, &A_t_fwd[MP1], rc_fwd,
              old_A_fwd, old_rc_fwd);                    /* Levinson-Durbin  */
    Az_lsp(&A_t_fwd[MP1], lsp_new, lsp_old);             /* From A(z) to lsp */

    /* -------------------- */
    /* LP Backward analysis */
    /* -------------------- */
    if (rate == G729E) {

        /* LPC recursive window as in G728 */
        autocorr_hyb_window(synth, r_bwd, rexp);         /* Autocorrelations */

        Lag_window_bwd(r_bwd, r_h_bwd, r_l_bwd);         /* Lag windowing    */

        /* Fixed-point Levinson (as in G729) */
        Levinsone(M_BWD, r_h_bwd, r_l_bwd, &A_t_bwd[M_BWDP1], rc_bwd,
                  old_A_bwd, old_rc_bwd);

        /* Test saturation of A_t_bwd */
        sat_filter = 0;
        for (i = M_BWDP1; i < 2*M_BWDP1; i++)
            if (A_t_bwd[i] >= 32767)
                sat_filter = 1;
        if (sat_filter == 1)
            Copy(A_t_bwd_mem, &A_t_bwd[M_BWDP1], M_BWDP1);
        else
            Copy(&A_t_bwd[M_BWDP1], A_t_bwd_mem, M_BWDP1);

        /* Additional bandwidth expansion on backward filter */
        Weight_Az(&A_t_bwd[M_BWDP1], GAMMA_BWD, M_BWD, &A_t_bwd[M_BWDP1]);
    }

    /*--------------------------------------------------*
     * Update synthesis signal for next frame.          *
     *--------------------------------------------------*/
    Copy(&synth[L_FRAME], &synth[0], MEM_SYN_BWD);

    /*--------------------------------------------------------------------*
     * Find interpolated LPC parameters in all subframes (unquantized).   *
     * The interpolated parameters are in array A_t[] of size (M+1)*4     *
     *--------------------------------------------------------------------*/
    if (prev_lp_mode == 0) {
        Int_lpc(lsp_old, lsp_new, lsf_int, lsf_new, A_t_fwd);
    }
    else {
        /* no interpolation */
        /* unquantized */
        Lsp_Az(lsp_new, A_t_fwd);          /* Subframe 1 */
        Lsp_lsf(lsp_new, lsf_new, M);      /* transformation from LSP to LSF (freq. domain) */
        Copy(lsf_new, lsf_int, M);         /* Subframe 1 */
    }

    /* ---------------- */
    /* LSP quantization */
    /* ---------------- */
    Qua_lspe(lsp_new, lsp_new_q, code_lsp, freq_prev, freq_cur);

    /*--------------------------------------------------------------------*
     * Find interpolated LPC parameters in all subframes (quantized).     *
     * The quantized interpolated parameters are in array Aq_t[]          *
     *--------------------------------------------------------------------*/
    if (prev_lp_mode == 0) {
        Int_qlpc(lsp_old_q, lsp_new_q, A_t_fwd_q);
    }
    else {
        /* no interpolation */
        Lsp_Az(lsp_new_q, &A_t_fwd_q[MP1]);        /* Subframe 2 */
        Copy(&A_t_fwd_q[MP1], A_t_fwd_q, MP1);     /* Subframe 1 */
    }

    /*---------------------------------------------------------------------*
     * - Decision for the switch Forward / Backward                        *
     *---------------------------------------------------------------------*/
    if (rate == G729E) {
        set_lpc_modeg(speech, A_t_fwd_q, A_t_bwd, &lp_mode,
                      lsp_new, lsp_old, &bwd_dominant, prev_lp_mode, prev_filter,
                      &C_int, &glob_stat, &stat_bwd, &val_stat_bwd);
    }
    else {
        update_bwd(&lp_mode, &bwd_dominant, &C_int, &glob_stat);
    }

    /* ---------------------------------- */
    /* update the LSPs for the next frame */
    /* ---------------------------------- */
    Copy(lsp_new, lsp_old, M);

    /*----------------------------------------------------------------------*
     * - Find the weighted input speech w_sp[] for the whole speech frame   *
     *----------------------------------------------------------------------*/
    if (lp_mode == 0) {
        m_ap = M;
        if (bwd_dominant == 0)
            Ap = A_t_fwd;
        else
            Ap = A_t_fwd_q;
        perc_var(gamma1, gamma2, lsf_int, lsf_new, rc_fwd);
    }
    else {
        if (bwd_dominant == 0) {
            m_ap = M;
            Ap = A_t_fwd;
        }
        else {
            m_ap = M_BWD;
            Ap = A_t_bwd;
        }
        perc_vare(gamma1, gamma2, bwd_dominant);
    }

    pAp = Ap;
    for (i = 0; i < 2; i++) {
        Weight_Az(pAp, gamma1[i], m_ap, Ap1);
        Weight_Az(pAp, gamma2[i], m_ap, Ap2);
        Residue(m_ap, Ap1, &speech[i*L_SUBFR], &wsp[i*L_SUBFR], L_SUBFR);
        Syn_filte(m_ap, Ap2, &wsp[i*L_SUBFR], &wsp[i*L_SUBFR], L_SUBFR,
                  &mem_w[M_BWD-m_ap], 0);
        for (j = 0; j < M_BWD; j++)
            mem_w[j] = wsp[i*L_SUBFR+L_SUBFR-M_BWD+j];
        pAp += m_ap+1;
    }

    *ana++ = rate + (Word16)2;   /* bit rate mode */

    if (lp_mode == 0) {
        m_aq = M;
        Aq = A_t_fwd_q;
        /* update previous filter for next frame */
        Copy(&Aq[MP1], prev_filter, MP1);
        for (i = MP1; i < M_BWDP1; i++)
            prev_filter[i] = 0;
        for (j = MP1; j < M_BWDP1; j++)
            ai_zero[j] = 0;
    }
    else {
        m_aq = M_BWD;
        Aq = A_t_bwd;
        if (bwd_dominant == 0) {
            for (j = MP1; j < M_BWDP1; j++)
                ai_zero[j] = 0;
        }
        /* update previous filter for next frame */
        Copy(&Aq[M_BWDP1], prev_filter, M_BWDP1);
    }

    if (rate == G729E)
        *ana++ = lp_mode;

    /*----------------------------------------------------------------------*
     * - Find the weighted input speech w_sp[] for the whole speech frame   *
     * - Find the open-loop pitch delay                                     *
     *----------------------------------------------------------------------*/
    if (lp_mode == 0) {
        Copy(lsp_new_q, lsp_old_q, M);
        Lsp_prev_update(freq_cur, freq_prev);
        *ana++ = code_lsp[0];
        *ana++ = code_lsp[1];
    }

    /* Find open loop pitch lag */
    T_op = Pitch_ol(wsp, PIT_MIN, PIT_MAX, L_FRAME);

    /* Range for closed loop pitch search in 1st subframe */
    T0_min = sub(T_op, 3);
    if (sub(T0_min, PIT_MIN) < 0) {
        T0_min = PIT_MIN;
    }
    T0_max = add(T0_min, 6);
    if (sub(T0_max, PIT_MAX) > 0) {
        T0_max = PIT_MAX;
        T0_min = sub(T0_max, 6);
    }

    /*------------------------------------------------------------------------*
     *          Loop for every subframe in the analysis frame                 *
     *------------------------------------------------------------------------*
     *  To find the pitch and innovation parameters. The subframe size is     *
     *  L_SUBFR and the loop is repeated 2 times.                             *
     *     - find the weighted LPC coefficients                               *
     *     - find the LPC residual signal res[]                               *
     *     - compute the target signal for pitch search                       *
     *     - compute impulse response of weighted synthesis filter (h1[])     *
     *     - find the closed-loop pitch parameters                            *
     *     - encode the pitch delay                                           *
     *     - update the impulse response h1[] by including fixed-gain pitch   *
     *     - find target vector for codebook search                           *
     *     - codebook search                                                  *
     *     - encode codebook address                                          *
     *     - VQ of pitch and codebook gains                                   *
     *     - find synthesis speech                                            *
     *     - update states of weighting filter                                *
     *------------------------------------------------------------------------*/

    pAp = Ap;    /* pointer to interpolated "unquantized" LPC parameters */
    pAq = Aq;    /* pointer to interpolated "quantized" LPC parameters   */

    i_gamma = 0;

    for (i_subfr = 0; i_subfr < L_FRAME; i_subfr += L_SUBFR)
    {
        /*---------------------------------------------------------------*
         * Find the weighted LPC coefficients for the weighting filter.  *
         *---------------------------------------------------------------*/
        Weight_Az(pAp, gamma1[i_gamma], m_ap, Ap1);
        Weight_Az(pAp, gamma2[i_gamma], m_ap, Ap2);

        /*---------------------------------------------------------------*
         * Compute impulse response, h1[], of weighted synthesis filter  *
         *---------------------------------------------------------------*/
        for (i = 0; i <= m_ap; i++)
            ai_zero[i] = Ap1[i];
        Syn_filte(m_aq, pAq, ai_zero, h1, L_SUBFR, zero, 0);
        Syn_filte(m_ap, Ap2, h1, h1, L_SUBFR, zero, 0);

        /*------------------------------------------------------------------------*
         *                                                                        *
         *          Find the target vector for pitch search:                     *
         *          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                      *
         *                                                                        *
         *              |------|  res[n]                                         *
         *  speech[n]---| A(z) |--------                                         *
         *              |------|       |   |--------| error[n]  |------|         *
         *                    zero -- (-)--| 1/A(z) |-----------| W(z) |-- target *
         *                    exc          |--------|           |------|         *
         *                                                                        *
         * Instead of subtracting the zero-input response of filters from        *
         * the weighted input speech, the above configuration is used to         *
         * compute the target vector. This configuration gives better            *
         * performance with fixed-point implementation. The memory of 1/A(z)     *
         * is updated by filtering (res[n]-exc[n]) through 1/A(z), or simply     *
         * by subtracting the synthesis speech from the input speech:            *
         *     error[n] = speech[n] - syn[n].                                    *
         * The memory of W(z) is updated by filtering error[n] through W(z),     *
         * or more simply by subtracting the filtered adaptive and fixed         *
         * codebook excitations from the target:                                 *
         *     target[n] - gain_pit*y1[n] - gain_code*y2[n]                      *
         * as these signals are already available.                               *
         *                                                                        *
         *------------------------------------------------------------------------*/
        Residue(m_aq, pAq, &speech[i_subfr], &exc[i_subfr], L_SUBFR);   /* LPC residual */
        for (i = 0; i < L_SUBFR; i++)
            res2[i] = exc[i_subfr+i];
        Syn_filte(m_aq, pAq, &exc[i_subfr], error, L_SUBFR,
                  &mem_err[M_BWD-m_aq], 0);
        Residue(m_ap, Ap1, error, xn, L_SUBFR);
        Syn_filte(m_ap, Ap2, xn, xn, L_SUBFR, &mem_w0[M_BWD-m_ap], 0);  /* target signal xn[] */

        /*----------------------------------------------------------------------*
         *                 Closed-loop fractional pitch search                  *
         *----------------------------------------------------------------------*/
        T0 = Pitch_fr3cp(&exc[i_subfr], xn, h1, L_SUBFR, T0_min, T0_max,
                         i_subfr, &T0_frac, rate);

        index = Enc_lag3cp(T0, T0_frac, &T0_min, &T0_max, PIT_MIN, PIT_MAX,
                           i_subfr, rate);

        *ana++ = index;

        if ((i_subfr == 0) && (rate != G729D)) {
            *ana = Parity_Pitch(index);
            if (rate == G729E) {
                *ana ^= (shr(index, 1) & 0x0001);
            }
            ana++;
        }

        /*-----------------------------------------------------------------*
         *   - find unity gain pitch excitation (adaptive codebook entry)  *
         *     with fractional interpolation.                              *
         *   - find filtered pitch exc. y1[] = exc[] convolved with h1[]   *
         *   - compute pitch gain and limit between 0 and 1.2              *
         *   - update target vector for codebook search                    *
         *   - find LTP residual.                                          *
         *-----------------------------------------------------------------*/
        Pred_lt_3(&exc[i_subfr], T0, T0_frac, L_SUBFR);

        Convolve(&exc[i_subfr], h1, y1, L_SUBFR);

        gain_pit = G_pitch(xn, y1, g_coeff, L_SUBFR);

        /* clip pitch gain if taming is necessary */
        taming = test_err(T0, T0_frac);
        if (taming == 1) {
            if (sub(gain_pit, GPCLIP) > 0) {
                gain_pit = GPCLIP;
            }
        }

        /* xn2[i] = xn[i] - y1[i] * gain_pit */
        for (i = 0; i < L_SUBFR; i++) {
            L_temp = L_mult(y1[i], gain_pit);
            L_temp = L_shl(L_temp, 1);               /* gain_pit in Q14 */
            xn2[i] = sub(xn[i], extract_h(L_temp));
        }

        /*-----------------------------------------------------*
         * - Innovative codebook search.                       *
         *-----------------------------------------------------*/
        switch (rate) {

            case G729:    /* 8 kbit/s */
            {
                index = ACELP_Codebook(xn2, h1, T0, sharp, i_subfr, code, y2, &i);
                *ana++ = index;   /* Positions index */
                *ana++ = i;       /* Signs index     */
                break;
            }

            case G729D:   /* 6.4 kbit/s */
            {
                index = ACELP_CodebookD(xn2, h1, T0, sharp, code, y2, &i);
                *ana++ = index;   /* Positions index */
                *ana++ = i;       /* Signs index     */
                break;
            }

            case G729E:   /* 11.8 kbit/s */
            {
                /*-----------------------------------------------------------------*
                 * Include fixed-gain pitch contribution into impulse resp. h[]    *
                 *-----------------------------------------------------------------*/
                pit_sharp = shl(sharp, 1);   /* From Q14 to Q15 */
                if (T0 < L_SUBFR) {
                    for (i = T0; i < L_SUBFR; i++) {
                        /* h[i] += pitch_sharp * h[i-T0] */
                        h1[i] = add(h1[i], mult(h1[i-T0], pit_sharp));
                    }
                }

                /* calculate residual after long term prediction */
                /* res2[i] -= exc[i+i_subfr] * gain_pit           */
                for (i = 0; i < L_SUBFR; i++) {
                    L_temp = L_mult(exc[i+i_subfr], gain_pit);
                    L_temp = L_shl(L_temp, 1);   /* gain_pit in Q14 */
                    res2[i] = sub(res2[i], extract_h(L_temp));
                }

                if (lp_mode == 0)
                    ACELP_10i40_35bits(xn2, res2, h1, code, y2, ana);   /* Forward  */
                else
                    ACELP_12i40_44bits(xn2, res2, h1, code, y2, ana);   /* Backward */
                ana += 5;

                /*-----------------------------------------------------------------*
                 * Include fixed-gain pitch contribution into code[].              *
                 *-----------------------------------------------------------------*/
                if (T0 < L_SUBFR) {
                    for (i = T0; i < L_SUBFR; i++) {
                        /* code[i] += pitch_sharp * code[i-T0] */
                        code[i] = add(code[i], mult(code[i-T0], pit_sharp));
                    }
                }
                break;
            }

            default:
            {
                printf("Unrecognized bit rate\n");
                exit(-1);
            }
        }   /* end of switch */

        /*-----------------------------------------------------*
         * - Quantization of gains.                            *
         *-----------------------------------------------------*/
        g_coeff_cs[0]     = g_coeff[0];                   /* <y1,y1>              */
        exp_g_coeff_cs[0] = negate(g_coeff[1]);           /* Q-Format:XXX -> JPN  */
        g_coeff_cs[1]     = negate(g_coeff[2]);           /* (xn,y1) -> -2<xn,y1> */
        exp_g_coeff_cs[1] = negate(add(g_coeff[3], 1));   /* Q-Format:XXX -> JPN  */

        Corr_xy2(xn, y1, y2, g_coeff_cs, exp_g_coeff_cs); /* Q0 Q0 Q12 ^Qx ^Q0 */
        /* g_coeff_cs[3]:exp_g_coeff_cs[3] = <y2,y2>   */
        /* g_coeff_cs[4]:exp_g_coeff_cs[4] = -2<xn,y2> */
        /* g_coeff_cs[5]:exp_g_coeff_cs[5] = 2<y1,y2>  */

        if (rate == G729D)
            index = Qua_gain_6k(code, g_coeff_cs, exp_g_coeff_cs, L_SUBFR,
                                &gain_pit, &gain_code, taming, past_qua_en);
        else
            index = Qua_gain_8k(code, g_coeff_cs, exp_g_coeff_cs, L_SUBFR,
                                &gain_pit, &gain_code, taming, past_qua_en);

        *ana++ = index;

        /*------------------------------------------------------------*
         * - Update pitch sharpening "sharp" with quantized gain_pit  *
         *------------------------------------------------------------*/
        sharp = gain_pit;
        if (sub(sharp, SHARPMAX) > 0)
            sharp = SHARPMAX;
        else {
            if (sub(sharp, SHARPMIN) < 0)
                sharp = SHARPMIN;
        }

        /*------------------------------------------------------*
         * - Find the total excitation                          *
         * - find synthesis speech corresponding to exc[]       *
         * - update filter memories for finding the target      *
         *   vector in the next subframe                        *
         *   (update error[-m..-1] and mem_w_err[])             *
         *   update error function for taming process           *
         *------------------------------------------------------*/
        for (i = 0; i < L_SUBFR; i++) {
            /* exc[i] = gain_pit*exc[i] + gain_code*code[i]; */
            /* exc[i]  in Q0   gain_pit in Q14               */
            /* code[i] in Q13  gain_cod in Q1                */
            L_temp = L_mult(exc[i+i_subfr], gain_pit);
            L_temp = L_mac(L_temp, code[i], gain_code);
            L_temp = L_shl(L_temp, 1);
            exc[i+i_subfr] = round(L_temp);
        }

        update_exc_err(gain_pit, T0);

        Syn_filte(m_aq, pAq, &exc[i_subfr], &synth_ptr[i_subfr], L_SUBFR,
                  &mem_syn[M_BWD-m_aq], 0);
        for (j = 0; j < M_BWD; j++)
            mem_syn[j] = synth_ptr[i_subfr+L_SUBFR-M_BWD+j];

        for (i = L_SUBFR-M_BWD, j = 0; i < L_SUBFR; i++, j++) {
            mem_err[j] = sub(speech[i_subfr+i], synth_ptr[i_subfr+i]);
            temp = extract_h(L_shl(L_mult(y1[i], gain_pit), 1));
            k    = extract_h(L_shl(L_mult(y2[i], gain_code), 2));
            mem_w0[j] = sub(xn[i], add(temp, k));
        }

        pAp += m_ap+1;
        pAq += m_aq+1;
        i_gamma = add(i_gamma, 1);
    }

    /*--------------------------------------------------*
     * Update signal for next frame.                    *
     * -> shift to the left by L_FRAME:                 *
     *    speech[], wsp[] and exc[]                     *
     *--------------------------------------------------*/
    Copy(&old_speech[L_FRAME], &old_speech[0], L_TOTAL-L_FRAME);
    Copy(&old_wsp[L_FRAME], &old_wsp[0], PIT_MAX);
    Copy(&old_exc[L_FRAME], &old_exc[0], PIT_MAX+L_INTERPOL);
    prev_lp_mode = lp_mode;

    return;
}
// Compute MOPS descriptors.
void ComputeMOPSDescriptors(CFloatImage &image, FeatureSet &features)
{
    int w = image.Shape().width;    // image width
    int h = image.Shape().height;   // image height

    // Create grayscale image used for Harris detection
    CFloatImage grayImage = ConvertToGray(image);

    // Apply a 7x7 gaussian blur to the grayscale image
    CFloatImage blurImage(w, h, 1);
    Convolve(grayImage, blurImage, ConvolveKernel_7x7);

    // Transform matrices
    CTransform3x3 xform;
    CTransform3x3 trans1;
    CTransform3x3 rotate;
    CTransform3x3 scale;
    CTransform3x3 trans2;

    // Declare additional variables
    float pxl;                    // pixel value
    double mean, sq_sum, stdev;   // variables for normalizing the data set

    // This image represents the window around the feature you need
    // to compute and store as the feature descriptor
    const int windowSize = 8;
    CFloatImage destImage(windowSize, windowSize, 1);

    for (vector<Feature>::iterator i = features.begin(); i != features.end(); i++) {
        Feature &f = *i;

        // Compute the transform from each pixel in the 8x8 image to sample from
        // the appropriate pixels in the 40x40 rotated window surrounding the feature
        trans1 = CTransform3x3::Translation(f.x, f.y);                     // translate window to feature point
        rotate = CTransform3x3::Rotation(f.angleRadians * 180.0 / PI);     // rotate window by angle
        scale  = CTransform3x3::Scale(5.0);                                // scale window by 5
        trans2 = CTransform3x3::Translation(-windowSize/2, -windowSize/2); // translate window to origin

        // transform resulting from combining above transforms
        xform = trans1*scale*rotate*trans2;

        // Call the WarpGlobal function to do the mapping
        WarpGlobal(blurImage, destImage, xform, eWarpInterpLinear);

        // Resize data field for an 8x8 square window
        f.data.resize(windowSize * windowSize);

        // Find mean of window
        mean = 0;
        for (int y = 0; y < windowSize; y++) {
            for (int x = 0; x < windowSize; x++) {
                pxl = destImage.Pixel(x, y, 0);
                f.data[y*windowSize + x] = pxl;
                mean += pxl/(windowSize*windowSize);
            }
        }

        // Find standard deviation of window
        sq_sum = 0;
        for (int k = 0; k < windowSize*windowSize; k++) {
            sq_sum += (mean - f.data[k]) * (mean - f.data[k]);
        }
        stdev = sqrt(sq_sum/(windowSize*windowSize));

        // Normalize window to have 0 mean and unit variance by subtracting
        // the mean and dividing by the standard deviation
        for (int k = 0; k < windowSize*windowSize; k++) {
            f.data[k] = (f.data[k]-mean)/stdev;
        }
    }
}
static void Norm_Corr(Word16 exc[], Word16 xn[], Word16 h[], Word16 L_subfr,
                      Word16 t_min, Word16 t_max, Word16 corr_norm[])
{
    Word16 i, j, k;
    Word16 corr_h, corr_l, norm_h, norm_l;
    Word32 s, L_temp;

    Word16 excf[L_SUBFR];
    Word16 scaling, h_fac, *s_excf, scaled_excf[L_SUBFR];

    k = negate(t_min);

    /* compute the filtered excitation for the first delay t_min */
    Convolve(&exc[k], h, excf, L_subfr);

    /* scale "excf[]" to avoid overflow */
    for (j = 0; j < L_subfr; j++)
        scaled_excf[j] = shr(excf[j], 2);

    /* Compute energy of excf[] to check for danger of overflow */
    s = 0;
    for (j = 0; j < L_subfr; j++)
        s = L_mac(s, excf[j], excf[j]);

    L_temp = L_sub(s, 67108864L);
    if (L_temp <= 0L)          /* if (s <= 2^26) */
    {
        s_excf = excf;
        h_fac = 15-12;         /* h in Q12 */
        scaling = 0;
    }
    else
    {
        s_excf = scaled_excf;  /* "excf[]" scaled down by 2^2 */
        h_fac = 15-12-2;       /* h in Q12, excf scaled down by 2^2 */
        scaling = 2;
    }

    /* loop for every possible period */
    for (i = t_min; i <= t_max; i++)
    {
        /* Compute 1/sqrt(energy of excf[]) */
        s = 0;
        for (j = 0; j < L_subfr; j++)
            s = L_mac(s, s_excf[j], s_excf[j]);

        s = Inv_sqrt(s);                   /* Result in Q30 */
        L_Extract(s, &norm_h, &norm_l);

        /* Compute correlation between xn[] and excf[] */
        s = 0;
        for (j = 0; j < L_subfr; j++)
            s = L_mac(s, xn[j], s_excf[j]);
        L_Extract(s, &corr_h, &corr_l);

        /* Normalize correlation = correlation * (1/sqrt(energy)) */
        s = Mpy_32(corr_h, corr_l, norm_h, norm_l);

        corr_norm[i] = extract_h(L_shl(s, 16));   /* Result is on 16 bits */

        /* modify the filtered excitation excf[] for the next iteration */
        if (sub(i, t_max) != 0)
        {
            k = sub(k, 1);
            for (j = L_subfr-(Word16)1; j > 0; j--)
            {
                /* excf[j] = h[j]*exc[k] + excf[j-1], h in Q(12-scaling) */
                s = L_mult(exc[k], h[j]);
                s = L_shl(s, h_fac);
                s_excf[j] = add(extract_h(s), s_excf[j-1]);
            }
            s_excf[0] = shr(exc[k], scaling);
        }
    }
    return;
}
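In floating-point terms, `Norm_Corr` evaluates, for each candidate lag $t$, the correlation between the target $x$ and the filtered past excitation, normalized by the energy of the filtered excitation:

$$R(t) \;=\; \frac{\sum_{n=0}^{L-1} x[n]\, y_t[n]}{\sqrt{\sum_{n=0}^{L-1} y_t[n]^2}}, \qquad y_t = \mathrm{exc}(\cdot - t) * h,$$

where the per-lag update loop at the bottom shifts $y_t$ by one sample and adds the contribution of the newly included excitation sample, instead of re-running the full convolution at every lag.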
void SupportVectorMachine::predictSlidingWindow(const Feature &feat, CFloatImage &response) const
{
    response.ReAllocate(CShape(feat.Shape().width, feat.Shape().height, 1));
    response.ClearPixels();

    /******** BEGIN TODO ********/
    // Sliding window prediction.
    //
    // In this project we are using a linear SVM. This means that
    // its classification function is very simple, consisting of a
    // dot product of the feature vector with a set of weights learned
    // during training, followed by a subtraction of a bias term:
    //
    //     pred <- dot(feat, weights) - bias
    //
    // This is very simple to compute when we are dealing with
    // cropped images: our computed features have the same dimensions
    // as the SVM weights. Things get a little trickier when you
    // want to evaluate this function over all possible subwindows of
    // a larger feature, one that we would get by running our feature
    // extraction on an entire image.
    //
    // Here you will evaluate the above expression by breaking
    // the dot product into a series of convolutions (remember that
    // a convolution can be thought of as a pointwise dot product with
    // the convolution kernel), one per band.
    //
    // Convolve each band of the SVM weights with the corresponding
    // band in feat, and accumulate the resulting score images. The final
    // step is to subtract the SVM bias term given by this->getBiasTerm().
    //
    // Hint: you might need to set the origin of the convolution kernel
    // in order for the result of the convolution to be correctly centered.
    //
    // Useful functions:
    // Convolve, BandSelect, this->getWeights(), this->getBiasTerm()

    Feature weights = this->getWeights();
    int nWtBands = weights.Shape().nBands;

    // Set the center of the window as the origin for the conv. kernel
    for (int band = 0; band < nWtBands; band++) {
        // Select a band
        CFloatImage featBand;
        CFloatImage weightBand;
        BandSelect(feat, featBand, band, 0);
        BandSelect(weights, weightBand, band, 0);

        // Set the origin of the kernel
        weightBand.origin[0] = weights.Shape().width / 2;
        weightBand.origin[1] = weights.Shape().height / 2;

        // Compute the per-band dot product as a convolution
        // (allocate the destination before clearing it)
        CFloatImage dotproduct(feat.Shape().width, feat.Shape().height, 1);
        dotproduct.ClearPixels();
        Convolve(featBand, dotproduct, weightBand);

        // Accumulate the resulting score image
        for (int y = 0; y < feat.Shape().height; y++) {
            for (int x = 0; x < feat.Shape().width; x++) {
                response.Pixel(x, y, 0) += dotproduct.Pixel(x, y, 0);
            }
        }
    }

    // Subtract the SVM bias term
    for (int y = 0; y < feat.Shape().height; y++) {
        for (int x = 0; x < feat.Shape().width; x++) {
            response.Pixel(x, y, 0) -= this->getBiasTerm();
        }
    }
    /******** END TODO ********/
}
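The decomposition used above, a window-wise dot product evaluated as per-band correlations minus a bias, can be written out explicitly with plain arrays. A minimal single-band sketch assuming row-major images and zero padding at the borders; all names here are illustrative, not the project's API:

#include <vector>

// Minimal sketch of a single-band sliding-window linear SVM score:
//   response(x, y) = sum_{u,v} feat(x+u-kW/2, y+v-kH/2) * w(u, v) - bias
// with the kernel origin at its center, mirroring the origin trick above.
std::vector<float> SlidingSvmScore(const std::vector<float> &feat, int W, int H,
                                   const std::vector<float> &w, int kW, int kH,
                                   float bias)
{
    std::vector<float> response(W * H, 0.0f);
    for (int y = 0; y < H; y++) {
        for (int x = 0; x < W; x++) {
            float s = 0.0f;
            for (int v = 0; v < kH; v++) {
                for (int u = 0; u < kW; u++) {
                    int fx = x + u - kW / 2;
                    int fy = y + v - kH / 2;
                    if (fx >= 0 && fx < W && fy >= 0 && fy < H)  // zero padding
                        s += feat[fy * W + fx] * w[v * kW + u];
                }
            }
            response[y * W + x] = s - bias;
        }
    }
    return response;
}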
// Compute MOPs descriptors.
void ComputeMOPSDescriptors(CFloatImage &image, FeatureSet &features)
{
    CFloatImage grayImage = ConvertToGray(image);
    CFloatImage blurredImage;
    Convolve(grayImage, blurredImage, ConvolveKernel_7x7);

    CFloatImage postHomography = CFloatImage();
    CFloatImage gaussianImage = GetImageFromMatrix((float *)gaussian5x5Float, 5, 5);

    // First make the image invariant to changes in illumination by subtracting off the mean
    int grayHeight = grayImage.Shape().height;
    int grayWidth = grayImage.Shape().width;

    // Now make this rotation invariant
    vector<Feature>::iterator featureIterator = features.begin();
    while (featureIterator != features.end()) {
        Feature &f = *featureIterator;

        CTransform3x3 scaleTransform = CTransform3x3();
        CTransform3x3 translationNegative;
        CTransform3x3 translationPositive;
        CTransform3x3 rotation;

        // Use floating-point division: 41/8 would truncate to 5
        double scaleFactor = 41.0 / 8.0;
        scaleTransform[0][0] = scaleFactor;
        scaleTransform[1][1] = scaleFactor;

        translationNegative = translationNegative.Translation(f.x, f.y);
        translationPositive = translationPositive.Translation(-4, -4);
        rotation = rotation.Rotation(f.angleRadians * 180 / PI);

        CTransform3x3 finalTransformation =
            translationNegative * rotation * scaleTransform * translationPositive;

        WarpGlobal(blurredImage, postHomography, finalTransformation, eWarpInterpLinear, 1.0f);

        // Read out the 8x8 descriptor window sampled from the 41x41
        // region around the feature
        for (int row = 0; row < 8; row++) {
            for (int col = 0; col < 8; col++) {
                f.data.push_back(postHomography.Pixel(col, row, 0));
            }
        }

        /*
        // Alternative: subsample 41x41 -> 20x20 -> 10x10 with Gaussian blur,
        // then drop rows/columns 3 and 7 to get an 8x8 descriptor
        int imgSize = 41;
        subsample(&f, imgSize, gaussianImage);
        imgSize = 20;
        subsample(&f, imgSize, gaussianImage);
        imgSize = 10;
        CFloatImage img = featureToImage(f, imgSize, imgSize);
        CFloatImage blurredImg(img.Shape());
        Convolve(img, blurredImg, gaussianImage);
        featuresFromImage(&f, blurredImg, imgSize, imgSize);
        int count = 0;
        for (int y = 0; y < imgSize; y++) {
            for (int x = 0; x < imgSize; x++) {
                if (x == 3 || x == 7 || y == 3 || y == 7) {
                    f.data.erase(f.data.begin() + count);
                } else {
                    count++;
                }
            }
        }
        */

        normalizeIntensities(&f, 8, 8);
        featureIterator++;
    }
}
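The composed transform above maps descriptor coordinates through a centering translation, a 41/8 scale, a rotation, and a translation to the feature point. A minimal sketch of that mapping written out as plain math rather than the library's CTransform3x3, with theta in radians (the composition order mirrors the code under the assumption that uniform scaling and rotation commute):

#include <cmath>

// Minimal sketch: map an 8x8 descriptor pixel (col, row) to source-image
// coordinates via translate(-4,-4) -> scale(41/8) -> rotate(theta) ->
// translate(fx, fy). Illustrative only.
void DescriptorToImage(double col, double row,
                       double fx, double fy, double theta,
                       double &ix, double &iy)
{
    double sx = (col - 4.0) * (41.0 / 8.0);  // center, then scale
    double sy = (row - 4.0) * (41.0 / 8.0);
    ix = fx + std::cos(theta) * sx - std::sin(theta) * sy;  // rotate, translate
    iy = fy + std::sin(theta) * sx + std::cos(theta) * sy;
}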
// Force instantiation of the templated convolution routines for pixel type T
// (the template header is required for this to compile as a free function)
template <class T>
void InstantiateConvolutionOf(CImageOf<T> img)
{
    CFloatImage kernel;
    Convolve(img, img, kernel);
    ConvolveSeparable(img, img, kernel, kernel, 1);
}
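This helper exists only to force the compiler to emit the templated convolution code for each pixel type, so the template definitions can live in a .cpp file. A self-contained toy illustration of the idiom, not ImageLib code:

// Calling a templated function with concrete types forces instantiation
// in this translation unit.
template <class T>
T TwiceOf(T v) { return v + v; }

// One dummy call per type we want compiled here.
void InstantiateTwiceOf()
{
    TwiceOf<unsigned char>(1);
    TwiceOf<int>(1);
    TwiceOf<float>(1.0f);
}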
// Loop through the image to compute the harris corner values as described in class
// srcImage: grayscale of original image
// harrisImage: populate the harris values per pixel in this image
void computeHarrisValues(CFloatImage &srcImage, CFloatImage &harrisImage, CFloatImage &orientationImage)
{
    int w = srcImage.Shape().width;   // image width
    int h = srcImage.Shape().height;  // image height

    // Create images to store x-derivative and y-derivative values
    CFloatImage Ix(w, h, 1);
    CFloatImage Iy(w, h, 1);
    CFloatImage Ix_blur(w, h, 1);
    CFloatImage Iy_blur(w, h, 1);

    // Compute x- and y-derivative values by convolving the image with the sobel filters
    Convolve(srcImage, Ix, ConvolveKernel_SobelX);
    Convolve(srcImage, Iy, ConvolveKernel_SobelY);

    // Apply a 7x7 gaussian blur to the grayscale image
    CFloatImage blurImage(w, h, 1);
    Convolve(srcImage, blurImage, ConvolveKernel_7x7);

    // Compute x- and y-derivative values by convolving the blurred image
    // with the sobel filters (used for the orientation estimate)
    Convolve(blurImage, Ix_blur, ConvolveKernel_SobelX);
    Convolve(blurImage, Iy_blur, ConvolveKernel_SobelY);

    // Declare additional variables
    int newX, newY;      // (x,y) coordinate for pixel in 5x5 sliding window
    float dx, dy;        // x-derivative, y-derivative values
    double HMatrix[4];   // Harris matrix
    double determinant;  // determinant of Harris matrix
    double trace;        // trace of Harris matrix
    int padType = 2;     // border padding type: 0 -> zero, 1 -> edge, 2 -> reflect

    // Loop through 'srcImage' and compute harris score for each pixel
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
            // reset Harris matrix values to 0
            memset(HMatrix, 0, sizeof(HMatrix));

            // Loop through pixels in 5x5 window to calculate Harris matrix
            for (int j = 0; j < 25; j++) {
                find5x5Index(x, y, j, &newX, &newY);
                if (srcImage.Shape().InBounds(newX, newY)) {
                    dx = Ix.Pixel(newX, newY, 0);
                    dy = Iy.Pixel(newX, newY, 0);
                } else {
                    // Depending on the value of padType, perform different types of border padding
                    switch (padType) {
                    case 1:
                        // 1 -> replicate border values
                        if (newX < 0) { newX = 0; }
                        else if (newX >= w) { newX = w - 1; }
                        if (newY < 0) { newY = 0; }
                        else if (newY >= h) { newY = h - 1; }
                        dx = Ix.Pixel(newX, newY, 0);
                        dy = Iy.Pixel(newX, newY, 0);
                        break;
                    case 2:
                        // 2 -> reflect border pixels
                        if (newX < 0) { newX = -newX; }
                        else if (newX >= w) { newX = w - (newX % w) - 1; }
                        if (newY < 0) { newY = -newY; }
                        else if (newY >= h) { newY = h - (newY % h) - 1; }
                        dx = Ix.Pixel(newX, newY, 0);
                        dy = Iy.Pixel(newX, newY, 0);
                        break;
                    default:
                        // 0 -> zero padding
                        dx = 0.0;
                        dy = 0.0;
                        break;
                    }
                }
                HMatrix[0] += dx * dx * gaussian5x5[j];
                HMatrix[1] += dx * dy * gaussian5x5[j];
                HMatrix[2] += dx * dy * gaussian5x5[j];
                HMatrix[3] += dy * dy * gaussian5x5[j];
            }

            // Calculate determinant and trace of harris matrix
            determinant = (HMatrix[0] * HMatrix[3]) - (HMatrix[1] * HMatrix[2]);
            trace = HMatrix[0] + HMatrix[3];

            // Compute corner strength function c(H) = determinant(H)/trace(H)
            // and save the result into harrisImage
            if (trace == 0)
                harrisImage.Pixel(x, y, 0) = 0.0;
            else
                harrisImage.Pixel(x, y, 0) = (determinant / trace);

            // Compute orientation and save the result in 'orientationImage'
            dx = Ix_blur.Pixel(x, y, 0);
            dy = Iy_blur.Pixel(x, y, 0);
            if (dx == 0.0 && dy == 0.0)
                orientationImage.Pixel(x, y, 0) = 0.0;
            else
                orientationImage.Pixel(x, y, 0) = atan2(dy, dx);
        }
    }
}
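The corner score used above comes from the 2x2 Harris matrix accumulated over the Gaussian-weighted window. A minimal standalone sketch of that strength function, separated from the image types for clarity (illustrative helper, not the project's API):

#include <cmath>

// Minimal sketch of the Harris corner strength used above:
//   H = [ a  b ]    c(H) = det(H) / trace(H) = (a*c - b*b) / (a + c)
//       [ b  c ]
// where a, b, c are Gaussian-weighted sums of Ix*Ix, Ix*Iy, Iy*Iy.
double HarrisStrength(double a, double b, double c)
{
    double det   = a * c - b * b;
    double trace = a + c;
    return (trace != 0.0) ? det / trace : 0.0;  // match the trace==0 guard above
}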