/*
 * Load one BBF stage classifier from the text file `file` into `classifier`.
 *
 * Fields are consumed in this order: feature count, threshold, then for each
 * feature its point count, that many "px py pz" / "nx ny nz" triples, and two
 * alpha values. The threshold and the alphas are read as integers and
 * reinterpreted as floats through a union.
 *
 * Returns 0 on success. Returns -1 when the file cannot be opened, when any
 * field is missing or malformed, or when an allocation fails; in those cases
 * the buffers allocated here are released and classifier->feature and
 * classifier->alpha are left null.
 */
static int _ccv_read_bbf_stage_classifier(const char* file, ccv_bbf_stage_classifier_t* classifier)
{
	union { float fl; int i; } fli, flia, flib;
	int i, j;
	FILE* r = fopen(file, "r");
	if (r == 0)
		return -1;
	/* Start from a known state so the failure path can release safely. */
	classifier->feature = 0;
	classifier->alpha = 0;
	if (fscanf(r, "%d", &classifier->count) != 1 || ENDORSE(classifier->count) < 0)
		goto fail;
	if (fscanf(r, "%d", &fli.i) != 1)
		goto fail;
	classifier->threshold = fli.fl;
	classifier->feature = (ccv_bbf_feature_t*)ccmalloc(ENDORSE(classifier->count) * sizeof(ccv_bbf_feature_t));
	classifier->alpha = (float*)DEDORSE(ccmalloc(ENDORSE(classifier->count) * 2 * sizeof(float)));
	if (ENDORSE(classifier->count) > 0 && (classifier->feature == 0 || classifier->alpha == 0))
		goto fail;
	for (i = 0; i < ENDORSE(classifier->count); i++)
	{
		if (fscanf(r, "%d", &classifier->feature[i].size) != 1)
			goto fail;
		/* NOTE(review): size is not validated against the capacity of the
		 * px/py/pz/nx/ny/nz members, which are declared outside this block. */
		for (j = 0; j < ENDORSE(classifier->feature[i].size); j++)
		{
			if (fscanf(r, "%d %d %d", &classifier->feature[i].px[j], &classifier->feature[i].py[j], &classifier->feature[i].pz[j]) != 3)
				goto fail;
			if (fscanf(r, "%d %d %d", &classifier->feature[i].nx[j], &classifier->feature[i].ny[j], &classifier->feature[i].nz[j]) != 3)
				goto fail;
		}
		if (fscanf(r, "%d %d", &flia.i, &flib.i) != 2)
			goto fail;
		classifier->alpha[i * 2] = flia.fl;
		classifier->alpha[i * 2 + 1] = flib.fl;
	}
	fclose(r);
	return 0;

fail:
	/* Undo partial work so the caller never sees half-initialised buffers. */
	if (classifier->feature)
		ccfree(classifier->feature);
	if (classifier->alpha)
		ccfree(classifier->alpha);
	classifier->feature = 0;
	classifier->alpha = 0;
	fclose(r);
	return -1;
}
/*
 * Scale the base luminance and chrominance quantization tables by
 * qualityFactor and fill the Lqt/Cqt tables (stored at the zigzag position)
 * and the ILqt/ICqt tables (stored at the natural position, computed with
 * dspDivision(0x8000, value)).
 */
void initQuantizationTables(UINT32 qualityFactor)
{
    static const UINT8 luma_base[] = {
        16, 11, 10, 16, 24, 40, 51, 61,
        12, 12, 14, 19, 26, 58, 60, 55,
        14, 13, 16, 24, 40, 57, 69, 56,
        14, 17, 22, 29, 51, 87, 80, 62,
        18, 22, 37, 56, 68, 109, 103, 77,
        24, 35, 55, 64, 81, 104, 113, 92,
        49, 64, 78, 87, 103, 121, 120, 101,
        72, 92, 95, 98, 112, 100, 103, 99
    };
    static const UINT8 chroma_base[] = {
        17, 18, 24, 47, 99, 99, 99, 99,
        18, 21, 26, 66, 99, 99, 99, 99,
        24, 26, 56, 99, 99, 99, 99, 99,
        47, 66, 99, 99, 99, 99, 99, 99,
        99, 99, 99, 99, 99, 99, 99, 99,
        99, 99, 99, 99, 99, 99, 99, 99,
        99, 99, 99, 99, 99, 99, 99, 99,
        99, 99, 99, 99, 99, 99, 99, 99
    };
    UINT16 pos, zz;
    APPROX UINT32 value;

    for (pos = 0; pos < 64; pos++) {
        zz = zigzagTable[pos];

        /* Luminance entry: scale, round (+0x200, >>10), clamp to 1..255. */
        value = luma_base[pos] * qualityFactor;
        value = (value + 0x200) >> 10;
        if (ENDORSE(value == 0))
            value = 1;
        if (ENDORSE(value > 255))
            value = 255;
        Lqt[zz] = ENDORSE((UINT8) value);
        ILqt[pos] = dspDivision(0x8000, ENDORSE(value));

        /* Chrominance entry: same scaling, rounding and clamping. */
        value = chroma_base[pos] * qualityFactor;
        value = (value + 0x200) >> 10;
        if (ENDORSE(value == 0))
            value = 1;
        if (ENDORSE(value > 255))
            value = 255;
        Cqt[zz] = ENDORSE((UINT8) value);
        ICqt[pos] = dspDivision(0x8000, ENDORSE(value));
    }
}
/*
 * Precompute the parts of the Gaussian density that do not depend on the
 * observation (the density is evaluated in the log domain):
 *   - the log-determinant term, accumulated into g->det[m][f][d];
 *   - 1/(2*var), converted with logmath_ln_to_log and stored in place of
 *     each variance.
 * Variances below varfloor are raised to varfloor first; the number of
 * floored values is logged. Always returns 0.
 */
static int32
gauden_dist_precompute(gauden_t * g, logmath_t *lmath, APPROX float32 varfloor)
{
    int32 m, f, d, i;
    int32 n_floored = 0;

    /* Allocate space for determinants */
    g->det = ckd_calloc_3d(g->n_mgau, g->n_feat, g->n_density, sizeof(***g->det));

    for (m = 0; m < g->n_mgau; m++) {
        for (f = 0; f < g->n_feat; f++) {
            const int32 veclen = g->featlen[f];

            /* One determinant per density in g->det[m][f] */
            for (d = 0; d < g->n_density; d++) {
                mfcc_t *det_slot = &g->det[m][f][d];
                mfcc_t *var_vec = g->var[m][f][d];
                mfcc_t *mean_vec = g->mean[m][f][d];

                (void) mean_vec;    /* only dereferenced in FIXED_POINT builds */
                *det_slot = 0;
                for (i = 0; i < veclen; i++) {
                    mfcc_t *var_slot = &var_vec[i];
                    /* The slot holds a float32 on entry and an mfcc_t on exit. */
                    float32 *fvar = (float32 *) var_slot;

#ifdef FIXED_POINT
                    APPROX float32 *fmean = (float32 *) &mean_vec[i];
                    mean_vec[i] = FLOAT2MFCC(*fmean);
#endif
                    if (*fvar < (ENDORSE(varfloor))) {
                        *fvar = (ENDORSE(varfloor));
                        ++n_floored;
                    }
                    *det_slot += (mfcc_t) logmath_log(lmath, 1.0 / sqrt(*fvar * 2.0 * M_PI));
                    /* Precompute this part of the exponential */
                    *var_slot = (mfcc_t) logmath_ln_to_log(lmath, (1.0 / (*fvar * 2.0)));
                }
            }
        }
    }

    E_INFO("%d variance values floored\n", n_floored);

    return 0;
}
/*
 * Map a linear frequency to its warped value.
 * When is_neutral is set the input is returned unchanged. Otherwise
 * frequencies below params[1] are scaled by params[0], and the rest follow
 * the line final_piece[0] * linear + final_piece[1].
 */
APPROX float
fe_warp_piecewise_linear_unwarped_to_warped(APPROX float linear)
{
    APPROX float warped;

    if (is_neutral)
        return linear;

    /* nonlinear = a * linear - b */
    if ((ENDORSE(linear)) < (ENDORSE(params[1])))
        warped = linear * params[0];
    else
        warped = final_piece[0] * linear + final_piece[1];

    return warped;
}
void activateHiddenUnits(int visible[], int stochastic, int hidden[]) { accept_roi_begin(); // Calculate activation energy for hidden units APPROX double hiddenEnergies[NUM_HIDDEN]; int h; for (h = 0; h < NUM_HIDDEN; h++) { // Get the sum of energies APPROX double sum = 0; int v; for (v = 0; v < NUM_VISIBLE + 1; v++) // remove the +1 if you want to skip the bias { if (visible[v] != -1) sum += (double) visible[v] * edges[v][h]; } hiddenEnergies[h] = sum; } // Activate hidden units for (h = 0; h < NUM_HIDDEN; h++) { double prob = 1.0 / ENDORSE(1.0 + exp(-hiddenEnergies[h])); if (stochastic) { if (ENDORSE(RAND) < prob) hidden[h] = 1; else hidden[h] = 0; } else { if (prob > 0.5) hidden[h] = 1; else hidden[h] = 0; } } hidden[NUM_HIDDEN] = 1; // turn on bias accept_roi_end(); }
/*
 * Multiply each of the 64 DCT coefficients by its quantization table entry,
 * round ((x + 0x4000) >> 15) and store the result in Temp at the zigzag
 * position of that coefficient.
 */
void quantization(APPROX INT16* const data, UINT16* const quant_table_ptr)
{
    APPROX INT32 scaled;
    INT16 pos = 63;

    /* Walk from the last coefficient down to the first. */
    while (pos >= 0) {
        scaled = data[pos] * quant_table_ptr[pos];
        scaled = (scaled + 0x4000) >> 15;
        Temp[zigzagTable[pos]] = ENDORSE((INT16) scaled);
        pos--;
    }
}
/*
 * Release a classifier cascade: the feature and alpha arrays of every
 * stage, then the stage array, then the cascade structure itself.
 */
void ccv_bbf_classifier_cascade_free(ccv_bbf_classifier_cascade_t* cascade)
{
	int stage_idx = 0;
	while (stage_idx < ENDORSE(cascade->count))
	{
		ccv_bbf_stage_classifier_t* stage = &cascade->stage_classifier[stage_idx];
		ccfree(stage->feature);
		ccfree(stage->alpha);
		++stage_idx;
	}
	ccfree(cascade->stage_classifier);
	ccfree(cascade);
}
ccv_bbf_classifier_cascade_t* ccv_bbf_classifier_cascade_read_binary(char* s) { int i; ccv_bbf_classifier_cascade_t* cascade = (ccv_bbf_classifier_cascade_t*)ccmalloc(sizeof(ccv_bbf_classifier_cascade_t)); APPROX int* count_ptr = DEDORSE(&cascade->count); memcpy(count_ptr, s, sizeof(cascade->count)); s += sizeof(cascade->count); memcpy(&cascade->size.width, s, sizeof(cascade->size.width)); s += sizeof(cascade->size.width); memcpy(&cascade->size.height, s, sizeof(cascade->size.height)); s += sizeof(cascade->size.height); ccv_bbf_stage_classifier_t* classifier = cascade->stage_classifier = (ccv_bbf_stage_classifier_t*)ccmalloc(ENDORSE(cascade->count) * sizeof(ccv_bbf_stage_classifier_t)); for (i = 0; i < ENDORSE(cascade->count); i++, classifier++) { memcpy(&classifier->count, s, sizeof(classifier->count)); s += sizeof(classifier->count); memcpy(&classifier->threshold, s, sizeof(classifier->threshold)); s += sizeof(classifier->threshold); classifier->feature = (ccv_bbf_feature_t*)ccmalloc(ENDORSE(classifier->count) * sizeof(ccv_bbf_feature_t)); classifier->alpha = (float*)DEDORSE(ccmalloc(ENDORSE(classifier->count) * 2 * sizeof(float))); memcpy(classifier->feature, s, classifier->count * sizeof(ccv_bbf_feature_t)); s += classifier->count * sizeof(ccv_bbf_feature_t); memcpy(classifier->alpha, s, classifier->count * 2 * sizeof(float)); s += classifier->count * 2 * sizeof(float); } return cascade; }
static int _ccv_is_equal(const void* _r1, const void* _r2, void* data) { const ccv_comp_t* r1 = (const ccv_comp_t*)_r1; const ccv_comp_t* r2 = (const ccv_comp_t*)_r2; APPROX int distance = (int)(r1->rect.width * 0.25 + 0.5); return ENDORSE(r2->rect.x <= r1->rect.x + distance && r2->rect.x >= r1->rect.x - distance && r2->rect.y <= r1->rect.y + distance && r2->rect.y >= r1->rect.y - distance && r2->rect.width <= (int)(r1->rect.width * 1.5 + 0.5) && (int)(r2->rect.width * 1.5 + 0.5) >= r1->rect.width); }
/*
 * For every column of `in`, produce the row indices of its elements in
 * descending order of value.
 *
 * Pass i over a column scans all rows for an element strictly larger than
 * row i's value, records the index of the winner in ind(i, k), and then
 * overwrites that winner in `in` with 0. NOTE: `in` is therefore modified
 * by this call.
 *
 * in  - input matrix (clobbered as described above).
 * dim - unused.
 * Returns a matrix from iMallocHandle(rows, cols) holding the indices.
 *
 * The previous version also made an iDeepCopy of `in` that was never read
 * or released; that leaked allocation has been removed.
 */
I2D* iSortIndices(I2D* in, int dim)
{
    int rows, cols, i, j, k;
    I2D *ind;

    (void) dim;

    rows = in->height;
    cols = in->width;

    ind = iMallocHandle(rows, cols);

    for(i=0; i<cols; i++)
        for(j=0; j<rows; j++)
            subsref(ind,j,i) = 0;

    for(k=0; k<cols; k++)
    {
        for(i=0; i<rows; i++)
        {
            /* Start from row i and look for anything strictly larger. */
            APPROX int localMax = subsref(in,i,k);
            int localIndex = i;

            subsref(ind,i,k) = i;
            for(j=0; j<rows; j++)
            {
                if((ENDORSE(localMax)) < (ENDORSE(subsref(in,j,k))))
                {
                    subsref(ind,i,k) = j;
                    localMax = subsref(in,j,k);
                    localIndex = j;
                }
            }
            /* Knock the winner out so the next pass finds the next largest. */
            subsref(in,localIndex,k) = 0;
        }
    }

    return ind;
}
/*
 * Compare the contents of `in1` with the integers stored in
 * "<path>/expected_C.txt".
 *
 * in1  - matrix under test (height * width values in in1->data).
 * path - directory that contains expected_C.txt.
 * tol  - largest allowed difference between the absolute values of an
 *        observed and an expected element.
 *
 * Returns 1 when every element is within tolerance, and -1 when the file
 * cannot be opened, memory cannot be allocated, the file holds fewer values
 * than the matrix, or any element is out of tolerance.
 *
 * At most height * width values are read, so an oversized file cannot
 * overflow the buffer, and the file handle and buffer are released on every
 * path.
 */
int selfCheck(I2D* in1, char* path, int tol)
{
    int ret = -1;
    FILE* fd = NULL;
    int *buffer = NULL;
    int count = 0, total, i, n;
    char file[100];
    int* data = ENDORSE(in1->data);

    total = in1->height * in1->width;

    /* Build the file name without overrunning the fixed-size buffer. */
    n = snprintf(file, sizeof(file), "%s/expected_C.txt", path);
    if(n < 0 || (size_t)n >= sizeof(file))
    {
        printf("Error: Expected file not opened \n");
        goto done;
    }

    fd = fopen(file, "r");
    if(fd == NULL)
    {
        printf("Error: Expected file not opened \n");
        goto done;
    }

    if(total > 0)
    {
        buffer = (int*)malloc(sizeof(int)*total);
        if(buffer == NULL)
        {
            printf("Error: Out of memory \n");
            goto done;
        }
    }

    /* Read only as many values as will be compared. */
    while(count < total && fscanf(fd, "%d", &buffer[count]) == 1)
        count++;

    if(count < total)
    {
        printf("Checking error: dimensions mismatch. Expected = %d, Observed = %d \n", count, total);
        goto done;
    }

    for(i=0; i<total; i++)
    {
        /* Magnitudes are compared, so a sign difference alone is accepted. */
        if((abs(data[i])-abs(buffer[i]))>tol || (abs(buffer[i])-abs(data[i]))>tol)
        {
            printf("Checking error: Values mismtach at %d element\n", i);
            printf("Expected value = %d, observed = %d\n", buffer[i], data[i] );
            goto done;
        }
    }

    ret = 1;

done:
    if(fd != NULL)
        fclose(fd);
    free(buffer);
    if(ret == 1)
        printf("Verification\t\t- Successful\n");
    return ret;
}
/*
 * Map a warped frequency back to its linear value.
 * When is_neutral is set the input is returned unchanged. Otherwise values
 * below params[0] * params[1] are divided by params[0], and the rest are
 * inverted through the final_piece line. A warning is logged when the
 * result exceeds nyquist_frequency; the value is still returned.
 */
APPROX float
fe_warp_piecewise_linear_warped_to_unwarped(APPROX float nonlinear)
{
    APPROX float unwarped;

    if (is_neutral)
        return nonlinear;

    /* linear = (nonlinear - b) / a */
    if ((ENDORSE(nonlinear)) < (ENDORSE(params[0] * params[1]))) {
        unwarped = nonlinear / params[0];
    }
    else {
        unwarped = nonlinear - final_piece[1];
        unwarped /= final_piece[0];
    }

    if ((ENDORSE(unwarped)) > (ENDORSE(nyquist_frequency))) {
        E_WARN
            ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n",
             params[0], unwarped, nyquist_frequency);
    }

    return unwarped;
}
/*
 * Evaluate one BBF feature on the image planes in `u8`.
 *
 * Returns 1 when every sampled positive point is strictly brighter than
 * every sampled negative point, 0 otherwise. Point 0 is always sampled;
 * later points are sampled only when their plane index (pz / nz) is
 * non-negative. `step[z]` is the row stride used for plane z.
 */
static inline int _ccv_run_bbf_feature(ccv_bbf_feature_t* feature, int* step, APPROX unsigned char** u8)
{
#define pf_at(i) (*(u8[feature->pz[i]] + feature->px[i] + feature->py[i] * step[feature->pz[i]]))
#define nf_at(i) (*(u8[feature->nz[i]] + feature->nx[i] + feature->ny[i] * step[feature->nz[i]]))
	APPROX unsigned char pmin = pf_at(0), nmax = nf_at(0);
	int k;

	/* check if every point in P > every point in N, and take a shortcut */
	if (ENDORSE(pmin <= nmax))
		return 0;

	for (k = 1; k < ENDORSE(feature->size); k++)
	{
		if (ENDORSE(feature->pz[k]) >= 0)
		{
			const int p = ENDORSE(pf_at(k));
			/* A new positive minimum must stay above the negative maximum. */
			if (p < ENDORSE(pmin) && p <= ENDORSE(nmax))
				return 0;
			if (p < ENDORSE(pmin))
				pmin = p;
		}
		if (ENDORSE(feature->nz[k]) >= 0)
		{
			const int n = ENDORSE(nf_at(k));
			/* A new negative maximum must stay below the positive minimum. */
			if (n > ENDORSE(nmax) && ENDORSE(pmin) <= n)
				return 0;
			if (n > ENDORSE(nmax))
				nmax = n;
		}
	}
#undef pf_at
#undef nf_at
	return 1;
}
/*
 * Serialise `cascade` into the buffer `s` of capacity `slen` bytes.
 *
 * The required size is always computed and returned. Data is written only
 * when slen is at least that size, in this order: cascade count,
 * size.width, size.height, then per stage its count, threshold, feature
 * array and alpha array (two floats per feature).
 */
int ccv_bbf_classifier_cascade_write_binary(ccv_bbf_classifier_cascade_t* cascade, APPROX char* s, int slen)
{
	int k;
	ccv_bbf_stage_classifier_t* stage;
	APPROX int len = sizeof(cascade->count) + sizeof(cascade->size.width) + sizeof(cascade->size.height);

	/* First pass: total number of bytes needed. */
	for (k = 0; k < ENDORSE(cascade->count); k++)
	{
		stage = &cascade->stage_classifier[k];
		len += sizeof(stage->count) + sizeof(stage->threshold) + stage->count * sizeof(ccv_bbf_feature_t) + stage->count * 2 * sizeof(float);
	}

	/* Buffer too small: report the size without writing anything. */
	if (slen < ENDORSE(len))
		return ENDORSE(len);

	APPROX int* count_ptr = DEDORSE(&cascade->count);
	memcpy(s, count_ptr, sizeof(cascade->count));
	s += sizeof(cascade->count);
	memcpy(s, &cascade->size.width, sizeof(cascade->size.width));
	s += sizeof(cascade->size.width);
	memcpy(s, &cascade->size.height, sizeof(cascade->size.height));
	s += sizeof(cascade->size.height);

	/* Second pass: emit every stage. */
	for (k = 0; k < ENDORSE(cascade->count); k++)
	{
		stage = &cascade->stage_classifier[k];
		memcpy(s, &stage->count, sizeof(stage->count));
		s += sizeof(stage->count);
		memcpy(s, &stage->threshold, sizeof(stage->threshold));
		s += sizeof(stage->threshold);
		memcpy(s, stage->feature, stage->count * sizeof(ccv_bbf_feature_t));
		s += stage->count * sizeof(ccv_bbf_feature_t);
		memcpy(s, stage->alpha, stage->count * 2 * sizeof(float));
		s += stage->count * 2 * sizeof(float);
	}
	return ENDORSE(len);
}
ccv_bbf_classifier_cascade_t* ccv_bbf_read_classifier_cascade(const char* directory) { char buf[1024]; sprintf(buf, "%s/cascade.txt", directory); int s, i; FILE* r = fopen(buf, "r"); if (r == 0) return 0; ccv_bbf_classifier_cascade_t* cascade = (ccv_bbf_classifier_cascade_t*)ccmalloc(sizeof(ccv_bbf_classifier_cascade_t)); s = fscanf(r, "%d %d %d", &cascade->count, &cascade->size.width, &cascade->size.height); assert(s > 0); cascade->stage_classifier = (ccv_bbf_stage_classifier_t*)ccmalloc(ENDORSE(cascade->count) * sizeof(ccv_bbf_stage_classifier_t)); for (i = 0; i < ENDORSE(cascade->count); i++) { sprintf(buf, "%s/stage-%d.txt", directory, i); if (_ccv_read_bbf_stage_classifier(buf, &cascade->stage_classifier[i]) < 0) { cascade->count = i; break; } } fclose(r); return cascade; }
/*
 * For each steering vector sv, compute the normalisation scalar
 * gamma[sv] = 1 / |w^H v|, where w is the adaptive weight vector selected
 * by (dop_index, range_block, sv) and v is the matching steering vector.
 * gamma[sv] is set to 1 when the magnitude is not positive.
 */
static void compute_gamma_weights(
    APPROX float gamma[N_STEERING],
    complex (* const adaptive_weights)[N_BLOCKS][N_STEERING][N_CHAN*TDOF],
    complex (* const steering_vectors)[N_CHAN*TDOF],
    int range_block,
    int dop_index)
{
    int sv, elem;
    complex inner;

    for (sv = 0; sv < N_STEERING; ++sv)
    {
        /* Inner product of the conjugated weights with the steering vector. */
        inner.re = inner.im = 0.0f;
        for (elem = 0; elem < N_CHAN*TDOF; ++elem)
        {
            const complex term = cmult(
                cconj(adaptive_weights[dop_index][range_block][sv][elem]),
                steering_vectors[sv][elem]);
            inner.re += term.re;
            inner.im += term.im;
        }

        /*
         * In exact arithmetic the inner product is a real positive scalar,
         * so its imaginary part would be zero. With limited precision that
         * may not hold, so the magnitude is used. gamma is a normalisation
         * scalar, hence the inverse of that magnitude.
         */
        gamma[sv] = sqrt(inner.re*inner.re + inner.im*inner.im);
        if (ENDORSE(gamma[sv] > 0))
            gamma[sv] = 1.0f / gamma[sv];
        else
            gamma[sv] = 1.0f;
    }
}
static int32 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath) { char eofchk; FILE *fp; int32 byteswap, chksum_present; uint32 chksum; float32 *pdf; int32 i, f, c, p, n_err; char **argname, **argval; E_INFO("Reading senone mixture weights: %s\n", file_name); if ((fp = fopen(file_name, "rb")) == NULL) E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name); /* Read header, including argument-value info and 32-bit byteorder magic */ if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) E_FATAL("Failed to read header from file '%s'\n", file_name); /* Parse argument-value list */ chksum_present = 0; for (i = 0; argname[i]; i++) { if (strcmp(argname[i], "version") == 0) { if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0) E_WARN("Version mismatch(%s): %s, expecting %s\n", file_name, argval[i], MIXW_PARAM_VERSION); } else if (strcmp(argname[i], "chksum0") == 0) { chksum_present = 1; /* Ignore the associated value */ } } bio_hdrarg_free(argname, argval); argname = argval = NULL; chksum = 0; /* Read #senones, #features, #codewords, arraysize */ if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != 1) || (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum) != 1) || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum) != 1) || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); } if (i != s->n_sen * s->n_feat * s->n_cw) { E_FATAL ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n", file_name, i, s->n_sen, s->n_feat, s->n_cw); } /* * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits. * All PDF values will be truncated (in the LSB positions) by these many bits. */ if (((ENDORSE(s->mixwfloor)) <= 0.0) || ((ENDORSE(s->mixwfloor)) >= 1.0)) E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor); /* Use a fixed shift for compatibility with everything else. 
*/ E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT); /* * Allocate memory for senone PDF data. Organize normally or transposed depending on * s->n_gauden. */ if (s->n_gauden > 1) { E_INFO("Not transposing mixture weights in memory\n"); s->pdf = (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw, sizeof(senprob_t)); } else { E_INFO("Transposing mixture weights in memory\n"); s->pdf = (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen, sizeof(senprob_t)); } /* Temporary structure to read in floats */ pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32)); /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ n_err = 0; for (i = 0; i < s->n_sen; i++) { for (f = 0; f < s->n_feat; f++) { if (bio_fread ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap, &chksum) != s->n_cw) { E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); } /* Normalize and floor */ if ((ENDORSE(vector_sum_norm(pdf, s->n_cw))) <= 0.0) n_err++; vector_floor(pdf, s->n_cw, s->mixwfloor); vector_sum_norm(pdf, s->n_cw); /* Convert to logs3, truncate to 8 bits, and store in s->pdf */ for (c = 0; c < s->n_cw; c++) { p = -(logmath_log(lmath, pdf[c])); p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */ if (s->n_gauden > 1) s->pdf[i][f][c] = (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255; else s->pdf[f][c][i] = (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255; } } }
/*
 * Configure piecewise-linear warping from a whitespace-separated parameter
 * string.
 *
 * param_str     - up to N_PARAM numbers; NULL selects neutral warping.
 * sampling_rate - sampling rate; nyquist_frequency is set to half of it.
 *
 * Nothing is recomputed when param_str equals the stored p_str. Otherwise
 * params[] is parsed, the coefficients of the final linear piece are
 * precomputed, and warping is disabled again when params[0] is zero.
 *
 * The working copy of param_str is truncated to the size of the local
 * buffer instead of overflowing it.
 */
void
fe_warp_piecewise_linear_set_parameters(char const *param_str, APPROX float sampling_rate)
{
    char *tok;
    const char *seps = " \t";
    char temp_param_str[256];
    size_t copy_len;
    int param_index = 0;

    nyquist_frequency = sampling_rate / 2;
    if (param_str == NULL) {
        is_neutral = YES;
        return;
    }
    /* The new parameters are the same as the current ones, so do nothing. */
    if (strcmp(param_str, p_str) == 0) {
        return;
    }
    is_neutral = NO;

    /* Bounded copy: strtok() needs a writable buffer of limited size. */
    copy_len = strlen(param_str);
    if (copy_len >= sizeof(temp_param_str)) {
        copy_len = sizeof(temp_param_str) - 1;
    }
    memcpy(temp_param_str, param_str, copy_len);
    temp_param_str[copy_len] = '\0';

    memset(params, 0, N_PARAM * sizeof(float));
    memset(final_piece, 0, 2 * sizeof(float));
    /* NOTE(review): p_str is declared outside this block, so its capacity
     * cannot be checked here -- confirm it can hold param_str. */
    strcpy(p_str, param_str);
    /* FIXME: strtok() is not re-entrant... */
    tok = strtok(temp_param_str, seps);
    while (tok != NULL) {
        params[param_index++] = (float) atof_c(tok);
        tok = strtok(NULL, seps);
        if (param_index >= N_PARAM) {
            break;
        }
    }
    if (tok != NULL) {
        E_INFO
            ("Piecewise linear warping takes up to two arguments, %s ignored.\n",
             tok);
    }
    if ((ENDORSE(params[1])) < (ENDORSE(sampling_rate))) {
        /* Precompute these. These are the coefficients of a
         * straight line that contains the points (F, aF) and (N,
         * N), where a = params[0], F = params[1], N = Nyquist
         * frequency.
         */
        if ((ENDORSE(params[1])) == 0) {
            params[1] = sampling_rate * 0.85f;
        }
        final_piece[0] =
            (nyquist_frequency -
             params[0] * params[1]) / (nyquist_frequency - params[1]);
        final_piece[1] =
            nyquist_frequency * params[1] * (params[0] -
                                             1.0f) / (nyquist_frequency -
                                                      params[1]);
    }
    else {
        memset(final_piece, 0, 2 * sizeof(float));
    }
    if ((ENDORSE(params[0])) == 0) {
        is_neutral = YES;
        E_INFO
            ("Piecewise linear warping cannot have slope zero, warping not applied.\n");
    }
}
int main(int argc, char **argv) { u16 (*bayer)[WAMI_DEBAYER_IMG_NUM_COLS] = NULL; rgb_pixel (*debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL; char *input_directory = NULL; #ifdef ENABLE_CORRECTNESS_CHECKING rgb_pixel (*gold_debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL; #endif const size_t num_bayer_pixels = WAMI_DEBAYER_IMG_NUM_ROWS * WAMI_DEBAYER_IMG_NUM_COLS; const size_t num_debayer_pixels = (WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD) * (WAMI_DEBAYER_IMG_NUM_COLS-2*PAD); if (argc != 2) { fprintf(stderr, "%s <directory-containing-input-files>\n", argv[0]); exit(EXIT_FAILURE); } input_directory = argv[1]; bayer = XMALLOC(sizeof(u16) * num_bayer_pixels); debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels); #ifdef ENABLE_CORRECTNESS_CHECKING gold_debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels); #endif read_image_file( (char *) bayer, input_filename, input_directory, sizeof(u16) * num_bayer_pixels); memset(debayer, 0, sizeof(u16) * num_debayer_pixels); printf("WAMI kernel 1 parameters:\n\n"); printf("Input image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS); printf("Input image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS); printf("Output image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS-2*PAD); printf("Output image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD); printf("\nStarting WAMI kernel 1 (debayer).\n"); tic(); accept_roi_begin(); wami_debayer( debayer, bayer); accept_roi_end(); PRINT_STAT_DOUBLE("CPU time using func toc - ", toc()); #ifdef ENABLE_CORRECTNESS_CHECKING read_image_file( (char *) gold_debayer, golden_output_filename, input_directory, sizeof(rgb_pixel) * num_debayer_pixels); /* * An exact match is expected for the debayer kernel, so we check * each pixel individually and report either the first failure or * a success message. 
*/ { /* // original error metric int r, c, success = 1; for (r = 0; success && r < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++r) { for (c = 0; c < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++c) { if (ENDORSE(debayer[r][c].r != gold_debayer[r][c].r)) { printf("Validation error: red pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].r, gold_debayer[r][c].r); success = 0; break; } if (ENDORSE(debayer[r][c].g != gold_debayer[r][c].g)) { printf("Validation error: green pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].g, gold_debayer[r][c].g); success = 0; break; } if (ENDORSE(debayer[r][c].b != gold_debayer[r][c].b)) { printf("Validation error: blue pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].b, gold_debayer[r][c].b); success = 0; break; } } } if (success) { printf("\nValidation checks passed -- the test output matches the golden output.\n\n"); } */ // new error metric int r, c; double err; for (r = 0; r < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++r) { for (c = 0; c < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++c) { double pixel_error = 0.0; pixel_error += ENDORSE(((double) abs(debayer[r][c].r - gold_debayer[r][c].r)) / ((double) 65535)); pixel_error += ENDORSE(((double) abs(debayer[r][c].g - gold_debayer[r][c].g)) / ((double) 65535)); pixel_error += ENDORSE(((double) abs(debayer[r][c].b - gold_debayer[r][c].b)) / ((double) 65535)); err += (pixel_error / ((double) 3)) / ((double) ((WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD) * (WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD))); } } FILE *fp = fopen("err.txt", "wb"); assert(fp != NULL); fprintf(fp, "%.2f\n", err); fclose(fp); } #endif #ifdef WRITE_OUTPUT_TO_DISK printf("Writing output to %s/%s.\n", output_directory, output_filename); { const u16 output_channels = 3; write_image_file( (char *) debayer, output_filename, output_directory, WAMI_DEBAYER_IMG_NUM_COLS - 2*PAD, WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD, 
output_channels); } #endif FREE_AND_NULL(bayer); FREE_AND_NULL(debayer); #ifdef ENABLE_CORRECTNESS_CHECKING FREE_AND_NULL(gold_debayer); #endif return 0; }
int main(int argc, char** argv) { FILE* file; char *output_file = "my_output.txt"; int i, ret_val; ccv_dense_matrix_t* image = 0; ccv_array_t* seq; accept_roi_begin(); assert(argc >= 3); ccv_enable_default_cache(); ccv_bbf_classifier_cascade_t* cascade = ccv_bbf_read_classifier_cascade(argv[2]); ccv_read(argv[1], &image, CCV_IO_GRAY | CCV_IO_ANY_FILE); if (image != 0) { unsigned int elapsed_time = get_current_time(); seq = ccv_bbf_detect_objects(image, &cascade, 1, ccv_bbf_default_params); elapsed_time = get_current_time() - elapsed_time; for (i = 0; ENDORSE(i < seq->rnum); i++) { ccv_comp_t* comp = (ccv_comp_t*)ENDORSE(ccv_array_get(seq, i)); printf("%d %d %d %d %f\n", comp->rect.x, comp->rect.y, comp->rect.width, comp->rect.height, comp->classification.confidence); } printf("total : %d in time %dms\n", seq->rnum, elapsed_time); ccv_bbf_classifier_cascade_free(cascade); ccv_disable_cache(); accept_roi_end(); file = fopen(output_file, "w"); if (file == NULL) { perror("fopen for write failed"); return EXIT_FAILURE; } // latest changes struct coordinates { APPROX int x; APPROX int y; }; for (i = 0; ENDORSE(i < seq->rnum); i++) { ccv_comp_t* comp = (ccv_comp_t*) ENDORSE(ccv_array_get(seq, i)); struct coordinates upperleft, upperright, lowerleft, lowerright; upperleft.x = comp->rect.x; upperleft.y = comp->rect.y; upperright.x = comp->rect.x + comp->rect.width; upperright.y = upperleft.y; lowerleft.x = upperleft.x; lowerleft.y = upperleft.y + comp->rect.height; lowerright.x = upperright.x; lowerright.y = lowerleft.y; ret_val = fprintf(file, "%d %d\n%d %d\n%d %d\n%d %d\n", upperleft.x, upperleft.y, upperright.x, upperright.y, lowerright.x, lowerright.y, lowerleft.x, lowerleft.y); // latest changes if (ret_val < 0) { perror("fprintf of coordinates failed"); fclose(file); return EXIT_FAILURE; } } ret_val = fclose(file); if (ret_val != 0) { perror("fclose failed"); return EXIT_FAILURE; } ccv_array_free(seq); ccv_matrix_free(image); } else { FILE* r = fopen(argv[1], "rt"); 
if (argc == 4) chdir(argv[3]); if(r) { size_t len = 1024; char* file = (char*)malloc(len); ssize_t read; while((read = getline(&file, &len, r)) != -1) { while(read > 1 && isspace(file[read - 1])) read--; file[read] = 0; image = 0; ccv_read(file, &image, CCV_IO_GRAY | CCV_IO_ANY_FILE); assert(image != 0); seq = ccv_bbf_detect_objects(image, &cascade, 1, ccv_bbf_default_params); // seq already declared above for (i = 0; ENDORSE(i < seq->rnum); i++) { ccv_comp_t* comp = (ccv_comp_t*) ENDORSE(ccv_array_get(seq, i)); printf("%s %d %d %d %d %f\n", file, comp->rect.x, comp->rect.y, comp->rect.width, comp->rect.height, comp->classification.confidence); } } free(file); fclose(r); } ccv_bbf_classifier_cascade_free(cascade); ccv_disable_cache(); accept_roi_end(); file = fopen(output_file, "w"); if (file == NULL) { perror("fopen for write failed"); return EXIT_FAILURE; } for (i = 0; ENDORSE(i < seq->rnum); i++) { ccv_comp_t* comp = (ccv_comp_t*) ENDORSE(ccv_array_get(seq, i)); ret_val = fprintf(file, "%d\n%d\n%d\n%d\n%f\n", comp->rect.x, comp->rect.y, comp->rect.width, comp->rect.height, comp->classification.confidence); if (ret_val < 0) { perror("fprintf of coordinates and confidence failed"); fclose(file); return EXIT_FAILURE; } } ret_val = fclose(file); if (ret_val != 0) { perror("fclose failed"); return EXIT_FAILURE; } ccv_array_free(seq); ccv_matrix_free(image); } return 0; }
/*
 * Solve R^H R x = b for every (doppler bin, range block, steering vector),
 * where R is the upper triangular Cholesky factor, b the steering vector
 * and x the resulting adaptive weight vector.
 *
 * Forward substitution first solves R^H y = b, storing y in x; back
 * substitution then solves R x = y in place.
 */
static void forward_and_back_substitution(
    complex adaptive_weights[N_DOP][N_BLOCKS][N_STEERING][N_CHAN*TDOF],
    complex (* const cholesky_factors)[N_BLOCKS][N_CHAN*TDOF][N_CHAN*TDOF],
    complex (* const steering_vectors)[N_CHAN*TDOF])
{
    int dop, block, sv, row, k;
    APPROX int j;
    complex accum;

    for (dop = 0; dop < N_DOP; ++dop)
    {
        for (block = 0; block < N_BLOCKS; ++block)
        {
            /* Factor for this (dop, block) pair. */
            complex (* const R)[N_CHAN*TDOF] = cholesky_factors[dop][block];

            for (sv = 0; sv < N_STEERING; ++sv)
            {
                complex * const x = adaptive_weights[dop][block][sv];
                const complex * const b = steering_vectors[sv];

                /* Forward substitution: R^H y = b, y stored in x. */
                for (row = 0; row < N_CHAN*TDOF; ++row)
                {
                    APPROX const float diag_inv = 1.0f / R[row][row].re;
                    accum.re = accum.im = 0.0f;
                    for (j = 0; ENDORSE(j < row); ++j)
                    {
                        /*
                         * The lower triangular entries are the conjugates
                         * of the upper triangular entries of R.
                         */
                        const complex term = cmult(cconj(R[j][row]), x[j]);
                        accum.re += term.re;
                        accum.im += term.im;
                    }
                    x[row].re = (b[row].re - accum.re) * diag_inv;
                    x[row].im = (b[row].im - accum.im) * diag_inv;
                }

                /* Back substitution: R x = y, solved from the last row up. */
                for (j = N_CHAN*TDOF-1; ENDORSE(j >= 0); --j)
                {
                    APPROX const float diag_inv = 1.0f / R[j][j].re;
                    accum.re = accum.im = 0.0f;
                    for (k = ENDORSE(j+1); k < N_CHAN*TDOF; ++k)
                    {
                        const complex term = cmult(R[j][k], x[k]);
                        accum.re += term.re;
                        accum.im += term.im;
                    }
                    x[j].re = (x[j].re - accum.re) * diag_inv;
                    x[j].im = (x[j].im - accum.im) * diag_inv;
                }
            }
        }
    }
}
/*
 * Cholesky-factorise every covariance matrix.
 *
 * The covariance matrices are copied into cholesky_factors and factorised
 * there in place, leaving the upper triangular factor R. Afterwards the
 * conjugate of the upper triangle is mirrored into the lower triangle.
 * Notation follows Trefethen and Bau, "Numerical Linear Algebra", SIAM 1997.
 */
static void cholesky_factorization(
    complex cholesky_factors[N_DOP][N_BLOCKS][N_CHAN*TDOF][N_CHAN*TDOF],
    complex (* const covariance)[N_BLOCKS][N_CHAN*TDOF][N_CHAN*TDOF])
{
    int k, dop, block;
    APPROX int i, j;
    APPROX float Rkk_inv, Rkk_inv_sqrt;

    /* Work on a copy so the covariance input is left untouched. */
    memcpy(cholesky_factors, covariance,
        sizeof(complex)*N_DOP*N_BLOCKS*N_CHAN*TDOF*N_CHAN*TDOF);

    for (dop = 0; dop < N_DOP; ++dop)
    {
        for (block = 0; block < N_BLOCKS; ++block)
        {
            /* Matrix being factorised for this (dop, block) pair. */
            complex (* const R)[N_CHAN*TDOF] = cholesky_factors[dop][block];

            for (k = 0; k < N_CHAN*TDOF; ++k)
            {
                /*
                 * Hermitian positive definite input is assumed, so the
                 * diagonal is real-valued and positive; this is not checked.
                 */
                Rkk_inv = 1.0f / R[k][k].re;
                Rkk_inv_sqrt = sqrt(Rkk_inv);

                /* Update the trailing rows using row k. */
                for (j = k+1; ENDORSE(j < N_CHAN*TDOF); ++j)
                {
                    const complex Rkj_conj = cconj(R[k][j]);
                    for (i = j; ENDORSE(i < N_CHAN*TDOF); ++i)
                    {
                        const complex update = cmult(R[k][i], Rkj_conj);
                        R[j][i].re -= update.re * Rkk_inv;
                        R[j][i].im -= update.im * Rkk_inv;
                    }
                }

                /* Scale row k by 1/sqrt(R[k][k]). */
                for (i = k; ENDORSE(i < N_CHAN*TDOF); ++i)
                {
                    R[k][i].re *= Rkk_inv_sqrt;
                    R[k][i].im *= Rkk_inv_sqrt;
                }
            }

            /*
             * Mirror the conjugate of the upper triangle into the lower
             * triangle. This is not needed for correctness, but it keeps
             * metrics computed over all elements from being diluted by
             * trivially correct zeros.
             */
            for (i = 0; ENDORSE(i < N_CHAN*TDOF); ++i)
            {
                for (j = i+1; ENDORSE(j < N_CHAN*TDOF); ++j)
                {
                    const complex upper = R[i][j];
                    /* ACCEPT_PERMIT */
                    R[j][i].re = upper.re;
                    R[j][i].im = -1.0f * upper.im;
                }
            }
        }
    }
}
void activateVisibleUnits(int hidden[], int stochastic, int visible[]) { accept_roi_begin(); // Calculate activation energy for visible units APPROX double visibleEnergies[NUM_VISIBLE]; int v; for (v = 0; v < NUM_VISIBLE; v++) { // Get the sum of energies APPROX double sum = 0; int h; for (h = 0; h < NUM_HIDDEN + 1; h++) // remove the +1 if you want to skip the bias sum += (double) hidden[h] * edges[v][h]; visibleEnergies[v] = sum; } // Activate visible units, handles K visible units at a time for (v = 0; v < NUM_VISIBLE; v += K) { APPROX double exps[K]; // this is the numerator APPROX double sumOfExps = 0.0; // this is the denominator int j; for (j = 0; j < K; j++) { exps[j] = exp(visibleEnergies[v + j]); sumOfExps += exps[j]; } // Getting the probabilities APPROX double probs[K]; for (j = 0; j < K; j++) probs[j] = exps[j] / sumOfExps; // Activate units if (stochastic) // used for training { for (j = 0; j < K; j++) { if (ENDORSE(RAND) < ENDORSE(probs[j])) visible[v + j] = 1; else visible[v + j] = 0; } } else // used for prediction: uses expectation { APPROX double expectation = 0.0; for (j = 0; j < K; j++) expectation += j * probs[j]; // we will predict rating between 0 to K-1, not between 1 to K long prediction = round((ENDORSE(expectation))); for (j = 0; j < K; j++) { if (j == prediction) visible[v + j] = 1; else visible[v + j] = 0; } } } visible[NUM_VISIBLE] = 1; // turn on bias accept_roi_end(); }
/*
 * Runs one or more BBF classifier cascades over an image at multiple
 * scales and returns the detected rectangles.
 *
 * a        - input image; only read unless it has to be resampled, in
 *            which case a resampled copy is used instead.
 * _cascade - array of `count` cascades; _cascade[0] defines the window
 *            size the image pyramid is built for.
 * count    - number of cascades in _cascade.
 * params   - detection parameters.  Fields used here:
 *              size          requested minimum window size,
 *              interval      number of extra scales per octave,
 *              accurate      also scan the three offset sub-samplings,
 *              min_neighbors minimum group size to keep a detection
 *                            (0 disables grouping),
 *              flags         CCV_BBF_NO_NESTED keeps only the most
 *                            confident rectangle of every group.
 *
 * Returns an array of ccv_comp_t created in this function; nothing here
 * releases it, so presumably the caller frees it with ccv_array_free()
 * (confirm against the callers).
 *
 * Pyramid layout: every scale `i` owns four slots pyr[i * 4 .. i * 4 + 3].
 * Slot 0 is the image at that scale; slots 1..3 are only filled when
 * params.accurate is set and hold the results of ccv_sample_down() with
 * the (1,0), (0,1) and (1,1) offsets.  Scale i + next is produced by
 * ccv_sample_down() from scale i.
 */
ccv_array_t* ccv_bbf_detect_objects(ccv_dense_matrix_t* a, ccv_bbf_classifier_cascade_t** _cascade, int count, ccv_bbf_param_t params)
{
    int hr = a->rows / ENDORSE(params.size.height);
    int wr = a->cols / ENDORSE(params.size.width);
    double scale = pow(2., 1. / (params.interval + 1.));
    APPROX int next = params.interval + 1;
    int scale_upto = (int)(log((double)ccv_min(hr, wr)) / log(scale));
    ccv_dense_matrix_t** pyr = (ccv_dense_matrix_t**)alloca(ENDORSE(scale_upto + next * 2) * 4 * sizeof(ccv_dense_matrix_t*));
    memset(pyr, 0, (scale_upto + next * 2) * 4 * sizeof(ccv_dense_matrix_t*));
    /*
     * Bring the image to the scale at which the requested window size
     * matches the cascade window size.  The multiplication must happen
     * before the division: endorsing only the quotient made the integer
     * division truncate first (to 0 whenever params.size is larger than
     * the cascade size).
     */
    if (ENDORSE(params.size.height != _cascade[0]->size.height || params.size.width != _cascade[0]->size.width))
        ccv_resample(a, &pyr[0], 0,
                     ENDORSE(a->rows * _cascade[0]->size.height / params.size.height),
                     ENDORSE(a->cols * _cascade[0]->size.width / params.size.width),
                     CCV_INTER_AREA);
    else
        pyr[0] = a;
    APPROX int i;
    int j, k, t, x, y, q;
    /* Scales within the first octave are resampled from pyr[0]. */
    for (i = 1; ENDORSE(i < ccv_min(params.interval + 1, scale_upto + next * 2)); i++)
        ccv_resample(pyr[0], &pyr[i * 4], 0, (int)(pyr[0]->rows / pow(scale, i)), (int)(pyr[0]->cols / pow(scale, i)), CCV_INTER_AREA);
    /* Every further scale is sampled down from the scale `next` steps earlier. */
    for (i = next; ENDORSE(i < scale_upto + next * 2); i++)
        ccv_sample_down(pyr[i * 4 - next * 4], &pyr[i * 4], 0, 0, 0);
    if (params.accurate)
        for (i = next * 2; ENDORSE(i < scale_upto + next * 2); i++)
        {
            ccv_sample_down(pyr[i * 4 - next * 4], &pyr[i * 4 + 1], 0, 1, 0);
            ccv_sample_down(pyr[i * 4 - next * 4], &pyr[i * 4 + 2], 0, 0, 1);
            ccv_sample_down(pyr[i * 4 - next * 4], &pyr[i * 4 + 3], 0, 1, 1);
        }
    ccv_array_t* idx_seq;
    ccv_array_t* seq = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
    ccv_array_t* seq2 = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
    ccv_array_t* result_seq = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
    /* detect in multi scale */
    for (t = 0; t < count; t++)
    {
        ccv_bbf_classifier_cascade_t* cascade = _cascade[t];
        /* Factors that map window coordinates back to the input image. */
        APPROX float scale_x = (float) params.size.width / (float) cascade->size.width;
        APPROX float scale_y = (float) params.size.height / (float) cascade->size.height;
        ccv_array_clear(seq);
        for (i = 0; ENDORSE(i < scale_upto); i++)
        {
            APPROX int dx[] = {0, 1, 0, 1};
            APPROX int dy[] = {0, 0, 1, 1};
            /*
             * The scan grid is defined on the coarsest of the three
             * planes a feature reads (scales i, i + next, i + 2 * next).
             */
            APPROX int i_rows = pyr[i * 4 + next * 8]->rows - ENDORSE(cascade->size.height >> 2);
            APPROX int steps[] = { pyr[i * 4]->step,
                                   pyr[i * 4 + next * 4]->step,
                                   pyr[i * 4 + next * 8]->step };
            APPROX int i_cols = pyr[i * 4 + next * 8]->cols - ENDORSE(cascade->size.width >> 2);
            /* Bytes to skip at the end of a scanned row, per plane. */
            int paddings[] = { pyr[i * 4]->step * 4 - i_cols * 4,
                               pyr[i * 4 + next * 4]->step * 2 - i_cols * 2,
                               pyr[i * 4 + next * 8]->step - i_cols };
            for (q = 0; q < (params.accurate ? 4 : 1); q++)
            {
                /* Start of each plane, shifted by the q-th sub-sampling offset. */
                APPROX unsigned char* u8[] = { pyr[i * 4]->data.u8 + dx[q] * 2 + dy[q] * pyr[i * 4]->step * 2,
                                               pyr[i * 4 + next * 4]->data.u8 + dx[q] + dy[q] * pyr[i * 4 + next * 4]->step,
                                               pyr[i * 4 + next * 8 + q]->data.u8 };
                for (y = 0; ENDORSE(y < i_rows); y++)
                {
                    for (x = 0; ENDORSE(x < i_cols); x++)
                    {
                        APPROX float sum;
                        APPROX int flag = 1;
                        ccv_bbf_stage_classifier_t* classifier = cascade->stage_classifier;
                        /* A window is accepted only if it passes every stage. */
                        for (j = 0; j < ENDORSE(cascade->count); ++j, ++classifier)
                        {
                            sum = 0;
                            APPROX float* alpha = classifier->alpha;
                            ccv_bbf_feature_t* feature = classifier->feature;
                            /* Each feature selects one of its two alpha weights. */
                            for (k = 0; k < ENDORSE(classifier->count); ++k, alpha += 2, ++feature)
                                sum += alpha[_ccv_run_bbf_feature(feature, ENDORSE(steps), u8)];
                            if (ENDORSE(sum) < ENDORSE(classifier->threshold))
                            {
                                flag = 0;
                                break;
                            }
                        }
                        if (ENDORSE(flag))
                        {
                            ccv_comp_t comp;
                            comp.rect = ccv_rect((int)((x * 4 + dx[q] * 2) * scale_x + 0.5),
                                                 (int)((y * 4 + dy[q] * 2) * scale_y + 0.5),
                                                 (int)(cascade->size.width * scale_x + 0.5),
                                                 (int)(cascade->size.height * scale_y + 0.5));
                            comp.neighbors = 1;
                            comp.classification.id = t;
                            /* Score of the last stage evaluated. */
                            comp.classification.confidence = sum;
                            ccv_array_push(seq, &comp);
                        }
                        /* One grid step is 4, 2 and 1 bytes on the three planes. */
                        u8[0] += 4;
                        u8[1] += 2;
                        u8[2] += 1;
                    }
                    u8[0] += paddings[0];
                    u8[1] += paddings[1];
                    u8[2] += paddings[2];
                }
            }
            scale_x *= scale;
            scale_y *= scale;
        }

        /* the following code from OpenCV's haar feature implementation */
        if(params.min_neighbors == 0)
        {
            /* No grouping requested: keep every raw detection. */
            for (i = 0; ENDORSE(i < seq->rnum); i++)
            {
                ccv_comp_t* comp = (ccv_comp_t*)ENDORSE(ccv_array_get(seq, i));
                ccv_array_push(result_seq, comp);
            }
        } else {
            idx_seq = 0;
            ccv_array_clear(seq2);
            // group retrieved rectangles in order to filter out noise
            int ncomp = ccv_array_group(seq, &idx_seq, _ccv_is_equal_same_class, 0);
            ccv_comp_t* comps = (ccv_comp_t*)ccmalloc((ncomp + 1) * sizeof(ccv_comp_t));
            memset(comps, 0, (ncomp + 1) * sizeof(ccv_comp_t));
            // count number of neighbors
            for(i = 0; ENDORSE(i < seq->rnum); i++)
            {
                ccv_comp_t r1 = *(ccv_comp_t*)ENDORSE(ccv_array_get(seq, i));
                int idx = *(int*)ENDORSE(ccv_array_get(idx_seq, i));

                if (ENDORSE(comps[idx].neighbors) == 0)
                    comps[idx].classification.confidence = r1.classification.confidence;

                ++comps[idx].neighbors;

                /* Accumulate; the sums are averaged below. */
                comps[idx].rect.x += r1.rect.x;
                comps[idx].rect.y += r1.rect.y;
                comps[idx].rect.width += r1.rect.width;
                comps[idx].rect.height += r1.rect.height;
                comps[idx].classification.id = r1.classification.id;
                comps[idx].classification.confidence = ccv_max(comps[idx].classification.confidence, r1.classification.confidence);
            }
            // calculate average bounding box
            for(i = 0; ENDORSE(i < ncomp); i++)
            {
                int n = ENDORSE(comps[i].neighbors);
                if(n >= params.min_neighbors)
                {
                    ccv_comp_t comp;
                    /* (sum * 2 + n) / (2 * n) is sum / n rounded to nearest. */
                    comp.rect.x = (comps[i].rect.x * 2 + n) / (2 * n);
                    comp.rect.y = (comps[i].rect.y * 2 + n) / (2 * n);
                    comp.rect.width = (comps[i].rect.width * 2 + n) / (2 * n);
                    comp.rect.height = (comps[i].rect.height * 2 + n) / (2 * n);
                    comp.neighbors = comps[i].neighbors;
                    comp.classification.id = comps[i].classification.id;
                    comp.classification.confidence = comps[i].classification.confidence;
                    ccv_array_push(seq2, &comp);
                }
            }

            // filter out small face rectangles inside large face rectangles
            for(i = 0; ENDORSE(i < seq2->rnum); i++)
            {
                ccv_comp_t r1 = *(ccv_comp_t*)ENDORSE(ccv_array_get(seq2, i));
                APPROX int flag = 1;

                for(j = 0; ENDORSE(j < seq2->rnum); j++)
                {
                    ccv_comp_t r2 = *(ccv_comp_t*)ENDORSE(ccv_array_get(seq2, j));
                    APPROX int distance = (int)(r2.rect.width * 0.25 + 0.5);

                    if(ENDORSE(i != j &&
                       r1.classification.id == r2.classification.id &&
                       r1.rect.x >= r2.rect.x - distance &&
                       r1.rect.y >= r2.rect.y - distance &&
                       r1.rect.x + r1.rect.width <= r2.rect.x + r2.rect.width + distance &&
                       r1.rect.y + r1.rect.height <= r2.rect.y + r2.rect.height + distance &&
                       (r2.neighbors > ccv_max(3, r1.neighbors) || r1.neighbors < 3)))
                    {
                        flag = 0;
                        break;
                    }
                }

                if(ENDORSE(flag))
                    ccv_array_push(result_seq, &r1);
            }
            ccv_array_free(idx_seq);
            ccfree(comps);
        }
    }

    ccv_array_free(seq);
    ccv_array_free(seq2);

    ccv_array_t* result_seq2;
    /* the following code from OpenCV's haar feature implementation */
    if (params.flags & CCV_BBF_NO_NESTED)
    {
        result_seq2 = ccv_array_new(sizeof(ccv_comp_t), 64, 0);
        idx_seq = 0;
        // group retrieved rectangles in order to filter out noise
        int ncomp = ccv_array_group(result_seq, &idx_seq, _ccv_is_equal, 0);
        ccv_comp_t* comps = (ccv_comp_t*)ccmalloc((ncomp + 1) * sizeof(ccv_comp_t));
        memset(comps, 0, (ncomp + 1) * sizeof(ccv_comp_t));

        // keep, for every group, the rectangle with the highest confidence
        for(i = 0; ENDORSE(i < result_seq->rnum); i++)
        {
            ccv_comp_t r1 = *(ccv_comp_t*)ENDORSE(ccv_array_get(result_seq, i));
            int idx = *(int*)ENDORSE(ccv_array_get(idx_seq, i));

            if (ENDORSE(comps[idx].neighbors == 0 || comps[idx].classification.confidence < r1.classification.confidence))
            {
                comps[idx].classification.confidence = r1.classification.confidence;
                comps[idx].neighbors = 1;
                comps[idx].rect = r1.rect;
                comps[idx].classification.id = r1.classification.id;
            }
        }

        // emit one rectangle per non-empty group
        for(i = 0; ENDORSE(i < ncomp); i++)
            if(ENDORSE(comps[i].neighbors))
                ccv_array_push(result_seq2, &comps[i]);

        /* The group labels are no longer needed; the grouping branch
         * above frees them as well. */
        ccv_array_free(idx_seq);
        ccv_array_free(result_seq);
        ccfree(comps);
    } else {
        result_seq2 = result_seq;
    }

    /* Release the pyramid.  pyr[0] is freed only when it is a resampled
     * copy rather than the caller's image. */
    for (i = 1; ENDORSE(i < scale_upto + next * 2); i++)
        ccv_matrix_free(pyr[i * 4]);
    if (params.accurate)
        for (i = next * 2; ENDORSE(i < scale_upto + next * 2); i++)
        {
            ccv_matrix_free(pyr[i * 4 + 1]);
            ccv_matrix_free(pyr[i * 4 + 2]);
            ccv_matrix_free(pyr[i * 4 + 3]);
        }
    if (ENDORSE(params.size.height != _cascade[0]->size.height || params.size.width != _cascade[0]->size.width))
        ccv_matrix_free(pyr[0]);

    return result_seq2;
}
int main (int argc, char * argv[]) { APPROX int * frame; APPROX int * output; int i; int nFilterRowsFD = 9; int nFilterColsFD = 9; APPROX fltPixel_t FD[] = { 1, 3, 4, 5, 6, 5, 4, 3, 1, 3, 9, 12, 15, 18, 15, 12, 9, 3, 4, 12, 16, 20, 24, 20, 16, 12, 4, 5, 15, 20, 25, 30, 25, 20, 15, 5, 6, 18, 24, 30, 36, 30, 24, 18, 6, 5, 15, 20, 25, 30, 25, 20, 15, 5, 4, 12, 16, 20, 24, 20, 16, 12, 4, 3, 9, 12, 15, 18, 15, 12, 9, 3, 1, 3, 4, 5, 6, 5, 4, 3, 1 }; for (i = 0; i < nFilterRowsFD * nFilterColsFD; i++) // ACCEPT_FORBID { FD[i] /= (1024.0); } srand (time (NULL)); STATS_INIT (); PRINT_STAT_STRING ("kernel", "2d_convolution"); PRINT_STAT_INT ("rows", N); PRINT_STAT_INT ("columns", M); PRINT_STAT_INT ("num_frames", BATCH_SIZE); frame = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t)); output = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t)); if (!frame || !output) { fprintf(stderr, "ERROR: Allocation failed.\n"); exit(-1); } /* load image */ tic (); read_array_from_octave (ENDORSE(frame), N, M, FILENAME); PRINT_STAT_DOUBLE ("time_load_image", toc ()); /* Make BATCH_SIZE-1 copies */ tic (); for (i = 1; i < BATCH_SIZE; i++) // ACCEPT_FORBID { memcpy (&frame[i * M * N], frame, M * N * sizeof(algPixel_t)); } PRINT_STAT_DOUBLE ("time_copy", toc ()); /* Perform the 2D convolution */ tic (); accept_roi_begin(); for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID { conv2d (&frame[i * M * N], &output[i * M * N], N, M, FD, 1.0, nFilterRowsFD, nFilterColsFD); } accept_roi_end(); PRINT_STAT_DOUBLE ("time_2d_convolution", toc ()); /* Write the results out to disk */ for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID { char buffer [30]; sprintf (buffer, "2dconv_output.%d.mat", i); write_array_to_octave (ENDORSE(&output[i * M * N]), N, M, buffer, "output_" SIZE); } PRINT_STAT_STRING ("output_file", "2dconv_output." SIZE ".#.mat"); STATS_END (); free (output); free (frame); return 0; }