/**
 * Back-propagates an error gradient through a row-wise softmax.
 *
 * With Y = cvSoftmax(X), the result written to dE_dY_afder is
 *
 *     Y .* dE_dY  -  Y .* rowsum(Y .* dE_dY)
 *
 * where ".*" is the element-wise product and rowsum() is the per-row sum
 * replicated across all columns.
 *
 * @param X            softmax input; its rows/cols/type size every temporary
 * @param dE_dY        gradient with respect to the softmax output. When its
 *                     shape is (X->cols x X->rows) it is transposed first;
 *                     otherwise it is used as is.
 *                     NOTE: for a square X both layouts have the same shape,
 *                     so the transpose branch is always taken.
 * @param dE_dY_afder  destination, supplied by the caller; it receives
 *                     element-wise products with the (rows x cols) matrix Y,
 *                     so it is expected to have X's shape.
 */
void cvSoftmaxDer(CvMat * X, CvMat * dE_dY, CvMat * dE_dY_afder)
{
  CV_FUNCNAME("cvSoftmaxDer");
  __BEGIN__;
  const int nr = X->rows, nc = X->cols, dtype = CV_MAT_TYPE(X->type);
  CvMat * Y = cvCreateMat(nr, nc, dtype);
  CvMat * sum = cvCreateMat(nr, 1, dtype);
  CvMat * sum_repeat = cvCreateMat(nr, nc, dtype);

  cvSoftmax(X, Y);

  // dE_dY_afder = Y .* dE_dY
  if (dE_dY->rows==nc && dE_dY->cols==nr){
    // The transposed copy is only needed on this path, so the scratch matrix
    // is created (and released) here instead of on every call.
    CvMat * dE_dY_transpose = cvCreateMat(nr, nc, dtype);
    cvTranspose(dE_dY,dE_dY_transpose);
    cvMul(Y,dE_dY_transpose,dE_dY_afder);
    cvReleaseMat(&dE_dY_transpose);
  }else{
    cvMul(Y,dE_dY,dE_dY_afder);
  }

  // sum = per-row total of Y .* dE_dY; dim == -1 lets cvReduce derive the
  // reduction direction from the (nr x 1) destination.
  cvReduce(dE_dY_afder,sum,-1,CV_REDUCE_SUM);

  // sum_repeat = Y .* (row totals tiled across all columns)
  cvRepeat(sum,sum_repeat);
  cvMul(Y,sum_repeat,sum_repeat);

  cvSub(dE_dY_afder,sum_repeat,dE_dY_afder);

  cvReleaseMat(&sum);
  cvReleaseMat(&sum_repeat);
  cvReleaseMat(&Y);
  __END__;
}
//! assuming row vectors (a row is a sample) void cvSoftmax(CvMat * src, CvMat * dst){ CV_FUNCNAME("cvSoftmax"); __BEGIN__; CV_ASSERT(cvCountNAN(src)<1); cvExp(src,dst); CV_ASSERT(cvCountNAN(dst)<1); const int dtype = CV_MAT_TYPE(src->type); CvMat * sum = cvCreateMat(src->rows,1,dtype); CvMat * sum_repeat = cvCreateMat(src->rows,src->cols,dtype); cvReduce(dst,sum,-1,CV_REDUCE_SUM); CV_ASSERT(cvCountNAN(sum)<1); cvRepeat(sum,sum_repeat); cvDiv(dst,sum_repeat,dst); cvReleaseMat(&sum); cvReleaseMat(&sum_repeat); __END__; }
DMZ_INTERNAL NHorizontalSegmentation best_n_hseg(IplImage *y_strip, NVerticalSegmentation vseg) { // Gradient IplImage *grad = cvCreateImage(cvSize(428, 27), IPL_DEPTH_8U, 1); llcv_morph_grad3_2d_cross_u8(y_strip, grad); // Reduce (sum), normalize IplImage *grad_sum = cvCreateImage(cvSize(428, 1), IPL_DEPTH_32F, 1); // could sum to IPL_DEPTH_16U and then convert to 32F for normalization, doing it this way for simplicity, will probably get changed during optimization cvReduce(grad, grad_sum, 0 /* reduce to single row */, CV_REDUCE_SUM); cvNormalize(grad_sum, grad_sum, 0.0f, 1.0f, CV_MINMAX, NULL); cvReleaseImage(&grad); NHorizontalSegmentation best; best.n_offsets = vseg.number_length; best.score = 428.0f; // lower is better, this is the max possible (i.e. the worst) best.number_width = 0.0f; memset(&best.offsets, 0, 16 * sizeof(uint16_t)); float *grad_sum_data = (float *)llcv_get_data_origin(grad_sum); SliceF32 width_slice; SliceU16 offset_slice; width_slice.min = 17.1f; width_slice.max = 19.7f; width_slice.step = 0.5f; offset_slice.min = 0; offset_slice.max = SliceU16_MAX; offset_slice.step = 10; best = best_n_hseg_constrained(grad_sum_data, vseg, best, width_slice, offset_slice); // In the following lines, there's some bounds checking on offset_slice.min. // It is needed because it prevents underflow due to using uints. (The uint/int issue // also explains the ?: method instead of subtracting and taking max vs 0.) // There's no bounds checking needed on width_slice.min/max/step, because they can't // get outside of a reasonable range in the steps below. // The bounds checking on offset_slice.max is done in best_n_hseg_constrained, because // it can't overflow, and because we don't know enough here to do it conveniently and DRYly width_slice.min = best.number_width - 0.5f; width_slice.max = best.number_width + 0.5f; width_slice.step = 0.2f; offset_slice.min = best.pattern_offset < 10 ? 
0 : best.pattern_offset - 10; offset_slice.max = best.pattern_offset + 10; offset_slice.step = 1; best = best_n_hseg_constrained(grad_sum_data, vseg, best, width_slice, offset_slice); width_slice.min = best.number_width - 0.2f; width_slice.max = best.number_width + 0.2f; width_slice.step = 0.1f; offset_slice.min = best.pattern_offset < 3 ? 0 : best.pattern_offset - 3; offset_slice.max = best.pattern_offset + 3; offset_slice.step = 1; best = best_n_hseg_constrained(grad_sum_data, vseg, best, width_slice, offset_slice); width_slice.min = best.number_width - 0.1f; width_slice.max = best.number_width + 0.1f; width_slice.step = 0.05f; offset_slice.min = best.pattern_offset < 3 ? 0 : best.pattern_offset - 3; offset_slice.max = best.pattern_offset + 3; offset_slice.step = 1; best = best_n_hseg_constrained(grad_sum_data, vseg, best, width_slice, offset_slice); cvReleaseImage(&grad_sum); return best; }