/*
 * Perform one parameter update.
 *
 * Calls regularisation() and then momentum_update() with the same
 * p / dbiases / dweights arguments; `step` is forwarded only to
 * momentum_update().
 *
 * NOTE(review): dbiases / dweights look like per-layer gradient arrays that
 * regularisation() adjusts in place before they are consumed -- confirm
 * against the two callees.
 */
void update(par* p, gsl_vector** dbiases, gsl_matrix** dweights, double step)
{
    regularisation(p, dbiases, dweights);
    momentum_update(p, dbiases, dweights, step);
}
/*
 * Evaluate the cost for the parameter vector x.
 *
 * Steps, in order:
 *   1. getDef() fills `def` from the bases bX, bY, bZ and fills a local
 *      buffer `jac` from bX, dbY, bZ (same x and xscale in both calls).
 *   2. Inside an OpenMP parallel region: `source` is resampled into `warped`
 *      using `def` and `mask`, hist2() builds H from `target` and `warped`,
 *      and H is smoothed with the kernel skrn (length skrnl) along rows and
 *      then columns.
 *   3. histSumAll(), histLogAll(), histLogVert() and histLogHorz() reduce the
 *      smoothed H to four scalars.
 *
 * Returns
 *     -(logVert + logHorz) / logAll + regstrength * regularisation(jac, nvox)
 *
 * NOTE(review): the first term looks like a negated normalised mutual
 * information of the joint histogram -- confirm against the hist* helpers.
 *
 * `def`, `warped` and `H` are caller-supplied buffers that are written here
 * (via the callees); this function neither allocates nor frees them. H is
 * used as a 256 x 256 float array.
 */
double pewarpcost(float *x, float *bX, float *bY, float *dbY, float *bZ,
                  mwSignedIndex *bdims, uint8_T *target, float *source,
                  uint8_T *mask, mwSignedIndex *ddims, float *skrn, int skrnl,
                  float xscale, float regstrength, float *H, uint8_T *warped,
                  float *def)
{
    /* Side length and total cell count of the square histogram H. */
    enum { HIST_SIDE = 256, HIST_CELLS = HIST_SIDE * HIST_SIDE };

    int nvox = ddims[0] * ddims[1] * ddims[2];

    /* Scratch buffers owned by this call; all are released before returning. */
    float *jac = mxMalloc(sizeof(float) * nvox);
    float *rowSmoothed = mxMalloc(sizeof(float) * HIST_SIDE * HIST_SIDE);
    double *vertScratch = mxMalloc(sizeof(double) * HIST_SIDE);
    double *horzScratch = mxMalloc(sizeof(double) * HIST_SIDE);

    double histTotal, logAll, logVert, logHorz;
    double cost;

    getDef(def, ddims, bdims[0], bX, bdims[1], bY, bdims[2], bZ, x, xscale);

    /* The actual Jacobian is 1 + jac, but it is easier to add 1 later. */
    getDef(jac, ddims, bdims[0], bX, bdims[1], dbY, bdims[2], bZ, x, xscale);

    /* For some reason, execution is single-threaded without the num_threads
     * clause.
     * TODO: Find out why and change. */
    #pragma omp parallel num_threads(4) shared(histTotal, logAll, logVert, logHorz)
    {
        /* jac is intentionally not passed to the resampler here. */
        peResampleAndApplyJacobian(warped, source, def, mask, ddims);

        hist2(H, target, warped, mask, nvox);

        /* Two-pass smoothing: rows into the scratch buffer, columns back into H. */
        smoothRows(rowSmoothed, H, HIST_SIDE, HIST_SIDE, skrn, skrnl);
        smoothCols(H, rowSmoothed, HIST_SIDE, HIST_SIDE, skrn, skrnl);

        /* The total must be complete before the sections below read it;
         * the implicit barrier at the end of `single` ensures that. */
        #pragma omp single
        {
            histTotal = histSumAll(H, HIST_CELLS);
        }

        /* The three log terms each write a distinct scalar, so they can run
         * in separate sections. */
        #pragma omp sections
        {
            #pragma omp section
            {
                logAll = histLogAll(H, histTotal, HIST_CELLS);
            }

            #pragma omp section
            {
                logVert = histLogVert(H, vertScratch, histTotal, HIST_SIDE, HIST_SIDE);
            }

            #pragma omp section
            {
                logHorz = histLogHorz(H, horzScratch, histTotal, HIST_SIDE, HIST_SIDE);
            }
        }
    }

    cost = - (logVert + logHorz) / logAll + regstrength * regularisation(jac, nvox);

    mxFree(jac);
    mxFree(rowSmoothed);
    mxFree(vertScratch);
    mxFree(horzScratch);

    return cost;
}