/*
 * Fill the B-spline lookup tables for one axis.
 *
 * B and Bd each hold 4*n doubles and are indexed with mindex2(k, x, 4),
 * k = 0..3 selecting the basis function and x = 0..n-1 the offset inside
 * one grid cell.  B receives BsplineCoefficient(t, k) and Bd receives
 * BsplineCoefficientDerivative(t, k) / spacing, with t = x / n.
 *
 * Because 0 <= x < n, x/n already lies in [0,1), so no fractional-part
 * reduction is needed.
 */
static void fill_bspline_lut_jacdet(double *B, double *Bd, int n, double spacing)
{
    int x, k;
    double t;

    for (x = 0; x < n; x++) {
        t = (double)x / (double)n;
        for (k = 0; k < 4; k++) {
            B[mindex2(k, x, 4)]  = BsplineCoefficient(t, k);
            Bd[mindex2(k, x, 4)] = BsplineCoefficientDerivative(t, k) / spacing;
        }
    }
}

/*
 * The matlab mex function.
 *
 * Inputs (exactly seven are required):
 *   prhs[0], prhs[1], prhs[2] : Ox, Oy, Oz, the grid point arrays (read as double)
 *   prhs[3]                   : image size; elements 0..2 give the output dimensions
 *   prhs[4], prhs[5], prhs[6] : grid spacing per axis; the first element of each
 *                               is truncated to int and used as the table length
 *
 * Output:
 *   plhs[0] : double array with the dimensions taken from prhs[3].  It is
 *             filled by the transformvolume_jacobiandet worker threads.
 *
 * One worker is started per thread reported by maxNumCompThreads.  Every
 * worker receives the same set of shared arrays plus its own thread id.
 */
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] )
{
    /* Grid point arrays, spacing inputs and output buffer */
    double *Ox, *Oy, *Oz, *dxa, *dya, *dza, *Iout;

    /* Result / argument arrays for the maxNumCompThreads call */
    mxArray *matlabCallOut[1] = {0};
    mxArray *matlabCallIn[1] = {0};
    double *Nthreadsd;
    int Nthreads;

    /* Per-thread argument tables (array of pointer arrays) */
    double ***ThreadArgs = NULL;
    double **ThreadArgs1;

    /* Handles to the worker threads */
    ThreadHANDLE *ThreadList = NULL;

    /* Per-thread id, stored as a heap double so it fits the argument table */
    double **ThreadID = NULL;
    double *ThreadID1;

    /* Size of input image */
    double *Isize_d;
    mwSize dims[3];

    /* Size of grid */
    mwSize Osizex, Osizey, Osizez;
    double Osize_d[3] = {0, 0, 0};
    const mwSize *dimso;

    /* B-spline value and derivative lookup tables */
    double *Bu = NULL, *Bv = NULL, *Bw = NULL;
    double *Bdu = NULL, *Bdv = NULL, *Bdw = NULL;

    /* Loop variable */
    int i;

    /* Grid distance */
    int dx, dy, dz;

    /* Set when an error must be reported after the heap memory is released */
    const char *errmsg = NULL;

    (void)nlhs;

    /* Check for proper number of arguments. */
    if (nrhs != 7) {
        mexErrMsgTxt("Seven inputs are required.");
    }

    /* Get the sizes of the grid.
     * NOTE(review): dimso[2] is read without checking that prhs[0] really
     * has three dimensions; callers must pass a 3-D grid array. */
    dimso = mxGetDimensions(prhs[0]);
    Osizex = dimso[0];
    Osizey = dimso[1];
    Osizez = dimso[2];

    /* Assign pointers to each input. */
    Ox = (double *)mxGetData(prhs[0]);
    Oy = (double *)mxGetData(prhs[1]);
    Oz = (double *)mxGetData(prhs[2]);
    Isize_d = (double *)mxGetData(prhs[3]);
    dxa = (double *)mxGetData(prhs[4]);
    dya = (double *)mxGetData(prhs[5]);
    dza = (double *)mxGetData(prhs[6]);

    /* Get the spacing of the uniform b-spline grid */
    dx = (int)dxa[0];
    dy = (int)dya[0];
    dz = (int)dza[0];

    /* The spacings size the lookup tables, so they must be positive.
     * Nothing has been malloc'ed yet, so erroring out here leaks nothing. */
    if (dx < 1 || dy < 1 || dz < 1) {
        mexErrMsgTxt("Grid spacing must be at least 1 in every dimension.");
    }

    /* Create image matrix for the return arguments with the size of input image */
    dims[0] = (mwSize)Isize_d[0];
    dims[1] = (mwSize)Isize_d[1];
    dims[2] = (mwSize)Isize_d[2];
    plhs[0] = mxCreateNumericArray(3, dims, mxDOUBLE_CLASS, mxREAL);

    /* Assign pointer to output. */
    Iout = (double *)mxGetData(plhs[0]);

    /* Get number of allowed threads */
    mexCallMATLAB(1, matlabCallOut, 0, matlabCallIn, "maxNumCompThreads");
    Nthreadsd = mxGetPr(matlabCallOut[0]);
    Nthreads = (int)Nthreadsd[0];
    if (Nthreads < 1) {
        mexErrMsgTxt("maxNumCompThreads returned an invalid thread count.");
    }

    /* Reserve room for the thread handles and the per-thread tables.
     * calloc keeps unused slots NULL so the cleanup code can free them
     * unconditionally. */
    ThreadList = (ThreadHANDLE *)malloc((size_t)Nthreads * sizeof(ThreadHANDLE));
    ThreadID = (double **)calloc((size_t)Nthreads, sizeof(double *));
    ThreadArgs = (double ***)calloc((size_t)Nthreads, sizeof(double **));

    /* Reserve the polynomial look up tables */
    Bu  = (double *)malloc((size_t)dx * 4 * sizeof(double));
    Bv  = (double *)malloc((size_t)dy * 4 * sizeof(double));
    Bw  = (double *)malloc((size_t)dz * 4 * sizeof(double));
    Bdu = (double *)malloc((size_t)dx * 4 * sizeof(double));
    Bdv = (double *)malloc((size_t)dy * 4 * sizeof(double));
    Bdw = (double *)malloc((size_t)dz * 4 * sizeof(double));

    if (ThreadList == NULL || ThreadID == NULL || ThreadArgs == NULL ||
        Bu == NULL || Bv == NULL || Bw == NULL ||
        Bdu == NULL || Bdv == NULL || Bdw == NULL) {
        errmsg = "Out of memory.";
        goto cleanup;
    }

    /* Make polynomial look up tables */
    fill_bspline_lut_jacdet(Bu, Bdu, dx, dxa[0]);
    fill_bspline_lut_jacdet(Bv, Bdv, dy, dya[0]);
    fill_bspline_lut_jacdet(Bw, Bdw, dz, dza[0]);

    Osize_d[0] = (double)Osizex;
    Osize_d[1] = (double)Osizey;
    Osize_d[2] = (double)Osizez;

    /* Build every argument table before starting any thread, so that an
     * allocation failure can be reported while no worker is running. */
    for (i = 0; i < Nthreads; i++) {
        /* Make Thread ID */
        ThreadID1 = (double *)malloc(sizeof(double));
        ThreadID[i] = ThreadID1;

        /* Make Thread Structure: room for 17 function variables (arrays) */
        ThreadArgs1 = (double **)malloc(17 * sizeof(double *));
        ThreadArgs[i] = ThreadArgs1;

        if (ThreadID1 == NULL || ThreadArgs1 == NULL) {
            errmsg = "Out of memory.";
            goto cleanup;
        }

        ThreadID1[0] = (double)i;

        ThreadArgs1[0] = Bu;
        ThreadArgs1[1] = Bv;
        ThreadArgs1[2] = Bw;
        ThreadArgs1[3] = Isize_d;
        ThreadArgs1[4] = Osize_d;
        ThreadArgs1[5] = Iout;
        ThreadArgs1[6] = dxa;
        ThreadArgs1[7] = dya;
        ThreadArgs1[8] = dza;
        ThreadArgs1[9] = ThreadID[i];
        ThreadArgs1[10] = Ox;
        ThreadArgs1[11] = Oy;
        ThreadArgs1[12] = Oz;
        ThreadArgs1[13] = Nthreadsd;
        ThreadArgs1[14] = Bdu;
        ThreadArgs1[15] = Bdv;
        ThreadArgs1[16] = Bdw;
    }

    /* Start the workers */
    for (i = 0; i < Nthreads; i++) {
        StartThread(ThreadList[i], &transformvolume_jacobiandet, ThreadArgs[i])
    }

    /* Wait until every worker is done before releasing the shared buffers */
    for (i = 0; i < Nthreads; i++) {
        WaitForThreadFinish(ThreadList[i]);
    }

cleanup:
    if (ThreadArgs != NULL) {
        for (i = 0; i < Nthreads; i++) {
            free(ThreadArgs[i]);
        }
    }
    if (ThreadID != NULL) {
        for (i = 0; i < Nthreads; i++) {
            free(ThreadID[i]);
        }
    }
    free(ThreadArgs);
    free(ThreadID);
    free(ThreadList);
    free(Bu);
    free(Bv);
    free(Bw);
    free(Bdu);
    free(Bdv);
    free(Bdw);

    /* mexErrMsgTxt does not return, so it is called only after all
     * malloc'ed memory has been released. */
    if (errmsg != NULL) {
        mexErrMsgTxt(errmsg);
    }
}
/*
 * Fill the B-spline lookup tables for one axis.
 *
 * B and Bd each hold 4*n doubles and are indexed with mindex2(k, x, 4),
 * k = 0..3 selecting the basis function and x = 0..n-1 the offset inside
 * one grid cell.  B receives BsplineCoefficient(t, k) and Bd receives
 * BsplineCoefficientDerivative(t, k) / spacing, with t = x / n.
 *
 * Because 0 <= x < n, x/n already lies in [0,1), so no fractional-part
 * reduction is needed.
 */
static void fill_bspline_lut_jacerr(double *B, double *Bd, int n, double spacing)
{
    int x, k;
    double t;

    for (x = 0; x < n; x++) {
        t = (double)x / (double)n;
        for (k = 0; k < 4; k++) {
            B[mindex2(k, x, 4)]  = BsplineCoefficient(t, k);
            Bd[mindex2(k, x, 4)] = BsplineCoefficientDerivative(t, k) / spacing;
        }
    }
}

/*
 * The matlab mex function.
 *
 * Inputs (exactly five are required):
 *   prhs[0], prhs[1] : Ox, Oy, the grid point arrays (M-by-N, read as double)
 *   prhs[2]          : image size; elements 0 and 1 are read
 *   prhs[3], prhs[4] : spacing of the b-spline knots per axis; the first
 *                      element of each is truncated to int and used as the
 *                      lookup-table length
 *
 * Outputs:
 *   plhs[0] : 1x1 error value: the sum of the per-thread error slots divided
 *             by Isize[0]*Isize[1].
 *   plhs[1] : (only when nlhs > 1) M-by-N-by-2 error gradient: the per-thread
 *             X and Y gradient buffers summed over all threads and divided
 *             by Isize[0]*Isize[1]*step.
 *
 * One worker is started per thread reported by maxNumCompThreads:
 * jacobian_errorgradient when the gradient is requested, jacobian_error
 * otherwise.
 */
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] )
{
    /* Grid point arrays, spacing inputs and output buffers */
    double *Ox, *Oy, *dxa, *dya, *E, *Egradient = NULL;

    /* Scratch buffers written by the worker threads */
    double *ThreadErrorOut = NULL;
    double *ThreadGradientOutX = NULL;
    double *ThreadGradientOutY = NULL;

    /* Result / argument arrays for the maxNumCompThreads call */
    mxArray *matlabCallOut[1] = {0};
    mxArray *matlabCallIn[1] = {0};
    double *Nthreadsd;
    int Nthreads;

    /* Finite difference step size */
    double step = 0.001;

    /* index offsets */
    int offset1;

    /* Per-thread argument tables (array of pointer arrays) */
    double ***ThreadArgs = NULL;
    double **ThreadArgs1;

    /* Handles to the worker threads */
    ThreadHANDLE *ThreadList = NULL;

    /* Per-thread id, stored as a heap double so it fits the argument table */
    double **ThreadID = NULL;
    double *ThreadID1;

    /* Dims outputs.  These must be mwSize: mxCreateNumericArray reads them
     * through a const mwSize pointer, and mwSize can be wider than int. */
    const mwSize dims_error[2] = {1, 1};
    mwSize dims_error_gradient[3] = {1, 1, 2};

    /* Size of input image */
    double *Isize_d;

    /* Size of grid */
    mwSize Osizex, Osizey;
    int Onumel;
    double Inumel;
    double Osize_d[2] = {0, 0};

    /* B-spline value and derivative lookup tables */
    double *Bu = NULL, *Bv = NULL, *Bdu = NULL, *Bdv = NULL;

    /* Loop variables */
    int i, j;

    /* Grid distance */
    int dx, dy;

    /* Byte size of one gradient scratch buffer */
    size_t gradient_bytes;

    /* Set when an error must be reported after the heap memory is released */
    const char *errmsg = NULL;

    /* Check for proper number of arguments. */
    if (nrhs != 5) {
        mexErrMsgTxt("Five inputs are required.");
    }

    /* Get the sizes of the grid */
    Osizex = (mwSize)mxGetM(prhs[0]);
    Osizey = (mwSize)mxGetN(prhs[0]);

    /* Assign pointers to each input. */
    Ox = mxGetPr(prhs[0]);
    Oy = mxGetPr(prhs[1]);
    Isize_d = mxGetPr(prhs[2]);
    dxa = mxGetPr(prhs[3]);
    dya = mxGetPr(prhs[4]);

    Onumel = (int)(Osizex * Osizey);
    Inumel = Isize_d[0] * Isize_d[1];

    /* Get the spacing of the uniform b-spline grid */
    dx = (int)dxa[0];
    dy = (int)dya[0];

    /* The spacings size the lookup tables, so they must be positive.
     * Nothing has been malloc'ed yet, so erroring out here leaks nothing. */
    if (dx < 1 || dy < 1) {
        mexErrMsgTxt("Grid spacing must be at least 1 in every dimension.");
    }

    /* Create image matrix for the Error return argument */
    plhs[0] = mxCreateNumericArray(2, dims_error, mxDOUBLE_CLASS, mxREAL);
    if (nlhs > 1) {
        /* Error Gradient needed */
        dims_error_gradient[0] = Osizex;
        dims_error_gradient[1] = Osizey;
        plhs[1] = mxCreateNumericArray(3, dims_error_gradient, mxDOUBLE_CLASS, mxREAL);
    }

    /* Assign pointer to output. */
    E = mxGetPr(plhs[0]);
    if (nlhs > 1) {
        Egradient = mxGetPr(plhs[1]);
    }

    /* Get number of allowed threads */
    mexCallMATLAB(1, matlabCallOut, 0, matlabCallIn, "maxNumCompThreads");
    Nthreadsd = mxGetPr(matlabCallOut[0]);
    Nthreads = (int)Nthreadsd[0];
    if (Nthreads < 1) {
        mexErrMsgTxt("maxNumCompThreads returned an invalid thread count.");
    }

    /* Reserve room for the thread handles and the per-thread tables.
     * calloc keeps unused slots NULL so the cleanup code can free them
     * unconditionally. */
    ThreadList = (ThreadHANDLE *)malloc((size_t)Nthreads * sizeof(ThreadHANDLE));
    ThreadID = (double **)calloc((size_t)Nthreads, sizeof(double *));
    ThreadArgs = (double ***)calloc((size_t)Nthreads, sizeof(double **));
    ThreadErrorOut = (double *)malloc((size_t)Nthreads * sizeof(double));

    /* Gradient scratch space is only needed when the gradient worker runs.
     * The size is computed in size_t to avoid int overflow. */
    gradient_bytes = (size_t)Nthreads * (size_t)Onumel * sizeof(double);
    if (nlhs > 1) {
        ThreadGradientOutX = (double *)malloc(gradient_bytes);
        ThreadGradientOutY = (double *)malloc(gradient_bytes);
    }

    /* Reserve the polynomial look up tables */
    Bu  = (double *)malloc((size_t)dx * 4 * sizeof(double));
    Bv  = (double *)malloc((size_t)dy * 4 * sizeof(double));
    Bdu = (double *)malloc((size_t)dx * 4 * sizeof(double));
    Bdv = (double *)malloc((size_t)dy * 4 * sizeof(double));

    if (ThreadList == NULL || ThreadID == NULL || ThreadArgs == NULL ||
        ThreadErrorOut == NULL ||
        Bu == NULL || Bv == NULL || Bdu == NULL || Bdv == NULL) {
        errmsg = "Out of memory.";
        goto cleanup;
    }
    /* malloc(0) may legitimately return NULL, so only a failed non-empty
     * request counts as an error. */
    if (nlhs > 1 && gradient_bytes > 0 &&
        (ThreadGradientOutX == NULL || ThreadGradientOutY == NULL)) {
        errmsg = "Out of memory.";
        goto cleanup;
    }

    /* Make polynomial look up tables */
    fill_bspline_lut_jacerr(Bu, Bdu, dx, dxa[0]);
    fill_bspline_lut_jacerr(Bv, Bdv, dy, dya[0]);

    Osize_d[0] = (double)Osizex;
    Osize_d[1] = (double)Osizey;

    /* Build every argument table before starting any thread, so that an
     * allocation failure can be reported while no worker is running. */
    for (i = 0; i < Nthreads; i++) {
        /* Make Thread ID */
        ThreadID1 = (double *)malloc(sizeof(double));
        ThreadID[i] = ThreadID1;

        /* Make Thread Structure: room for 15 function variables (arrays) */
        ThreadArgs1 = (double **)malloc(15 * sizeof(double *));
        ThreadArgs[i] = ThreadArgs1;

        if (ThreadID1 == NULL || ThreadArgs1 == NULL) {
            errmsg = "Out of memory.";
            goto cleanup;
        }

        ThreadID1[0] = (double)i;

        ThreadArgs1[0] = Bu;
        ThreadArgs1[1] = Bv;
        ThreadArgs1[2] = Isize_d;
        ThreadArgs1[3] = Osize_d;
        ThreadArgs1[4] = ThreadErrorOut;
        ThreadArgs1[5] = dxa;
        ThreadArgs1[6] = dya;
        ThreadArgs1[7] = ThreadID[i];
        ThreadArgs1[8] = Ox;
        ThreadArgs1[9] = Oy;
        ThreadArgs1[10] = Nthreadsd;
        ThreadArgs1[11] = Bdu;
        ThreadArgs1[12] = Bdv;
        ThreadArgs1[13] = ThreadGradientOutX;
        ThreadArgs1[14] = ThreadGradientOutY;
    }

    /* Start the workers */
    for (i = 0; i < Nthreads; i++) {
        if (nlhs > 1) {
            StartThread(ThreadList[i], &jacobian_errorgradient, ThreadArgs[i])
        } else {
            StartThread(ThreadList[i], &jacobian_error, ThreadArgs[i])
        }
    }

    /* Wait until every worker is done before reading their results */
    for (i = 0; i < Nthreads; i++) {
        WaitForThreadFinish(ThreadList[i]);
    }

    /* Add accumulated error of all threads */
    E[0] = 0;
    for (i = 0; i < Nthreads; i++) {
        E[0] += ThreadErrorOut[i];
    }
    E[0] /= Inumel;

    if (nlhs > 1) {
        /* Sum the per-thread gradient buffers into the output array, which
         * mxCreateNumericArray created zero-filled.  The first Onumel
         * entries hold the X part, the next Onumel the Y part. */
        for (i = 0; i < Nthreads; i++) {
            offset1 = i * Onumel;
            for (j = 0; j < Onumel; j++) {
                Egradient[j] += ThreadGradientOutX[j + offset1];
                Egradient[j + Onumel] += ThreadGradientOutY[j + offset1];
            }
        }
        for (j = 0; j < Onumel; j++) {
            Egradient[j] /= Inumel * step;
            Egradient[j + Onumel] /= Inumel * step;
        }
    }

cleanup:
    if (ThreadArgs != NULL) {
        for (i = 0; i < Nthreads; i++) {
            free(ThreadArgs[i]);
        }
    }
    if (ThreadID != NULL) {
        for (i = 0; i < Nthreads; i++) {
            free(ThreadID[i]);
        }
    }
    free(ThreadErrorOut);
    free(ThreadGradientOutX);
    free(ThreadGradientOutY);
    free(ThreadArgs);
    free(ThreadID);
    free(ThreadList);
    free(Bu);
    free(Bdu);
    free(Bv);
    free(Bdv);

    /* mexErrMsgTxt does not return, so it is called only after all
     * malloc'ed memory has been released. */
    if (errmsg != NULL) {
        mexErrMsgTxt(errmsg);
    }
}