/* * Class: jcuda_jcufft_JCufft * Method: cufftPlanManyNative * Signature: (Ljcuda/jcufft/cufftHandle;I[I[III[IIIII)I */ JNIEXPORT jint JNICALL Java_jcuda_jcufft_JCufft_cufftPlanManyNative (JNIEnv *env, jclass cla, jobject handle, jint rank, jintArray n, jintArray inembed, jint istride, jint idist, jintArray onembed, jint ostride, jint odist, jint type, jint batch) { if (handle == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'handle' is null for cufftPlanMany"); return JCUFFT_INTERNAL_ERROR; } if (n == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'n' is null for cufftPlanMany"); return JCUFFT_INTERNAL_ERROR; } Logger::log(LOG_TRACE, "Executing cufftPlanMany\n"); cufftHandle plan = env->GetIntField(handle, cufftHandle_plan); int *nativeN = getArrayContents(env, n); int *nativeInembed = getArrayContents(env, inembed); int *nativeOnembed = getArrayContents(env, onembed); cufftResult result = cufftPlanMany(&plan, rank, nativeN, nativeInembed, (int)istride, (int)idist, nativeOnembed, (int)ostride, (int)odist, getCufftType(type), (int)batch); delete[] nativeN; delete[] nativeInembed; delete[] nativeOnembed; env->SetIntField(handle, cufftHandle_plan, plan); return result; }
static void instantiate(const callable_type_data *DYND_UNUSED(self), const callable_type *DYND_UNUSED(self_tp), kernel_builder *ckb, intptr_t ckb_offset, const ndt::type &DYND_UNUSED(dst_tp), const char *dst_arrmeta, intptr_t DYND_UNUSED(nsrc), const ndt::type *src_tp, const char *const *src_arrmeta, kernel_request_t kernreq, const eval::eval_context *DYND_UNUSED(ectx), const nd::array &kwds, const std::map<std::string, ndt::type> &DYND_UNUSED(tp_vars)) { const size_stride_t *dst_size_stride = reinterpret_cast<const size_stride_t *>(dst_arrmeta); const size_stride_t *src_size_stride = reinterpret_cast<const size_stride_t *>(src_arrmeta[0]); array axes = kwds.p("axes"); array shape = kwds.p("shape"); int ndim = static_cast<int>(src_tp[0].get_ndim()); int rank = static_cast<int>(axes.is_missing() ? ndim : (ndim - 1)); int istride = static_cast<int>(src_size_stride[ndim - 1].stride / sizeof(src_type)); int idist = static_cast<int>(src_size_stride[0].stride / sizeof(src_type)); int ostride = static_cast<int>(dst_size_stride[ndim - 1].stride / sizeof(dst_type)); int odist = static_cast<int>(dst_size_stride[0].stride / sizeof(dst_type)); std::vector<int> n(rank), inembed(rank), onembed(rank); for (int i = 0, j = axes.is_missing() ? 0 : 1; j < ndim; ++i, ++j) { n[i] = static_cast<int>(src_size_stride[j].dim_size); inembed[i] = static_cast<int>(src_size_stride[j].dim_size); onembed[i] = static_cast<int>(dst_size_stride[j].dim_size); } int batch = static_cast<int>(axes.is_missing() ? 1 : src_size_stride[0].dim_size); self_type *self = self_type::create(ckb, kernreq, ckb_offset); cufftPlanMany(&self->plan, rank, n.data(), inembed.data(), istride, idist, onembed.data(), ostride, odist, CUFFT_Z2Z, batch); }
/* Create a n-d CUFFT plan * * sign = 0 (forward) or 1 (inverse) */ int dfft_cuda_create_nd_plan( cuda_plan_t *plan, int ndim, int *dim, int howmany, int *iembed, int istride, int idist, int *oembed, int ostride, int odist, int dir) { cufftResult res; res = cufftPlanMany(plan, ndim, dim, iembed, istride, idist, oembed, ostride, odist, CUFFT_C2C, howmany); if (res != CUFFT_SUCCESS) { printf("CUFFT Error: %d\n", res); return 1; } return 0; }
/* Create a 1d CUFFT plan * * sign = 0 (forward) or 1 (inverse) */ int dfft_cuda_create_1d_plan( cuda_plan_t *plan, int dim, int howmany, int istride, int idist, int ostride, int odist, int dir) { int dims[1]; dims[0] = dim; cufftResult res; res = cufftPlanMany(plan, 1, dims, dims, istride, idist, dims, ostride, odist, CUFFT_C2C, howmany); if (res != CUFFT_SUCCESS) { printf("CUFFT Error: %d\n", res); return 1; } return 0; }
/** * Creates a 3D C2C parallel FFT plan. If data_out point to the same location as the input * data, then an inplace plan will be created. Otherwise the plan would be outplace. * @param n Integer array of size 3, corresponding to the global data size * @param data Input data in spatial domain * @param data_out Output data in frequency domain * @param c_comm Cartesian communicator returned by \ref accfft_create_comm * @param flags AccFFT flags, See \ref flags for more details. * @return */ accfft_plan_gpu* accfft_plan_dft_3d_c2c_gpu(int * n, Complex * data_d, Complex * data_out_d, MPI_Comm c_comm,unsigned flags){ accfft_plan_gpu *plan=new accfft_plan_gpu; int nprocs, procid; MPI_Comm_rank(c_comm, &procid); plan->procid=procid; MPI_Cart_get(c_comm,2,plan->np,plan->periods,plan->coord); plan->c_comm=c_comm; int *coord=plan->coord; MPI_Comm_split(c_comm,coord[0],coord[1],&plan->row_comm); MPI_Comm_split(c_comm,coord[1],coord[0],&plan->col_comm); plan->N[0]=n[0];plan->N[1]=n[1];plan->N[2]=n[2]; int NX=n[0], NY=n[1], NZ=n[2]; cufftResult_t cufft_error; plan->data_c=data_d; plan->data_out_c=data_out_d; if(data_out_d==data_d){ plan->inplace=true;} else{plan->inplace=false;} if(plan->np[1]==1) plan->oneD=true; else plan->oneD=false; int *osize_0 =plan->osize_0, *ostart_0 =plan->ostart_0; int *osize_1 =plan->osize_1, *ostart_1 =plan->ostart_1; int *osize_2 =plan->osize_2, *ostart_2 =plan->ostart_2; int *osize_1i=plan->osize_1i,*ostart_1i=plan->ostart_1i; int *osize_2i=plan->osize_2i,*ostart_2i=plan->ostart_2i; int alloc_local; int alloc_max=0,n_tuples=n[2]*2; //int isize[3],osize[3],istart[3],ostart[3]; alloc_max=accfft_local_size_dft_c2c_gpu(n,plan->isize,plan->istart,plan->osize,plan->ostart,c_comm); plan->alloc_max=alloc_max; dfft_get_local_size_gpu(n[0],n[1],n[2],osize_0,ostart_0,c_comm); dfft_get_local_size_gpu(n[0],n[2],n[1],osize_1,ostart_1,c_comm); dfft_get_local_size_gpu(n[1],n[2],n[0],osize_2,ostart_2,c_comm); std::swap(osize_1[1],osize_1[2]); std::swap(ostart_1[1],ostart_1[2]); std::swap(ostart_2[1],ostart_2[2]); std::swap(ostart_2[0],ostart_2[1]); std::swap(osize_2[1],osize_2[2]); std::swap(osize_2[0],osize_2[1]); for(int i=0;i<3;i++){ osize_1i[i]=osize_1[i]; osize_2i[i]=osize_2[i]; ostart_1i[i]=ostart_1[i]; ostart_2i[i]=ostart_2[i]; } // fplan_0 { int f_inembed[1]={NZ}; int f_onembed[1]={NZ}; int idist=(NZ); int odist=(NZ); int istride=1; int ostride=1; int batch=osize_0[0]*osize_0[1];//NX; if(batch!=0) { cufft_error=cufftPlanMany(&plan->fplan_0, 1, &n[2], f_inembed, istride, idist, // *inembed, istride, idist f_onembed, ostride, odist, // *onembed, ostride, odist CUFFT_Z2Z, batch); if(cufft_error!= CUFFT_SUCCESS) { fprintf(stderr, "CUFFT error: fplan_0 creation failed %d \n",cufft_error); return NULL; } //cufftSetCompatibilityMode(fplan,CUFFT_COMPATIBILITY_FFTW_PADDING); if (cudaGetLastError() != cudaSuccess){fprintf(stderr, "Cuda error:Failed at fplan cuda compatibility\n"); return;} } } // fplan_1 { int f_inembed[1]={NY}; int f_onembed[1]={NY}; int idist=1; int odist=1; int istride=osize_1[2]; int ostride=osize_1[2]; int batch=osize_1[2]; if(batch!=0) { cufft_error=cufftPlanMany(&plan->fplan_1, 1, &n[1], f_inembed, istride, idist, // *inembed, istride, idist f_onembed, ostride, odist, // *onembed, ostride, odist CUFFT_Z2Z, batch); if(cufft_error!= CUFFT_SUCCESS) { fprintf(stderr, "CUFFT error: fplan_1 creation failed %d \n",cufft_error); return NULL; } //cufftSetCompatibilityMode(fplan,CUFFT_COMPATIBILITY_FFTW_PADDING); if (cudaGetLastError() != cudaSuccess){fprintf(stderr, "Cuda error:Failed at fplan cuda compatibility\n"); return;} } } // fplan_2 { int f_inembed[1]={NX}; int f_onembed[1]={NX}; int idist=1; int odist=1; int istride=osize_2[1]*osize_2[2]; int ostride=osize_2[1]*osize_2[2]; int batch=osize_2[1]*osize_2[2];; if(batch!=0) { cufft_error=cufftPlanMany(&plan->fplan_2, 1, &n[0], f_inembed, istride, idist, // *inembed, istride, idist f_onembed, ostride, odist, // *onembed, ostride, odist CUFFT_Z2Z, batch); if(cufft_error!= CUFFT_SUCCESS) { fprintf(stderr, "CUFFT error: fplan_2 creation failed %d \n",cufft_error); return NULL; } //cufftSetCompatibilityMode(fplan,CUFFT_COMPATIBILITY_FFTW_PADDING); if (cudaGetLastError() != cudaSuccess){fprintf(stderr, "Cuda error:Failed at fplan cuda compatibility\n"); return;} } } // 1D Decomposition if (plan->oneD){ int NX=n[0],NY=n[1],NZ=n[2]; plan->alloc_max=alloc_max; plan->Mem_mgr= new Mem_Mgr_gpu <double>(NX,NY,(NZ)*2,c_comm); plan->T_plan_2= new T_Plan_gpu <double>(NX,NY,(NZ)*2, plan->Mem_mgr,c_comm); plan->T_plan_2i= new T_Plan_gpu<double>(NY,NX,NZ*2, plan->Mem_mgr,c_comm); plan->T_plan_2->alloc_local=alloc_max; plan->T_plan_2i->alloc_local=alloc_max; plan->T_plan_1=NULL; plan->T_plan_1i=NULL; if(flags==ACCFFT_MEASURE){ plan->T_plan_2->which_fast_method_gpu(plan->T_plan_2,(double*)data_out_d); } else{ plan->T_plan_2->method=2; plan->T_plan_2->kway=2; } checkCuda_accfft (cudaDeviceSynchronize()); MPI_Barrier(plan->c_comm); plan->T_plan_2i->method=-plan->T_plan_2->method; plan->T_plan_2i->kway=plan->T_plan_2->kway; plan->T_plan_2i->kway_async=plan->T_plan_2->kway_async; }// end 1d c2c // 2D Decomposition if (!plan->oneD){ // the reaseon for n_tuples/2 is to avoid splitting of imag and real parts of complex numbers plan->Mem_mgr= new Mem_Mgr_gpu<double>(n[1],n[2],2,plan->row_comm,osize_0[0],alloc_max); plan->T_plan_1= new T_Plan_gpu<double>(n[1],n[2],2, plan->Mem_mgr, plan->row_comm,osize_0[0]); plan->T_plan_2= new T_Plan_gpu<double>(n[0],n[1],2*osize_2[2], plan->Mem_mgr, plan->col_comm); plan->T_plan_2i= new T_Plan_gpu<double>(n[1],n[0],2*osize_2i[2], plan->Mem_mgr, plan->col_comm); plan->T_plan_1i= new T_Plan_gpu<double>(n[2],n[1],2, plan->Mem_mgr, plan->row_comm,osize_1i[0]); plan->T_plan_1->alloc_local=plan->alloc_max; plan->T_plan_2->alloc_local=plan->alloc_max; plan->T_plan_2i->alloc_local=plan->alloc_max; plan->T_plan_1i->alloc_local=plan->alloc_max; plan->iplan_0=NULL; plan->iplan_1=NULL; plan->iplan_2=NULL; int coords[2],np[2],periods[2]; MPI_Cart_get(c_comm,2,np,periods,coords); if(flags==ACCFFT_MEASURE){ if(coords[0]==0){ plan->T_plan_1->which_fast_method_gpu(plan->T_plan_1,(double*)data_out_d,osize_0[0]); } } else{ plan->T_plan_1->method=2; plan->T_plan_1->kway=2; } MPI_Bcast(&plan->T_plan_1->method,1, MPI_INT,0, c_comm ); MPI_Bcast(&plan->T_plan_1->kway,1, MPI_INT,0, c_comm ); MPI_Bcast(&plan->T_plan_1->kway_async,1, MPI::BOOL,0, c_comm ); checkCuda_accfft (cudaDeviceSynchronize()); MPI_Barrier(plan->c_comm); plan->T_plan_1->method =plan->T_plan_1->method; plan->T_plan_2->method =plan->T_plan_1->method; plan->T_plan_2i->method=-plan->T_plan_1->method; plan->T_plan_1i->method=-plan->T_plan_1->method; plan->T_plan_1->kway =plan->T_plan_1->kway; plan->T_plan_2->kway =plan->T_plan_1->kway; plan->T_plan_2i->kway=plan->T_plan_1->kway; plan->T_plan_1i->kway=plan->T_plan_1->kway; plan->T_plan_1->kway_async =plan->T_plan_1->kway_async; plan->T_plan_2->kway_async =plan->T_plan_1->kway_async; plan->T_plan_2i->kway_async=plan->T_plan_1->kway_async; plan->T_plan_1i->kway_async=plan->T_plan_1->kway_async; }// end 2d c2c plan->c2c_plan_baked=true; return plan; }// end accfft_plan_dft_c2c_gpu
cufftResult WINAPI wine_cufftPlanMany(cufftHandle *plan, int rank, int *n, int *inembed, int istride, int idist, int *onembed, int ostride, int odist, cufftType type, int batch){ WINE_TRACE("\n"); return cufftPlanMany( plan, rank, n, inembed, istride, idist, onembed, ostride, odist, type, batch); }
/** * Creates a 3D single precision R2C parallel FFT plan. If data_out point to the same location as the input * data, then an inplace plan will be created. Otherwise the plan would be outplace. * @param n Integer array of size 3, corresponding to the global data size * @param data Input data in spatial domain * @param data_out Output data in frequency domain * @param c_comm Cartesian communicator returned by \ref accfft_create_comm * @param flags AccFFT flags, See \ref flags for more details. * @return */ accfft_plan_gpuf* accfft_plan_dft_3d_r2c_gpuf(int * n, float * data_d,float * data_out_d, MPI_Comm c_comm,unsigned flags){ accfft_plan_gpuf *plan=new accfft_plan_gpuf; int procid; MPI_Comm_rank(c_comm, &procid); plan->procid=procid; MPI_Cart_get(c_comm,2,plan->np,plan->periods,plan->coord); plan->c_comm=c_comm; int *coord=plan->coord; MPI_Comm_split(c_comm,coord[0],coord[1],&plan->row_comm); MPI_Comm_split(c_comm,coord[1],coord[0],&plan->col_comm); plan->N[0]=n[0];plan->N[1]=n[1];plan->N[2]=n[2]; plan->data=data_d; plan->data_out=data_out_d; if(plan->np[1]==1) plan->oneD=true; else plan->oneD=false; if(data_out_d==data_d){ plan->inplace=true;} else{plan->inplace=false;} int *osize_0 =plan->osize_0, *ostart_0 =plan->ostart_0; int *osize_1 =plan->osize_1, *ostart_1 =plan->ostart_1; int *osize_2 =plan->osize_2, *ostart_2 =plan->ostart_2; int *osize_1i=plan->osize_1i,*ostart_1i=plan->ostart_1i; int *osize_2i=plan->osize_2i,*ostart_2i=plan->ostart_2i; int alloc_max=0; int n_tuples_i, n_tuples_o; //plan->inplace==true ? n_tuples=(n[2]/2+1)*2: n_tuples=n[2]*2; plan->inplace==true ? n_tuples_i=(n[2]/2+1)*2: n_tuples_i=n[2]; n_tuples_o=(n[2]/2+1)*2; int isize[3],osize[3],istart[3],ostart[3]; alloc_max=accfft_local_size_dft_r2c_gpuf(n,isize,istart,osize,ostart,c_comm,plan->inplace); plan->alloc_max=alloc_max; dfft_get_local_size_gpuf(n[0],n[1],n_tuples_o,osize_0,ostart_0,c_comm); dfft_get_local_size_gpuf(n[0],n_tuples_o/2,n[1],osize_1,ostart_1,c_comm); dfft_get_local_size_gpuf(n[1],n_tuples_o/2,n[0],osize_2,ostart_2,c_comm); std::swap(osize_1[1],osize_1[2]); std::swap(ostart_1[1],ostart_1[2]); std::swap(ostart_2[1],ostart_2[2]); std::swap(ostart_2[0],ostart_2[1]); std::swap(osize_2[1],osize_2[2]); std::swap(osize_2[0],osize_2[1]); for(int i=0;i<3;i++){ osize_1i[i]=osize_1[i]; osize_2i[i]=osize_2[i]; ostart_1i[i]=ostart_1[i]; ostart_2i[i]=ostart_2[i]; } // fplan_0 int NX=n[0], NY=n[1], NZ=n[2]; cufftResult_t cufft_error; { int f_inembed[1]={n_tuples_i}; int f_onembed[1]={n_tuples_o/2}; int idist=(n_tuples_i); int odist=n_tuples_o/2; int istride=1; int ostride=1; int batch=osize_0[0]*osize_0[1];//NX; if(batch!=0) { cufft_error=cufftPlanMany(&plan->fplan_0, 1, &n[2], f_inembed, istride, idist, // *inembed, istride, idist f_onembed, ostride, odist, // *onembed, ostride, odist CUFFT_R2C, batch); if(cufft_error!= CUFFT_SUCCESS) { fprintf(stderr, "CUFFT error: fplan_0 creation failed %d \n",cufft_error); return NULL; } //cufftSetCompatibilityMode(fplan,CUFFT_COMPATIBILITY_FFTW_PADDING); if (cudaGetLastError() != cudaSuccess){fprintf(stderr, "Cuda error:Failed at fplan cuda compatibility\n"); return;} } if(batch!=0) { cufft_error=cufftPlanMany(&plan->iplan_0, 1, &n[2], f_onembed, ostride, odist, // *onembed, ostride, odist f_inembed, istride, idist, // *inembed, istride, idist CUFFT_C2R, batch); if(cufft_error!= CUFFT_SUCCESS) { fprintf(stderr, "CUFFT error: iplan_0 creation failed %d \n",cufft_error); return NULL; } //cufftSetCompatibilityMode(fplan,CUFFT_COMPATIBILITY_FFTW_PADDING); if (cudaGetLastError() != cudaSuccess){fprintf(stderr, "Cuda error:Failed at fplan cuda compatibility\n"); return;} } } // fplan_1 { int f_inembed[1]={NY}; int f_onembed[1]={NY}; int idist=1; int odist=1; int istride=osize_1[2]; int ostride=osize_1[2]; int batch=osize_1[2]; if(batch!=0) { cufft_error=cufftPlanMany(&plan->fplan_1, 1, &n[1], f_inembed, istride, idist, // *inembed, istride, idist f_onembed, ostride, odist, // *onembed, ostride, odist CUFFT_C2C, batch); if(cufft_error!= CUFFT_SUCCESS) { fprintf(stderr, "CUFFT error: fplan_1 creation failed %d \n",cufft_error); return NULL; } //cufftSetCompatibilityMode(fplan,CUFFT_COMPATIBILITY_FFTW_PADDING); if (cudaGetLastError() != cudaSuccess){fprintf(stderr, "Cuda error:Failed at fplan cuda compatibility\n"); return;} } } // fplan_2 { int f_inembed[1]={NX}; int f_onembed[1]={NX}; int idist=1; int odist=1; int istride=osize_2[1]*osize_2[2]; int ostride=osize_2[1]*osize_2[2]; int batch=osize_2[1]*osize_2[2];; if(batch!=0) { cufft_error=cufftPlanMany(&plan->fplan_2, 1, &n[0], f_inembed, istride, idist, // *inembed, istride, idist f_onembed, ostride, odist, // *onembed, ostride, odist CUFFT_C2C, batch); if(cufft_error!= CUFFT_SUCCESS) { fprintf(stderr, "CUFFT error: fplan_2 creation failed %d \n",cufft_error); return NULL; } //cufftSetCompatibilityMode(fplan,CUFFT_COMPATIBILITY_FFTW_PADDING); if (cudaGetLastError() != cudaSuccess){fprintf(stderr, "Cuda error:Failed at fplan cuda compatibility\n"); return;} } } // 1D Decomposition if(plan->oneD){ int N0=n[0], N1=n[1], N2=n[2]; plan->Mem_mgr = new Mem_Mgr_gpu<float>(N0,N1,n_tuples_o,c_comm); plan->T_plan_2 = new T_Plan_gpu <float>(N0,N1,n_tuples_o, plan->Mem_mgr, c_comm); plan->T_plan_2i= new T_Plan_gpu <float>(N1,N0,n_tuples_o,plan->Mem_mgr, c_comm); plan->T_plan_1=NULL; plan->T_plan_1i=NULL; plan->alloc_max=alloc_max; plan->T_plan_2->alloc_local=alloc_max; plan->T_plan_2i->alloc_local=alloc_max; if(flags==ACCFFT_MEASURE){ plan->T_plan_2->which_fast_method_gpu(plan->T_plan_2,data_out_d); } else{ plan->T_plan_2->method=2; plan->T_plan_2->kway=2; } checkCuda_accfft (cudaDeviceSynchronize()); MPI_Barrier(plan->c_comm); plan->T_plan_2->method =plan->T_plan_2->method; plan->T_plan_2i->method=plan->T_plan_2->method; plan->T_plan_2->kway =plan->T_plan_2->kway; plan->T_plan_2i->kway=plan->T_plan_2->kway; } // 2D Decomposition if (!plan->oneD){ // the reaseon for n_tuples/2 is to avoid splitting of imag and real parts of complex numbers plan->Mem_mgr =new Mem_Mgr_gpu<float>(n[1],n_tuples_o/2,2,plan->row_comm,osize_0[0],alloc_max); plan->T_plan_1 = new T_Plan_gpu<float>(n[1],n_tuples_o/2,2, plan->Mem_mgr, plan->row_comm,osize_0[0]); plan->T_plan_2 = new T_Plan_gpu<float>(n[0],n[1],osize_2[2]*2,plan->Mem_mgr, plan->col_comm); plan->T_plan_2i= new T_Plan_gpu<float>(n[1],n[0],osize_2i[2]*2, plan->Mem_mgr, plan->col_comm); plan->T_plan_1i= new T_Plan_gpu<float>(n_tuples_o/2,n[1],2, plan->Mem_mgr, plan->row_comm,osize_1i[0]); plan->T_plan_1->alloc_local=plan->alloc_max; plan->T_plan_2->alloc_local=plan->alloc_max; plan->T_plan_2i->alloc_local=plan->alloc_max; plan->T_plan_1i->alloc_local=plan->alloc_max; if(flags==ACCFFT_MEASURE){ if(coord[0]==0){ plan->T_plan_1->which_fast_method_gpu(plan->T_plan_1,data_out_d,osize_0[0]); } } else{ plan->T_plan_1->method=2; plan->T_plan_1->kway=2; } MPI_Bcast(&plan->T_plan_1->method,1, MPI_INT,0, c_comm ); MPI_Bcast(&plan->T_plan_1->kway,1, MPI_INT,0, c_comm ); checkCuda_accfft (cudaDeviceSynchronize()); MPI_Barrier(plan->c_comm); plan->T_plan_1->method =plan->T_plan_1->method; plan->T_plan_2->method =plan->T_plan_1->method; plan->T_plan_2i->method=plan->T_plan_1->method; plan->T_plan_1i->method=plan->T_plan_1->method; plan->T_plan_1->kway =plan->T_plan_1->kway; plan->T_plan_2->kway =plan->T_plan_1->kway; plan->T_plan_2i->kway=plan->T_plan_1->kway; plan->T_plan_1i->kway=plan->T_plan_1->kway; plan->iplan_1=-1; plan->iplan_2=-1; } plan->r2c_plan_baked=true; return plan; }