// Negative log-likelihood of observed lengths under a sex-specific growth
// curve  Linf * (1 - exp(-Kappa * (age - t0)))  with normal observation error.
// Also reports Sigma, the predicted length at age 10 for the first sex index,
// and a 2 x 50 matrix of predicted lengths (ages 1..50 for sex indices 0 and 1).
Type objective_function<Type>::operator()()
{
  // Data: per-observation sex index, age and observed length.
  DATA_FACTOR(Sex);
  DATA_VECTOR(Age);
  DATA_VECTOR(Length);
  int nObs = Length.size();

  // These are the parameters (three are vectors indexed by sex; one is a scalar)
  PARAMETER_VECTOR(Linf);
  PARAMETER_VECTOR(Kappa);
  PARAMETER_VECTOR(t0);
  PARAMETER(LogSigma);

  // Sigma is estimated on the log scale; report it with its standard error.
  Type Sigma = exp(LogSigma);
  ADREPORT(Sigma);

  // Predicted length for every observation, using that observation's sex.
  vector<Type> LengthPred(nObs);
  for (int obs = 0; obs < nObs; ++obs)
  {
    const int s = Sex(obs);
    Type growthArg = Kappa(s) * (Age(obs) - t0(s));
    LengthPred(obs) = Linf(s) * (1.0 - exp(-growthArg));
  }

  // Normal log-density of the observations around the predictions, negated.
  Type nll = -sum(dnorm(Length, LengthPred, Sigma, true));

  // Prediction for the first sex (index 0) at age 10.
  Type argAge10 = Kappa(0) * (Type(10) - t0(0));
  Type PredLen10 = Linf(0) * (1.0 - exp(-argAge10));
  ADREPORT(PredLen10);

  // Predicted growth curve: row = sex index, column = age - 1.
  matrix<Type> LenPred(2, 50);
  for (int sexIdx = 0; sexIdx < 2; ++sexIdx)
  {
    for (int age = 1; age <= 50; ++age)
    {
      Type curveArg = Kappa(sexIdx) * (age * 1.0 - t0(sexIdx));
      LenPred(sexIdx, age - 1) = Linf(sexIdx) * (1.0 - exp(-curveArg));
    }
  }
  REPORT(LenPred);

  return nll;
}
void TerminationCriterion::accumulate_inner( PatchData& pd, double of_value, Vector3D* grad_array, MsqError& err ) { //if terminating on the norm of the gradient //currentGradL2NormSquared = HUGE_VAL; if (terminationCriterionFlag & (GRADIENT_L2_NORM_ABSOLUTE | GRADIENT_L2_NORM_RELATIVE)) { currentGradL2NormSquared = length_squared(grad_array, pd.num_free_vertices()); // get the L2 norm MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o Info -- gradient L2 norm: " << " " << RPM(std::sqrt(currentGradL2NormSquared)) << std::endl; } //currentGradInfNorm = 10e6; if (terminationCriterionFlag & (GRADIENT_INF_NORM_ABSOLUTE | GRADIENT_INF_NORM_RELATIVE)) { currentGradInfNorm = Linf(grad_array, pd.num_free_vertices()); // get the Linf norm MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o Info -- gradient Inf norm: " << " " << RPM(currentGradInfNorm) << std::endl; } if (terminationCriterionFlag & VERTEX_MOVEMENT_RELATIVE) { maxSquaredInitialMovement = pd.get_max_vertex_movement_squared( initialVerticesMemento, err ); MSQ_ERRRTN(err); MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o Info -- max initial vertex movement: " << " " << RPM(maxSquaredInitialMovement) << std::endl; } previousOFValue = currentOFValue; currentOFValue = of_value; if (terminationCriterionFlag & OF_FLAGS) { MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o Info -- OF Value: " << " " << RPM(of_value) << " iterationCounter= " << iterationCounter << std::endl; } else if (grad_array) { MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o OF Value: " << " " << RPM(of_value) << " iterationCounter= " << iterationCounter //<< " terminationCriterionFlag= " << terminationCriterionFlag << " OF_FLAGS = " << OF_FLAGS << std::endl; } ++iterationCounter; if (timeStepFileType) write_timestep( pd, grad_array, err); if (plotFile.is_open()) plotFile << iterationCounter << '\t' << mTimer.since_birth() << '\t' << of_value << '\t' << std::sqrt( currentGradL2NormSquared ) << '\t' << currentGradInfNorm << '\t' 
<< (maxSquaredMovement > 0.0 ? std::sqrt( maxSquaredMovement ) : 0.0) << '\t' << globalInvertedCount << std::endl; }
/*
 * Runs one experiment and fills *err with three error measures.
 *
 * integrate() produces the computed data set, which is written to "bis2.txt".
 * A copy of it is then passed to initial_sin4() together with T to build the
 * data set the computed one is compared against (presumably the reference
 * solution at time T -- confirm against initial_sin4). L1, L2 and Linf of the
 * pair are stored in err->l1, err->l2 and err->linf, and both data sets are
 * released.
 *
 * NOTE(review): "none.txt" receives the computed data a second time, not the
 * comparison data -- confirm this is intended.
 */
void single_test(const int n, const double alpha, reconstruct r, complex_mesh_func cmf, err_t *err)
{
    data_t computed;
    data_t compared;

    /* Computed solution. */
    integrate(n, alpha, r, cmf, &computed);
    save_data("bis2.txt", &computed);

    /* Comparison data: start from a copy, then let initial_sin4 fill it. */
    copy_data(&computed, &compared);
    initial_sin4(T, &compared);
    save_data("none.txt", &computed);

    /* Error measures between the two data sets. */
    err->l1   = L1(&computed, &compared);
    err->l2   = L2(&computed, &compared);
    err->linf = Linf(&computed, &compared);

    free_data(&computed, 1);
    free_data(&compared, 1);
}
/*!Performs Conjugate gradient minimization on the PatchData, pd.*/ void ConjugateGradient::optimize_vertex_positions(PatchData &pd, MsqError &err){ // pd.reorder(); MSQ_FUNCTION_TIMER( "ConjugateGradient::optimize_vertex_positions" ); Timer c_timer; size_t num_vert=pd.num_free_vertices(); if(num_vert<1){ MSQ_DBGOUT(1) << "\nEmpty free vertex list in ConjugateGradient\n"; return; } /* //zero out arrays int zero_loop=0; while(zero_loop<arraySize){ fGrad[zero_loop].set(0,0,0); pGrad[zero_loop].set(0,0,0); fNewGrad[zero_loop].set(0,0,0); ++zero_loop; } */ // get OF evaluator OFEvaluator& objFunc = get_objective_function_evaluator(); size_t ind; //Michael cull list: possibly set soft_fixed flags here //MsqFreeVertexIndexIterator free_iter(pd, err); MSQ_ERRRTN(err); double f=0; //Michael, this isn't equivalent to CUBIT because we only want to check //the objective function value of the 'bad' elements //if invalid initial patch set an error. bool temp_bool = objFunc.update(pd, f, fGrad, err); assert(fGrad.size() == num_vert); if(MSQ_CHKERR(err)) return; if( ! temp_bool){ MSQ_SETERR(err)("Conjugate Gradient not able to get valid gradient " "and function values on intial patch.", MsqError::INVALID_MESH); return; } double grad_norm=MSQ_MAX_CAP; if(conjGradDebug>0){ MSQ_PRINT(2)("\nCG's DEGUB LEVEL = %i \n",conjGradDebug); grad_norm=Linf(arrptr(fGrad),fGrad.size()); MSQ_PRINT(2)("\nCG's FIRST VALUE = %f,grad_norm = %f",f,grad_norm); MSQ_PRINT(2)("\n TIME %f",c_timer.since_birth()); grad_norm=MSQ_MAX_CAP; } //Initializing pGrad (search direction). pGrad.resize(fGrad.size()); for (ind = 0; ind < num_vert; ++ind) pGrad[ind]=(-fGrad[ind]); int j=0; // total nb of step size changes ... not used much int i=0; // iteration counter unsigned m=0; // double alp=MSQ_MAX_CAP; // alp: scale factor of search direction //we know inner_criterion is false because it was checked in //loop_over_mesh before being sent here. 
TerminationCriterion* term_crit=get_inner_termination_criterion(); //while ((i<maxIteration && alp>stepBound && grad_norm>normGradientBound) // && !inner_criterion){ while(!term_crit->terminate()){ ++i; //std::cout<<"\Michael delete i = "<<i; int k=0; alp=get_step(pd,f,k,err); j+=k; if(conjGradDebug>2){ MSQ_PRINT(2)("\n Alp initial, alp = %20.18f",alp); } // if alp == 0, revert to steepest descent search direction if(alp==0){ for (m = 0; m < num_vert; ++m) { pGrad[m]=(-fGrad[m]); } alp=get_step(pd,f,k,err); j+=k; if(conjGradDebug>1){ MSQ_PRINT(2)("\n CG's search direction reset."); if(conjGradDebug>2) MSQ_PRINT(2)("\n Alp was zero, alp = %20.18f",alp); } } if(alp!=0){ pd.move_free_vertices_constrained( arrptr(pGrad), num_vert, alp, err ); MSQ_ERRRTN(err); if (! objFunc.update(pd, f, fNewGrad, err)){ MSQ_SETERR(err)("Error inside Conjugate Gradient, vertices moved " "making function value invalid.", MsqError::INVALID_MESH); return; } assert(fNewGrad.size() == (unsigned)num_vert); if(conjGradDebug>0){ grad_norm=Linf(arrptr(fNewGrad),num_vert); MSQ_PRINT(2)("\nCG's VALUE = %f, iter. 
= %i, grad_norm = %f, alp = %f",f,i,grad_norm,alp); MSQ_PRINT(2)("\n TIME %f",c_timer.since_birth()); } double s11=0; double s12=0; double s22=0; //free_iter.reset(); //while (free_iter.next()) { // m=free_iter.value(); for (m = 0; m < num_vert; ++m) { s11+=fGrad[m]%fGrad[m]; s12+=fGrad[m]%fNewGrad[m]; s22+=fNewGrad[m]%fNewGrad[m]; } // Steepest Descent (takes 2-3 times as long as P-R) //double bet=0; // Fletcher-Reeves (takes twice as long as P-R) //double bet = s22/s11; // Polack-Ribiere double bet; if (!divide( s22-s12, s11, bet )) return; // gradient is zero //free_iter.reset(); //while (free_iter.next()) { // m=free_iter.value(); for (m = 0; m < num_vert; ++m) { pGrad[m]=(-fNewGrad[m]+(bet*pGrad[m])); fGrad[m]=fNewGrad[m]; } if(conjGradDebug>2){ MSQ_PRINT(2)(" \nSEARCH DIRECTION INFINITY NORM = %e", Linf(arrptr(fNewGrad),num_vert)); } }//end if on alp == 0 term_crit->accumulate_patch( pd, err ); MSQ_ERRRTN(err); term_crit->accumulate_inner( pd, f, arrptr(fGrad), err ); MSQ_ERRRTN(err); }//end while if(conjGradDebug>0){ MSQ_PRINT(2)("\nConjugate Gradient complete i=%i ",i); MSQ_PRINT(2)("\n- FINAL value = %f, alp=%4.2e grad_norm=%4.2e",f,alp,grad_norm); MSQ_PRINT(2)("\n FINAL TIME %f",c_timer.since_birth()); } }
/*!Reset function using using a PatchData object. This function is called for the inner-stopping criterion directly from the loop over mesh function in VertexMover. For outer criterion, it is called from the reset function which takes a MeshSet object. This function prepares the object to be used by setting the initial values of some of the data members. As examples, if needed, it resets the cpu timer to zero, the iteration counter to zero, and the initial and previous objective function values to the current objective function value for this patch. The return value for this function is similar to that of terminate(). The function returns false if the checked criteria have not been satisfied, and true if they have been. reset() only checks the GRADIENT_INF_NORM_ABSOLUTE, GRADIENT_L2_NORM_ABSOLUTE, and the QUALITY_IMPROVEMENT_ABSOLUTE criteria. Checking these criteria allows the QualityImprover to skip the entire optimization if the initial mesh satisfies the appropriate conditions. */ void TerminationCriterion::reset_inner(PatchData &pd, OFEvaluator& obj_eval, MsqError &err) { const unsigned long totalFlag = terminationCriterionFlag | cullingMethodFlag; // clear flag for BOUNDED_VERTEX_MOVEMENT vertexMovementExceedsBound = 0; // Use -1 to denote that this isn't initialized yet. // As all valid values must be >= 0.0, a negative // value indicates that it is uninitialized and is // always less than any valid value. maxSquaredMovement = -1; // Clear the iteration count. 
iterationCounter = 0; //reset the inner timer if needed if(totalFlag & CPU_TIME){ mTimer.reset(); } //GRADIENT currentGradInfNorm = initialGradInfNorm = 0.0; currentGradL2NormSquared = initialGradL2NormSquared = 0.0; if(totalFlag & GRAD_FLAGS) { if (!obj_eval.have_objective_function()) { MSQ_SETERR(err)("Error termination criteria set which uses objective " "functions, but no objective function is available.", MsqError::INVALID_STATE); return; } int num_vertices=pd.num_free_vertices(); mGrad.resize( num_vertices ); //get gradient and make sure it is valid bool b = obj_eval.evaluate(pd, currentOFValue, mGrad, err); MSQ_ERRRTN(err); if (!b) { MSQ_SETERR(err)("Initial patch is invalid for gradient computation.", MsqError::INVALID_STATE); return; } //get the gradient norms if (totalFlag & (GRADIENT_INF_NORM_ABSOLUTE|GRADIENT_INF_NORM_RELATIVE)) { currentGradInfNorm = initialGradInfNorm = Linf(mGrad); MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o Initial gradient Inf norm: " << " " << RPM(initialGradInfNorm) << std::endl; } if (totalFlag & (GRADIENT_L2_NORM_ABSOLUTE|GRADIENT_L2_NORM_RELATIVE)) { currentGradL2NormSquared = initialGradL2NormSquared = length_squared(mGrad); MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o Initial gradient L2 norm: " << " " << RPM(std::sqrt(initialGradL2NormSquared)) << std::endl; } //the OFvalue comes for free, so save it previousOFValue=currentOFValue; initialOFValue=currentOFValue; } //find the initial objective function value if needed and not already //computed. If we needed the gradient, we have the OF value for free. // Also, if possible, get initial OF value if writing plot file. Solvers // often supply the OF value for subsequent iterations so by calculating // the initial value we can generate OF value plots. 
else if ((totalFlag & OF_FLAGS) || (plotFile.is_open() && pd.num_free_vertices() && obj_eval.have_objective_function())) { //ensure the obj_ptr is not null if(!obj_eval.have_objective_function()){ MSQ_SETERR(err)("Error termination criteria set which uses objective " "functions, but no objective function is available.", MsqError::INVALID_STATE); return; } bool b = obj_eval.evaluate(pd, currentOFValue, err); MSQ_ERRRTN(err); if (!b){ MSQ_SETERR(err)("Initial patch is invalid for evaluation.",MsqError::INVALID_STATE); return; } //std::cout<<"\nReseting initial of value = "<<initialOFValue; previousOFValue=currentOFValue; initialOFValue=currentOFValue; } if (totalFlag & (GRAD_FLAGS|OF_FLAGS)) MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o Initial OF value: " << " " << RPM(initialOFValue) << std::endl; // Store current vertex locations now, because we'll // need them later to compare the current movement with. if (totalFlag & VERTEX_MOVEMENT_RELATIVE) { if (initialVerticesMemento) { pd.recreate_vertices_memento( initialVerticesMemento, err ); } else { initialVerticesMemento = pd.create_vertices_memento( err ); } MSQ_ERRRTN(err); maxSquaredInitialMovement = DBL_MAX; } else { maxSquaredInitialMovement = 0; } if (terminationCriterionFlag & UNTANGLED_MESH) { globalInvertedCount = count_inverted( pd, err ); //if (innerOuterType==TYPE_OUTER) MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << " o Num Inverted: " << " " << globalInvertedCount << std::endl; patchInvertedCount = 0; MSQ_ERRRTN(err); } if (timeStepFileType) { // If didn't already calculate gradient abive, calculate it now. if (!(totalFlag & GRAD_FLAGS)) { mGrad.resize( pd.num_free_vertices() ); obj_eval.evaluate(pd, currentOFValue, mGrad, err); err.clear(); } write_timestep( pd, mGrad.empty() ? 
0 : arrptr(mGrad), err); } if (plotFile.is_open()) { // two newlines so GNU plot knows that we are starting a new data set plotFile << std::endl << std::endl; // write column headings as comment in data file plotFile << "#Iter\tCPU\tObjFunc\tGradL2\tGradInf\tMovement\tInverted" << std::endl; // write initial values plotFile << 0 << '\t' << mTimer.since_birth() << '\t' << initialOFValue << '\t' << std::sqrt( currentGradL2NormSquared ) << '\t' << currentGradInfNorm << '\t' << 0.0 << '\t' << globalInvertedCount << std::endl; } }
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is an error, false on cudaSuccess.
static bool cudaCallFailed(cudaError_t status, const char *what)
{
	if (status == cudaSuccess)
		return false;
	fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
	return true;
}

// Driver: sets up the initial condition, advances it with a two-stage
// Runge-Kutta loop on the GPU until timeLength or maxStep is reached, then
// writes the final density and the exact solution to text files and prints
// error norms between the two.
// Returns 0 on success and 1 if an output file cannot be opened or a CUDA
// runtime call fails.
int main(int argc,char **argv){
	// Print GPU properties
	//print_properties();

	// Files to print the result after the last time step.
	// NOTE(review): the files are not fclose()d here because it is not visible
	// whether printToFile() closes them itself -- confirm and close if not.
	FILE *rho_file = fopen("rho_final.txt", "w");
	if (rho_file == NULL) {
		fprintf(stderr, "Could not open rho_final.txt for writing\n");
		return 1;
	}
	FILE *E_file = fopen("E_final.txt", "w");
	if (E_file == NULL) {
		fprintf(stderr, "Could not open E_final.txt for writing\n");
		fclose(rho_file);
		return 1;
	}

	// Construct initial condition for problem
	ICsinus Config(-1.0, 1.0, -1.0, 1.0);
	//ICsquare Config(0.5,0.5,gasGam);

	// Set initial values for Configuration 1
	/*
	Config.set_rho(rhoConfig19);
	Config.set_pressure(pressureConfig19);
	Config.set_u(uConfig19);
	Config.set_v(vConfig19);
	*/

	// Initiate the matrices for the unknowns in the Euler equations
	cpu_ptr_2D rho(nx, ny, border,1);
	cpu_ptr_2D E(nx, ny, border,1);
	cpu_ptr_2D rho_u(nx, ny, border,1);
	cpu_ptr_2D rho_v(nx, ny, border,1);
	cpu_ptr_2D zeros(nx, ny, border,1);

	// Set initial condition
	Config.setIC(rho, rho_u, rho_v, E);

	double timeStart = get_wall_time();

	// Host buffers for the downloaded density and the exact solution
	cpu_ptr_2D rho_dummy(nx, ny, border);
	cpu_ptr_2D E_dummy(nx, ny, border);

	/*
	rho_dummy.xmin = -1.0;
	rho_dummy.ymin = -1.0;
	E_dummy.xmin = -1.0;
	E_dummy.ymin = -1.0;
	*/

	// Set block and grid sizes
	dim3 gridBC = dim3(1, 1, 1);
	dim3 blockBC = dim3(BLOCKDIM_BC,1,1);

	dim3 gridBlockFlux;
	dim3 threadBlockFlux;

	dim3 gridBlockRK;
	dim3 threadBlockRK;

	computeGridBlock(gridBlockFlux, threadBlockFlux, nx + 2*border, ny + 2*border, INNERTILEDIM_X, INNERTILEDIM_Y, BLOCKDIM_X, BLOCKDIM_Y);

	computeGridBlock(gridBlockRK, threadBlockRK, nx + 2*border, ny + 2*border, BLOCKDIM_X_RK, BLOCKDIM_Y_RK, BLOCKDIM_X_RK, BLOCKDIM_Y_RK);

	// One entry per flux block
	int nElements = gridBlockFlux.x*gridBlockFlux.y;

	// Allocate memory for the GPU pointers
	gpu_ptr_1D L_device(nElements);
	gpu_ptr_1D dt_device(1);

	gpu_ptr_2D rho_device(nx, ny, border);
	gpu_ptr_2D E_device(nx, ny, border);
	gpu_ptr_2D rho_u_device(nx, ny, border);
	gpu_ptr_2D rho_v_device(nx, ny, border);

	gpu_ptr_2D R0(nx, ny, border);
	gpu_ptr_2D R1(nx, ny, border);
	gpu_ptr_2D R2(nx, ny, border);
	gpu_ptr_2D R3(nx, ny, border);

	gpu_ptr_2D Q0(nx, ny, border);
	gpu_ptr_2D Q1(nx, ny, border);
	gpu_ptr_2D Q2(nx, ny, border);
	gpu_ptr_2D Q3(nx, ny, border);

	// Allocate pinned memory on host
	init_allocate();

	// Set BC arguments
	set_bc_args(BCArgs[0], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border);
	set_bc_args(BCArgs[1], Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx+2*border, ny+2*border, border);
	set_bc_args(BCArgs[2], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border);

	// Set FLUX arguments
	set_flux_args(fluxArgs[0], L_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y);
	set_flux_args(fluxArgs[1], L_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y);

	// Set TIME argument
	set_dt_args(dtArgs, L_device.getRawPtr(), dt_device.getRawPtr(), nElements, rho.get_dx(), rho.get_dy(), cfl_number);

	// Set Rk arguments
	set_rk_args(RKArgs[0], dt_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx, ny, border);
	set_rk_args(RKArgs[1], dt_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx, ny, border);

	L_device.set(FLT_MAX);

	/*
	R0.upload(zeros.get_ptr());
	R1.upload(zeros.get_ptr());
	R2.upload(zeros.get_ptr());
	R3.upload(zeros.get_ptr());

	Q0.upload(zeros.get_ptr());
	Q1.upload(zeros.get_ptr());
	Q2.upload(zeros.get_ptr());
	Q3.upload(zeros.get_ptr());
	*/

	R0.set(0,0,0,nx,ny,border);
	R1.set(0,0,0,nx,ny,border);
	R2.set(0,0,0,nx,ny,border);
	R3.set(0,0,0,nx,ny,border);

	Q0.set(0,0,0,nx,ny,border);
	Q1.set(0,0,0,nx,ny,border);
	Q2.set(0,0,0,nx,ny,border);
	Q3.set(0,0,0,nx,ny,border);

	rho_device.upload(rho.get_ptr());
	rho_u_device.upload(rho_u.get_ptr());
	rho_v_device.upload(rho_v.get_ptr());
	E_device.upload(E.get_ptr());

	// Update boundaries
	callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[0]);

	// Stream and event used to copy dt back to the host while the rest of
	// the step is queued.
	cudaStream_t stream1;
	if (cudaCallFailed(cudaStreamCreate(&stream1), "cudaStreamCreate"))
		return 1;
	cudaEvent_t dt_complete;
	if (cudaCallFailed(cudaEventCreate(&dt_complete), "cudaEventCreate")) {
		cudaStreamDestroy(stream1);
		return 1;
	}

	bool loopFailed = false;
	while (currentTime < timeLength && step < maxStep){

		//RK1
		//Compute flux
		callFluxKernel(gridBlockFlux, threadBlockFlux, 0, fluxArgs[0]);

		// Compute timestep (based on CFL condition)
		callDtKernel(TIMETHREADS, dtArgs);

		// Queue the copy of dt; dt_complete marks when dt_host is valid.
		if (cudaCallFailed(cudaMemcpyAsync(dt_host, dt_device.getRawPtr(), sizeof(float), cudaMemcpyDeviceToHost, stream1), "cudaMemcpyAsync(dt)") ||
		    cudaCallFailed(cudaEventRecord(dt_complete, stream1), "cudaEventRecord(dt_complete)")) {
			loopFailed = true;
			break;
		}

		// Perform RK1 step
		callRKKernel(gridBlockRK, threadBlockRK, 0, RKArgs[0]);

		//Update boundaries
		callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[1]);

		//RK2
		// Compute flux
		callFluxKernel(gridBlockFlux, threadBlockFlux, 1, fluxArgs[1]);

		//Perform RK2 step
		callRKKernel(gridBlockRK, threadBlockRK, 1, RKArgs[1]);

		//cudaEventRecord(srteam_sync, srteam1);

		callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[2]);

		// dt_host must not be read before the copy has finished.
		if (cudaCallFailed(cudaEventSynchronize(dt_complete), "cudaEventSynchronize(dt_complete)")) {
			loopFailed = true;
			break;
		}

		step++;
		currentTime += *dt_host;
		// printf("Step: %i, current time: %.6f dt:%.6f\n" , step,currentTime, dt_host[0]);
	}

	//cuProfilerStop();
	//cudaProfilerStop();

	// Wait for the kernels of the last step before reading the clock, so the
	// elapsed time covers all queued GPU work.
	const bool syncFailed = cudaCallFailed(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

	printf("Elapsed time %.5f", get_wall_time() - timeStart);

	cudaEventDestroy(dt_complete);
	cudaStreamDestroy(stream1);

	if (loopFailed || syncFailed)
		return 1;

	E_device.download(E.get_ptr());
	rho_u_device.download(rho_u.get_ptr());
	rho_v_device.download(rho_v.get_ptr());

	// Final density goes to rho_file, the exact solution to E_file.
	rho_device.download(rho_dummy.get_ptr());
	rho_dummy.printToFile(rho_file, true, false);

	Config.exactSolution(E_dummy, currentTime);
	E_dummy.printToFile(E_file, true, false);

	float LinfError = Linf(E_dummy, rho_dummy);
	float L1Error = L1(E_dummy, rho_dummy);
	float L1Error2 = L1test(E_dummy, rho_dummy);

	printf("nx: %i\t Linf error %.9f\t L1 error %.7f L1test erro %.7f", nx, LinfError, L1Error, L1Error2);
	printf("nx: %i step: %i, current time: %.6f dt:%.6f\n" , nx, step,currentTime, dt_host[0]);

	/*
	cudaMemcpy(L_host, L_device, sizeof(float)*(nElements), cudaMemcpyDeviceToHost);
	for (int i =0; i < nElements; i++)
		printf(" %.7f ", L_host[i]);
	*/

	printf("%s\n", cudaGetErrorString(cudaGetLastError()));

	return(0);
}