コード例 #1
0
ファイル: ex1.cpp プロジェクト: yijay/TMB
Type objective_function<Type>::operator()()
{
 DATA_FACTOR(Sex);
 DATA_VECTOR(Age);
 DATA_VECTOR(Length);
 int n = Length.size();

 // These are the parameters (three are vectors; one is a scalar)
 PARAMETER_VECTOR(Linf);
 PARAMETER_VECTOR(Kappa);
 PARAMETER_VECTOR(t0);
 PARAMETER(LogSigma);
 Type Sigma = exp(LogSigma);
 vector<Type> LengthPred(n);

 // Provide the standard error of Sigma
 ADREPORT(Sigma);

 // Predictions and likelihoods
 for(int i=0;i<n;i++){
  Type Temp = Kappa(Sex(i))*(Age(i)-t0(Sex(i)));
  LengthPred(i) = Linf(Sex(i))*(1.0-exp(-Temp));
  }
 Type nll = -sum(dnorm(Length,LengthPred,Sigma,true));

 // Prediction for sex 1 and age 10
 Type Temp = Kappa(0)*(Type(10)-t0(0));
 Type PredLen10 = Linf(0)*(1.0-exp(-Temp));
 ADREPORT(PredLen10);

 // Predicted growth curve
 matrix<Type>LenPred(2,50);
 for (int Isex=0;Isex<2;Isex++)
  for (int Iage=1;Iage<=50;Iage++)
   {
   Temp = Kappa(Isex)*(Iage*1.0-t0(Isex));
   LenPred(Isex,Iage-1) = Linf(Isex)*(1.0-exp(-Temp));
   }
 REPORT(LenPred);

 return nll;
}
void TerminationCriterion::accumulate_inner( PatchData& pd, 
                                             double of_value,
                                             Vector3D* grad_array,
                                             MsqError& err )
{
  //if terminating on the norm of the gradient
  //currentGradL2NormSquared = HUGE_VAL;
  if (terminationCriterionFlag & (GRADIENT_L2_NORM_ABSOLUTE | GRADIENT_L2_NORM_RELATIVE)) 
  {
    currentGradL2NormSquared = length_squared(grad_array, pd.num_free_vertices()); // get the L2 norm
    MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o Info -- gradient L2 norm: " << " "
                                   << RPM(std::sqrt(currentGradL2NormSquared)) << std::endl;
  }
  //currentGradInfNorm = 10e6;
  if (terminationCriterionFlag & (GRADIENT_INF_NORM_ABSOLUTE | GRADIENT_INF_NORM_RELATIVE)) 
  {
    currentGradInfNorm = Linf(grad_array, pd.num_free_vertices()); // get the Linf norm
    MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o Info -- gradient Inf norm: " << " "
                                   << RPM(currentGradInfNorm) << std::endl;
  } 
  
  if (terminationCriterionFlag & VERTEX_MOVEMENT_RELATIVE)
  {
    maxSquaredInitialMovement = pd.get_max_vertex_movement_squared(
                               initialVerticesMemento, err );  MSQ_ERRRTN(err);
    MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o Info -- max initial vertex movement: " << " "
                                   << RPM(maxSquaredInitialMovement) << std::endl;
  }
  
  previousOFValue = currentOFValue;
  currentOFValue = of_value;
  if (terminationCriterionFlag & OF_FLAGS) {
    MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o Info -- OF Value: " << " " << RPM(of_value) << " iterationCounter= " << iterationCounter << std::endl;
  }
  else if (grad_array) {
    MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o OF Value: " << " " << RPM(of_value) << " iterationCounter= " << iterationCounter 
      //<< " terminationCriterionFlag= " << terminationCriterionFlag << " OF_FLAGS = " << OF_FLAGS
                                   << std::endl;
  }
  
  ++iterationCounter;
  if (timeStepFileType)
    write_timestep( pd, grad_array, err);
    
  if (plotFile.is_open()) 
    plotFile << iterationCounter 
     << '\t' << mTimer.since_birth() 
     << '\t' << of_value 
     << '\t' << std::sqrt( currentGradL2NormSquared ) 
     << '\t' << currentGradInfNorm 
     << '\t' << (maxSquaredMovement > 0.0 ? std::sqrt( maxSquaredMovement ) : 0.0)
     << '\t' << globalInvertedCount
     << std::endl;
}
コード例 #3
0
ファイル: test3.c プロジェクト: khokhlov/compact3
void single_test(const int n, const double alpha, reconstruct r, complex_mesh_func cmf, err_t *err)
{
	data_t d, nd;
	integrate(n, alpha, r, cmf, &d);
	save_data("bis2.txt", &d);
	copy_data(&d, &nd);
	initial_sin4(T, &nd);
	save_data("none.txt", &d);
	err->l1 = L1(&d, &nd);
	err->l2 = L2(&d, &nd);
	err->linf = Linf(&d, &nd);
	free_data(&d, 1);
	free_data(&nd, 1);
}
コード例 #4
0
/*!Performs Conjugate gradient minimization on the PatchData, pd.*/
void ConjugateGradient::optimize_vertex_positions(PatchData &pd, 
                                                MsqError &err){
  // pd.reorder();

  MSQ_FUNCTION_TIMER( "ConjugateGradient::optimize_vertex_positions" );

  Timer c_timer;
  size_t num_vert=pd.num_free_vertices();
  if(num_vert<1){
     MSQ_DBGOUT(1) << "\nEmpty free vertex list in ConjugateGradient\n";
     return;
  }
/*  
    //zero out arrays
  int zero_loop=0;
  while(zero_loop<arraySize){
    fGrad[zero_loop].set(0,0,0);
    pGrad[zero_loop].set(0,0,0);
    fNewGrad[zero_loop].set(0,0,0);
    ++zero_loop;
  }
*/
  
  // get OF evaluator
  OFEvaluator& objFunc = get_objective_function_evaluator();
  
  size_t ind;
    //Michael cull list:  possibly set soft_fixed flags here
  
   //MsqFreeVertexIndexIterator free_iter(pd, err);  MSQ_ERRRTN(err);
  
      
   double f=0;
   //Michael, this isn't equivalent to CUBIT because we only want to check
   //the objective function value of the 'bad' elements
   //if invalid initial patch set an error.
   bool temp_bool = objFunc.update(pd, f, fGrad, err);
   assert(fGrad.size() == num_vert);
   if(MSQ_CHKERR(err))
      return;
  if( ! temp_bool){
    MSQ_SETERR(err)("Conjugate Gradient not able to get valid gradient "
                    "and function values on intial patch.", 
                    MsqError::INVALID_MESH);
    return;
  }
  double grad_norm=MSQ_MAX_CAP;
  
  if(conjGradDebug>0){
    MSQ_PRINT(2)("\nCG's DEGUB LEVEL = %i \n",conjGradDebug);
    grad_norm=Linf(arrptr(fGrad),fGrad.size());
    MSQ_PRINT(2)("\nCG's FIRST VALUE = %f,grad_norm = %f",f,grad_norm);
    MSQ_PRINT(2)("\n   TIME %f",c_timer.since_birth());
    grad_norm=MSQ_MAX_CAP;
  }

    //Initializing pGrad (search direction).  
  pGrad.resize(fGrad.size());
  for (ind = 0; ind < num_vert; ++ind)
    pGrad[ind]=(-fGrad[ind]);

  int j=0; // total nb of step size changes ... not used much
  int i=0; // iteration counter
  unsigned m=0; // 
  double alp=MSQ_MAX_CAP; // alp: scale factor of search direction
    //we know inner_criterion is false because it was checked in
    //loop_over_mesh before being sent here.
  TerminationCriterion* term_crit=get_inner_termination_criterion();
  
    //while ((i<maxIteration && alp>stepBound && grad_norm>normGradientBound)
    //     && !inner_criterion){
  while(!term_crit->terminate()){
    ++i;
      //std::cout<<"\Michael delete i = "<<i;
    int k=0;
    alp=get_step(pd,f,k,err);
    j+=k;
    if(conjGradDebug>2){
      MSQ_PRINT(2)("\n  Alp initial, alp = %20.18f",alp);
    }

     // if alp == 0, revert to steepest descent search direction
    if(alp==0){
      for (m = 0; m < num_vert; ++m) {
        pGrad[m]=(-fGrad[m]);
      }
      alp=get_step(pd,f,k,err);
      j+=k;
      if(conjGradDebug>1){
        MSQ_PRINT(2)("\n CG's search direction reset.");
        if(conjGradDebug>2)
          MSQ_PRINT(2)("\n  Alp was zero, alp = %20.18f",alp);
      }
      
    }
    if(alp!=0){
      pd.move_free_vertices_constrained( arrptr(pGrad), num_vert, alp, err );
      MSQ_ERRRTN(err);
      
      if (! objFunc.update(pd, f, fNewGrad, err)){
        MSQ_SETERR(err)("Error inside Conjugate Gradient, vertices moved "
                        "making function value invalid.", 
                        MsqError::INVALID_MESH);
        return;
      }
      assert(fNewGrad.size() == (unsigned)num_vert);
      
      if(conjGradDebug>0){
        grad_norm=Linf(arrptr(fNewGrad),num_vert);
        MSQ_PRINT(2)("\nCG's VALUE = %f,  iter. = %i,  grad_norm = %f,  alp = %f",f,i,grad_norm,alp);
        MSQ_PRINT(2)("\n   TIME %f",c_timer.since_birth());
      }
      double s11=0;
      double s12=0;
      double s22=0;
      //free_iter.reset();
      //while (free_iter.next()) {
      //  m=free_iter.value();
      for (m = 0; m < num_vert; ++m) {
        s11+=fGrad[m]%fGrad[m];
        s12+=fGrad[m]%fNewGrad[m];
        s22+=fNewGrad[m]%fNewGrad[m];
      }
      
        // Steepest Descent (takes 2-3 times as long as P-R)
        //double bet=0;
      
        // Fletcher-Reeves (takes twice as long as P-R)
        //double bet = s22/s11;

        // Polack-Ribiere  
      double bet;  
      if (!divide( s22-s12, s11, bet ))
        return; // gradient is zero    
      //free_iter.reset();
      //while (free_iter.next()) {
      //  m=free_iter.value();
      for (m = 0; m < num_vert; ++m) {
        pGrad[m]=(-fNewGrad[m]+(bet*pGrad[m]));
        fGrad[m]=fNewGrad[m];
      }
      if(conjGradDebug>2){
        MSQ_PRINT(2)(" \nSEARCH DIRECTION INFINITY NORM = %e",
                   Linf(arrptr(fNewGrad),num_vert));
      }
      
    }//end if on alp == 0

    term_crit->accumulate_patch( pd, err ); MSQ_ERRRTN(err);
    term_crit->accumulate_inner( pd, f, arrptr(fGrad), err );  MSQ_ERRRTN(err);
  }//end while
  if(conjGradDebug>0){
    MSQ_PRINT(2)("\nConjugate Gradient complete i=%i ",i);
    MSQ_PRINT(2)("\n-  FINAL value = %f, alp=%4.2e grad_norm=%4.2e",f,alp,grad_norm);
    MSQ_PRINT(2)("\n   FINAL TIME %f",c_timer.since_birth());
  }
}
/*!Reset function using using a PatchData object.  This function is
  called for the inner-stopping criterion directly from the
  loop over mesh function in VertexMover.  For outer criterion,
  it is called from the reset function which takes a MeshSet object.
  This function prepares the object to be used by setting the initial
  values of some of the data members.  As examples, if needed, it resets
  the cpu timer to zero, the iteration counter to zero, and the
  initial and previous objective function values to the current
  objective function value for this patch.
  The return value for this function is similar to that of terminate().
  The function returns false if the checked criteria have not been
  satisfied, and true if they have been.  reset() only checks the
  GRADIENT_INF_NORM_ABSOLUTE, GRADIENT_L2_NORM_ABSOLUTE, and the
  QUALITY_IMPROVEMENT_ABSOLUTE criteria.  Checking these criteria
  allows the QualityImprover to skip the entire optimization if
  the initial mesh satisfies the appropriate conditions.
 */
void TerminationCriterion::reset_inner(PatchData &pd, OFEvaluator& obj_eval,
                                    MsqError &err)
{
  const unsigned long totalFlag = terminationCriterionFlag | cullingMethodFlag;
  
    // clear flag for BOUNDED_VERTEX_MOVEMENT
  vertexMovementExceedsBound = 0;
  
    // Use -1 to denote that this isn't initialized yet.
    // As all valid values must be >= 0.0, a negative
    // value indicates that it is uninitialized and is
    // always less than any valid value.
  maxSquaredMovement = -1;
  
    // Clear the iteration count.
  iterationCounter = 0;
  
    //reset the inner timer if needed
  if(totalFlag & CPU_TIME){
    mTimer.reset();
  }
   
    //GRADIENT
  currentGradInfNorm = initialGradInfNorm = 0.0;
  currentGradL2NormSquared = initialGradL2NormSquared = 0.0;
  if(totalFlag & GRAD_FLAGS)
  {
    if (!obj_eval.have_objective_function()) {
      MSQ_SETERR(err)("Error termination criteria set which uses objective "
                      "functions, but no objective function is available.",
                      MsqError::INVALID_STATE);   
      return;
    } 
    int num_vertices=pd.num_free_vertices();
    mGrad.resize( num_vertices );

      //get gradient and make sure it is valid
    bool b = obj_eval.evaluate(pd, currentOFValue, mGrad, err); MSQ_ERRRTN(err);
    if (!b) {
      MSQ_SETERR(err)("Initial patch is invalid for gradient computation.", 
                      MsqError::INVALID_STATE);
      return;
    } 

      //get the gradient norms
    if (totalFlag & (GRADIENT_INF_NORM_ABSOLUTE|GRADIENT_INF_NORM_RELATIVE))
    {
      currentGradInfNorm = initialGradInfNorm = Linf(mGrad);
      MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o Initial gradient Inf norm: " << " "
                                     << RPM(initialGradInfNorm) << std::endl;
    }  
      
    if (totalFlag & (GRADIENT_L2_NORM_ABSOLUTE|GRADIENT_L2_NORM_RELATIVE))
    {
      currentGradL2NormSquared = initialGradL2NormSquared = length_squared(mGrad);
      MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o Initial gradient L2 norm: " << " "
                                     << RPM(std::sqrt(initialGradL2NormSquared)) << std::endl;
    }  

      //the OFvalue comes for free, so save it
    previousOFValue=currentOFValue;
    initialOFValue=currentOFValue;
  }
  //find the initial objective function value if needed and not already
  //computed.  If we needed the gradient, we have the OF value for free.
  // Also, if possible, get initial OF value if writing plot file.  Solvers
  // often supply the OF value for subsequent iterations so by calculating
  // the initial value we can generate OF value plots.
  else if ((totalFlag & OF_FLAGS) || 
           (plotFile.is_open() && pd.num_free_vertices() && obj_eval.have_objective_function()))
  {
      //ensure the obj_ptr is not null
    if(!obj_eval.have_objective_function()){
      MSQ_SETERR(err)("Error termination criteria set which uses objective "
                      "functions, but no objective function is available.",
                      MsqError::INVALID_STATE);
      return;
    }
    
    bool b = obj_eval.evaluate(pd, currentOFValue, err); MSQ_ERRRTN(err);
    if (!b){
      MSQ_SETERR(err)("Initial patch is invalid for evaluation.",MsqError::INVALID_STATE);
      return;
    }
      //std::cout<<"\nReseting initial of value = "<<initialOFValue;
    previousOFValue=currentOFValue;
    initialOFValue=currentOFValue;
  }
  
  if (totalFlag & (GRAD_FLAGS|OF_FLAGS))
    MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o Initial OF value: " << " " << RPM(initialOFValue) << std::endl;
  
    // Store current vertex locations now, because we'll
    // need them later to compare the current movement with.
  if (totalFlag & VERTEX_MOVEMENT_RELATIVE)
  {
    if (initialVerticesMemento)
    {
      pd.recreate_vertices_memento( initialVerticesMemento, err );
    }
    else
    {
      initialVerticesMemento = pd.create_vertices_memento( err );
    }
    MSQ_ERRRTN(err);
    maxSquaredInitialMovement = DBL_MAX;
  }
  else {
    maxSquaredInitialMovement = 0;
  }
  
  if (terminationCriterionFlag & UNTANGLED_MESH) {
    globalInvertedCount = count_inverted( pd, err );
    //if (innerOuterType==TYPE_OUTER) MSQ_DBGOUT_P0_ONLY(debugLevel) << par_string() << "  o Num Inverted: " << " " << globalInvertedCount << std::endl;
    patchInvertedCount = 0;
    MSQ_ERRRTN(err);
  }

  if (timeStepFileType) {
      // If didn't already calculate gradient abive, calculate it now.
    if (!(totalFlag & GRAD_FLAGS)) {
      mGrad.resize( pd.num_free_vertices() );
      obj_eval.evaluate(pd, currentOFValue, mGrad, err);
      err.clear();
    }
    write_timestep( pd, mGrad.empty() ? 0 : arrptr(mGrad), err);
  }
    
  if (plotFile.is_open()) {
      // two newlines so GNU plot knows that we are starting a new data set
    plotFile << std::endl << std::endl;
      // write column headings as comment in data file
    plotFile << "#Iter\tCPU\tObjFunc\tGradL2\tGradInf\tMovement\tInverted" << std::endl;
      // write initial values
    plotFile << 0 
     << '\t' << mTimer.since_birth() 
     << '\t' << initialOFValue 
     << '\t' << std::sqrt( currentGradL2NormSquared ) 
     << '\t' << currentGradInfNorm 
     << '\t' << 0.0
     << '\t' << globalInvertedCount
     << std::endl;
  }
}
コード例 #6
0
int main(int argc,char **argv){

// Print GPU properties
//print_properties();

// Files to print the result after the last time step
FILE *rho_file;
FILE *E_file;
rho_file = fopen("rho_final.txt", "w");
E_file = fopen("E_final.txt", "w");

// Construct initial condition for problem
ICsinus Config(-1.0, 1.0, -1.0, 1.0); 
//ICsquare Config(0.5,0.5,gasGam);

// Set initial values for Configuration 1
/*
Config.set_rho(rhoConfig19);
Config.set_pressure(pressureConfig19);
Config.set_u(uConfig19);
Config.set_v(vConfig19);
*/

// Determining global border based on left over tiles (a little hack)
int globalPadding;
globalPadding = (nx+2*border+16)/16;
globalPadding = 16*globalPadding - (nx+2*border);
//printf("Globalpad: %i\n", globalPadding);

// Change border to add padding
//border = border + globalPadding/2;

// Initiate the matrices for the unknowns in the Euler equations
cpu_ptr_2D rho(nx, ny, border,1);
cpu_ptr_2D E(nx, ny, border,1);
cpu_ptr_2D rho_u(nx, ny, border,1);
cpu_ptr_2D rho_v(nx, ny, border,1);
cpu_ptr_2D zeros(nx, ny, border,1);

// Set initial condition
Config.setIC(rho, rho_u, rho_v, E);

double timeStart = get_wall_time();

// Test 
cpu_ptr_2D rho_dummy(nx, ny, border);
cpu_ptr_2D E_dummy(nx, ny, border);

/*
rho_dummy.xmin = -1.0;
rho_dummy.ymin = -1.0;
E_dummy.xmin = -1.0;
E_dummy.ymin = -1.0;
*/

// Set block and grid sizes
dim3 gridBC = dim3(1, 1, 1);
dim3 blockBC = dim3(BLOCKDIM_BC,1,1);

dim3 gridBlockFlux;
dim3 threadBlockFlux;

dim3 gridBlockRK;
dim3 threadBlockRK;

computeGridBlock(gridBlockFlux, threadBlockFlux, nx + 2*border, ny + 2*border, INNERTILEDIM_X, INNERTILEDIM_Y, BLOCKDIM_X, BLOCKDIM_Y);

computeGridBlock(gridBlockRK, threadBlockRK, nx + 2*border, ny + 2*border, BLOCKDIM_X_RK, BLOCKDIM_Y_RK, BLOCKDIM_X_RK, BLOCKDIM_Y_RK);

int nElements = gridBlockFlux.x*gridBlockFlux.y;

// Allocate memory for the GPU pointers
gpu_ptr_1D L_device(nElements);
gpu_ptr_1D dt_device(1);

gpu_ptr_2D rho_device(nx, ny, border);
gpu_ptr_2D E_device(nx, ny, border);
gpu_ptr_2D rho_u_device(nx, ny, border);
gpu_ptr_2D rho_v_device(nx, ny, border); 

gpu_ptr_2D R0(nx, ny, border);
gpu_ptr_2D R1(nx, ny, border);
gpu_ptr_2D R2(nx, ny, border);
gpu_ptr_2D R3(nx, ny, border);

gpu_ptr_2D Q0(nx, ny, border);
gpu_ptr_2D Q1(nx, ny, border);
gpu_ptr_2D Q2(nx, ny, border);
gpu_ptr_2D Q3(nx, ny, border);

// Allocate pinned memory on host
init_allocate();

// Set BC arguments
set_bc_args(BCArgs[0], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border);
set_bc_args(BCArgs[1], Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx+2*border, ny+2*border, border);
set_bc_args(BCArgs[2], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border);

// Set FLUX arguments
set_flux_args(fluxArgs[0], L_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y);
set_flux_args(fluxArgs[1], L_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y);

// Set TIME argument
set_dt_args(dtArgs, L_device.getRawPtr(), dt_device.getRawPtr(), nElements, rho.get_dx(), rho.get_dy(), cfl_number);

// Set Rk arguments
set_rk_args(RKArgs[0], dt_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx, ny, border); 
set_rk_args(RKArgs[1], dt_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx, ny, border); 


L_device.set(FLT_MAX);

/*
R0.upload(zeros.get_ptr()); 
R1.upload(zeros.get_ptr()); 
R2.upload(zeros.get_ptr()); 
R3.upload(zeros.get_ptr()); 

Q0.upload(zeros.get_ptr()); 
Q1.upload(zeros.get_ptr()); 
Q2.upload(zeros.get_ptr()); 
Q3.upload(zeros.get_ptr()); 
*/

R0.set(0,0,0,nx,ny,border); 
R1.set(0,0,0,nx,ny,border); 
R2.set(0,0,0,nx,ny,border); 
R3.set(0,0,0,nx,ny,border); 

Q0.set(0,0,0,nx,ny,border); 
Q1.set(0,0,0,nx,ny,border); 
Q2.set(0,0,0,nx,ny,border); 
Q3.set(0,0,0,nx,ny,border); 


rho_device.upload(rho.get_ptr());
rho_u_device.upload(rho_u.get_ptr());
rho_v_device.upload(rho_v.get_ptr());
E_device.upload(E.get_ptr());

// Update boudries
callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[0]);

//Create cuda stream
cudaStream_t stream1;
cudaStreamCreate(&stream1);
cudaEvent_t dt_complete;
cudaEventCreate(&dt_complete);


while (currentTime < timeLength && step < maxStep){	
	
	//RK1	
	//Compute flux
	callFluxKernel(gridBlockFlux, threadBlockFlux, 0, fluxArgs[0]);	
	
	// Compute timestep (based on CFL condition)
	callDtKernel(TIMETHREADS, dtArgs);
	
	cudaMemcpyAsync(dt_host, dt_device.getRawPtr(), sizeof(float), cudaMemcpyDeviceToHost, stream1);
	cudaEventRecord(dt_complete, stream1);

	// Perform RK1 step
	callRKKernel(gridBlockRK, threadBlockRK, 0, RKArgs[0]);
	
	//Update boudries
	callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[1]);		

	//RK2
	// Compute flux
	callFluxKernel(gridBlockFlux, threadBlockFlux, 1, fluxArgs[1]);

	//Perform RK2 step
	callRKKernel(gridBlockRK, threadBlockRK, 1, RKArgs[1]);	

	//cudaEventRecord(srteam_sync, srteam1);

	callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[2]);

	cudaEventSynchronize(dt_complete);

	step++;	
	currentTime += *dt_host;	
//	printf("Step: %i, current time: %.6f dt:%.6f\n" , step,currentTime, dt_host[0]);

}


//cuProfilerStop();
//cudaProfilerStop();

printf("Elapsed time %.5f", get_wall_time() - timeStart);

E_device.download(E.get_ptr());
rho_u_device.download(rho_u.get_ptr());
rho_v_device.download(rho_v.get_ptr());
rho_device.download(rho_dummy.get_ptr());

rho_dummy.printToFile(rho_file, true, false);


Config.exactSolution(E_dummy, currentTime);
E_dummy.printToFile(E_file, true, false);


float LinfError = Linf(E_dummy, rho_dummy);
float L1Error = L1(E_dummy, rho_dummy); 
float L1Error2 = L1test(E_dummy, rho_dummy);

printf("nx: %i\t Linf error %.9f\t L1 error %.7f L1test erro %.7f", nx, LinfError, L1Error, L1Error2);


printf("nx: %i step: %i, current time: %.6f dt:%.6f\n" , nx, step,currentTime, dt_host[0]); 


/*
cudaMemcpy(L_host, L_device, sizeof(float)*(nElements), cudaMemcpyDeviceToHost);
for (int i =0; i < nElements; i++)
	printf(" %.7f ", L_host[i]); 
*/


printf("%s\n", cudaGetErrorString(cudaGetLastError()));

return(0);
}