/**
* Reacts to an event delivered to this class, dispatching on its event code.
*
* Codes handled here: IMU_READ, SESS_ESTABLISHED, SESS_HANGUP, IMU_INIT, IMU_MAP_STATE,
*   CPLD_RESET_COMPLETE, IMU_TAP and IMU_QUAT_CRUNCH. Any other code is forwarded to
*   EventReceiver::notify().
*
* Every access to iius[] is guarded against an index greater than 16. The loop in the
*   CPLD_RESET_COMPLETE branch walks indices 0..16.
*
* @param  active_event  The event being delivered. It is dereferenced without a NULL check.
* @return The number of branches that counted themselves as having acted on the event, plus
*           whatever EventReceiver::notify() returned for codes not handled here.
*/
int8_t LegendManager::notify(ManuvrMsg* active_event) {
  int8_t return_value = 0;
  uint8_t temp_uint_8 = 0;

  /* Some class-specific set of conditionals below this line. */
  switch (active_event->eventCode()) {
    case DIGITABULUM_MSG_IMU_READ:
      // Read the IMU that the cursor currently points at. callback_proc() advances the cursor.
      if (last_imu_read > 16) {
        // Same guard as the INIT branch below: never index iius[] out-of-bounds.
        if (getVerbosity() > 1) local_log.concat("MSG_IMU_READ: last_imu_read > 16.\n");
      }
      else {
        iius[last_imu_read].readSensor();
        return_value++;
      }
      break;


    case MANUVR_MSG_SESS_ESTABLISHED:
      event_legend_frame_ready.delaySchedule(1100);     // Enable the periodic frame broadcast.
      {
        ManuvrMsg *event = Kernel::returnEvent(DIGITABULUM_MSG_IMU_INIT);
        event->addArg((uint8_t) 4);  // Set the desired init stage.
        event->priority(0);
        raiseEvent(event);
      }
      event_iiu_read.delaySchedule(1000);  // Enable the periodic read after letting the dust settle.
      return_value++;
      break;

    case MANUVR_MSG_SESS_HANGUP:
      event_legend_frame_ready.enableSchedule(false);
      // One INIT event is raised per IIU in the dataset.
      for (uint8_t i = 0; i < LEGEND_DATASET_IIU_COUNT; i++) {
        ManuvrMsg *event = Kernel::returnEvent(DIGITABULUM_MSG_IMU_INIT);
        event->addArg((uint8_t) 4);  // Set the desired init stage.
        event->priority(0);
        raiseEvent(event);
      }
      event_iiu_read.enableSchedule(false);    // Disable the periodic read.
      return_value++;
      break;

    case DIGITABULUM_MSG_IMU_INIT:
      /* This is a request (probably from elsewhere in this class) to move one-or-more
           IMUs into the given INIT stage. The argument forms are...
           None        A request to move all IMUs into the minimum meaningful INIT stage (INIT-1).
           uint8       A request to move all IMUs into the given INIT stage.
         Each delivery only touches the IMU at last_imu_read.
       */
      if (0 == active_event->argCount()) {
        if (last_imu_read > 16) {
          if (getVerbosity() > 1) local_log.concat("MSG_IMU_INIT: last_imu_read > 16.\n");
        }
        else {
          iius[last_imu_read].init();
          return_value++;
        }
      }
      else if (0 == active_event->getArgAs(&temp_uint_8)) {
        // If the arg was present, we interpret this as a specified INIT stage...
        if (temp_uint_8 > 16) {
          if (getVerbosity() > 1) local_log.concat("MSG_IMU_INIT had an IMU idx > 16.\n");
        }
        else if (last_imu_read > 16) {
          // The argument is the stage, not the index: the index must be checked separately.
          if (getVerbosity() > 1) local_log.concat("MSG_IMU_INIT: last_imu_read > 16.\n");
        }
        else {
          iius[last_imu_read].state_pass_through(temp_uint_8);
          return_value++;
        }
      }
      break;

    case DIGITABULUM_MSG_IMU_MAP_STATE:
      // Intentionally a no-op at present. The request-handling code is disabled:
      //if (0 == active_event->argCount()) {
        // No args means a request. Send it.
      //  send_map_event();
      //  return_value++;
      //}
      break;


    case DIGITABULUM_MSG_CPLD_RESET_COMPLETE:
      if (getVerbosity() > 3) local_log.concatf("Initializing IMUs...\n");
      // Range-bind everything....
      for (uint8_t i = 0; i < 17; i++) iius[i].rangeBind(true);

      // Fire the event to put the IMUs into INIT-1.
      //raiseEvent(Kernel::returnEvent(DIGITABULUM_MSG_IMU_INIT));
      return_value++;
      break;

    case DIGITABULUM_MSG_IMU_TAP:
      if (0 == active_event->argCount()) {
        // Something wants the thresholds for all configured taps.
      }
      else {
        // Otherwise, it means we've emitted the event. No need to respond.
      }
      break;

    case DIGITABULUM_MSG_IMU_QUAT_CRUNCH:
      // The argument is the index of the IMU whose quaternion should be updated.
      if (0 == active_event->getArgAs(&temp_uint_8)) {
        if (temp_uint_8 > 16) {
          if (getVerbosity() > 1) local_log.concat("QUAT_CRUNCH had an IMU idx > 16.\n");
        }
        else {
          iius[temp_uint_8].MadgwickQuaternionUpdate();
        }
        return_value++;
      }
      else {
        if (getVerbosity() > 2) local_log.concatf("QUAT_CRUNCH handler (IIU %u) got a bad return from an Arg..\n", temp_uint_8);
      }
      break;

    default:
      return_value += EventReceiver::notify(active_event);
      break;
  }

  flushLocalLog();
  return return_value;
}
Beispiel #2
0
  /**
   * Runs the MR iteration for source b, writing the result into x.
   *
   * Outline of what the code does:
   *   - On first use, allocates the work fields (rp only when the source must be preserved).
   *   - Zeroes x, so the initial residual is the source itself, then scales the residual by
   *     1/sqrt(|b|^2) ("domain-wise normalization ... to prevent underflow").
   *   - Performs up to param.maxiter updates x += omega*alpha*r, r -= omega*alpha*Ar, with
   *     alpha formed from the result of cDotProductNormACuda(Ar, r). The outer r2 is not
   *     refreshed inside the loop, so a non-zero source always runs param.maxiter iterations.
   *   - Rescales x by sqrt(|b|^2).
   *   - Unless used as a preconditioner for GCR, accumulates timing/flops/iteration counts
   *     into param and reports residuals.
   *
   * Global reductions are switched off for the duration of the call and re-enabled on exit.
   *
   * @param x  Receives the solution. Any initial guess is discarded (x is zeroed).
   * @param b  The source. It is overwritten by the residual unless
   *           param.preserve_source == QUDA_PRESERVE_SOURCE_YES.
   */
  void MR::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b)
  {

    globalReduce = false; // use local reductions for DD solver

    if (!init) {
      ColorSpinorParam csParam(x);
      csParam.create = QUDA_ZERO_FIELD_CREATE;
      if (param.preserve_source == QUDA_PRESERVE_SOURCE_YES) {
        rp = new cudaColorSpinorField(x, csParam);
        allocate_r = true;
      }
      Arp = new cudaColorSpinorField(x);
      tmpp = new cudaColorSpinorField(x, csParam); //temporary for mat-vec

      init = true;
    }
    cudaColorSpinorField &r =
      (param.preserve_source == QUDA_PRESERVE_SOURCE_YES) ? *rp : b;
    cudaColorSpinorField &Ar = *Arp;
    cudaColorSpinorField &tmp = *tmpp;

    // set initial guess to zero and thus the residual is just the source
    zeroCuda(x);  // can get rid of this for a special first update kernel
    double b2 = normCuda(b);
    if (&r != &b) copyCuda(r, b);

    // domain-wise normalization of the initial residual to prevent underflow
    double r2=0.0; // if zero source then we will exit immediately doing no work
    if (b2 > 0.0) {
      axCuda(1/sqrt(b2), r); // can merge this with the prior copy
      r2 = 1.0; // by definition by this is now true
    }

    if (param.inv_type_precondition != QUDA_GCR_INVERTER) {
      quda::blas_flops = 0;
      profile.TPSTART(QUDA_PROFILE_COMPUTE);
    }

    double omega = 1.0;

    int k = 0;
    if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
      // NOTE(review): Ar has not been computed by mat() yet at this point, so the
      // values printed here reflect whatever Ar currently holds.
      double x2 = norm2(x);
      double3 Ar3 = cDotProductNormBCuda(Ar, r);
      printfQuda("MR: %d iterations, r2 = %e, <r|A|r> = (%e, %e), x2 = %e\n",
                 k, Ar3.z, Ar3.x, Ar3.y, x2);
    }

    while (k < param.maxiter && r2 > 0.0) {

      mat(Ar, r, tmp);

      double3 Ar3 = cDotProductNormACuda(Ar, r);
      Complex alpha = Complex(Ar3.x, Ar3.y) / Ar3.z;

      // x += omega*alpha*r, r -= omega*alpha*Ar, r2 = norm2(r)
      //r2 = caxpyXmazNormXCuda(omega*alpha, r, x, Ar);
      caxpyXmazCuda(omega*alpha, r, x, Ar);

      if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
        // Debug-only norms. Deliberately NOT the outer r2, which controls the loop.
        double x2_dbg = norm2(x);
        double r2_dbg = norm2(r);
        printfQuda("MR: %d iterations, r2 = %e, <r|A|r> = (%e,%e) x2 = %e\n",
                   k+1, r2_dbg, Ar3.x, Ar3.y, x2_dbg);
      } else if (getVerbosity() >= QUDA_VERBOSE) {
        printfQuda("MR: %d iterations, <r|A|r> = (%e, %e)\n", k, Ar3.x, Ar3.y);
      }

      k++;
    }

    if (getVerbosity() >= QUDA_VERBOSE) {
      mat(Ar, r, tmp);
      Complex Ar2 = cDotProductCuda(Ar, r);
      printfQuda("MR: %d iterations, <r|A|r> = (%e, %e)\n", k, real(Ar2), imag(Ar2));
    }

    // Obtain global solution by rescaling
    if (b2 > 0.0) axCuda(sqrt(b2), x);

    if (param.inv_type_precondition != QUDA_GCR_INVERTER) {
      profile.TPSTOP(QUDA_PROFILE_COMPUTE);
      profile.TPSTART(QUDA_PROFILE_EPILOGUE);
      param.secs += profile.Last(QUDA_PROFILE_COMPUTE);

      double gflops = (quda::blas_flops + mat.flops())*1e-9;
      reduceDouble(gflops);

      param.gflops += gflops;
      param.iter += k;

      // this is the relative residual since it has been scaled by b2
      r2 = norm2(r);

      if (param.preserve_source == QUDA_PRESERVE_SOURCE_YES) {
        // Calculate the true residual
        mat(r, x);
        double true_res = xmyNormCuda(b, r);
        // A zero source would make this 0/0 (NaN). Report a zero residual instead.
        param.true_res = (b2 > 0.0) ? sqrt(true_res / b2) : 0.0;
        if (getVerbosity() >= QUDA_SUMMARIZE) {
          printfQuda("MR: Converged after %d iterations, relative residua: iterated = %e, true = %e\n",
                     k, sqrt(r2), param.true_res);
        }
      } else {
        if (getVerbosity() >= QUDA_SUMMARIZE) {
          printfQuda("MR: Converged after %d iterations, relative residua: iterated = %e\n", k, sqrt(r2));
        }
      }

      // reset the flops counters
      quda::blas_flops = 0;
      mat.flops();
      profile.TPSTOP(QUDA_PROFILE_EPILOGUE);
    }

    globalReduce = true; // renable global reductions for outer solver

    return;
  }
/**
* If we find ourselves in this fxn, it means an event that this class built (the argument)
*   has been serviced and we are now getting the chance to see the results. The argument
*   to this fxn will never be NULL.
*
* Depending on class implementations, we might choose to handle the completed Event differently. We
*   might add values to event's Argument chain and return RECYCLE. We may also free() the event
*   ourselves and return DROP. By default, we will return REAP to instruct the Kernel
*   to either free() the event or return it to its preallocate queue, as appropriate. If the event
*   was crafted to not be in the heap in its own allocation, we will return DROP instead.
*
* For IMU_READ and IMU_INIT, last_imu_read acts as a cursor over IMU indices 0..16: while it
*   is below 16 it is advanced and the event is recycled; at 16 (or any unexpected value) it
*   is reset to 0 and the default return code applies.
*
* Note that the early-return paths leave before flushLocalLog() is called.
*
* @param  event  The event for which service has been completed.
* @return A callback return code.
*/
int8_t LegendManager::callback_proc(ManuvrMsg* event) {
  /* Setup the default return code. If the event's refCount() is zero we return a REAP code.
     Otherwise, we will return a DROP code. Downstream of this assignment, we might choose differently. */
  int8_t return_value = (0 == event->refCount()) ? EVENT_CALLBACK_RETURN_REAP : EVENT_CALLBACK_RETURN_DROP;

  /* Some class-specific set of conditionals below this line. */
  switch (event->eventCode()) {
    case DIGITABULUM_MSG_IMU_READ:
      switch (last_imu_read) {
        case 0:
        case 1:
        case 2:
        case 3:
        case 4:
        case 5:
        case 6:
        case 7:
        case 8:
        case 9:
        case 10:
        case 11:
        case 12:
        case 13:
        case 14:
        case 15:
          // More IMUs remain in this pass. Advance the cursor and recycle the event.
          last_imu_read++;
          return EVENT_CALLBACK_RETURN_RECYCLE;

        case 16:
          // Last IMU has been read. Wrap the cursor for the next pass.
          last_imu_read = 0;
          break;

        default:
          if (getVerbosity() > 2) local_log.concat("LegendManager::callback_proc(IMU_READ): Bad arg\n");
          last_imu_read = 0;
          break;
      }
      break;


    case DIGITABULUM_MSG_IMU_INIT:
      switch (last_imu_read) {
        case 0:
        case 1:
        case 2:
        case 3:
        case 4:
        case 5:
        case 6:
        case 7:
        case 8:
        case 9:
        case 10:
        case 11:
        case 12:
        case 13:
        case 14:
        case 15:
          last_imu_read++;
          if (getVerbosity() > 6) local_log.concat("LegendManager::callback_proc(IMU_INIT): RECYCLING\n");
          // We still have IMUs left to deal with. Recycle the event...
          return EVENT_CALLBACK_RETURN_RECYCLE;
        case 16:
          last_imu_read = 0;
          if (getVerbosity() > 6) local_log.concat("LegendManager::callback_proc(IMU_INIT): DROPPING\n");
          break;

        default:
          // This message previously named IMU_READ (copy-paste), which misattributed the fault.
          if (getVerbosity() > 2) local_log.concat("LegendManager::callback_proc(IMU_INIT): Bad arg\n");
          last_imu_read = 0;
          break;
      }
      break;

    case DIGITABULUM_MSG_IMU_LEGEND:
      // We take this as an indication that our notice of altered Legend was sent.
      _er_set_flag(LEGEND_MGR_FLAGS_LEGEND_SENT);
      break;

    case DIGITABULUM_MSG_IMU_MAP_STATE:
      // Bump the sequence number, then (if a legend is in operation and was sent)
      // copy the frame and raise the frame-ready event.
      *(_ptr_sequence) = *(_ptr_sequence) + 1;
      if (operating_legend && _er_flag(LEGEND_MGR_FLAGS_LEGEND_SENT)) {
        operating_legend->copy_frame();
        Kernel::staticRaiseEvent(&event_legend_frame_ready);
        return 0;  // NOTE(review): literal 0 rather than a named callback code; confirm intent.
      }
      break;

    case DIGITABULUM_MSG_IMU_QUAT_CRUNCH:
      {
        uint8_t temp_uint_8 = 0;
        if (0 == event->getArgAs(&temp_uint_8)) {
          // The modulo keeps the index inside 0..16 whatever the argument held.
          if (iius[temp_uint_8 % 17].has_quats_left()) {
            return_value = EVENT_CALLBACK_RETURN_RECYCLE;
          }
        }
        else {
          local_log.concat("LegendManager::callback_proc(): QUAT crunch had no argument?!.\n");
        }
      }
      break;

    default:
      if (getVerbosity() > 5) {
        local_log.concat("LegendManager::callback_proc(): Default case.\n");
        #if defined(__MANUVR_DEBUG)
          event->printDebug(&local_log);
        #endif
        Kernel::log(&local_log);
      }
      break;
  }

  flushLocalLog();
  return return_value;
}
Beispiel #4
0
  /**
   * Conjugate-gradient solve with a mixed-precision ("sloppy") inner iteration and
   * reliable updates.
   *
   * Outline of what the code does:
   *   - Returns early (x = b, residuals = 0) when the source has zero norm.
   *   - Computes the initial residual r = b - mat(x) and allocates the sloppy-precision
   *     work fields. x/r are aliased instead of copied when the sloppy precision equals
   *     the precision of x.
   *   - Iterates until convergence() reports success or param.maxiter is reached. Each
   *     iteration either performs a regular sloppy update, or (when the residual has
   *     dropped enough relative to the running maxima, or is within tolerance) a
   *     "reliable update" that recomputes the residual with mat().
   *   - Stops early if the reliably-computed residual grows on more than maxResIncrease
   *     consecutive reliable updates.
   *   - Records timing, flops, iteration count and true residuals in param.
   *
   * @param x  On entry the initial guess; on exit the solution.
   * @param b  The source field. It is not modified by this function.
   */
  void CG::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b)
  {
    profile.Start(QUDA_PROFILE_INIT);

    // Check to see that we're not trying to invert on a zero-field source
    const double b2 = norm2(b);
    if(b2 == 0){
      profile.Stop(QUDA_PROFILE_INIT);
      printfQuda("Warning: inverting on zero-field source\n");
      x=b;
      param.true_res = 0.0;
      param.true_res_hq = 0.0;
      return;
    }


    cudaColorSpinorField r(b);

    ColorSpinorParam csParam(x);
    csParam.create = QUDA_ZERO_FIELD_CREATE;
    cudaColorSpinorField y(b, csParam);

    mat(r, x, y);
//    zeroCuda(y);

    double r2 = xmyNormCuda(b, r);

    csParam.setPrecision(param.precision_sloppy);
    cudaColorSpinorField Ap(x, csParam);
    cudaColorSpinorField tmp(x, csParam);

    cudaColorSpinorField *tmp2_p = &tmp;
    // tmp only needed for multi-gpu Wilson-like kernels
    if (mat.Type() != typeid(DiracStaggeredPC).name() &&
        mat.Type() != typeid(DiracStaggered).name()) {
      tmp2_p = new cudaColorSpinorField(x, csParam);
    }
    cudaColorSpinorField &tmp2 = *tmp2_p;

    // When the sloppy precision matches x, alias the full-precision fields. Otherwise
    // make reduced-precision copies (released at the end of this function).
    cudaColorSpinorField *x_sloppy, *r_sloppy;
    if (param.precision_sloppy == x.Precision()) {
      csParam.create = QUDA_REFERENCE_FIELD_CREATE;
      x_sloppy = &x;
      r_sloppy = &r;
    } else {
      csParam.create = QUDA_COPY_FIELD_CREATE;
      x_sloppy = new cudaColorSpinorField(x, csParam);
      r_sloppy = new cudaColorSpinorField(r, csParam);
    }

    cudaColorSpinorField &xSloppy = *x_sloppy;
    cudaColorSpinorField &rSloppy = *r_sloppy;
    cudaColorSpinorField p(rSloppy);

    if(&x != &xSloppy){
      copyCuda(y,x);
      zeroCuda(xSloppy);
    }else{
      zeroCuda(y);
    }

    const bool use_heavy_quark_res =
      (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;

    profile.Stop(QUDA_PROFILE_INIT);
    profile.Start(QUDA_PROFILE_PREAMBLE);

    double r2_old;
    double stop = b2*param.tol*param.tol; // stopping condition of solver

    double heavy_quark_res = 0.0; // heavy quark residual
    if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(x,r).z);
    int heavy_quark_check = 10; // how often to check the heavy quark residual

    double alpha=0.0, beta=0.0;
    double pAp;
    int rUpdate = 0;

    double rNorm = sqrt(r2);
    double r0Norm = rNorm;
    double maxrx = rNorm;
    double maxrr = rNorm;
    double delta = param.delta;

    // this parameter determines how many consecutive reliable update
    // residual increases we tolerate before terminating the solver,
    // i.e., how long do we want to keep trying to converge
    int maxResIncrease = 0; // 0 means we have no tolerance

    // Count of consecutive reliable updates on which the residual increased. This is
    // per-call state: it was previously a function-local static, which leaked the count
    // between solves and between solver instances.
    int resIncrease = 0;

    profile.Stop(QUDA_PROFILE_PREAMBLE);
    profile.Start(QUDA_PROFILE_COMPUTE);
    blas_flops = 0;

    int k=0;

    PrintStats("CG", k, r2, b2, heavy_quark_res);

    int steps_since_reliable = 1;

    while ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) &&
            k < param.maxiter) {
      matSloppy(Ap, p, tmp, tmp2); // tmp as tmp

      double sigma;

      bool breakdown = false;

      if (param.pipeline) {
        double3 triplet = tripleCGReductionCuda(rSloppy, Ap, p);
        r2 = triplet.x; double Ap2 = triplet.y; pAp = triplet.z;
        r2_old = r2;

        alpha = r2 / pAp;
        sigma = alpha*(alpha * Ap2 - pAp);
        if (sigma < 0.0 || steps_since_reliable==0) { // sigma condition has broken down
          r2 = axpyNormCuda(-alpha, Ap, rSloppy);
          sigma = r2;
          breakdown = true;
        }

        r2 = sigma;
      } else {
        r2_old = r2;
        pAp = reDotProductCuda(p, Ap);
        alpha = r2 / pAp;

        // here we are deploying the alternative beta computation
        Complex cg_norm = axpyCGNormCuda(-alpha, Ap, rSloppy);
        r2 = real(cg_norm); // (r_new, r_new)
        sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2; // use r2 if (r_k+1, r_k+1-r_k) breaks
      }

      // reliable update conditions
      rNorm = sqrt(r2);
      if (rNorm > maxrx) maxrx = rNorm;
      if (rNorm > maxrr) maxrr = rNorm;
      int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
      int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;

      // force a reliable update if we are within target tolerance (only if doing reliable updates)
      if ( convergence(r2, heavy_quark_res, stop, param.tol_hq) && delta >= param.tol) updateX = 1;

      if ( !(updateR || updateX)) {
        //beta = r2 / r2_old;
        beta = sigma / r2_old; // use the alternative beta computation

        if (param.pipeline && !breakdown) tripleCGUpdateCuda(alpha, beta, Ap, rSloppy, xSloppy, p);
        else axpyZpbxCuda(alpha, p, xSloppy, rSloppy, beta);

        if (use_heavy_quark_res && k%heavy_quark_check==0) {
          copyCuda(tmp,y);
          heavy_quark_res = sqrt(xpyHeavyQuarkResidualNormCuda(xSloppy, tmp, rSloppy).z);
        }

        steps_since_reliable++;
      } else {
        // Reliable update: fold the sloppy solution into y and recompute the residual with mat().
        axpyCuda(alpha, p, xSloppy);
        if (x.Precision() != xSloppy.Precision()) copyCuda(x, xSloppy);

        xpyCuda(x, y); // swap these around?
        mat(r, y, x); // here we can use x as tmp
        r2 = xmyNormCuda(b, r);

        if (x.Precision() != rSloppy.Precision()) copyCuda(rSloppy, r);
        zeroCuda(xSloppy);

        // break-out check if we have reached the limit of the precision
        if (sqrt(r2) > r0Norm && updateX) { // reuse r0Norm for this
          warningQuda("CG: new reliable residual norm %e is greater than previous reliable residual norm %e", sqrt(r2), r0Norm);
          k++;
          rUpdate++;
          if (++resIncrease > maxResIncrease) break;
        } else {
          resIncrease = 0;
        }

        rNorm = sqrt(r2);
        maxrr = rNorm;
        maxrx = rNorm;
        r0Norm = rNorm;
        rUpdate++;

        // explicitly restore the orthogonality of the gradient vector
        double rp = reDotProductCuda(rSloppy, p) / (r2);
        axpyCuda(-rp, rSloppy, p);

        beta = r2 / r2_old;
        xpayCuda(rSloppy, beta, p);

        if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(y,r).z);

        steps_since_reliable = 0;
      }

      breakdown = false;
      k++;

      PrintStats("CG", k, r2, b2, heavy_quark_res);
    }

    if (x.Precision() != xSloppy.Precision()) copyCuda(x, xSloppy);
    xpyCuda(y, x);

    profile.Stop(QUDA_PROFILE_COMPUTE);
    profile.Start(QUDA_PROFILE_EPILOGUE);

    param.secs = profile.Last(QUDA_PROFILE_COMPUTE);
    double gflops = (quda::blas_flops + mat.flops() + matSloppy.flops())*1e-9;
    reduceDouble(gflops);
    param.gflops = gflops;
    param.iter += k;

    if (k==param.maxiter)
      warningQuda("Exceeded maximum iterations %d", param.maxiter);

    if (getVerbosity() >= QUDA_VERBOSE)
      printfQuda("CG: Reliable updates = %d\n", rUpdate);

    // compute the true residuals
    mat(r, x, y);
    param.true_res = sqrt(xmyNormCuda(b, r) / b2);
#if (__COMPUTE_CAPABILITY__ >= 200)
    param.true_res_hq = sqrt(HeavyQuarkResidualNormCuda(x,r).z);
#else
    param.true_res_hq = 0.0;
#endif

    PrintSummary("CG", k, r2, b2);

    // reset the flops counters
    quda::blas_flops = 0;
    mat.flops();
    matSloppy.flops();

    profile.Stop(QUDA_PROFILE_EPILOGUE);
    profile.Start(QUDA_PROFILE_FREE);

    // Release only what was heap-allocated above; aliases must not be deleted.
    if (&tmp2 != &tmp) delete tmp2_p;

    if (param.precision_sloppy != x.Precision()) {
      delete r_sloppy;
      delete x_sloppy;
    }

    profile.Stop(QUDA_PROFILE_FREE);

    return;
  }
Beispiel #5
0
inline unsigned int FMPWPartTmpl<GainTmpl>::
doPartitionInternal(vector<unsigned char>& part) 
{
  // Alternates two phases until the cost stops improving:
  //   1. a pairwise pass: each group is partitioned by a bi-partitioner (PairwiseMgr);
  //   2. a refinement of the whole partition by a k-way manager (KWayRefiner).
  // Returns the cost reported by the last refinement.
  typedef FMPartTmpl<FMBiPartCore, GainTmpl> PairwiseMgr;
  typedef FMPartTmpl4<FMKWayPartCore4, FMKWayGainMgr2> KWayRefiner;

  unsigned int refinedCost = _initCost;
  unsigned int pairwiseCost = _initCost;

  // The refiner is configured once and reused by every round.
  KWayRefiner refiner(getParam());
  refiner.setBalanceTol(getBalanceTol());
  refiner.setPValue(1);
  refiner.setQValue(10);
  refiner.setVerbosity(getVerbosity());
  refiner.setBoundType(getBoundType());
  refiner.noNeedSetHighFanoutNets();

  do {
    initGrouping2();
    getNetlist().pairWisePhase1(part, _groupMap, _sGVec);
    // Each round starts from the cost the previous refinement ended with.
    pairwiseCost = refinedCost;

    for (unsigned int g = 0; g < _numGroups; ++g) {
      const FMParam groupParam(*_sGVec[g]);
      PairwiseMgr pairMgr(groupParam);
      pairMgr.setUpperBound(getUpperBound());
      pairMgr.setLowerBound(getLowerBound());
      pairMgr.setVerbosity(getVerbosity());
      pairMgr.noNeedSetHighFanoutNets();
      pairMgr.setNoInit(pairwiseCost);

      // Lift this group's slice of the partition into its own vector.
      vector<unsigned char> groupPart;
      projectUp(part, groupPart, g);

      // Hand the pair manager the diff entries of the two parts involved.
      boost::array<int, 64>& allDiff = getDiff();
      const unsigned int firstPart = _groupInvMap[g];
      const unsigned int secondPart = _moveTo[firstPart];
      int pairDiff[2];
      pairDiff[0] = allDiff[firstPart];
      pairDiff[1] = allDiff[secondPart];
      pairMgr.setDiff(pairDiff);

      pairwiseCost = pairMgr.doPartitionOne4(groupPart);

      // Write the result back, including the updated diff entries.
      projectDown(part, groupPart, g);
      int* updatedDiff = pairMgr.getDiff();
      allDiff[firstPart] = updatedDiff[0];
      allDiff[secondPart] = updatedDiff[1];

      // The group object is released here, once it has been processed.
      delete _sGVec[g];
    }

    assert(pairwiseCost == getNetlist().cutCost(part, getNumPartitions()));

    refiner.setNoInit(pairwiseCost); // only refine the solution
    refiner.setDiff(getDiff());
    refinedCost = refiner.doPartitionOne4(part);
    assert(refinedCost == getNetlist().cutCost(part, getNumPartitions()));
    setDiff(refiner.getDiff());

    // Go round again only while the refinement beat the pairwise pass.
  } while (refinedCost < pairwiseCost);


  return refinedCost;
}
Beispiel #6
0
void MGC3130::dispatchGestureEvents() {
  ManuvrMsg* event = nullptr;

  if (isPositionDirty()) {
    // We don't want to spam the Kernel. We need to rate-limit.
    if ((millis() - MGC3130_MINIMUM_NUANCE_PERIOD) > last_nuance_sent) {
      if (!position_asserted()) {
        // If we haven't asserted the position gesture yet, do so now.
        event = Kernel::returnEvent(MANUVR_MSG_GESTURE_RECOGNIZED);
        event->addArg((uint32_t) 1);
        raiseEvent(event);
        position_asserted(true);
      }
      last_nuance_sent = millis();
      event = Kernel::returnEvent(MANUVR_MSG_GESTURE_NUANCE);
      event->addArg((uint32_t) 1);
      event->addArg((uint16_t) _pos_x);
      event->addArg((uint16_t) _pos_y);
      event->addArg((uint16_t) _pos_z);
      raiseEvent(event);
      _pos_x = -1;
      _pos_y = -1;
      _pos_z = -1;
    }
  }
  else if (position_asserted()) {
    // We need to disassert the position gesture.
    event = Kernel::returnEvent(MANUVR_MSG_GESTURE_DISASSERT);
    event->addArg((uint32_t) 1);
    raiseEvent(event);
    position_asserted(false);
    _pos_x = -1;
    _pos_y = -1;
    _pos_z = -1;
  }

  if (0 < wheel_position) {
    // We don't want to spam the Kernel. We need to rate-limit.
    if ((millis() - MGC3130_MINIMUM_NUANCE_PERIOD) > last_nuance_sent) {
      if (!airwheel_asserted()) {
        // If we haven't asserted the airwheel gesture yet, do so now.
        event = Kernel::returnEvent(MANUVR_MSG_GESTURE_RECOGNIZED);
        event->addArg((uint32_t) 2);
        raiseEvent(event);
        airwheel_asserted(true);
      }
      last_nuance_sent = millis();
      event = Kernel::returnEvent(MANUVR_MSG_GESTURE_NUANCE);
      event->addArg((uint32_t) 2);
      event->addArg((int32_t) wheel_position);
      raiseEvent(event);
      wheel_position = 0;
    }
  }
  else if (airwheel_asserted()) {
    // We need to disassert the airwheel gesture.
    event = Kernel::returnEvent(MANUVR_MSG_GESTURE_DISASSERT);
    event->addArg((uint32_t) 2);
    raiseEvent(event);
    airwheel_asserted(false);
    wheel_position = 0;
  }

  if (0 < last_tap) {
    event = Kernel::returnEvent(MANUVR_MSG_GESTURE_ONE_SHOT);
    event->addArg(getTouchTapString(last_tap));
    raiseEvent(event);
    last_tap = 0;
  }
  if (0 < last_double_tap) {
    event = Kernel::returnEvent(MANUVR_MSG_GESTURE_ONE_SHOT);
    event->addArg(getTouchTapString(last_double_tap));
    raiseEvent(event);
    last_double_tap = 0;
  }
  if (0 < last_swipe) {
    event = Kernel::returnEvent(MANUVR_MSG_GESTURE_ONE_SHOT);
    event->addArg(getTouchTapString(last_swipe));
    raiseEvent(event);
    last_swipe = 0;
  }
  if (isTouchDirty()) {
    event = Kernel::returnEvent(MANUVR_MSG_GESTURE_ONE_SHOT);
    event->addArg(getTouchTapString(last_touch));
    raiseEvent(event);
    last_touch_noted = last_touch;
  }
  if (special) {
    // TODO: Not sure how to deal with this yet...
    #ifdef MANUVR_DEBUG
    if (getVerbosity() > 3) {
      local_log.concatf("MGC3130 special code 0x08\n", special);
      Kernel::log(&local_log);
    }
    #endif
    special = 0;
  }
  if (last_event) {
    // TODO: Not sure how to deal with this yet...
    #ifdef MANUVR_DEBUG
    if (getVerbosity() > 3) {
      local_log.concatf("MGC3130 last_event 0x08\n", last_event);
      Kernel::log(&local_log);
    }
    #endif
    last_event = 0;
  }
}
Beispiel #7
0
 // Announces which solver type is being created, at QUDA_VERBOSE verbosity or above.
 static void report(const char *type) {
   if (getVerbosity() >= QUDA_VERBOSE) {
     printfQuda("Creating a %s solver\n", type);
   }
 }
/**
* Takes one step from the current state toward the desired state.
* Assumes that the prior operation set the state to whatever is current.
*
* @return true if the state is stable, and the integrator should be notified.
*/
bool LSM9DSx_Common::step_state() {
  // Nothing to do if we are already where we want to be.
  if (desired_state_attained()) {
    return true;
  }

  if (error_condition) {
    // We shouldn't be changing states if there is an error condition.
    // Reset is the only way to exit the condition at present.
    if (getVerbosity() > 2) {
      local_log.concatf("%s step_state() was called while we are in an error condition: %s\n", imu_type(), getErrorString());
      Kernel::log(&local_log);
    }
    return true;
  }

  switch (getState()) {
    case State::STAGE_0:  // We think the IIU might be physically absent.
      identity_check();
      break;

    case State::STAGE_1:  // We are sure the IMU is present, but we haven't done anything with it.
      configure_sensor();
      break;

    case State::STAGE_2:  // Discovered and initialized, but unknown register values.
      if (!is_setup_completed()) {
        // Setup never finished. Fall back a stage.
        set_state(State::STAGE_1);
        return true;
      }
      bulk_refresh();
      break;

    case State::STAGE_3:  // Fully initialized and sync'd. Un-calibrated.
      integrator->state_change_notice(this, State::STAGE_3, State::STAGE_3);  // TODO: Wrong.
      sb_next_write = 0;
      readSensor();
      break;   // TODO: Stop skipping calibrate().

    case State::STAGE_4:  // Calibrated and idle.
      if (desiredState() != State::STAGE_5) {
        // Downgrading to init state 3 (recalibrate).
        set_state(State::STAGE_3);
        sb_next_write = 0;
        readSensor();
        return true;
      }
      // Enable the chained reads, and start the process rolling.
      readSensor();
      break;

    case State::STAGE_5:  // Calibrated and reading.
      {
        const auto target = desiredState();
        if (target == State::STAGE_4) {
          // Stop reads.
          set_state(State::STAGE_4);
          return false;   // Note the slight break from convention... Careful...
        }
        if (target == State::STAGE_3) {
          // Downgrading to init state 3 (recalibrate).
          set_state(State::STAGE_3);
          sb_next_write = 0;
          readSensor();
          return true;
        }
        if (target == State::STAGE_5) {
          // Keep reading.
          return true;
        }
      }
      break;

    default:
      break;
  }

  // A step was taken (or nothing applied). The state is not yet stable.
  return false;
}
  void ColorSpinorField::createGhostZone() {

    if (getVerbosity() == QUDA_DEBUG_VERBOSE) 
      printfQuda("Precision = %d, Subset = %d\n", precision, siteSubset);

    int num_faces = 1;
    int num_norm_faces=2;
    if (nSpin == 1) { //staggered
      num_faces=6;
      num_norm_faces=6;
    }

    // calculate size of ghost zone required
    int ghostVolume = 0;
    //temporal hack
    int dims = nDim == 5 ? (nDim - 1) : nDim;
    int x5   = nDim == 5 ? x[4] : 1; ///includes DW  and non-degenerate TM ghosts
    for (int i=0; i<dims; i++) {
      ghostFace[i] = 0;
      if (commDimPartitioned(i)) {
	ghostFace[i] = 1;
	for (int j=0; j<dims; j++) {
	  if (i==j) continue;
	  ghostFace[i] *= x[j];
	}
	ghostFace[i] *= x5; ///temporal hack : extra dimension for DW ghosts
	if (i==0 && siteSubset != QUDA_FULL_SITE_SUBSET) ghostFace[i] /= 2;
	if (siteSubset == QUDA_FULL_SITE_SUBSET) ghostFace[i] /= 2;
	ghostVolume += ghostFace[i];
      }
      if(i==0){
	ghostOffset[i] = 0;
	ghostNormOffset[i] = 0;
      }else{
	ghostOffset[i] = ghostOffset[i-1] + num_faces*ghostFace[i-1];
	ghostNormOffset[i] = ghostNormOffset[i-1] + num_norm_faces*ghostFace[i-1];
      }

#ifdef MULTI_GPU
      if (getVerbosity() == QUDA_DEBUG_VERBOSE) 
	printfQuda("face %d = %6d commDimPartitioned = %6d ghostOffset = %6d ghostNormOffset = %6d\n", 
		   i, ghostFace[i], commDimPartitioned(i), ghostOffset[i], ghostNormOffset[i]);
#endif
    }//end of outmost for loop
    int ghostNormVolume = num_norm_faces * ghostVolume;
    ghostVolume *= num_faces;

    if (getVerbosity() == QUDA_DEBUG_VERBOSE) 
      printfQuda("Allocated ghost volume = %d, ghost norm volume %d\n", ghostVolume, ghostNormVolume);

    // ghost zones are calculated on c/b volumes
#ifdef MULTI_GPU
    ghost_length = ghostVolume*nColor*nSpin*2; 
    ghost_norm_length = (precision == QUDA_HALF_PRECISION) ? ghostNormVolume : 0;
#else
    ghost_length = 0;
    ghost_norm_length = 0;
#endif

    if (siteSubset == QUDA_FULL_SITE_SUBSET) {
      total_length = length + 2*ghost_length; // 2 ghost zones in a full field
      total_norm_length = 2*(stride + ghost_norm_length); // norm length = 2*stride
    } else {
      total_length = length + ghost_length;
      total_norm_length = (precision == QUDA_HALF_PRECISION) ? stride + ghost_norm_length : 0; // norm length = stride
    }

    if (precision != QUDA_HALF_PRECISION) total_norm_length = 0;

    if (getVerbosity() == QUDA_DEBUG_VERBOSE) {
      printfQuda("ghost length = %d, ghost norm length = %d\n", ghost_length, ghost_norm_length);
      printfQuda("total length = %d, total norm length = %d\n", total_length, total_norm_length);
    }

    // initialize the ghost pointers 
    if(siteSubset == QUDA_PARITY_SITE_SUBSET) {
      for(int i=0; i<dims; ++i){
        if(commDimPartitioned(i)){
          ghost[i] = (char*)v + (stride + ghostOffset[i])*nColor*nSpin*2*precision;
          if(precision == QUDA_HALF_PRECISION)
            ghostNorm[i] = (char*)norm + (stride + ghostNormOffset[i])*QUDA_SINGLE_PRECISION;
        }
      }
    }

  } // createGhostZone
Beispiel #10
0
 // Prints the profile on destruction when verbosity is above QUDA_VERBOSE.
 Dirac::~Dirac() {
   if (getVerbosity() > QUDA_VERBOSE) {
     profile.Print();
   }
 }
Beispiel #11
0
/*
 * Works out this rank's coordinates in the (x,y,z,t) process grid and the ranks of its
 * forward/backward neighbours in each direction, with periodic wrap-around.
 *
 * Calls comm_exit(1) if the product of the grid sizes differs from the number of
 * processes. The GRID_ID macro defined here stays defined after the function.
 */
static void
comm_partition(void)
{
  const int grid_volume = xgridsize*ygridsize*zgridsize*tgridsize;
  if (grid_volume != size) {
    if (rank == 0) {
      printf("ERROR: Invalid configuration (t,z,y,x gridsize=%d %d %d %d) "
             "but # of MPI processes is %d\n", tgridsize, zgridsize, ygridsize, xgridsize, size);
    }
    comm_exit(1);
  }

  // Decompose the rank into grid coordinates. The build flag selects which index runs fastest.
  int remainder;

#ifdef X_FASTEST_DIM_NODE_RANKING
  tgridid   = rank/(zgridsize*ygridsize*xgridsize);
  remainder = rank%(zgridsize*ygridsize*xgridsize);
  zgridid   = remainder/(ygridsize*xgridsize);
  remainder = remainder%(ygridsize*xgridsize);
  ygridid   = remainder/xgridsize;
  xgridid   = remainder%xgridsize;
  #define GRID_ID(xid,yid,zid,tid) (tid*zgridsize*ygridsize*xgridsize+zid*ygridsize*xgridsize+yid*xgridsize+xid)
#else
  xgridid   = rank/(ygridsize*zgridsize*tgridsize);
  remainder = rank%(ygridsize*zgridsize*tgridsize);
  ygridid   = remainder/(zgridsize*tgridsize);
  remainder = remainder%(zgridsize*tgridsize);
  zgridid   = remainder/tgridsize;
  tgridid   = remainder%tgridsize;
#define GRID_ID(xid,yid,zid,tid) (xid*ygridsize*zgridsize*tgridsize+yid*zgridsize*tgridsize+zid*tgridsize+tid)
#endif

  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
    printf("My rank: %d, gridid(t,z,y,x): %d %d %d %d\n", rank, tgridid, zgridid, ygridid, xgridid);
  }

  // Neighbour coordinates along each axis, wrapped periodically. They are held in plain
  // variables because GRID_ID does not parenthesise its arguments.
  const int x_up   = (xgridid + 1) % xgridsize;
  const int x_down = (xgridid - 1 + xgridsize) % xgridsize;
  const int y_up   = (ygridid + 1) % ygridsize;
  const int y_down = (ygridid - 1 + ygridsize) % ygridsize;
  const int z_up   = (zgridid + 1) % zgridsize;
  const int z_down = (zgridid - 1 + zgridsize) % zgridsize;
  const int t_up   = (tgridid + 1) % tgridsize;
  const int t_down = (tgridid - 1 + tgridsize) % tgridsize;

  // This rank's own coordinates, used for the axes that are held fixed.
  const int x_here = xgridid;
  const int y_here = ygridid;
  const int z_here = zgridid;
  const int t_here = tgridid;

  //X direction neighbors
  x_fwd_nbr  = GRID_ID(x_up,   y_here, z_here, t_here);
  x_back_nbr = GRID_ID(x_down, y_here, z_here, t_here);

  //Y direction neighbors
  y_fwd_nbr  = GRID_ID(x_here, y_up,   z_here, t_here);
  y_back_nbr = GRID_ID(x_here, y_down, z_here, t_here);

  //Z direction neighbors
  z_fwd_nbr  = GRID_ID(x_here, y_here, z_up,   t_here);
  z_back_nbr = GRID_ID(x_here, y_here, z_down, t_here);

  //T direction neighbors
  t_fwd_nbr  = GRID_ID(x_here, y_here, z_here, t_up);
  t_back_nbr = GRID_ID(x_here, y_here, z_here, t_down);

  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
    printf("MPI rank: rank=%d, hostname=%s, x_fwd_nbr=%d, x_back_nbr=%d\n", rank, comm_hostname(), x_fwd_nbr, x_back_nbr);
    printf("MPI rank: rank=%d, hostname=%s, y_fwd_nbr=%d, y_back_nbr=%d\n", rank, comm_hostname(), y_fwd_nbr, y_back_nbr);
    printf("MPI rank: rank=%d, hostname=%s, z_fwd_nbr=%d, z_back_nbr=%d\n", rank, comm_hostname(), z_fwd_nbr, z_back_nbr);
    printf("MPI rank: rank=%d, hostname=%s, t_fwd_nbr=%d, t_back_nbr=%d\n", rank, comm_hostname(), t_fwd_nbr, t_back_nbr);
  }
}
Beispiel #12
0
/**
 * ssh_connect
 *
 * Opens a communication channel (a socket) to a target using ssh.
 *
 * A UNIX socket pair is created and an `ssh` child process is forked whose
 * stdin and stdout are both wired to one end of the pair; the other end is
 * handed back to the caller.  The remote command run by ssh is ptsc_command.
 *
 * @param host host name of the target. Is used as the SSH host name parameter.
 *        Must not be NULL.
 * @param ssh_username If not NULL, specifies the SSH user name to login as
 *        (defaults to the current user).
 * @param ssh_port If not NULL, specifies the port of the remote SSH daemon.
 * @param key_file If not NULL, specifies the key to use (passed to ssh as
 *        "-o IdentityFile <key_file>").
 * @param socket Filled with the resulting socket. Use it for later
 *        communication. Must not be NULL.
 * @result the PID of the child SSH process or -1 in case of an error.
 */
pid_t ssh_connect(char *host, char *ssh_username, char *ssh_port, char *key_file, int *socket) {
    pid_t pid;
    int socket_pair[2];  // socket_pair[0] is our side, socket_pair[1] is the SSH side

    /* check */
    if (host == NULL || socket == NULL) {
        LOG(LOG_ERR, "null input");
        return -1;
    }

    /* socket */
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, socket_pair) == -1) {
        LOG(LOG_ERR, "socketpair() fail");
        goto err;
    }

    /* fork */
    if ((pid = fork()) == -1) {
        LOG(LOG_ERR, "fork() fail");
        goto err_close;
    }
    if (pid == 0) {
        /* child process */
        char *arguments[16];  // at most 11 slots are used below
        int arg_idx = 0;
        char identity_string[PATH_MAX + /* "IdentityFile " */ 13];

        // this end belongs to the parent
        close(socket_pair[0]);

        // Replace stdin and stdout with the socket end.  dup2() closes the
        // target descriptor itself, so no explicit close(0)/close(1) is
        // needed (and doing so would destroy the socket if it happens to be
        // fd 0 or 1).
        if (dup2(socket_pair[1], 0) == -1 || dup2(socket_pair[1], 1) == -1) {
            LOG(LOG_ERR, "dup2() fail");
            _exit(1);
        }
        if (socket_pair[1] > 1) {
            close(socket_pair[1]);  // no longer needed
        }

        arguments[arg_idx++] = "ssh";
        arguments[arg_idx++] = "-2";
        if (ssh_username != NULL) {
            arguments[arg_idx++] = "-l";
            arguments[arg_idx++] = ssh_username;
        }
        if (ssh_port != NULL) {
            arguments[arg_idx++] = "-p";
            arguments[arg_idx++] = ssh_port;
        }
        /* // should be specified in the ssh_conf file 
        arguments[arg_idx++] = "-o";
        arguments[arg_idx++] = "BatchMode yes";
        */
        if (key_file != NULL) {
            int len = snprintf(identity_string, sizeof(identity_string),
                               "IdentityFile %s", key_file);
            // refuse to hand ssh a silently truncated key path
            if (len < 0 || (size_t)len >= sizeof(identity_string)) {
                LOG(LOG_ERR, "key file path too long");
                _exit(1);
            }
            arguments[arg_idx++] = "-o";
            arguments[arg_idx++] = identity_string;
        }
        arguments[arg_idx++] = host;
        arguments[arg_idx++] = ptsc_command;
#if 0
        // TODO
        /* Sync verbose level between verifier and collector? */
        if (verbose_sync) {
           int verboseLevel;
            for ( verboseLevel = 0; (verboseLevel < getVerbosity()) && (arg_idx < 15); verboseLevel++ ) {
                arguments[arg_idx++] = "-v";
            }
        }
#endif
        arguments[arg_idx++] = NULL;

        DEBUG("ptsc_command %s\n", ptsc_command);

        execvp("ssh", arguments);

        // Only reached when exec failed.  Use _exit() so the child does not
        // run the parent's atexit handlers or flush its inherited stdio
        // buffers a second time.
        LOG(LOG_ERR, "execvp(ssh)");
        _exit(1);
    }

    /* parent process */
    close(socket_pair[1]);
    *socket = socket_pair[0];

    // keep the channel from leaking into processes exec'ed later; a failure
    // here is not fatal for the connection itself, so only report it
    if (fcntl(*socket, F_SETFD, FD_CLOEXEC) == -1) {
        LOG(LOG_ERR, "fcntl(FD_CLOEXEC) fail");
    }

    // success
    return pid;

  err_close:
    close(socket_pair[0]);
    close(socket_pair[1]);
  err:
    return -1;
}
Beispiel #13
0
  /**
   * Run the (optionally preconditioned) conjugate-gradient iteration with
   * reliable updates.
   *
   * The residual is formed as b - A*x from the incoming x, the iteration runs
   * in param.precision_sloppy, and the accumulated correction is added back
   * into x at the end.  When a preconditioner K is set it is applied to the
   * residual (in param.precision_precondition) to build the search direction;
   * otherwise this is plain CG.
   *
   * Updates param.true_res, param.secs, param.gflops and param.iter.  For a
   * zero source the routine sets x = b, zeroes the true residuals and returns
   * immediately.
   *
   * @param x  initial guess on entry, solution on exit
   * @param b  source field
   */
  void PreconCG::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b)
  {

    profile.Start(QUDA_PROFILE_INIT);
    // Check to see that we're not trying to invert on a zero-field source
    const double b2 = norm2(b);
    if(b2 == 0){
      profile.Stop(QUDA_PROFILE_INIT);
      printfQuda("Warning: inverting on zero-field source\n");
      x=b;
      param.true_res = 0.0;
      param.true_res_hq = 0.0;
      // Nothing to solve.  Falling through would stop the INIT profile a
      // second time and divide by b2 == 0 when computing the true residual.
      return;
    }

    int k=0;
    int rUpdate=0;

    // Preconditioner work fields; they stay NULL when no preconditioner is set.
    cudaColorSpinorField* minvrPre = NULL;
    cudaColorSpinorField* rPre = NULL;
    cudaColorSpinorField* minvr = NULL;
    cudaColorSpinorField* minvrSloppy = NULL;
    cudaColorSpinorField* p = NULL;


    ColorSpinorParam csParam(b);
    cudaColorSpinorField r(b);
    if(K) minvr = new cudaColorSpinorField(b);
    csParam.create = QUDA_ZERO_FIELD_CREATE;
    cudaColorSpinorField y(b,csParam);

    mat(r, x, y); // => r = A*x;
    double r2 = xmyNormCuda(b,r);

    csParam.setPrecision(param.precision_sloppy);
    cudaColorSpinorField tmpSloppy(x,csParam);
    cudaColorSpinorField Ap(x,csParam);

    // Sloppy residual: alias the full-precision field when the precisions
    // match, otherwise allocate a sloppy copy (released at the end).
    cudaColorSpinorField *r_sloppy;
    if(param.precision_sloppy == x.Precision())
    {
      r_sloppy = &r;
      minvrSloppy = minvr;
    }else{
      csParam.create = QUDA_COPY_FIELD_CREATE;
      r_sloppy = new cudaColorSpinorField(r,csParam);
      if(K) minvrSloppy = new cudaColorSpinorField(*minvr,csParam);
    }


    // Sloppy solution accumulator: only a separate allocation when the
    // precisions differ AND the sloppy partial accumulator is requested.
    cudaColorSpinorField *x_sloppy;
    if(param.precision_sloppy == x.Precision() ||
        !param.use_sloppy_partial_accumulator) {
      csParam.create = QUDA_REFERENCE_FIELD_CREATE;
      x_sloppy = &x;
    }else{
      csParam.create = QUDA_COPY_FIELD_CREATE;
      x_sloppy = new cudaColorSpinorField(x,csParam);
    }


    cudaColorSpinorField &xSloppy = *x_sloppy;
    cudaColorSpinorField &rSloppy = *r_sloppy;

    if(&x != &xSloppy){
      copyCuda(y, x); // copy x to y
      zeroCuda(xSloppy);
    }else{
      zeroCuda(y); // no reliable updates // NB: check this
    }

    const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;

    if(K){
      csParam.create = QUDA_COPY_FIELD_CREATE;
      csParam.setPrecision(param.precision_precondition);
      rPre = new cudaColorSpinorField(rSloppy,csParam);
      // Create minvrPre 
      minvrPre = new cudaColorSpinorField(*rPre);
      // globalReduce is switched off around every preconditioner call
      globalReduce = false;
      (*K)(*minvrPre, *rPre);  
      globalReduce = true;
      *minvrSloppy = *minvrPre;
      p = new cudaColorSpinorField(*minvrSloppy);
    }else{
      p = new cudaColorSpinorField(rSloppy);
    }


    profile.Stop(QUDA_PROFILE_INIT);


    profile.Start(QUDA_PROFILE_PREAMBLE);



    double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver
    double heavy_quark_res = 0.0; // heavy quark residual 
    if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(x,r).z);


    double alpha = 0.0, beta=0.0;
    double pAp;
    double rMinvr  = 0;
    double rMinvr_old = 0.0;
    double r_new_Minvr_old = 0.0;
    double r2_old = 0;
    r2 = norm2(r);

    double rNorm = sqrt(r2);
    double r0Norm = rNorm;
    double maxrx = rNorm;
    double maxrr = rNorm;
    double delta = param.delta;


    if(K) rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);

    profile.Stop(QUDA_PROFILE_PREAMBLE);
    profile.Start(QUDA_PROFILE_COMPUTE);


    quda::blas_flops = 0;

    const int maxResIncrease = 0;
    // Count of consecutive reliable updates that increased the residual.
    // Kept per call (not static) so one solve cannot influence the next.
    int resIncrease = 0;

    while(!convergence(r2, heavy_quark_res, stop, param.tol_hq) && k < param.maxiter){

      matSloppy(Ap, *p, tmpSloppy);

      double sigma;
      pAp   = reDotProductCuda(*p,Ap);

      alpha = (K) ? rMinvr/pAp : r2/pAp;
      Complex cg_norm = axpyCGNormCuda(-alpha, Ap, rSloppy); 
      // r --> r - alpha*A*p
      r2_old = r2;
      r2 = real(cg_norm);

      sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2; // use r2 if (r_k+1, r_k-1 - r_k) breaks

      if(K) rMinvr_old = rMinvr;

      rNorm = sqrt(r2);
      if(rNorm > maxrx) maxrx = rNorm;
      if(rNorm > maxrr) maxrr = rNorm;


      int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
      int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;


      // force a reliable update if we are within target tolerance (only if doing reliable updates)
      if( convergence(r2, heavy_quark_res, stop, param.tol_hq) && delta >= param.tol) updateX = 1;


      if( !(updateR || updateX) ){

        if(K){
          r_new_Minvr_old = reDotProductCuda(rSloppy,*minvrSloppy);
          *rPre = rSloppy;
          globalReduce = false;
          (*K)(*minvrPre, *rPre);
          globalReduce = true;


          *minvrSloppy = *minvrPre;

          rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
          beta = (rMinvr - r_new_Minvr_old)/rMinvr_old; 
          axpyZpbxCuda(alpha, *p, xSloppy, *minvrSloppy, beta);
        }else{
          beta = sigma/r2_old; // use the alternative beta computation
          axpyZpbxCuda(alpha, *p, xSloppy, rSloppy, beta);
        }
      } else { // reliable update

        axpyCuda(alpha, *p, xSloppy); // xSloppy += alpha*p
        copyCuda(x, xSloppy);
        xpyCuda(x, y); // y += x
        // Now compute r 
        mat(r, y, x); // x is just a temporary here
        r2 = xmyNormCuda(b, r);
        copyCuda(rSloppy, r); // copy r to rSloppy
        zeroCuda(xSloppy);


        // break-out check if we have reached the limit of the precision
        if(sqrt(r2) > r0Norm && updateX) { // reuse r0Norm for this 
          warningQuda("PCG: new reliable residual norm %e is greater than previous reliable residual norm %e", sqrt(r2), r0Norm);

          k++;
          rUpdate++;
          if(++resIncrease > maxResIncrease) break;
        }else{
          resIncrease = 0;
        }

        rNorm = sqrt(r2);
        maxrr = rNorm;
        maxrx = rNorm;
        r0Norm = rNorm;
        ++rUpdate;

        if(K){
          *rPre = rSloppy;
          globalReduce = false;
          (*K)(*minvrPre, *rPre);
          globalReduce = true;

          *minvrSloppy = *minvrPre;

          rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
          beta = rMinvr/rMinvr_old;        

          xpayCuda(*minvrSloppy, beta, *p); // p = minvrSloppy + beta*p
        }else{ // standard CG - no preconditioning

          // explicitly restore the orthogonality of the gradient vector
          double rp = reDotProductCuda(rSloppy, *p)/(r2);
          axpyCuda(-rp, rSloppy, *p);

          beta = r2/r2_old;
          xpayCuda(rSloppy, beta, *p);
        }
      }      
      ++k;
      PrintStats("PCG", k, r2, b2, heavy_quark_res);
    }


    profile.Stop(QUDA_PROFILE_COMPUTE);

    profile.Start(QUDA_PROFILE_EPILOGUE);

    // Only copy back when a separate sloppy accumulator was actually used.
    if(&x != &xSloppy) copyCuda(x, xSloppy);
    xpyCuda(y, x); // x += y


    param.secs = profile.Last(QUDA_PROFILE_COMPUTE);
    double gflops = (quda::blas_flops + mat.flops() + matSloppy.flops() + matPrecon.flops())*1e-9;
    reduceDouble(gflops);
    param.gflops = gflops;
    param.iter += k;

    if (k==param.maxiter)
      warningQuda("Exceeded maximum iterations %d", param.maxiter);

    if (getVerbosity() >= QUDA_VERBOSE)
      printfQuda("CG: Reliable updates = %d\n", rUpdate);





    // compute the true residual 
    mat(r, x, y);
    double true_res = xmyNormCuda(b, r);
    param.true_res = sqrt(true_res / b2);

    // reset the flops counters
    quda::blas_flops = 0;
    mat.flops();
    matSloppy.flops();
    matPrecon.flops();

    profile.Stop(QUDA_PROFILE_EPILOGUE);
    profile.Start(QUDA_PROFILE_FREE);

    if(K){ // These are only needed if preconditioning is used
      delete minvrPre;
      delete rPre;
      // minvrSloppy aliases minvr unless a separate sloppy copy was allocated
      if(minvrSloppy != minvr) delete minvrSloppy;
      delete minvr;
    }
    delete p;

    // Release the sloppy fields only when they were allocated here.  x_sloppy
    // aliases the caller's x whenever the sloppy partial accumulator is
    // disabled, even if the precisions differ, so a precision test alone
    // would delete the caller's field.
    if(x_sloppy != &x) delete x_sloppy;
    if(r_sloppy != &r) delete r_sloppy;

    profile.Stop(QUDA_PROFILE_FREE);
    return;
  }
Beispiel #14
0
/**
 *
 * Selftest
 * - Find right RM for this boot
 *
 * Generates an IR with a throw-away context (dummy nonce, dummy target UUID
 * "SELFTEST"), then validates that IR against the RM set selected in conf.
 *
 * Check RM set by rm_uuid file
 *    OK -> OPENPTS_SELFTEST_SUCCESS
 *    NG -> next
 * Check RM set by newrm_uuid file
 *    OK -> OPENPTS_SELFTEST_RENEWED
 *    NG -> next
 * Check RM set by oldrm_uuid file
 *    OK -> OPENPTS_SELFTEST_FALLBACK
 *    NG -> OPENPTS_SELFTEST_FAILED
 *
 * NOTE(review): the body below only tries rm_uuid and then newrm_uuid; no
 * oldrm_uuid / FALLBACK path is visible in this function.
 *
 * @param conf        configuration; its rm_uuid / newrm_uuid fields are
 *                    modified when the new RM is tried
 * @param prop_count  number of properties to copy from the list
 * @param prop_start  head of the property list (walked via ->next)
 * @param prop_end    not read here; only forwarded to the recursive call
 *
 * Return
 *   OPENPTS_SELFTEST_SUCCESS   stable:-)
 *   OPENPTS_SELFTEST_RENEWED   update/reboot -> success
 *   OPENPTS_SELFTEST_FALLBACK
 *   OPENPTS_SELFTEST_FAILED
 *   PTS_FATAL                  something wrong:-(
 */
int selftest(OPENPTS_CONFIG *conf, int prop_count, OPENPTS_PROPERTY *prop_start, OPENPTS_PROPERTY *prop_end) {
    int rc = PTS_INTERNAL_ERROR;
    int result;
    OPENPTS_CONTEXT *ctx;
    int i;
    OPENPTS_PROPERTY *prop;
    char * ir_filename;

    DEBUG_CAL("selftest() start\n");

    /* Step 1 - Generate IR --------------------------------------------------*/

    /* new CTX for generation */
    ctx = newPtsContext(conf);
    if (ctx == NULL) {
        LOG(LOG_ERR, "newPtsContext() fail. no memory?");
        return PTS_FATAL;
    }

    /* copy properties */
    prop = prop_start;
    for (i = 0; i < prop_count; i++) {
        /* the list is shorter than prop_count claims */
        if (prop == NULL) {
            LOG(LOG_ERR, "prop == NULL");
            rc = PTS_FATAL;
            goto free;
        }
        addProperty(ctx, prop->name, prop->value);
        prop = prop->next;
    }

    /* additional properties from the pts config file */
    addPropertiesFromConfig(conf, ctx);

    /* set dummy nonce for IR gen */
    ctx->nonce->nonce_length = 20;
    ctx->nonce->nonce = xmalloc_assert(20);
    if (ctx->nonce->nonce == NULL) {
            LOG(LOG_ERR, "no memory");
            rc = PTS_FATAL;
            goto free;
    }
    memset(ctx->nonce->nonce, 0x5A, 20);
    /* set dummy target uuid */
    ctx->str_uuid = smalloc("SELFTEST");
    if (ctx->str_uuid == NULL) {
            LOG(LOG_ERR, "no memory");
            rc = PTS_FATAL;
            goto free;
    }

    /* gen IR */
    rc = genIr(ctx, NULL);
    if (rc != PTS_SUCCESS) {
        LOG(LOG_ERR, "selftest() - genIR failed\n");
        rc = PTS_FATAL;
        goto free;
    }

    /* hold the IR filename: detach it from the context before the context is
       freed so it can be handed to the validation context below */
    ir_filename = ctx->ir_filename;
    ctx->ir_filename = NULL;

    /* free CTX */
    freePtsContext(ctx);
    ctx = NULL;

    DEBUG("selftest() - generate IR - done (ir file = %s)\n", ir_filename);

    /* Step 2 - Validate IR --------------------------------------------------*/

    /* Keep conf but reset some flags in conf */
#ifdef CONFIG_AUTO_RM_UPDATE
    /* clear ARU */
    conf->update_exist = 0;
#endif
    /* new CTX for validation */
    ctx = newPtsContext(conf);
    if (ctx == NULL) {
        /* NOTE(review): ir_filename is not owned by any context at this point
           and is not released on this path - looks like a leak; confirm */
        LOG(LOG_ERR, "newPtsContext() fail. no memory?");
        return PTS_FATAL;
    }

    /* set generated IR */
    ctx->ir_filename = ir_filename;

    /* setup RMs */
    rc = getRmSetDir(conf);
    if (rc != PTS_SUCCESS) {
        LOG(LOG_ERR, "selftest() - getRmSetDir() failed\n");
        /* NOTE(review): conf->rm_uuid is dereferenced without a NULL check */
        LOG(LOG_TODO, "conf->rm_uuid->filename %s\n", conf->rm_uuid->filename);
        LOG(LOG_TODO, "conf->rm_uuid->str      %s\n", conf->rm_uuid->str);
        rc = PTS_FATAL;
        goto free;
    }

    /* load RMs */
    for (i = 0; i <  conf->rm_num; i++) {
        rc = readRmFile(ctx, conf->rm_filename[i], i);
        if (rc < 0) {
            LOG(LOG_ERR, "readRmFile fail\n");
            rc = PTS_FATAL;
            goto free;
        }
    }


    /* verify */
    DEBUG("selftest() - validate IR - start\n");

    // TODO 2011-01-21 SM just use same conf
    ctx->target_conf = ctx->conf;

    // Disable Quote
    // 2011-01-28 SM, If FSM did not covers all PCRs Quote validation will fail?
    // iml_mode = ctx->conf->iml_mode;
    // ir_without_quote = ctx->conf->ir_without_quote;
    // ctx->conf->iml_mode = 1;
    // ctx->conf->ir_without_quote = 1;

    result = validateIr(ctx);  /* ir.c */

    /* check RM integrity status */
    DEBUG("selftest() - validate IR - done (rc = %d)\n", result);
    /* report the failure to the user only when verbosity is enabled */
    if ((result != OPENPTS_RESULT_VALID) && (getVerbosity() > 0)) {
        ERROR(NLS(MS_OPENPTS, OPENPTS_COLLECTOR_SELFTEST_FAILED_4,
            "The self test has failed"));
        printReason(ctx, 0);
    }

    if (result != OPENPTS_RESULT_VALID) {
        addReason(ctx, -1, NLS(MS_OPENPTS, OPENPTS_COLLECTOR_SELFTEST_FAILED,
            "[SELFTEST] The self test failed"));
        if ((conf->newrm_uuid != NULL) && (conf->newrm_uuid->uuid != NULL)) {
            /* New RM exist (for reboot after the update), Try the new RM */

            /* change the UUID: move the new RM's uuid/str/time into rm_uuid */
            // TODO add exchange func
            conf->rm_uuid->uuid = conf->newrm_uuid->uuid;
            conf->rm_uuid->str  = conf->newrm_uuid->str;
            conf->rm_uuid->time = conf->newrm_uuid->time;

            // del newrm (also guarantees the recursive call below cannot
            // take this branch a second time)
            conf->newrm_uuid->uuid = NULL;
            conf->newrm_uuid->str  = NULL;
            conf->newrm_uuid->time = NULL;

            // TODO free (the previous rm_uuid values are overwritten above
            // without being released)

            /* try selftest again */
            DEBUG("selftest again UUID=%s\n", conf->rm_uuid->str);
            rc = selftest(conf, prop_count, prop_start, prop_end);
            if (rc == OPENPTS_SELFTEST_SUCCESS) {
                /* Update the RM UUID by NEWRM_UUID */
                DEBUG("use UUID=%s\n", conf->rm_uuid->str);
                /* update rm_uuid */
                rc = writeOpenptsUuidFile(conf->rm_uuid, 1);
                if (rc != PTS_SUCCESS) {
                    LOG(LOG_ERR, "writeOpenptsUuidFile fail\n");
                    rc = PTS_FATAL;
                    goto free;
                }
                /* delete newrm_uuid */
                rc = remove(conf->newrm_uuid->filename);
                if (rc != 0) {
                    LOG(LOG_ERR, "remove(%s) fail\n", conf->newrm_uuid->filename);
                    rc = PTS_FATAL;
                    goto free;
                }
                rc = OPENPTS_SELFTEST_RENEWED;
            } else {
                /* fail */
                LOG(LOG_ERR, "2nd selftest with NEWRM also fail\n");
                addReason(ctx, -1, NLS(MS_OPENPTS, OPENPTS_COLLECTOR_SELFTEST_FAILED_2,
                               "[SELFTEST] The self test using both current and new UUIDs has failed"));
                printReason(ctx, 0);
                rc = OPENPTS_SELFTEST_FAILED;
            }
        } else {
            /* Missing NEWRM */
            printReason(ctx, 0);
            rc = OPENPTS_SELFTEST_FAILED;
        }
    } else {
        /* valid :-) */
        rc = OPENPTS_SELFTEST_SUCCESS;
    }

    /* leaving lots of temp 100K+ files lying around quickly fills up certain
       filesystems, i.e. on AIX /tmp is typically small, so we 
       unlink them after use */
    /* NOTE(review): this unlinks conf->ir_filename, while the IR generated
       above is tracked in ctx->ir_filename - confirm they name the same file.
       The fatal paths that jump to free: skip this cleanup. */
    if (NULL != conf->ir_filename) {
        unlink(conf->ir_filename);
    }

 free:
    /* free */
    freePtsContext(ctx);

    if (rc == PTS_FATAL) {
        ERROR(NLS(MS_OPENPTS, OPENPTS_COLLECTOR_SELFTEST_FAILED_3,
            "The self test has failed. See log for details."));
    }

    return rc;
}