示例#1
0
/**
 * Refreshes the HUD: the score label, the HP and rage bars, the current
 * difficulty tier and the player's shooting frequency.
 *
 * @param dt  frame delta; not used by this function.
 */
void GameScene::guiUpdate(float dt)
{
    // Score label: rewrite the text, then position it using its new width.
    pointsLabel->setString("Score: " + std::to_string(points));
    const auto labelWidth = pointsLabel->getBoundingBox().size.width;
    pointsLabel->setPosition(10 + labelWidth / 2, 20);

    // HP bar: fill width is the HP fraction of the border width, clamped to [0, border width].
    const auto hpBorderWidth = mHpBarBorder->getBoundingBox().size.width;
    const auto hpFillWidth =
        _MAX(_MIN(mPlayer->getHp() / (float)Entities::maxHP * hpBorderWidth, hpBorderWidth), 0);
    mHpBarFill->setTextureRect(cocos2d::Rect(0.f, 0.f, hpFillWidth, 28.f));

    // Rage bar: same clamping, driven by the rage value.
    const auto rageBorderWidth = mRageBarBorder->getBoundingBox().size.width;
    const auto rageFillWidth =
        _MAX(_MIN(mPlayer->getRage() / (float)Entities::rageCharging * rageBorderWidth, rageBorderWidth), 0);
    mRageBarFill->setTextureRect(cocos2d::Rect(0.f, 0.f, rageFillWidth, 28.f));

    // Difficulty only ever moves up one tier per call once the score threshold is reached.
    if (currentDifficulty == Enemies::EASY)
    {
        if (points >= Enemies::MEDIUM_CONDITION)
            currentDifficulty = Enemies::MEDIUM;
    }
    else if (currentDifficulty == Enemies::MEDIUM)
    {
        if (points >= Enemies::HARD_CONDITION)
            currentDifficulty = Enemies::HARD;
    }

    // Shooting interval shrinks by 0.01 for each 2000 points, never below 0.1.
    const float intervalReduction = (float)((points / 2000) / 100.f);
    Entities::playerShootingFreq = _MAX(0.1f, 0.2f - intervalReduction);
}
示例#2
0
/*
 * Initialises an ADERDG solver on the interval [xmin, xmax]:
 *  - places the faces using stretching(),
 *  - records the largest cell size and derives dt from the CFL number,
 *  - assigns a refinement level to every interior cell and face,
 *  - fills wnow with ExactSol at time 0 and copies it into wnext.
 */
void InitADERDG(ADERDG* adg, double xmin, double xmax)
{
  const double ln2 = log(2);

  adg->xmin = xmin;
  adg->xmax = xmax;

  /* face positions: stretched reference coordinate mapped to [xmin, xmax] */
  for(int f = 0; f < _NBFACES; f++) {
    double ref = (double)f / _NBELEMS_IN;
    double stretched = stretching(ref);
    adg->face[f] = xmin + stretched * (xmax - xmin);
  }

  /* largest interior cell width */
  adg->dx = 0.0;
  for(int c = 1; c <= _NBELEMS_IN; c++) {
    adg->dx = _MAX(adg->dx, adg->face[c] - adg->face[c - 1]);
  }

  adg->cfl = _CFL;
  adg->dt = adg->cfl * adg->dx;  /* TODO: put the velocity */
  adg->ncfl = 0;

  /* level of a cell = truncated log2(largest width / own width); ncfl = highest level */
  for(int c = 1; c <= _NBELEMS_IN; c++) {
    adg->cell_level[c] =
      (int) (log(adg->dx / (adg->face[c] - adg->face[c - 1])) / ln2);
    adg->ncfl = _MAX ( adg->ncfl , adg->cell_level[c] );
  }
  /* convention: first and last cell are among the biggest elements */
  adg->cell_level[0] = 0;
  adg->cell_level[_NBELEMS_IN + 1] = 0;

  printf("found %d cfl levels\n", adg->ncfl);

  /* time step of the finest level */
  adg->dt_small = adg->dt / (1 << adg->ncfl);

  printf("small dt=%f max cell size=%f\n", adg->dt_small, adg->dx);

  /* initial condition evaluated at the nodes of every interior cell */
  for(int c = 1; c <= _NBELEMS_IN; c++) {
    double h = adg->face[c] - adg->face[c - 1];
    for(int node = 0; node < _NGLOPS; node++) {
      double x = adg->face[c - 1]
        + h * gauss_lob_point[gauss_lob_offset[_D] + node];
      ExactSol(x, 0, adg->wnow[c][node]);
    }
  }

  /* wnext starts as a copy of wnow */
  for(int c = 1; c <= _NBELEMS; c++) {
    for(int node = 0; node < _NGLOPS; node++) {
      for(int var = 0; var < _M; var++) {
        adg->wnext[c][node][var] = adg->wnow[c][node][var];
      }
    }
  }

  /* a face takes the higher level of its two neighbouring cells */
  for(int f = 0; f < _NBFACES; f++) {
    adg->face_level[f] = _MAX(adg->cell_level[f], adg->cell_level[f + 1]);
  }
}
示例#3
0
// Shifts both timestamps back by 72000 seconds (20 hours), clamps them at 0,
// and delegates the comparison to IsDiffDay00.
bool TimeManager::IsDiffDay20(time_t uPrevANSITime, time_t uNextANSITime)
{
	time_t shiftedPrev = _MAX(uPrevANSITime - 72000, 0);
	time_t shiftedNext = _MAX(uNextANSITime - 72000, 0);

	return IsDiffDay00(shiftedPrev, shiftedNext);
}
示例#4
0
// Subtracts 18 hours (64800 seconds) from both timestamps, clamps them at 0,
// and then asks IsDiffDay00 whether the shifted times cross a 00:00 boundary.
bool TimeManager::IsDiffDay18(time_t uPrevANSITime, time_t uNextANSITime)
{
	time_t shiftedPrev = _MAX(uPrevANSITime - 64800, 0);
	time_t shiftedNext = _MAX(uNextANSITime - 64800, 0);

	return IsDiffDay00(shiftedPrev, shiftedNext);
}
示例#5
0
/*
 * Advances the struct layout cursor by one field.
 *
 * First accounts for the size of the previously visited field (if any), then,
 * unless the cursor sits on the struct terminator, aligns the running offset
 * for the current field and moves the cursor past it.
 *
 * Returns NO when the struct terminator has been reached, YES otherwise.
 */
BOOL
q_layoutStructNextEncode(StructLayout layout) {
    const char*     spec;
    unsigned int    fieldAlign;

    /* add the size of the field visited on the previous call */
    if (layout->prev_type) {
        spec = q_skipVarQualifiersEncode(layout->prev_type);

        if (*spec == Q_C_BFLD) {
            /* step over the bit-field marker and the digits that follow it */
            do {
                ++spec;
            } while (isdigit(*spec));
        }
        layout->offset += q_sizeOfTypeEncode(spec) * kBitsPerUnit;
    }

    if (*layout->cur_type == Q_C_STRUCT_E) {
        return NO;
    }

    layout->cur_type = q_skipVarNameEncode(layout->cur_type);
    spec = q_skipVarQualifiersEncode(layout->cur_type);

    if (*spec == Q_C_BFLD) {
        do {
            ++spec;
        } while (isdigit(*spec));
    }

    /* the struct's alignment is the largest alignment among its fields */
    fieldAlign = q_alignOfTypeEncode(spec) * kBitsPerUnit;
    layout->align = _MAX(layout->align, fieldAlign);

    /* pad the running offset up to a multiple of this field's alignment */
    if (layout->offset % fieldAlign != 0) {
        layout->offset = _ROUND(layout->offset, fieldAlign);
    }

    layout->prev_type = layout->cur_type;
    layout->cur_type = q_skipTypeSpecEncode(layout->cur_type);
    return YES;
}
示例#6
0
/**
 * @function GUI_W_UsrEntryDelete
 * @brief Delete the selected part of the entry text, or, when nothing is
 *        selected, let StrDelete handle the position of the insert line;
 *        afterwards collapse the selection to a single insert position.
 * @param g_obj_st *obj: generic object; ignored unless it is a user entry
 * @return none
 */
void GUI_W_UsrEntryDelete(g_obj_st *obj) {

  usr_entry_st *pEntry;
  uint16_t selBegin, selEnd;

  /*only act on user entry objects*/
  if(obj == NULL || obj->draw != EntryDraw) return;

  pEntry = (usr_entry_st *) obj->obj;
  if(!pEntry->bEditable) return;

  /*ordered bounds of the selection*/
  selBegin = _MIN(pEntry->cursStart, pEntry->cursStop);
  selEnd = _MAX(pEntry->cursStart, pEntry->cursStop);

  StrDelete(pEntry->buffer, pEntry->sizeMax, selBegin, selEnd);

  /*empty selection: the insert bar moves one position to the left*/
  if(selBegin == selEnd && selBegin > 0) {
    pEntry->cursStart--;
    pEntry->cursStop--;
  }

  /*keep the insert line inside the (now shorter) string*/
  pEntry->len = gstrlen(pEntry->buffer);
  if(pEntry->len < pEntry->cursStart) pEntry->cursStart = pEntry->len;

  /*collapse the selection onto the lower cursor*/
  pEntry->cursStart = _MIN(pEntry->cursStart, pEntry->cursStop);
  pEntry->cursStop = pEntry->cursStart;

  /*force refresh*/
  GUI_ObjSetNeedRefresh(obj, true);
}
示例#7
0
文件: testutil.c 项目: mita/Test
/*
 * Drains config->num_works work items from the queue, accumulates the
 * sum/min/max of their two elapsed times, frees each item, and prints
 * the results according to config->verbose:
 *   verbose > 1: one line per work item (start offset and elapsed, x2)
 *   verbose > 0: one summary line "# avg min max avg min max"
 *
 * Values are printed as "<v / 1000000>.<(v / 1000 % 1000), 3 digits>".
 * Nothing is printed and nothing is popped when num_works is not positive,
 * which also avoids dividing by zero when computing the averages.
 */
static void collect_results(struct benchmark_config *config,
			struct work_queue *queue, unsigned long long start,
			unsigned long long elapsed)
{
	int i;
	unsigned long long sum[2] = { 0, 0 };
	/* ULLONG_MAX (not ULONG_MAX) is the correct sentinel for this type */
	unsigned long long min[2] = { ULLONG_MAX, ULLONG_MAX };
	unsigned long long max[2] = { 0, 0 }, avg[2];

	if (config->num_works <= 0)
		return;

	for (i = 0; i < config->num_works; i++) {
		struct work *work = work_queue_pop(queue);
		/* normalise to the accumulator type so the %llu formats match */
		unsigned long long elapsed0 = work->elapsed[0];
		unsigned long long elapsed1 = work->elapsed[1];

		sum[0] += elapsed0;
		sum[1] += elapsed1;
		min[0] = _MIN(min[0], elapsed0);
		min[1] = _MIN(min[1], elapsed1);
		max[0] = _MAX(max[0], elapsed0);
		max[1] = _MAX(max[1], elapsed1);

		if (config->verbose > 1) {
			unsigned long long offset0 = work->start[0] - start;
			unsigned long long offset1 = work->start[1] - start;

			printf(
			"%llu.%03llu %llu.%03llu %llu.%03llu %llu.%03llu\n",
				offset0 / 1000000,
				offset0 / 1000 % 1000,
				elapsed0 / 1000000,
				elapsed0 / 1000 % 1000,
				offset1 / 1000000,
				offset1 / 1000 % 1000,
				elapsed1 / 1000000,
				elapsed1 / 1000 % 1000);
		}
		free(work);
	}
	avg[0] = sum[0] / config->num_works;
	avg[1] = sum[1] / config->num_works;

	if (config->verbose > 0) {
		printf(
		"# %llu.%03llu %llu.%03llu %llu.%03llu %llu.%03llu %llu.%03llu %llu.%03llu\n",
			avg[0] / 1000000, avg[0] / 1000 % 1000,
			min[0] / 1000000, min[0] / 1000 % 1000,
			max[0] / 1000000, max[0] / 1000 % 1000,
			avg[1] / 1000000, avg[1] / 1000 % 1000,
			min[1] / 1000000, min[1] / 1000 % 1000,
			max[1] / 1000000, max[1] / 1000 % 1000);
	}
}
示例#8
0
/*
 * Runs `threads` benchmark threads, waits for them, and prints
 *   "<threads> <avg> <min> <max>"
 * where each time is the per-thread time_ms shown as seconds.milliseconds.
 * When `verbose` is set, also prints the aggregate throughput in KB/sec,
 * computed from value_length * threads * requests and the average time.
 */
static void benchmark(void)
{
	int i;
	long sum = 0;
	long min_ms = LONG_MAX, max_ms = 0, avg_ms;
	pthread_t *tid;
	struct benchmark_thread_data *data;

#ifdef CHUNKD_BENCHMARK
	stc_init();
#endif

	tid = xmalloc(sizeof(tid[0]) * threads);
	data = xmalloc(sizeof(data[0]) * threads);

	for (i = 0; i < threads; i++) {
		data[i].id = i;
		xpthread_create(&tid[i], NULL, benchmark_thread, &data[i]);
	}
	wait_threads(tid, threads);

#define _MIN(a, b) ((a) < (b) ? (a) : (b))
#define _MAX(a, b) ((a) < (b) ? (b) : (a))

	/* gather sum / min / max of the per-thread run times */
	for (i = 0; i < threads; i++) {
		long ms = data[i].time_ms;

		sum += ms;
		min_ms = _MIN(min_ms, ms);
		max_ms = _MAX(max_ms, ms);
	}

	avg_ms = sum / threads;

	printf("%d %ld.%03ld %ld.%03ld %ld.%03ld\n", threads,
			avg_ms / 1000, avg_ms % 1000,
			min_ms / 1000, min_ms % 1000,
			max_ms / 1000, max_ms % 1000);

	if (verbose) {
		unsigned long long total_bytes;
		unsigned long long bytes_per_msec;
		/*
		 * A run that averages under one millisecond gives avg_ms == 0;
		 * treat it as 1 ms so the division below cannot trap.
		 */
		long divisor_ms = _MAX(avg_ms, 1L);

		total_bytes = value_length;
		total_bytes *= threads;
		total_bytes *= requests;

		bytes_per_msec = total_bytes / divisor_ms;

		printf("Throughput: %llu KB/sec\n",
				bytes_per_msec * 1000UL / 1024UL);
	}

	free(data);
	free(tid);
}
示例#9
0
/*
 * Finishes a struct layout. If the cursor is on the struct terminator, the
 * alignment is forced to at least 1, the total offset is rounded up to that
 * alignment and the cursor is cleared. The resulting size and alignment are
 * then reported, converted by kBitsPerUnit, through whichever of the two
 * output pointers is non-null.
 */
void
q_layoutStructEndEncode(StructLayout layout, int* size, unsigned int* align) {
    if (layout->cur_type) {
        if (*layout->cur_type == Q_C_STRUCT_E) {
            layout->align = _MAX(1, layout->align);
            layout->offset = _ROUND(layout->offset, layout->align);
            layout->cur_type = nil;
        }
    }

    if (size) {
        *size = layout->offset / kBitsPerUnit;
    }
    if (align) {
        *align = layout->align / kBitsPerUnit;
    }
}
示例#10
0
/**
 * Builds a network with `numhiddenlayers` hidden layers of
 * `neuronsperhiddenlayer` neurons each, followed by `numoutputs` output
 * neurons. Neurons in the first hidden layer are initialised with
 * `numinputs` inputs; all other neurons (including the output layer) are
 * initialised with `neuronsperhiddenlayer` inputs.
 *
 * The scratch buffers m_layerinput / m_layeroutput are sized to the widest
 * layer they may have to hold.
 *
 * The `register` storage class used previously was removed from the
 * language in C++17, so the loop counters are plain locals here.
 */
CBotNeuralNet :: CBotNeuralNet ( unsigned short int numinputs, unsigned short int numhiddenlayers, 
							  unsigned short int neuronsperhiddenlayer, unsigned short int numoutputs, 
								ga_nn_value learnrate)
{
	unsigned short int i;
	unsigned short int j;

	m_numInputs = numinputs;
	m_numOutputs = numoutputs;
	m_numHidden = neuronsperhiddenlayer;
	m_numHiddenLayers = numhiddenlayers;

	m_pOutputs = new CLogisticalNeuron[numoutputs];
	m_pHidden = new CLogisticalNeuron*[numhiddenlayers];

	m_layerinput = new ga_nn_value[_MAX(numinputs,neuronsperhiddenlayer)];
	m_layeroutput = new ga_nn_value[_MAX(numoutputs,_MAX(numinputs,neuronsperhiddenlayer))];

	for ( j = 0; j < numhiddenlayers; j ++ )
	{
		// layer 0 is fed by the network inputs, deeper layers by the previous hidden layer
		const unsigned short int fanIn = ( j == 0 ) ? numinputs : neuronsperhiddenlayer;

		m_pHidden[j] = new CLogisticalNeuron[neuronsperhiddenlayer];

		for ( i = 0; i < neuronsperhiddenlayer; i ++ )
			m_pHidden[j][i].init(fanIn,learnrate);
	}

	for ( i = 0; i < numoutputs; i ++ )
		m_pOutputs[i].init(neuronsperhiddenlayer,learnrate);

	//m_transferFunction = new CSigmoidTransfer ();
}
示例#11
0
/*
 * Ensures that at least `space` bytes are free after h->used in the heap
 * buffer, growing it with realloc when necessary. Growth is at least 32
 * bytes and otherwise the shortfall plus 16 bytes of slack.
 *
 * The byte counts are kept in size_t so that a large `space` is not
 * truncated by passing through an int.
 */
void
_hbuf_reserve(struct hdfs_heap_buf *h, size_t space)
{
	size_t remain, toalloc;

	remain = (size_t)(h->size - h->used);
	if (remain >= space)
		return;

	toalloc = _MAX((size_t)32, space - remain + 16);

	h->buf = realloc(h->buf, h->size + toalloc);
	ASSERT(h->buf);
	h->size += toalloc;
}
示例#12
0
/*
 * Starts a struct layout walk over the type encoding `type`.
 *
 * The encoding must begin with Q_C_STRUCT_B (otherwise er2 is raised). If
 * the text after it contains an '=' before any struct/union delimiter, the
 * part up to and including that '=' is skipped. The layout is then reset:
 * cursor on the first field, no previous field, offset 0, and the initial
 * alignment set to the larger of kBitsPerUnit and STRUCT_SIZE_BOUNDARY.
 */
void
q_layoutStructBeginEncode(const char* type, StructLayout layout) {
    const char* scan;
    const char  opener = *type;

    ++type;
    if (opener != Q_C_STRUCT_B) q_throwError(er2, type);

    /* look for an '=' that precedes the first struct/union delimiter */
    scan = type;
    for (;;) {
        if (*scan == Q_C_STRUCT_E || *scan == Q_C_STRUCT_B || *scan == Q_C_UNION_B)
            break;
        if (*scan++ == '=')
            break;
    }
    if (*(scan - 1) == '=') {
        type = scan;
    }

    layout->cur_type = type;
    layout->prev_type = nil;
    layout->offset = 0;
    layout->align = _MAX(kBitsPerUnit, STRUCT_SIZE_BOUNDARY);
}
示例#13
0
// Creates both view bitmaps, using a common reduction ratio, and then
// recomputes the scaling via SetScalingFactor().
//
// In compact mode the ratio is ceil(maxLines / maxHeight), where maxHeight
// is the usable client height; it stays 0 when the lines already fit.
// When the client area is too small to leave any usable height the ratio
// is left at 0 as well, instead of dividing by zero or by a negative height.
void NavDialog::CreateBitmap()
{
	RECT r;
	::GetClientRect(_hSelf, &r);

	const int maxLines	= _MAX(m_view[0].m_lines, m_view[1].m_lines);
	const int maxHeight	= (r.bottom - r.top) - 2 * cSpace - 2;

	int reductionRatio = 0;

	if (m_compact && maxHeight > 0)
	{
		reductionRatio = maxLines / maxHeight;

		// round up when the division is not exact
		if (reductionRatio && (maxLines % maxHeight))
			++reductionRatio;
	}

	m_view[0].create(m_clr, reductionRatio);
	m_view[1].create(m_clr, reductionRatio);

	SetScalingFactor();
}
示例#14
0
// Point-in-polygon test by counting how many contour edges lie to the left
// of (x, y) on the horizontal line through the point; an odd count means
// the point is inside.
//
// The crossing abscissa is interpolated along Y rather than through the
// slope dY/dX, so vertical edges (equal X at both ends) no longer divide
// by zero. The divisor is never zero because edges whose endpoints share
// the same integer Y are skipped first.
//
// Returns false for a null contour.
bool IsPointInContour(int x, int y, TVAZone* contour)
{
	if (!contour)
		return false;

	int count = 0;
	for (int i = 0; i < contour->NumPoints; i++)
	{
		int j = (i+1)%contour->NumPoints;
		// horizontal edge
		if ((int)contour->Points[i].Y == (int)contour->Points[j].Y)
			continue;
		// edge entirely above the ray
		else if (contour->Points[i].Y > y && contour->Points[j].Y > y)
			continue;
		// edge entirely below the ray
		else if (contour->Points[i].Y < y && contour->Points[j].Y < y)
			continue;
		// edge entirely to the right of the point
		else if ((int)_MIN(contour->Points[i].X,contour->Points[j].X) > x)
			continue;
		// the lower endpoint of the edge lies on the ray
		else if ((int)_MIN(contour->Points[i].Y,contour->Points[j].Y) == (int)y)
			continue;
		// the upper endpoint of the edge lies on the ray
		else if ((int)_MAX(contour->Points[i].Y,contour->Points[j].Y) == (int)y)
		{
			double x1 = contour->Points[i].Y > contour->Points[j].Y ? contour->Points[i].X : contour->Points[j].X;
			if (x > x1)
				count++;
		}
		else
		{
			const double xi = contour->Points[i].X;
			const double yi = contour->Points[i].Y;
			const double xj = contour->Points[j].X;
			const double yj = contour->Points[j].Y;
			// abscissa of the intersection of the edge with the line Y == y
			const double t = xj + (y - yj) * (xj - xi) / (yj - yi);
			if (t < x)
				count++;
		}
	}
	return (count & 1) != 0;
}
示例#15
0
// Re-reads the line counts and first visible lines of both views, picks the
// view with the most bitmap lines as the sync view, and derives the
// navigation bar geometry (view width, height, pixels per line) from the
// client rectangle. Shows the scroller when not even one pixel per line
// fits; otherwise caps the scale at 5 pixels per line and hides the scroll
// bar. Finally refreshes the scroll state, the docking dialog and, if
// visible, the window itself.
void NavDialog::SetScalingFactor()
{
	int v;

	for (v = 0; v < 2; ++v)
		m_view[v].m_lines = ::SendMessage(m_view[v].m_hView, SCI_GETLINECOUNT, 0, 0);

	for (v = 0; v < 2; ++v)
		m_view[v].m_firstVisible = ::SendMessage(m_view[v].m_hView, SCI_GETFIRSTVISIBLELINE, 0, 0);

	m_maxBmpLines = _MAX(m_view[0].maxBmpLines(), m_view[1].maxBmpLines());

	if (m_maxBmpLines == m_view[0].maxBmpLines())
		m_syncView = &m_view[0];
	else
		m_syncView = &m_view[1];

	RECT r;
	::GetClientRect(_hSelf, &r);

	m_navViewWidth = ((r.right - r.left) - 3 * cSpace - 4) / 2;
	m_navHeight = (r.bottom - r.top) - 2 * cSpace - 2;

	m_pixelsPerLine = m_navHeight / m_maxBmpLines;

	if (m_pixelsPerLine != 0)
	{
		// everything fits: cap the scale and shrink the bar to the used height
		if (m_pixelsPerLine > 5)
			m_pixelsPerLine = 5;

		m_navHeight = m_pixelsPerLine * m_maxBmpLines;

		if (m_hScroll)
			::ShowScrollBar(m_hScroll, SB_CTL, FALSE);
	}
	else
	{
		// more lines than pixels: one pixel per line plus a scroller
		m_pixelsPerLine = 1;

		ShowScroller(r);
	}

	updateScroll();
	updateDockingDlg();

	if (isVisible())
		::InvalidateRect(_hSelf, NULL, TRUE);
}
示例#16
0
/* Looks up USERNAME in the password file FILE.
 *
 * Each line is expected to start with "name:" and to end with ":index".
 * A line matches when the text before its first ':' equals USERNAME exactly.
 * Returns the integer that follows the last ':' of the matching line, or
 * -1 when the file cannot be opened or no line matches.
 *
 * Lines without any ':' are skipped; previously such a line (for example a
 * last line without a trailing newline) could match and lead to a NULL
 * pointer being passed to atoi().
 */
static int
find_strchr (char *username, char *file)
{
  FILE *fd;
  char *pos;
  char line[5 * 1024];
  size_t i, name_len, cmp_len;

  fd = fopen (file, "r");
  if (fd == NULL)
    {
      fprintf (stderr, "Cannot open file '%s'\n", file);
      return -1;
    }

  name_len = strlen (username);

  while (fgets (line, sizeof (line), fd) != NULL)
    {
      /* move to first ':' (bound is checked before the array is read) */
      i = 0;
      while ((i < sizeof (line)) && (line[i] != ':') && (line[i] != '\0'))
	{
	  i++;
	}

      /* compare over the longer of the two names so prefixes do not match */
      cmp_len = (i > name_len) ? i : name_len;
      if (strncmp (username, line, cmp_len) != 0)
	continue;

      /* find the index */
      pos = strrchr (line, ':');
      if (pos == NULL)
	continue;		/* malformed line: no field separator */

      fclose (fd);
      return atoi (pos + 1);
    }

  fclose (fd);
  return -1;
}
示例#17
0
/**
 * @function GUI_W_UsrEntryGetSelection
 * @brief copy the characters between the two cursors of a user entry into
 *        a caller supplied buffer
 * @param g_obj_st *obj: generic object; ignored unless it is a user entry
 * @param uint8_t *buffer: user buffer; will be \0 terminated
 * @param uint16_t bufSize: size of the user buffer (terminator included)
 * @return none
 */
void GUI_W_UsrEntryGetSelection(g_obj_st *obj, uint8_t *buffer, uint16_t bufSize) {

  usr_entry_st *pEntry;
  uint16_t idx, last;

  /*nothing to do without a user entry and a non-empty destination*/
  if(obj == NULL || obj->draw != EntryDraw || buffer == NULL || bufSize == 0) return;

  pEntry = (usr_entry_st *) obj->obj;

  /*selection bounds, in increasing order*/
  idx = _MIN(pEntry->cursStart, pEntry->cursStop);
  last = _MAX(pEntry->cursStart, pEntry->cursStop);

  /*copy while one byte remains free for the terminator*/
  for( ; bufSize > 1 && idx < last; idx++, bufSize--) {
    *buffer++ = pEntry->buffer[idx];
  }

  /*always be sure that str is '\0' terminated*/
  *buffer = 0;
}
示例#18
0
/*
 * Returns the size, in bytes, of the value described by the type encoding
 * `type`. A leading variable name and any qualifiers are skipped first.
 *
 *  - scalar and pointer codes map to sizeof of the matching C type;
 *  - void is 0;
 *  - arrays are <count> * size of the element encoding that follows;
 *  - bit-fields are <bits> / kBitsPerUnit;
 *  - structs are laid out with the q_layoutStruct*Encode helpers;
 *  - unions are the size of their largest member.
 *
 * Unknown codes are reported through q_throwError(er1, ...); 0 is returned
 * if that call returns.
 */
int
q_sizeOfTypeEncode(const char* type) {
    type = q_skipVarNameEncode(type);
    type = q_skipVarQualifiersEncode(type);

    switch (*type) {
    case Q_C_ID:
        return sizeof(id);
    case Q_C_CLASS:
        return sizeof(Class);
    case Q_C_SEL:
        return sizeof(SEL);
    case Q_C_CHR:
        return sizeof(char);
    case Q_C_UCHR:
        return sizeof(unsigned char);
    case Q_C_SHT:
        return sizeof(short);
    case Q_C_USHT:
        return sizeof(unsigned short);
    case Q_C_INT:
        return sizeof(int);
    case Q_C_UINT:
        return sizeof(unsigned int);
    case Q_C_LNG:
        return sizeof(long);
    case Q_C_ULNG:
        return sizeof(unsigned long);
    case Q_C_FLT:
        return sizeof(float);
    case Q_C_DBL:
        return sizeof(double);
    case Q_C_VOID:
        return 0;
    case Q_C_PTR:
    case Q_C_CHARPTR:
        return sizeof(char*);
    case Q_C_ARY_B: {
        /* "<marker><count><element encoding>" */
        int len = atoi(type + 1);
        /* isdigit() requires a value representable as unsigned char */
        while (isdigit((unsigned char)*++type)); // empty loop
        return len * q_sizeOfTypeEncode(type);
    }
    case Q_C_BFLD:
        return atoi(type + 1) / kBitsPerUnit;
    case Q_C_STRUCT_B: {
        struct _StructLayout    layout;
        /* q_layoutStructEndEncode takes an int* for the size */
        int                     size;

        q_layoutStructBeginEncode(type, &layout);
        while (q_layoutStructNextEncode(&layout)); // empty loop
        q_layoutStructEndEncode(&layout, &size, nil);
        return size;
    }
    case Q_C_UNION_B: {
        int max_size = 0;
        /* skip the part up to and including '=', if there is one */
        while (*type != Q_C_UNION_E && *type++ != '='); // empty loop
        while (*type != Q_C_UNION_E) {
            type = q_skipVarNameEncode(type);
            max_size = _MAX(max_size, q_sizeOfTypeEncode(type));
            type = q_skipTypeSpecEncode(type);
        }
        return max_size;
    }
    default:
        q_throwError(er1, type);
    }
    return 0;
}
示例#19
0
文件: eig3.cpp 项目: phaedon/aletler
// Symmetric tridiagonal QL algorithm.
//
// Works in place on the three arguments:
//   d - on exit holds the computed values sorted in ascending order
//       (see the sort at the end of the function);
//   e - scratch; it is shifted down by one slot on entry and each
//       processed entry is zeroed;
//   V - every plane rotation applied during the iteration is accumulated
//       into its columns, and its columns are permuted together with d
//       by the final sort.
// The loop bound `n` is defined outside this function (not visible here);
// the arrays are declared with dimension 3.
// NOTE(review): presumably d/e are the diagonal/sub-diagonal produced by a
// preceding tridiagonalisation step and V its transformation - confirm
// against the caller.
static void tql2(double V[3][3], double d[3], double e[3]) {

//  This is derived from the Algol procedures tql2, by
//  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
//  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
//  Fortran subroutine in EISPACK.

  // Shift e down by one position and clear the last slot.
  e[0] = e[1]; e[1] = e[2]; e[2] = 0.0;

  double f = 0.0;     // accumulated shift, added back to d[l] once it has converged
  double tst1 = 0.0;  // running maximum of |d[l]| + |e[l]|, scales the tolerance
  double eps = pow(2.0,-52.0);
  for (int l = 0; l < n; l++) {

    // Find small subdiagonal element
    tst1 = _MAX(tst1, fabs(d[l]) + fabs(e[l]));
    int m = l;
    while (m < n) {
      if ( fabs(e[m]) <= eps*tst1 ) break;
      m++;
    }

    // If m == l, d[l] is an eigenvalue,
    // otherwise, iterate.
    if (m > l) {
      int iter = 0;
      do {
        iter = iter + 1;  // (Could check iteration count here.)

        // Compute implicit shift

        double g = d[l];
        double p = (d[l+1] - g) / (2.0 * e[l]);
        double r = hypot2(p,1.0);
        if (p < 0) {
          r = -r;
        }
        d[l] = e[l] / (p + r);
        d[l+1] = e[l] * (p + r);
        double dl1 = d[l+1];
        double h = g - d[l];
        // Apply the same shift to the remaining diagonal entries.
        for (int i = l+2; i < n; i++) {
          d[i] -= h;
        }
        f = f + h;

        // Implicit QL transformation.

        p = d[m];
        double c = 1.0;
        double c2 = c;
        double c3 = c;
        double el1 = e[l+1];
        double s = 0.0;
        double s2 = 0.0;
        // Sweep from row m-1 down to row l; c/s are the current rotation,
        // c2/c3/s2 keep the values from the previous iterations.
        for (int i = m-1; i >= l; --i) {
          c3 = c2;
          c2 = c;
          s2 = s;
          g = c * e[i];
          h = c * p;
          r = hypot2(p,e[i]);
          e[i+1] = s * r;
          s = e[i] / r;
          c = p / r;
          p = c * d[i] - s * g;
          d[i+1] = h + s * (c * g + s * d[i]);

          // Accumulate transformation.

          for (int k = 0; k < n; ++k) {
            h = V[k][i+1];
            V[k][i+1] = s * V[k][i] + c * h;
            V[k][i] = c * V[k][i] - s * h;
          }
        }
        p = -s * s2 * c3 * el1 * e[l] / dl1;
        e[l] = s * p;
        d[l] = c * p;

        // Check for convergence.
      } while (fabs(e[l]) > eps*tst1);
    }
    // Undo the accumulated shift and mark this off-diagonal entry as done.
    d[l] = d[l] + f;
    e[l] = 0.0;
  }
  
  // Sort eigenvalues and corresponding vectors.
  // (Selection sort: find the smallest remaining d[j], swap it into
  // position i and swap columns i and k of V to match.)

  for (int i = 0; i < n-1; i++) {
    int k = i;
    double p = d[i];
    for (int j = i+1; j < n; j++) {
      if (d[j] < p) {
        k = j;
        p = d[j];
      }
    }

    if (k != i) {
      d[k] = d[i];
      d[i] = p;
      for (int j = 0; j < n; j++) {
        p = V[j][i];
        V[j][i] = V[j][k];
        V[j][k] = p;
      }
    }
  }
}
// Sort the bunch in ascending z (ct) order, and return
// a vector of iterators which point to the equal-spaced
// bin boundaries defined by zmin to zmax in steps of dz.
//
// Returns the number of particles removed from tails
// i.e. z<zmin || z>=zmax
//
// On return:
//   pbins - nbins+1 iterators into the bunch marking the bin boundaries
//   hd    - the normalised distribution, hbins[n]/(total*dz)
//   hdp   - the derivative of the distribution calculated using the
//           Savitzky-Golay filter c
// If c is null or empty, then the derivative will be zero.
//
// The filter is applied over the window [n-w, n+w] (w = (c->size()-1)/2)
// clipped to the valid bins [0, nbins-1], so every filter coefficient,
// including the last one, takes part and no index can reach nbins.
// An empty bunch yields an all-zero distribution rather than NaNs.
//
// Aborts the program if particles remain after the last bin ("bad slicing").
size_t ParticleBinList(ParticleBunch& bunch, double zmin, double zmax, size_t nbins,
                       vector<ParticleBunch::iterator>& pbins,
                       vector<double>& hd, vector<double>& hdp, vector<double>* c)
{
	double dz = (zmax-zmin)/double(nbins);
	vector<ParticleBunch::iterator> bins;
	vector<double> hbins(nbins,0);
	bins.reserve(nbins+1);

	bunch.SortByCT();

	size_t lost = TruncateZ(bunch,zmin,zmax);

	ParticleBunch::iterator p = bunch.begin();
	bins.push_back(p);

	double z=zmin;
	double total=0;
	size_t n;

	// Walk the sorted bunch once, counting the particles that fall in each bin
	for(n=0; n<nbins; n++)
	{
		z+=dz;
		while(p!=bunch.end() && p->ct()<z)
		{
			total++;
			hbins[n]++;
			p++;
		}
		bins.push_back(p);
	}

	// Every particle should have been consumed by now
	if(p!=bunch.end())
	{
#ifdef ENABLE_MPI
		cerr << "bad slicing in rank: " << MPI::COMM_WORLD.Get_rank() << endl;
#endif

#ifndef ENABLE_MPI
		cerr << "bad slicing" << endl;
#endif

		cerr << "z = " << z << " ct = " << p->ct() << " zmax = " << zmax << endl;
		//Dump out the bad bunch
		/*
			ofstream* badbunch = new ofstream("badbunch.bunch");
			bunch.Output(*badbunch);
			badbunch->close();
			delete badbunch;
			cerr << "Output of the current bunch is to badbunch.bunch" << endl;
		*/
#ifndef ENABLE_MPI
		abort();
#endif

#ifdef ENABLE_MPI
		MPI::COMM_WORLD.Abort(1);
#endif
	}

	// normalise distribution
	// and apply filter

	vector<double> fbins(nbins,0);
	vector<double> fpbins(nbins,0);

	const double a = (total > 0) ? 1/total/dz : 0.0;
	const bool useFilter = (c != 0) && !c->empty();
	const size_t w = useFilter ? (c->size()-1)/2 : 0;

	for(n=0; n<nbins; n++)
	{
		fbins[n] = hbins[n]*a;
		if(useFilter)
		{
			// window [n-w, n+w] clipped to [0, nbins-1]; nbins > 0 inside this loop
			const size_t first = (n > w) ? n-w : 0;
			const size_t last = (n+w < nbins) ? n+w : nbins-1;

			for(size_t m=first; m<=last; m++)
			{
				// m+w-n lies in [0, 2w] because n-w <= m <= n+w
				fpbins[n] += hbins[m]*(*c)[m+w-n]*a;
			}
		}
	}

	pbins.swap(bins);
	hd.swap(fbins);
	hdp.swap(fpbins);

	return lost;
}
示例#21
0
// Top-level driver of the simulation.
//
// Launches, in this order: the optional progress reporter, one router
// process per entry of g_Router_vec, the input and output NI processes,
// the optional profiler, the link-DVS processes (only when LINK_DVS is
// defined), the traffic injection process(es) selected by
// g_cfg.wkld_type, and the warm-up/finalize controller. It then waits on
// g_ev_sim_done and records the wall-clock end time and elapsed time in g_sim.
//
// NOTE(review): create(), simtime() and the event wait look like CSIM
// library calls (see the m_num_CSIM_process counter) - confirm.
void process_main()
{
    create("start_proc");
    g_sim.m_num_CSIM_process++;

    fprintf(stderr, "started simulation.\n"); 

    // simulation progress verbose
    if (g_cfg.sim_show_progress)
        process_sim_progress();

    // router
    for (unsigned int i=0; i<g_Router_vec.size(); i++) {
        process_router(g_Router_vec[i]);
    }

    // input/output NI
    // Per-PC input type starts a single input process (index 0) for each NI;
    // per-VC input type starts one per virtual channel (g_cfg.router_num_vc).
    for (unsigned int n=0; n<g_NIInput_vec.size(); n++) {
        switch (g_cfg.NIin_type) {
        case NI_INPUT_TYPE_PER_PC:
            process_NI_input(g_NIInput_vec[n], 0);
            break;
        case NI_INPUT_TYPE_PER_VC:
            for (int NI_vc=0; NI_vc<g_cfg.router_num_vc; NI_vc++)
                process_NI_input(g_NIInput_vec[n], NI_vc);
            break;
        default:
            assert(0);
        }
    }

    for (unsigned int n=0; n<g_NIOutput_vec.size(); n++) {
        process_NI_output(g_NIOutput_vec[n]);
    }

    // profile
    // Cycle-based profiling when an interval is configured, otherwise
    // instruction-based profiling.
    if (g_cfg.profile_perf || g_cfg.profile_power) {
        if (g_cfg.profile_interval_cycle)
            process_profile_cycle();
        else
            process_profile_instr();
    }

#ifdef LINK_DVS
    // link-dvs
    process_link_dvs_link_speedup();
    process_link_dvs_link_slowdown();
    process_link_dvs_set();
#endif

    // injection
    // Trace workloads share one trace parser; synthetic workloads get one
    // traffic generator per core index.
    switch (g_cfg.wkld_type) {
    case WORKLOAD_TRIPS_TRACE:
    case WORKLOAD_TILED_CMP_TRACE:
    case WORKLOAD_TILED_CMP_VALUE_TRACE:
    case WORKLOAD_SNUCA_CMP_VALUE_TRACE:
        process_parse_trace();
        break;

    case WORKLOAD_SYNTH_SPATIAL:
    case WORKLOAD_SYNTH_TRAFFIC_MATRIX:
        for (unsigned int c=0; c<g_Core_vec.size(); c++)
            process_gen_synth_traffic(c);
        break;

    default:
        assert(0);
    }

    // control simulation for warmup and finalize
    process_control_sim();

    g_ev_sim_done->wait();

    // Now the simulation is done.
    fprintf(stderr, "finished simulation at clk=%.0lf.\n", simtime());

    // Find the simulation end time
    // Elapsed time is clamped to at least 1 so it is never zero.
    g_sim.m_end_time = time((time_t *)NULL);
    g_sim.m_elapsed_time = _MAX(g_sim.m_end_time - g_sim.m_start_time, 1);

#ifdef _DEBUG_ROUTER_PROCESS
    printf("PROCESS COMPLETE: process_main()\n");
#endif
}
示例#22
0
/* Parses the tpasswd files, in order to verify the given
 * username/password pair.
 *
 * The user's index is looked up in TPASSWD, the line of CONFFILE that
 * starts with that index is parsed into the group parameters (g, n), and
 * the user's verifier and salt are then taken from the second and third
 * ':'-separated fields of the user's TPASSWD line.
 *
 * Returns the result of _verify_passwd_int(), or -1 on any lookup, open
 * or parse failure. Every stream opened here is closed on all paths.
 */
int
verify_passwd (char *conffile, char *tpasswd, char *username,
	       const char *passwd)
{
  FILE *fd;
  char line[5 * 1024];
  unsigned int i;
  gnutls_datum_t g, n;
  int iindex;
  char *p, *pos;

  iindex = find_strchr (username, tpasswd);
  if (iindex == -1)
    {
      fprintf (stderr, "Cannot find '%s' in %s\n", username, tpasswd);
      return -1;
    }

  fd = fopen (conffile, "r");
  if (fd == NULL)
    {
      fprintf (stderr, "Cannot find %s\n", conffile);
      return -1;
    }

  /* find the conf line whose leading number is the user's index */
  do
    {
      p = fgets (line, sizeof (line) - 1, fd);
    }
  while (p != NULL && atoi (p) != iindex);

  if (p == NULL)
    {
      fprintf (stderr, "Cannot find entry in %s\n", conffile);
      fclose (fd);
      return -1;
    }
  line[sizeof (line) - 1] = 0;

  fclose (fd);

  if ((iindex = read_conf_values (&g, &n, line)) < 0)
    {
      fprintf (stderr, "Cannot parse conf file '%s'\n", conffile);
      return -1;
    }

  fd = fopen (tpasswd, "r");
  if (fd == NULL)
    {
      fprintf (stderr, "Cannot open file '%s'\n", tpasswd);
      return -1;
    }

  while (fgets (line, sizeof (line), fd) != NULL)
    {
      /* move to first ':' 
       * This is the actual verifier.
       */
      i = 0;
      while ((i < sizeof (line)) && (line[i] != ':') && (line[i] != '\0'))
	{
	  i++;
	}
      if (strncmp (username, line, _MAX (i, strlen (username))) == 0)
	{
	  char *verifier_pos, *salt_pos;

	  /* the line is already in memory; the stream is no longer needed */
	  fclose (fd);

	  pos = strchr (line, ':');
	  if (pos == NULL)
	    {
	      fprintf (stderr, "Cannot parse conf file '%s'\n", conffile);
	      return -1;
	    }
	  pos++;
	  verifier_pos = pos;

	  /* Move to the salt */
	  pos = strchr (pos, ':');
	  if (pos == NULL)
	    {
	      fprintf (stderr, "Cannot parse conf file '%s'\n", conffile);
	      return -1;
	    }
	  pos++;
	  salt_pos = pos;

	  return _verify_passwd_int (username, passwd,
				     verifier_pos, salt_pos, &g, &n);
	}
    }

  fclose (fd);
  return -1;

}
示例#23
0
/* Creates or replaces the entry of USERNAME in the password file TPASSWD.
 *
 * The group parameters are read from the line of TPASSWD_CONF whose leading
 * number equals UINDEX, the verifier is produced by _srp_crypt(), and
 * TPASSWD is rewritten from a temporary copy ("<tpasswd>.tmp"): the user's
 * existing line is replaced, or a new line is appended when there was none.
 * Lines of the old file that contain no ':' are not carried over.
 *
 * Returns 0 on success and -1 on failure.
 *
 * Changes relative to the earlier version:
 *  - atoi() is no longer called on the NULL returned by fgets() at EOF;
 *  - the length check now really guarantees that "<tpasswd>.tmp" fits
 *    into tmpname;
 *  - the temporary copy is opened for reading before TPASSWD is truncated,
 *    so a failure to open it leaves the password file untouched;
 *  - streams are closed on the error paths.
 */
int
crypt_int (const char *username, const char *passwd, int salt_size,
	   char *tpasswd_conf, char *tpasswd, int uindex)
{
  FILE *fd;
  char *cr;
  gnutls_datum_t g, n;
  char line[5 * 1024];
  char *p, *pp;
  int iindex;
  char tmpname[1024];

  fd = fopen (tpasswd_conf, "r");
  if (fd == NULL)
    {
      fprintf (stderr, "Cannot find %s\n", tpasswd_conf);
      return -1;
    }

  do
    {				/* find the specified uindex in file */
      p = fgets (line, sizeof (line) - 1, fd);
    }
  while (p != NULL && atoi (p) != uindex);

  if (p == NULL)
    {
      fprintf (stderr, "Cannot find entry in %s\n", tpasswd_conf);
      fclose (fd);
      return -1;
    }
  line[sizeof (line) - 1] = 0;

  fclose (fd);
  if ((iindex = read_conf_values (&g, &n, line)) < 0)
    {
      fprintf (stderr, "Cannot parse conf file '%s'\n", tpasswd_conf);
      return -1;
    }

  cr = _srp_crypt (username, passwd, salt_size, &g, &n);
  if (cr == NULL)
    {
      fprintf (stderr, "Cannot _srp_crypt()...\n");
      return -1;
    }
  else
    {
      /* delete previous entry */
      struct stat st;
      FILE *fd2;
      int put;

      /* sizeof (".tmp") counts the suffix and the terminating NUL */
      if (strlen (tpasswd) + sizeof (".tmp") > sizeof (tmpname))
	{
	  fprintf (stderr, "file '%s' is tooooo long\n", tpasswd);
	  return -1;
	}
      strcpy (tmpname, tpasswd);
      strcat (tmpname, ".tmp");

      /* an existing temporary file is treated as a lock */
      if (stat (tmpname, &st) != -1)
	{
	  fprintf (stderr, "file '%s' is locked\n", tpasswd);
	  return -1;
	}

      if (filecopy (tpasswd, tmpname) != 0)
	{
	  fprintf (stderr, "Cannot copy '%s' to '%s'\n", tpasswd, tmpname);
	  return -1;
	}

      /* open the backup first: opening tpasswd with "w" truncates it */
      fd2 = fopen (tmpname, "r");
      if (fd2 == NULL)
	{
	  fprintf (stderr, "Cannot open '%s' for read\n", tmpname);
	  remove (tmpname);
	  return -1;
	}

      fd = fopen (tpasswd, "w");
      if (fd == NULL)
	{
	  fprintf (stderr, "Cannot open '%s' for write\n", tpasswd);
	  fclose (fd2);
	  remove (tmpname);
	  return -1;
	}

      put = 0;
      while ((p = fgets (line, sizeof (line) - 1, fd2)) != NULL)
	{
	  pp = strchr (line, ':');
	  if (pp == NULL)
	    continue;

	  /* compare over the longer name so that prefixes do not match */
	  if (strncmp
	      (p, username,
	       _MAX (strlen (username), (unsigned int) (pp - p))) == 0)
	    {
	      put = 1;
	      fprintf (fd, "%s:%s:%u\n", username, cr, iindex);
	    }
	  else
	    {
	      fputs (line, fd);
	    }
	}

      /* user was not present before: append a new entry */
      if (put == 0)
	{
	  fprintf (fd, "%s:%s:%u\n", username, cr, iindex);
	}

      fclose (fd);
      fclose (fd2);

      remove (tmpname);

    }


  return 0;
}
示例#24
0
/**
 * @function EntryDraw
 * @brief user entry draw function
 * @param void *_g_obj: generic object
 * @param void *_obj: frame object
 * @return none
 */
static void EntryDraw(void *_g_obj, void *_obj) {

  g_obj_st *g_obj;
  usr_entry_st *entry;

  uint8_t glyph;
  uint16_t ii, selStart, selStop;
  coord_t x, xInsertLine, xMin, yMin, xMax, yMax;
  rect_st rec;
  color_t colBack, colText;

  /*retreive generic & specific object*/
  if(_g_obj != NULL && _obj != NULL) {
    g_obj = (g_obj_st *) _g_obj;
    entry =  (usr_entry_st*) _obj;

    /*P2D configuration*/
    P2D_SetDisplayMode(DISPLAY_SOLID);
    P2D_SetLineType(LINE_SOLID);
    SetFont(entry->font);

    if(GUI_ObjIsDisabled(g_obj)) {
      colBack = GetColor(G_COL_BACKGROUND);
      colText = GetColor(G_COL_D_TEXT);
    }
    else {
      colBack = GetColor(G_COL_E_BACKGROUND);
      colText = entry->colText;
    }

    /*retrieve text coord*/
    GetTextCoords(&(g_obj->rec), &xMin, &yMin, &xMax, &yMax);

    /*display usr_entry glyphs, one by one*/
    selStart = _MIN(entry->cursStart, entry->cursStop);
    selStop = _MAX(entry->cursStart, entry->cursStop);
    x = xMin;
    xInsertLine = x;
    ii = entry->offsetDisplay;

    while(x < xMax && entry->buffer[ii] != 0) {

      /*select the color of the text, according to the user selection (i.e. reverse colors if the current glyph is a part of the user selection)*/
      if(entry->bEditable == false || selStart == selStop || ii < selStart || ii >= selStop) {
        P2D_SetColors(colText, colBack);
      }
      else {
        P2D_SetColors(colBack, GetColor(G_COL_SPECIAL));
      }

      /*display the glyph*/
      glyph = entry->buffer[ii];
      P2D_PutGlyph(x, yMin, glyph);
      x += P2D_GetGlyphWidth(glyph);

      /*if the car corresponds to the user selection bar, store its coord*/
      if(ii == entry->cursStop - 1) xInsertLine = x;

      /*next car*/
      ii++;
    }

    /*clear from last car to the end of the entry*/
    P2D_SetColors(colBack, colBack);
    (void) P2D_CoordToRect(&rec, x, yMin, xMax, yMax);
    P2D_FillRect(&rec);

    /*clear between text and object rect (1 px width)*/
    (void) P2D_CoordToRect(&rec, xMin-1, yMin-1, xMax+1, yMax+1);
    P2D_Rect(&rec);

    /*display the insert line*/
    if(entry->bEditable && entry->bBlink) {
      P2D_SetColor(colText);
      P2D_Line(xInsertLine, yMin, xInsertLine, yMax);
    }

    /*object rect*/
    P2D_SetColor(GetColor(G_COL_LOWER_REC));
    P2D_Rect(&(g_obj->rec));
  }
}
示例#25
0
int main(int argc, char **argv) {
  
  int c, i, mu, status;
  int ispin, icol, isc;
  int n_c = 3;
  int n_s = 4;
  int count        = 0;
  int filename_set = 0;
  int dims[4]      = {0,0,0,0};
  int grid_size[4];
  int l_LX_at, l_LXstart_at;
  int x0, x1, x2, x3, ix, iix, iy, is, it, i3;
  int sl0, sl1, sl2, sl3, have_source_flag=0;
  int source_proc_coords[4], lsl0, lsl1, lsl2, lsl3;
  int check_residuum = 0;
  unsigned int VOL3, V5;
  int do_gt   = 0;
  int full_orbit = 0;
  int smear_source = 0;
  char filename[200], source_filename[200], source_filename_write[200];
  double ratime, retime;
  double plaq_r=0., plaq_m=0., norm, norm2;
  double spinor1[24];
  double *gauge_qdp[4], *gauge_field_timeslice=NULL, *gauge_field_smeared=NULL;
  double _1_2_kappa, _2_kappa, phase;
  FILE *ofs;
  int mu_trans[4] = {3, 0, 1, 2};
  int threadid, nthreads;
  int timeslice, source_timeslice;
  char rng_file_in[100], rng_file_out[100];
  int *source_momentum=NULL;
  int source_momentum_class = -1;
  int source_momentum_no = 0;
  int source_momentum_runs = 1;
  int imom;
  int num_gpu_on_node=0, rank;
  int source_location_5d_iseven;
  int convert_sign=0;
#ifdef HAVE_QUDA
  int rotate_gamma_basis = 1;
#else
  int rotate_gamma_basis = 0;
#endif
  omp_lock_t *lck = NULL, gen_lck[1];
  int key = 0;


  /****************************************************************************/
  /* for smearing parallel to inversion                                       */
  double *smearing_spinor_field[] = {NULL,NULL};
  int dummy_flag = 0;
  /****************************************************************************/


  /****************************************************************************/
#if (defined HAVE_QUDA) && (defined MULTI_GPU)
  int x_face_size, y_face_size, z_face_size, t_face_size, pad_size;
#endif
  /****************************************************************************/

  /************************************************/
  int qlatt_nclass;
  int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL;
  double **qlatt_list=NULL;
  /************************************************/

  /************************************************/
  double boundary_condition_factor;
  int boundary_condition_factor_set = 0;
  /************************************************/

//#ifdef MPI       
//  kernelPackT = true;
//#endif

  /***********************************************
   * QUDA parameters
   ***********************************************/
#ifdef HAVE_QUDA
  QudaPrecision cpu_prec         = QUDA_DOUBLE_PRECISION;
  QudaPrecision cuda_prec        = QUDA_DOUBLE_PRECISION;
  QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION;

  QudaGaugeParam gauge_param = newQudaGaugeParam();
  QudaInvertParam inv_param = newQudaInvertParam();
#endif

  while ((c = getopt(argc, argv, "soch?vgf:p:b:S:R:")) != -1) {
    switch (c) {
    case 'v':
      g_verbose = 1;
      break;
    case 'g':
      do_gt = 1;
      break;
    case 'f':
      strcpy(filename, optarg);
      filename_set=1;
      break;
    case 'c':
      check_residuum = 1;
      fprintf(stdout, "# [invert_dw_quda] will check residuum again\n");
      break;
    case 'p':
      n_c = atoi(optarg);
      fprintf(stdout, "# [invert_dw_quda] will use number of colors = %d\n", n_c);
      break;
    case 'o':
      full_orbit = 1;
      fprintf(stdout, "# [invert_dw_quda] will invert for full orbit, if source momentum set\n");
    case 's':
      smear_source = 1;
      fprintf(stdout, "# [invert_dw_quda] will smear the sources if they are read from file\n");
      break;
    case 'b':
      boundary_condition_factor = atof(optarg);
      boundary_condition_factor_set = 1;
      fprintf(stdout, "# [invert_dw_quda] const. boundary condition factor set to %e\n", boundary_condition_factor);
      break;
    case 'S':
      convert_sign = atoi(optarg);
      fprintf(stdout, "# [invert_dw_quda] using convert sign %d\n", convert_sign);
      break;
    case 'R':
      rotate_gamma_basis = atoi(optarg);
      fprintf(stdout, "# [invert_dw_quda] rotate gamma basis %d\n", rotate_gamma_basis);
      break;
    case 'h':
    case '?':
    default:
      usage();
      break;
    }
  }

  // get the time stamp
  g_the_time = time(NULL);

  /**************************************
   * set the default values, read input
   **************************************/
  if(filename_set==0) strcpy(filename, "cvc.input");
  if(g_proc_id==0) fprintf(stdout, "# Reading input from file %s\n", filename);
  read_input_parser(filename);

#ifdef MPI
#ifdef HAVE_QUDA
  grid_size[0] = g_nproc_x;
  grid_size[1] = g_nproc_y;
  grid_size[2] = g_nproc_z;
  grid_size[3] = g_nproc_t;
  fprintf(stdout, "# [] g_nproc = (%d,%d,%d,%d)\n", g_nproc_x, g_nproc_y, g_nproc_z, g_nproc_t);
  initCommsQuda(argc, argv, grid_size, 4);
#else
  MPI_Init(&argc, &argv);
#endif
#endif

#if (defined PARALLELTX) || (defined PARALLELTXY)
  EXIT_WITH_MSG(1, "[] Error, 2-dim./3-dim. MPI-Version not yet implemented");
#endif


  // some checks on the input data
  if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) {
    if(g_proc_id==0) fprintf(stderr, "[invert_dw_quda] Error, T and L's must be set\n");
    usage();
  }

  // set number of openmp threads

  // initialize MPI parameters
  mpi_init(argc, argv);
  
  // the volume of a timeslice
  VOL3 = LX*LY*LZ;
  V5   = T*LX*LY*LZ*L5;
  g_kappa5d = 0.5 / (5. + g_m5);
  if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] kappa5d = %e\n", g_kappa5d);

  fprintf(stdout, "# [%2d] parameters:\n"\
                  "# [%2d] T            = %3d\n"\
		  "# [%2d] Tstart       = %3d\n"\
		  "# [%2d] L5           = %3d\n",\
                  g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, L5);


#ifdef MPI
  if(T==0) {
    fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id);
    MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Finalize();
    exit(2);
  }
#endif

  if(init_geometry() != 0) {
    fprintf(stderr, "[invert_dw_quda] Error from init_geometry\n");
    EXIT(1);
  }
  geometry();

  if( init_geometry_5d() != 0 ) {
    fprintf(stderr, "[invert_dw_quda] Error from init_geometry_5d\n");
    EXIT(2);
  }
  geometry_5d();

  /**************************************
   * initialize the QUDA library
   **************************************/
  if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] initializing quda\n");
#ifdef HAVE_QUDA
  // cudaGetDeviceCount(&num_gpu_on_node);
  if(g_gpu_per_node<0) {
    if(g_cart_id==0) fprintf(stderr, "[] Error, number of GPUs per node not set\n");
    EXIT(106);
  } else {
    num_gpu_on_node = g_gpu_per_node;
  }
#ifdef MPI
  rank = comm_rank();
#else
  rank = 0;
#endif
  g_gpu_device_number = rank % num_gpu_on_node;
  fprintf(stdout, "# [] process %d/%d uses device %d\n", rank, g_cart_id, g_gpu_device_number);

  initQuda(g_gpu_device_number);

#endif
 
  /**************************************
   * prepare the gauge field
   **************************************/
  // read the gauge field from file
  alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND);
  if(strcmp( gaugefilename_prefix, "identity")==0 ) {
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up unit gauge field\n");
    for(ix=0;ix<VOLUME; ix++) {
      for(mu=0;mu<4;mu++) {
        _cm_eq_id(g_gauge_field+_GGI(ix,mu));
      }
    }
  } else if(strcmp( gaugefilename_prefix, "random")==0 ) {
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up random gauge field with seed = %d\n", g_seed);
    init_rng_state(g_seed, &g_rng_state);
    random_gauge_field(g_gauge_field, 1.);
    plaquette(&plaq_m);
    sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
    check_error(write_lime_gauge_field(filename, plaq_m, Nconf, 64), "write_lime_gauge_field", NULL, 12);
  } else {
    if(g_gauge_file_format == 0) {
      // ILDG
      sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf);
      if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename);
      status = read_lime_gauge_field_doubleprec(filename);
    } else if(g_gauge_file_format == 1) {
      // NERSC
      sprintf(filename, "%s.%.5d", gaugefilename_prefix, Nconf);
      if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename);
      status = read_nersc_gauge_field(g_gauge_field, filename, &plaq_r);
      //status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq_r);

    }
    if(status != 0) {
      fprintf(stderr, "[invert_dw_quda] Error, could not read gauge field");
      EXIT(12);
    }
  }
#ifdef MPI
  xchange_gauge();
#endif

  // measure the plaquette
  plaquette(&plaq_m);
  if(g_cart_id==0) fprintf(stdout, "# Measured plaquette value: %25.16e\n", plaq_m);
  if(g_cart_id==0) fprintf(stdout, "# Read plaquette value    : %25.16e\n", plaq_r);

#ifndef HAVE_QUDA
  if(N_Jacobi>0) {
#endif
    // allocate the smeared / qdp ordered gauge field
    alloc_gauge_field(&gauge_field_smeared, VOLUMEPLUSRAND);
    for(i=0;i<4;i++) {
      gauge_qdp[i] = gauge_field_smeared + i*18*VOLUME;
    }
#ifndef HAVE_QUDA
  }
#endif

#ifdef HAVE_QUDA
  // transcribe the gauge field

  omp_set_num_threads(g_num_threads);
#pragma omp parallel for private(ix,iy,mu)
  for(ix=0;ix<VOLUME;ix++) {
    iy = g_lexic2eot[ix];
    for(mu=0;mu<4;mu++) {
      _cm_eq_cm(gauge_qdp[mu_trans[mu]]+18*iy, g_gauge_field+_GGI(ix,mu));
    }
  }
  // multiply timeslice T-1 with factor of -1 (antiperiodic boundary condition)
  if(g_proc_coords[0]==g_nproc_t-1) {
    if(!boundary_condition_factor_set) boundary_condition_factor = -1.;
    fprintf(stdout, "# [] process %d multiplies gauge-field timeslice T_global-1 with boundary condition factor %e\n", g_cart_id,
      boundary_condition_factor);

  omp_set_num_threads(g_num_threads);
#pragma omp parallel for private(ix,iy)
    for(ix=0;ix<VOL3;ix++) {
      iix = (T-1)*VOL3 + ix;
      iy = g_lexic2eot[iix];
      _cm_ti_eq_re(gauge_qdp[mu_trans[0]]+18*iy, -1.);
    }
  }

  // QUDA precision parameters
  switch(g_cpu_prec) {
    case 0: cpu_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = half\n"); break;
    case 1: cpu_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = single\n"); break;
    case 2: cpu_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = double\n"); break;
    default: cpu_prec = QUDA_DOUBLE_PRECISION; break;
  }
  switch(g_gpu_prec) {
    case 0: cuda_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = half\n"); break;
    case 1: cuda_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = single\n"); break;
    case 2: cuda_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = double\n"); break;
    default: cuda_prec = QUDA_DOUBLE_PRECISION; break;
  }
  switch(g_gpu_prec_sloppy) {
    case 0: cuda_prec_sloppy = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = half\n"); break;
    case 1: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = single\n"); break;
    case 2: cuda_prec_sloppy = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = double\n"); break;
    default: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; break;
  }

  // QUDA gauge parameters
  gauge_param.X[0] = LX;
  gauge_param.X[1] = LY;
  gauge_param.X[2] = LZ;
  gauge_param.X[3] = T;
  inv_param.Ls = L5;

  gauge_param.anisotropy  = 1.0;
  gauge_param.type        = QUDA_WILSON_LINKS;
  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
  gauge_param.t_boundary  = QUDA_ANTI_PERIODIC_T;

  gauge_param.cpu_prec           = cpu_prec;
  gauge_param.cuda_prec          = cuda_prec;
  gauge_param.reconstruct        = QUDA_RECONSTRUCT_12;
  gauge_param.cuda_prec_sloppy   = cuda_prec_sloppy;
  gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
  gauge_param.gauge_fix          = QUDA_GAUGE_FIXED_NO;

  gauge_param.ga_pad = 0;
  inv_param.sp_pad = 0;
  inv_param.cl_pad = 0;

  // For multi-GPU, ga_pad must be large enough to store a time-slice
#ifdef MULTI_GPU
  x_face_size = inv_param.Ls * gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
  y_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
  z_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
  t_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
  pad_size = _MAX(x_face_size, y_face_size);
  pad_size = _MAX(pad_size, z_face_size);
  pad_size = _MAX(pad_size, t_face_size);
  gauge_param.ga_pad = pad_size;
  if(g_cart_id==0) printf("# [invert_dw_quda] pad_size = %d\n", pad_size);
#endif

  // load the gauge field
  if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] loading gauge field\n");
  loadGaugeQuda((void*)gauge_qdp, &gauge_param);
  gauge_qdp[0] = NULL; 
  gauge_qdp[1] = NULL; 
  gauge_qdp[2] = NULL; 
  gauge_qdp[3] = NULL; 

#endif

  /*********************************************
   * APE smear the gauge field
   *********************************************/
  if(N_Jacobi>0) {
    memcpy(gauge_field_smeared, g_gauge_field, 72*VOLUMEPLUSRAND*sizeof(double));
    fprintf(stdout, "# [invert_dw_quda] APE smearing gauge field with paramters N_APE=%d, alpha_APE=%e\n", N_ape, alpha_ape);
    APE_Smearing_Step_threads(gauge_field_smeared, N_ape, alpha_ape);
    xchange_gauge_field(gauge_field_smeared);
  }

  // allocate memory for the spinor fields
#ifdef HAVE_QUDA
  no_fields = 3+2;
#else
  no_fields = 6+2;
#endif
  g_spinor_field = (double**)calloc(no_fields, sizeof(double*));
  for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND*L5);
  smearing_spinor_field[0] = g_spinor_field[no_fields-2];
  smearing_spinor_field[1] = g_spinor_field[no_fields-1];

  switch(g_source_type) {
    case 0:
    case 5:
      // the source location
      sl0 =   g_source_location                              / (LX_global*LY_global*LZ);
      sl1 = ( g_source_location % (LX_global*LY_global*LZ) ) / (          LY_global*LZ);
      sl2 = ( g_source_location % (          LY_global*LZ) ) / (                    LZ);
      sl3 =   g_source_location %                      LZ;
      if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3);
      source_proc_coords[0] = sl0 / T;
      source_proc_coords[1] = sl1 / LX;
      source_proc_coords[2] = sl2 / LY;
      source_proc_coords[3] = sl3 / LZ;
    #ifdef MPI
      MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id);
    #else
      g_source_proc_id = 0;
    #endif
      have_source_flag = g_source_proc_id == g_cart_id;
    
      lsl0 = sl0 % T;
      lsl1 = sl1 % LX;
      lsl2 = sl2 % LY;
      lsl3 = sl3 % LZ;
      if(have_source_flag) {
        fprintf(stdout, "# [invert_dw_quda] process %d has the source at (%d, %d, %d, %d)\n", g_cart_id, lsl0, lsl1, lsl2, lsl3);
      }
      break;
    case 2:
    case 3:
    case 4:
      // the source timeslice
#ifdef MPI
      source_proc_coords[0] = g_source_timeslice / T;
      source_proc_coords[1] = 0;
      source_proc_coords[2] = 0;
      source_proc_coords[3] = 0;
      MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id);
      have_source_flag = ( g_source_proc_id == g_cart_id );
      source_timeslice = have_source_flag ? g_source_timeslice % T : -1;
#else
      g_source_proc_id = 0;
      have_source_flag = 1;
      source_timeslice = g_source_timeslice;
#endif
      break;
  }

#ifdef HAVE_QUDA
  /*************************************************************
   * QUDA inverter parameters
   *************************************************************/
  inv_param.dslash_type    = QUDA_DOMAIN_WALL_DSLASH;

  if(strcmp(g_inverter_type_name, "cg") == 0) {
    inv_param.inv_type       = QUDA_CG_INVERTER;
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using cg inverter\n"); 
  } else if(strcmp(g_inverter_type_name, "bicgstab") == 0) {
    inv_param.inv_type       = QUDA_BICGSTAB_INVERTER;
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using bicgstab inverter\n");
#ifdef MULTI_GPU    
  } else if(strcmp(g_inverter_type_name, "gcr") == 0) {
    inv_param.inv_type       = QUDA_GCR_INVERTER;
    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using gcr inverter\n"); 
#endif
  } else {
    if(g_cart_id==0) fprintf(stderr, "[invert_dw_quda] Error, unrecognized inverter type %s\n", g_inverter_type_name);
    EXIT(123);
  }


  if(inv_param.inv_type == QUDA_CG_INVERTER) {
    inv_param.solution_type = QUDA_MAT_SOLUTION;
    inv_param.solve_type    = QUDA_NORMEQ_PC_SOLVE;
  } else if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER) {
    inv_param.solution_type = QUDA_MAT_SOLUTION;
    inv_param.solve_type    = QUDA_DIRECT_PC_SOLVE;
  } else {
    inv_param.solution_type = QUDA_MATPC_SOLUTION;
    inv_param.solve_type    = QUDA_DIRECT_PC_SOLVE;
  }

  inv_param.m5             = g_m5;
  inv_param.kappa          = 0.5 / (5. + inv_param.m5);
  inv_param.mass           = g_m0;

  inv_param.tol            = solver_precision;
  inv_param.maxiter        = niter_max;
  inv_param.reliable_delta = reliable_delta;

#ifdef MPI
  // domain decomposition preconditioner parameters
  if(inv_param.inv_type == QUDA_GCR_INVERTER) {
    if(g_cart_id == 0) printf("# [] settup DD parameters\n");
    inv_param.gcrNkrylov     = 15;
    inv_param.inv_type_precondition = QUDA_MR_INVERTER;
    inv_param.tol_precondition = 1e-6;
    inv_param.maxiter_precondition = 200;
    inv_param.verbosity_precondition = QUDA_VERBOSE;
    inv_param.prec_precondition = cuda_prec_sloppy;
    inv_param.omega = 0.7;
  }
#endif

  inv_param.matpc_type         = QUDA_MATPC_EVEN_EVEN;
  inv_param.dagger             = QUDA_DAG_NO;
  inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; //;QUDA_MASS_NORMALIZATION;

  inv_param.cpu_prec         = cpu_prec;
  inv_param.cuda_prec        = cuda_prec;
  inv_param.cuda_prec_sloppy = cuda_prec_sloppy;

  inv_param.verbosity = QUDA_VERBOSE;

  inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
  inv_param.dirac_order = QUDA_DIRAC_ORDER;
#ifdef MPI
  inv_param.preserve_dirac = QUDA_PRESERVE_DIRAC_YES;
  inv_param.prec_precondition = cuda_prec_sloppy;
  inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
  inv_param.dirac_tune = QUDA_TUNE_NO;
#endif
#endif

  /*******************************************
   * write initial rng state to file
   *******************************************/
  if( g_source_type==2 && g_coherent_source==2 ) {
    sprintf(rng_file_out, "%s.0", g_rng_filename);
    status = init_rng_stat_file (g_seed, rng_file_out);
    if( status != 0 ) {
      fprintf(stderr, "[invert_dw_quda] Error, could not write rng status\n");
      EXIT(210);
    }
  } else if( (g_source_type==2 /*&& g_coherent_source==1*/) || g_source_type==3 || g_source_type==4) {
    if( init_rng_state(g_seed, &g_rng_state) != 0 ) {
      fprintf(stderr, "[invert_dw_quda] Error, could initialize rng state\n");
      EXIT(211);
    }
  }

  /*******************************************
   * prepare locks for openmp
   *******************************************/
  nthreads = g_num_threads - 1;
  lck = (omp_lock_t*)malloc(nthreads * sizeof(omp_lock_t));
  if(lck == NULL) {
      EXIT_WITH_MSG(97, "[invert_dw_quda] Error, could not allocate lck\n");
  }
  // init locks
  for(i=0;i<nthreads;i++) {
    omp_init_lock(lck+i);
  }
  omp_init_lock(gen_lck);

  // check the source momenta
  if(g_source_momentum_set) {
    source_momentum = (int*)malloc(3*sizeof(int));

    if(g_source_momentum[0]<0) g_source_momentum[0] += LX_global;
    if(g_source_momentum[1]<0) g_source_momentum[1] += LY_global;
    if(g_source_momentum[2]<0) g_source_momentum[2] += LZ_global;
    fprintf(stdout, "# [invert_dw_quda] using final source momentum ( %d, %d, %d )\n", g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]);


    if(full_orbit) {
      status = make_qcont_orbits_3d_parity_avg( &qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map);
      if(status != 0) {
        if(g_cart_id==0) fprintf(stderr, "\n[invert_dw_quda] Error while creating O_3-lists\n");
        EXIT(4);
      }
      source_momentum_class = qlatt_id[g_ipt[0][g_source_momentum[0]][g_source_momentum[1]][g_source_momentum[2]]];
      source_momentum_no    = qlatt_count[source_momentum_class];
      source_momentum_runs  = source_momentum_class==0 ? 1 : source_momentum_no + 1;
      if(g_cart_id==0) fprintf(stdout, "# [] source momentum belongs to class %d with %d members, which means %d runs\n",
          source_momentum_class, source_momentum_no, source_momentum_runs);
    }
  }

  if(g_source_type == 5) {
    if(g_seq_source_momentum_set) {
      if(g_seq_source_momentum[0]<0) g_seq_source_momentum[0] += LX_global;
      if(g_seq_source_momentum[1]<0) g_seq_source_momentum[1] += LY_global;
      if(g_seq_source_momentum[2]<0) g_seq_source_momentum[2] += LZ_global;
    } else if(g_source_momentum_set) {
      g_seq_source_momentum[0] = g_source_momentum[0];
      g_seq_source_momentum[1] = g_source_momentum[1];
      g_seq_source_momentum[2] = g_source_momentum[2];
    }
    fprintf(stdout, "# [invert_dw_quda] using final sequential source momentum ( %d, %d, %d )\n",
        g_seq_source_momentum[0], g_seq_source_momentum[1], g_seq_source_momentum[2]);
  }


  /***********************************************
   * loop on spin-color-index
   ***********************************************/
  for(isc=g_source_index[0]; isc<=g_source_index[1]; isc++)
//  for(isc=g_source_index[0]; isc<=g_source_index[0]; isc++)
  {
    ispin = isc / n_c;
    icol  = isc % n_c;

    for(imom=0; imom<source_momentum_runs; imom++) {

      /***********************************************
       * set source momentum
       ***********************************************/
      if(g_source_momentum_set) {
        if(imom == 0) {
          if(full_orbit) {
            source_momentum[0] = 0;
            source_momentum[1] = 0;
            source_momentum[2] = 0;
          } else {
            source_momentum[0] = g_source_momentum[0];
            source_momentum[1] = g_source_momentum[1];
            source_momentum[2] = g_source_momentum[2];
          }
        } else {
          source_momentum[0] = qlatt_map[source_momentum_class][imom-1] / (LY_global*LZ_global);
          source_momentum[1] = ( qlatt_map[source_momentum_class][imom-1] % (LY_global*LZ_global) ) / LZ_global;
          source_momentum[2] = qlatt_map[source_momentum_class][imom-1] % LZ_global;
        }
        if(g_cart_id==0) fprintf(stdout, "# [] run no. %d, source momentum (%d, %d, %d)\n",
            imom, source_momentum[0], source_momentum[1], source_momentum[2]);
      
      }
 
      /***********************************************
       * prepare the source
       ***********************************************/
      if(g_read_source == 0) {  // create source
        switch(g_source_type) {
          case 0:
            // point source
            if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating point source\n");
            for(ix=0;ix<L5*VOLUME;ix++) { _fv_eq_zero(g_spinor_field[0]+ix); }
            if(have_source_flag) {
              if(g_source_momentum_set) {
                phase = 2*M_PI*( source_momentum[0]*sl1/(double)LX_global + source_momentum[1]*sl2/(double)LY_global + source_momentum[2]*sl3/(double)LZ_global );
                g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)  ] = cos(phase);
                g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)+1] = sin(phase);
              } else {
                g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)  ] = 1.;
              }
            }
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d",
                  filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol);
            }
#ifdef HAVE_QUDA
            // set matpc_type
            source_location_5d_iseven = ( (g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin<n_s/2) || (!g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin>=n_s/2) ) ? 1 : 0;
            if(source_location_5d_iseven) {
              inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
              if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_EVEN_EVEN\n");
            } else {
              inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
              if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_ODD_ODD\n");
            }
#endif
            break;
          case 2:
            // timeslice source
            if(g_coherent_source==1) {
              if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating coherent timeslice source\n");
              status = prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1);
              if(status != 0) {
                fprintf(stderr, "[invert_dw_quda] Error from prepare source, status was %d\n", status);
#ifdef MPI
                MPI_Abort(MPI_COMM_WORLD, 123);
                MPI_Finalize();
#endif
                exit(123);
              }
              check_error(prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1),
                  "prepare_coherent_timeslice_source", NULL, 123);
              timeslice = g_coherent_source_base;
            } else {
              if(g_coherent_source==2) {
                timeslice = (g_coherent_source_base+isc*g_coherent_source_delta)%T_global;
                fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n");
                check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, timeslice, VOLUME, g_rng_state, 1),
                    "prepare_timeslice_source", NULL, 123);
              } else {
                if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n");
                check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, VOLUME, g_rng_state, 1),
                    "prepare_timeslice_source", NULL, 124);
                timeslice = g_source_timeslice;
              }
            }
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, 
                  timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix, Nconf, timeslice, isc);
            }
            break;
          case 3:
            // timeslice sources for one-end trick (spin dilution)
            fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n");
            check_error( prepare_timeslice_source_one_end(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum, isc%n_s, g_rng_state, \
                ( isc%n_s==(n_s-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end", NULL, 125 );
            c = N_Jacobi > 0 ? isc%n_s + n_s : isc%n_s;
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, 
                  g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c);
            }
            break;
          case 4:
            // timeslice sources for one-end trick (spin and color dilution )
            fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n");
            check_error(prepare_timeslice_source_one_end_color(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum,\
                isc%(n_s*n_c), g_rng_state, ( isc%(n_s*n_c)==(n_s*n_c-1)  && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end_color", NULL, 126);
            c = N_Jacobi > 0 ? isc%(n_s*n_c) + (n_s*n_c) : isc%(n_s*n_c);
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, 
                  g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c);
            }
            break;
          case 5:
            if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] preparing sequential point source\n");
            check_error( prepare_sequential_point_source (g_spinor_field[0], isc, sl0, g_seq_source_momentum, 
                  smear_source, g_spinor_field[1], gauge_field_smeared), "prepare_sequential_point_source", NULL, 33);
            sprintf(source_filename, "%s.%.4d.t%.2dx%.2d.y%.2d.z%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf,
                sl0, sl1, sl2, sl3, isc, g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]);
            break;
          default:
            fprintf(stderr, "\nError, unrecognized source type\n");
            exit(32);
            break;
        }
      } else { // read source
        switch(g_source_type) {
          case 0:  // point source
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", \
                  filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else  {
              sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc);
            }
            fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename);
            check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115);
            break;
          case 2:  // timeslice source
            if(g_source_momentum_set) {
              sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, g_source_timeslice,
                  isc, source_momentum[0], source_momentum[1], source_momentum[2]);
            } else {
              sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix2, Nconf, g_source_timeslice, isc);
            }
            fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename);
            check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115);
            break;
          default:
            check_error(1, "source type", NULL, 104);
            break;
          case -1:  // timeslice source
            sprintf(source_filename, "%s", filename_prefix2);
            fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename);
            check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115);
            break;
        }
      }  // of if g_read_source
  
      if(g_write_source) {
        check_error(write_propagator(g_spinor_field[0], source_filename, 0, g_propagator_precision), "write_propagator", NULL, 27);
      }

/***********************************************************************************************
 * here threads split: 
 ***********************************************************************************************/
      if(dummy_flag==0) strcpy(source_filename_write, source_filename);
      memcpy((void*)(smearing_spinor_field[0]), (void*)(g_spinor_field[0]), 24*VOLUME*sizeof(double));
      if(dummy_flag>0) {
        // copy only if smearing has been done; otherwise do not copy, do not invert
        if(g_cart_id==0) fprintf(stdout, "# [] copy smearing field -> g field\n");
        memcpy((void*)(g_spinor_field[0]), (void*)(smearing_spinor_field[1]), 24*VOLUME*sizeof(double));
      }

      omp_set_num_threads(g_num_threads);
#pragma omp parallel private(threadid, _2_kappa, is, ix, iy, iix, ratime, retime) shared(key,g_read_source, smear_source, N_Jacobi, kappa_Jacobi, smearing_spinor_field, g_spinor_field, nthreads, convert_sign, VOLUME, VOL3, T, L5, isc, rotate_gamma_basis, g_cart_id) firstprivate(inv_param, gauge_param, ofs)
{
      threadid = omp_get_thread_num();

  if(threadid < nthreads) {
      fprintf(stdout, "# [] proc%.2d thread%.2d starting source preparation\n", g_cart_id, threadid);

      // smearing
      if( ( !g_read_source || (g_read_source && smear_source ) ) && N_Jacobi > 0 ) {
        if(g_cart_id==0) fprintf(stdout, "#  [invert_dw_quda] smearing source with N_Jacobi=%d, kappa_Jacobi=%e\n", N_Jacobi, kappa_Jacobi);
        Jacobi_Smearing_threaded(gauge_field_smeared, smearing_spinor_field[0], smearing_spinor_field[1], kappa_Jacobi, N_Jacobi, threadid, nthreads);
      }


      /***********************************************
       * create the 5-dim. source field
       ***********************************************/
      if(convert_sign == 0) {
        spinor_4d_to_5d_threaded(smearing_spinor_field[0], smearing_spinor_field[0], threadid, nthreads);
      }  else if(convert_sign == 1 || convert_sign == -1) {
        spinor_4d_to_5d_sign_threaded(smearing_spinor_field[0], smearing_spinor_field[0], convert_sign, threadid, nthreads);
      }


      for(is=0; is<L5; is++) {
        for(it=threadid; it<T; it+=nthreads) {
          memcpy((void*)(g_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), (void*)(smearing_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), VOL3*24*sizeof(double));
        }
      }


      // reorder, multiply with g2
      for(is=0; is<L5; is++) {
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = (is*T+it)*VOL3 + i3;
            _fv_eq_zero(smearing_spinor_field[1]+_GSI(ix));
      }}} 

      if(rotate_gamma_basis) {
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = it * VOL3 + i3;
            iy = lexic2eot_5d(0, ix);
            _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix));
        }}
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = it * VOL3 + i3;
            iy = lexic2eot_5d(L5-1, ix);
            _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME));
        }}
      } else {
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = it * VOL3 + i3;
            iy = lexic2eot_5d(0, ix);
            _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix));
        }}
        for(it=threadid; it<T; it+=nthreads) {
          for(i3=0; i3<VOL3; i3++) {
            ix = it * VOL3 + i3;
            iy = lexic2eot_5d(L5-1, ix);
            _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME));
        }}
      }
      fprintf(stdout, "# [] proc%.2d thread%.2d finished source preparation\n", g_cart_id, threadid);

  } else if(threadid == g_num_threads-1 && dummy_flag > 0) {  // else branch on threadid
      fprintf(stdout, "# [] proc%.2d thread%.2d starting inversion for dummy_flag = %d\n", g_cart_id, threadid, dummy_flag);

      /***********************************************
       * perform the inversion
       ***********************************************/
      if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n");

      xchange_field_5d(g_spinor_field[0]);
      memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double));
      ratime = CLOCK;
#ifdef MPI
      if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER  || inv_param.inv_type == QUDA_GCR_INVERTER) {
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n");
        invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param);
      } else if(inv_param.inv_type == QUDA_CG_INVERTER) {
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n");
        testCG(g_spinor_field[1], g_spinor_field[0], &inv_param);
      } else {
        if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n");
      }
#else
      invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param);
#endif
      retime = CLOCK;

      if(g_cart_id==0) {
        fprintf(stdout, "# [invert_dw_quda] QUDA time:  %e seconds\n", inv_param.secs);
        fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs);
        fprintf(stdout, "# [invert_dw_quda] wall time:  %e seconds\n", retime-ratime);
        fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n",
        inv_param.spinorGiB, gauge_param.gaugeGiB);
      }
  }  // of if threadid

// wait till all threads are here
#pragma omp barrier

      if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) {
        _2_kappa = 2. * g_kappa5d;
        for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) {
          _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa );
        }
      }
  
#pragma omp barrier
      // reorder, multiply with g2
      for(is=0;is<L5;is++) {
      for(ix=threadid; ix<VOLUME; ix+=g_num_threads) {
        iy  = lexic2eot_5d(is, ix);
        iix = is*VOLUME + ix;
        _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy));
      }}
#pragma omp barrier
      if(rotate_gamma_basis) {
        for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) {
          _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix));
        }
      } else {
        for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) {
          _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix));
        }
      }
      if(g_cart_id==0 && threadid==g_num_threads-1) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime);

#pragma omp single
  {

#ifdef MPI
      xchange_field_5d(g_spinor_field[1]);
#endif
      /***********************************************
       * check residuum
       ***********************************************/
      if(check_residuum && dummy_flag>0) {
        // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg,
        //   which uses the tmLQCD conventions (same as in contractions)
        //   without explicit boundary conditions
#ifdef MPI
        xchange_field_5d(g_spinor_field[2]);
        xchange_field_5d(g_spinor_field[1]);
#endif
        memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double));

        //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id);
        //ofs = fopen(filename, "w");
        //printf_spinor_field_5d(g_spinor_field[1], ofs);
        //fclose(ofs);

        Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]);
  
        for(ix=0;ix<VOLUME*L5;ix++) {
          _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix));
        }
  
        spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5);
        spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5);
        if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) );

      }
  
      if(dummy_flag>0) {
        /***********************************************
         * create 4-dim. propagator
         ***********************************************/
        if(convert_sign == 0) {
          spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]);
        } else if(convert_sign == -1 || convert_sign == +1) {
          spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign);
        }
  
        /***********************************************
         * write the solution 
         ***********************************************/
        sprintf(filename, "%s.inverted", source_filename_write);
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename);
        check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22);
        
        //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id);
        //ofs = fopen(filename, "w");
        //printf_spinor_field(g_spinor_field[1], ofs);
        //fclose(ofs);
      }

      if(check_residuum) memcpy(g_spinor_field[2], smearing_spinor_field[0], 24*VOLUME*L5*sizeof(double));

  }  // of omp single

}    // of omp parallel region

      if(dummy_flag > 0) strcpy(source_filename_write, source_filename);

      dummy_flag++;
 
    }  // of loop on momenta

  }  // of isc

#if 0
  // last inversion

  {
      memcpy(g_spinor_field[0], smearing_spinor_field[1], 24*VOLUME*L5*sizeof(double));
      if(g_cart_id==0) fprintf(stdout, "# [] proc%.2d starting last inversion\n", g_cart_id);


      /***********************************************
       * perform the inversion
       ***********************************************/
      if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n");

      xchange_field_5d(g_spinor_field[0]);
      memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double));
      ratime = CLOCK;
#ifdef MPI
      if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER  || inv_param.inv_type == QUDA_GCR_INVERTER) {
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n");
        invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param);
      } else if(inv_param.inv_type == QUDA_CG_INVERTER) {
        if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n");
        testCG(g_spinor_field[1], g_spinor_field[0], &inv_param);
      } else {
        if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n");
      }
#else
      invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param);
#endif
      retime = CLOCK;

      if(g_cart_id==0) {
        fprintf(stdout, "# [invert_dw_quda] QUDA time:  %e seconds\n", inv_param.secs);
        fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs);
        fprintf(stdout, "# [invert_dw_quda] wall time:  %e seconds\n", retime-ratime);
        fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n",
        inv_param.spinorGiB, gauge_param.gaugeGiB);
      }

      omp_set_num_threads(g_num_threads);
#pragma omp parallel private(threadid,_2_kappa,is,ix,iy,iix) shared(VOLUME,L5,g_kappa,g_spinor_field,g_num_threads)
    {
      threadid = omp_get_thread_num();

      if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) {
        _2_kappa = 2. * g_kappa5d;
        for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) {
          _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa );
        }
      }
#pragma omp barrier
      // reorder, multiply with g2
      for(is=0;is<L5;is++) {
      for(ix=threadid; ix<VOLUME; ix+=g_num_threads) {
        iy  = lexic2eot_5d(is, ix);
        iix = is*VOLUME + ix;
        _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy));
      }}
#pragma omp barrier
      if(rotate_gamma_basis) {
        for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) {
          _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix));
        }
      } else {
        for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) {
          _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix));
        }
      }

    }  // end of parallel region

    if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime);


#ifdef MPI
      xchange_field_5d(g_spinor_field[1]);
#endif
      /***********************************************
       * check residuum
       ***********************************************/
      if(check_residuum && dummy_flag>0) {
        // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg,
        //   which uses the tmLQCD conventions (same as in contractions)
        //   without explicit boundary conditions
#ifdef MPI
        xchange_field_5d(g_spinor_field[2]);
#endif
        memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double));

        //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id);
        //ofs = fopen(filename, "w");
        //printf_spinor_field_5d(g_spinor_field[1], ofs);
        //fclose(ofs);


        Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]);
  
        for(ix=0;ix<VOLUME*L5;ix++) {
          _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix));
        }
  
        spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5);
        spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5);
        if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) );

      }
  
      /***********************************************
       * create 4-dim. propagator
       ***********************************************/
      if(convert_sign == 0) {
        spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]);
      } else if(convert_sign == -1 || convert_sign == +1) {
        spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign);
      }
  
      /***********************************************
       * write the solution 
       ***********************************************/
      sprintf(filename, "%s.inverted", source_filename_write);
      if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename);
      check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22);
        
      //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id);
      //ofs = fopen(filename, "w");
      //printf_spinor_field(g_spinor_field[1], ofs);
      //fclose(ofs);
  }  // of last inversion

#endif  // of if 0

  /***********************************************
   * free the allocated memory, finalize 
   ***********************************************/

#ifdef HAVE_QUDA
  // finalize the QUDA library
  if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] finalizing quda\n");
#ifdef MPI
  freeGaugeQuda();
#endif
  endQuda();
#endif
  if(g_gauge_field != NULL) free(g_gauge_field);
  if(gauge_field_smeared != NULL) free(gauge_field_smeared);
  if(no_fields>0) {
    if(g_spinor_field!=NULL) {
      for(i=0; i<no_fields; i++) if(g_spinor_field[i]!=NULL) free(g_spinor_field[i]);
      free(g_spinor_field);
    }
  }
  free_geometry();

  if(g_source_momentum_set && full_orbit) {
    finalize_q_orbits(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_rep);
    if(qlatt_map != NULL) {
      free(qlatt_map[0]);
      free(qlatt_map);
    }
  }
  if(source_momentum != NULL) free(source_momentum);
  if(lck != NULL) free(lck);


#ifdef MPI
#ifdef HAVE_QUDA
  endCommsQuda();
#else
  MPI_Finalize();
#endif
#endif
  if(g_cart_id==0) {
    g_the_time = time(NULL);
    fprintf(stdout, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time));
    fprintf(stderr, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time));
  }
  return(0);
}
示例#26
0
/**
 * Returns the alignment requirement of the type described by an encoded
 * type string.
 *
 * The leading character of the encoding (after q_skipVarNameEncode has been
 * applied) selects the result:
 *   - scalar, object, class, selector and pointer codes map directly to the
 *     compiler's __alignof for the corresponding C type;
 *   - Q_C_VOID yields 0;
 *   - an array (Q_C_ARY_B) has the alignment of its element type;
 *   - a struct (Q_C_STRUCT_B) is walked with the q_layoutStruct*Encode
 *     helpers, which report the alignment through an out-parameter;
 *   - a union (Q_C_UNION_B) has the largest alignment among its members.
 *
 * @param type  Encoded type string. Presumably NUL-terminated and well
 *              formed; a union encoding lacking its closing Q_C_UNION_E is
 *              not detected here -- TODO confirm callers guarantee this.
 * @return      The alignment as an int. For an unrecognised type code
 *              q_throwError(er1, type) is invoked, and 0 is returned if that
 *              call returns.
 */
int
q_alignOfTypeEncode(const char* type) {
    type = q_skipVarNameEncode(type);

    switch (*type) {
    case Q_C_ID:
        return __alignof(id);
    case Q_C_CLASS:
        return __alignof(Class);
    case Q_C_SEL:
        return __alignof(SEL);
    case Q_C_CHR:
        return __alignof(char);
    case Q_C_UCHR:
        return __alignof(unsigned char);
    case Q_C_SHT:
        return __alignof(short);
    case Q_C_USHT:
        return __alignof(unsigned short);
    case Q_C_INT:
        return __alignof(int);
    case Q_C_UINT:
        return __alignof(unsigned int);
    case Q_C_LNG:
        return __alignof(long);
    case Q_C_ULNG:
        return __alignof(unsigned long);
    case Q_C_FLT:
        return __alignof(float);
    case Q_C_DBL:
        return __alignof(double);
    case Q_C_VOID:
        return 0;
    case Q_C_PTR:
    case Q_C_CHARPTR:
        return __alignof(char*);
    case Q_C_ARY_B:
        // Step over the opening marker and the digits that follow it, then
        // recurse on what comes next (the element type). The cast keeps the
        // isdigit() argument within the unsigned char range; passing a
        // negative plain char is undefined behaviour.
        do {
            ++type;
        } while (isdigit((unsigned char)*type));
        return q_alignOfTypeEncode(type);
    case Q_C_STRUCT_B: {
        struct _StructLayout    layout;
        unsigned int            align;

        // Drive the layout helper over every member; the final alignment is
        // written to `align` by the End call (nil is passed for the other
        // out-parameter, which is not needed here).
        q_layoutStructBeginEncode(type, &layout);
        while (q_layoutStructNextEncode(&layout)); // empty loop
        q_layoutStructEndEncode(&layout, nil, &align);
        return align;
    }
    case Q_C_UNION_B: {
        int maxAlign = 0;

        // Advance past the first '=' (presumably ending the union's name),
        // or stop early at the closing marker if no '=' is present.
        while (*type != Q_C_UNION_E && *type++ != '='); // empty loop

        while (*type != Q_C_UNION_E) {
            int memberAlign;

            type = q_skipVarNameEncode(type);
            // Evaluate the recursive call exactly once: handing it straight
            // to a function-like max macro would evaluate it twice, which
            // grows exponentially with union nesting depth.
            memberAlign = q_alignOfTypeEncode(type);
            if (memberAlign > maxAlign) {
                maxAlign = memberAlign;
            }
            type = q_skipTypeSpecEncode(type);
        }
        return maxAlign;
    }
    default:
        q_throwError(er1, type);
    }
    return 0;
}