void GameScene::guiUpdate(float dt) { // Points handling pointsLabel->setString("Score: " + std::to_string(points)); pointsLabel->setPosition(10 + pointsLabel->getBoundingBox().size.width / 2, 20); // Bars update mHpBarFill->setTextureRect(cocos2d::Rect(0.f, 0.f, _MAX(_MIN(mPlayer->getHp() / (float)Entities::maxHP * mHpBarBorder->getBoundingBox().size.width, mHpBarBorder->getBoundingBox().size.width), 0), 28.f)); mRageBarFill->setTextureRect(cocos2d::Rect(0.f, 0.f, _MAX(_MIN(mPlayer->getRage() / (float)Entities::rageCharging * mRageBarBorder->getBoundingBox().size.width, mRageBarBorder->getBoundingBox().size.width), 0), 28.f)); // Change difficulty if condition true switch (currentDifficulty) { case Enemies::EASY: { if (points >= Enemies::MEDIUM_CONDITION) currentDifficulty = Enemies::MEDIUM; break; } case Enemies::MEDIUM: { if (points >= Enemies::HARD_CONDITION) currentDifficulty = Enemies::HARD; break; } } Entities::playerShootingFreq = _MAX(0.1f, 0.2f - (float)((points / 2000) / 100.f)); }
/*
 * Initialise an ADER-DG solver on the interval [xmin, xmax].
 *
 * Builds the (stretched) face positions, derives the largest cell size, the
 * time steps and the per-cell / per-face CFL levels, then fills wnow with
 * ExactSol evaluated at time 0 and copies it into wnext.
 */
void InitADERDG(ADERDG* adg, double xmin, double xmax)
{
  const double length = xmax - xmin;

  adg->xmin = xmin;
  adg->xmax = xmax;

  /* Face positions: uniform coordinate -> stretching() -> physical interval. */
  for (int iface = 0; iface < _NBFACES; iface++) {
    double uniform = (double)iface / _NBELEMS_IN;
    double stretched = stretching(uniform);
    adg->face[iface] = xmin + stretched * length;
  }

  /* dx is the size of the largest interior cell. */
  adg->dx = 0.0;
  for (int cell = 1; cell <= _NBELEMS_IN; cell++) {
    adg->dx = _MAX(adg->dx, adg->face[cell] - adg->face[cell - 1]);
  }

  adg->cfl = _CFL;
  adg->dt = adg->cfl * adg->dx; /* TODO: put the velocity */

  /* Level of a cell = truncated log2(dx / cell size); ncfl is the highest level found. */
  adg->ncfl = 0;
  for (int cell = 1; cell <= _NBELEMS_IN; cell++) {
    double width = adg->face[cell] - adg->face[cell - 1];
    adg->cell_level[cell] = (int) (log(adg->dx / width) / log(2));
    adg->ncfl = _MAX(adg->ncfl, adg->cell_level[cell]);
  }

  /* convention: first and last cell are among the biggest elements */
  adg->cell_level[0] = 0;
  adg->cell_level[_NBELEMS_IN + 1] = 0;

  printf("found %d cfl levels\n", adg->ncfl);

  /* Smallest time step: dt divided by 2^ncfl. */
  adg->dt_small = adg->dt / (1 << adg->ncfl);
  printf("small dt=%f max cell size=%f\n", adg->dt_small, adg->dx);

  /* Initial condition at the Gauss-Lobatto points of every interior cell. */
  for (int cell = 1; cell <= _NBELEMS_IN; cell++) {
    double left = adg->face[cell - 1];
    double width = adg->face[cell] - left;
    for (int node = 0; node < _NGLOPS; node++) {
      double xnode = left + width * gauss_lob_point[gauss_lob_offset[_D] + node];
      ExactSol(xnode, 0, adg->wnow[cell][node]);
    }
  }

  /* wnext starts as a copy of wnow.
   * NOTE(review): this loop runs to _NBELEMS while wnow was filled only for
   * cells 1.._NBELEMS_IN - confirm the bound against the array sizes. */
  for (int cell = 1; cell <= _NBELEMS; cell++) {
    for (int node = 0; node < _NGLOPS; node++) {
      for (int var = 0; var < _M; var++) {
        adg->wnext[cell][node][var] = adg->wnow[cell][node][var];
      }
    }
  }

  /* A face takes the higher level of its two neighbouring cells. */
  for (int iface = 0; iface < _NBFACES; iface++) {
    adg->face_level[iface] = _MAX(adg->cell_level[iface], adg->cell_level[iface + 1]);
  }
}
/**
 * Shifts both timestamps back by 72000 seconds (20 hours), clamps each result
 * at 0, and returns what IsDiffDay00 reports for the shifted pair.
 *
 * @param uPrevANSITime earlier timestamp
 * @param uNextANSITime later timestamp
 * @return result of IsDiffDay00 on the shifted, clamped values
 */
bool TimeManager::IsDiffDay20(time_t uPrevANSITime, time_t uNextANSITime)
{
    const time_t shiftedPrev = uPrevANSITime - 72000;
    const time_t shiftedNext = uNextANSITime - 72000;

    // Clamp at 0 so that timestamps earlier than the shift do not go negative.
    return IsDiffDay00(_MAX(shiftedPrev, 0), _MAX(shiftedNext, 0));
}
bool TimeManager::IsDiffDay18(time_t uPrevANSITime, time_t uNextANSITime) { //都减去18小时,判断是否跨0点 time_t tTime1 = uPrevANSITime - 64800; time_t tTime2 = uNextANSITime - 64800; tTime1 = _MAX(tTime1, 0); tTime2 = _MAX(tTime2, 0); return IsDiffDay00(tTime1, tTime2); }
/*
 * Advance the struct-layout cursor by one field.
 *
 * First adds the size of the field visited on the previous call to the
 * running offset, then aligns the offset for the current field and moves
 * cur_type on to the next one. Sizes and alignments are scaled by
 * kBitsPerUnit before being stored in the layout.
 *
 * Returns NO when cur_type points at the struct terminator, YES otherwise.
 */
BOOL q_layoutStructNextEncode(StructLayout layout)
{
  const char* field;
  unsigned int field_align;

  /* add size of previous layout */
  if (layout->prev_type) {
    field = q_skipVarQualifiersEncode(layout->prev_type);
    if (*field == Q_C_BFLD) {
      /* step over the marker and the digits that follow it */
      do {
        ++field;
      } while (isdigit(*field));
    }
    layout->offset += q_sizeOfTypeEncode(field) * kBitsPerUnit;
  }

  if (*layout->cur_type == Q_C_STRUCT_E) {
    return NO;
  }

  layout->cur_type = q_skipVarNameEncode(layout->cur_type);
  field = q_skipVarQualifiersEncode(layout->cur_type);
  if (*field == Q_C_BFLD) {
    do {
      ++field;
    } while (isdigit(*field));
  }

  field_align = q_alignOfTypeEncode(field) * kBitsPerUnit;
  layout->align = _MAX(layout->align, field_align);

  /* skip space before this field: bump the cumulative size to a multiple
   * of the field alignment */
  if (layout->offset % field_align != 0) {
    layout->offset = _ROUND(layout->offset, field_align);
  }

  layout->prev_type = layout->cur_type;
  layout->cur_type = q_skipTypeSpecEncode(layout->cur_type);
  return YES;
}
/**
 * @function GUI_W_UsrEntryDelete
 * @brief Delete the selected string part (if any), or the character located
 *        before the insert line. Does nothing unless obj is a user entry
 *        (its draw function is EntryDraw) and the entry is editable.
 * @param g_obj_st *obj: generic object
 * @return none
 */
void GUI_W_UsrEntryDelete(g_obj_st *obj) {

  usr_entry_st *entry;
  uint16_t selBegin, selEnd;

  /*only user entries are handled*/
  if(obj == NULL || obj->draw != EntryDraw) return;

  entry = (usr_entry_st *) obj->obj;
  if(!entry->bEditable) return;

  /*order the two cursors, whichever way the selection was made*/
  selBegin = _MIN(entry->cursStart, entry->cursStop);
  selEnd = _MAX(entry->cursStart, entry->cursStop);

  /*delete the selection, or the character located before the insert line*/
  StrDelete(entry->buffer, entry->sizeMax, selBegin, selEnd);

  /*no selection: the insert bar moves back by one position*/
  if(selBegin == selEnd && selBegin > 0) {
    entry->cursStart--;
    entry->cursStop--;
  }

  /*keep the insert line inside the (new) string length*/
  entry->len = gstrlen(entry->buffer);
  if(entry->len < entry->cursStart) entry->cursStart = entry->len;

  /*collapse both cursors onto the lower one: after a deletion there is only an insert line*/
  entry->cursStart = _MIN(entry->cursStart, entry->cursStop);
  entry->cursStop = entry->cursStart;

  /*force refresh*/
  GUI_ObjSetNeedRefresh(obj, true);
}
/*
 * Drain config->num_works finished work items from the queue, accumulate
 * sum / min / max of their two elapsed values, and print the statistics.
 *
 * With verbose > 1 one line is printed per work item (start offset and
 * elapsed time for both phases); with verbose > 0 a "# ..." summary line with
 * avg / min / max for both phases is printed. Values are printed as
 * <value / 1000000>.<(value / 1000) % 1000>.
 *
 * Each popped work item is freed here. The elapsed parameter is not used.
 */
static void collect_results(struct benchmark_config *config, struct work_queue *queue, unsigned long long start, unsigned long long elapsed)
{
	int i;
	unsigned long long sum[2] = { 0, 0 };
	/* ULLONG_MAX, not ULONG_MAX: the latter is smaller where long is 32 bits */
	unsigned long long min[2] = { ULLONG_MAX, ULLONG_MAX };
	unsigned long long max[2] = { 0, 0 }, avg[2];

	/* Nothing to collect; also avoids dividing by zero for the averages. */
	if (config->num_works <= 0)
		return;

	for (i = 0; i < config->num_works; i++) {
		struct work *work = work_queue_pop(queue);
		/* Same unsigned conversion the accumulators below apply. */
		unsigned long long elapsed0 = work->elapsed[0];
		unsigned long long elapsed1 = work->elapsed[1];
		unsigned long long offset0 = work->start[0] - start;
		unsigned long long offset1 = work->start[1] - start;

		sum[0] += elapsed0;
		sum[1] += elapsed1;
		min[0] = _MIN(min[0], elapsed0);
		min[1] = _MIN(min[1], elapsed1);
		max[0] = _MAX(max[0], elapsed0);
		max[1] = _MAX(max[1], elapsed1);

		if (config->verbose > 1) {
			printf(
			"%llu.%03llu %llu.%03llu %llu.%03llu %llu.%03llu\n",
			offset0 / 1000000, offset0 / 1000 % 1000,
			elapsed0 / 1000000, elapsed0 / 1000 % 1000,
			offset1 / 1000000, offset1 / 1000 % 1000,
			elapsed1 / 1000000, elapsed1 / 1000 % 1000);
		}

		free(work);
	}

	avg[0] = sum[0] / config->num_works;
	avg[1] = sum[1] / config->num_works;

	if (config->verbose > 0) {
		/* %llu matches the unsigned long long operands */
		printf(
		"# %llu.%03llu %llu.%03llu %llu.%03llu %llu.%03llu %llu.%03llu %llu.%03llu\n",
		avg[0] / 1000000, avg[0] / 1000 % 1000,
		min[0] / 1000000, min[0] / 1000 % 1000,
		max[0] / 1000000, max[0] / 1000 % 1000,
		avg[1] / 1000000, avg[1] / 1000 % 1000,
		min[1] / 1000000, min[1] / 1000 % 1000,
		max[1] / 1000000, max[1] / 1000 % 1000);
	}
}
static void benchmark(void) { int i; long sum = 0; long min_ms = LONG_MAX, max_ms = 0, avg_ms; pthread_t *tid; struct benchmark_thread_data *data; #ifdef CHUNKD_BENCHMARK stc_init(); #endif tid = xmalloc(sizeof(tid[0]) * threads); data = xmalloc(sizeof(data[0]) * threads); for (i = 0; i < threads; i++) { data[i].id = i; xpthread_create(&tid[i], NULL, benchmark_thread, &data[i]); } wait_threads(tid, threads); #define _MIN(a, b) ((a) < (b) ? (a) : (b)) #define _MAX(a, b) ((a) < (b) ? (b) : (a)) for (i = 0; i < threads; i++) { long ms = data[i].time_ms; sum += ms; min_ms = _MIN(min_ms, ms); max_ms = _MAX(max_ms, ms); } avg_ms = sum / threads; printf("%d %ld.%03ld %ld.%03ld %ld.%03ld\n", threads, avg_ms / 1000, avg_ms % 1000, min_ms / 1000, min_ms % 1000, max_ms / 1000, max_ms % 1000); if (verbose) { unsigned long long total_bytes; unsigned long long bytes_per_msec; total_bytes = value_length; total_bytes *= threads; total_bytes *= requests; bytes_per_msec = total_bytes / avg_ms; printf("Throughput: %llu KB/sec\n", bytes_per_msec * 1000UL / 1024UL); } free(data); free(tid); }
/*
 * Finish a struct layout and report its size and alignment.
 *
 * If the cursor is on the struct terminator, the alignment is forced to at
 * least 1, the offset is rounded up to that alignment, and the cursor is
 * cleared. The stored offset and alignment are then divided by kBitsPerUnit
 * and written through size / align; either pointer may be null to skip it.
 */
void q_layoutStructEndEncode(StructLayout layout, int* size, unsigned int* align)
{
  if (layout->cur_type && *layout->cur_type == Q_C_STRUCT_E) {
    /* never round to a zero alignment */
    if (layout->align < 1) {
      layout->align = 1;
    }
    layout->offset = _ROUND(layout->offset, layout->align);
    layout->cur_type = nil;
  }

  if (size) {
    *size = layout->offset / kBitsPerUnit;
  }
  if (align) {
    *align = layout->align / kBitsPerUnit;
  }
}
/**
 * Builds a feed-forward network of logistic neurons.
 *
 * Allocates the output layer, the hidden layers and two scratch buffers
 * (m_layerinput / m_layeroutput) sized for the widest layer, then initialises
 * every neuron: neurons of the first hidden layer take numinputs inputs, all
 * other hidden neurons and the output neurons take neuronsperhiddenlayer
 * inputs.
 *
 * @param numinputs             number of network inputs
 * @param numhiddenlayers       number of hidden layers
 * @param neuronsperhiddenlayer neurons in each hidden layer
 * @param numoutputs            number of output neurons
 * @param learnrate             learning rate handed to every neuron's init()
 *
 * NOTE(review): with numhiddenlayers == 0 the output neurons are still
 * initialised for neuronsperhiddenlayer inputs rather than numinputs -
 * confirm that callers never pass 0.
 */
CBotNeuralNet :: CBotNeuralNet ( unsigned short int numinputs, unsigned short int numhiddenlayers,
							  unsigned short int neuronsperhiddenlayer, unsigned short int numoutputs,
							  ga_nn_value learnrate)
{
	// Plain locals: the 'register' storage class was removed in C++17.
	unsigned short int i;
	unsigned short int j;

	m_pOutputs = new CLogisticalNeuron[numoutputs];
	m_pHidden = new CLogisticalNeuron*[numhiddenlayers];

	// Scratch buffers large enough for the widest layer they have to hold.
	m_layerinput = new ga_nn_value[_MAX(numinputs,neuronsperhiddenlayer)];
	m_layeroutput = new ga_nn_value[_MAX(numoutputs,_MAX(numinputs,neuronsperhiddenlayer))];

	for ( j = 0; j < numhiddenlayers; j ++ )
	{
		// Only the first hidden layer is fed by the network inputs.
		const unsigned short int fanIn = ( j == 0 ) ? numinputs : neuronsperhiddenlayer;

		m_pHidden[j] = new CLogisticalNeuron[neuronsperhiddenlayer];

		for ( i = 0; i < neuronsperhiddenlayer; i ++ )
			m_pHidden[j][i].init(fanIn,learnrate);
	}

	for ( i = 0; i < numoutputs; i ++ )
		m_pOutputs[i].init(neuronsperhiddenlayer,learnrate);

	//m_transferFunction = new CSigmoidTransfer ();

	m_numInputs = numinputs;
	m_numOutputs = numoutputs;
	m_numHidden = neuronsperhiddenlayer;
	m_numHiddenLayers = numhiddenlayers;
}
/*
 * Make sure at least `space` unused bytes are available in the heap buffer.
 *
 * When the free tail (size - used) is too small, the buffer is grown with
 * realloc by the shortfall plus 16 bytes, and by no less than 32 bytes.
 *
 * NOTE(review): the growth amount is stored in an int, so a very large
 * `space` would be truncated - confirm callers stay well below INT_MAX.
 */
void
_hbuf_reserve(struct hdfs_heap_buf *h, size_t space)
{
	int avail = h->size - h->used;
	int grow;

	if ((size_t)avail < space) {
		grow = _MAX(32, space - avail + 16);

		h->buf = realloc(h->buf, h->size + grow);
		ASSERT(h->buf);
		h->size += grow;
	}
}
/*
 * Start laying out the struct whose encoding begins at `type`.
 *
 * Raises er2 through q_throwError if the first character is not the
 * struct-begin marker. Characters up to and including a '=' are then skipped
 * (presumably the struct name - confirm against the encoding format), unless
 * a struct end, nested struct begin or union begin is met first. The layout
 * is reset to offset 0 with the minimum struct alignment.
 */
void q_layoutStructBeginEncode(const char* type, StructLayout layout)
{
  const char* scan;
  const char first = *type++;

  if (first != Q_C_STRUCT_B) {
    q_throwError(er2, type);
  }

  /* look for '=' before any nested aggregate or the end of the struct */
  scan = type;
  for (;;) {
    const char c = *scan;
    if (c == Q_C_STRUCT_E || c == Q_C_STRUCT_B || c == Q_C_UNION_B) {
      break;
    }
    scan++;
    if (c == '=') {
      break;
    }
  }

  /* a '=' was consumed: the fields start right after it */
  if (scan[-1] == '=') {
    type = scan;
  }

  layout->cur_type = type;
  layout->prev_type = nil;
  layout->offset = 0;
  layout->align = _MAX(kBitsPerUnit, STRUCT_SIZE_BOUNDARY);
}
/**
 * (Re)creates the bitmaps of both navigation views.
 *
 * In compact mode the number of document lines is divided (rounding up) by
 * the usable client height to get the reduction ratio passed to each view's
 * create(); otherwise the ratio is 0. Finishes by calling SetScalingFactor().
 */
void NavDialog::CreateBitmap()
{
	RECT r;
	::GetClientRect(_hSelf, &r);

	const int maxLines = _MAX(m_view[0].m_lines, m_view[1].m_lines);

	// The usable height can reach zero or go negative when the client area is
	// smaller than the margins; clamp it so the division below cannot trap.
	const int usableHeight = (r.bottom - r.top) - 2 * cSpace - 2;
	const int maxHeight = (usableHeight > 0) ? usableHeight : 1;

	int reductionRatio = 0;
	if (m_compact)
	{
		reductionRatio = maxLines / maxHeight;

		// Round up, but only once at least one full multiple fits.
		if (reductionRatio && (maxLines % maxHeight))
			++reductionRatio;
	}

	m_view[0].create(m_clr, reductionRatio);
	m_view[1].create(m_clr, reductionRatio);

	SetScalingFactor();
}
bool IsPointInContour(int x, int y, TVAZone* contour) { int count = 0; for (int i = 0; i < contour->NumPoints; i++) { int j = (i+1)%contour->NumPoints; //горизонтальный отрезок. if ((int)contour->Points[i].Y == (int)contour->Points[j].Y) continue; //отрезок выше от луча else if (contour->Points[i].Y > y && contour->Points[j].Y > y) continue; //отрезок ниже от луча else if (contour->Points[i].Y < y && contour->Points[j].Y < y) continue; // отрезок справа от луча else if ((int)_MIN(contour->Points[i].X,contour->Points[j].X) > x) continue; //нижняя граница на луче else if ((int)_MIN(contour->Points[i].Y,contour->Points[j].Y) == (int)y) continue; //верхняя граница отрезка на луче else if ((int)_MAX(contour->Points[i].Y,contour->Points[j].Y) == (int)y) { double x1 = contour->Points[i].Y > contour->Points[j].Y ? contour->Points[i].X : contour->Points[j].X; if (x > x1) count++; } else { double k,b; k = (contour->Points[j].Y - contour->Points[i].Y) / (contour->Points[j].X - contour->Points[i].X); b = contour->Points[j].Y - k*contour->Points[j].X; // точка пересечения. double t; t = (y - b) / k; if (t < x ) count++; } } return count & 1; }
void NavDialog::SetScalingFactor() { m_view[0].m_lines = ::SendMessage(m_view[0].m_hView, SCI_GETLINECOUNT, 0, 0); m_view[1].m_lines = ::SendMessage(m_view[1].m_hView, SCI_GETLINECOUNT, 0, 0); m_view[0].m_firstVisible = ::SendMessage(m_view[0].m_hView, SCI_GETFIRSTVISIBLELINE, 0, 0); m_view[1].m_firstVisible = ::SendMessage(m_view[1].m_hView, SCI_GETFIRSTVISIBLELINE, 0, 0); m_maxBmpLines = _MAX(m_view[0].maxBmpLines(), m_view[1].maxBmpLines()); m_syncView = (m_maxBmpLines == m_view[0].maxBmpLines()) ? &m_view[0] : &m_view[1]; RECT r; ::GetClientRect(_hSelf, &r); m_navViewWidth = ((r.right - r.left) - 3 * cSpace - 4) / 2; m_navHeight = (r.bottom - r.top) - 2 * cSpace - 2; m_pixelsPerLine = m_navHeight / m_maxBmpLines; if (m_pixelsPerLine == 0) { m_pixelsPerLine = 1; ShowScroller(r); } else { if (m_pixelsPerLine > 5) m_pixelsPerLine = 5; m_navHeight = m_pixelsPerLine * m_maxBmpLines; if (m_hScroll) ::ShowScrollBar(m_hScroll, SB_CTL, FALSE); } updateScroll(); updateDockingDlg(); if (isVisible()) ::InvalidateRect(_hSelf, NULL, TRUE); }
/* Looks up `username` in the password file `file`.
 *
 * Each line is expected to look like "name:...:index". The first line whose
 * leading field (text before the first ':') equals `username` is selected and
 * the integer following its last ':' is returned.
 *
 * Returns -1 when the file cannot be opened or no usable line matches.
 * A matching line without any ':' is skipped instead of being dereferenced.
 */
static int
find_strchr (char *username, char *file)
{
  FILE *fd;
  char *pos;
  char line[5 * 1024];
  size_t name_len, field_len, cmp_len;

  fd = fopen (file, "r");
  if (fd == NULL)
    {
      fprintf (stderr, "Cannot open file '%s'\n", file);
      return -1;
    }

  name_len = strlen (username);

  while (fgets (line, sizeof (line), fd) != NULL)
    {
      /* length of the first field; the bound is tested before the access */
      field_len = 0;
      while ((field_len < sizeof (line)) && (line[field_len] != ':')
             && (line[field_len] != '\0'))
        {
          field_len++;
        }

      /* compare over the longer of the two so a mere prefix never matches */
      cmp_len = (field_len > name_len) ? field_len : name_len;
      if (strncmp (username, line, cmp_len) != 0)
        continue;

      /* find the index */
      pos = strrchr (line, ':');
      if (pos == NULL)
        continue;               /* malformed line: no index field */

      fclose (fd);
      return atoi (pos + 1);
    }

  fclose (fd);
  return -1;
}
/**
 * @function GUI_W_UsrEntryGetSelection
 * @brief copy the selected string part into a given buffer. Does nothing
 *        unless obj is a user entry (its draw function is EntryDraw), buffer
 *        is non-NULL and bufSize is non-zero.
 * @param g_obj_st *obj: generic object
 * @param uint8_t *buffer: user buffer; will be \0 terminated
 * @param uint16_t bufSize: size of the user buffer
 * @return none
 */
void GUI_W_UsrEntryGetSelection(g_obj_st *obj, uint8_t *buffer, uint16_t bufSize) {

  usr_entry_st *entry;
  uint16_t idx, selEnd;

  if(obj == NULL || obj->draw != EntryDraw || buffer == NULL || bufSize == 0) return;

  entry = (usr_entry_st *) obj->obj;

  /*the selection runs from the lower cursor up to (excluding) the higher one*/
  idx = _MIN(entry->cursStart, entry->cursStop);
  selEnd = _MAX(entry->cursStart, entry->cursStop);

  /*copy the highlighted text, always keeping one byte for the terminator*/
  for(; bufSize > 1 && idx < selEnd; idx++, bufSize--) {
    *buffer++ = entry->buffer[idx];
  }

  /*always be sure that str is '\0' terminated*/
  *buffer = 0;
}
/*
 * Return the size of the type described by the encoding string `type`.
 *
 * A leading variable name and qualifiers are skipped first. Scalars map to
 * sizeof of the matching C type, arrays to length * element size, bit-fields
 * to their digit count divided by kBitsPerUnit, structs to the size computed
 * by the q_layoutStruct*Encode helpers, and unions to the size of their
 * largest member. Unknown type codes raise er1 through q_throwError.
 */
int q_sizeOfTypeEncode(const char* type)
{
  type = q_skipVarNameEncode(type);
  type = q_skipVarQualifiersEncode(type);

  switch (*type) {
    case Q_C_ID:
      return sizeof(id);
    case Q_C_CLASS:
      return sizeof(Class);
    case Q_C_SEL:
      return sizeof(SEL);
    case Q_C_CHR:
      return sizeof(char);
    case Q_C_UCHR:
      return sizeof(unsigned char);
    case Q_C_SHT:
      return sizeof(short);
    case Q_C_USHT:
      return sizeof(unsigned short);
    case Q_C_INT:
      return sizeof(int);
    case Q_C_UINT:
      return sizeof(unsigned int);
    case Q_C_LNG:
      return sizeof(long);
    case Q_C_ULNG:
      return sizeof(unsigned long);
    case Q_C_FLT:
      return sizeof(float);
    case Q_C_DBL:
      return sizeof(double);
    case Q_C_VOID:
      return 0;
    case Q_C_PTR:
    case Q_C_CHARPTR:
      return sizeof(char*);
    case Q_C_ARY_B: {
      /* array: element count, then the element type */
      int len = atoi(type + 1);
      /* cast: isdigit() is only defined for unsigned char values and EOF */
      while (isdigit((unsigned char)*++type)); // empty loop
      return len * q_sizeOfTypeEncode(type);
    }
    case Q_C_BFLD:
      return atoi(type + 1) / kBitsPerUnit;
    case Q_C_STRUCT_B: {
      struct _StructLayout layout;
      /* int, to match the `int* size` parameter of q_layoutStructEndEncode */
      int size;
      q_layoutStructBeginEncode(type, &layout);
      while (q_layoutStructNextEncode(&layout)); // empty loop
      q_layoutStructEndEncode(&layout, &size, nil);
      return size;
    }
    case Q_C_UNION_B: {
      int max_size = 0;
      /* skip up to and including '=' unless the union ends first */
      while (*type != Q_C_UNION_E && *type++ != '='); // empty loop
      while (*type != Q_C_UNION_E) {
        type = q_skipVarNameEncode(type);
        max_size = _MAX(max_size, q_sizeOfTypeEncode(type));
        type = q_skipTypeSpecEncode(type);
      }
      return max_size;
    }
    default:
      q_throwError(er1, type);
  }
  return 0;
}
// Symmetric tridiagonal QL algorithm. static void tql2(double V[3][3], double d[3], double e[3]) { // This is derived from the Algol procedures tql2, by // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding // Fortran subroutine in EISPACK. e[0] = e[1]; e[1] = e[2]; e[2] = 0.0; double f = 0.0; double tst1 = 0.0; double eps = pow(2.0,-52.0); for (int l = 0; l < n; l++) { // Find small subdiagonal element tst1 = _MAX(tst1, fabs(d[l]) + fabs(e[l])); int m = l; while (m < n) { if ( fabs(e[m]) <= eps*tst1 ) break; m++; } // If m == l, d[l] is an eigenvalue, // otherwise, iterate. if (m > l) { int iter = 0; do { iter = iter + 1; // (Could check iteration count here.) // Compute implicit shift double g = d[l]; double p = (d[l+1] - g) / (2.0 * e[l]); double r = hypot2(p,1.0); if (p < 0) { r = -r; } d[l] = e[l] / (p + r); d[l+1] = e[l] * (p + r); double dl1 = d[l+1]; double h = g - d[l]; for (int i = l+2; i < n; i++) { d[i] -= h; } f = f + h; // Implicit QL transformation. p = d[m]; double c = 1.0; double c2 = c; double c3 = c; double el1 = e[l+1]; double s = 0.0; double s2 = 0.0; for (int i = m-1; i >= l; --i) { c3 = c2; c2 = c; s2 = s; g = c * e[i]; h = c * p; r = hypot2(p,e[i]); e[i+1] = s * r; s = e[i] / r; c = p / r; p = c * d[i] - s * g; d[i+1] = h + s * (c * g + s * d[i]); // Accumulate transformation. for (int k = 0; k < n; ++k) { h = V[k][i+1]; V[k][i+1] = s * V[k][i] + c * h; V[k][i] = c * V[k][i] - s * h; } } p = -s * s2 * c3 * el1 * e[l] / dl1; e[l] = s * p; d[l] = c * p; // Check for convergence. } while (fabs(e[l]) > eps*tst1); } d[l] = d[l] + f; e[l] = 0.0; } // Sort eigenvalues and corresponding vectors. for (int i = 0; i < n-1; i++) { int k = i; double p = d[i]; for (int j = i+1; j < n; j++) { if (d[j] < p) { k = j; p = d[j]; } } if (k != i) { d[k] = d[i]; d[i] = p; for (int j = 0; j < n; j++) { p = V[j][i]; V[j][i] = V[j][k]; V[j][k] = p; } } } }
// Sort the bunch in ascending z (ct) order, and return // a vector of iterators which point to the equal-spaced // bin boundaries defines by zmin to zmax in steps of dz // // Returns the number of particles removed from tails // i.e. z<zmin || z>=zmax // // hdp contains the derivative of the distribution // calculated using the Savitzky-Golay filter c // If c is empty, then the derivative will be zero. size_t ParticleBinList(ParticleBunch& bunch, double zmin, double zmax, size_t nbins, vector<ParticleBunch::iterator>& pbins, vector<double>& hd, vector<double>& hdp, vector<double>* c) { //cout << "In ParticleBinList" << endl; //cout << zmin << "\t" << zmax << "\t" << nbins << endl; double dz = (zmax-zmin)/double(nbins); vector<ParticleBunch::iterator> bins; vector<double> hbins(nbins,0); bins.reserve(nbins+1); bunch.SortByCT(); size_t lost = TruncateZ(bunch,zmin,zmax); ParticleBunch::iterator p = bunch.begin(); bins.push_back(p); double z=zmin; double total=0; size_t n; for(n=0; n<nbins; n++) { z+=dz; while(p!=bunch.end() && p->ct()<z) { total++; hbins[n]++; p++; } bins.push_back(p); } if(p!=bunch.end()) { #ifdef ENABLE_MPI cerr << "bad slicing in rank: " << MPI::COMM_WORLD.Get_rank() << endl; #endif #ifndef ENABLE_MPI cerr << "bad slicing" << endl; #endif cerr << "z = " << z << " ct = " << p->ct() << " zmax = " << zmax << endl; //Dump out the bad bunch /* ofstream* badbunch = new ofstream("badbunch.bunch"); bunch.Output(*badbunch); badbunch->close(); delete badbunch; cerr << "Output of the current bunch is to badbunch.bunch" << endl; */ #ifndef ENABLE_MPI abort(); #endif #ifdef ENABLE_MPI MPI::COMM_WORLD.Abort(1); #endif } // bins.push_back(p); // should be end() // normalise distribution // and apply filter vector<double> fbins(nbins,0); vector<double> fpbins(nbins,0); double a = 1/total/dz; int w = c ? (c->size()-1)/2 : 0; size_t m; for(n=0; n<nbins; n++) { fbins[n] = hbins[n]*a; if(c) //for(m=_MAX(0,int(n)-w); m<=_MIN(nbins,int(n)+w); m++)// ERROR! 
m can be set to nbins -> out of range! for(m=_MAX(0,int(n)-w); m<_MIN(nbins,size_t(n)+w); m++) // This needs to be checked! { fpbins[n] += hbins[m]*(*c)[m-n+w]*a; } } pbins.swap(bins); hd.swap(fbins); hdp.swap(fpbins); return lost; }
void process_main() { create("start_proc"); g_sim.m_num_CSIM_process++; fprintf(stderr, "started simulation.\n"); // simulation progress verbose if (g_cfg.sim_show_progress) process_sim_progress(); // router for (unsigned int i=0; i<g_Router_vec.size(); i++) { process_router(g_Router_vec[i]); } // input/output NI for (unsigned int n=0; n<g_NIInput_vec.size(); n++) { switch (g_cfg.NIin_type) { case NI_INPUT_TYPE_PER_PC: process_NI_input(g_NIInput_vec[n], 0); break; case NI_INPUT_TYPE_PER_VC: for (int NI_vc=0; NI_vc<g_cfg.router_num_vc; NI_vc++) process_NI_input(g_NIInput_vec[n], NI_vc); break; default: assert(0); } } for (unsigned int n=0; n<g_NIOutput_vec.size(); n++) { process_NI_output(g_NIOutput_vec[n]); } // profile if (g_cfg.profile_perf || g_cfg.profile_power) { if (g_cfg.profile_interval_cycle) process_profile_cycle(); else process_profile_instr(); } #ifdef LINK_DVS // link-dvs process_link_dvs_link_speedup(); process_link_dvs_link_slowdown(); process_link_dvs_set(); #endif // injection switch (g_cfg.wkld_type) { case WORKLOAD_TRIPS_TRACE: case WORKLOAD_TILED_CMP_TRACE: case WORKLOAD_TILED_CMP_VALUE_TRACE: case WORKLOAD_SNUCA_CMP_VALUE_TRACE: process_parse_trace(); break; case WORKLOAD_SYNTH_SPATIAL: case WORKLOAD_SYNTH_TRAFFIC_MATRIX: for (unsigned int c=0; c<g_Core_vec.size(); c++) process_gen_synth_traffic(c); break; default: assert(0); } // control simulation for warmup and finalize process_control_sim(); g_ev_sim_done->wait(); // Now the simulation is done. fprintf(stderr, "finished simulation at clk=%.0lf.\n", simtime()); // Find the simulation end time g_sim.m_end_time = time((time_t *)NULL); g_sim.m_elapsed_time = _MAX(g_sim.m_end_time - g_sim.m_start_time, 1); #ifdef _DEBUG_ROUTER_PROCESS printf("PROCESS COMPLETE: process_main()\n"); #endif }
/* Parses the tpasswd files, in order to verify the given
 * username/password pair.
 *
 * Steps: look up the user's index in tpasswd (find_strchr), read the line of
 * conffile whose leading integer equals that index and parse it with
 * read_conf_values, then locate the user's line in tpasswd again and hand its
 * verifier and salt fields to _verify_passwd_int.
 *
 * Returns the result of _verify_passwd_int, or -1 on any lookup, open or
 * parse failure. Every opened file is closed on all paths.
 */
int
verify_passwd (char *conffile, char *tpasswd, char *username,
               const char *passwd)
{
  FILE *fd;
  char line[5 * 1024];
  unsigned int i;
  gnutls_datum_t g, n;
  int iindex;
  char *p, *pos;

  iindex = find_strchr (username, tpasswd);
  if (iindex == -1)
    {
      fprintf (stderr, "Cannot find '%s' in %s\n", username, tpasswd);
      return -1;
    }

  fd = fopen (conffile, "r");
  if (fd == NULL)
    {
      fprintf (stderr, "Cannot find %s\n", conffile);
      return -1;
    }

  /* find the conf entry whose leading number is the user's index */
  do
    {
      p = fgets (line, sizeof (line) - 1, fd);
    }
  while (p != NULL && atoi (p) != iindex);

  if (p == NULL)
    {
      fprintf (stderr, "Cannot find entry in %s\n", conffile);
      fclose (fd);              /* do not leak the handle on this path */
      return -1;
    }
  line[sizeof (line) - 1] = 0;

  fclose (fd);

  if ((iindex = read_conf_values (&g, &n, line)) < 0)
    {
      fprintf (stderr, "Cannot parse conf file '%s'\n", conffile);
      return -1;
    }

  fd = fopen (tpasswd, "r");
  if (fd == NULL)
    {
      fprintf (stderr, "Cannot open file '%s'\n", tpasswd);
      return -1;
    }

  while (fgets (line, sizeof (line), fd) != NULL)
    {
      /* move to first ':'
       * This is the actual verifier.
       * The bound is tested before the array access.
       */
      i = 0;
      while ((i < sizeof (line)) && (line[i] != ':') && (line[i] != '\0'))
        {
          i++;
        }

      /* compare over the longer length so a mere prefix never matches */
      if (strncmp (username, line, _MAX (i, strlen (username))) == 0)
        {
          char *verifier_pos, *salt_pos;

          pos = strchr (line, ':');
          fclose (fd);
          if (pos == NULL)
            {
              /* the malformed line comes from tpasswd, not from conffile */
              fprintf (stderr, "Cannot parse file '%s'\n", tpasswd);
              return -1;
            }
          pos++;
          verifier_pos = pos;

          /* Move to the salt */
          pos = strchr (pos, ':');
          if (pos == NULL)
            {
              fprintf (stderr, "Cannot parse file '%s'\n", tpasswd);
              return -1;
            }
          pos++;
          salt_pos = pos;

          return _verify_passwd_int (username, passwd,
                                     verifier_pos, salt_pos, &g, &n);
        }
    }

  fclose (fd);
  return -1;
}
/* Creates or replaces the tpasswd entry of `username`.
 *
 * Reads the line of tpasswd_conf whose leading integer equals uindex, parses
 * it with read_conf_values and derives the entry text with _srp_crypt. The
 * password file is then rewritten through a "<tpasswd>.tmp" copy: the user's
 * existing line is replaced, every other line that contains a ':' is copied
 * unchanged, and the entry is appended if the user was not present. An
 * already existing .tmp file is treated as a lock.
 *
 * Returns 0 on success, -1 on any failure.
 */
int
crypt_int (const char *username, const char *passwd, int salt_size,
           char *tpasswd_conf, char *tpasswd, int uindex)
{
  FILE *fd;
  char *cr;
  gnutls_datum_t g, n;
  char line[5 * 1024];
  char *p, *pp;
  int iindex;
  char tmpname[1024];

  fd = fopen (tpasswd_conf, "r");
  if (fd == NULL)
    {
      fprintf (stderr, "Cannot find %s\n", tpasswd_conf);
      return -1;
    }

  do
    {                           /* find the specified uindex in file */
      p = fgets (line, sizeof (line) - 1, fd);
      /* atoi() is only reached for a non-NULL line; at EOF p is NULL */
    }
  while (p != NULL && atoi (p) != uindex);

  if (p == NULL)
    {
      fprintf (stderr, "Cannot find entry in %s\n", tpasswd_conf);
      fclose (fd);
      return -1;
    }
  line[sizeof (line) - 1] = 0;

  fclose (fd);
  if ((iindex = read_conf_values (&g, &n, line)) < 0)
    {
      fprintf (stderr, "Cannot parse conf file '%s'\n", tpasswd_conf);
      return -1;
    }

  cr = _srp_crypt (username, passwd, salt_size, &g, &n);
  if (cr == NULL)
    {
      fprintf (stderr, "Cannot _srp_crypt()...\n");
      return -1;
    }
  else
    {
      /* delete previous entry */
      struct stat st;
      FILE *fd2;
      int put;

      /* tmpname must hold the path, ".tmp" and the terminating NUL */
      if (strlen (tpasswd) + sizeof (".tmp") > sizeof (tmpname))
        {
          fprintf (stderr, "file '%s' is tooooo long\n", tpasswd);
          return -1;
        }
      strcpy (tmpname, tpasswd);
      strcat (tmpname, ".tmp");

      if (stat (tmpname, &st) != -1)
        {
          fprintf (stderr, "file '%s' is locked\n", tpasswd);
          return -1;
        }

      if (filecopy (tpasswd, tmpname) != 0)
        {
          fprintf (stderr, "Cannot copy '%s' to '%s'\n", tpasswd, tmpname);
          return -1;
        }

      /* Open the backup for reading BEFORE truncating the real file, so a
       * failure here cannot leave an emptied password file behind. */
      fd2 = fopen (tmpname, "r");
      if (fd2 == NULL)
        {
          fprintf (stderr, "Cannot open '%s' for read\n", tmpname);
          remove (tmpname);
          return -1;
        }

      fd = fopen (tpasswd, "w");
      if (fd == NULL)
        {
          fprintf (stderr, "Cannot open '%s' for write\n", tpasswd);
          fclose (fd2);
          remove (tmpname);
          return -1;
        }

      put = 0;
      while ((p = fgets (line, sizeof (line) - 1, fd2)) != NULL)
        {
          pp = strchr (line, ':');
          if (pp == NULL)
            continue;           /* lines without ':' are not carried over */

          /* compare over the longer length so a mere prefix never matches */
          if (strncmp (p, username,
                       _MAX (strlen (username),
                             (unsigned int) (pp - p))) == 0)
            {
              put = 1;
              fprintf (fd, "%s:%s:%u\n", username, cr, (unsigned int) iindex);
            }
          else
            {
              fputs (line, fd);
            }
        }

      /* user was not in the file yet: append the new entry */
      if (put == 0)
        {
          fprintf (fd, "%s:%s:%u\n", username, cr, (unsigned int) iindex);
        }

      fclose (fd);
      fclose (fd2);

      remove (tmpname);
    }

  return 0;
}
/** * @function EntryDraw * @brief user entry draw function * @param void *_g_obj: generic object * @param void *_obj: frame object * @return none */ static void EntryDraw(void *_g_obj, void *_obj) { g_obj_st *g_obj; usr_entry_st *entry; uint8_t glyph; uint16_t ii, selStart, selStop; coord_t x, xInsertLine, xMin, yMin, xMax, yMax; rect_st rec; color_t colBack, colText; /*retreive generic & specific object*/ if(_g_obj != NULL && _obj != NULL) { g_obj = (g_obj_st *) _g_obj; entry = (usr_entry_st*) _obj; /*P2D configuration*/ P2D_SetDisplayMode(DISPLAY_SOLID); P2D_SetLineType(LINE_SOLID); SetFont(entry->font); if(GUI_ObjIsDisabled(g_obj)) { colBack = GetColor(G_COL_BACKGROUND); colText = GetColor(G_COL_D_TEXT); } else { colBack = GetColor(G_COL_E_BACKGROUND); colText = entry->colText; } /*retrieve text coord*/ GetTextCoords(&(g_obj->rec), &xMin, &yMin, &xMax, &yMax); /*display usr_entry glyphs, one by one*/ selStart = _MIN(entry->cursStart, entry->cursStop); selStop = _MAX(entry->cursStart, entry->cursStop); x = xMin; xInsertLine = x; ii = entry->offsetDisplay; while(x < xMax && entry->buffer[ii] != 0) { /*select the color of the text, according to the user selection (i.e. 
reverse colors if the current glyph is a part of the user selection)*/ if(entry->bEditable == false || selStart == selStop || ii < selStart || ii >= selStop) { P2D_SetColors(colText, colBack); } else { P2D_SetColors(colBack, GetColor(G_COL_SPECIAL)); } /*display the glyph*/ glyph = entry->buffer[ii]; P2D_PutGlyph(x, yMin, glyph); x += P2D_GetGlyphWidth(glyph); /*if the car corresponds to the user selection bar, store its coord*/ if(ii == entry->cursStop - 1) xInsertLine = x; /*next car*/ ii++; } /*clear from last car to the end of the entry*/ P2D_SetColors(colBack, colBack); (void) P2D_CoordToRect(&rec, x, yMin, xMax, yMax); P2D_FillRect(&rec); /*clear between text and object rect (1 px width)*/ (void) P2D_CoordToRect(&rec, xMin-1, yMin-1, xMax+1, yMax+1); P2D_Rect(&rec); /*display the insert line*/ if(entry->bEditable && entry->bBlink) { P2D_SetColor(colText); P2D_Line(xInsertLine, yMin, xInsertLine, yMax); } /*object rect*/ P2D_SetColor(GetColor(G_COL_LOWER_REC)); P2D_Rect(&(g_obj->rec)); } }
int main(int argc, char **argv) { int c, i, mu, status; int ispin, icol, isc; int n_c = 3; int n_s = 4; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int grid_size[4]; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iy, is, it, i3; int sl0, sl1, sl2, sl3, have_source_flag=0; int source_proc_coords[4], lsl0, lsl1, lsl2, lsl3; int check_residuum = 0; unsigned int VOL3, V5; int do_gt = 0; int full_orbit = 0; int smear_source = 0; char filename[200], source_filename[200], source_filename_write[200]; double ratime, retime; double plaq_r=0., plaq_m=0., norm, norm2; double spinor1[24]; double *gauge_qdp[4], *gauge_field_timeslice=NULL, *gauge_field_smeared=NULL; double _1_2_kappa, _2_kappa, phase; FILE *ofs; int mu_trans[4] = {3, 0, 1, 2}; int threadid, nthreads; int timeslice, source_timeslice; char rng_file_in[100], rng_file_out[100]; int *source_momentum=NULL; int source_momentum_class = -1; int source_momentum_no = 0; int source_momentum_runs = 1; int imom; int num_gpu_on_node=0, rank; int source_location_5d_iseven; int convert_sign=0; #ifdef HAVE_QUDA int rotate_gamma_basis = 1; #else int rotate_gamma_basis = 0; #endif omp_lock_t *lck = NULL, gen_lck[1]; int key = 0; /****************************************************************************/ /* for smearing parallel to inversion */ double *smearing_spinor_field[] = {NULL,NULL}; int dummy_flag = 0; /****************************************************************************/ /****************************************************************************/ #if (defined HAVE_QUDA) && (defined MULTI_GPU) int x_face_size, y_face_size, z_face_size, t_face_size, pad_size; #endif /****************************************************************************/ /************************************************/ int qlatt_nclass; int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL; double **qlatt_list=NULL; /************************************************/ 
/************************************************/ double boundary_condition_factor; int boundary_condition_factor_set = 0; /************************************************/ //#ifdef MPI // kernelPackT = true; //#endif /*********************************************** * QUDA parameters ***********************************************/ #ifdef HAVE_QUDA QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION; QudaGaugeParam gauge_param = newQudaGaugeParam(); QudaInvertParam inv_param = newQudaInvertParam(); #endif while ((c = getopt(argc, argv, "soch?vgf:p:b:S:R:")) != -1) { switch (c) { case 'v': g_verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'c': check_residuum = 1; fprintf(stdout, "# [invert_dw_quda] will check residuum again\n"); break; case 'p': n_c = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] will use number of colors = %d\n", n_c); break; case 'o': full_orbit = 1; fprintf(stdout, "# [invert_dw_quda] will invert for full orbit, if source momentum set\n"); case 's': smear_source = 1; fprintf(stdout, "# [invert_dw_quda] will smear the sources if they are read from file\n"); break; case 'b': boundary_condition_factor = atof(optarg); boundary_condition_factor_set = 1; fprintf(stdout, "# [invert_dw_quda] const. 
boundary condition factor set to %e\n", boundary_condition_factor); break; case 'S': convert_sign = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] using convert sign %d\n", convert_sign); break; case 'R': rotate_gamma_basis = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] rotate gamma basis %d\n", rotate_gamma_basis); break; case 'h': case '?': default: usage(); break; } } // get the time stamp g_the_time = time(NULL); /************************************** * set the default values, read input **************************************/ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_proc_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); #ifdef MPI #ifdef HAVE_QUDA grid_size[0] = g_nproc_x; grid_size[1] = g_nproc_y; grid_size[2] = g_nproc_z; grid_size[3] = g_nproc_t; fprintf(stdout, "# [] g_nproc = (%d,%d,%d,%d)\n", g_nproc_x, g_nproc_y, g_nproc_z, g_nproc_t); initCommsQuda(argc, argv, grid_size, 4); #else MPI_Init(&argc, &argv); #endif #endif #if (defined PARALLELTX) || (defined PARALLELTXY) EXIT_WITH_MSG(1, "[] Error, 2-dim./3-dim. MPI-Version not yet implemented"); #endif // some checks on the input data if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stderr, "[invert_dw_quda] Error, T and L's must be set\n"); usage(); } // set number of openmp threads // initialize MPI parameters mpi_init(argc, argv); // the volume of a timeslice VOL3 = LX*LY*LZ; V5 = T*LX*LY*LZ*L5; g_kappa5d = 0.5 / (5. 
+ g_m5); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] kappa5d = %e\n", g_kappa5d); fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] L5 = %3d\n",\ g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, L5); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "[invert_dw_quda] Error from init_geometry\n"); EXIT(1); } geometry(); if( init_geometry_5d() != 0 ) { fprintf(stderr, "[invert_dw_quda] Error from init_geometry_5d\n"); EXIT(2); } geometry_5d(); /************************************** * initialize the QUDA library **************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] initializing quda\n"); #ifdef HAVE_QUDA // cudaGetDeviceCount(&num_gpu_on_node); if(g_gpu_per_node<0) { if(g_cart_id==0) fprintf(stderr, "[] Error, number of GPUs per node not set\n"); EXIT(106); } else { num_gpu_on_node = g_gpu_per_node; } #ifdef MPI rank = comm_rank(); #else rank = 0; #endif g_gpu_device_number = rank % num_gpu_on_node; fprintf(stdout, "# [] process %d/%d uses device %d\n", rank, g_cart_id, g_gpu_device_number); initQuda(g_gpu_device_number); #endif /************************************** * prepare the gauge field **************************************/ // read the gauge field from file alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(strcmp( gaugefilename_prefix, "identity")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up unit gauge field\n"); for(ix=0;ix<VOLUME; ix++) { for(mu=0;mu<4;mu++) { _cm_eq_id(g_gauge_field+_GGI(ix,mu)); } } } else if(strcmp( gaugefilename_prefix, "random")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up random gauge field with seed = %d\n", g_seed); init_rng_state(g_seed, &g_rng_state); random_gauge_field(g_gauge_field, 1.); plaquette(&plaq_m); sprintf(filename, "%s.%.4d", 
gaugefilename_prefix, Nconf); check_error(write_lime_gauge_field(filename, plaq_m, Nconf, 64), "write_lime_gauge_field", NULL, 12); } else { if(g_gauge_file_format == 0) { // ILDG sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_lime_gauge_field_doubleprec(filename); } else if(g_gauge_file_format == 1) { // NERSC sprintf(filename, "%s.%.5d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_nersc_gauge_field(g_gauge_field, filename, &plaq_r); //status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq_r); } if(status != 0) { fprintf(stderr, "[invert_dw_quda] Error, could not read gauge field"); EXIT(12); } } #ifdef MPI xchange_gauge(); #endif // measure the plaquette plaquette(&plaq_m); if(g_cart_id==0) fprintf(stdout, "# Measured plaquette value: %25.16e\n", plaq_m); if(g_cart_id==0) fprintf(stdout, "# Read plaquette value : %25.16e\n", plaq_r); #ifndef HAVE_QUDA if(N_Jacobi>0) { #endif // allocate the smeared / qdp ordered gauge field alloc_gauge_field(&gauge_field_smeared, VOLUMEPLUSRAND); for(i=0;i<4;i++) { gauge_qdp[i] = gauge_field_smeared + i*18*VOLUME; } #ifndef HAVE_QUDA } #endif #ifdef HAVE_QUDA // transcribe the gauge field omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy,mu) for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; for(mu=0;mu<4;mu++) { _cm_eq_cm(gauge_qdp[mu_trans[mu]]+18*iy, g_gauge_field+_GGI(ix,mu)); } } // multiply timeslice T-1 with factor of -1 (antiperiodic boundary condition) if(g_proc_coords[0]==g_nproc_t-1) { if(!boundary_condition_factor_set) boundary_condition_factor = -1.; fprintf(stdout, "# [] process %d multiplies gauge-field timeslice T_global-1 with boundary condition factor %e\n", g_cart_id, boundary_condition_factor); omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy) 
for(ix=0;ix<VOL3;ix++) { iix = (T-1)*VOL3 + ix; iy = g_lexic2eot[iix]; _cm_ti_eq_re(gauge_qdp[mu_trans[0]]+18*iy, -1.); } } // QUDA precision parameters switch(g_cpu_prec) { case 0: cpu_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = half\n"); break; case 1: cpu_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = single\n"); break; case 2: cpu_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = double\n"); break; default: cpu_prec = QUDA_DOUBLE_PRECISION; break; } switch(g_gpu_prec) { case 0: cuda_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = half\n"); break; case 1: cuda_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = single\n"); break; case 2: cuda_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = double\n"); break; default: cuda_prec = QUDA_DOUBLE_PRECISION; break; } switch(g_gpu_prec_sloppy) { case 0: cuda_prec_sloppy = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = half\n"); break; case 1: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = single\n"); break; case 2: cuda_prec_sloppy = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = double\n"); break; default: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; break; } // QUDA gauge parameters gauge_param.X[0] = LX; gauge_param.X[1] = LY; gauge_param.X[2] = LZ; gauge_param.X[3] = T; inv_param.Ls = L5; gauge_param.anisotropy = 1.0; gauge_param.type = QUDA_WILSON_LINKS; gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; gauge_param.cpu_prec = cpu_prec; gauge_param.cuda_prec = cuda_prec; gauge_param.reconstruct = QUDA_RECONSTRUCT_12; gauge_param.cuda_prec_sloppy = cuda_prec_sloppy; gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12; gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; 
gauge_param.ga_pad = 0; inv_param.sp_pad = 0; inv_param.cl_pad = 0; // For multi-GPU, ga_pad must be large enough to store a time-slice #ifdef MULTI_GPU x_face_size = inv_param.Ls * gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2; y_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2; z_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2; t_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2; pad_size = _MAX(x_face_size, y_face_size); pad_size = _MAX(pad_size, z_face_size); pad_size = _MAX(pad_size, t_face_size); gauge_param.ga_pad = pad_size; if(g_cart_id==0) printf("# [invert_dw_quda] pad_size = %d\n", pad_size); #endif // load the gauge field if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] loading gauge field\n"); loadGaugeQuda((void*)gauge_qdp, &gauge_param); gauge_qdp[0] = NULL; gauge_qdp[1] = NULL; gauge_qdp[2] = NULL; gauge_qdp[3] = NULL; #endif /********************************************* * APE smear the gauge field *********************************************/ if(N_Jacobi>0) { memcpy(gauge_field_smeared, g_gauge_field, 72*VOLUMEPLUSRAND*sizeof(double)); fprintf(stdout, "# [invert_dw_quda] APE smearing gauge field with paramters N_APE=%d, alpha_APE=%e\n", N_ape, alpha_ape); APE_Smearing_Step_threads(gauge_field_smeared, N_ape, alpha_ape); xchange_gauge_field(gauge_field_smeared); } // allocate memory for the spinor fields #ifdef HAVE_QUDA no_fields = 3+2; #else no_fields = 6+2; #endif g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND*L5); smearing_spinor_field[0] = g_spinor_field[no_fields-2]; smearing_spinor_field[1] = g_spinor_field[no_fields-1]; switch(g_source_type) { case 0: case 5: // the source locaton sl0 = g_source_location / (LX_global*LY_global*LZ); sl1 = ( g_source_location % (LX_global*LY_global*LZ) ) / ( LY_global*LZ); sl2 = ( g_source_location % ( 
LY_global*LZ) ) / ( LZ); sl3 = g_source_location % LZ; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3); source_proc_coords[0] = sl0 / T; source_proc_coords[1] = sl1 / LX; source_proc_coords[2] = sl2 / LY; source_proc_coords[3] = sl3 / LZ; #ifdef MPI MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id); #else g_source_proc_id = 0; #endif have_source_flag = g_source_proc_id == g_cart_id; lsl0 = sl0 % T; lsl1 = sl1 % LX; lsl2 = sl2 % LY; lsl3 = sl3 % LZ; if(have_source_flag) { fprintf(stdout, "# [invert_dw_quda] process %d has the source at (%d, %d, %d, %d)\n", g_cart_id, lsl0, lsl1, lsl2, lsl3); } break; case 2: case 3: case 4: // the source timeslice #ifdef MPI source_proc_coords[0] = g_source_timeslice / T; source_proc_coords[1] = 0; source_proc_coords[2] = 0; source_proc_coords[3] = 0; MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id); have_source_flag = ( g_source_proc_id == g_cart_id ); source_timeslice = have_source_flag ? 
g_source_timeslice % T : -1; #else g_source_proc_id = 0; have_source_flag = 1; source_timeslice = g_source_timeslice; #endif break; } #ifdef HAVE_QUDA /************************************************************* * QUDA inverter parameters *************************************************************/ inv_param.dslash_type = QUDA_DOMAIN_WALL_DSLASH; if(strcmp(g_inverter_type_name, "cg") == 0) { inv_param.inv_type = QUDA_CG_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using cg inverter\n"); } else if(strcmp(g_inverter_type_name, "bicgstab") == 0) { inv_param.inv_type = QUDA_BICGSTAB_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using bicgstab inverter\n"); #ifdef MULTI_GPU } else if(strcmp(g_inverter_type_name, "gcr") == 0) { inv_param.inv_type = QUDA_GCR_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using gcr inverter\n"); #endif } else { if(g_cart_id==0) fprintf(stderr, "[invert_dw_quda] Error, unrecognized inverter type %s\n", g_inverter_type_name); EXIT(123); } if(inv_param.inv_type == QUDA_CG_INVERTER) { inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_NORMEQ_PC_SOLVE; } else if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER) { inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; } else { inv_param.solution_type = QUDA_MATPC_SOLUTION; inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; } inv_param.m5 = g_m5; inv_param.kappa = 0.5 / (5. 
+ inv_param.m5); inv_param.mass = g_m0; inv_param.tol = solver_precision; inv_param.maxiter = niter_max; inv_param.reliable_delta = reliable_delta; #ifdef MPI // domain decomposition preconditioner parameters if(inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id == 0) printf("# [] settup DD parameters\n"); inv_param.gcrNkrylov = 15; inv_param.inv_type_precondition = QUDA_MR_INVERTER; inv_param.tol_precondition = 1e-6; inv_param.maxiter_precondition = 200; inv_param.verbosity_precondition = QUDA_VERBOSE; inv_param.prec_precondition = cuda_prec_sloppy; inv_param.omega = 0.7; } #endif inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.dagger = QUDA_DAG_NO; inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; //;QUDA_MASS_NORMALIZATION; inv_param.cpu_prec = cpu_prec; inv_param.cuda_prec = cuda_prec; inv_param.cuda_prec_sloppy = cuda_prec_sloppy; inv_param.verbosity = QUDA_VERBOSE; inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO; inv_param.dirac_order = QUDA_DIRAC_ORDER; #ifdef MPI inv_param.preserve_dirac = QUDA_PRESERVE_DIRAC_YES; inv_param.prec_precondition = cuda_prec_sloppy; inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; inv_param.dirac_tune = QUDA_TUNE_NO; #endif #endif /******************************************* * write initial rng state to file *******************************************/ if( g_source_type==2 && g_coherent_source==2 ) { sprintf(rng_file_out, "%s.0", g_rng_filename); status = init_rng_stat_file (g_seed, rng_file_out); if( status != 0 ) { fprintf(stderr, "[invert_dw_quda] Error, could not write rng status\n"); EXIT(210); } } else if( (g_source_type==2 /*&& g_coherent_source==1*/) || g_source_type==3 || g_source_type==4) { if( init_rng_state(g_seed, &g_rng_state) != 0 ) { fprintf(stderr, "[invert_dw_quda] Error, could initialize rng state\n"); EXIT(211); } } /******************************************* * prepare locks for openmp *******************************************/ nthreads = g_num_threads - 1; lck = 
(omp_lock_t*)malloc(nthreads * sizeof(omp_lock_t)); if(lck == NULL) { EXIT_WITH_MSG(97, "[invert_dw_quda] Error, could not allocate lck\n"); } // init locks for(i=0;i<nthreads;i++) { omp_init_lock(lck+i); } omp_init_lock(gen_lck); // check the source momenta if(g_source_momentum_set) { source_momentum = (int*)malloc(3*sizeof(int)); if(g_source_momentum[0]<0) g_source_momentum[0] += LX_global; if(g_source_momentum[1]<0) g_source_momentum[1] += LY_global; if(g_source_momentum[2]<0) g_source_momentum[2] += LZ_global; fprintf(stdout, "# [invert_dw_quda] using final source momentum ( %d, %d, %d )\n", g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); if(full_orbit) { status = make_qcont_orbits_3d_parity_avg( &qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map); if(status != 0) { if(g_cart_id==0) fprintf(stderr, "\n[invert_dw_quda] Error while creating O_3-lists\n"); EXIT(4); } source_momentum_class = qlatt_id[g_ipt[0][g_source_momentum[0]][g_source_momentum[1]][g_source_momentum[2]]]; source_momentum_no = qlatt_count[source_momentum_class]; source_momentum_runs = source_momentum_class==0 ? 
1 : source_momentum_no + 1; if(g_cart_id==0) fprintf(stdout, "# [] source momentum belongs to class %d with %d members, which means %d runs\n", source_momentum_class, source_momentum_no, source_momentum_runs); } } if(g_source_type == 5) { if(g_seq_source_momentum_set) { if(g_seq_source_momentum[0]<0) g_seq_source_momentum[0] += LX_global; if(g_seq_source_momentum[1]<0) g_seq_source_momentum[1] += LY_global; if(g_seq_source_momentum[2]<0) g_seq_source_momentum[2] += LZ_global; } else if(g_source_momentum_set) { g_seq_source_momentum[0] = g_source_momentum[0]; g_seq_source_momentum[1] = g_source_momentum[1]; g_seq_source_momentum[2] = g_source_momentum[2]; } fprintf(stdout, "# [invert_dw_quda] using final sequential source momentum ( %d, %d, %d )\n", g_seq_source_momentum[0], g_seq_source_momentum[1], g_seq_source_momentum[2]); } /*********************************************** * loop on spin-color-index ***********************************************/ for(isc=g_source_index[0]; isc<=g_source_index[1]; isc++) // for(isc=g_source_index[0]; isc<=g_source_index[0]; isc++) { ispin = isc / n_c; icol = isc % n_c; for(imom=0; imom<source_momentum_runs; imom++) { /*********************************************** * set source momentum ***********************************************/ if(g_source_momentum_set) { if(imom == 0) { if(full_orbit) { source_momentum[0] = 0; source_momentum[1] = 0; source_momentum[2] = 0; } else { source_momentum[0] = g_source_momentum[0]; source_momentum[1] = g_source_momentum[1]; source_momentum[2] = g_source_momentum[2]; } } else { source_momentum[0] = qlatt_map[source_momentum_class][imom-1] / (LY_global*LZ_global); source_momentum[1] = ( qlatt_map[source_momentum_class][imom-1] % (LY_global*LZ_global) ) / LZ_global; source_momentum[2] = qlatt_map[source_momentum_class][imom-1] % LZ_global; } if(g_cart_id==0) fprintf(stdout, "# [] run no. 
%d, source momentum (%d, %d, %d)\n", imom, source_momentum[0], source_momentum[1], source_momentum[2]); } /*********************************************** * prepare the souce ***********************************************/ if(g_read_source == 0) { // create source switch(g_source_type) { case 0: // point source if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating point source\n"); for(ix=0;ix<L5*VOLUME;ix++) { _fv_eq_zero(g_spinor_field[0]+ix); } if(have_source_flag) { if(g_source_momentum_set) { phase = 2*M_PI*( source_momentum[0]*sl1/(double)LX_global + source_momentum[1]*sl2/(double)LY_global + source_momentum[2]*sl3/(double)LZ_global ); g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol) ] = cos(phase); g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)+1] = sin(phase); } else { g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol) ] = 1.; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol); } #ifdef HAVE_QUDA // set matpc_tpye source_location_5d_iseven = ( (g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin<n_s/2) || (!g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin>=n_s/2) ) ? 
1 : 0; if(source_location_5d_iseven) { inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_EVEN_EVEN\n"); } else { inv_param.matpc_type = QUDA_MATPC_ODD_ODD; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_ODD_ODD\n"); } #endif break; case 2: // timeslice source if(g_coherent_source==1) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating coherent timeslice source\n"); status = prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1); if(status != 0) { fprintf(stderr, "[invert_dw_quda] Error from prepare source, status was %d\n", status); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 123); MPI_Finalize(); #endif exit(123); } check_error(prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1), "prepare_coherent_timeslice_source", NULL, 123); timeslice = g_coherent_source_base; } else { if(g_coherent_source==2) { timeslice = (g_coherent_source_base+isc*g_coherent_source_delta)%T_global; fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n"); check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, timeslice, VOLUME, g_rng_state, 1), "prepare_timeslice_source", NULL, 123); } else { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n"); check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, VOLUME, g_rng_state, 1), "prepare_timeslice_source", NULL, 124); timeslice = g_source_timeslice; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix, Nconf, timeslice, isc); } break; case 3: // 
timeslice sources for one-end trick (spin dilution) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n"); check_error( prepare_timeslice_source_one_end(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum, isc%n_s, g_rng_state, \ ( isc%n_s==(n_s-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end", NULL, 125 ); c = N_Jacobi > 0 ? isc%n_s + n_s : isc%n_s; if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 4: // timeslice sources for one-end trick (spin and color dilution ) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n"); check_error(prepare_timeslice_source_one_end_color(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum,\ isc%(n_s*n_c), g_rng_state, ( isc%(n_s*n_c)==(n_s*n_c-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end_color", NULL, 126); c = N_Jacobi > 0 ? 
isc%(n_s*n_c) + (n_s*n_c) : isc%(n_s*n_c); if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 5: if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] preparing sequential point source\n"); check_error( prepare_sequential_point_source (g_spinor_field[0], isc, sl0, g_seq_source_momentum, smear_source, g_spinor_field[1], gauge_field_smeared), "prepare_sequential_point_source", NULL, 33); sprintf(source_filename, "%s.%.4d.t%.2dx%.2d.y%.2d.z%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); break; default: fprintf(stderr, "\nError, unrecognized source type\n"); exit(32); break; } } else { // read source switch(g_source_type) { case 0: // point source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", \ filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc); } fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; case 2: // timeslice source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, g_source_timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix2, Nconf, g_source_timeslice, isc); } fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); 
check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; default: check_error(1, "source type", NULL, 104); break; case -1: // timeslice source sprintf(source_filename, "%s", filename_prefix2); fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; } } // of if g_read_source if(g_write_source) { check_error(write_propagator(g_spinor_field[0], source_filename, 0, g_propagator_precision), "write_propagator", NULL, 27); } /*********************************************************************************************** * here threads split: ***********************************************************************************************/ if(dummy_flag==0) strcpy(source_filename_write, source_filename); memcpy((void*)(smearing_spinor_field[0]), (void*)(g_spinor_field[0]), 24*VOLUME*sizeof(double)); if(dummy_flag>0) { // copy only if smearing has been done; otherwise do not copy, do not invert if(g_cart_id==0) fprintf(stdout, "# [] copy smearing field -> g field\n"); memcpy((void*)(g_spinor_field[0]), (void*)(smearing_spinor_field[1]), 24*VOLUME*sizeof(double)); } omp_set_num_threads(g_num_threads); #pragma omp parallel private(threadid, _2_kappa, is, ix, iy, iix, ratime, retime) shared(key,g_read_source, smear_source, N_Jacobi, kappa_Jacobi, smearing_spinor_field, g_spinor_field, nthreads, convert_sign, VOLUME, VOL3, T, L5, isc, rotate_gamma_basis, g_cart_id) firstprivate(inv_param, gauge_param, ofs) { threadid = omp_get_thread_num(); if(threadid < nthreads) { fprintf(stdout, "# [] proc%.2d thread%.2d starting source preparation\n", g_cart_id, threadid); // smearing if( ( !g_read_source || (g_read_source && smear_source ) ) && N_Jacobi > 0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] smearing source with N_Jacobi=%d, kappa_Jacobi=%e\n", N_Jacobi, kappa_Jacobi); 
Jacobi_Smearing_threaded(gauge_field_smeared, smearing_spinor_field[0], smearing_spinor_field[1], kappa_Jacobi, N_Jacobi, threadid, nthreads); } /*********************************************** * create the 5-dim. source field ***********************************************/ if(convert_sign == 0) { spinor_4d_to_5d_threaded(smearing_spinor_field[0], smearing_spinor_field[0], threadid, nthreads); } else if(convert_sign == 1 || convert_sign == -1) { spinor_4d_to_5d_sign_threaded(smearing_spinor_field[0], smearing_spinor_field[0], convert_sign, threadid, nthreads); } for(is=0; is<L5; is++) { for(it=threadid; it<T; it+=nthreads) { memcpy((void*)(g_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), (void*)(smearing_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), VOL3*24*sizeof(double)); } } // reorder, multiply with g2 for(is=0; is<L5; is++) { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = (is*T+it)*VOL3 + i3; _fv_eq_zero(smearing_spinor_field[1]+_GSI(ix)); }}} if(rotate_gamma_basis) { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(0, ix); _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix)); }} for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(L5-1, ix); _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME)); }} } else { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(0, ix); _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix)); }} for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(L5-1, ix); _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME)); }} } fprintf(stdout, "# [] proc%.2d thread%.2d finished source preparation\n", g_cart_id, threadid); } else if(threadid == 
g_num_threads-1 && dummy_flag > 0) { // else branch on threadid fprintf(stdout, "# [] proc%.2d thread%.2d starting inversion for dummy_flag = %d\n", g_cart_id, threadid, dummy_flag); /*********************************************** * perform the inversion ***********************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n"); xchange_field_5d(g_spinor_field[0]); memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double)); ratime = CLOCK; #ifdef MPI if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER || inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n"); invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); } else if(inv_param.inv_type == QUDA_CG_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n"); testCG(g_spinor_field[1], g_spinor_field[0], &inv_param); } else { if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n"); } #else invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); #endif retime = CLOCK; if(g_cart_id==0) { fprintf(stdout, "# [invert_dw_quda] QUDA time: %e seconds\n", inv_param.secs); fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs); fprintf(stdout, "# [invert_dw_quda] wall time: %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); } } // of if threadid // wait till all threads are here #pragma omp barrier if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. 
* g_kappa5d; for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } #pragma omp barrier // reorder, multiply with g2 for(is=0;is<L5;is++) { for(ix=threadid; ix<VOLUME; ix+=g_num_threads) { iy = lexic2eot_5d(is, ix); iix = is*VOLUME + ix; _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy)); }} #pragma omp barrier if(rotate_gamma_basis) { for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } } else { for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } } if(g_cart_id==0 && threadid==g_num_threads-1) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime); #pragma omp single { #ifdef MPI xchange_field_5d(g_spinor_field[1]); #endif /*********************************************** * check residuum ***********************************************/ if(check_residuum && dummy_flag>0) { // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions #ifdef MPI xchange_field_5d(g_spinor_field[2]); xchange_field_5d(g_spinor_field[1]); #endif memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double)); //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field_5d(g_spinor_field[1], ofs); //fclose(ofs); Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5); if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } 
if(dummy_flag>0) { /*********************************************** * create 4-dim. propagator ***********************************************/ if(convert_sign == 0) { spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]); } else if(convert_sign == -1 || convert_sign == +1) { spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename_write); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename); check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22); //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[1], ofs); //fclose(ofs); } if(check_residuum) memcpy(g_spinor_field[2], smearing_spinor_field[0], 24*VOLUME*L5*sizeof(double)); } // of omp single } // of omp parallel region if(dummy_flag > 0) strcpy(source_filename_write, source_filename); dummy_flag++; } // of loop on momenta } // of isc #if 0 // last inversion { memcpy(g_spinor_field[0], smearing_spinor_field[1], 24*VOLUME*L5*sizeof(double)); if(g_cart_id==0) fprintf(stdout, "# [] proc%.2d starting last inversion\n", g_cart_id); /*********************************************** * perform the inversion ***********************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n"); xchange_field_5d(g_spinor_field[0]); memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double)); ratime = CLOCK; #ifdef MPI if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER || inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n"); invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); } else if(inv_param.inv_type == QUDA_CG_INVERTER) { 
if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n"); testCG(g_spinor_field[1], g_spinor_field[0], &inv_param); } else { if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n"); } #else invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); #endif retime = CLOCK; if(g_cart_id==0) { fprintf(stdout, "# [invert_dw_quda] QUDA time: %e seconds\n", inv_param.secs); fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs); fprintf(stdout, "# [invert_dw_quda] wall time: %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); } omp_set_num_threads(g_num_threads); #pragma omp parallel private(threadid,_2_kappa,is,ix,iy,iix) shared(VOLUME,L5,g_kappa,g_spinor_field,g_num_threads) { threadid = omp_get_thread_num(); if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. * g_kappa5d; for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } #pragma omp barrier // reorder, multiply with g2 for(is=0;is<L5;is++) { for(ix=threadid; ix<VOLUME; ix+=g_num_threads) { iy = lexic2eot_5d(is, ix); iix = is*VOLUME + ix; _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy)); }} #pragma omp barrier if(rotate_gamma_basis) { for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } } else { for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } } } // end of parallel region if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime); #ifdef MPI xchange_field_5d(g_spinor_field[1]); #endif /*********************************************** * check residuum ***********************************************/ if(check_residuum && dummy_flag>0) { // 
apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions #ifdef MPI xchange_field_5d(g_spinor_field[2]); #endif memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double)); //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field_5d(g_spinor_field[1], ofs); //fclose(ofs); Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5); spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } /*********************************************** * create 4-dim. propagator ***********************************************/ if(convert_sign == 0) { spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]); } else if(convert_sign == -1 || convert_sign == +1) { spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename_write); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename); check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22); //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[1], ofs); //fclose(ofs); } // of last inversion #endif // of if 0 /*********************************************** * free the allocated memory, finalize ***********************************************/ 
#ifdef HAVE_QUDA // finalize the QUDA library if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] finalizing quda\n"); #ifdef MPI freeGaugeQuda(); #endif endQuda(); #endif if(g_gauge_field != NULL) free(g_gauge_field); if(gauge_field_smeared != NULL) free(gauge_field_smeared); if(no_fields>0) { if(g_spinor_field!=NULL) { for(i=0; i<no_fields; i++) if(g_spinor_field[i]!=NULL) free(g_spinor_field[i]); free(g_spinor_field); } } free_geometry(); if(g_source_momentum_set && full_orbit) { finalize_q_orbits(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_rep); if(qlatt_map != NULL) { free(qlatt_map[0]); free(qlatt_map); } } if(source_momentum != NULL) free(source_momentum); if(lck != NULL) free(lck); #ifdef MPI #ifdef HAVE_QUDA endCommsQuda(); #else MPI_Finalize(); #endif #endif if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time)); } return(0); }
int q_alignOfTypeEncode(const char* type) { type = q_skipVarNameEncode(type); switch (*type) { case Q_C_ID: return __alignof(id); case Q_C_CLASS: return __alignof(Class); case Q_C_SEL: return __alignof(SEL); case Q_C_CHR: return __alignof(char); case Q_C_UCHR: return __alignof(unsigned char); case Q_C_SHT: return __alignof(short); case Q_C_USHT: return __alignof(unsigned short); case Q_C_INT: return __alignof(int); case Q_C_UINT: return __alignof(unsigned int); case Q_C_LNG: return __alignof(long); case Q_C_ULNG: return __alignof(unsigned long); case Q_C_FLT: return __alignof(float); case Q_C_DBL: return __alignof(double); case Q_C_VOID: return 0; case Q_C_PTR: case Q_C_CHARPTR: return __alignof(char*); case Q_C_ARY_B: while (isdigit(*++type)); // empty loop return q_alignOfTypeEncode(type); case Q_C_STRUCT_B: { struct _StructLayout layout; unsigned int align; q_layoutStructBeginEncode(type, &layout); while (q_layoutStructNextEncode(&layout)); // empty loop q_layoutStructEndEncode(&layout, nil, &align); return align; } case Q_C_UNION_B: { int maxaling = 0; while (*type != Q_C_UNION_E && *type++ != '='); // empty loop while (*type != Q_C_UNION_E) { type = q_skipVarNameEncode(type); maxaling = _MAX(maxaling, q_alignOfTypeEncode(type)); type = q_skipTypeSpecEncode(type); } return maxaling; } default: q_throwError(er1, type); } return 0; }