MapBeamLine::MapBeamLine(string filename, string filenameerr, int order, int nbthreads, int fmultipole, bool strpl) { Twiss t = Twiss(filename); Twiss terr = Twiss(filenameerr); if (strpl) { t.stripLine(); terr.stripLine(); } omp_set_num_threads(nbthreads); vector<vector<Polynom<double>>> v = separateComplexList(EQ(4, order)); Polynom<double> x = X<double>(order); Polynom<double> px = PX<double>(order); Polynom<double> y = Y<double>(order); Polynom<double> py = PY<double>(order); Polynom<double> d = D<double>(order); Polynom<double> s = S<double>(order); Polmap<double> R = generateDefaultMap(x, px, y, py, d, s); Polmap<double>* Res = new Polmap<double>[nbthreads]; for (int i = 0; i < nbthreads; i ++) Res[i] = R; int size = t.elems.size(); Polmap<double>* mp = new Polmap<double>[size]; #pragma omp parallel for shared(Res) schedule(dynamic, CHUNK_SIZE) for (int i = 0; i < size; i ++) mp[i] = mapForElement(t.elems[i], v, x, px, y, py, d, s, fmultipole); if (strpl) { #pragma omp parallel for shared(Res) schedule(static) for (int i = 0; i < size; i ++) { int index = omp_get_thread_num(); double dx = atof(terr.elems[i][DX].c_str()); double dy = atof(terr.elems[i][DY].c_str()); mp[i] = mp[i].eval("x", Polynom<double>(order, 1E-18, "x", 1) + dx); mp[i] = mp[i].eval("y", Polynom<double>(order, 1E-18, "y", 1) + dy); Res[index] = mp[i] * Res[index]; } } else { #pragma omp parallel for shared(Res) schedule(static) for (int i = 0; i < size; i ++) { int index = omp_get_thread_num(); double dx = atof(terr.elems[i][DX].c_str()); double dy = atof(terr.elems[i][DY].c_str()); mp[i] = mp[i].eval("x", Polynom<double>(order, 1E-18, "x", 1) + dx); mp[i] = mp[i].eval("y", Polynom<double>(order, 1E-18, "y", 1) + dy); if (mp[i].pols.size() != 0) Res[index] = mp[i] * Res[index]; } } R = Res[0]; for (int i = 1; i < nbthreads; i ++) R = Res[i] * R; polmap = R.getMap(); for (unordered_map<string, Polynom<double>>:: iterator it = R.pols.begin(); it != R.pols.end(); it ++) pols[it->first] = 
it->second; delete [] Res; delete [] mp; }
/**
 * Builds the composed beam-line map from a single Twiss file.
 *
 * Each element map is computed with mapForElement() and immediately
 * left-multiplied into the partial product owned by the executing OpenMP
 * thread; the partial products are then multiplied together in thread order
 * and the result is copied into this object's polmap / pols members.
 *
 * @param filename    passed to the Twiss constructor.
 * @param order       forwarded to EQ, X, PX, Y, PY, D and S.
 * @param nbthreads   number of OpenMP threads and of partial products; values
 *                    below 1 are treated as 1.
 * @param fmultipole  forwarded unchanged to mapForElement.
 * @param strpl       when true stripLine() is called on the table and every
 *                    element map is composed; when false an element map whose
 *                    pols container is empty is skipped.
 */
MapBeamLine::MapBeamLine(string filename, int order, int nbthreads, int fmultipole, bool strpl) {
    Twiss t = Twiss(filename);
    if (strpl)
        t.stripLine();

    // A non-positive thread count would leave Res empty and make Res[0] below
    // an out-of-bounds read, so fall back to a single thread.
    const int nthreads = nbthreads > 0 ? nbthreads : 1;
    omp_set_num_threads(nthreads);

    vector<vector<Polynom<double>>> v = separateComplexList(EQ(4, order));
    Polynom<double> x = X<double>(order);
    Polynom<double> px = PX<double>(order);
    Polynom<double> y = Y<double>(order);
    Polynom<double> py = PY<double>(order);
    Polynom<double> d = D<double>(order);
    Polynom<double> s = S<double>(order);
    Polmap<double> R = generateDefaultMap(x, px, y, py, d, s);

    // One partial product per thread, each seeded with the default map.
    // vector (instead of new[]) releases the storage on every exit path.
    vector<Polmap<double>> Res(nthreads, R);

    const int size = static_cast<int>(t.elems.size());

    // NOTE(review): the reduction relies on schedule(static) handing each
    // thread one contiguous run of elements in thread order, so that chaining
    // the partial products in thread order reproduces the element order.
    #pragma omp parallel for shared(Res) schedule(static)
    for (int i = 0; i < size; i ++) {
        const int index = omp_get_thread_num();
        Polmap<double> mp = mapForElement(t.elems[i], v, x, px, y, py, d, s, fmultipole);
        // With strpl every map is composed; otherwise empty maps are skipped.
        if (strpl || mp.pols.size() != 0)
            Res[index] = mp * Res[index];
    }

    // Chain the per-thread partial products, lowest thread index first.
    R = Res[0];
    for (int i = 1; i < nthreads; i ++)
        R = Res[i] * R;

    polmap = R.getMap();
    for (const auto& entry : R.pols)
        pols[entry.first] = entry.second;
}
MapBeamLine::MapBeamLine(Twiss t, int order, int nbthreads, int fmultipole, bool strpl) { omp_set_num_threads(nbthreads); if (strpl) t.stripLine(); vector<vector<Polynom<double>>> v = separateComplexList(EQ(4, order)); Polynom<double> x = X<double>(order); Polynom<double> px = PX<double>(order); Polynom<double> y = Y<double>(order); Polynom<double> py = PY<double>(order); Polynom<double> d = D<double>(order); Polynom<double> s = S<double>(order); Polmap<double> R = generateDefaultMap(x, px, y, py, d, s); Polmap<double>* Res = new Polmap<double>[nbthreads]; for (int i = 0; i < nbthreads; i ++) Res[i] = R; int size = t.elems.size(); Polmap<double>* mp = new Polmap<double>[size]; #pragma omp parallel for shared(Res) schedule(dynamic, 10) for (int i = 0; i < size; i ++) { mp[i] = mapForElement(t.elems[i], v, x, px, y, py, d, s, fmultipole); } if (strpl) { double start = omp_get_wtime(); #pragma omp parallel for shared(Res) schedule(static) for (int i = 0; i < size; i ++) { int index = omp_get_thread_num(); Res[index] = mp[i] * Res[index]; } double end = omp_get_wtime(); cout << 1000 * (end - start) << endl; } else { #pragma omp parallel for shared(Res) schedule(static) for (int i = 0; i < size; i ++) { int index = omp_get_thread_num(); Polmap<double> mp = mapForElement(t.elems[i], v, x, px, y, py, d, s, fmultipole); if (mp.pols.size() != 0) Res[index] = mp * Res[index]; } } double start = omp_get_wtime(); R = Res[0]; for (int i = 1; i < nbthreads; i ++) R = Res[i].parallel_composition(R); double end = omp_get_wtime(); cout << 1000 *(end - start) << endl; polmap = R.getMap(); for (unordered_map<string, Polynom<double>>:: iterator it = R.pols.begin(); it != R.pols.end(); it ++) pols[it->first] = it->second; delete [] Res; delete [] mp; }
/**
 * Builds the composed beam-line map from a lattice table and an error table,
 * using compose() with pre-allocated exponent / coefficient buffers for the
 * final reduction over the per-thread partial products.
 *
 * @param t           lattice Twiss table, taken by value.
 * @param terr        error Twiss table, taken by value; indexed with the
 *                    lattice's element index, so it must hold at least as many
 *                    elements as t.
 * @param order       forwarded to EQ, X, PX, Y, PY, D, S and to the shift
 *                    polynomials.
 * @param nbthreads   number of OpenMP threads and of partial products.
 * @param fmultipole  forwarded unchanged to mapForElement.
 * @param strpl       when true stripLine() is called on both tables and every
 *                    element map is composed; when false an element map whose
 *                    pols container is empty is skipped.
 */
MapBeamLine::MapBeamLine(Twiss t, Twiss terr, int order, int nbthreads, int fmultipole, bool strpl) {
    omp_set_num_threads(nbthreads);

    // NOTE(review): the status returned by cudaGetDevice is stored but never
    // inspected, and devID is not used afterwards.
    int devID = 0;
    cudaError_t error = cudaGetDevice(&devID);

    if (strpl) {
        t.stripLine();
        terr.stripLine();
    }

    vector<vector<Polynom<double>>> v = separateComplexList(EQ(4, order));
    Polynom<double> x = X<double>(order);
    Polynom<double> px = PX<double>(order);
    Polynom<double> y = Y<double>(order);
    Polynom<double> py = PY<double>(order);
    Polynom<double> d = D<double>(order);
    Polynom<double> s = S<double>(order);
    Polmap<double> total = generateDefaultMap(x, px, y, py, d, s);

    // Allocate memory for the arrays which will be transferred to/from the GPU.
    int nvars = 6;
    int* expA = allocExponentsMemory(SIZE, nvars);
    int* expB = allocExponentsMemory(SIZE, nvars);
    int* expC = allocExponentsMemory(SIZE * SIZE, nvars);
    double* coeffA = allocCoefficientsMemory(SIZE);
    double* coeffB = allocCoefficientsMemory(SIZE);
    double* coeffC = allocCoefficientsMemory(SIZE * SIZE);

    // One partial product per thread, each seeded with the default map.
    Polmap<double>* partial = new Polmap<double>[nbthreads];
    for (int k = 0; k < nbthreads; ++k)
        partial[k] = total;

    int count = t.elems.size();
    Polmap<double>* elemMaps = new Polmap<double>[count];

    // Element maps are independent of each other, so build them in parallel.
    #pragma omp parallel for shared(partial) schedule(dynamic, CHUNK_SIZE)
    for (int k = 0; k < count; ++k)
        elemMaps[k] = mapForElement(t.elems[k], v, x, px, y, py, d, s, fmultipole);

    // Shift every element map in x and y by the DX / DY entries of the error
    // table, then fold it into the executing thread's partial product.
    #pragma omp parallel for shared(partial) schedule(static)
    for (int k = 0; k < count; ++k) {
        int slot = omp_get_thread_num();
        double shiftX = atof(terr.elems[k][DX].c_str());
        double shiftY = atof(terr.elems[k][DY].c_str());
        elemMaps[k] = elemMaps[k].eval("x", Polynom<double>(order, 1E-18, "x", 1) + shiftX);
        elemMaps[k] = elemMaps[k].eval("y", Polynom<double>(order, 1E-18, "y", 1) + shiftY);
        // With strpl every map is composed; otherwise empty maps are skipped.
        if (strpl || !elemMaps[k].pols.empty())
            partial[slot] = elemMaps[k] * partial[slot];
    }

    // Chain the per-thread partial products, lowest thread index first.
    total = partial[0];
    for (int k = 1; k < nbthreads; ++k)
        total = compose(partial[k], total, expC, expA, expB, coeffC, coeffA, coeffB);

    polmap = total.getMap();
    for (auto& entry : total.pols)
        pols[entry.first] = entry.second;

    // Host arrays are released before the buffers and the device reset.
    delete [] partial;
    delete [] elemMaps;
    freeMemory(expA, coeffA);
    freeMemory(expB, coeffB);
    freeMemory(expC, coeffC);
    cudaDeviceReset();
}
MapBeamLine::MapBeamLine(Twiss t, int order, int nbthreads, int fmultipole, bool strpl) { omp_set_num_threads(nbthreads); if (strpl) t.stripLine(); vector<vector<Polynom<double>>> v = separateComplexList(EQ(4, order)); Polynom<double> x = X<double>(order); Polynom<double> px = PX<double>(order); Polynom<double> y = Y<double>(order); Polynom<double> py = PY<double>(order); Polynom<double> d = D<double>(order); Polynom<double> s = S<double>(order); Polmap<double> R = generateDefaultMap(x, px, y, py, d, s); Polmap<double>* Res = new Polmap<double>[nbthreads]; for (int i = 0; i < nbthreads; i ++) Res[i] = R; int size = t.elems.size(); Polmap<double>* mp = new Polmap<double>[size]; #pragma omp parallel for shared(Res) schedule(dynamic, 10) for (int i = 0; i < size; i ++) mp[i] = mapForElement(t.elems[i], v, x, px, y, py, d, s, fmultipole); if (strpl) { double start = omp_get_wtime(); omp_set_nested(true); #pragma omp parallel for shared(Res) schedule(static) for (int i = 0; i < size; i ++) { int index = omp_get_thread_num(); if (t.elems[i]["KEYWORD"].compare(sextupole) == 0 || t.elems[i]["KEYWORD"].compare(octupole) == 0 ) Res[index] = mp[i].parallel_composition(Res[index]); else Res[index] = mp[i] * Res[index]; } double end = omp_get_wtime(); cout << 1000 * (end - start) << endl; } else { #pragma omp parallel for shared(Res) schedule(static) for (int i = 0; i < size; i ++) { int index = omp_get_thread_num(); Polmap<double> mp = mapForElement(t.elems[i], v, x, px, y, py, d, s, fmultipole); if (mp.pols.size() != 0) Res[index] = mp * Res[index]; } } double start = omp_get_wtime(); if (nbthreads >= 8) { Polmap<double>* Res2 = new Polmap<double>[2]; Res2[0] = R; Res2[1] = R; //double start2 = omp_get_wtime(); omp_set_nested(true); #pragma omp parallel for shared(Res) schedule(static) num_threads(2) for (int i = 0; i < nbthreads; i ++) { //cout << (i - 1) << endl; //for (unordered_map<string, Polynom<double>>:: iterator it = Res[i - 1].pols.begin(); it != Res[i - 
1].pols.end(); it ++) // cout << it->first << " " << (it->second).terms.size() << endl; int index = omp_get_thread_num(); Res2[index] = Res[i].parallel_composition(Res2[index]); } //double end2 = omp_get_wtime(); //cout << 1000 * (end2 - start2) << endl; //double start3 = omp_get_wtime(); R = Res2[1].parallel_composition(Res2[0]); //double end3 = omp_get_wtime(); //cout << 1000 * (end3 - start3) << endl; } else { R = Res[0]; for (int i = 1; i < nbthreads; i ++) { R = Res[i] * R; } } double end = omp_get_wtime(); cout << 1000 *(end - start) << endl; polmap = R.getMap(); for (unordered_map<string, Polynom<double>>:: iterator it = R.pols.begin(); it != R.pols.end(); it ++) pols[it->first] = it->second; delete [] Res; delete [] mp; }