/*
 * Demonstrates schedule(dynamic, chunk) on a "parallel for" and prints the
 * OpenMP control values reported by the runtime inside and outside the
 * parallel region.
 *
 * Usage: <prog> <iterations> <chunk>
 *   iterations  number of loop iterations; capped at MAX_N (size of a[])
 *   chunk       chunk size handed to schedule(dynamic, chunk); values
 *               below 1 are replaced by 1 because the schedule clause
 *               requires a positive chunk size
 *
 * Exits with status -1 when fewer than two arguments are given.
 * Returns 0 otherwise.
 */
int main(int argc, char **argv) {
    /* Fixed capacity instead of a variable-length array (not standard C++). */
    enum { MAX_N = 200 };
    int i, n = MAX_N, chunk, a[MAX_N], suma = 0;
    omp_sched_t schedule_type;
    int chunk_value;

    if (argc < 3) {
        fprintf(stderr,"\nFalta iteraciones o chunk \n");
        exit(-1);
    }

    n = atoi(argv[1]);
    if (n > MAX_N) n = MAX_N;

    chunk = atoi(argv[2]);
    if (chunk < 1) {
        /* A zero/negative (or unparsable) chunk is not a valid chunk size. */
        fprintf(stderr,"\nchunk debe ser mayor que 0; se usa 1\n");
        chunk = 1;
    }

    for (i = 0; i < n; i++) a[i] = i;

    /*
     * firstprivate: every thread starts from suma == 0.
     * lastprivate:  after the loop, suma holds the private copy of the thread
     *               that executed the sequentially last iteration, so it is a
     *               partial sum, not the total.
     */
    #pragma omp parallel for firstprivate(suma) lastprivate(suma) \
                             schedule(dynamic,chunk)
    for (i = 0; i < n; i++) {
        suma = suma + a[i];
        printf(" thread %d suma a[%d]=%d suma=%d \n",
               omp_get_thread_num(),i,a[i],suma);

        /* Only thread 0 reports the runtime settings (on each of its iterations). */
        if (omp_get_thread_num() == 0) {
            printf(" Dentro de 'parallel for':\n");
            printf(" static = 1, dynamic = 2, guided = 3, auto = 4\n");
            omp_get_schedule(&schedule_type, &chunk_value);
            printf(" dyn-var: %d, nthreads-var:%d, thread-limit-var:%d,run-sched-var: %d, chunk: %d\n",
                   omp_get_dynamic(),
                   omp_get_max_threads(), omp_get_thread_limit(),
                   schedule_type, chunk_value);
            printf(" get_num_threads: %d,get_num_procs: %d,in_parallel():%d \n",
                   omp_get_num_threads(),omp_get_num_procs(),omp_in_parallel());
        }
    }

    /* Same report, this time from the sequential part of the program. */
    printf("Fuera de 'parallel for' suma=%d\n",suma);
    printf(" static = 1, dynamic = 2, guided = 3, auto = 4\n");
    omp_get_schedule(&schedule_type, &chunk_value);
    printf(" dyn-var: %d, nthreads-var:%d, thread-limit-var:%d,run-sched-var: %d, chunk: %d\n",
           omp_get_dynamic(),
           omp_get_max_threads(), omp_get_thread_limit(),
           schedule_type, chunk_value);
    printf(" get_num_threads: %d,get_num_procs: %d,in_parallel():%d \n",
           omp_get_num_threads(),omp_get_num_procs(),omp_in_parallel());

    return 0;
}
/*
 * Demo of the "single" construct: one thread of the team initialises a shared
 * scalar, then the whole team copies that scalar into every slot of an array
 * through a work-shared loop. The array is printed after the region.
 */
int main()
{
    int count = 9;
    int idx, seed, slots[count];

#ifdef _OPENMP
    (void) omp_set_dynamic(FALSE);
    if (omp_get_dynamic()) {
        printf("Warning: dynamic adjustment of threads has been set\n");
    }
    (void) omp_set_num_threads(4);
#endif

    /* Mark every slot so untouched entries would be visible in the output. */
    for (idx = 0; idx < count; ++idx) {
        slots[idx] = -1;
    }

    #pragma omp parallel shared(seed,slots) private(idx)
    {
        /* Executed by exactly one thread; the others wait at the end of it. */
        #pragma omp single
        {
            seed = 10;
            printf("Single construct executed by thread %d\n",
                   omp_get_thread_num());
        }

        #pragma omp for
        for (idx = 0; idx < count; ++idx) {
            slots[idx] = seed;
        }
    } /* end of parallel region */

    printf("After the parallel region:\n");
    for (idx = 0; idx < count; ++idx) {
        printf("b[%d] = %d\n", idx, slots[idx]);
    }

    return 0;
}
/*
 * Demo of nested parallel regions: an outer team (3 threads requested) in
 * which every thread opens an inner team of 2 threads. Each level prints the
 * thread number reported by omp_get_thread_num().
 */
int main()
{
    const char *nested_state;

#ifdef _OPENMP
    (void) omp_set_dynamic(FALSE);
    if (omp_get_dynamic()) {
        printf("Warning: dynamic adjustment of threads has been set\n");
    }
    (void) omp_set_num_threads(3);

    (void) omp_set_nested(TRUE);
    if (! omp_get_nested()) {
        printf("Warning: nested parallelism not set\n");
    }
#endif

    if (omp_get_nested()) {
        nested_state = "supported";
    } else {
        nested_state = "not supported";
    }
    printf("Nested parallelism is %s\n", nested_state);

    /*
     * The number printed by the inner region is the one reported inside the
     * inner team, so the output alone no longer tells the threads apart.
     */
    #pragma omp parallel
    {
        printf("Thread %d executes the outer parallel region\n",
               omp_get_thread_num());

        #pragma omp parallel num_threads(2)
        {
            printf(" Thread %d executes the inner parallel region\n",
                   omp_get_thread_num());
        } /* end of inner parallel region */
    } /* end of outer parallel region */

    return 0;
}
/*
 * Demo of firstprivate/lastprivate on a "parallel for": each thread gets its
 * own initialised copy of the offset, and the value assigned in the last
 * iteration is copied back out of the loop. The results are printed next to
 * the values computed up front for comparison.
 */
int main ()
{
    int offset = 50;
    int count = 1858;
    int last, total, k;
    /* Reference values: what the last iteration and the final sum should be. */
    const int expected_last = offset + count - 1;
    const int expected_total = expected_last + offset;

#ifdef _OPENMP
    (void) omp_set_dynamic(FALSE);
    if (omp_get_dynamic()) {
        printf("Warning: dynamic adjustment of threads has been set\n");
    }
    (void) omp_set_num_threads(4);
#endif

    printf("Before parallel loop: b = %d n = %d\n", offset, count);

    #pragma omp parallel for private(k), firstprivate(offset), \
                             lastprivate(last)
    for (k = 0; k < count; ++k) {
        last = offset + k;
    } /* end of parallel for */

    total = last + offset;

    printf("Values of a and c after parallel for:\n");
    printf("\ta = %d\t(correct value is %d)\n", last, expected_last);
    printf("\tc = %d\t(correct value is %d)\n", total, expected_total);

    return 0;
}
// Reports the value of omp_get_dynamic() when the translation unit is built
// with OpenMP, and 0 when it is built without it.
int get_omp_dynamic() {
#ifndef _OPENMP
    return 0;
#else
    const int dynamic_enabled = omp_get_dynamic();
    return dynamic_enabled;
#endif
}
// Applies the requested OpenMP / Eigen threading settings and logs the result.
//
//   threads        passed to omp_set_num_threads() unless it is 0
//   eigen_threads  passed to Eigen::setNbThreads() when a value is present
//   dynamic        passed to omp_set_dynamic()
//
// The whole body is compiled only when _OPENMP is defined; otherwise the
// function does nothing.
void setup_threading(const int threads,
                     const boost::optional<int> eigen_threads,
                     const bool dynamic) {
#ifdef _OPENMP
    Eigen::initParallel();

    // 0 means "leave the OpenMP thread count untouched".
    const bool override_omp_threads = (threads != 0);
    if (override_omp_threads) {
        omp_set_num_threads(threads);
    }

    if (eigen_threads) {
        const int eigen_thread_count = *eigen_threads;
        Eigen::setNbThreads(eigen_thread_count);
    }

    omp_set_dynamic(dynamic);

    LOGD << "OpenMP initialized with dynamic teams set to " << omp_get_dynamic();
    LOGD << "Eigen is using " << Eigen::nbThreads() << " threads";
#endif
}
/*
 * Prints the host name and a set of OpenMP environment values (processor
 * count, team size, max threads, in-parallel flag, dynamic and nested
 * settings). All queries and output are done by thread 0 of a parallel
 * region; the other threads only fetch their thread number.
 *
 * argc/argv are not used. The program always terminates via exit(0).
 */
int main (int argc, char *argv[]) {
    int nthreads, tid, procs, maxt, inpar, dynamic, nested;
    char name[50];

    /* Start parallel region */
    #pragma omp parallel private(nthreads, tid)
    {
        /* Obtain thread number */
        tid = omp_get_thread_num();

        /* Only thread 0 does this ("#pragma omp master" would also work) */
        if (tid == 0) {
            printf("Thread %d getting environment info...\n", tid);

            /*
             * Get host name. On failure the buffer contents are not usable,
             * and on truncation a terminating NUL is not guaranteed, so
             * handle both before the buffer is handed to printf("%s").
             */
            if (gethostname(name, sizeof name) != 0) {
                snprintf(name, sizeof name, "%s", "unknown");
            }
            name[sizeof name - 1] = '\0';

            /* Get environment information */
            procs = omp_get_num_procs();
            nthreads = omp_get_num_threads();
            maxt = omp_get_max_threads();
            inpar = omp_in_parallel();
            dynamic = omp_get_dynamic();
            nested = omp_get_nested();

            /* Print environment information */
            printf("Hostname = %s\n", name);
            printf("Number of processors = %d\n", procs);
            printf("Number of threads = %d\n", nthreads);
            printf("Max threads = %d\n", maxt);
            printf("In parallel? = %d\n", inpar);
            printf("Dynamic threads enabled? = %d\n", dynamic);
            printf("Nested parallelism supported? = %d\n", nested);
        }
    } /* Done */

    exit(0);
}
/*
 * Demo of a work-shared loop: the iterations of a 9-step loop are divided
 * among the threads of the team, and every iteration reports which thread
 * executed it.
 */
int main()
{
    int step, total = 9;

#ifdef _OPENMP
    (void) omp_set_dynamic(FALSE);
    if (omp_get_dynamic()) {
        printf("Warning: dynamic adjustment of threads has been set\n");
    }
    (void) omp_set_num_threads(4);
#endif

    /* default(none): every variable used inside must be listed explicitly. */
    #pragma omp parallel default(none) shared(total) private(step)
    {
        #pragma omp for
        for (step = 0; step < total; ++step) {
            printf("Thread %d executes loop iteration %d\n",
                   omp_get_thread_num(), step);
        }
    } /* end of parallel region */

    return 0;
}
/*
 * Demo of two consecutive work-shared loops inside one parallel region, both
 * using schedule(runtime). The first loop fills one array with the iteration
 * index, the second stores twice those values into another array. Before each
 * loop a single thread prints the team size.
 */
int main()
{
    int k, len = 9;
    int src[len], dbl[len];

#ifdef _OPENMP
    (void) omp_set_dynamic(FALSE);
    if (omp_get_dynamic()) {
        printf("Warning: dynamic adjustment of threads has been set\n");
    }
    (void) omp_set_num_threads(4);
#endif

    #pragma omp parallel default(none) shared(len,src,dbl) private(k)
    {
        #pragma omp single
        {
            printf("First for-loop: number of threads is %d\n",
                   omp_get_num_threads());
        }

        #pragma omp for schedule(runtime)
        for (k = 0; k < len; ++k) {
            printf("Thread %d executes loop iteration %d\n",
                   omp_get_thread_num(), k);
            src[k] = k;
        }

        #pragma omp single
        {
            printf("Second for-loop: number of threads is %d\n",
                   omp_get_num_threads());
        }

        #pragma omp for schedule(runtime)
        for (k = 0; k < len; ++k) {
            printf("Thread %d executes loop iteration %d\n",
                   omp_get_thread_num(), k);
            dbl[k] = 2 * src[k];
        }
    } /* end of parallel region */

    return 0;
}
/*
 * Self-test of OpenMP runtime library routines. Each check calls abort () on
 * an unexpected result; the program returns 0 only when every check passes.
 * The checks depend on the exact order of the calls, so statements must not
 * be reordered.
 */
int main (void)
{
  double d, e;
  int l;
  omp_lock_t lck;
  omp_nest_lock_t nlck;

  /* Start time stamp, compared against a second one at the end. */
  d = omp_get_wtime ();

  /* Simple lock: testing a held lock must fail, testing a free lock must
     succeed (and acquire it, so the following test must fail again).  */
  omp_init_lock (&lck);
  omp_set_lock (&lck);
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  if (! omp_test_lock (&lck))
    abort ();
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  omp_destroy_lock (&lck);

  /* Nestable lock: the value returned by omp_test_nest_lock is compared with
     the number of acquisitions held at that point (1, then 3 after one more
     set, then 2 after two unsets followed by the test itself).  */
  omp_init_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 1)
    abort ();
  omp_set_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 3)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 2)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  omp_destroy_nest_lock (&nlck);

  /* Setters must be reflected by the matching getters.  */
  omp_set_dynamic (1);
  if (! omp_get_dynamic ())
    abort ();
  omp_set_dynamic (0);
  if (omp_get_dynamic ())
    abort ();

  omp_set_nested (1);
  if (! omp_get_nested ())
    abort ();
  omp_set_nested (0);
  if (omp_get_nested ())
    abort ();

  /* Outside a parallel region: team size 1, thread number 0, and the
     requested thread count visible through omp_get_max_threads.  */
  omp_set_num_threads (5);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 5)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();
  omp_set_num_threads (3);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 3)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();

  /* Inside a parallel region: l collects (OR-reduced over all threads) any
     violation of "team has 3 threads, thread numbers are in [0, 3), and the
     master thread has number 0".  */
  l = 0;
#pragma omp parallel reduction (|:l)
  {
    l = omp_get_num_threads () != 3;
    l |= omp_get_thread_num () < 0;
    l |= omp_get_thread_num () >= 3;
#pragma omp master
    l |= omp_get_thread_num () != 0;
  }
  if (l)
    abort ();

  if (omp_get_num_procs () <= 0)
    abort ();

  /* omp_in_parallel must be false here and true inside a parallel region,
     with and without an if clause.  */
  if (omp_in_parallel ())
    abort ();
#pragma omp parallel reduction (|:l)
  l = ! omp_in_parallel ();
#pragma omp parallel reduction (|:l) if (1)
  l = ! omp_in_parallel ();
  if (l)
    abort ();

  /* The wall clock must not run backwards.  */
  e = omp_get_wtime ();
  if (d > e)
    abort ();

  d = omp_get_wtick ();
  /* Negative precision is definitely wrong,
     bigger than 1s clock resolution is also strange.  */
  if (d <= 0 || d > 1)
    abort ();

  return 0;
}
/* Wrapper with a trailing-underscore name that forwards to omp_get_dynamic ()
   and hands the result back as a 32-bit integer.  */
int32_t
omp_get_dynamic_ (void)
{
  const int32_t dynamic_enabled = omp_get_dynamic ();
  return dynamic_enabled;
}
/*
 * Demonstrates schedule(dynamic, chunk) with a team of 4 threads and prints
 * dyn-var, nthreads-var and thread-limit-var as reported by the runtime, once
 * from inside the loop (by whichever thread runs iteration 0) and once after
 * it.
 *
 * Usage: <prog> <iterations> <chunk>
 *   iterations  number of loop iterations; capped at MAX_N (size of a[])
 *   chunk       chunk size handed to schedule(dynamic, chunk); values
 *               below 1 are replaced by 1 because the schedule clause
 *               requires a positive chunk size
 *
 * Exits with status -1 when fewer than two arguments are given.
 * Returns 0 otherwise.
 */
int main(int argc, char **argv) {
    /* Fixed capacity instead of a variable-length array (not standard C++). */
    enum { MAX_N = 200 };
    int i, n = MAX_N, chunk, a[MAX_N], suma = 0;

    if (argc < 3) {
        fprintf(stderr,"\nFalta iteraciones o chunk \n");
        exit(-1);
    }

    n = atoi(argv[1]);
    if (n > MAX_N) n = MAX_N;

    chunk = atoi(argv[2]);
    if (chunk < 1) {
        /* A zero/negative (or unparsable) chunk is not a valid chunk size. */
        fprintf(stderr,"\nchunk debe ser mayor que 0; se usa 1\n");
        chunk = 1;
    }

    for (i = 0; i < n; i++) a[i] = i;

    /*
     * firstprivate: every thread starts from suma == 0.
     * lastprivate:  after the loop, suma holds the private copy of the thread
     *               that executed the sequentially last iteration, so it is a
     *               partial sum, not the total.
     */
    #pragma omp parallel for firstprivate(suma) lastprivate(suma) \
                             schedule(dynamic,chunk) num_threads(4)
    for (i = 0; i < n; i++) {
        if (i == 0)
            printf("Dentro del parallel; dyn-var = %d, nthreads-var = %d, thread-limit-var = %d\n",
                   omp_get_dynamic(), omp_get_max_threads(), omp_get_thread_limit());

        suma = suma + a[i];
        printf(" thread %d suma a[%d]=%d suma=%d \n",
               omp_get_thread_num(),i,a[i],suma);
    }

    printf("Fuera de 'parallel for' suma=%d\n",suma);
    printf("Fuera del parallel; dyn-var = %d, nthreads-var = %d, thread-limit-var = %d\n",
           omp_get_dynamic(), omp_get_max_threads(), omp_get_thread_limit());

    return 0;
}
// call only once in main void InitSysVar() { // for very sensitive compilers (which need a SizeT for dimension()) const SizeT one=1; // !NULL NullGDL* nullInstance = NullGDL::GetSingleInstance(); DVar *nullVar = new DVar( "NULL", nullInstance); nullIx=sysVarList.size(); sysVarList.push_back(nullVar); // !TRUE DByteGDL* trueData = new DByteGDL(1); DVar *true_logical = new DVar( "TRUE", trueData ); trueIx=sysVarList.size(); sysVarList.push_back(true_logical); // !FALSE DByteGDL* falseData = new DByteGDL(0); DVar *false_logical = new DVar( "FALSE", falseData ); falseIx=sysVarList.size(); sysVarList.push_back(false_logical); // !PATH // DString initPath(""); // set here the initial path DStringGDL* pathData=new DStringGDL( ""); DVar *path=new DVar( "PATH", pathData); pathIx=sysVarList.size(); sysVarList.push_back(path); // !PROMPT DStringGDL* promptData=new DStringGDL( "GDL> "); DVar *prompt=new DVar( "PROMPT", promptData); promptIx=sysVarList.size(); sysVarList.push_back(prompt); // !EDIT_INPUT DIntGDL* edit_inputData=new DIntGDL( 1); DVar *edit_input=new DVar( "EDIT_INPUT", edit_inputData); edit_inputIx=sysVarList.size(); sysVarList.push_back(edit_input); // !QUIET DLongGDL* quietData=new DLongGDL( 0); DVar *quiet=new DVar( "QUIET", quietData); quietIx=sysVarList.size(); sysVarList.push_back(quiet); // !C DLongGDL* cData=new DLongGDL( 0); DVar *c=new DVar( "C", cData); cIx=sysVarList.size(); sysVarList.push_back(c); // !D defined in Graphics DVar *d=new DVar( "D", NULL); dIx=sysVarList.size(); sysVarList.push_back(d); sysVarRdOnlyList.push_back( d); // make it read only // plotting // !P SizeT clipDim = 6; // DLong p_clipInit[] = { 0, 0, 1024, 1024, 0, 1000}; DLong p_clipInit[] = { 0, 0, 639, 511, 0, 0}; DLongGDL* p_clip = new DLongGDL( dimension( &clipDim, one)); for( UInt i=0; i<clipDim; i++) (*p_clip)[ i] = p_clipInit[ i]; SizeT multiDim = 5; SizeT positionDim = 4; SizeT regionDim = 4; SizeT tDim[] = { 4, 4}; DStructGDL* plt = new DStructGDL( "!PLT"); 
plt->NewTag("BACKGROUND", new DLongGDL( 0)); plt->NewTag("CHARSIZE", new DFloatGDL( 0.0)); plt->NewTag("CHARTHICK", new DFloatGDL( 0.0)); plt->NewTag("CLIP", p_clip); plt->NewTag("COLOR", new DLongGDL( 255)); plt->NewTag("FONT", new DLongGDL( -1)); plt->NewTag("LINESTYLE", new DLongGDL( 0)); plt->NewTag("MULTI", new DLongGDL( dimension( &multiDim, one))); plt->NewTag("NOCLIP", new DLongGDL( 0)); plt->NewTag("NOERASE", new DLongGDL( 0)); plt->NewTag("NSUM", new DLongGDL( 0)); plt->NewTag("POSITION", new DFloatGDL( dimension( &positionDim, one))); plt->NewTag("PSYM", new DLongGDL( 0)); plt->NewTag("REGION", new DFloatGDL( dimension( ®ionDim, one))); plt->NewTag("SUBTITLE", new DStringGDL( "")); plt->NewTag("SYMSIZE", new DFloatGDL( 0.0)); { DDoubleGDL* tmp = new DDoubleGDL( dimension( tDim, 2)); (*tmp)[0] = (*tmp)[5] = (*tmp)[10] = (*tmp)[15] = 1; plt->NewTag("T", tmp); } plt->NewTag("T3D", new DLongGDL( 0)); plt->NewTag("THICK", new DFloatGDL( 0.0)); plt->NewTag("TITLE", new DStringGDL( "")); plt->NewTag("TICKLEN", new DFloatGDL( 0.02)); plt->NewTag("CHANNEL", new DLongGDL( 0)); DVar *p=new DVar( "P", plt); pIx=sysVarList.size(); sysVarList.push_back(p); // some constants // !ORDER DLongGDL *orderData = new DLongGDL( 0 ); DVar *order = new DVar( "ORDER", orderData); orderIx = sysVarList.size(); sysVarList.push_back( order); // !GDL_WARNING (to be used in VOIGT() and BeselIJKY() to warm on // different behaviour between IDL and GDL DLongGDL *gdlWarningData = new DLongGDL( 1 ); DVar *gdlWarning = new DVar( "GDL_WARNING", gdlWarningData); gdlWarningIx = sysVarList.size(); sysVarList.push_back( gdlWarning); // !GDL (to allow distinguish IDL/GDL with DEFSYSV, '!gdl', exists=exists ) DStructGDL* gdlStruct = new DStructGDL( "!GNUDATALANGUAGE"); gdlStruct->NewTag("RELEASE", new DStringGDL( VERSION)); // creating an explicit build date in !GDL (also exist in !version) gdlStruct->NewTag("BUILD_DATE", new DStringGDL(BUILD_DATE)); // creating and Epoch entry in order to have a 
simple incremental number int CompilationMonth =0, CompilationYear=0, CompilationDay=0; string MyDate= BUILD_DATE; string SCompilationYear; SCompilationYear=MyDate.substr(7,4); CompilationYear=atoi(SCompilationYear.c_str()); string SCompilationDay; SCompilationDay=MyDate.substr(4,2); CompilationDay=atoi(SCompilationDay.c_str()); // for the months, it is more difficult if (MyDate.find("Jan")!=string::npos) CompilationMonth=1; if (MyDate.find("Feb")!=string::npos) CompilationMonth=2; if (MyDate.find("Mar")!=string::npos) CompilationMonth=3; if (MyDate.find("Apr")!=string::npos) CompilationMonth=4; if (MyDate.find("May")!=string::npos) CompilationMonth=5; if (MyDate.find("Jun")!=string::npos) CompilationMonth=6; if (MyDate.find("Jul")!=string::npos) CompilationMonth=7; if (MyDate.find("Aug")!=string::npos) CompilationMonth=8; if (MyDate.find("Sep")!=string::npos) CompilationMonth=9; if (MyDate.find("Oct")!=string::npos) CompilationMonth=10; if (MyDate.find("Nov")!=string::npos) CompilationMonth=11; if (MyDate.find("Dec")!=string::npos) CompilationMonth=12; //cout << SCompilationYear << " "<< CompilationMonth <<endl; //cout << CompilationYear<< endl; struct tm t; time_t t_of_day; t.tm_year = CompilationYear -1900; t.tm_mon = CompilationMonth-1; // Month, 0 - jan t.tm_mday = CompilationDay; // Day of the month t.tm_hour = 0; t.tm_min = 0; t.tm_sec = 0; t.tm_isdst = -1; // Is DST on? 
1 = yes, 0 = no, -1 = unknown t_of_day = mktime(&t); // printing Epoch on the Command Line $ date +"%s" // printf("seconds since the Epoch: %ld\n", (long) t_of_day); gdlStruct->NewTag("EPOCH", new DLongGDL((long) t_of_day)); gdlStruct->NewTag("GDL_NO_DSFMT", new DByteGDL(0)); gdlStruct->NewTag("GDL_USE_WX", new DByteGDL(0)); gdlStruct->NewTag("MAP_QUALITY", new DStringGDL("CRUDE")); DVar *gdl = new DVar( "GDL", gdlStruct); gdlIx=sysVarList.size(); sysVarList.push_back(gdl); sysVarRdOnlyList.push_back( gdl); // make it read only // !DPI DDoubleGDL *dpiData = new DDoubleGDL( (double)(4*atan(1.0)) ); DVar *dpi = new DVar( "DPI", dpiData); sysVarList.push_back( dpi); sysVarRdOnlyList.push_back( dpi); // make it read only // !PI DFloatGDL *piData = new DFloatGDL( (float)(4*atan(1.0)) ); DVar *pi = new DVar( "PI", piData); sysVarList.push_back( pi); sysVarRdOnlyList.push_back( pi); // make it read only // !DTOR DFloatGDL *dtorData = new DFloatGDL((*piData)[0] / 180.);// 0.0174533); DVar *dtor = new DVar( "DTOR", dtorData); sysVarList.push_back( dtor); sysVarRdOnlyList.push_back( dtor); // make it read only // !RADEG DFloatGDL *radegData = new DFloatGDL(180. 
/ (*piData)[0]);// 57.2957764); DVar *radeg = new DVar( "RADEG", radegData); sysVarList.push_back( radeg); sysVarRdOnlyList.push_back( radeg); // make it read only // !CONST // source : http://physics.nist.gov/cgi-bin/cuu/Results?category=abbr_in DStructGDL *constantList = new DStructGDL( "!CONST"); // Fine structure constant constantList ->NewTag("ALPHA", new DDoubleGDL(7.2973525698e-3)); // Astronomical Unit [m] constantList ->NewTag("AU", new DDoubleGDL(1.49597870700e11)); // Speed of Light in Vacuum [m/s] constantList ->NewTag("C", new DDoubleGDL(299792458.)); // Degrees to radians constantList ->NewTag("DTOR", new DDoubleGDL((*dpiData)[0] / 180.)); // Elementary Charge [Coulon] constantList ->NewTag("E", new DDoubleGDL(1.602176565e-19)); // Electric Vacuum Permittivity [F/m] constantList ->NewTag("EPS0", new DDoubleGDL(8.854187817e-12)); // Euler's number constantList ->NewTag("EULER", new DDoubleGDL(2.7182818284590452)); // Faraday constant NAe [C/mol] constantList ->NewTag("F", new DDoubleGDL(96485.3365)); // Gravitation constant [m^3/kg/s^2] constantList ->NewTag("G", new DDoubleGDL(6.67384e-11)); // Earth standard gravity [m/s^2] constantList ->NewTag("GN", new DDoubleGDL(9.80665)); // Planck constant [Js] constantList ->NewTag("H", new DDoubleGDL(6.62606957e-34)); // h_bar (h/!pi) [Js] constantList ->NewTag("HBAR", new DDoubleGDL(1.054571726e-34)); //Imaginary number complex<double> imaginary(0., 1.); constantList ->NewTag("I", new DComplexDblGDL(imaginary)); // Boltzmann constant (R/NA) [J/K] constantList ->NewTag("K", new DDoubleGDL(1.3806488e-23 )); // Light-Year distance [m] constantList ->NewTag("LY", new DDoubleGDL(9.4607304725808e15)); // Mass of the Earth [kg] constantList ->NewTag("M_EARTH", new DDoubleGDL(5.972186390e24)); // Mass of the Sun [kg] constantList ->NewTag("M_SUN", new DDoubleGDL(1.98841586057e30)); // electron mass [kg] constantList ->NewTag("ME", new DDoubleGDL(9.10938291e-31)); // neutron mass [kg] constantList ->NewTag("MN", new 
DDoubleGDL(1.674927351e-27)); // proton mass [kg] constantList ->NewTag("MP", new DDoubleGDL(1.672621777e-27)); // magnetic vacuum permeability [N/A^2] constantList ->NewTag("MU0", new DDoubleGDL(12.566370614e-7)); // Loschmidt's number NAp0/(RT0) [m-3] constantList ->NewTag("N0", new DDoubleGDL(2.6867805e25)); // Avogadro constant NA [mol-1] constantList ->NewTag("NA", new DDoubleGDL(6.02214129e23)); // Standard atmosphere Pression [Pa] constantList ->NewTag("P0", new DDoubleGDL(101325.)); // Parsec distance [m] constantList ->NewTag("PARSEC", new DDoubleGDL(3.0856775814671912e16)); //golden ratio ((1+sqrt(5))/2) constantList ->NewTag("PHI", new DDoubleGDL(1.6180339887498948)); // Pi constantList ->NewTag("PI", new DDoubleGDL((*dpiData)[0])); // molar gas constant [J/mol/K] constantList ->NewTag("R", new DDoubleGDL(8.3144621)); // Earth radius (assuming spherical) [m] constantList ->NewTag("R_EARTH", new DDoubleGDL(6378136.6)); // Radians to degrees constantList ->NewTag("RTOD", new DDoubleGDL(180./(*dpiData)[0])); // classical electron radius [m] constantList ->NewTag("RE", new DDoubleGDL(2.8179403267e-15)); // Rydberg constant R∞ [1/m] constantList ->NewTag("RYDBERG", new DDoubleGDL(10973731.568539)); // Stefan-Boltzmann constant [W/m^2/K^4] constantList ->NewTag("SIGMA", new DDoubleGDL(5.670373e-8)); // Standard temperature [K] constantList ->NewTag("T0", new DDoubleGDL(273.15)); // unified atomic mass unit [kg] constantList ->NewTag("U", new DDoubleGDL(1.660538921e-27)); // Molar volume, ideal gas at Standard temperature and Pression (STP) [m^3/mol] constantList ->NewTag("VM", new DDoubleGDL(22.413968e-3)); DVar *constant = new DVar("CONST",constantList); sysVarList.push_back(constant); sysVarRdOnlyList.push_back(constant); // make it read only // ![XYZ] SizeT dim2 = 2; SizeT dim60 = 60; SizeT dim10 = 10; DStructGDL* xAxis = new DStructGDL( "!AXIS"); xAxis->NewTag("TITLE", new DStringGDL( "")); xAxis->NewTag("TYPE", new DLongGDL( 0)); xAxis->NewTag("STYLE", 
new DLongGDL( 0)); xAxis->NewTag("TICKS", new DLongGDL( 0)); xAxis->NewTag("TICKLEN", new DFloatGDL( 0.0)); xAxis->NewTag("THICK", new DFloatGDL( 0.0)); xAxis->NewTag("RANGE", new DDoubleGDL( dimension( &dim2,one))); xAxis->NewTag("CRANGE", new DDoubleGDL( dimension( &dim2,one))); xAxis->NewTag("S", new DDoubleGDL( dimension( &dim2,one))); xAxis->NewTag("MARGIN", new DFloatGDL( dimension( &dim2,one))); xAxis->NewTag("OMARGIN", new DFloatGDL( dimension( &dim2,one))); xAxis->NewTag("WINDOW", new DFloatGDL( dimension( &dim2,one))); xAxis->NewTag("REGION", new DFloatGDL( dimension( &dim2,one))); xAxis->NewTag("CHARSIZE", new DFloatGDL( 0.0)); xAxis->NewTag("MINOR", new DLongGDL( 0)); xAxis->NewTag("TICKV", new DDoubleGDL( dimension( &dim60,one))); xAxis->NewTag("TICKNAME", new DStringGDL( dimension( &dim60,one))); xAxis->NewTag("GRIDSTYLE", new DLongGDL( 0)); xAxis->NewTag("TICKFORMAT", new DStringGDL( dimension( &dim10,one))); xAxis->NewTag("TICKINTERVAL", new DDoubleGDL( 0)); xAxis->NewTag("TICKLAYOUT", new DLongGDL( 0)); xAxis->NewTag("TICKUNITS", new DStringGDL( dimension( &dim10,one))); (*static_cast<DDoubleGDL*>( xAxis->GetTag( 8, 0)))[1] = 1.0; (*static_cast<DFloatGDL*>( xAxis->GetTag( 9, 0)))[0] = 10.0; (*static_cast<DFloatGDL*>( xAxis->GetTag( 9, 0)))[1] = 3.0; DVar *x = new DVar( "X", xAxis); xIx = sysVarList.size(); sysVarList.push_back(x); DStructGDL* yAxis = new DStructGDL( "!AXIS"); (*static_cast<DDoubleGDL*>( yAxis->GetTag( 8, 0)))[1] = 1.0; (*static_cast<DFloatGDL*>( yAxis->GetTag( 9, 0)))[0] = 4.0; (*static_cast<DFloatGDL*>( yAxis->GetTag( 9, 0)))[1] = 2.0; DVar* y = new DVar( "Y", yAxis); yIx = sysVarList.size(); sysVarList.push_back(y); DStructGDL* zAxis = new DStructGDL( "!AXIS"); (*static_cast<DDoubleGDL*>( zAxis->GetTag( 8, 0)))[1] = 1.0; DVar* z = new DVar( "Z", zAxis); zIx = sysVarList.size(); sysVarList.push_back(z); // !VERSION DStructGDL* ver = new DStructGDL( "!VERSION"); #ifdef _WIN32 #ifdef __MINGW32__ typedef void (WINAPI 
*GetNativeSystemInfoFunc)(LPSYSTEM_INFO); HMODULE hModule = LoadLibraryW(L"kernel32.dll"); GetNativeSystemInfoFunc GetNativeSystemInfo =(GetNativeSystemInfoFunc) GetProcAddress(hModule, "GetNativeSystemInfo"); #endif const char* SysName = "Windows"; SYSTEM_INFO stInfo; GetNativeSystemInfo( &stInfo ); DStringGDL *arch; switch(stInfo.wProcessorArchitecture) { case PROCESSOR_ARCHITECTURE_AMD64: arch = new DStringGDL("x64"); break; case PROCESSOR_ARCHITECTURE_INTEL: arch = new DStringGDL("x86"); break; case PROCESSOR_ARCHITECTURE_ARM: arch = new DStringGDL("ARM"); break; default: arch = new DStringGDL("unknown"); } ver->NewTag("ARCH", arch); ver->NewTag("OS", new DStringGDL(SysName)); ver->NewTag("OS_FAMILY", new DStringGDL(SysName)); ver->NewTag("OS_NAME", new DStringGDL(SysName)); #else struct utsname uts; uname(&uts); ver->NewTag("ARCH", new DStringGDL( uts.machine)); const char *SysName=uts.sysname; if (strcmp(SysName,"Linux") ==0) SysName="linux"; if (strcmp(SysName,"Darwin") ==0) SysName="darwin"; ver->NewTag("OS", new DStringGDL(SysName)); //correct IDL order ver->NewTag("OS_FAMILY", new DStringGDL( "unix")); // AC 2018-sep-07 if (strcmp(SysName,"darwin") ==0) SysName="Mac OS X"; ver->NewTag("OS_NAME", new DStringGDL(SysName)); #endif ver->NewTag("RELEASE", new DStringGDL( "8.2")); //we are at least 6.4 ver->NewTag("BUILD_DATE", new DStringGDL(BUILD_DATE)); ver->NewTag("MEMORY_BITS", new DIntGDL( sizeof(BaseGDL*)*8)); ver->NewTag("FILE_OFFSET_BITS", new DIntGDL( sizeof(SizeT)*8)); DVar *v = new DVar( "VERSION", ver); vIx = sysVarList.size(); sysVarList.push_back(v); sysVarRdOnlyList.push_back(v); // !Mouse DStructGDL* MouseData = new DStructGDL( "!MOUSE"); MouseData->NewTag("X", new DLongGDL( 0)); MouseData->NewTag("Y", new DLongGDL( 0)); MouseData->NewTag("BUTTON", new DLongGDL( 0)); MouseData->NewTag("TIME", new DLongGDL( 0)); DVar *Mouse = new DVar( "MOUSE", MouseData); MouseIx = sysVarList.size(); sysVarList.push_back(Mouse); // !Make_dll DStructGDL* 
MakeDllData = new DStructGDL( "!MAKE_DLL"); MakeDllData->NewTag("COMPILE_DIRECTORY", new DStringGDL("/tmp/")); MakeDllData->NewTag("COMPILER_NAME", new DStringGDL("GCC")); MakeDllData->NewTag("CC", new DStringGDL("gcc %X -fPIC -I%Z -c -D_REENTRANT %C -o %O")); MakeDllData->NewTag("LD", new DStringGDL("ld -shared -o %L %O %X")); DVar *MakeDll = new DVar( "MAKE_DLL", MakeDllData); MakeDllIx = sysVarList.size(); sysVarList.push_back(MakeDll); // !ERROR_STATE DStructGDL* eStateData = new DStructGDL( "!ERROR_STATE"); eStateData->NewTag("NAME", new DStringGDL( "IDL_M_SUCCESS")); eStateData->NewTag("BLOCK", new DStringGDL( "IDL_MBLK_CORE")); eStateData->NewTag("CODE", new DLongGDL( 0)); eStateData->NewTag("SYS_CODE", new DLongGDL( dimension( &dim2,one))); //idl 8 eStateData->NewTag("SYS_CODE_TYPE", new DStringGDL( "")); eStateData->NewTag("MSG", new DStringGDL( "")); eStateData->NewTag("SYS_MSG", new DStringGDL( "")); eStateData->NewTag("MSG_PREFIX", new DStringGDL( "% ")); DVar *eState = new DVar( "ERROR_STATE", eStateData); errorStateIx = sysVarList.size(); sysVarList.push_back(eState); // sysVarRdOnlyList.push_back(eState); // !ERROR DLongGDL *errorData = new DLongGDL( 0 ); DVar *errorVar = new DVar( "ERROR", errorData ); errorIx = sysVarList.size(); sysVarList.push_back( errorVar); //sysVarRdOnlyList.push_back( errorVar); !error is (no more?) a readonly variable. // !ERR DLongGDL *errData = new DLongGDL( 0 ); DVar *errVar = new DVar( "ERR", errData ); errIx = sysVarList.size(); sysVarList.push_back( errVar ); // sysVarRdOnlyList.push_back( errVar); // !ERR_STRING DStringGDL *err_stringData = new DStringGDL( ""); DVar *err_stringVar = new DVar( "ERR_STRING", err_stringData ); err_stringIx = sysVarList.size(); sysVarList.push_back( err_stringVar ); sysVarRdOnlyList.push_back( err_stringVar); //!err_string IS a readonly variable! 
// !VALUES DStructGDL* valuesData = new DStructGDL( "!VALUES"); if( std::numeric_limits< DFloat>::has_infinity) { valuesData->NewTag("F_INFINITY", new DFloatGDL( std::numeric_limits< DFloat>::infinity())); } else { #ifndef _MSC_VER // Can be ignored, because the windows version of limit has infinity() valuesData->NewTag("F_INFINITY", new DFloatGDL((float)1.0/0.0)); #endif } #ifdef NAN valuesData->NewTag("F_NAN", new DFloatGDL(NAN)); #else valuesData->NewTag("F_NAN", new DFloatGDL(sqrt((float) -1.0))); //sign depends on the architecture, dangerous way to define a +Nan! #endif if( std::numeric_limits< DDouble>::has_infinity) { valuesData->NewTag("D_INFINITY", new DDoubleGDL( std::numeric_limits< DDouble>::infinity())); } else { #ifndef _MSC_VER // Can be ignored, because the windows version of limit has infinity() valuesData->NewTag("D_INFINITY", new DDoubleGDL( (double)1.0/0.0)); #endif } #ifdef NAN valuesData->NewTag("D_NAN", new DDoubleGDL(NAN)); #else valuesData->NewTag("D_NAN", new DDoubleGDL(-sqrt((double) -1.0))); //sign depends on the architecture, dangerous way to define a +Nan! 
#endif DVar *values = new DVar( "VALUES", valuesData); valuesIx = sysVarList.size(); sysVarList.push_back(values); sysVarRdOnlyList.push_back( values); // !JOURNAL hold journal file lun DLongGDL *journalData = new DLongGDL( 0); DVar *journal = new DVar( "JOURNAL", journalData); journalIx = sysVarList.size(); sysVarList.push_back( journal); sysVarRdOnlyList.push_back( journal); // !EXCEPT DIntGDL *exceptData = new DIntGDL( 1); DVar *except = new DVar( "EXCEPT", exceptData); exceptIx=sysVarList.size(); sysVarList.push_back( except); // !MAP DStructGDL* mapData = new DStructGDL( "!MAP"); mapData->NewTag("PROJECTION", new DLongGDL( 0)); mapData->NewTag("SIMPLE", new DLongGDL( 0)); mapData->NewTag("FILL_METHOD", new DLongGDL( 0)); mapData->NewTag("UP_FLAGS", new DLongGDL( 0)); mapData->NewTag("UP_NAME", new DStringGDL( "")); mapData->NewTag("P0LON", new DDoubleGDL( 0.0)); mapData->NewTag("P0LAT", new DDoubleGDL( 0.0)); mapData->NewTag("U0", new DDoubleGDL( 0.0)); mapData->NewTag("V0", new DDoubleGDL( 0.0)); mapData->NewTag("SINO", new DDoubleGDL( 0.0)); mapData->NewTag("COSO", new DDoubleGDL( 0.0)); mapData->NewTag("ROTATION", new DDoubleGDL( 0.0)); mapData->NewTag("SINR", new DDoubleGDL( 0.0)); mapData->NewTag("COSR", new DDoubleGDL( 0.0)); mapData->NewTag("A", new DDoubleGDL( 0.0)); mapData->NewTag("E2", new DDoubleGDL( 0.0)); mapData->NewTag("UV", new DDoubleGDL( dimension( 2))); mapData->NewTag("POLE", new DDoubleGDL( dimension( 7))); mapData->NewTag("UV_BOX", new DDoubleGDL( dimension( 4))); mapData->NewTag("LL_BOX", new DDoubleGDL( dimension( 4))); mapData->NewTag("SEGMENT_LENGTH", new DDoubleGDL( 0.0)); mapData->NewTag("P", new DDoubleGDL( dimension( 16))); mapData->NewTag("PIPELINE", new DDoubleGDL( dimension( 8, 12))); DVar *map=new DVar( "MAP", mapData); mapIx=sysVarList.size(); sysVarList.push_back( map); // !CPU // init independent of OpenMP usage #ifdef _OPENMP CpuTPOOL_NTHREADS = omp_get_num_procs(); omp_set_num_threads(CpuTPOOL_NTHREADS); #else 
CpuTPOOL_NTHREADS = 1; #endif CpuTPOOL_MIN_ELTS = DefaultTPOOL_MIN_ELTS; CpuTPOOL_MAX_ELTS = DefaultTPOOL_MAX_ELTS; DStructGDL* cpuData = new DStructGDL( "!CPU"); cpuData->NewTag("HW_VECTOR", new DLongGDL( 0)); cpuData->NewTag("VECTOR_ENABLE", new DLongGDL( 0)); #ifdef _OPENMP cpuData->NewTag("HW_NCPU", new DLongGDL( omp_get_num_procs())); #else cpuData->NewTag("HW_NCPU", new DLongGDL( 1)); #endif cpuData->NewTag("TPOOL_NTHREADS", new DLongGDL( CpuTPOOL_NTHREADS)); //if use DLong64 below, please update basic_pro.cpp (function cpu()) and //add an 'assureLong64Kw()' function in envt.cpp. Otherwise the program will //crash in cpu(). (should have been done on 2014 March 18 by AC (tested).) cpuData->NewTag("TPOOL_MIN_ELTS", new DLong64GDL( CpuTPOOL_MIN_ELTS)); cpuData->NewTag("TPOOL_MAX_ELTS", new DLong64GDL( CpuTPOOL_MAX_ELTS)); DVar *cpu=new DVar( "CPU", cpuData); cpuIx=sysVarList.size(); sysVarList.push_back( cpu); sysVarRdOnlyList.push_back( cpu); #ifdef _OPENMP if( omp_get_dynamic()) omp_set_dynamic( 1); #endif #if defined (_WIN32) #define realpath(N,R) _fullpath((R),(N),_MAX_PATH) // ref:http://sourceforge.net/p/mingw/patches/256/ Keith Marshall 2005-12-02 #endif // !DIR #ifndef EXEC_PREFIX #define EXEC_PREFIX "" #endif DStringGDL *dirData = new DStringGDL( EXEC_PREFIX); string gdlDir=GetEnvString("GDL_DIR"); if( gdlDir == "") gdlDir=GetEnvString("IDL_DIR"); if( gdlDir != "") { delete dirData; dirData = new DStringGDL( gdlDir); } DVar *dir = new DVar( "DIR", dirData); dirIx=sysVarList.size(); sysVarList.push_back( dir); // !GDL_MAPS_DIR string tmpDir=GetEnvString("GDL_MAPS_DIR"); if( tmpDir == "") tmpDir = string(GDLDATADIR) + "/resource/maps/"; char *symlinkpath =const_cast<char*> (tmpDir.c_str());// is the path a true path ? 
#ifdef _MSC_VER #define PATH_MAX MAX_PATH #endif //patch #90 #ifndef PATH_MAX #define PATH_MAX 4096 #endif char actualpath [PATH_MAX+1]; char *ptr; ptr = realpath(symlinkpath, actualpath); if( ptr != NULL ) tmpDir=string(ptr)+lib::PathSeparator(); else tmpDir=""; DStringGDL *GdlMapsDataDir = new DStringGDL( tmpDir); DVar *GdlMapsDir = new DVar("GDL_MAPS_DIR", GdlMapsDataDir); sysVarList.push_back(GdlMapsDir); // !STIME DStringGDL *stimeData = new DStringGDL( ""); DVar *stime = new DVar( "STIME", stimeData); stimeIx=sysVarList.size(); sysVarList.push_back( stime); sysVarRdOnlyList.push_back( stime); // make it read only // !WARN DStructGDL* warnData = new DStructGDL( "!WARN"); warnData->NewTag("OBS_ROUTINES", new DByteGDL( 0)); warnData->NewTag("OBS_SYSVARS", new DByteGDL( 0)); warnData->NewTag("PARENS", new DByteGDL( 0)); DVar *warn = new DVar( "WARN", warnData); warnIx = sysVarList.size(); sysVarList.push_back(warn); //!COLOR static const int col[]={240,248,255,250,235,215,0,255,255,127,255,212,240,255,255,245,245,220,255,228,196,0,0,0,255,235,205,0,0,255,138,43, 226,165,42,42,222,184,135,95,158,160,127,255,0,210,105,30,255,127,80,100,149,237,255,248,220,220,20,60,0,255,255,0, 0,139,0,139,139,184,134,11,169,169,169,0,100,0,169,169,169,189,183,107,139,0,139,85,107,47,255,140,0,153,50,204, 139,0,0,233,150,122,143,188,143,72,61,139,47,79,79,47,79,79,0,206,209,148,0,211,255,20,147,0,191,255,105,105, 105,105,105,105,30,144,255,178,34,34,255,250,240,34,139,34,255,0,255,220,220,220,248,248,255,255,215,0,218,165,32,127, 127,127,0,127,0,173,255,47,127,127,127,240,255,240,255,105,180,205,92,92,75,0,130,255,255,240,240,230,140,230,230,250, 255,240,245,124,252,0,255,250,205,173,216,230,240,128,128,224,255,255,250,250,210,144,238,144,211,211,211,211,211,211,255,182, 193,255,160,122,32,178,170,135,206,250,119,136,153,119,136,153,176,196,222,255,255,224,0,255,0,50,205,50,250,240,230,255, 
0,255,127,0,0,102,205,170,0,0,205,186,85,211,147,112,219,60,179,113,123,104,238,0,250,154,72,209,204,199,21,133, 025,25,112,245,255,250,255,228,225,255,228,181,255,222,173,0,0,128,253,245,230,128,128,0,107,142,35,255,165,0,255,69, 0,218,112,214,238,232,170,152,251,152,175,238,238,219,112,147,255,239,213,255,218,185,205,133,63,255,192,203,221,160,221,176, 224,230,127,0,127,255,0,0,188,143,143,65,105,225,139,69,19,250,128,114,244,164,96,46,139,87,255,245,238,160,82,45, 192,192,192,135,206,235,106,90,205,112,128,144,112,128,144,255,250,250,0,255,127,70,130,180,210,180,140,0,128,128,216,191, 216,255,99,71,64,224,208,238,130,238,245,222,179,255,255,255,245,245,245,255,255,0,154,205,50}; static const string coln[]={"ALICE_BLUE","ANTIQUE_WHITE","AQUA","AQUAMARINE","AZURE","BEIGE","BISQUE","BLACK","BLANCHED_ALMOND", "BLUE","BLUE_VIOLET","BROWN","BURLYWOOD","CADET_BLUE","CHARTREUSE","CHOCOLATE","CORAL","CORNFLOWER","CORNSILK", "CRIMSON","CYAN","DARK_BLUE","DARK_CYAN","DARK_GOLDENROD","DARK_GRAY","DARK_GREEN","DARK_GREY","DARK_KHAKI", "DARK_MAGENTA","DARK_OLIVE_GREEN","DARK_ORANGE","DARK_ORCHID","DARK_RED","DARK_SALMON","DARK_SEA_GREEN", "DARK_SLATE_BLUE","DARK_SLATE_GRAY","DARK_SLATE_GREY","DARK_TURQUOISE","DARK_VIOLET","DEEP_PINK","DEEP_SKY_BLUE", "DIM_GRAY","DIM_GREY","DODGER_BLUE","FIREBRICK","FLORAL_WHITE","FOREST_GREEN","FUCHSIA","GAINSBORO","GHOST_WHITE", "GOLD","GOLDENROD","GRAY","GREEN","GREEN_YELLOW","GREY","HONEYDEW","HOT_PINK","INDIAN_RED","INDIGO","IVORY","KHAKI", "LAVENDER","LAVENDER_BLUSH","LAWN_GREEN","LEMON_CHIFFON","LIGHT_BLUE","LIGHT_CORAL","LIGHT_CYAN","LIGHT_GOLDENROD", "LIGHT_GREEN","LIGHT_GRAY","LIGHT_GREY","LIGHT_PINK","LIGHT_SALMON","LIGHT_SEA_GREEN","LIGHT_SKY_BLUE","LIGHT_SLATE_GRAY", "LIGHT_SLATE_GREY","LIGHT_STEEL_BLUE","LIGHT_YELLOW","LIME","LIME_GREEN","LINEN","MAGENTA","MAROON","MEDIUM_AQUAMARINE", "MEDIUM_BLUE","MEDIUM_ORCHID","MEDIUM_PURPLE","MEDIUM_SEA_GREEN","MEDIUM_SLATE_BLUE","MEDIUM_SPRING_GREEN","MEDIUM_TURQUOISE", 
"MEDIUM_VIOLET_RED","MIDNIGHT_BLUE","MINT_CREAM","MISTY_ROSE","MOCCASIN","NAVAJO_WHITE","NAVY","OLD_LACE","OLIVE", "OLIVE_DRAB","ORANGE","ORANGE_RED","ORCHID","PALE_GOLDENROD","PALE_GREEN","PALE_TURQUOISE","PALE_VIOLET_RED","PAPAYA_WHIP", "PEACH_PUFF","PERU","PINK","PLUM","POWDER_BLUE","PURPLE","RED","ROSY_BROWN","ROYAL_BLUE","SADDLE_BROWN","SALMON", "SANDY_BROWN","SEA_GREEN","SEASHELL","SIENNA","SILVER","SKY_BLUE","SLATE_BLUE","SLATE_GRAY","SLATE_GREY","SNOW","SPRING_GREEN", "STEEL_BLUE","TAN","TEAL","THISTLE","TOMATO","TURQUOISE","VIOLET","WHEAT","WHITE","WHITE_SMOKE","YELLOW","YELLOW_GREEN"}; int ncol=147; int i,k; DStructGDL* colorData = new DStructGDL( "!COLOR"); for (i=0, k=0; i<ncol; ++i){ colorData->NewTag(coln[i], new DByteGDL( dimension(3))); for (int j=0; j<3; ++j) (*static_cast<DByteGDL*>( colorData->GetTag( i, 0)))[j] = col[k++]; } DVar *color = new DVar( "COLOR", colorData); colorIx = sysVarList.size(); sysVarList.push_back(color); sysVarRdOnlyList.push_back( color); //Is Readonly. }
int /* O [nbr] Thread number */ nco_openmp_ini /* [fnc] Initialize OpenMP threading environment */ (const int thr_nbr) /* I [nbr] User-requested thread number */ { /* Purpose: Initialize OpenMP multi-threading environment Honor user-requested thread number, balance against known code efficiency, print diagnostics Returns thr_nbr=1 in three situations: 1. UP codes (not threaded) 2. SMP codes compiled with compilers which lack OpenMP support 3. SMP codes where single thread requested/advised Otherwise returns system-dependent thr_nbr */ /* Using naked stdin/stdout/stderr in parallel region generates warning Copy appropriate filehandle to variable scoped shared in parallel clause */ char *nvr_OMP_NUM_THREADS; /* [sng] Environment variable OMP_NUM_THREADS */ char *sng_cnv_rcd=NULL_CEWI; /* [sng] strtol()/strtoul() return code */ FILE * const fp_stderr=stderr; /* [fl] stderr filehandle CEWI */ nco_bool USR_SPC_THR_RQS=False; int dyn_thr=1; /* [flg] Allow system to dynamically set number of threads */ int ntg_OMP_NUM_THREADS=int_CEWI; // [nbr] OMP_NUM_THREADS environment variable int prc_nbr_max; /* [nbr] Maximum number of processors available */ int thr_nbr_act; /* O [nbr] Number of threads NCO uses */ int thr_nbr_max_fsh=4; /* [nbr] Maximum number of threads program can use efficiently */ int thr_nbr_max=int_CEWI; /* [nbr] Maximum number of threads system allows */ int thr_nbr_rqs=int_CEWI; /* [nbr] Number of threads to request */ #ifndef _OPENMP if(nco_dbg_lvl_get() >= nco_dbg_std) (void)fprintf(fp_stderr,"%s: INFO Build compiler lacked (or user turned off) OpenMP support. Code will execute with single thread in Uni-Processor (UP) mode.\n",nco_prg_nm_get()); return (int)1; #endif /* !_OPENMP */ /* Strategy: 0. Determine maximum number of threads system will allocate (thr_nbr_max) 1. Command-line thread request, if any, overrides automatic algorithm 2. If no command-line request then system allocates OMP_NUM_THREADS if possible 3. 
Reduce maximum number of threads available to system to thr_nbr_max_fsh Many operators cannot use more than thr_nbr_max_fsh ~ 2--4 threads efficiently Play nice: Set dynamic threading so that system can make efficiency decisions When dynamic threads are set, system never allocates more than thr_nbr_max_fsh */ if(thr_nbr < 0){ (void)fprintf(fp_stderr,"%s: ERROR User-requested thread number = %d is less than zero\n",nco_prg_nm_get(),thr_nbr); nco_exit(EXIT_FAILURE); } /* endif err */ if(thr_nbr == 0) if(nco_dbg_lvl_get() >= nco_dbg_scl && nco_dbg_lvl_get() != nco_dbg_dev ) (void)fprintf(fp_stderr,"%s: INFO User did not specify thread request > 0 on command line. NCO will automatically assign threads based on OMP_NUM_THREADS environment and machine capabilities.\nHINT: Not specifiying any --thr_nbr (or specifying --thr_nbr=0) causes NCO to try to pick the optimal thread number. Specifying --thr_nbr=1 tells NCO to execute in Uni-Processor (UP) (i.e., single-threaded) mode.\n",nco_prg_nm_get()); if(thr_nbr > 0) USR_SPC_THR_RQS=True; prc_nbr_max=omp_get_num_procs(); /* [nbr] Maximum number of processors available */ if(omp_in_parallel()){ (void)fprintf(fp_stderr,"%s: ERROR Attempted to get maximum thread number from within parallel region\n",nco_prg_nm_get()); nco_exit(EXIT_FAILURE); }else{ thr_nbr_max=omp_get_max_threads(); /* [nbr] Maximum number of threads system allows */ } /* end error */ if(nco_dbg_lvl_get() >= nco_dbg_scl && nco_dbg_lvl_get() != nco_dbg_dev){ if((nvr_OMP_NUM_THREADS=getenv("OMP_NUM_THREADS"))) ntg_OMP_NUM_THREADS=(int)strtol(nvr_OMP_NUM_THREADS,&sng_cnv_rcd,NCO_SNG_CNV_BASE10); /* [sng] Environment variable OMP_NUM_THREADS */ if(nvr_OMP_NUM_THREADS && *sng_cnv_rcd) nco_sng_cnv_err(nvr_OMP_NUM_THREADS,"strtol",sng_cnv_rcd); (void)fprintf(fp_stderr,"%s: INFO Environment variable OMP_NUM_THREADS ",nco_prg_nm_get()); if(ntg_OMP_NUM_THREADS > 0) (void)fprintf(fp_stderr,"= %d\n",ntg_OMP_NUM_THREADS); else (void)fprintf(fp_stderr,"does not exist\n"); 
(void)fprintf(fp_stderr,"%s: INFO omp_get_num_procs() reports number of processors available is %d\n",nco_prg_nm_get(),prc_nbr_max); (void)fprintf(fp_stderr,"%s: INFO omp_get_max_threads() reports maximum number of threads system allows is %d\n",nco_prg_nm_get(),thr_nbr_max); } /* endif dbg */ if(USR_SPC_THR_RQS){ /* Try to honor user-specified thread request... */ thr_nbr_rqs=thr_nbr; /* [nbr] Number of threads to request */ /* ...if possible... */ if(nco_dbg_lvl_get() >= nco_dbg_scl) (void)fprintf(fp_stderr,"%s: INFO Command-line requests %d thread%s\n",nco_prg_nm_get(),thr_nbr,(thr_nbr > 1) ? "s" : ""); if(thr_nbr > thr_nbr_max){ (void)fprintf(fp_stderr,"%s: WARNING Reducing user-requested thread number = %d to maximum thread number allowed = %d\n",nco_prg_nm_get(),thr_nbr,thr_nbr_max); thr_nbr_rqs=thr_nbr_max; /* [nbr] Number of threads to request */ } /* endif */ }else{ /* !USR_SPC_THR_RQS */ /* Otherwise use automatic thread allocation algorithm */ /* Request maximum number of threads permitted */ thr_nbr_rqs=thr_nbr_max; /* [nbr] Number of threads to request */ /* Restrict threading on per-program basis to play nicely with others */ switch(nco_prg_id_get()){ /* Operators with pre-set thread limit NB: All operators currently have default restrictions 2007: Only ncwa and ncap2 have a chance to scale on non-parallel filesystems ncap2 may, one day, see a big performance boost from threading However, as of 20090327, ncap2 threading may be buggy due to ANTLR Moreover, we want to prevent hogging processes on 32-way nodes until/unless clear benefits of threading are demonstrated. 
2015: Threads improve ncks regridding performance by 2-3x on ACME ~1-20 GB netCDF3 files */ case ncap: /* 20090327: Restrict ncap2 to one thread until ANTLR threading resolved */ thr_nbr_max_fsh=1; break; case ncecat: case ncrcat: /* ncecat and ncrcat are extremely I/O intensive Maximum efficiency when one thread reads from input file while other writes to output file */ // 20140219: Turn-off OpenMP until thoroughly tested // thr_nbr_max_fsh=2; thr_nbr_max_fsh=1; break; case ncks: // 20150529: Turn-on OpenMP for regridder thr_nbr_max_fsh=16; break; case ncwa: // 20150530: Turn-on OpenMP for debugging // 20150610: Eight threads with ncwa seemed to work for a little while, then it got flaky. Turned-off for 4.5.0 release // 20150622: Allowing eight threads again for debugging with -D 3 // 20150701: Firmly established that netCDF4 involvement hoses threading because HDF5 is not threadsafe by default // 20150710: Turned-off for 4.5.1 release // Symptoms of bugs, if any, show up with // cd ~/nco/bm;nco_bm.pl --regress ncwa;cd - thr_nbr_max_fsh=1; if(nco_dbg_lvl_get() >= nco_dbg_scl) thr_nbr_max_fsh=1; break; /* Operators with higher maximum pre-set thread limit (NB: not all of these are threaded!) */ case ncra: thr_nbr_max_fsh=1; if(nco_dbg_lvl_get() >= nco_dbg_scl) thr_nbr_max_fsh=1; break; case ncbo: case ncatted: case ncfe: case ncflint: case ncpdq: case ncrename: case ncge: // 20140219: Turn-off OpenMP until thoroughly tested // thr_nbr_max_fsh=4; thr_nbr_max_fsh=1; break; default: nco_dfl_case_prg_id_err(); break; } /* end case */ /* Automatic algorithm tries to play nice with others */ (void)omp_set_dynamic(dyn_thr); /* [flg] Allow system to dynamically set number of threads */ if(nco_dbg_lvl_get() >= nco_dbg_std) (void)fprintf(fp_stderr,"%s: INFO omp_set_dynamic() used to %s OS to dynamically set threads\n",nco_prg_nm_get(),(dyn_thr ? 
"ALLOW" : "DISALLOW")); dyn_thr=omp_get_dynamic(); /* [flg] Allow system to dynamically set number of threads */ if(nco_dbg_lvl_get() >= nco_dbg_std) (void)fprintf(fp_stderr,"%s: INFO omp_get_dynamic() reports system will%s utilize dynamic threading\n",nco_prg_nm_get(),(dyn_thr ? "" : " NOT")); /* Apply program/system limitations */ if(thr_nbr_max > thr_nbr_max_fsh){ if(nco_dbg_lvl_get() >= nco_dbg_std) (void)fprintf(fp_stderr,"%s: INFO Reducing default thread number from %d to %d, an operator-dependent \"play-nice\" number set in nco_openmp_ini()\n",nco_prg_nm_get(),thr_nbr_max,thr_nbr_max_fsh); thr_nbr_rqs=thr_nbr_max_fsh; /* [nbr] Number of threads to request */ } /* endif */ } /* !USR_SPC_THR_RQS */ #ifdef ENABLE_NETCDF4 if(nco_prg_id_get() != ncks && nco_prg_id_get() != ncwa && nco_prg_id_get() != ncra && thr_nbr_rqs > 1){ if(USR_SPC_THR_RQS && nco_dbg_lvl_get() >= nco_dbg_fl) (void)fprintf(stdout,"%s: WARNING This is TODO nco939. Requested threading with netCDF4 (HDF5) support. The NCO thread request algorithm considers user-input, environment variables, and software and hardware limitations in determining the number of threads to request, thr_nbr_rqs. At this point NCO would request result %d threads from a netCDF3-based library. However, this NCO was built with netCDF4, which relies on HDF5. netCDF4 is not thread-safe unless HDF5 is configured with the (non-default) --enable-threadsafe option. NCO currently has no way to know whether HDF5 was built thread-safe. Hence, all netCDF4-based operators are currently restricted to a single thread. The program will now automatically set thr_nbr_rqs = 1.\nThis unfortunate limitation is necessary to keep the NCO developers sane. 
If you want/need threading in netCDF4-based NCO, please politely yet firmly request of the Unidata netCDF developers that better thread support be built into netCDF4, and request of the HDF5 developers that they make the --enable-threadsafe option compatible with all HDF5 libraries and APIs, including Fortran (which, as of HDF5 1.8.0 in 2008, is incompatible with --enable-threadsafe).\n",nco_prg_nm_get(),thr_nbr_rqs); thr_nbr_rqs=1; } /* endif */ #endif /* !ENABLE_NETCDF4 */ /* Set thread number */ if(omp_in_parallel()){ (void)fprintf(fp_stderr,"%s: ERROR Attempted to set thread number from within parallel region\n",nco_prg_nm_get()); nco_exit(EXIT_FAILURE); }else{ (void)omp_set_num_threads(thr_nbr_rqs); if(nco_dbg_lvl_get() >= nco_dbg_std) (void)fprintf(fp_stderr,"%s: INFO omp_set_num_threads() used to set execution environment to spawn teams of %d thread(s)\n",nco_prg_nm_get(),thr_nbr_rqs); } /* end error */ thr_nbr_act=omp_get_max_threads(); if(nco_dbg_lvl_get() >= nco_dbg_scl) (void)fprintf(fp_stderr,"%s: INFO After using omp_set_num_threads() to adjust for any user requests/NCO optimizations, omp_get_max_threads() reports that a parallel construct here/now would spawn %d thread(s)\n",nco_prg_nm_get(),thr_nbr_act); #ifdef _OPENMP if(nco_dbg_lvl_get() >= nco_dbg_scl){ # pragma omp parallel default(none) shared(thr_nbr_act) { /* begin OpenMP parallel */ # pragma omp single nowait { /* begin OpenMP single */ thr_nbr_act=omp_get_num_threads(); /* [nbr] Number of threads NCO uses */ if(nco_dbg_lvl_get() >= nco_dbg_std) (void)fprintf(fp_stderr,"%s: INFO Small parallel test region spawned team of %d thread(s)\n",nco_prg_nm_get(),thr_nbr_act); } /* end OpenMP single */ } /* end OpenMP parallel */ } /* end dbg */ #endif /* !_OPENMP */ /* Issue any warnings about OpenMP credibility during debugging phase */ if(True) if((nco_prg_id_get() == ncwa || nco_prg_id_get() == ncra) && thr_nbr_act > 1) if(nco_dbg_lvl_get() >= nco_dbg_std) (void)fprintf(fp_stderr,"%s: WARNING 
OpenMP threading active with %d threads but not guaranteed to work on this operator. If strange behavior (e.g., NaN results) ensues, manually turn-off multi-threading by specifying \"-t 1\" option.\n",nco_prg_nm_get(),thr_nbr_act); return thr_nbr_act; /* O [nbr] Number of threads NCO uses */ } /* end nco_openmp_ini() */
const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); LIBXSTREAM_TARGET(mic) void process(LIBXSTREAM_INVAL(size_t) size, LIBXSTREAM_INVAL(size_t) nn, const size_t* idata, const double* adata, const double* bdata, double* cdata) { if (0 < LIBXSTREAM_GETVAL(size)) { static const double alpha = 1, beta = 1; static const char trans = 'N'; const int isize = static_cast<int>(size); const size_t base = idata[0]; #if defined(_OPENMP) && defined(MULTI_DGEMM_USE_NESTED) const int nthreads = omp_get_max_threads() / LIBXSTREAM_GETVAL(size); const int dynamic = omp_get_dynamic(), nested = omp_get_nested(); omp_set_dynamic(0); omp_set_nested(1); # pragma omp parallel for schedule(dynamic,1) num_threads(LIBXSTREAM_GETVAL(size)) #endif for (int i = 0; i < isize; ++i) { #if defined(_OPENMP) && defined(MULTI_DGEMM_USE_NESTED) omp_set_num_threads(nthreads); #endif LIBXSTREAM_ASSERT(base <= idata[i]); const size_t i0 = idata[i], i1 = (i + 1) < isize ? idata[i+1] : (i0 + LIBXSTREAM_GETVAL(nn)), n2 = i1 - i0, offset = i0 - base; const int n = static_cast<int>(std::sqrt(static_cast<double>(n2)) + 0.5); DGEMM(&trans, &trans, &n, &n, &n, &alpha, adata + offset, &n, bdata + offset, &n, &beta, cdata + offset, &n); } #if defined(_OPENMP) && defined(MULTI_DGEMM_USE_NESTED)
int main(int argc, char *argv[]) { char Class; logical verified; double mflops; double t, tmax, trecs[t_last+1]; int i; char *t_names[t_last+1]; int num_threads; if(argc == 2) { num_threads = atoi(argv[1]); printf("Number of threads received are - %d", num_threads); } else { num_threads = -1; } if(num_threads != -1) { omp_set_dynamic(num_threads); //Nitin Chugh printf(" Dynamic environment state - %d\n", omp_get_dynamic()); omp_set_num_threads(num_threads); } printf(" Number of processors - %d\n", omp_get_num_procs()); //--------------------------------------------------------------------- // Setup info for timers //--------------------------------------------------------------------- FILE *fp; if ((fp = fopen("timer.flag", "r")) != NULL) { timeron = true; t_names[t_total] = "total"; t_names[t_rhsx] = "rhsx"; t_names[t_rhsy] = "rhsy"; t_names[t_rhsz] = "rhsz"; t_names[t_rhs] = "rhs"; t_names[t_jacld] = "jacld"; t_names[t_blts] = "blts"; t_names[t_jacu] = "jacu"; t_names[t_buts] = "buts"; t_names[t_add] = "add"; t_names[t_l2norm] = "l2norm"; fclose(fp); } else { timeron = false; } //--------------------------------------------------------------------- // read input data //--------------------------------------------------------------------- read_input(); //--------------------------------------------------------------------- // set up domain sizes //--------------------------------------------------------------------- domain(); //--------------------------------------------------------------------- // set up coefficients //--------------------------------------------------------------------- setcoeff(); //--------------------------------------------------------------------- // set the boundary values for dependent variables //--------------------------------------------------------------------- setbv(); //--------------------------------------------------------------------- // set the initial values for dependent variables 
//--------------------------------------------------------------------- setiv(); //--------------------------------------------------------------------- // compute the forcing term based on prescribed exact solution //--------------------------------------------------------------------- erhs(); //--------------------------------------------------------------------- // perform one SSOR iteration to touch all data pages //--------------------------------------------------------------------- ssor(1); //--------------------------------------------------------------------- // reset the boundary and initial values //--------------------------------------------------------------------- setbv(); setiv(); //--------------------------------------------------------------------- // perform the SSOR iterations //--------------------------------------------------------------------- ssor(itmax); //--------------------------------------------------------------------- // compute the solution error //--------------------------------------------------------------------- error(); //--------------------------------------------------------------------- // compute the surface integral //--------------------------------------------------------------------- pintgr(); //--------------------------------------------------------------------- // verification test //--------------------------------------------------------------------- verify ( rsdnm, errnm, frc, &Class, &verified ); mflops = (double)itmax * (1984.77 * (double)nx0 * (double)ny0 * (double)nz0 - 10923.3 * pow(((double)(nx0+ny0+nz0)/3.0), 2.0) + 27770.9 * (double)(nx0+ny0+nz0)/3.0 - 144010.0) / (maxtime*1000000.0); print_results("LU", Class, nx0, ny0, nz0, itmax, maxtime, mflops, " floating point", verified, NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, "(none)"); //--------------------------------------------------------------------- // More timers //--------------------------------------------------------------------- if 
(timeron) { for (i = 1; i <= t_last; i++) { trecs[i] = timer_read(i); } tmax = maxtime; if (tmax == 0.0) tmax = 1.0; printf(" SECTION Time (secs)\n"); for (i = 1; i <= t_last; i++) { printf(" %-8s:%9.3f (%6.2f%%)\n", t_names[i], trecs[i], trecs[i]*100./tmax); if (i == t_rhs) { t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-rhs", t, t*100./tmax); t = trecs[i] - t; printf(" --> %8s:%9.3f (%6.2f%%)\n", "rest-rhs", t, t*100./tmax); } } } return 0; }
int main () { int d_o = omp_get_dynamic (); int n_o = omp_get_nested (); omp_sched_t s_o; int c_o; omp_get_schedule (&s_o, &c_o); int m_o = omp_get_max_threads (); omp_set_dynamic (1); omp_set_nested (1); omp_set_schedule (omp_sched_static, 2); omp_set_num_threads (4); int d = omp_get_dynamic (); int n = omp_get_nested (); omp_sched_t s; int c; omp_get_schedule (&s, &c); int m = omp_get_max_threads (); if (!omp_is_initial_device ()) abort (); #pragma omp target if (0) { omp_sched_t s_c; int c_c; omp_get_schedule (&s_c, &c_c); if (d_o != omp_get_dynamic () || n_o != omp_get_nested () || s_o != s_c || c_o != c_c || m_o != omp_get_max_threads ()) abort (); omp_set_dynamic (0); omp_set_nested (0); omp_set_schedule (omp_sched_dynamic, 4); omp_set_num_threads (2); if (!omp_is_initial_device ()) abort (); } if (!omp_is_initial_device ()) abort (); omp_sched_t s_c; int c_c; omp_get_schedule (&s_c, &c_c); if (d != omp_get_dynamic () || n != omp_get_nested () || s != s_c || c != c_c || m != omp_get_max_threads ()) abort (); #pragma omp target if (0) #pragma omp teams { omp_sched_t s_c; int c_c; omp_get_schedule (&s_c, &c_c); if (d_o != omp_get_dynamic () || n_o != omp_get_nested () || s_o != s_c || c_o != c_c || m_o != omp_get_max_threads ()) abort (); omp_set_dynamic (0); omp_set_nested (0); omp_set_schedule (omp_sched_dynamic, 4); omp_set_num_threads (2); if (!omp_is_initial_device ()) abort (); } if (!omp_is_initial_device ()) abort (); omp_get_schedule (&s_c, &c_c); if (d != omp_get_dynamic () || n != omp_get_nested () || s != s_c || c != c_c || m != omp_get_max_threads ()) abort (); return 0; }