int
main ()
{
  test1 ();
  test2 ();
  test3 ();
  omp_set_schedule (omp_sched_static, 0);
  test4 ();
  omp_set_schedule (omp_sched_static, 3);
  test4 ();
  omp_set_schedule (omp_sched_dynamic, 5);
  test4 ();
  omp_set_schedule (omp_sched_guided, 2);
  test4 ();
  return 0;
}
int
main (void)
{
  if (2 * sizeof (int) != sizeof (long long))
    return 0;
  test1 ();
  test2 ();
  test3 ();
  test4 ();
  omp_set_schedule (omp_sched_static, 0);
  test5 ();
  omp_set_schedule (omp_sched_static, 3);
  test5 ();
  omp_set_schedule (omp_sched_dynamic, 5);
  test5 ();
  omp_set_schedule (omp_sched_guided, 2);
  test5 ();
  return 0;
}
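/* The two drivers above call test1 ()..test5 (), which are defined elsewhere
   in the original test files.  As a minimal sketch (a hypothetical helper,
   not the original tests), a check of this shape confirms that
   omp_get_schedule reports whatever was last passed to omp_set_schedule
   before a schedule(runtime) loop runs.  */
#include <omp.h>
#include <stdlib.h>

static void
check_runtime_schedule (omp_sched_t expected_kind, int expected_chunk)
{
  omp_sched_t kind;
  int chunk;
  omp_get_schedule (&kind, &chunk);
  /* A chunk size of 0 requests the implementation default, so only the kind
     is compared in that case.  */
  if (kind != expected_kind
      || (expected_chunk > 0 && chunk != expected_chunk))
    abort ();
}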
int main(int argc, char *argv[])
{
    int max = MAX;
    int min = MIN;
    int size = (max - min) / 2;
    int *array = (int *) malloc(sizeof(int) * size);
    int i;
    struct timespec start, end, elapsed;
    int c = 0;
    int num_threads = 1;
    int modifier = 0;
    omp_sched_t schedule = omp_sched_static;   /* was the magic number 1 */

    if (array == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    while ((c = getopt(argc, argv, "n:s:m:h")) != -1) {
        switch (c) {
        case 'n': num_threads = atoi(optarg); break;
        case 's': schedule = (omp_sched_t) atoi(optarg); break;
        case 'm': modifier = atoi(optarg); break;
        case 'h': usage(); exit(0);
        default: break;
        }
    }

    omp_set_num_threads(num_threads);
    omp_set_schedule(schedule, modifier);

    clock_gettime(CLOCK_REALTIME, &start);

    for (i = 1; i < size; i++)
        array[i] = 0;

#pragma omp parallel for
    for (i = 1; i < size; i++)
        array[i] = gold(2 * i);

#pragma omp parallel for
    for (i = 1; i < size; i++)
        if (array[i] == 0)
            fprintf(stdout, "Violated at %d\n", 2 * i);

    clock_gettime(CLOCK_REALTIME, &end);
    elapsed.tv_sec = end.tv_sec - start.tv_sec;
    elapsed.tv_nsec = end.tv_nsec - start.tv_nsec;
    if (elapsed.tv_nsec < 0) {
        elapsed.tv_sec -= 1;
        elapsed.tv_nsec += 1000000000;
    }
    /* Pad nanoseconds to nine digits so the fractional part prints correctly. */
    printf("%ld.%09ld\n", elapsed.tv_sec, elapsed.tv_nsec);

    free(array);
    return 0;
}
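/* gold() and usage() are not shown above.  A rough sketch (hypothetical,
   under the assumption that gold(n) verifies Goldbach's conjecture for the
   even number n): gold returns nonzero iff n can be written as the sum of
   two primes, and usage prints the command-line options used above.  */
#include <stdio.h>

static int is_prime(int n)
{
    if (n < 2)
        return 0;
    for (int d = 2; (long) d * d <= n; d++)
        if (n % d == 0)
            return 0;
    return 1;
}

static int gold(int n)
{
    /* Even n: look for a prime pair p + (n - p) = n. */
    for (int p = 2; p <= n / 2; p++)
        if (is_prime(p) && is_prime(n - p))
            return 1;
    return 0;
}

static void usage(void)
{
    /* Hypothetical usage text matching the getopt string "n:s:m:h". */
    fprintf(stderr, "usage: -n <threads> -s <schedule kind> -m <modifier>\n");
}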
void testScheduler(int nThreads, graph* G, char debug)
{
    double runtime;

    // Set max nThreads
    omp_set_num_threads(nThreads);

    if (mpi_id == 0)
        printf("Scheduler (Static, %d)", G->N / 100);
    resetGraph(G);
    omp_set_schedule(omp_sched_static, G->N / 100);
    if (mpi_id == 0)
        tick();
    dijkstra(G, 0, debug);
    if (mpi_id == 0) {
        runtime = tack();
        printf("working for [%f] sec.\n", runtime);
    }

    if (mpi_id == 0)
        printf("Scheduler (dynamic, %d)", G->N / 100);
    resetGraph(G);
    omp_set_schedule(omp_sched_dynamic, G->N / 100);
    if (mpi_id == 0)
        tick();
    dijkstra(G, 0, debug);
    if (mpi_id == 0) {
        runtime = tack();
        printf("working for [%f] sec.\n", runtime);
    }

    if (mpi_id == 0)
        printf("Scheduler (guided, %d)", G->N / 100);
    resetGraph(G);
    omp_set_schedule(omp_sched_guided, G->N / 100);
    if (mpi_id == 0)
        tick();
    dijkstra(G, 0, debug);
    if (mpi_id == 0) {
        runtime = tack();
        printf("working for [%f] sec.\n", runtime);
    }
}
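/* tick() and tack() are the timing helpers used above but not shown.  A
   minimal sketch (hypothetical, assuming a single non-reentrant timer based
   on omp_get_wtime): tick() records a start time, tack() returns the elapsed
   seconds since the last tick().  */
#include <omp.h>

static double tick_start;

static void tick(void)
{
    tick_start = omp_get_wtime();
}

static double tack(void)
{
    return omp_get_wtime() - tick_start;
}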
int main(int argc, char *argv[])
{
    int chunk = 0;

    // static (no chunk)
    omp_set_schedule(omp_sched_static, 0);
    #pragma omp parallel // num_threads(num_th)
    run_loop(0, 26, 1, chunk);

    // auto (chunk should be ignored)
    omp_set_schedule(omp_sched_auto, 0);
    #pragma omp parallel // num_threads(num_th)
    run_loop(0, 26, 1, chunk);

    // static,1
    chunk = 1;
    omp_set_schedule(omp_sched_static, 1);
    #pragma omp parallel // num_threads(num_th)
    run_loop(0, 26, 1, chunk);

    // dynamic,1
    omp_set_schedule(omp_sched_dynamic, 1);
    #pragma omp parallel // num_threads(num_th)
    run_loop(0, 26, 1, chunk);

    // guided,1
    omp_set_schedule(omp_sched_guided, 1);
    #pragma omp parallel // num_threads(num_th)
    run_loop(0, 26, 1, chunk);

    // dynamic,0 - use default chunk size 1
    omp_set_schedule(omp_sched_dynamic, 0);
    #pragma omp parallel // num_threads(num_th)
    run_loop(0, 26, 1, chunk);

    // guided,0 - use default chunk size 1
    omp_set_schedule(omp_sched_guided, 0);
    #pragma omp parallel // num_threads(num_th)
    run_loop(0, 26, 1, chunk);

    if (err) {
        printf("failed, err = %d\n", err);
        return 1;
    } else {
        printf("passed\n");
        return 0;
    }
}
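/* run_loop() and the global err counter are defined elsewhere in the
   original test.  A simplified sketch (hypothetical, not the original
   checker): execute a schedule(runtime) loop over [lb, ub) with the given
   stride and count the iterations; the chunk argument is only informational
   here, whereas the real test also validates how iterations are dispatched. */
#include <omp.h>
#include <stdio.h>

static int err = 0;

static void run_loop(int lb, int ub, int stride, int chunk)
{
    static int total;            /* static storage, shared across the team */
    int mine = 0;

    #pragma omp single
    total = 0;                   /* implicit barrier after single */

    #pragma omp for schedule(runtime) nowait
    for (int i = lb; i < ub; i += stride)
        mine++;

    #pragma omp atomic
    total += mine;

    #pragma omp barrier
    #pragma omp single
    {
        int expected = (ub - lb + stride - 1) / stride;
        if (total != expected) {
            printf("chunk %d: got %d iterations, expected %d\n",
                   chunk, total, expected);
            err++;
        }
    }
}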
int main(int argc, char **argv)
{
    int i, n = 20, a[20], suma = 0;

    if (argc < 2) {
        fprintf(stderr, "\nMissing iteration count\n");
        exit(-1);
    }
    n = atoi(argv[1]);
    if (n > 20)
        n = 20;

    for (i = 0; i < n; i++)
        a[i] = i;

    omp_set_schedule(omp_sched_dynamic, 2);   /* was omp_set_schedule(2,2) */

#pragma omp parallel for firstprivate(suma) lastprivate(suma) schedule(runtime)
    for (i = 0; i < n; i++) {
        suma = suma + a[i];
        printf(" thread %d adds a[%d]=%d suma=%d\n",
               omp_get_thread_num(), i, a[i], suma);
    }
    printf("Outside the 'parallel for' suma=%d\n", suma);

    return 0;
}
void
omp_set_schedule_8_ (const int32_t *kind, const int64_t *chunk_size)
{
  omp_set_schedule (*kind, TO_INT (*chunk_size));
}
void
omp_set_schedule_ (const int32_t *kind, const int32_t *chunk_size)
{
  omp_set_schedule (*kind, *chunk_size);
}
int main () { int a[N], aa[N]; int b[N], bb[N]; int c[N], cc[N]; int d[N], dd[N]; int e[N], ee[N]; int i, errors; int cond = 0; check_offloading(); // Test: task within target // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { int id = omp_get_thread_num(); a[id]++; #if TRY_TASK #pragma omp task firstprivate(id) shared(b) default(none) { #if TASK_COMPUTE PRINT("hi alex from %d\n", id); b[id]++; #endif } #pragma omp taskwait #endif } // reproduce aa[0]++; #if TRY_TASK && TASK_COMPUTE bb[0]++; #endif // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); // Test: task within parallel // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #if TRY_TASK #pragma omp task firstprivate(id) shared(b) { #if TASK_COMPUTE PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #endif } #endif } } // reproduce for(i=0; i<N; i++) { aa[i]++; #if TRY_TASK && TASK_COMPUTE bb[i]++; #endif } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); // Test: multiple nested tasks in parallel region // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #pragma omp task firstprivate(id) shared(b) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #pragma omp task firstprivate(id) shared(b) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #pragma omp task firstprivate(id) shared(b) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } } } } } // reproduce for(i=0; i<N; i++) { aa[i]++; bb[i]+=3; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); // Test: three successive tasks in a parallel region // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #pragma omp task firstprivate(id) shared(b) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } #pragma omp task firstprivate(id) shared(b) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } #pragma omp task firstprivate(id) shared(b) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } } } // reproduce for(i=0; i<N; i++) { aa[i]++; bb[i]+=3; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, 
expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); // Test: change of context when entering/exiting tasks // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i+1; c[i] = cc[i] = 3*i+1; d[i] = dd[i] = 4*i+1; e[i] = ee[i] = 5*i+1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b, c, d, e) { omp_set_schedule(omp_sched_static, 1); #pragma omp parallel num_threads(64) { omp_set_schedule(omp_sched_static, 2); int id = omp_get_thread_num(); // task 1 #pragma omp task firstprivate(id) shared(b, c, d, e) { omp_set_schedule(omp_sched_static, 3); PRINT("hi alex from %d\n", id); // task 2 #pragma omp task firstprivate(id) shared(b, c, d, e) { omp_set_schedule(omp_sched_static, 4); PRINT("hi alex from %d\n", id); // task 3 #pragma omp task firstprivate(id) shared(b, c, d, e) { omp_set_schedule(omp_sched_static, 5); PRINT("hi alex from %d\n", id); // task 3 omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 5) e[id]++; } // task 2 omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 4) d[id]++; } // task 1 omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 3) c[id]++; } // par omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 2) b[id]++; } // team omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 1) a[0]++; } // reproduce aa[0]++; for(i=0; i<N; i++) { bb[i]++; cc[i]++; dd[i]++; ee[i]++; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); if (c[i] != cc[i]) printf("%4i: got c %d, expected %d, error %d\n", i, c[i], cc[i], ++errors); if (d[i] != dd[i]) printf("%4i: got d %d, expected %d, error %d\n", i, d[i], dd[i], ++errors); if (e[i] != ee[i]) printf("%4i: got e %d, expected %d, error %d\n", i, e[i], ee[i], ++errors); } printf("got %d errors\n", errors); // Test: change of context when using if clause // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i+1; c[i] = cc[i] = 3*i+1; d[i] = dd[i] = 4*i+1; e[i] = ee[i] = 5*i+1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b, c, d, e, cond) { omp_set_schedule(omp_sched_static, 1); #pragma omp parallel num_threads(64) { omp_set_schedule(omp_sched_static, 2); int id = omp_get_thread_num(); // task 1 #pragma omp task firstprivate(id) shared(b, c, d, e) if(cond) { omp_set_schedule(omp_sched_static, 3); PRINT("hi alex from %d\n", id); // task 2 #pragma omp task firstprivate(id) shared(b, c, d, e) if(cond) { omp_set_schedule(omp_sched_static, 4); PRINT("hi alex from %d\n", id); // task 3 #pragma omp task firstprivate(id) shared(b, c, d, e) if(cond) { omp_set_schedule(omp_sched_static, 5); PRINT("hi alex from %d\n", id); // task 3 omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 5) e[id]++; } // task 2 omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 4) d[id]++; } // task 1 omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 3) c[id]++; } // par omp_sched_t s; int chunk; omp_get_schedule(&s, &chunk); if (s == omp_sched_static && chunk == 2) b[id]++; } // team omp_sched_t s; int chunk; omp_get_schedule(&s, 
&chunk); if (s == omp_sched_static && chunk == 1) a[0]++; } // reproduce aa[0]++; for(i=0; i<N; i++) { bb[i]++; cc[i]++; dd[i]++; ee[i]++; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); if (c[i] != cc[i]) printf("%4i: got c %d, expected %d, error %d\n", i, c[i], cc[i], ++errors); if (d[i] != dd[i]) printf("%4i: got d %d, expected %d, error %d\n", i, d[i], dd[i], ++errors); if (e[i] != ee[i]) printf("%4i: got e %d, expected %d, error %d\n", i, e[i], ee[i], ++errors); } printf("got %d errors\n", errors); // Test: final // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #pragma omp task firstprivate(id) shared(b) final(1) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #pragma omp task firstprivate(id) shared(b) final(1) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #pragma omp task firstprivate(id) shared(b) final(1) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } } } } } // reproduce for(i=0; i<N; i++) { aa[i]++; bb[i]+=3; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); #if 0 // Test: untied // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #pragma omp task firstprivate(id) shared(b) untied { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #pragma omp task firstprivate(id) shared(b) untied { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #pragma omp task firstprivate(id) shared(b) untied { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } } } } } // reproduce for(i=0; i<N; i++) { aa[i]++; bb[i]+=3; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); #endif // Test: mergeaeble // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #pragma omp task firstprivate(id) shared(b) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #pragma omp task firstprivate(id) shared(b) mergeable { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #pragma omp task firstprivate(id) shared(b) mergeable { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } } } } } // reproduce for(i=0; i<N; i++) { aa[i]++; bb[i]+=3; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, 
b[i], bb[i], ++errors); } printf("got %d errors\n", errors); // Test: private // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #if TRY_TASK #pragma omp task private(id) shared(b) { #if TASK_COMPUTE PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; #endif } #endif } } // reproduce for(i=0; i<N; i++) { aa[i]++; #if TRY_TASK && TASK_COMPUTE bb[i]++; #endif } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); // Test: depend // init int x; for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #pragma omp task firstprivate(id) shared(b) depend(out:x) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } #pragma omp task firstprivate(id) shared(b) depend(inout:x) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } #pragma omp task firstprivate(id) shared(b) depend(in:x) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } } } // reproduce for(i=0; i<N; i++) { aa[i]++; bb[i]+=3; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); // Test: inverted priority // init for(i=0; i<N; i++) { a[i] = aa[i] = i+1; b[i] = bb[i] = 2*i +1; } // target starts 1 team and many threads in it #pragma omp target map(tofrom: a, b) { #pragma omp parallel num_threads(64) { int id = omp_get_thread_num(); a[id]++; #pragma omp task firstprivate(id) shared(b) priority(0) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } #pragma omp task firstprivate(id) shared(b) priority(10) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } #pragma omp task firstprivate(id) shared(b) priority(20) { PRINT("hi alex from %d\n", id); int id = omp_get_thread_num(); b[id]++; } } } // reproduce for(i=0; i<N; i++) { aa[i]++; bb[i]+=3; } // verify errors = 0; for(i=0; i<N; i++) { if (a[i] != aa[i]) printf("%4i: got a %d, expected %d, error %d\n", i, a[i], aa[i], ++errors); if (b[i] != bb[i]) printf("%4i: got b %d, expected %d, error %d\n", i, b[i], bb[i], ++errors); } printf("got %d errors\n", errors); return 0; }
int main(int argc, char** argv)
{
    /* omp.h:
       typedef enum omp_sched_t {
         omp_sched_static  = 1,
         omp_sched_dynamic = 2,
         omp_sched_guided  = 3,
         omp_sched_auto    = 4
       } omp_sched_t; */
    omp_sched_t omp_sched_kind = omp_sched_static;
    int omp_sched_modifier = 0;   // value <= 0 sets to default
    long array_size = 1000;

    int opt;
    while ((opt = getopt(argc, argv, "k:m:s:")) != -1) {
        switch (opt) {
        case 'k': omp_sched_kind = strtol(optarg, NULL, 10); break;
        case 'm': omp_sched_modifier = strtol(optarg, NULL, 10); break;
        case 's': array_size = strtol(optarg, NULL, 10); break;
        default:
            printf("usage: %s -k <omp_sched_kind> -m <omp_sched_modifier> -s <array_size>\n",
                   argv[0]);
        }
    }

    int* array = make_array(array_size);
    if (array == NULL) {
        exit(EXIT_FAILURE);
    }

    omp_sched_print(omp_sched_kind, omp_sched_modifier);
    omp_set_schedule(omp_sched_kind, omp_sched_modifier);

    // reset to test if omp_get_schedule() works
    omp_sched_modifier = -1;
    omp_sched_kind = -1;

    double t = omp_get_wtime();
    #pragma omp parallel
    {
        #pragma omp single
        {
            omp_get_schedule(&omp_sched_kind, &omp_sched_modifier);
            omp_sched_print(omp_sched_kind, omp_sched_modifier);
        }
        #pragma omp for schedule(runtime)  // "runtime" means set by omp_set_schedule()
        for (int i = 0; i < array_size; i++) {
            process(array[i]);
        }
    }
    printf("%d %d %lf\n", omp_sched_kind, omp_sched_modifier, omp_get_wtime() - t);

    free(array);
    exit(EXIT_SUCCESS);
}
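/* make_array(), process() and omp_sched_print() are not shown above.  A
   minimal sketch (hypothetical helpers, only to make the example
   self-contained): make_array allocates and fills an array, process does
   some per-element work, and omp_sched_print names the schedule kind.  */
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

static int* make_array(long size)
{
    int* a = malloc(size * sizeof *a);
    if (a != NULL)
        for (long i = 0; i < size; i++)
            a[i] = (int) i;
    return a;
}

static void process(int x)
{
    /* Placeholder workload; the real kernel is application specific. */
    volatile int sink = x * x;
    (void) sink;
}

static void omp_sched_print(omp_sched_t kind, int modifier)
{
    static const char* names[] = { "?", "static", "dynamic", "guided", "auto" };
    int k = (kind >= omp_sched_static && kind <= omp_sched_auto) ? (int) kind : 0;
    printf("schedule: %s, modifier: %d\n", names[k], modifier);
}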
int main(int argc, char **argv) { int N; int nThreads; int nColumns; int i,j,k; double *A,*Bi,*C,*Ci; int BiRows, BiColumns; CompressedMatrix *cBi; CompressedMatrix *cCi; double elapsed; char printDebug; //************ Check Input **************/ if(argc < 3){ printf("Usage: %s MaxtrixSize NumberOfThreads\n" , argv[0] ); exit(EXIT_FAILURE); } N = atoi(argv[1]); if( N <= 1){ printf("MatrixSize must be bigger than 1!"); exit(EXIT_FAILURE); } nThreads = atoi(argv[2]); if( nThreads <= 1){ printf("NumberOfThreads must be bigger than 1!"); exit(EXIT_FAILURE); } omp_set_num_threads(nThreads); omp_set_schedule(omp_sched_dynamic, N/10); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_id); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); nColumns = N / mpi_size; //For the moment depend on N being a multiple the number of MPI nodes //************ Prepare Matrix **************/ A = (double *) malloc( N*N * sizeof(double) ); if((A == NULL) ){ printf("Running out of memory!\n"); exit(EXIT_FAILURE); } // if(mpi_id != 0){ // MPI_Finalize(); // exit(0); // } if(mpi_id == 0) { printDebug = 0; if(printDebug) printf("[%d] Generating A ...",mpi_id); //Fill matrixes. Generate Identity like matrix for A and B , So C should result in an matrix with a single major diagonal for(i=0; i < N; i++ ){ for(j=0; j < N; j++){ A[i+N*j] = (i==j)?i:0.0; // //Sparse Matrix with 10% population // A[i+N*j] = rand()%10; // if(A[i+N*j] == 0) // A[i+N*j] = rand()%10; // else // A[i+N*j] = 0; } } // printMatrix(A, N, nColumns); // cA = compressMatrix(A, N, nColumns); // printCompressedMatrix(cA); // uncompressMatrix(cA, &Bi, &i, &j); // printMatrix(Bi, i, j); // // MPI_Finalize(); // exit(0); tick(); if(printDebug) printf("[%d] Broadcasting A ...",mpi_id); MPI_Bcast( A, N*N, MPI_DOUBLE, 0, MPI_COMM_WORLD); if(printDebug) printf("[%d] Generating B ...",mpi_id); double* B; CompressedMatrix* cB; B = (double *) malloc( N*N * sizeof(double) ); for(i=0; i < N; i++ ){ for(j=0; j < N; j++){ B[j+N*i] = (i==j)?1.0:0.0; } } if(printDebug) printf("[%d] Compressing and distributing Bi ...",mpi_id); cB = compressMatrix(B, N, N); for(i=1; i < mpi_size; i++){ mpiSendCompressedMatrix(cB, i*nColumns, (i+1)*nColumns, i); } //Fake shorten cB free(B); cB->columns = nColumns; uncompressMatrix(cB, &Bi, &BiRows, &BiColumns); Ci = MatrixMultiply(A, N, N, Bi, nColumns); if(printDebug) printf("[%d] Ci = A x Bi ...", mpi_id); if(printDebug) printMatrix(Ci, N, nColumns); cCi = compressMatrix(Ci, N, nColumns); if(printDebug) printf("cCi ...\n"); if(printDebug) printCompressedMatrix(cCi); MPI_Barrier(MPI_COMM_WORLD); if(printDebug) printf("[%d] Receiving Ci fragments ...\n", mpi_id); CompressedMatrix** Cii; Cii = (CompressedMatrix**) malloc(sizeof(CompressedMatrix*) * mpi_size); if(Cii == NULL){ perror("malloc"); exit(EXIT_FAILURE); } Cii[0] = cCi; for(i=1; i < mpi_size; i++){ Cii[i] = mpiRecvCompressedMatrix(N,nColumns, i); } if(printDebug) printf("[%d] Joining Cii ...\n", mpi_id); CompressedMatrix *cC; cC = joinCompressedMatrices(Cii, mpi_size); if(printDebug) printCompressedMatrix(cC); elapsed = tack(); printf("[%d] C ...\n", mpi_id); uncompressMatrix(cC, &C, &i,&j); if(i <= 20){ printMatrix(C, i,j); } else { if(i < 1000){ printf("C is too big, only printing first diagonal %d.\n[",j); for(k=0; (k < i) && (k < j); k++){ printf("%3.2f ",C[k + k*j]); } printf("]\n"); } else { printf("C is just too big!"); } } printf("Took [%f] seconds!\n",elapsed); } else { printDebug = 0; if(printDebug) printf("[%d] Waiting for A ...",mpi_id); MPI_Bcast( A, N*N, MPI_DOUBLE, 
0, MPI_COMM_WORLD); if(printDebug) printf("[%d] Received A ...\n", mpi_id); if(printDebug) printMatrix(A, N, N); if(printDebug) printf("[%d] Waiting for Bi ...",mpi_id); cBi = mpiRecvCompressedMatrix(N, nColumns, 0); uncompressMatrix(cBi, &Bi, &BiRows, &BiColumns); if(printDebug) printf("[%d] Received Bi ...",mpi_id); if(printDebug) printMatrix(Bi,BiRows, BiColumns); assert( (BiRows == N) && "Number or Rows in Bi is not right!"); assert( (BiColumns == nColumns) && "Number or Columns in Bi is not right!"); Ci = MatrixMultiply(A, N, N, Bi, BiColumns); if(printDebug) printf("[%d] Ci = A x Bi ...", mpi_id); if(printDebug) printMatrix(Ci, N, nColumns); cCi = compressMatrix(Ci, N, nColumns); if(printDebug) printCompressedMatrix(cCi); MPI_Barrier(MPI_COMM_WORLD); if(printDebug) printf("[%d] Returning Ci ...\n", mpi_id); mpiSendCompressedMatrix(cCi, 0, nColumns, 0); } MPI_Finalize(); // NxM = NxN * NxM exit(EXIT_SUCCESS); }
void
omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier)
{
  omp_set_schedule (*kind, TO_INT (*modifier));
}
void
omp_set_schedule_ (const int32_t *kind, const int32_t *modifier)
{
  omp_set_schedule (*kind, *modifier);
}
int main(int argc, char **argv)
{
    int **M;
    int *v1, *v2;
    int i, k, a, N;
    double cgt1, cgt2, ncgt; // for execution time
    time_t t; // seed for rand()

    srand((unsigned) time(&t));

    // Get the number of rows x columns of the square matrix
    if (argc < 4) {
        fprintf(stderr, "Error: %s <N_rows> <Chunk (0...I)> <Sched (static, dynamic, guided)>\n", argv[0]);
        exit(-1);
    }
    N = atoi(argv[1]);

    // == OpenMP directives
    // ====================================================>
    int chunk = 0;
    omp_sched_t kind;
    if (strcmp(argv[2], "default") == 0)
        omp_get_schedule(&kind, &chunk);
    else
        chunk = atoi(argv[2]);

    // Override OMP_SCHEDULE
    if (strcmp(argv[3], "static") == 0)
        omp_set_schedule(omp_sched_static, chunk);
    else if (strcmp(argv[3], "dynamic") == 0)
        omp_set_schedule(omp_sched_dynamic, chunk);
    else if (strcmp(argv[3], "guided") == 0)
        omp_set_schedule(omp_sched_guided, chunk);
    else {
        printf("Error in the scheduling method used to assign work to the threads (static, dynamic, guided)\n");
        exit(-1);
    }

    // The number of threads used must match the number of available processors
    omp_set_num_threads(omp_get_num_procs());

    // == Memory allocation
    // ====================================================>
    v1 = (int *) malloc(N * sizeof(int));
    v2 = (int *) malloc(N * sizeof(int));
    if (v1 == NULL || v2 == NULL) {
        printf("Error allocating memory for the vectors\n");
        exit(-2);
    }
    M = (int **) malloc(N * sizeof(int *));

    // Declaring i private in a for gives each thread its own copy of i; in a
    // parallel for each thread gets i as follows:
    // i = 0, i = 3, i = 6 for a loop with N = 9
    #pragma omp parallel for shared(M,N) private(i) default(none) schedule(runtime)
    for (i = 0; i < N; i++) {
        M[i] = (int *) malloc(N * sizeof(int));
        if (M[i] == NULL) {
            printf("Error allocating memory for the vectors\n");
            exit(-2);
        }
    }

    // == Initialization
    // ====================================================>
    // M, v1, v2, N, i are shared
    // Each thread handles one part of the loop through i
    // k is private so that each thread working on the i-th part of the loop
    // has its own copy of k = 0; the k part runs sequentially
    #pragma omp parallel for shared(N,M) private(i,k,a) default(none) schedule(runtime)
    for (i = 0; i < N; i++) {
        if (i > 0) {
            for (a = 0; a < i; a++)
                M[i][a] = 0;
            for (k = a; k < N; k++)
                M[i][k] = rand() % 8;
        } else {
            for (k = 0; k < N; k++) {
                M[i][k] = rand() % 8;
            }
        }
    }

    #pragma omp parallel for shared(v1,v2,N) private(i) default(none) schedule(runtime)
    for (i = 0; i < N; i++) {
        v1[i] = rand() % 6;
        v2[i] = 0;
    }

    // == Computation
    // ====================================================>
    cgt1 = omp_get_wtime();
    #pragma omp parallel for shared(v1,v2,M,N) private(i,k) default(none) schedule(runtime)
    for (i = 0; i < N; i++) {
        for (k = i; k < N; k++)
            v2[i] += M[i][k] * v1[k];
    }
    cgt2 = omp_get_wtime();
    ncgt = (double) (cgt2 - cgt1);

    // == Print results
    // ====================================================>
    printf("Time(sec.):%11.9f\n", ncgt);
    printf("Vector size: %d\n", N);
    printf("\tv1 = %d elems -> %lu bytes\n\tv2 = %d elems -> %lu bytes\n", N, N * sizeof(int), N, N * sizeof(int));
    printf("Matrix size: %dx%d -> %lu bytes\n", N, N, N * N * sizeof(int));
    // Printing the first and last elements of the result keeps the compiler's
    // optimizations from removing the summation code.
    printf("v2[0] = %d ... v2[N-1] = %d \n", v2[0], v2[N-1]);

    // For small sizes (N < 15) show the computed values
    if (N < 15) {
        printf("\n----------- Matrix M ----------- \n");
        for (i = 0; i < N; i++) {
            for (k = 0; k < N; k++)
                printf("%d\t", M[i][k]);
            printf("\n");
        }
        printf("\n----------- Vector V1 ----------- \n");
        for (i = 0; i < N; i++)
            printf("%d\t", v1[i]);
        printf("\n");
        printf("\n----------- Vector V2 ----------- \n");
        for (i = 0; i < N; i++)
            printf("%d\t", v2[i]);
        printf("\n");
    }

    // == Free memory
    // ====================================================>
    free(v1);
    free(v2);
    #pragma omp parallel for shared(M,N) private(i) default(none) schedule(runtime)
    for (i = 0; i < N; i++)
        free(M[i]);
    free(M);

    return 0;
}
int
main ()
{
  int d_o = omp_get_dynamic ();
  int n_o = omp_get_nested ();
  omp_sched_t s_o;
  int c_o;
  omp_get_schedule (&s_o, &c_o);
  int m_o = omp_get_max_threads ();
  omp_set_dynamic (1);
  omp_set_nested (1);
  omp_set_schedule (omp_sched_static, 2);
  omp_set_num_threads (4);
  int d = omp_get_dynamic ();
  int n = omp_get_nested ();
  omp_sched_t s;
  int c;
  omp_get_schedule (&s, &c);
  int m = omp_get_max_threads ();
  if (!omp_is_initial_device ())
    abort ();
  #pragma omp target if (0)
  {
    omp_sched_t s_c;
    int c_c;
    omp_get_schedule (&s_c, &c_c);
    if (d_o != omp_get_dynamic () || n_o != omp_get_nested ()
        || s_o != s_c || c_o != c_c || m_o != omp_get_max_threads ())
      abort ();
    omp_set_dynamic (0);
    omp_set_nested (0);
    omp_set_schedule (omp_sched_dynamic, 4);
    omp_set_num_threads (2);
    if (!omp_is_initial_device ())
      abort ();
  }
  if (!omp_is_initial_device ())
    abort ();
  omp_sched_t s_c;
  int c_c;
  omp_get_schedule (&s_c, &c_c);
  if (d != omp_get_dynamic () || n != omp_get_nested ()
      || s != s_c || c != c_c || m != omp_get_max_threads ())
    abort ();
  #pragma omp target if (0)
  #pragma omp teams
  {
    omp_sched_t s_c;
    int c_c;
    omp_get_schedule (&s_c, &c_c);
    if (d_o != omp_get_dynamic () || n_o != omp_get_nested ()
        || s_o != s_c || c_o != c_c || m_o != omp_get_max_threads ())
      abort ();
    omp_set_dynamic (0);
    omp_set_nested (0);
    omp_set_schedule (omp_sched_dynamic, 4);
    omp_set_num_threads (2);
    if (!omp_is_initial_device ())
      abort ();
  }
  if (!omp_is_initial_device ())
    abort ();
  omp_get_schedule (&s_c, &c_c);
  if (d != omp_get_dynamic () || n != omp_get_nested ()
      || s != s_c || c != c_c || m != omp_get_max_threads ())
    abort ();
  return 0;
}