/*
 * Sort the n elements of `data` with multisort(), using `tmp` as scratch
 * space, and then run check_sorted() on the result.
 *
 * With _EXTRAE_ enabled, the sort and the check are each bracketed by
 * Extrae_event() calls. Otherwise the wall-clock time spent in the sort
 * (measured with omp_get_wtime) is printed to stdout on its own line.
 */
void do_sort(long n, T data[n], T tmp[n])
{
#if _EXTRAE_
    Extrae_event(PROGRAM, MULTISORT);
#else
    double sort_time = omp_get_wtime();
#endif

    // A single thread of the parallel region starts the recursion.
    // The element count comes from the parameter `n`, not the global N,
    // so the function sorts exactly the range it was handed.
    #pragma omp parallel
    #pragma omp single
    multisort(n, data, tmp);

#if _EXTRAE_
    Extrae_event(PROGRAM, END);
#else
    sort_time = omp_get_wtime() - sort_time;
    fprintf(stdout, "%g\n", sort_time);
#endif

#if _EXTRAE_
    Extrae_event(PROGRAM, CHECK);
#endif
    check_sorted(n, data);
#if _EXTRAE_
    Extrae_event(PROGRAM, END);
#endif
}
/*
 * Recursive multisort expressed with OpenMP task dependences.
 *
 * Inputs of at least MIN_SORT_SIZE*4 elements are split into four quarters
 * that are sorted by four tasks; two merge tasks then combine the quarters
 * pairwise into `tmp`, and a last merge task combines both halves of `tmp`
 * back into `data`. Ordering between those sibling tasks is expressed with
 * depend clauses; a taskwait closes the recursion level.
 * Smaller inputs are sorted directly with basicsort().
 */
void multisort(long n, T data[n], T tmp[n])
{
    if (n < MIN_SORT_SIZE*4L) {
        // Base case
#if _EXTRAE_
        Extrae_event(PROGRAM, SORT);
#endif
        basicsort(n, data);
#if _EXTRAE_
        Extrae_event(PROGRAM, END);
#endif
        return;
    }

    // Recursive decomposition.
    // The offsets are computed with the same integer expressions the
    // pragmas and calls need, so they are evaluated once and reused.
    long quarter = n/4L;
    long half = n/2L;
    long three_quarters = 3L*n/4L;

    #pragma omp task depend(out: data[0:quarter])
    multisort(quarter, &data[0], &tmp[0]);

    #pragma omp task depend(out: data[quarter:quarter])
    multisort(quarter, &data[quarter], &tmp[quarter]);

    #pragma omp task depend(out: data[half:quarter])
    multisort(quarter, &data[half], &tmp[half]);

    #pragma omp task depend(out: data[three_quarters:quarter])
    multisort(quarter, &data[three_quarters], &tmp[three_quarters]);

    // Each pairwise merge waits for the two quarters it reads.
    #pragma omp task depend(in: data[0:quarter], data[quarter:quarter]) depend(out: tmp[0:half])
    merge(quarter, &data[0], &data[quarter], &tmp[0], 0, half);

    #pragma omp task depend(in: data[half:quarter], data[three_quarters:quarter]) depend(out: tmp[half:half])
    merge(quarter, &data[half], &data[three_quarters], &tmp[half], 0, half);

    // The final merge waits for both halves of tmp.
    #pragma omp task depend(in: tmp[0:half], tmp[half:half])
    merge(half, &tmp[0], &tmp[half], &data[0], 0, n);

    #pragma omp taskwait
}
/*
 * Recursive merge of `left` and `right` (n elements each) into `result`,
 * restricted to the output window [start, start + length).
 *
 * Windows shorter than MIN_MERGE_SIZE*2 are handled by basicmerge();
 * longer windows are split in two halves that are merged recursively.
 * With _TAREADOR_ enabled, each base merge and each recursive call is
 * wrapped in its own Tareador task.
 */
void merge(long n, T left[n], T right[n], T result[n*2], long start, long length)
{
    if (length >= MIN_MERGE_SIZE*2L) {
        // Recursive decomposition
        long half = length/2;

#if _TAREADOR_
        tareador_start_task("mergeinside1");
#endif
        merge(n, left, right, result, start, half);
#if _TAREADOR_
        tareador_end_task();
        tareador_start_task("mergeinside2");
#endif
        merge(n, left, right, result, start + half, half);
#if _TAREADOR_
        tareador_end_task();
#endif
        return;
    }

    // Base case: one Tareador task per call
#if _EXTRAE_
    Extrae_event(PROGRAM, MERGE);
#endif
#if _TAREADOR_
    tareador_start_task("basemerge");
#endif
    basicmerge(n, left, right, result, start, length);
#if _TAREADOR_
    tareador_end_task();
#endif
#if _EXTRAE_
    Extrae_event(PROGRAM, END);
#endif
}
/*
 * JNI entry point for the Java method es.bsc.cepbatools.extrae.Wrapper.Event:
 * forwards the (id, val) pair to Extrae_event() after converting both to
 * the Extrae native types. The JNI environment and class are not used.
 */
JNIEXPORT void JNICALL Java_es_bsc_cepbatools_extrae_Wrapper_Event (JNIEnv *env,
	jclass jc, jint id, jlong val)
{
	UNREFERENCED(env);
	UNREFERENCED(jc);

	extrae_type_t event_type = (extrae_type_t) id;
	extrae_value_t event_value = (extrae_value_t) val;

	Extrae_event (event_type, event_value);
}
void timestamp_event() { // https://stackoverflow.com/questions/5833094/get-a-timestamp-in-c-in-microseconds // https://stackoverflow.com/questions/12392278/measure-time-in-linux-getrusage-vs-clock-gettime-vs-clock-vs-gettimeofday // https://www.gnu.org/software/libc/manual/html_node/Elapsed-Time.html struct timeval tv; gettimeofday(&tv, NULL); Extrae_event((extrae_type_t) 88883, (extrae_value_t) ((unsigned long long) 1000000ull*tv.tv_sec + tv.tv_usec)); }
/*
 * Approximate pi with pi_kernel() over n = 1000000 intervals of width
 * h = 1/n, and print the result together with its absolute error against
 * a 25-digit reference value.
 *
 * The kernel call is bracketed by Extrae events of type 1000
 * (value 1 before the call, value 0 after it).
 * Command-line arguments are not used.
 */
int main(int argc, char **argv)
{
	(void) argc;
	(void) argv;

	const int n = 1000000;
	const double PI25DT = 3.141592653589793238462643;
	double pi, h, area;

	Extrae_init();

	h = 1.0 / (double) n;

	Extrae_event (1000, 1);
	area = pi_kernel (n, h);
	Extrae_event (1000, 0);

	pi = h * area;
	printf("pi is approximately %.16f, Error is %.16f\n",pi,fabs(pi - PI25DT));

	Extrae_fini();
	return 0;
}
/*
 * Task-parallel recursive merge of `left` and `right` (n elements each)
 * into `result`, restricted to the output window [start, start + length).
 *
 * Windows shorter than MIN_MERGE_SIZE*2 are handled by basicmerge()
 * (bracketed by Extrae events when _EXTRAE_ is enabled). Longer windows
 * are split into two halves, each merged by its own OpenMP task; the
 * function waits for both tasks before returning.
 */
void merge(long n, T left[n], T right[n], T result[n*2], long start, long length)
{
    if (length >= MIN_MERGE_SIZE*2L) {
        // Recursive decomposition
        long half = length/2;

        #pragma omp task
        merge(n, left, right, result, start, half);

        #pragma omp task
        merge(n, left, right, result, start + half, half);

        #pragma omp taskwait
        return;
    }

    // Base case
#if _EXTRAE_
    Extrae_event(PROGRAM, MERGE);
#endif
    basicmerge(n, left, right, result, start, length);
#if _EXTRAE_
    Extrae_event(PROGRAM, END);
#endif
}
void *routine1 (void *parameters) { long th_id = (long) parameters; Extrae_event (1, 1); if (th_id == 0) { printf ("routine1 thread 0 executing a long function\n"); longExecution(th_id); } printf ("routine1 stopped for barrier : (thread=%08lx, param %p)\n", pthread_self(), parameters); // Synchronization point int rc = pthread_barrier_wait(&barrier); if(rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) { printf("Could not wait on barrier\n"); exit(-1); } printf ("routine1 exiting from barrier : (thread=%08lx, param %p)\n", pthread_self(), parameters); Extrae_event (1, 0); }
/*
 * Recursive multisort synchronised with OpenMP taskgroups.
 *
 * Inputs of at least MIN_SORT_SIZE*4 elements are split into four quarters
 * sorted by four tasks inside a first taskgroup; a second taskgroup holds
 * the two tasks that merge the quarters pairwise into `tmp`; the final
 * merge of both halves of `tmp` back into `data` is done by the calling
 * task itself. Smaller inputs are sorted directly with basicsort().
 */
void multisort(long n, T data[n], T tmp[n])
{
    if (n < MIN_SORT_SIZE*4L) {
        // Base case
#if _EXTRAE_
        Extrae_event(PROGRAM, SORT);
#endif
        basicsort(n, data);
#if _EXTRAE_
        Extrae_event(PROGRAM, END);
#endif
        return;
    }

    // Recursive decomposition
    long quarter = n/4L;
    long half = n/2L;
    long three_quarters = 3L*n/4L;

    // Phase 1: sort the four quarters.
    #pragma omp taskgroup
    {
        #pragma omp task
        multisort(quarter, &data[0], &tmp[0]);

        #pragma omp task
        multisort(quarter, &data[quarter], &tmp[quarter]);

        #pragma omp task
        multisort(quarter, &data[half], &tmp[half]);

        #pragma omp task
        multisort(quarter, &data[three_quarters], &tmp[three_quarters]);
    }

    // Phase 2: merge the quarters pairwise into tmp.
    #pragma omp taskgroup
    {
        #pragma omp task
        merge(quarter, &data[0], &data[quarter], &tmp[0], 0, half);

        #pragma omp task
        merge(quarter, &data[half], &data[three_quarters], &tmp[half], 0, half);
    }

    // Phase 3: merge both halves of tmp back into data.
    merge(half, &tmp[0], &tmp[half], &data[0], 0, n);
}
/*
 * Approximate pi with pi_kernel() over n = 1000000 intervals of width
 * h = 1/n, and print the result together with its absolute error against
 * a 25-digit reference value.
 *
 * Event type 1000 is registered with Extrae as "Kernel execution", with
 * value 1 labelled "Begin" and value 0 labelled "End"; the kernel call is
 * bracketed by those two events.
 * Command-line arguments are not used.
 */
int main(int argc, char **argv)
{
	(void) argc;
	(void) argv;

	const int n = 1000000;
	const double PI25DT = 3.141592653589793238462643;
	double pi, h, area;

	extrae_type_t type = 1000;
	// Extrae_define_event_type() takes the value count as `unsigned *`.
	unsigned nvalues = 2;
	extrae_value_t values[2] = {0, 1};
	char * description_values[2] = {"End", "Begin" };

	Extrae_init();
	Extrae_define_event_type (&type, "Kernel execution", &nvalues, values, description_values);

	h = 1.0 / (double) n;

	// Emit the events with the same type that was just described.
	Extrae_event (type, 1);
	area = pi_kernel (n, h);
	Extrae_event (type, 0);

	pi = h * area;
	printf("pi is approximately %.16f, Error is %.16f\n",pi,fabs(pi - PI25DT));

	Extrae_fini();
	return 0;
}
static void* concurrent_jit_run(void *info) { gcc_jit_context *ctx; ctx = gcc_jit_context_acquire (); if (ctx == NULL) { fprintf(stderr, "acquired JIT context is NULL"); return NULL; } gcc_jit_context_set_int_option(ctx, GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL, 2); #if EXTRAE_SUPPORT Extrae_event(JIT_EVENT_TYPE, JIT_CODE_GENERATION); #endif generate_code_regexp(ctx, regexp); #if EXTRAE_SUPPORT Extrae_event(JIT_EVENT_TYPE, 0); Extrae_event(JIT_EVENT_TYPE, JIT_COMPILATION); #endif gcc_jit_result *result = gcc_jit_context_compile(ctx); #if EXTRAE_SUPPORT Extrae_event(JIT_EVENT_TYPE, 0); #endif if (result == NULL) { fprintf(stderr, "compilation failed"); return NULL; } #if EXTRAE_SUPPORT Extrae_event(JIT_EVENT_TYPE, JIT_GET_CODE); #endif match_fun_t function_addr = (match_fun_t)gcc_jit_result_get_code(result, "match"); #if EXTRAE_SUPPORT Extrae_event(JIT_EVENT_TYPE, 0); #endif #if EXTRAE_SUPPORT if (function_addr == NULL) { fprintf(stderr, "error getting 'match'"); return NULL; } #endif atomic_store(&match_fun, function_addr); return NULL; }
int main(int argc, char **argv) { if (argc != 4) { fprintf(stderr, "Usage: %s <vector size in K> <sort size in K> <merge size in K>\n", argv[0]); return 1; } N = atol(argv[1]) * BLOCK_SIZE; MIN_SORT_SIZE = atol(argv[2]) * BLOCK_SIZE; MIN_MERGE_SIZE = atol(argv[3]) * BLOCK_SIZE; T *data = malloc(N*sizeof(T)); T *tmp = malloc(N*sizeof(T)); #if _EXTRAE_ Extrae_init(); Extrae_event(PROGRAM, INITIALIZE); #else double stamp; START_COUNT_TIME; #endif initialize(N, data); clear(N, tmp); #if _EXTRAE_ Extrae_event(PROGRAM, END); #else STOP_COUNT_TIME("Initialization time in seconds"); #endif #if _EXTRAE_ Extrae_event(PROGRAM, MULTISORT); #else START_COUNT_TIME; #endif #pragma omp parallel #pragma omp single { multisort(N, data, tmp); } #if _EXTRAE_ Extrae_event(PROGRAM,END); #else STOP_COUNT_TIME("Multisort execution time"); #endif #if _EXTRAE_ Extrae_event(PROGRAM,CHECK); #else START_COUNT_TIME; #endif check_sorted (N, data); #if _EXTRAE_ Extrae_event(PROGRAM,END); Extrae_fini(); #else STOP_COUNT_TIME("Check sorted data execution time"); #endif fprintf(stdout, "Multisort program finished\n"); return 0; }
/*
 * Sequential multisort instrumented for Tareador.
 *
 * Inputs of at least MIN_SORT_SIZE*4 elements are split into four quarters
 * that are sorted recursively, merged pairwise into `tmp`, and finally
 * merged back into `data`. Smaller inputs are sorted with basicsort().
 * With _TAREADOR_ enabled, every recursive call, every merge and the base
 * sort is wrapped in its own named Tareador task.
 */
void multisort(long n, T data[n], T tmp[n])
{
    if (n < MIN_SORT_SIZE*4L) {
        // Base case
#if _EXTRAE_
        Extrae_event(PROGRAM, SORT);
#endif
#if _TAREADOR_
        tareador_start_task("basesort");
#endif
        basicsort(n, data);
#if _TAREADOR_
        tareador_end_task();
#endif
#if _EXTRAE_
        Extrae_event(PROGRAM, END);
#endif
        return;
    }

    // Recursive decomposition: one Tareador task per call
    long quarter = n/4L;
    long half = n/2L;
    long three_quarters = 3L*n/4L;

#if _TAREADOR_
    tareador_start_task("multisort1");
#endif
    multisort(quarter, &data[0], &tmp[0]);
#if _TAREADOR_
    tareador_end_task();
    tareador_start_task("multisort2");
#endif
    multisort(quarter, &data[quarter], &tmp[quarter]);
#if _TAREADOR_
    tareador_end_task();
    tareador_start_task("multisort3");
#endif
    multisort(quarter, &data[half], &tmp[half]);
#if _TAREADOR_
    tareador_end_task();
    tareador_start_task("multisort4");
#endif
    multisort(quarter, &data[three_quarters], &tmp[three_quarters]);
#if _TAREADOR_
    tareador_end_task();
    tareador_start_task("merge1");
#endif
    merge(quarter, &data[0], &data[quarter], &tmp[0], 0, half);
#if _TAREADOR_
    tareador_end_task();
    tareador_start_task("merge2");
#endif
    merge(quarter, &data[half], &data[three_quarters], &tmp[half], 0, half);
#if _TAREADOR_
    tareador_end_task();
    tareador_start_task("merge3");
#endif
    merge(half, &tmp[0], &tmp[half], &data[0], 0, n);
#if _TAREADOR_
    tareador_end_task();
#endif
}
/*
 * JNI entry point for the Java method es.bsc.tools.extrae.Wrapper.Event:
 * converts (id, val) to the Extrae native types and emits them with
 * Extrae_event(). `env` and `jc` are not used.
 */
JNIEXPORT void JNICALL Java_es_bsc_tools_extrae_Wrapper_Event(JNIEnv *env,
    jclass jc, jint id, jlong val)
{
    extrae_type_t event_type = (extrae_type_t) id;
    extrae_value_t event_value = (extrae_value_t) val;

    Extrae_event(event_type, event_value);
}
/*
 * Thread start routine: prints the calling thread's id and its parameter,
 * bracketed by Extrae events of type 2 (value 1 on entry, 0 on exit).
 *
 * Always returns NULL.
 */
void *routine2 (void *parameters)
{
	Extrae_event (2, 1);

	// pthread_t is converted explicitly so that it matches the %lx format.
	printf ("routine 2 : (thread=%08lx, param %p)\n",
	        (unsigned long) pthread_self(), parameters);

	Extrae_event (2, 0);

	// A void * function must return a value; joiners would otherwise
	// read an indeterminate result.
	return NULL;
}
int main(int argc, char **argv) { if (argc != 4) { fprintf(stderr, "Usage: %s <vector size in K> <sort size in K> <merge size in K>\n", argv[0]); return 1; } #if _EXTRAE_ Extrae_init(); #endif N = atol(argv[1]) * 1024L; MIN_SORT_SIZE = atol(argv[2]) * 1024L; MIN_MERGE_SIZE = atol(argv[3]) * 1024L; T *data = malloc(N*sizeof(T)); T *tmp = malloc(N*sizeof(T)); #if _TAREADOR_ tareador_ON(); #endif #if _EXTRAE_ Extrae_event(PROGRAM, INITIALIZE); #else double init_time = omp_get_wtime(); #endif initialize(N, data); clear(N, tmp); #if _EXTRAE_ Extrae_event(PROGRAM, END); #else init_time = omp_get_wtime() - init_time; fprintf(stdout, "Initialization time in seconds = %g\n", init_time); #endif fprintf(stdout, "Multisort execution time using randomly generated data = "); do_sort(N, data, tmp); //sort randomly generated data #if _TAREADOR_ tareador_OFF(); #endif #if _EXTRAE_ Extrae_event(PROGRAM, INITIALIZE); Extrae_event(PROGRAM, END); #endif fprintf(stdout, "Multisort execution time using already sorted data = "); do_sort(N, data, tmp); // sort already sorted #if _EXTRAE_ Extrae_event(PROGRAM, INITIALIZE); #endif for (int i=0; i<N/2; i++) { // Reverse order double tmp =data[N-1-i]; data[N-1-i] = data[i]; data[i]=tmp; } #if _EXTRAE_ Extrae_event(PROGRAM, END); #endif fprintf(stdout, "Multisort execution time using reverse order data = "); do_sort(N, data, tmp); //sort data in inverted order #if _EXTRAE_ Extrae_fini(); #endif fprintf(stdout, "Multisort program finished\n"); return 0; }
int Extrae_CMD_Emit (int i, int argc, char *argv[]) { int threadid; int type; long long value; extrae_type_t TYPE = 0; extrae_value_t VALUE = 0; char *endptr; char extrae_append_pid[128]; if (argc-i < 3) { fprintf (stderr, CMD_EMIT" command requires 3 parameters SLOT, TYPE and VALUE\n"); return 0; } Extrae_CMD_Emit_get_info(); threadid = strtol (argv[i], &endptr, 10); if (endptr == &argv[i][strlen(argv[i])]) { if (threadid < 0) { fprintf (stderr, CMD_EMIT" command cannot handle negative SLOT\n"); return 0; } else _THREADID = threadid; } type = strtol (argv[i+1], &endptr, 10); if (endptr == &argv[i+1][strlen(argv[i+1])]) { if (type < 0) { fprintf (stderr, CMD_EMIT" command cannot handle negative TYPE\n"); return 0; } else TYPE = type; } value = strtoll (argv[i+2], &endptr, 10); if (endptr == &argv[i+2][strlen(argv[i+2])]) { if (value < 0) { fprintf (stderr, CMD_EMIT" command cannot handle negative VALUE\n"); return 0; } else VALUE = value; } Extrae_set_taskid_function (CMD_EMIT_TASKID); Extrae_set_numthreads_function (CMD_EMIT_NUMTHREADS); Extrae_set_threadid_function (CMD_EMIT_NUMTHREAD); _NTASKS = _TASKID+1; Extrae_set_numtasks_function (CMD_EMIT_NUMTASKS); putenv ("EXTRAE_ON=1"); sprintf (extrae_append_pid, "EXTRAE_APPEND_PID=%u", pid); putenv (extrae_append_pid); Extrae_init (); Extrae_event (TYPE, VALUE); Extrae_fini (); return 3; }