cudaError_t cudaMemcpy (void * a1, const void * a2, size_t a3, enum cudaMemcpyKind a4) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaMemcpy"; Event.eid = 326; Event.type = CUDA_MEM; Event.memcpy_size = a3; Event.memcpy_type = a4; Event.memcpy_kind = SYNC; cudaMemcpy_real = (cudaError_t (*)(void*, const void*, size_t, enum cudaMemcpyKind)) dlsym(RTLD_NEXT, "cudaMemcpy"); Event.starttime = gettime (); retVal = (*cudaMemcpy_real)( a1, a2, a3, a4); Event.endtime = gettime (); #ifdef DEBUG printf("[DEBUG]:\t%s\tlu\t%lu\n", Event.event_name, Event.starttime, Event.endtime ); #endif Record (&Event, CMPI_TRACE); return retVal; }
/* 指导语句: #pragma omp for 结构功能: for开始函数(无parallel时) 函数功能: 创建一个任务共享结构 */ int GOMP_loop_dynamic_start (long p1, long p2, long p3, long p4, long *p5, long *p6) { int res = 0; Record_Event Event = Event_init (); Event.event_name = "GOMP_loop_dynamic_start"; Event.eid = 220; Event.type = NONE; Event.omp_rank = get_thread_num (); Event.omp_level = get_level (); Event.p_rank = omp_get_ancestor_thread_num (get_level () - 1); GOMP_loop_dynamic_start_real = (int(*)(long,long,long,long,long*,long*)) dlsym (RTLD_NEXT, "GOMP_loop_dynamic_start"); if (GOMP_loop_dynamic_start_real != NULL) { Event.starttime = gettime (); res = GOMP_loop_dynamic_start_real (p1, p2, p3, p4, p5, p6); Event.endtime = gettime (); Record (&Event, OMPI_TRACE); } else { printf_d("GOMP_loop_dynamic_start is not hooked! exiting!!\n"); } return res; }
/* 指导语句: #pragma omp sections 结构功能: section调度函数 函数功能: 当一个线程结束其执行的任务时,调用该函数分配下一个任务 */ unsigned GOMP_sections_next (void) { Record_Event Event = Event_init (); Event.event_name = "GOMP_sections_next"; Event.eid = 224; Event.type = NONE; Event.omp_rank = get_thread_num (); Event.omp_level = get_level (); Event.p_rank = omp_get_ancestor_thread_num (get_level () - 1); unsigned res = 0; GOMP_sections_next_real=(unsigned(*)(void)) dlsym (RTLD_NEXT, "GOMP_sections_next"); if (GOMP_sections_next_real != NULL) { Event.starttime=gettime(); res = GOMP_sections_next_real(); Event.endtime=gettime(); Record(&Event, OMPI_TRACE); } else { printf_d("GOMP_sections_next is not hooked! exiting!!\n"); } return res; }
/* 指导语句: #pragma omp for 结构功能: for预初始化函数(有parallel时) 函数功能: 预初始化一个任务共享结构 */ void GOMP_parallel_loop_guided_start(void *p1, void *p2, unsigned p3, long p4, long p5, long p6, long p7) { TaskInfo old_task; Record_Event Event = Event_init (); Event.event_name = "GOMP_parallel_loop_guided_start"; Event.eid = 212; Event.type = NONE; Event.omp_rank = get_thread_num (); Event.omp_level = get_level (); Event.p_rank = omp_get_ancestor_thread_num (get_level () - 1); old_task = current_task; //If current task is not exist, create a new task if (current_task.flag == 0) { current_task = create_itask (); Event.task_state_start = TASK_CREATE; } else Event.task_state_start = TASK_SUSPEND; create_team (current_task); Event.p_task_id_start = current_task.task_parent_id; Event.task_id_start = current_task.task_id; GOMP_parallel_loop_guided_start_real=(void(*)(void*,void*,unsigned, long, long, long, long)) dlsym (RTLD_NEXT, "GOMP_parallel_loop_guided_start"); if (GOMP_parallel_loop_guided_start_real != NULL) { pardo_uf = (void(*)(void*))p1; pardo_uf_id++; /*if (PAPI == PAPI_ON) retVal = PAPI_thread_init(get_thread_num()); if (retVal != PAPI_OK) ERROR_RETURN(retVal);*/ Event.starttime=gettime(); GOMP_parallel_loop_guided_start_real (callme_pardo, p2, p3, p4, p5, p6, p7); Event.endtime=gettime(); } else { printf_d("GOMP_parallel_loop_guided_start is not hooked! exiting!!\n"); } Event.p_task_id_end = current_task.task_parent_id; Event.task_id_end = current_task.task_id; if (old_task.flag == 0) Event.task_state_end = TASK_START; else Event.task_state_end = TASK_RESUME; Record(&Event, OMPI_TRACE); }
/* 指导语句: #pragma omp parallel 结构功能: parallel用户子函数 函数功能: parallel中调用的用户子函数 */ static void callme_par (void *p1) { TaskInfo old_task; char fun_name[30] = "Parallel_User_fun_"; char id [10]; Record_Event Event = Event_init (); old_task = current_task; current_task = create_itask (); //Is it necessary to add this task to the thread team ? add_itask (current_task); itoa (par_uf_id, id); strcat (fun_name, id); Event.event_name = fun_name; Event.eid = 234; Event.type = NONE; Event.omp_rank = get_thread_num (); Event.omp_level = get_level (); Event.p_rank = omp_get_ancestor_thread_num (get_level () - 1); Event.p_task_id_start = current_task.task_parent_id; Event.task_id_start = current_task.task_id; Event.task_state_start = TASK_CREATE; if (par_uf == NULL) { printf_d("Error! Invalid initialization of 'par_uf'\n"); return ; } if (PAPI == PAPI_ON) { PAPI_get_info (fun_name, 0, PAPI_THREAD); Event.starttime = gettime (); par_uf (p1); Event.endtime = gettime (); PAPI_get_info (fun_name, 1, PAPI_THREAD); } else { Event.starttime = gettime (); par_uf (p1); Event.endtime = gettime (); } Event.p_task_id_end = current_task.task_parent_id; Event.task_id_end = current_task.task_id; Event.task_state_end = TASK_END; remove_itask (current_task); current_task = old_task; Record (&Event, OMPI_TRACE); }
/* 指导语句: #pragma omp task 结构功能: task用户子函数 函数功能: task中调用的用户子函数 */ static void callme_task (void *p1) { TaskInfo old_task; struct eTask *task; char fun_name[30] = "Task_User_do_fun_"; char id [10]; Record_Event Event = Event_init (); old_task = current_task; task = etask_schedule (); current_task = task.task_info; current_task.thread_id = get_thread_id (get_level ()); // current_etask = task; itoa (task_uf_id, id); strcat (fun_name, id); Event.event_name = fun_name; Event.eid = 235; Event.type = NONE; Event.omp_rank = get_thread_num (); Event.omp_level = get_level (); Event.p_rank = omp_get_ancestor_thread_num (get_level () - 1); Event.p_task_id_start = current_task.task_parent_id; Event.task_id_start = current_task.task_id; Event.task_state_start = TASK_START; if (task_uf == NULL) { printf_d("Error! Invalid initialization of 'task_uf'\n"); return ; } if (PAPI == PAPI_ON) { PAPI_get_info (fun_name, 0, PAPI_THREAD); Event.starttime = gettime (); task_uf (p1); Event.endtime = gettime (); PAPI_get_info (fun_name, 1, PAPI_THREAD); } else { Event.starttime = gettime (); task_uf (p1); Event.endtime = gettime (); } Event.p_task_id_end = current_task.task_parent_id; Event.task_id_end = current_task.task_id; Event.task_state_end = TASK_END; remove_etask (task); current_task = old_task; Record (&Event, OMPI_TRACE); }
void GOMP_task (void *p1, void *p2, void *p3,long p4, long p5, _Bool p6, unsigned p7) { struct eTask *task; int old_block = block_statue; Record_Event Event = Event_init (); Event.event_name = "GOMP_task"; Event.eid = 229; Event.type = NONE; Event.omp_rank = get_thread_num (); Event.omp_level = get_level (); Event.p_rank = omp_get_ancestor_thread_num (get_level () - 1); if (current_task.flag == 1) { Event.p_task_id_start = current_task.task_parent_id; Event.task_id_start = current_task.task_id; Event.task_state_start = TASK_CREATE; } if (p6) { /*Create a task that will be executed immediately*/ block_statue = RUN_AT_ONCE_BLOCK; } else { task = create_etask (); add_etask (task); } GOMP_task_real = (void(*)(void *,void *,void *,long,long,_Bool,unsigned))dlsym(RTLD_NEXT,"GOMP_task"); if(GOMP_task_real != NULL) { task_uf = (void(*)(void*))p1; task_uf_id++; Event.starttime = gettime(); GOMP_task_real (callme_task, p2, p3, p4, p5, p6, p7); Event.endtime = gettime(); } block_statue = old_block; if (current_task.flag == 1) { Event.p_task_id_end = current_task.task_parent_id; Event.task_id_end = current_task.task_id; Event.task_state_end = TASK_RESUME; } Record(&Event, OMPI_TRACE); }
/* 指导语句: #pragma omp parallel 结构功能: parallel开始函数 函数功能: 初始化一个parallel并行结构 */ void GOMP_parallel_start (void *p1, void *p2, unsigned p3) { TaskInfo old_task; Record_Event Event = Event_init (); //初始化 Event.event_name = "GOMP_parallel_start"; //获取函数名 Event.eid = 200; Event.type = NONE; Event.omp_rank = get_thread_num (); //获取线程编号 Event.omp_level = get_level (); Event.p_rank = omp_get_ancestor_thread_num (get_level () - 1); old_task = current_task; //If current task is not exist, create a new task if (current_task.flag == 0) { current_task = create_itask (); Event.task_state_start = TASK_CREATE; } else Event.task_state_start = TASK_SUSPEND; create_team (current_task); Event.p_task_id_start = current_task.task_parent_id; Event.task_id_start = current_task.task_id; /*dlsym函数返回 GOMP_parallel_start 在动态链接库中的下一个地址,供调用使用*/ GOMP_parallel_start_real = (void(*)(void*,void*,unsigned))dlsym (RTLD_NEXT, "GOMP_parallel_start"); if (GOMP_parallel_start_real != NULL) { par_uf = (void(*)(void*))p1; //调用子函数的包装函数 par_uf_id++; Event.starttime = gettime(); //获取开始时间 GOMP_parallel_start_real (callme_par, p2, p3); //调用OpenMP库中的GOMP_parallel_start()实现功能 Event.endtime = gettime (); //获取结束时间 } else { printf_d ("GOMP_parallel_start is not hooked! exiting!!\n"); } Event.p_task_id_end = current_task.task_parent_id; Event.task_id_end = current_task.task_id; if (old_task.flag == 0) Event.task_state_end = TASK_START; else Event.task_state_end = TASK_RESUME; Record (&Event, OMPI_TRACE); }
/* 指导语句: #pragma omp parallel for 结构功能: for调度函数 函数功能: 当一个线程完成指定给它的任务时,调用该函数分配下个任务 */ int GOMP_loop_guided_next(long *p1, long *p2) { int res = 0; Record_Event Event = Event_init (); Event.event_name = "GOMP_loop_guided_next"; Event.eid = 216; Event.type = NONE; Event.omp_rank = get_thread_num (); Event.omp_level = get_level (); Event.p_rank = omp_get_ancestor_thread_num (get_level () - 1); Event.p_task_id_start = current_task.task_parent_id; Event.task_id_start = current_task.task_id; Event.task_state_start= TASK_END; GOMP_loop_guided_next_real=(int(*)(long*,long*)) dlsym (RTLD_NEXT, "GOMP_loop_guided_next"); if (GOMP_loop_guided_next_real != NULL) { Event.starttime=gettime(); res = GOMP_loop_guided_next_real (p1, p2); Event.endtime=gettime(); Record(&Event, OMPI_TRACE); } else { printf_d("GOMP_loop_guided_next is not hooked! exiting!!\n"); } if (res == 1) //Create a new task for this thread { current_task = create_itask (); Event.p_task_id_end = current_task.task_parent_id; Event.task_id_end= current_task.task_id; Event.task_state_end = TASK_CREATE; } else { current_task = get_current_task (); if (current_task.flag == 1) { Event.p_task_id_end = current_task.task_parent_id; Event.task_id_end= current_task.task_id; Event.task_state_end = TASK_RESUME; } } Record (&Event, OMPI_TRACE); return res; }
/*
 * ORs Flags into the shared Flag word under EventLock and wakes every thread
 * waiting on EventSig.
 *
 * Calls Event_init() first if bInitialized is not set. When
 * mt_o.bEventDMsg is set a debug line is printed before the update; when
 * mt_o.bEventYields is set the caller yields the CPU afterwards.
 * Always returns 0.
 */
int Event_Send(unsigned int Flags)
{
	if (!bInitialized)
		Event_init();

	if (mt_o.bEventDMsg)
	{
		/* pthread_t is an opaque type; cast it explicitly to match %lu.
		   NOTE(review): Flag is read here without holding EventLock, so the
		   printed value may be stale - confirm this is acceptable for debug output. */
		printf("%lu Event_Send(0x%08X), Flag = 0x%08X\n", (unsigned long) pthread_self(), Flags, Flag | Flags);
	}

	pthread_mutex_lock(&EventLock);
	Flag |= Flags;
	pthread_cond_broadcast(&EventSig);
	pthread_mutex_unlock(&EventLock);

	if (mt_o.bEventYields)
		sched_yield();

	return 0;
}
/*
 * Tracing wrapper for cudaEventCreateWithFlags (event id 316).
 * Looks up the next "cudaEventCreateWithFlags" definition via
 * dlsym(RTLD_NEXT), forwards a1 and a2 unchanged, and hands the timed event
 * to Record() with CMPI_TRACE.
 * Returns the forwarded call's result; if the lookup fails, returns
 * cudaErrorSharedObjectSymbolNotFound without recording an event, instead of
 * calling through a null pointer.
 */
cudaError_t cudaEventCreateWithFlags (cudaEvent_t *a1, unsigned int a2)
{
	cudaError_t retVal;
	Record_Event Event = Event_init ();

	Event.event_name = "cudaEventCreateWithFlags";
	Event.eid = 316;
	Event.type = NONE;

	cudaEventCreateWithFlags_real = (cudaError_t (*)(cudaEvent_t *, unsigned int)) dlsym (RTLD_NEXT, "cudaEventCreateWithFlags");
	if (!cudaEventCreateWithFlags_real)
		return cudaErrorSharedObjectSymbolNotFound;

	Event.starttime = gettime ();
	retVal = cudaEventCreateWithFlags_real (a1, a2);
	Event.endtime = gettime ();

	Record (&Event, CMPI_TRACE);
	return retVal;
}
/*
 * Tracing wrapper for cudaEventDestroy (event id 321).
 * Looks up the next "cudaEventDestroy" definition via dlsym(RTLD_NEXT),
 * forwards a1 unchanged, and hands the timed event to Record() with
 * CMPI_TRACE.
 * Returns the forwarded call's result; if the lookup fails, returns
 * cudaErrorSharedObjectSymbolNotFound without recording an event, instead of
 * calling through a null pointer.
 */
cudaError_t cudaEventDestroy (cudaEvent_t a1)
{
	cudaError_t retVal;
	Record_Event Event = Event_init ();

	Event.event_name = "cudaEventDestroy";
	Event.eid = 321;
	Event.type = NONE;

	cudaEventDestroy_real = (cudaError_t (*)(cudaEvent_t)) dlsym (RTLD_NEXT, "cudaEventDestroy");
	if (!cudaEventDestroy_real)
		return cudaErrorSharedObjectSymbolNotFound;

	Event.starttime = gettime ();
	retVal = cudaEventDestroy_real (a1);
	Event.endtime = gettime ();

	Record (&Event, CMPI_TRACE);
	return retVal;
}
/*
 * Tracing wrapper for cudaEventRecord (event id 317).
 * Looks up the next "cudaEventRecord" definition via dlsym(RTLD_NEXT),
 * forwards a1 and a2 unchanged, and hands the timed event to Record() with
 * CMPI_TRACE.
 * Returns the forwarded call's result; if the lookup fails, returns
 * cudaErrorSharedObjectSymbolNotFound without recording an event, instead of
 * calling through a null pointer.
 */
cudaError_t cudaEventRecord (cudaEvent_t a1, cudaStream_t a2)
{
	cudaError_t retVal;
	Record_Event Event = Event_init ();

	Event.event_name = "cudaEventRecord";
	Event.eid = 317;
	Event.type = NONE;

	cudaEventRecord_real = (cudaError_t (*)(cudaEvent_t, cudaStream_t)) dlsym (RTLD_NEXT, "cudaEventRecord");
	if (!cudaEventRecord_real)
		return cudaErrorSharedObjectSymbolNotFound;

	Event.starttime = gettime ();
	retVal = cudaEventRecord_real (a1, a2);
	Event.endtime = gettime ();

	Record (&Event, CMPI_TRACE);
	return retVal;
}
cudaError_t cudaMemGetInfo (size_t * a1, size_t * a2) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaMemGetInfo"; Event.eid = 341; Event.type = NONE; cudaMemGetInfo_real = (cudaError_t (*)(size_t *, size_t *)) dlsym (RTLD_NEXT, "cudaMemGetInfo"); Event.starttime = gettime (); retVal = cudaMemGetInfo_real (a1, a2); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
/*
 * Tracing wrapper for cudaEventElapsedTime (event id 322).
 * Looks up the next "cudaEventElapsedTime" definition via dlsym(RTLD_NEXT),
 * forwards a1..a3 unchanged, and hands the timed event to Record() with
 * CMPI_TRACE.
 * Returns the forwarded call's result; if the lookup fails, returns
 * cudaErrorSharedObjectSymbolNotFound without recording an event, instead of
 * calling through a null pointer.
 */
cudaError_t cudaEventElapsedTime (float *a1, cudaEvent_t a2, cudaEvent_t a3)
{
	cudaError_t retVal;
	Record_Event Event = Event_init ();

	Event.event_name = "cudaEventElapsedTime";
	Event.eid = 322;
	Event.type = NONE;

	cudaEventElapsedTime_real = (cudaError_t (*)(float *, cudaEvent_t, cudaEvent_t)) dlsym (RTLD_NEXT, "cudaEventElapsedTime");
	if (!cudaEventElapsedTime_real)
		return cudaErrorSharedObjectSymbolNotFound;

	Event.starttime = gettime ();
	retVal = cudaEventElapsedTime_real (a1, a2, a3);
	Event.endtime = gettime ();

	Record (&Event, CMPI_TRACE);
	return retVal;
}
/*
 * Tracing wrapper for cudaStreamWaitEvent (event id 312).
 * Looks up the next "cudaStreamWaitEvent" definition via dlsym(RTLD_NEXT),
 * forwards a1..a3 unchanged, and hands the timed event to Record() with
 * CMPI_TRACE.
 * Returns the forwarded call's result; if the lookup fails, returns
 * cudaErrorSharedObjectSymbolNotFound without recording an event, instead of
 * calling through a null pointer.
 */
cudaError_t cudaStreamWaitEvent (cudaStream_t a1, cudaEvent_t a2, unsigned int a3)
{
	cudaError_t retVal;
	Record_Event Event = Event_init ();

	Event.event_name = "cudaStreamWaitEvent";
	Event.eid = 312;
	Event.type = NONE;

	cudaStreamWaitEvent_real = (cudaError_t (*)(cudaStream_t, cudaEvent_t, unsigned int)) dlsym (RTLD_NEXT, "cudaStreamWaitEvent");
	if (!cudaStreamWaitEvent_real)
		return cudaErrorSharedObjectSymbolNotFound;

	Event.starttime = gettime ();
	retVal = cudaStreamWaitEvent_real (a1, a2, a3);
	Event.endtime = gettime ();

	Record (&Event, CMPI_TRACE);
	return retVal;
}
cudaError_t cudaPeekAtLastError () { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaPeekAtLastError"; Event.eid = 324; Event.type = NONE; cudaPeekAtLastError_real = (cudaError_t (*)(void)) dlsym (RTLD_NEXT, "cudaPeekAtLastError"); Event.starttime = gettime (); retVal = (*cudaPeekAtLastError_real) (); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
cudaError_t cudaHostGetDevicePointer (void ** a1, void * a2, unsigned int a3) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaHostGetDevicePointer"; Event.eid = 335; Event.type = NONE; cudaHostGetDevicePointer_real = (cudaError_t (*)(void **, void *, unsigned int)) dlsym (RTLD_NEXT, "cudaHostGetDevicePointer"); Event.starttime = gettime (); retVal = cudaHostGetDevicePointer_real (a1, a2, a3); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
cudaError_t cudaHostGetFlags (unsigned int * a1, void * a2) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaHostGetFlags"; Event.eid = 336; Event.type = NONE; cudaHostGetFlags_real = (cudaError_t (*)(unsigned int *, void *)) dlsym (RTLD_NEXT, "cudaHostGetFlags"); Event.starttime = gettime (); retVal = cudaHostGetFlags_real (a1, a2); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
cudaError_t cudaSetDeviceFlags (unsigned int a1) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaSetDeviceFlags"; Event.eid = 307; Event.type = NONE; cudaSetDeviceFlags_real = (cudaError_t (*)(unsigned int)) dlsym (RTLD_NEXT, "cudaSetDeviceFlags"); Event.starttime = gettime (); retVal = cudaSetDeviceFlags_real (a1); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
cudaError_t cudaHostAlloc (void ** a1, size_t a2, unsigned int a3) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaHostAlloc"; Event.eid = 334; Event.type = NONE; cudaHostAlloc_real = (cudaError_t (*)(void **, size_t, unsigned int)) dlsym (RTLD_NEXT, "cudaHostAlloc"); Event.starttime = gettime (); retVal = cudaHostAlloc_real (a1, a2, a3); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
cudaError_t cudaFreeHost (void *a1) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaFreeHost"; Event.eid = 332; Event.type = NONE; cudaFreeHost_real = (cudaError_t (*)(void *)) dlsym (RTLD_NEXT, "cudaFreeHost"); Event.starttime = gettime (); retVal = cudaFreeHost_real (a1); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
cudaError_t cudaMallocPitch (void ** a1, size_t * a2, size_t a3, size_t a4) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaMallocPitch"; Event.eid = 329; Event.type = NONE; cudaMallocPitch_real = (cudaError_t (*)(void **, size_t *, size_t, size_t)) dlsym (RTLD_NEXT, "cudaMallocPitch"); Event.starttime = gettime (); retVal = cudaMallocPitch_real (a1, a2, a3, a4); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
cudaError_t cudaMallocHost (void ** a1, size_t a2) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaMallocHost"; Event.eid = 328; Event.type = NONE; cudaMallocHost_real = (cudaError_t (*)(void **, size_t)) dlsym (RTLD_NEXT, "cudaMallocHost"); Event.starttime = gettime (); retVal = cudaMallocHost_real (a1, a2); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
/*
 * Tracing wrapper for cudaEventCreate (event id 315).
 * Looks up the next "cudaEventCreate" definition via dlsym(RTLD_NEXT),
 * forwards a1 unchanged, and hands the timed event to Record() with
 * CMPI_TRACE.
 * Returns the forwarded call's result; if the lookup fails, returns
 * cudaErrorSharedObjectSymbolNotFound without recording an event, instead of
 * calling through a null pointer.
 */
cudaError_t cudaEventCreate (cudaEvent_t * a1)
{
	cudaError_t retVal;
	Record_Event Event = Event_init ();

	Event.event_name = "cudaEventCreate";
	Event.eid = 315;
	Event.type = NONE;

	cudaEventCreate_real = (cudaError_t (*)(cudaEvent_t *)) dlsym (RTLD_NEXT, "cudaEventCreate");
	if (!cudaEventCreate_real)
		return cudaErrorSharedObjectSymbolNotFound;

	Event.starttime = gettime ();
	retVal = cudaEventCreate_real (a1);
	Event.endtime = gettime ();

	Record (&Event, CMPI_TRACE);
	return retVal;
}
cudaError_t cudaMalloc3D (struct cudaPitchedPtr * a1, struct cudaExtent a2) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaMalloc3D"; Event.eid = 337; Event.type = NONE; cudaMalloc3D_real = (cudaError_t (*)(struct cudaPitchedPtr *, struct cudaExtent)) dlsym (RTLD_NEXT, "cudaMalloc3D"); Event.starttime = gettime (); retVal = cudaMalloc3D_real (a1, a2); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
/*
 * Tracing wrapper for cudaStreamSynchronize (event id 313).
 * Looks up the next "cudaStreamSynchronize" definition via dlsym(RTLD_NEXT),
 * forwards a1 unchanged, and hands the timed event to Record() with
 * CMPI_TRACE.
 * Returns the forwarded call's result; if the lookup fails, returns
 * cudaErrorSharedObjectSymbolNotFound without recording an event, instead of
 * calling through a null pointer.
 */
cudaError_t cudaStreamSynchronize (cudaStream_t a1)
{
	cudaError_t retVal;
	Record_Event Event = Event_init ();

	Event.event_name = "cudaStreamSynchronize";
	Event.eid = 313;
	Event.type = NONE;

	cudaStreamSynchronize_real = (cudaError_t (*)(cudaStream_t)) dlsym (RTLD_NEXT, "cudaStreamSynchronize");
	if (!cudaStreamSynchronize_real)
		return cudaErrorSharedObjectSymbolNotFound;

	Event.starttime = gettime ();
	retVal = cudaStreamSynchronize_real (a1);
	Event.endtime = gettime ();

	Record (&Event, CMPI_TRACE);
	return retVal;
}
cudaError_t cudaMalloc3DArray (struct cudaArray ** a1, const struct cudaChannelFormatDesc * a2, struct cudaExtent a3, unsigned int a4) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaMalloc3DArray"; Event.eid = 338; Event.type = NONE; cudaMalloc3DArray_real = (cudaError_t (*)(struct cudaArray **, const struct cudaChannelFormatDesc *, struct cudaExtent, unsigned int)) dlsym (RTLD_NEXT, "cudaMalloc3DArray"); Event.starttime = gettime (); retVal = cudaMalloc3DArray_real (a1, a2, a3, a4); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
cudaError_t cudaFreeArray (struct cudaArray * a1) { cudaError_t retVal; Record_Event Event = Event_init (); Event.event_name = "cudaFreeArray"; Event.eid = 333; Event.type = NONE; cudaFreeArray_real = (cudaError_t (*)(struct cudaArray *)) dlsym (RTLD_NEXT, "cudaFreeArray"); Event.starttime = gettime (); retVal = cudaFreeArray_real (a1); Event.endtime = gettime (); Record (&Event, CMPI_TRACE); return retVal; }
/*
 * Tracing wrapper for cudaGetErrorString (event id 325).
 * Looks up the next "cudaGetErrorString" definition via dlsym(RTLD_NEXT),
 * forwards a1 unchanged, and hands the timed event to Record() with
 * CMPI_TRACE.
 *
 * Returns the string produced by the forwarded call. If the lookup fails, a
 * fixed diagnostic string is returned and no event is recorded.
 */
const char * cudaGetErrorString (cudaError_t a1)
{
	const char * retVal;
	Record_Event Event = Event_init ();

	Event.event_name = "cudaGetErrorString";
	Event.eid = 325;
	Event.type = NONE;

	cudaGetErrorString_real = (const char *(*)(cudaError_t)) dlsym (RTLD_NEXT, "cudaGetErrorString");
	if (!cudaGetErrorString_real)
		return "cudaGetErrorString: real symbol not found";

	Event.starttime = gettime ();
	/* Must go through the resolved pointer: calling cudaGetErrorString by
	   name here would re-enter this wrapper and recurse without end. */
	retVal = (*cudaGetErrorString_real) (a1);
	Event.endtime = gettime ();

	Record (&Event, CMPI_TRACE);
	return retVal;
}