/* Scan every worker's run queue for a stealable task.
 * Returns a stolen thread (re-homed to env), the FINISH sentinel cast to
 * myth_thread_t when a worker has entered the FINISH state, or NULL when
 * nothing is available. */
myth_thread_t myth_eco_all_task_check(myth_running_env_t env)
{
#ifdef MYTH_WS_PROF_DETAIL
	uint64_t t0,t1;
	t0=myth_get_rdtsc();
#endif
	int widx;
	for (widx = 0; widx < g_worker_thread_num; widx++) {
		myth_running_env_t victim = &g_envs[widx];
		if (victim->c == RUNNING) {
			myth_thread_t stolen = myth_queue_take(&victim->runnable_q);
			if (stolen) {
				/* hand the stolen task over to our worker */
				stolen->env = env;
				return stolen;
			}
		}
		if (victim->c == FINISH) {
			/* sentinel: the runtime is shutting down */
			return (myth_thread_t)FINISH;
		}
	}
	return NULL;
}
myth_thread_t myth_default_steal_func(int rank) { myth_running_env_t env,busy_env; myth_thread_t next_run=NULL; #ifdef MYTH_WS_PROF_DETAIL uint64_t t0,t1; t0=myth_get_rdtsc(); #endif //Choose a worker thread that seems to be busy env=&g_envs[rank]; busy_env=myth_env_get_first_busy(env); if (busy_env){ //int ws_victim; #if 0 #ifdef MYTH_SCHED_LOOP_DEBUG myth_dprintf("env %p is trying to steal thread from %p...\n",env,busy_env); #endif #endif //ws_victim=busy_env->rank; //Try to steal thread next_run=myth_queue_take(&busy_env->runnable_q); if (next_run){ #ifdef MYTH_SCHED_LOOP_DEBUG myth_dprintf("env %p is stealing thread %p from %p...\n",env,steal_th,busy_env); #endif myth_assert(next_run->status==MYTH_STATUS_READY); //Change worker thread descriptor } } #ifdef MYTH_WS_PROF_DETAIL t1=myth_get_rdtsc(); if (g_sched_prof){ env->prof_data.ws_attempt_count[busy_env->rank]++; if (next_run){ env->prof_data.ws_hit_cycles+=t1-t0; env->prof_data.ws_hit_cnt++; }else{ env->prof_data.ws_miss_cycles+=t1-t0; env->prof_data.ws_miss_cnt++; } } #endif return next_run; }
myth_thread_t myth_eco_steal(int rank) { myth_running_env_t env,busy_env; myth_thread_t next_run=NULL; #ifdef MYTH_WS_PROF_DETAIL uint64_t t0,t1; t0=myth_get_rdtsc(); #endif //Choose a worker thread that seems to be busy env=&g_envs[rank]; if(env->isSleepy == 1) { env->isSleepy = 0; busy_env = &g_envs[env->ws_target]; } else { busy_env=myth_env_get_first_busy(env); } if (busy_env){ myth_assert(busy_env!=env); //int ws_victim; //ws_victim=busy_env->rank; //Try to steal thread next_run=myth_queue_take(&busy_env->runnable_q); if (next_run){ #ifdef MYTH_SCHED_LOOP_DEBUG myth_dprintf("env %p is stealing thread %p from %p...\n",env,steal_th,busy_env); #endif myth_assert(next_run->status==MYTH_STATUS_READY); //Change worker thread descriptor next_run->env=env; } } if(!next_run) { if(busy_env->c == STEALING) { #ifdef MYTH_ECO_TEST if(env->thief_count < 3) { env->thief_count++; return 0; } #endif myth_sleep(); // This line seems not correct, it may occur infinite recursion //return myth_eco_steal(env->rank); return NULL; } else if(busy_env->c == SLEEPING) { MAY_BE_UNUSED int tmp = task_num; next_run = myth_eco_all_task_check(env); if(!next_run){ myth_sleep(); } else { return next_run; } } else if(busy_env->c == RUNNING) { // victim has one task and executing int tmp = task_num; next_run = myth_eco_all_task_check(env); if(!next_run){ myth_sleep_2(tmp); } else { return next_run; } } else if(busy_env->c == FINISH) { return (myth_thread_t)FINISH; } } #ifdef MYTH_WS_PROF_DETAIL t1=myth_get_rdtsc(); if (g_sched_prof){ env->prof_data.ws_attempt_count[busy_env->rank]++; if (next_run){ env->prof_data.ws_hit_cycles+=t1-t0; env->prof_data.ws_hit_cnt++; }else{ env->prof_data.ws_miss_cycles+=t1-t0; env->prof_data.ws_miss_cnt++; } } #endif #ifdef MYTH_ECO_TEST env->thief_count = 0; #endif return next_run; }
void myth_emit_log(FILE *fp_prof_out) { //Write profiling log uint64_t t1,t0,tx; { t0=0;t1=0;tx=0; int i; for (i=0;i<100;i++){ t0=myth_get_rdtsc(); t1=myth_get_rdtsc(); tx+=t1-t0; } tx/=100; } #ifdef MYTH_PROF_SHOW_WORKER int i; for (i=0;i<g_worker_thread_num;i++){ #if defined MYTH_CREATE_PROF && !defined MYTH_PROF_COUNT_CSV fprintf(fp_prof_out,"Create threads %lu : %lf cycles/creation\n",(unsigned long)env[i].prof_data.create_cnt,env[i].prof_data.create_cycles/(double)env[i].prof_data.create_cnt-tx); #endif #if defined MYTH_CREATE_PROF_DETAIL && !defined MYTH_PROF_COUNT_CSV fprintf(fp_prof_out,"A:%lf B:%lf C:%lf D:%lf\n" ,env[i].prof_data.create_cyclesA/(double)env[i].prof_data.create_cnt-tx ,env[i].prof_data.create_cyclesB/(double)env[i].prof_data.create_cnt-tx ,env[i].prof_data.create_cyclesC/(double)env[i].prof_data.create_cnt-tx ,env[i].prof_data.create_cyclesD/(double)env[i].prof_data.create_cnt-tx); #endif #if defined MYTH_ENTRY_POINT_PROF && !defined MYTH_PROF_COUNT_CSV #ifdef SWITCH_AFTER_CREATE fprintf(fp_prof_out,"Ran threads %lu : %lf cycle overhead/run\n",(unsigned long)env[i].prof_data.ep_cnt,(env[i].prof_data.ep_cyclesB)/(double)env[i].prof_data.ep_cnt-tx); #else fprintf(fp_prof_out,"Ran threads %lu : %lf cycle overhead/run\n",(unsigned long)env[i].prof_data.ep_cnt,(env[i].prof_data.ep_cyclesA+env[i].prof_data.ep_cyclesB)/(double)env[i].prof_data.ep_cnt-tx*2); fprintf(fp_prof_out,"A:%lf B:%lf\n" ,env[i].prof_data.ep_cyclesA/(double)env[i].prof_data.ep_cnt-tx ,env[i].prof_data.ep_cyclesB/(double)env[i].prof_data.ep_cnt-tx); #endif #endif #if defined MYTH_JOIN_PROF && !defined MYTH_PROF_COUNT_CSV fprintf(fp_prof_out,"Joins %lu : %lf cycles/join\n",(unsigned long)env[i].prof_data.join_cnt,env[i].prof_data.join_cycles/(double)env[i].prof_data.join_cnt-tx); #endif #ifdef MYTH_ALLOC_PROF fprintf(fp_prof_out,"Malloc %lu\n",(unsigned long)env[i].prof_data.malloc_cnt); #endif #ifdef MYTH_PROF_COUNT_CSV fprintf(fp_prof_out,"%lu,%lu,%lu\n",(unsigned 
long)env[i].prof_data.create_cnt,(unsigned long)env[i].prof_data.ep_cnt,(unsigned long)env[i].prof_data.join_cnt); #endif } #else MAY_BE_UNUSED uint64_t sum1,sum2,sum3,sum4,sum5; MAY_BE_UNUSED int i; i=0; sum1=0;sum2=0;sum3=0;sum4=0;sum5=0; #if defined MYTH_CREATE_PROF && !defined MYTH_PROF_COUNT_CSV sum1=0;sum2=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.create_cnt; sum2+=g_envs[i].prof_data.create_cycles; } fprintf(fp_prof_out,"Create threads %lu : %lf cycles/creation\n", (unsigned long)sum1,sum2/(double)sum1-tx); #endif #if defined MYTH_CREATE_PROF_DETAIL && !defined MYTH_PROF_COUNT_CSV sum1=0;sum2=0;sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.create_d_cnt; sum2+=g_envs[i].prof_data.create_alloc; sum3+=g_envs[i].prof_data.create_switch; sum4+=g_envs[i].prof_data.create_push; } fprintf(fp_prof_out,"At thread creation (count : %ld ):\n",(long)sum1); fprintf(fp_prof_out,"Frame allocation : %lf\n",sum2/(double)sum1-tx); fprintf(fp_prof_out,"Context switch : %lf\n",sum3/(double)sum1-tx); fprintf(fp_prof_out,"Runqueue operation(push) : %lf\n",sum4/(double)sum1-tx); #endif #if defined MYTH_ENTRY_POINT_PROF && !defined MYTH_PROF_COUNT_CSV sum1=0;sum2=0;sum3=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.ep_cnt; sum2+=g_envs[i].prof_data.ep_cyclesA; sum3+=g_envs[i].prof_data.ep_cyclesB; } #ifdef SWITCH_AFTER_CREATE fprintf(fp_prof_out,"Ran threads %lu : %lf cycle overhead/run\n",(unsigned long)sum1,sum3/(double)sum1-tx); #else fprintf(fp_prof_out,"Ran threads %lu : %lf cycle overhead/run\n",(unsigned long)sum1,(sum2+sum3)/(double)sum1-tx*2); fprintf(fp_prof_out,"A:%lf B:%lf\n" ,sum2/(double)sum1-tx ,sum3/(double)sum1-tx); #endif #endif #if defined MYTH_EP_PROF_DETAIL sum1=0;sum2=0;sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.ep_d_cnt; sum2+=g_envs[i].prof_data.ep_join; sum3+=g_envs[i].prof_data.ep_switch; sum4+=g_envs[i].prof_data.ep_pop; } fprintf(fp_prof_out,"At 
thread cleanup (count : %ld ):\n",sum1); fprintf(fp_prof_out,"Join operation : %lf\n",sum2/(double)sum1-tx*2); fprintf(fp_prof_out,"Context switch : %lf\n",sum3/(double)sum1-tx); fprintf(fp_prof_out,"Runqueue operation(pop) : %lf\n",sum4/(double)sum1-tx); #endif #if defined MYTH_JOIN_PROF && !defined MYTH_PROF_COUNT_CSV sum1=0;sum2=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.join_cnt; sum2+=g_envs[i].prof_data.join_cycles; } fprintf(fp_prof_out,"Joins %lu : %lf cycles/join\n",(unsigned long)sum1,sum2/(double)sum1-tx); #endif #if defined MYTH_JOIN_PROF_DETAIL sum1=0;sum2=0;sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.join_d_cnt; sum2+=g_envs[i].prof_data.join_join; sum3+=g_envs[i].prof_data.join_release; } fprintf(fp_prof_out,"At join (count : %ld ):\n",sum1); fprintf(fp_prof_out,"Join operation : %lf\n",sum2/(double)sum1-tx); fprintf(fp_prof_out,"Frame release : %lf\n",sum3/(double)sum1-tx); #endif #ifdef MYTH_WS_PROF_DETAIL sum1=0;sum2=0;sum3=0;sum4=0; fprintf(fp_prof_out,"WS attempts:\n"); for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.ws_hit_cnt; sum2+=g_envs[i].prof_data.ws_hit_cycles; sum3+=g_envs[i].prof_data.ws_miss_cnt; sum4+=g_envs[i].prof_data.ws_miss_cycles; int j; for (j=0;j<g_worker_thread_num;j++){ fprintf(fp_prof_out,"%d ",(int)g_envs[i].prof_data.ws_attempt_count[j]); } fprintf(fp_prof_out,"\n"); } fprintf(fp_prof_out,"At work-stealing :\n"); fprintf(fp_prof_out,"Hit : %ld ( %lf )\n",(unsigned long)sum1,sum2/(double)sum1-tx); fprintf(fp_prof_out,"Miss : %ld ( %lf )\n",(unsigned long)sum3,sum4/(double)sum3-tx); #endif #ifdef MYTH_SWITCH_PROF sum1=0;sum2=0;sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.sw_cnt; sum2+=g_envs[i].prof_data.sw_cycles; } fprintf(fp_prof_out,"Context switch (count : %ld ):\n",(unsigned long)sum1); fprintf(fp_prof_out,"Overhead : %lf cycles\n",sum2/(double)sum1-tx); #endif #ifdef MYTH_ALLOC_PROF sum1=0;sum2=0; sum3=0;sum4=0; 
for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.malloc_cnt; sum2+=g_envs[i].prof_data.alloc_cnt; sum3+=g_envs[i].prof_data.malloc_cycles; sum4+=g_envs[i].prof_data.addlist_cycles; } fprintf(fp_prof_out,"s+d:\n"); fprintf(fp_prof_out,"Malloc %lu alloc %lu (ratio:%lf)\n",(unsigned long)sum1,(unsigned long)sum2,sum1/(double)sum2); fprintf(fp_prof_out,"mmap/malloc : %lf cycles/alloc, addlist : %lf cycles/alloc\n",sum3/(double)sum2,sum4/(double)sum2); sum1=0;sum2=0; sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.dmalloc_cnt; sum2+=g_envs[i].prof_data.dalloc_cnt; sum3+=g_envs[i].prof_data.dmalloc_cycles; sum4+=g_envs[i].prof_data.daddlist_cycles; } fprintf(fp_prof_out,"Desc:\n"); fprintf(fp_prof_out,"Malloc %lu alloc %lu (ratio:%lf)\n",(unsigned long)sum1,(unsigned long)sum2,sum1/(double)sum2); fprintf(fp_prof_out,"mmap/malloc : %lf cycles/alloc, addlist : %lf cycles/alloc\n",sum3/(double)sum2,sum4/(double)sum2); sum1=0;sum2=0; sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.smalloc_cnt; sum2+=g_envs[i].prof_data.salloc_cnt; sum3+=g_envs[i].prof_data.smalloc_cycles; sum4+=g_envs[i].prof_data.saddlist_cycles; } fprintf(fp_prof_out,"Stack:\n"); fprintf(fp_prof_out,"Malloc %lu alloc %lu (ratio:%lf)\n",(unsigned long)sum1,(unsigned long)sum2,sum1/(double)sum2); fprintf(fp_prof_out,"mmap/malloc : %lf cycles/alloc, addlist : %lf cycles/alloc\n",sum3/(double)sum2,sum4/(double)sum2); #endif #if defined MYTH_IO_PROF_DETAIL sum1=0;sum2=0;sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.io_succ_send_cnt; sum2+=g_envs[i].prof_data.io_block_send_cnt; sum3+=g_envs[i].prof_data.io_succ_send_cycles; sum4+=g_envs[i].prof_data.io_block_send_cycles; } fprintf(fp_prof_out,"Send:\n"); fprintf(fp_prof_out,"Success:Block = %lu : %lu\n",(unsigned long)sum1,(unsigned long)sum2); fprintf(fp_prof_out,"Overhead %lf : %lf\n",sum3/(double)sum1-tx,sum4/(double)sum2-tx); sum1=0;sum2=0;sum3=0;sum4=0; 
for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.io_succ_recv_cnt; sum2+=g_envs[i].prof_data.io_block_recv_cnt; sum3+=g_envs[i].prof_data.io_succ_recv_cycles; sum4+=g_envs[i].prof_data.io_block_recv_cycles; } fprintf(fp_prof_out,"Recv:\n"); fprintf(fp_prof_out,"Success:Block = %lu : %lu\n",(unsigned long)sum1,(unsigned long)sum2); fprintf(fp_prof_out,"Overhead %lf : %lf\n",sum3/(double)sum1-tx,sum4/(double)sum2-tx); sum1=0;sum2=0;sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.io_epoll_hit; sum2+=g_envs[i].prof_data.io_epoll_hit_cycles; sum3+=g_envs[i].prof_data.io_epoll_miss; sum4+=g_envs[i].prof_data.io_epoll_miss_cycles; } fprintf(fp_prof_out,"epoll:\n"); fprintf(fp_prof_out,"hit: %lu ( %lf cycles)\n",(unsigned long)sum1,sum2/(double)sum1-tx); fprintf(fp_prof_out,"miss: %lu ( %lf cycles)\n",(unsigned long)sum3,sum4/(double)sum3-tx); fprintf(fp_prof_out,"overall: %lu ( %lf cycles)\n",(unsigned long)(sum1+sum3),(sum2+sum4)/(double)(sum1+sum3)-tx); sum1=0;sum2=0;sum3=0;sum4=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.io_chk_hit; sum2+=g_envs[i].prof_data.io_chk_hit_cycles; sum3+=g_envs[i].prof_data.io_chk_miss; sum4+=g_envs[i].prof_data.io_chk_miss_cycles; } fprintf(fp_prof_out,"I/O check:\n"); fprintf(fp_prof_out,"hit: %lu ( %lf cycles)\n",(unsigned long)sum1,sum2/(double)sum1-tx); fprintf(fp_prof_out,"miss: %lu ( %lf cycles)\n",(unsigned long)sum3,sum4/(double)sum3-tx); fprintf(fp_prof_out,"overall: %lu ( %lf cycles)\n",(unsigned long)(sum1+sum3),(sum2+sum4)/(double)(sum1+sum3)-tx); #endif #ifdef MYTH_PROF_COUNT_CSV sum1=0;sum2=0;sum3=0; for (i=0;i<g_worker_thread_num;i++){ sum1+=g_envs[i].prof_data.create_cnt; sum2+=g_envs[i].prof_data.ep_cnt; sum3+=g_envs[i].prof_data.join_cnt; } fprintf(fp_prof_out,"%lu,%lu,%lu\n",(unsigned long)sum1,(unsigned long)sum2,(unsigned long)sum3); #endif #endif }