//monitor the submitted tasks // by polling each server for its load information. If a server is idle then it will return load = -4 (queue length - num of idle cores) // so if every server returns load = -4 it implies that all submitted tasks are complete void *monitor_function(void* args) { ZHTClient *clientRet = (ZHTClient*)args; Package loadPackage, loadhpcPackage, shutdownPackage; string loadmessage("Monitoring Information!"); loadPackage.set_virtualpath(loadmessage); loadPackage.set_operation(15); string loadstr = loadPackage.SerializeAsString(); string loadhpcmessage("Monitoring HPC Information!"); loadhpcPackage.set_virtualpath(loadhpcmessage); loadhpcPackage.set_operation(24); string loadhpcstr = loadhpcPackage.SerializeAsString(); string endmessage("Shutdown!"); loadPackage.set_virtualpath(endmessage); loadPackage.set_operation(98); string endstr = loadPackage.SerializeAsString(); //int num_worker = clientRet.memberList.size(); int num_worker = clientRet->memberList.size(); int num_cores = 2; int index = 0; long termination_value = num_worker * num_cores * -1; int total_avail_cores = num_cores * num_worker; int32_t total_queued = 0; int32_t total_idle = 0; int32_t queued_busy = 0; int32_t queued_idle = 0; int32_t queued = 0; int32_t num_idle = 0; int32_t num_busy = 0; int32_t load = 0; int32_t total_busy = 0; //int32_t status = 0; int32_t finished = 0; int32_t total_msg_count = 0; int32_t ret = 0; //sleep(60); int min_lines = num_worker; int num = num_worker - 1; cout << "The Number is " << num << endl; stringstream num_ss; num_ss << num; //min_lines++; string filename(shared); filename = filename + "startinfo" + num_ss.str(); cout << "The filename is " << filename << endl; string cmd("cat "); cmd = cmd + filename + " | wc -l"; cout << "The command is " << cmd << endl; string result = executeShell(cmd); //cout << cmd << " " << result << endl; //cout << "client: minlines = " << min_lines << " cmd = " << cmd << " result = " << result << endl; /*string 
filename(shared); filename = filename + "start_info"; string cmd("wc -l "); cmd = cmd + filename + " | awk {\'print $1\'}"; string result = executeShell(cmd);*/ while(atoi(result.c_str()) < 1) { sleep(5); result = executeShell(cmd); cout << " temp result = " << result << endl; } cout << "client: minlines = 1 " << " cmd = " << cmd << " result = " << result << endl; //cout << "starting to monitor" << endl; cout << "TIME START: " << start_tasks.tv_sec << " SECONDS " << start_tasks.tv_nsec << " NANOSECONDS" << endl; while(1) { //If mtc task or only complete queue values are taken if(NUM_OF_CORES == 1) { total_queued = 0; total_idle = 0; queued_busy = 0; stringstream worker_load; for(index = 0; index < num_worker; index++) { //int32_t queued_idle = clientRet.svrtosvr(loadstr, loadstr.length(), index); queued_idle = clientRet->svrtosvr(loadstr, loadstr.length(), index); queued = queued_idle/10; // summation of the lengths of the three queues num_idle = queued_idle%10; // number of idle cores total_queued = total_queued + queued; total_idle = total_idle + num_idle; num_busy = num_cores - num_idle; load = queued + num_busy; worker_load << load << " "; } loadfile << worker_load.str() << endl; total_busy = total_avail_cores - total_idle; queued_busy = total_queued + total_busy; finished = total_num_tasks - queued_busy; clock_gettime(CLOCK_REALTIME, &end_tasks); cout << "Total busy cores " << total_busy << " Total Load on all workers = " << queued_busy << " No. of tasks finished = " << finished << " Total tasks submitted = " << total_num_tasks << endl;//" time = " << end_tasks.tv_sec << " " << end_tasks.tv_nsec << endl; if (client_logfile.is_open() && cl_LOGGING) { client_logfile << "Total busy cores " << total_busy << " Total Load on all workers = " << queued_busy << " No. 
of tasks finished = " << finished << " Total tasks submitted = " << total_num_tasks << endl; } if(finished == total_num_tasks) { clock_gettime(CLOCK_REALTIME, &end_tasks); cout << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n"; break; } usleep(200000); } else { finished = 0; for(index = 0; index < num_worker; index++) { finished += clientRet->svrtosvr(loadhpcstr, loadhpcstr.length(), index); } cout << "The number of finished tasks = " << finished << endl; if(finished >= total_num_tasks-50) { clock_gettime(CLOCK_REALTIME, &end_tasks); cout << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n"; break; } usleep(200000); } } total_msg_count = 0; for(index = 0; index < num_worker; index++) { //clientRet.svrtosvr(endstr, endstr.length(), index); ret = clientRet->svrtosvr(endstr, endstr.length(), index); total_msg_count += ret; } cout << "TIME START: " << start_tasks.tv_sec << " SECONDS " << start_tasks.tv_nsec << " NANOSECONDS" << "\n"; cout << "TIME END: " << end_tasks.tv_sec << " SECONDS " << end_tasks.tv_nsec << " NANOSECONDS" << "\n"; timespec diff = timediff(start_tasks, end_tasks); cout << "TIME TAKEN: " << diff.tv_sec << " SECONDS " << diff.tv_nsec << " NANOSECONDS" << "\n"; cout << "Total messages between all servers = " << total_msg_count << endl; if (client_logfile.is_open() && cl_LOGGING) { client_logfile << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n"; client_logfile << "TIME START: " << start_tasks.tv_sec << " SECONDS " << start_tasks.tv_nsec << " NANOSECONDS" << "\n"; client_logfile << "TIME END: " << end_tasks.tv_sec << " SECONDS " << end_tasks.tv_nsec << " NANOSECONDS" << "\n"; client_logfile << "TIME TAKEN: " << diff.tv_sec << " SECONDS " << diff.tv_nsec << " NANOSECONDS" << endl; client_logfile << "Total messages between all servers = " << total_msg_count << endl; client_logfile.close(); //return 1; } 
pthread_exit(NULL); }
//monitor the submitted tasks // by polling each server for its load information. If a server is idle then it will return load = -4 (queue length - num of idle cores) // so if every server returns load = -4 it implies that all submitted tasks are complete void *monitor_function(void* args) { ZHTClient *clientRet = (ZHTClient*)args; Package loadPackage, shutdownPackage; string loadmessage("Monitoring Information!"); loadPackage.set_virtualpath(loadmessage); loadPackage.set_operation(15); string loadstr = loadPackage.SerializeAsString(); string endmessage("Shutdown!"); loadPackage.set_virtualpath(endmessage); loadPackage.set_operation(98); string endstr = loadPackage.SerializeAsString(); //int num_worker = clientRet.memberList.size(); int num_worker = clientRet->memberList.size(); int num_cores = ncores; int index = 0; long termination_value = num_worker * num_cores * -1; int total_avail_cores = num_cores * num_worker; int32_t total_queued = 0; int32_t total_idle = 0; int32_t queued_busy = 0; int32_t queued_idle = 0; int32_t queued = 0; int32_t num_idle = 0; int32_t num_busy = 0; int32_t load = 0; int32_t total_busy = 0; //int32_t status = 0; int32_t finished = 0; int32_t total_msg_count = 0; int32_t ret = 0; //sleep(60); int min_lines = num_worker; int num = num_worker - 1; stringstream num_ss; num_ss << num; long num_monitor = 0; //min_lines++; // not sure why we need this string filename(shared); filename = filename + "startinfo" + num_ss.str(); string cmd("cat "); cmd = cmd + filename + " | wc -l"; string result = executeShell(cmd); cout << cmd << " " << result << endl; //cout << "client: minlines = " << min_lines << " cmd = " << cmd << " result = " << result << endl; /*string filename(shared); filename = filename + "start_info"; string cmd("wc -l "); cmd = cmd + filename + " | awk {\'print $1\'}"; string result = executeShell(cmd);*/ while(atoi(result.c_str()) < 1) { usleep(minterval); result = executeShell(cmd); cout << " temp result = " << result << endl; } 
cout << "client: minlines = 1 " << " cmd = " << cmd << " result = " << result << endl; //cout << "starting to monitor" << endl; cout << "TIME START: " << start_tasks.tv_sec << " SECONDS " << start_tasks.tv_nsec << " NANOSECONDS" << endl; timespec local_start, local_diff; clock_gettime(CLOCK_REALTIME, &local_start); local_diff = timediff(start_tasks, local_start); if (client_logfile.is_open() && cl_LOGGING) { client_logfile << "Submission time: " << start_tasks.tv_sec << " SECONDS " << start_tasks.tv_nsec << " NANOSECONDS" << endl; client_logfile << "Monitoring time: " << local_start.tv_sec << " SECONDS " << local_start.tv_nsec << " NANOSECONDS" << endl; client_logfile << "TIME TAKEN: " << local_diff.tv_sec << " SECONDS " << local_diff.tv_nsec << " NANOSECONDS" << endl; } int total_fin = 0; while(1) { // total_queued = 0; // total_idle = 0; // queued_busy = 0; total_fin = 0; stringstream worker_load; for(index = 0; index < num_worker; index++) { //int32_t queued_idle = clientRet.svrtosvr(loadstr, loadstr.length(), index); queued_idle = clientRet->svrtosvr(loadstr, loadstr.length(), index); // queued = queued_idle/10; // summation of the lengths of the three queues // num_idle = queued_idle%10; // number of idle cores // total_queued = total_queued + queued; // total_idle = total_idle + num_idle; // num_busy = num_cores - num_idle; // load = queued + num_busy; total_fin += queued_idle; // worker_load << load << " "; worker_load << queued_idle << " "; } // loadfile << worker_load.str() << endl; // total_busy = total_avail_cores - total_idle; // queued_busy = total_queued + total_busy; // finished = total_num_tasks - queued_busy; num_monitor++; clock_gettime(CLOCK_REALTIME, &end_tasks); //cout << "Total busy cores " << total_busy << " Total Load on all workers = " << queued_busy << " No. 
of tasks finished = " << finished << " Total tasks submitted = " << total_num_tasks << endl;//" time = " << end_tasks.tv_sec << " " << end_tasks.tv_nsec << endl; if (client_logfile.is_open() && cl_LOGGING) { // client_logfile << "Total busy cores " << total_busy << " Total Load on all workers = " << queued_busy << " No. of tasks finished = " << finished << " Total tasks submitted = " << total_num_tasks << endl; //client_logfile << "No. of tasks finished is:" << total_fin <<", No. of tasks submitted is:" << total_num_tasks << endl; } // if(finished == total_num_tasks) if (total_fin == total_num_tasks) { clock_gettime(CLOCK_REALTIME, &end_tasks); cout << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n"; break; } usleep(minterval); } total_msg_count = 0; for(index = 0; index < num_worker; index++) { //clientRet.svrtosvr(endstr, endstr.length(), index); ret = clientRet->svrtosvr(endstr, endstr.length(), index); total_msg_count += ret; } cout << "TIME START: " << start_tasks.tv_sec << " SECONDS " << start_tasks.tv_nsec << " NANOSECONDS" << "\n"; cout << "TIME END: " << end_tasks.tv_sec << " SECONDS " << end_tasks.tv_nsec << " NANOSECONDS" << "\n"; timespec diff = timediff(start_tasks, end_tasks); cout << "TIME TAKEN: " << diff.tv_sec << " SECONDS " << diff.tv_nsec << " NANOSECONDS" << "\n"; cout << "Total messages between all servers = " << total_msg_count << endl; if (client_logfile.is_open() && cl_LOGGING) { client_logfile << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n"; client_logfile << "TIME START: " << start_tasks.tv_sec << " SECONDS " << start_tasks.tv_nsec << " NANOSECONDS" << "\n"; client_logfile << "TIME END: " << end_tasks.tv_sec << " SECONDS " << end_tasks.tv_nsec << " NANOSECONDS" << "\n"; client_logfile << "TIME TAKEN: " << diff.tv_sec << " SECONDS " << diff.tv_nsec << " NANOSECONDS" << endl; client_logfile << "Total messages between all servers = " << 
total_msg_count << endl; client_logfile << "Total monitoring times is = " << num_monitor << endl; client_logfile.flush(); client_logfile.close(); if (loadfile.is_open()) { loadfile.flush(); loadfile.close(); } //return 1; } pthread_exit(NULL); }
// Start one thread with the given attributes, or print the pthread error and
// terminate the process. pthread_create reports failure through its return
// value (it does not set errno), so that value is what gets formatted.
static void spawn_thread_or_exit(pthread_attr_t *thread_attr,
        void *(*entry)(void*), void *arg, const char *label) {
    pthread_t tid; // the handle is not kept by any caller in this file section
    int err = pthread_create(&tid, thread_attr, entry, arg);
    if (err) {
        printf("work_steal_init: pthread_create: %s: %s\n", label, strerror(err));
        exit(1);
    }
}

// Construct a worker: set up file names, connect the server client, work out
// this worker's index in the membership list, publish its start file, launch
// the queue-processing threads, wait until every member has published a start
// file, and finally launch the work-stealing thread.
//
// parameters: argument vector; this constructor itself reads
//             [1]  (converted with atoi and passed to getSelfIndex),
//             [9] and [10] (passed to set_dir),
//             [11] (number of cores) and [12] (ws_sleep).
//             The whole array is also handed to Env_var::set_env_var.
// novoht:     store pointer kept in pmap.
//
// Exits the process if the IP address cannot be determined, if a thread cannot
// be created, or if memory allocation fails while starting threads.
Worker::Worker(char *parameters[], NoVoHT *novoht) {
    /* set thread detachstate attribute to DETACHED */
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);

    /* filename definitions */
    set_dir(parameters[9], parameters[10]);
    file_worker_start.append(shared);
    file_worker_start.append("startinfo");
    file_task_fp.append(prefix);
    file_task_fp.append("pkgs");
    file_migrate_fp.append(prefix);
    file_migrate_fp.append("log_migrate");
    file_fin_fp.append(prefix);
    file_fin_fp.append("finish");
    file_log_fp.append(prefix);
    file_log_fp.append("log_worker");

    pmap = novoht;
    Env_var::set_env_var(parameters);
    svrclient.initialize(Env_var::cfgFile, Env_var::membershipFile, Env_var::TCP);

    if (set_ip(ip)) {
        printf("Could not get the IP address of this machine!\n");
        exit(1);
    }

    for (int i = 0; i < 10; i++) {
        msg_count[i] = 0;
    }

    poll_interval = start_poll;
    poll_threshold = start_thresh;
    num_nodes = svrclient.memberList.size();
    num_cores = atoi(parameters[11]);
    ws_sleep = atoi(parameters[12]);
    num_idle_cores = num_cores;
    neigh_mode = 'd';
    // The neighbour set has floor(sqrt(num_nodes)) entries.
    num_neigh = (int) (sqrt(num_nodes));
    neigh_index = new int[num_neigh];
    selfIndex = getSelfIndex(ip, atoi(parameters[1]), svrclient.memberList);

    // Decimal form of selfIndex, used as the suffix of every per-worker file.
    ostringstream oss;
    oss << selfIndex;
    printf("<ip:selfIndex>: <%s:%d>\n", ip.c_str(), selfIndex);

    if (LOGGING) {
        string f2 = file_task_fp;
        f2 = f2 + oss.str();
        task_fp.open(f2.c_str(), ios_base::app);

        string f3 = file_log_fp;
        f3 = f3 + oss.str();
        log_fp.open(f3.c_str(), ios_base::app);

        string f4 = file_migrate_fp;
        f4 = f4 + oss.str();
        migrate_fp.open(f4.c_str(), ios_base::app);
    }

    migratev = bitvec(num_nodes);

    // Pre-serialize the two request messages stored in loadstr and taskstr.
    Package loadPackage, tasksPackage;
    string loadmessage("Load Information!");
    loadPackage.set_virtualpath(loadmessage);
    loadPackage.set_operation(13);
    loadstr = loadPackage.SerializeAsString();

    string taskmessage(oss.str());
    tasksPackage.set_virtualpath(taskmessage);
    tasksPackage.set_operation(14);
    taskstr = tasksPackage.SerializeAsString();

    // Seed from selfIndex and sleep for up to one second (in microseconds)
    // before publishing the start file.
    srand((selfIndex + 1) * (selfIndex + 5));
    int rand_wait = rand() % 1000000;
    cout << "Worker ip = " << ip << " selfIndex = " << selfIndex << endl;
    usleep(rand_wait);

    // Publish "<ip>:<selfIndex> " in this worker's start file.
    file_worker_start.append(oss.str());
    string cmd("touch ");
    cmd.append(file_worker_start);
    system(cmd.c_str());
    FILE *fp = fopen(file_worker_start.c_str(), "w+");
    if (fp != NULL) {
        // fprintf writes straight to the stream, so no fixed-size buffer is
        // needed for the formatted text.
        fprintf(fp, "%s:%d ", ip.c_str(), selfIndex);
        fflush(fp);
        fclose(fp);
    } else {
        printf("Could not write worker start file %s: %s\n",
                file_worker_start.c_str(), strerror(errno));
    }

    clock_gettime(CLOCK_REALTIME, &poll_start);

    try {
        // One ready-queue consumer per core.
        for (int i = 0; i < num_cores; i++) {
            spawn_thread_or_exit(&attr, check_ready_queue, (void*) this,
                    "ready_queue_thread");
        }
        spawn_thread_or_exit(&attr, check_wait_queue, (void*) this,
                "wait_queue_thread");
        spawn_thread_or_exit(&attr, check_complete_queue, (void*) this,
                "complete_queue_thread");

        // Argument records for the two HB_insertQ_new threads. They are heap
        // allocated and intentionally never freed: the threads are detached
        // and only receive the pointer, so the storage must not end with this
        // constructor's stack frame.
        package_thread_args *rq_args = new package_thread_args;
        package_thread_args *wq_args = new package_thread_args;
        rq_args->source = &insertq_new;
        wq_args->source = &waitq;
        rq_args->dest = &rqueue;
        wq_args->dest = &wqueue;
        rq_args->slock = &iq_new_lock;
        wq_args->slock = &waitq_lock;
        rq_args->dlock = &lock;
        wq_args->dlock = &w_lock;
        rq_args->worker = this;
        wq_args->worker = this;

        spawn_thread_or_exit(&attr, HB_insertQ_new, (void*) wq_args,
                "waitq_thread");
        spawn_thread_or_exit(&attr, HB_insertQ_new, (void*) rq_args,
                "ready_thread");
        spawn_thread_or_exit(&attr, migrateTasks, (void*) this,
                "migrateq_thread");
        spawn_thread_or_exit(&attr, notQueue, (void*) this, "notq_thread");

        // Block until the shared directory lists at least one start file per
        // member, re-checking every two seconds.
        int min_lines = svrclient.memberList.size();
        string cmd2("ls -l ");
        cmd2.append(shared);
        cmd2.append("startinfo*");
        cmd2.append(" | wc -l");
        string result = executeShell(cmd2);
        while (atoi(result.c_str()) < min_lines) {
            sleep(2);
            result = executeShell(cmd2);
        }

        // Work stealing starts only after the start-file wait above.
        spawn_thread_or_exit(&attr, worksteal, (void*) this,
                "work_steal_thread");
    } catch (const std::bad_alloc& exc) {
        cout << "work_steal_init: failed to allocate memory while creating threads" << endl;
        exit(1);
    }
}