Example #1
0
//monitor the submitted tasks
// by polling each server for its load information. If a server is idle then it will return load = -4 (queue length - num of idle cores)
// so if every server returns load = -4 it implies that all submitted tasks are complete
void *monitor_function(void* args) {

	ZHTClient *clientRet = (ZHTClient*)args;
	
	Package loadPackage, loadhpcPackage, shutdownPackage;	
	string loadmessage("Monitoring Information!");
	loadPackage.set_virtualpath(loadmessage);
	loadPackage.set_operation(15);
	string loadstr = loadPackage.SerializeAsString();

	string loadhpcmessage("Monitoring HPC Information!");
        loadhpcPackage.set_virtualpath(loadhpcmessage);
        loadhpcPackage.set_operation(24);
        string loadhpcstr = loadhpcPackage.SerializeAsString();

	string endmessage("Shutdown!");
	loadPackage.set_virtualpath(endmessage);
	loadPackage.set_operation(98);
	string endstr = loadPackage.SerializeAsString();

	//int num_worker = clientRet.memberList.size();
	int num_worker = clientRet->memberList.size();
	int num_cores = 2;
	int index = 0;
	long termination_value = num_worker * num_cores * -1;

	int total_avail_cores = num_cores * num_worker;

	int32_t total_queued = 0;
	int32_t total_idle = 0;
	int32_t queued_busy = 0;

	int32_t queued_idle = 0;
	int32_t queued = 0;
	int32_t num_idle = 0;
	int32_t num_busy = 0;
	int32_t load = 0;
	int32_t total_busy = 0;
	//int32_t status = 0;
	int32_t finished = 0;

	int32_t total_msg_count = 0;
	int32_t ret = 0;
	//sleep(60);

	int min_lines = num_worker;
	int num = num_worker - 1;
	cout << "The Number is " << num << endl;
	stringstream num_ss;
	num_ss << num;
	//min_lines++;


	string filename(shared);
        filename = filename + "startinfo" + num_ss.str();
	cout << "The filename is " << filename << endl;
        string cmd("cat ");
        cmd = cmd + filename + " | wc -l";

	cout << "The command is " << cmd << endl;
        string result = executeShell(cmd);
	//cout << cmd << " " << result << endl;
	//cout << "client: minlines = " << min_lines << " cmd = " << cmd << " result = " << result << endl;
	/*string filename(shared);
	filename = filename + "start_info";
	string cmd("wc -l ");
	cmd = cmd + filename + " | awk {\'print $1\'}";
	string result = executeShell(cmd);*/
	
	while(atoi(result.c_str()) < 1) {
		sleep(5);
		result = executeShell(cmd); cout << " temp result = " << result << endl;
	} 
	cout << "client: minlines = 1 " << " cmd = " << cmd << " result = " << result << endl;
	//cout << "starting to monitor" << endl;
	cout << "TIME START: " << start_tasks.tv_sec << "  SECONDS  " << start_tasks.tv_nsec << "  NANOSECONDS" << endl;
	while(1) {

	//If mtc task or only complete queue values are taken 
	if(NUM_OF_CORES == 1)
	{
		total_queued = 0;
		total_idle = 0;
		queued_busy = 0;

		stringstream worker_load;
		for(index = 0; index < num_worker; index++) {
                        //int32_t queued_idle = clientRet.svrtosvr(loadstr, loadstr.length(), index);
			queued_idle = clientRet->svrtosvr(loadstr, loadstr.length(), index);
                        queued  = queued_idle/10;  // summation of the lengths of the three queues
                        num_idle = queued_idle%10;   // number of idle cores
                        total_queued = total_queued + queued;
                        total_idle   = total_idle + num_idle;
			num_busy = num_cores - num_idle;
			load = queued + num_busy;
			worker_load << load << " ";                 
                }
		loadfile << worker_load.str() << endl;
		total_busy = total_avail_cores - total_idle;
		queued_busy = total_queued + total_busy;
		finished = total_num_tasks - queued_busy;
		clock_gettime(CLOCK_REALTIME, &end_tasks);
		cout << "Total busy cores " << total_busy << " Total Load on all workers = " << queued_busy << " No. of tasks finished = " << finished << " Total tasks submitted = " << total_num_tasks << endl;//" time = " << end_tasks.tv_sec << " " << end_tasks.tv_nsec << endl;
		if (client_logfile.is_open() && cl_LOGGING) {
			client_logfile << "Total busy cores " << total_busy << "  Total Load on all workers = " << queued_busy << " No. of tasks finished = " << finished << " Total tasks submitted = " << total_num_tasks << endl;
		}
                if(finished == total_num_tasks) {

                        clock_gettime(CLOCK_REALTIME, &end_tasks);
                        cout << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n";
                        break;
                }

		usleep(200000);
	}
	else
	{
		finished = 0;
		for(index = 0; index < num_worker; index++) 
		{
			finished += clientRet->svrtosvr(loadhpcstr, loadhpcstr.length(), index);
		}
		cout << "The number of finished tasks = " << finished << endl;
		if(finished >= total_num_tasks-50) {

                        clock_gettime(CLOCK_REALTIME, &end_tasks);
                        cout << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n";
                        break;
                }

                usleep(200000);
	}
	}

	total_msg_count = 0;
	for(index = 0; index < num_worker; index++) {
		//clientRet.svrtosvr(endstr, endstr.length(), index);
		ret = clientRet->svrtosvr(endstr, endstr.length(), index);
		total_msg_count += ret;
	}

	cout << "TIME START: " << start_tasks.tv_sec << "  SECONDS  " << start_tasks.tv_nsec << "  NANOSECONDS" << "\n";
	cout << "TIME END: " << end_tasks.tv_sec << "  SECONDS  " << end_tasks.tv_nsec << "  NANOSECONDS" << "\n";
	timespec diff = timediff(start_tasks, end_tasks);
	cout << "TIME TAKEN: " << diff.tv_sec << "  SECONDS  " << diff.tv_nsec << "  NANOSECONDS" << "\n";
	cout << "Total messages between all servers = " << total_msg_count << endl;
	if (client_logfile.is_open() && cl_LOGGING) {		

		client_logfile << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n";
		client_logfile << "TIME START: " << start_tasks.tv_sec << "  SECONDS  " << start_tasks.tv_nsec << "  NANOSECONDS" << "\n";
		client_logfile << "TIME END: " << end_tasks.tv_sec << "  SECONDS  " << end_tasks.tv_nsec << "  NANOSECONDS" << "\n";
		client_logfile << "TIME TAKEN: " << diff.tv_sec << "  SECONDS  " << diff.tv_nsec << "  NANOSECONDS" << endl;
		client_logfile << "Total messages between all servers = " << total_msg_count << endl;

		client_logfile.close();
		//return 1;
	}
	pthread_exit(NULL);
}
Example #2
0
//monitor the submitted tasks
// by polling each server for its load information. If a server is idle then it will return load = -4 (queue length - num of idle cores)
// so if every server returns load = -4 it implies that all submitted tasks are complete
void *monitor_function(void* args) {

	ZHTClient *clientRet = (ZHTClient*)args;
	
	Package loadPackage, shutdownPackage;	
	string loadmessage("Monitoring Information!");
	loadPackage.set_virtualpath(loadmessage);
	loadPackage.set_operation(15);
	string loadstr = loadPackage.SerializeAsString();

	string endmessage("Shutdown!");
	loadPackage.set_virtualpath(endmessage);
	loadPackage.set_operation(98);
	string endstr = loadPackage.SerializeAsString();

	//int num_worker = clientRet.memberList.size();
	int num_worker = clientRet->memberList.size();
	int num_cores = ncores;
	int index = 0;
	long termination_value = num_worker * num_cores * -1;

	int total_avail_cores = num_cores * num_worker;

	int32_t total_queued = 0;
	int32_t total_idle = 0;
	int32_t queued_busy = 0;

	int32_t queued_idle = 0;
	int32_t queued = 0;
	int32_t num_idle = 0;
	int32_t num_busy = 0;
	int32_t load = 0;
	int32_t total_busy = 0;
	//int32_t status = 0;
	int32_t finished = 0;

	int32_t total_msg_count = 0;
	int32_t ret = 0;
	//sleep(60);

	int min_lines = num_worker;
	int num = num_worker - 1;
	stringstream num_ss;
	num_ss << num;

	long num_monitor = 0;
	//min_lines++;

	// not sure why we need this
	string filename(shared);
        filename = filename + "startinfo" + num_ss.str();
        string cmd("cat ");
        cmd = cmd + filename + " | wc -l";
        string result = executeShell(cmd);
	cout << cmd << " " << result << endl;
	//cout << "client: minlines = " << min_lines << " cmd = " << cmd << " result = " << result << endl;
	/*string filename(shared);
	filename = filename + "start_info";
	string cmd("wc -l ");
	cmd = cmd + filename + " | awk {\'print $1\'}";
	string result = executeShell(cmd);*/
	
	while(atoi(result.c_str()) < 1) {
		usleep(minterval);
		result = executeShell(cmd); cout << " temp result = " << result << endl;
	}
	cout << "client: minlines = 1 " << " cmd = " << cmd << " result = " << result << endl;
	//cout << "starting to monitor" << endl;
	cout << "TIME START: " << start_tasks.tv_sec << "  SECONDS  " << start_tasks.tv_nsec << "  NANOSECONDS" << endl;
	timespec local_start, local_diff;
	clock_gettime(CLOCK_REALTIME, &local_start);
	local_diff = timediff(start_tasks, local_start);
	if (client_logfile.is_open() && cl_LOGGING) {
			client_logfile << "Submission time: " << start_tasks.tv_sec << "  SECONDS  " << start_tasks.tv_nsec << "  NANOSECONDS" << endl;
			client_logfile << "Monitoring time: " << local_start.tv_sec << "  SECONDS  " << local_start.tv_nsec << "  NANOSECONDS" << endl;
			client_logfile << "TIME TAKEN: " << local_diff.tv_sec << "  SECONDS  " << local_diff.tv_nsec << "  NANOSECONDS" << endl;
		}
	int total_fin = 0;
	while(1) {

//		total_queued = 0;
//		total_idle = 0;
//		queued_busy = 0;
		total_fin = 0;
		stringstream worker_load;
		for(index = 0; index < num_worker; index++) {
                        //int32_t queued_idle = clientRet.svrtosvr(loadstr, loadstr.length(), index);
			queued_idle = clientRet->svrtosvr(loadstr, loadstr.length(), index);
//                        queued  = queued_idle/10;  // summation of the lengths of the three queues
//                        num_idle = queued_idle%10;   // number of idle cores
//                        total_queued = total_queued + queued;
//                        total_idle   = total_idle + num_idle;
//			num_busy = num_cores - num_idle;
//			load = queued + num_busy;
			total_fin += queued_idle;
//			worker_load << load << " ";
			worker_load << queued_idle << " ";
                }
//		loadfile << worker_load.str() << endl;
//		total_busy = total_avail_cores - total_idle;
//		queued_busy = total_queued + total_busy;
//		finished = total_num_tasks - queued_busy;
		num_monitor++;
		clock_gettime(CLOCK_REALTIME, &end_tasks);
		//cout << "Total busy cores " << total_busy << " Total Load on all workers = " << queued_busy << " No. of tasks finished = " << finished << " Total tasks submitted = " << total_num_tasks << endl;//" time = " << end_tasks.tv_sec << " " << end_tasks.tv_nsec << endl;
		if (client_logfile.is_open() && cl_LOGGING) {
//			client_logfile << "Total busy cores " << total_busy << "  Total Load on all workers = " << queued_busy << " No. of tasks finished = " << finished << " Total tasks submitted = " << total_num_tasks << endl;
			//client_logfile << "No. of tasks finished is:" << total_fin <<", No. of tasks submitted is:" << total_num_tasks << endl;
		}
//                if(finished == total_num_tasks)
                if (total_fin == total_num_tasks)
                {

                        clock_gettime(CLOCK_REALTIME, &end_tasks);
                        cout << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n";
                        break;
                }

		usleep(minterval);
	}

	total_msg_count = 0;
	for(index = 0; index < num_worker; index++) {
		//clientRet.svrtosvr(endstr, endstr.length(), index);
		ret = clientRet->svrtosvr(endstr, endstr.length(), index);
		total_msg_count += ret;
	}

	cout << "TIME START: " << start_tasks.tv_sec << "  SECONDS  " << start_tasks.tv_nsec << "  NANOSECONDS" << "\n";
	cout << "TIME END: " << end_tasks.tv_sec << "  SECONDS  " << end_tasks.tv_nsec << "  NANOSECONDS" << "\n";
	timespec diff = timediff(start_tasks, end_tasks);
	cout << "TIME TAKEN: " << diff.tv_sec << "  SECONDS  " << diff.tv_nsec << "  NANOSECONDS" << "\n";
	cout << "Total messages between all servers = " << total_msg_count << endl;
	if (client_logfile.is_open() && cl_LOGGING) {		

		client_logfile << "\n\n\n\n==============================All tasks finished===========================\n\n\n\n";
		client_logfile << "TIME START: " << start_tasks.tv_sec << "  SECONDS  " << start_tasks.tv_nsec << "  NANOSECONDS" << "\n";
		client_logfile << "TIME END: " << end_tasks.tv_sec << "  SECONDS  " << end_tasks.tv_nsec << "  NANOSECONDS" << "\n";
		client_logfile << "TIME TAKEN: " << diff.tv_sec << "  SECONDS  " << diff.tv_nsec << "  NANOSECONDS" << endl;
		client_logfile << "Total messages between all servers = " << total_msg_count << endl;
		client_logfile << "Total monitoring times is = " << num_monitor << endl;
		client_logfile.flush();
		client_logfile.close();
		if (loadfile.is_open())
		{
			loadfile.flush();
			loadfile.close();
		}
		//return 1;
	}
	pthread_exit(NULL);
}
Example #3
0
/*
 * Start one thread through pthread_create(); on failure report the error
 * under the given label and terminate the process with exit(1).
 * pthread_create() returns the error number instead of setting errno, so the
 * returned value is what gets passed to strerror().
 */
static void spawn_thread_or_exit(pthread_t *tid, pthread_attr_t *thread_attr,
		void *(*entry)(void *), void *arg, const char *label) {
	int err = pthread_create(tid, thread_attr, entry, arg);
	if (err) {
		printf("work_steal_init: pthread_create: %s: %s\n", label,
				strerror(err));
		exit(1);
	}
}

/*
 * Set up a worker: derive its file names, connect the ZHT client, determine
 * its own index in the membership list, announce itself through a
 * "startinfo<selfIndex>" file, start the queue-processing threads, wait until
 * every member has announced itself and finally start the work-stealing
 * thread.
 *
 * parameters: command-line style argument vector. Entries read here are
 *             [1] (converted with atoi and passed to getSelfIndex),
 *             [9] and [10] (passed to set_dir), [11] (number of cores) and
 *             [12] (ws_sleep); the whole vector also goes to
 *             Env_var::set_env_var.
 * novoht:     stored in pmap.
 *
 * Any failure to obtain the IP address, to create a thread or to allocate
 * memory terminates the process with exit(1).
 */
Worker::Worker(char *parameters[], NoVoHT *novoht) {
	/* set thread detachstate attribute to DETACHED */
	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);

	/* filename definitions */
	set_dir(parameters[9], parameters[10]);
	file_worker_start.append(shared);
	file_worker_start.append("startinfo");
	file_task_fp.append(prefix);
	file_task_fp.append("pkgs");
	file_migrate_fp.append(prefix);
	file_migrate_fp.append("log_migrate");
	file_fin_fp.append(prefix);
	file_fin_fp.append("finish");
	file_log_fp.append(prefix);
	file_log_fp.append("log_worker");

	pmap = novoht;
	Env_var::set_env_var(parameters);
	svrclient.initialize(Env_var::cfgFile, Env_var::membershipFile,
			Env_var::TCP);

	if (set_ip(ip)) {
		printf("Could not get the IP address of this machine!\n");
		exit(1);
	}

	for (int i = 0; i < 10; i++) {
		msg_count[i] = 0;
	}

	poll_interval = start_poll;
	poll_threshold = start_thresh;
	num_nodes = svrclient.memberList.size();
	num_cores = atoi(parameters[11]);
	ws_sleep = atoi(parameters[12]);
	num_idle_cores = num_cores;
	neigh_mode = 'd';
	/* the number of neighbors is the integer square root of the node count */
	num_neigh = (int) (sqrt(num_nodes));
	neigh_index = new int[num_neigh];
	selfIndex = getSelfIndex(ip, atoi(parameters[1]), svrclient.memberList);// replace "localhost" with proper hostname, host is the IP in C++ string
	ostringstream oss;
	oss << selfIndex;

	printf("<ip:selfIndex>: <%s:%d>\n", ip.c_str(), selfIndex);

	/* per-worker log files are named <base name><selfIndex> and opened in append mode */
	if (LOGGING) {

		string f2 = file_task_fp;
		f2 = f2 + oss.str();
		task_fp.open(f2.c_str(), ios_base::app);

		string f3 = file_log_fp;
		f3 = f3 + oss.str();
		log_fp.open(f3.c_str(), ios_base::app);

		string f4 = file_migrate_fp;
		f4 = f4 + oss.str();
		migrate_fp.open(f4.c_str(), ios_base::app);
	}

	migratev = bitvec(num_nodes);

	/* pre-serialize the two request messages this worker sends */
	Package loadPackage, tasksPackage;
	string loadmessage("Load Information!");
	loadPackage.set_virtualpath(loadmessage);
	loadPackage.set_operation(13);
	loadstr = loadPackage.SerializeAsString();

	stringstream selfIndexstream;
	selfIndexstream << selfIndex;
	string taskmessage(selfIndexstream.str());
	tasksPackage.set_virtualpath(taskmessage);
	tasksPackage.set_operation(14);
	taskstr = tasksPackage.SerializeAsString();

	/* seed from selfIndex and wait up to one second (rand_wait is in microseconds) */
	srand((selfIndex + 1) * (selfIndex + 5));
	int rand_wait = rand() % 1000000;
	cout << "Worker ip = " << ip << " selfIndex = " << selfIndex << endl;
	usleep(rand_wait);

	/* announce this worker: create <shared>startinfo<selfIndex> and write "ip:selfIndex " into it */
	file_worker_start.append(oss.str());
	string cmd("touch ");
	cmd.append(file_worker_start);
	system(cmd.c_str());

	FILE *fp = fopen(file_worker_start.c_str(), "w+");
	if (fp != NULL) {
		/* written directly, so no fixed-size intermediate buffer can overflow */
		fprintf(fp, "%s:%d ", ip.c_str(), selfIndex);
		fflush(fp);
		fclose(fp);
	} else {
		/* not fatal, as before, but no longer silent */
		printf("Could not open worker start file %s: %s\n",
				file_worker_start.c_str(), strerror(errno));
	}

	clock_gettime(CLOCK_REALTIME, &poll_start);

	try {
		/* All threads are created with attr (detached, see above); their ids
		 * are not used after creation. */
		pthread_t tid;

		pthread_t *ready_queue_thread = new pthread_t[num_cores];
		for (int i = 0; i < num_cores; i++) {
			spawn_thread_or_exit(&ready_queue_thread[i], &attr,
					check_ready_queue, (void*) this, "ready_queue_thread");
		}
		/* array form: the ids were allocated with new[] */
		delete[] ready_queue_thread;

		spawn_thread_or_exit(&tid, &attr, check_wait_queue, (void*) this,
				"wait_queue_thread");

		spawn_thread_or_exit(&tid, &attr, check_complete_queue, (void*) this,
				"complete_queue_thread");

		/* The argument blocks are handed to detached threads, so they must
		 * not live on this constructor's stack. They are heap-allocated and
		 * deliberately never freed.
		 * NOTE(review): assumes HB_insertQ_new does not free its argument —
		 * confirm. */
		package_thread_args *rq_args = new package_thread_args();
		package_thread_args *wq_args = new package_thread_args();
		rq_args->source = &insertq_new;
		wq_args->source = &waitq;
		rq_args->dest = &rqueue;
		wq_args->dest = &wqueue;
		rq_args->slock = &iq_new_lock;
		wq_args->slock = &waitq_lock;
		rq_args->dlock = &lock;
		wq_args->dlock = &w_lock;
		rq_args->worker = this;
		wq_args->worker = this;

		spawn_thread_or_exit(&tid, &attr, HB_insertQ_new, (void*) wq_args,
				"waitq_thread");

		spawn_thread_or_exit(&tid, &attr, HB_insertQ_new, (void*) rq_args,
				"ready_thread");

		spawn_thread_or_exit(&tid, &attr, migrateTasks, (void*) this,
				"migrateq_thread");

		spawn_thread_or_exit(&tid, &attr, notQueue, (void*) this,
				"notq_thread");

		/* Block until the shared directory lists at least one startinfo*
		 * entry per member, polling every two seconds. */
		int min_lines = svrclient.memberList.size();
		string cmd2("ls -l ");
		cmd2.append(shared);
		cmd2.append("startinfo*");
		cmd2.append(" | wc -l");
		string result = executeShell(cmd2);
		while (atoi(result.c_str()) < min_lines) {
			sleep(2);
			result = executeShell(cmd2);
		}

		/* work stealing is started only after the wait above has finished */
		spawn_thread_or_exit(&tid, &attr, worksteal, (void*) this,
				"work_steal_thread");
	} catch (std::bad_alloc& exc) {
		cout
				<< "work_steal_init: failed to allocate memory while creating threads"
				<< endl;
		exit(1);
	}
}