Exemplo n.º 1
0
/*
 * Takes a timestamp with gethrtime_x86() before and after one call to
 * empty() and prints the difference between the two readings.
 * The command-line arguments are not used.
 */
int main(int argc, char const *argv[])
{
	double const before = gethrtime_x86();
	empty();
	double const after = gethrtime_x86();

	double const elapsed = after - before;
	printf("Time: %0.8f\n", elapsed);

	return 0;
}
/*
 * Thread entry point for the row-elimination phase.
 *
 * id points to an int holding this worker's task number. Rows are dealt out
 * round-robin: a worker handles row r when r % task_num equals its task
 * number. For every pivot row the owning worker calls getPivot(), all workers
 * meet at a barrier, then each worker eliminates the pivot column from its
 * own rows below the pivot (updating matrix and B), and all workers meet at
 * a second barrier before moving to the next pivot row.
 *
 * Worker 0 prints the difference between the timestamp it took after the
 * initial barrier and the timestamp taken once the loop is done.
 * Always returns NULL.
 */
void *work_thread(void *id){
    const int self = *((int *) id);
    double t_begin, t_end;
    double factor;
    int pivot_row, row, col;

    /* Line every worker up before the clock starts. */
    barrier(task_num);
    if (self == 0) {
        t_begin = gethrtime_x86();
    }

    for (pivot_row = 0; pivot_row < nsize; pivot_row++) {
        /* Only the owner of the pivot row performs pivot selection. */
        if (pivot_row % task_num == self) {
            getPivot(nsize, pivot_row);
        }
        barrier(task_num); /* pivot row must be settled before anyone reads it */

        for (row = pivot_row + 1; row < nsize; row++) {
            if (row % task_num != self) {
                continue; /* row belongs to another worker */
            }

            /* Multiplier taken from the column being cleared. */
            factor = matrix[row][pivot_row];
            matrix[row][pivot_row] = 0.0;
            for (col = pivot_row + 1; col < nsize; col++) {
                matrix[row][col] -= factor * matrix[pivot_row][col];
            }
            B[row] -= factor * B[pivot_row];
        }

        barrier(task_num); /* all rows updated before the next pivot step */
    }

    t_end = gethrtime_x86();
    if (self == 0) {
        printf("Hrtime = %f seconds\n", t_end - t_begin);
    }

    return NULL;
}
Exemplo n.º 3
0
/*
 * Driver for the threaded SOR run. The calling thread also acts as worker 0.
 *
 * Usage: SOR <matrix> <thread> <w>
 *   argv[1] is handed to initMatrix(), whose return value becomes nsize.
 *   argv[2] is the number of workers (must be >= 1); workers 1..thread_num-1
 *           are started as pthreads running task().
 *   argv[3] is parsed with atof() and stored in w.
 *
 * The main thread pins itself to CPU 0, starts the workers, runs the same
 * solve / convergence-test loop as the workers on rows 0..workload-1, joins
 * the workers (each returns a heap-allocated int that is stored in
 * iteration[]), and finally prints timing and iteration statistics.
 *
 * Returns 0 on normal completion or on a usage error, 1 when the thread
 * count is invalid, memory cannot be allocated or a worker cannot be started.
 */
int main(int argc, char **argv){

	setbuf(stdout,0);
	int num = sysconf(_SC_NPROCESSORS_CONF);
	printf("system has %d processor(s)\n", num);
	cpu_set_t mask,get;
	CPU_ZERO(&mask);
	CPU_SET(0, &mask);
	int i,j ;
	int ite = 0;
	ite_converge = 0;
	hrtimer_converge = 0.0;
	int retcode,allone;
	void *retval;
	double hrtimer_tmp,hrtime_start,hrtime_end,l2normtotal;
	if ( argc != 4) {printf("[usage] SOR <matrix> <thread> <w>\n"); return 0;}

	/* The pthread affinity calls report failure with a non-zero (positive)
	 * error number, never with a negative value. */
	if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) != 0) {
		fprintf(stderr, "set thread affinity failed\n");
	}
	thread_num = atoi(argv[2]);
	if (thread_num < 1) {
		/* A zero or negative count would divide by zero below and give the
		 * thread array an invalid size. */
		fprintf(stderr, "thread count must be a positive integer\n");
		return 1;
	}
	w = atof(argv[3]);
	nsize = initMatrix(argv[1]);
	initRHS(nsize);
	initResult(nsize);
	int workload = (int)ceil((double)nsize/(double)thread_num) ;
	pthread_t thread[thread_num];

	/* seq is the (shuffled) order in which peers are flagged for a
	 * convergence test; ids holds the index handed to each worker and must
	 * stay alive until every worker has been joined. */
	int *seq = (int *)malloc(thread_num*sizeof(int));
	int *ids = (int *)malloc(thread_num*sizeof(int));
	if (seq == NULL || ids == NULL) {
		fprintf(stderr, "out of memory\n");
		free(seq);
		free(ids);
		return 1;
	}
	for(i=0;i<thread_num;i++)
	{
		seq[i] = i;
		ids[i] = i;
	}
	shuffle(seq,thread_num);
	for(j=1;j<thread_num;j++)
	{
		retcode = pthread_create(&thread[j], NULL, task, &ids[j]);
		if (retcode != 0)
		{
			/* Carrying on would later join a pthread_t that was never
			 * initialised; leaving main ends the process and any workers
			 * already started. */
			fprintf (stderr, "create thread failed %d\n", retcode);
			return 1;
		}
	}
	sense_reverse_barrier(0);  // start at the same time
	hrtime_start = gethrtime_x86();
	printf("Finish reading file. Start computing...\n");
	/* NOTE(review): converge, token and flags[] are shared with the workers;
	 * their declarations are not visible here — confirm they are safe to
	 * read and write from several threads. */
	while(converge == 0)
	{
		ticket_acquire(&flags[0].tlock);
		if(flags[0].test == 1)
		{
			/* Another thread asked this one to take part in a convergence
			 * test: run solve() with its last argument set to 1, clear both
			 * markers and meet the others at the barrier. */
			solve(0,workload-1,0,1);
			flags[0].flag = 0;
			flags[0].test = 0;
			sense_reverse_barrier(0);
		}
		else
		{
			/* Ordinary iteration; only this path is charged to compute time. */
			hrtimer_tmp = gethrtime_x86();
			ite ++;
			solve(0,workload-1,0,0);
			hrtimer_compute[0] += gethrtime_x86() - hrtimer_tmp;
		}
		if(token == 0 && flags[0].flag == 1&&converge==0)
		{
			//observe all flags
			allone = 1;
			for(j=0;j<thread_num;j++)
			{
				allone = allone & flags[j].flag;
			}
			if(allone == 1)
			{
				/* Every flag is set and this thread holds the token: ask all
				 * peers (in a freshly shuffled order) to run a test pass. */
				ite_converge ++;
				shuffle(seq,thread_num);
				for(i=0;i<thread_num;i++)
					if(seq[i] != 0)
					{
						ticket_acquire(&flags[seq[i]].tlock);
						flags[seq[i]].test = 1;
						ticket_release(&flags[seq[i]].tlock);
					}
				hrtimer_tmp = gethrtime_x86();
				solve(0,workload-1,0,1);
				hrtimer_converge += gethrtime_x86() - hrtimer_tmp;
				sense_reverse_barrier(0);
				/* Sum the per-thread l2norm values and compare with EPS. */
				l2normtotal = 0.0;
				for(j=0;j<thread_num;j++)
					l2normtotal += flags[j].l2norm;
				if(l2normtotal < EPS)
				{
					converge = 1;
				}
				else
				{
					/* Not converged: pass the token to the next thread. */
					token = (token + 1) % thread_num;
				}
			}
		}
		ticket_release(&flags[0].tlock);
	}
	printf ("#%d finished\n",0);
	iteration[0] = ite;
	for(j=1;j<thread_num;j++)
	{
		retcode = pthread_join(thread[j], &retval);
		if (retcode != 0)
		{
			/* retval is not meaningful when the join itself failed. */
			fprintf (stderr, "join failed %d\n", retcode);
			continue;
		}
		/* task() returns a heap-allocated int with its iteration count. */
		iteration[j] = *(int *)retval;
		delete (int *)retval;
	}
	hrtime_end = gethrtime_x86();
	CPU_ZERO(&get);
	if (pthread_getaffinity_np(pthread_self(), sizeof(get), &get) != 0)
		fprintf(stderr, "get thread affinity failed\n");
	/* Scan the whole set instead of assuming a fixed number of CPUs. */
	for (j = 0; j < CPU_SETSIZE; j++)
		if (CPU_ISSET(j, &get))
			printf("thread %d is running in processor %d sched_getcpu %d\n", -1, j,sched_getcpu());
#ifdef VERIFY
	FILE *res;
	res = fopen ("ASOR result", "w");
	if (res == NULL) {
		perror("ASOR result");
	} else {
		for (i = 0; i < nsize; i++) {
			fprintf (res, "[%d] = %lf\n", i+1, X[i]);
		}
		fclose(res);
	}
#endif
	double total = hrtime_end-hrtime_start;
	printf("Total time:%.16lfs\n",total);
	printf("Computation time:%.16lfs\n",max(thread_num,hrtimer_compute));
	printf("Convergence time:%.16lfs\n",hrtimer_converge);
	printf("Syn time:%.16lfs\n",total-max(thread_num,hrtimer_compute)-hrtimer_converge);
	for(i=0;i<thread_num;i++)
		printf("Iteration[%d]: %d\t\tComputation time:%.16lfs\n",i,iteration[i],hrtimer_compute[i]);
	printf("#Converge iteration:%d\n",ite_converge);
	printf("sizeof(cflag) = %zu\n",sizeof(cflag));

	free(ids);
	free(seq);
	return 0;
}
Exemplo n.º 4
0
/*
 * Worker thread body for the SOR run.
 *
 * arg points to an int holding this worker's index; the pointer is only read,
 * never freed. The worker tries to pin itself to the CPU with the same number
 * as its index, handles rows index*workload .. (index+1)*workload-1 where
 * workload = ceil(nsize / thread_num), and loops until the shared converge
 * flag becomes non-zero.
 *
 * Returns a heap-allocated int (created with new) holding the number of
 * ordinary iterations performed; the joining thread owns it.
 *
 * NOTE(review): for the last worker, end can exceed nsize-1 when nsize is not
 * a multiple of thread_num — presumably solve() clamps it; confirm.
 */
static void *
task(void *arg)
{
	int index = *((int *) arg);
	cpu_set_t mask,get;
	CPU_ZERO(&mask);
	CPU_SET(index, &mask);
	int i,j,allone;
	double l2normtotal;
	int workload = (int)ceil((double)nsize/(double)thread_num) ;
	int start = workload*(index);
	int end = (index + 1)*workload-1;
	double hrtimer_tmp;
	/* Order in which peers are flagged for a convergence test; reshuffled
	 * before every test this worker starts. */
	int *seq = (int *)malloc(thread_num*sizeof(int));
	if (seq == NULL) {
		/* The other threads wait for this one at the barrier below, so
		 * there is no way to drop out quietly. */
		fprintf(stderr, "out of memory\n");
		exit(EXIT_FAILURE);
	}
	for(i=0;i<thread_num;i++)
		seq[i] = i;
	/* The pthread affinity calls report failure with a non-zero (positive)
	 * error number, never with a negative value. */
	if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) != 0) {
		fprintf(stderr, "set thread affinity failed\n");
	}

	sense_reverse_barrier(index);
	int ite = 0;
	/* NOTE(review): converge, token and flags[] are shared between threads;
	 * their declarations are not visible here — confirm they are safe to
	 * read and write concurrently. */
	while(converge==0)
	{
		ticket_acquire(&flags[index].tlock);
		if(flags[index].test == 1)
		{
			/* A peer asked this worker to take part in a convergence test:
			 * run solve() with its last argument set to 1, clear both
			 * markers and meet the others at the barrier. */
			solve(start,end,index,1);
			flags[index].flag = 0;
			flags[index].test = 0;
			sense_reverse_barrier(index);
		}
		else
		{
			/* Ordinary iteration; only this path is charged to compute time. */
			hrtimer_tmp = gethrtime_x86();
			ite ++;
			solve(start,end,index,0);
			hrtimer_compute[index] += gethrtime_x86() - hrtimer_tmp;
		}
		if(token == index && flags[index].flag == 1&&converge==0)
		{
			//observe all flags
			allone = 1;
			for(j=0;j<thread_num;j++)
			{
				allone = allone & flags[j].flag;
			}
			if(allone == 1)
			{
				/* Every flag is set and this worker holds the token: ask all
				 * peers (in a freshly shuffled order) to run a test pass. */
				ite_converge ++;
				shuffle(seq,thread_num);
				for(i=0;i<thread_num;i++)
					if(seq[i] != index)
					{
						ticket_acquire(&flags[seq[i]].tlock);
						flags[seq[i]].test = 1;
						ticket_release(&flags[seq[i]].tlock);
					}
				hrtimer_tmp = gethrtime_x86();
				solve(start,end,index,1);
				hrtimer_converge += gethrtime_x86() - hrtimer_tmp;
				sense_reverse_barrier(index);
				/* Sum the per-thread l2norm values and compare with EPS. */
				l2normtotal = 0.0;
				for(j=0;j<thread_num;j++)
				{
					l2normtotal += flags[j].l2norm;
				}
				if(l2normtotal < EPS)
				{
					converge = 1;
				}
				else
				{
					/* Not converged: pass the token to the next thread. */
					token = (token + 1) % thread_num;
				}
			}
		}
		ticket_release(&flags[index].tlock);
	}
	printf ("#%d finished\n",index);

	CPU_ZERO(&get);
	if (pthread_getaffinity_np(pthread_self(), sizeof(get), &get) != 0) {
		fprintf(stderr, "get thread affinity failed\n");
	}
	/* Scan the whole set: a fixed bound would hide workers pinned to
	 * higher-numbered CPUs. */
	for (j = 0; j < CPU_SETSIZE; j++) {
		if (CPU_ISSET(j, &get)) {
			printf("thread %d is running in processor %d sched_getcpu %d\n", index, j,sched_getcpu());
		}
	}
	free(seq);
	return  new int(ite);
}