コード例 #1
0
/*
 * Worker routine executed by every participating thread (pthread
 * start-routine signature).
 *
 * lp points to an int holding this thread's task id.  From that id and the
 * globals nsize and task_num the thread derives its [begin, end] slice, then
 * walks through the initialization and computeGauss() phases, meeting the
 * other threads at barrier(task_num) between phases.  Task 0 additionally
 * runs solveGauss() and prints the elapsed wall-clock time.
 *
 * Always returns NULL.
 */
void *work_thread (void *lp) {
    int task_id = *((int *) lp);
    int begin, end;
    struct timeval start, finish;   /* only written and read by task 0 */

    /* get the divided task */
    begin = (nsize * task_id) / task_num + 1;
    end = (nsize * (task_id + 1)) / task_num;
    if (task_id == 0) {
        gettimeofday (&start, NULL);
    }
    fprintf (stderr, "thread %d: begin %d, end %d\n", task_id, begin, end);

    barrier (task_num);

    /* initialization */
    /* NOTE(review): only task 0 calls initRHS, and it passes its own slice;
       confirm that initializing just that range is what is intended. */
    if (task_id == 0) {
        initRHS(nsize, begin, end);
    }
    barrier (task_num);
    initResult(nsize, begin, end);
    barrier (task_num);

    /* Gauss Compute */
    computeGauss(nsize, task_id);
    barrier (task_num);

    /* Gauss computation done */
    if (task_id == 0) {
        /* Per the original author's note, the equation-solving stage has
           dependencies between its parts, so thread 0 performs it alone. */
        solveGauss(nsize);
        gettimeofday (&finish, NULL);
        printf ("Elapsed time: %.2f seconds\n",
                (((finish.tv_sec * 1000000.0) + finish.tv_usec) -
                ((start.tv_sec * 1000000.0) + start.tv_usec)) / 1000000.0);
    }

    /* A pthread start routine must hand back a value; nothing is reported. */
    return NULL;
}
コード例 #2
0
/*
 * Program entry.
 *
 * Usage: <prog> <matrixfile> [threads]
 *
 * Loads the matrix named by argv[1], optionally overrides the global
 * task_num from argv[2], runs work_thread() on task_num threads (the calling
 * thread acts as task 0), calls solveGauss(), and prints the largest relative
 * difference between X[i] and X__[i].
 *
 * Returns 0 on completion; exits with -1 on bad arguments or if a thread
 * cannot be created.
 */
int main(int argc, char *argv[])
{
    int i;
    double error;

    pthread_attr_t attr;
    pthread_t *tid;
    int *id;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <matrixfile>\n", argv[0]);
        exit(-1);
    }

    // optional second argument: number of threads
    if (argc == 3) {
        char *endp;
        long requested = strtol(argv[2], &endp, 10);

        // Reject non-numeric input and counts below 1: with zero threads the
        // arrays below would be empty and id[0] would be written out of bounds.
        if (endp == argv[2] || *endp != '\0' || requested < 1) {
            fprintf(stderr, "invalid thread count: %s\n", argv[2]);
            exit(-1);
        }
        task_num = requested;
    }

    nsize = initMatrix(argv[1]);
    initRHS(nsize);
    initResult(nsize);

    // create threads
    id = (int *) malloc (sizeof (int) * task_num);
    tid = (pthread_t *) malloc (sizeof (pthread_t) * task_num);
    if (!id || !tid)
        errexit ("out of shared memory");
    pthread_attr_init (&attr);
    pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
    for (i = 1; i < task_num; i++) {
        id[i] = i;
        // Every tid[i] is joined below, so a failed creation cannot be ignored.
        if (pthread_create (&tid[i], &attr, work_thread, &id[i]) != 0) {
            fprintf(stderr, "failed to create thread %d\n", i);
            exit(-1);
        }
    }

    // the calling thread takes the role of task 0
    id[0] = 0;
    work_thread(&id[0]);

    // wait for all threads to finish
    for (i = 1; i < task_num; i++)
        pthread_join (tid[i], NULL);

    // the workers are gone, so the per-thread bookkeeping can be released
    pthread_attr_destroy (&attr);
    free (tid);
    free (id);

    solveGauss(nsize);

    // largest relative error; an entry whose X__ value is 0 counts as 1.0
    error = 0.0;
    for (i = 0; i < nsize; i++) {
        double error__ = (X__[i]==0.0) ? 1.0 : fabs((X[i]-X__[i])/X__[i]);
        if (error < error__) {
            error = error__;
        }
    }
    fprintf(stdout, "Error: %e\n", error);

    return 0;
}
コード例 #3
0
ファイル: ASOR.c プロジェクト: highpowerxh/CSC453BlogPost
/*
 * Entry point of the asynchronous SOR program.
 *
 * Usage: SOR <matrix> <thread> <w>
 *
 * Requests that the calling thread run on CPU 0, loads the matrix, starts
 * thread_num-1 worker threads running task(), and then takes part itself as
 * participant 0 in the solve loop until the global `converge` becomes
 * non-zero.  Afterwards it joins the workers, collects their iteration
 * counts and prints timing statistics.
 *
 * Returns 0 on completion (and on a wrong argument count, as before);
 * returns 1 for an invalid thread count or a failed allocation.
 */
int main(int argc, char **argv){

	setbuf(stdout,0);
	int num = sysconf(_SC_NPROCESSORS_CONF);
	printf("system has %d processor(s)\n", num);
	cpu_set_t mask,get;
	CPU_ZERO(&mask);
	CPU_SET(0, &mask);
	int i,j ;
	int ite = 0;
	ite_converge = 0;
	hrtimer_converge = 0.0;
	int retcode,allone;
	void *retval;
	double hrtimer_tmp,hrtime_start,hrtime_end,l2normtotal;
	if ( argc != 4) {printf("[usage] SOR <matrix> <thread> <w>\n"); return 0;}

	// pthread_setaffinity_np reports failure with a non-zero error number,
	// never a negative value, so compare against 0.
	if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) != 0) {
		fprintf(stderr, "set thread affinity failed\n");
	}
	thread_num = atoi(argv[2]);
	// A count below 1 would give a zero-length thread array and a division
	// by zero when the workload is computed.
	if (thread_num < 1) {
		fprintf(stderr, "invalid thread count: %s\n", argv[2]);
		return 1;
	}
	w = atof(argv[3]);
	nsize = initMatrix(argv[1]);
	initRHS(nsize);
	initResult(nsize);
	// rows handled per participant, rounded up
	int workload = (int)ceil((double)nsize/(double)thread_num) ;
	pthread_t thread[thread_num];
	int *seq = (int *)malloc(thread_num*sizeof(int));
	if (seq == NULL) {
		fprintf(stderr, "out of memory\n");
		return 1;
	}
	for(i=0;i<thread_num;i++)
		seq[i] = i;
	shuffle(seq,thread_num);
	for(j=1;j<thread_num;j++)
	{
		// NOTE(review): the heap-allocated id is handed to task(); whether
		// task() releases it is not visible here.
		retcode = pthread_create(&thread[j], NULL, task, new int(j));
		if (retcode != 0)
			fprintf (stderr, "create thread failed %d\n", retcode);
	}
	sense_reverse_barrier(0);  // start at the same time
	hrtime_start = gethrtime_x86();
	printf("Finish reading file. Start computing...\n");
	while(converge == 0)
	{
		ticket_acquire(&flags[0].tlock);
		if(flags[0].test == 1)
		{
			// test was raised for participant 0: do a solve pass with the
			// last argument set to 1, clear the flags and meet at the barrier
			solve(0,workload-1,0,1);
			flags[0].flag = 0;
			flags[0].test = 0;
			sense_reverse_barrier(0);
		}
		else
		{
			// ordinary pass; its duration is added to this participant's compute time
			hrtimer_tmp = gethrtime_x86();
			ite ++;
			solve(0,workload-1,0,0);
			hrtimer_compute[0] += gethrtime_x86() - hrtimer_tmp;
		}
		if(token == 0 && flags[0].flag == 1&&converge==0)
		{
			//observe all flags
			allone = 1;
			for(j=0;j<thread_num;j++)
			{
				allone = allone & flags[j].flag;
			}
			if(allone == 1)
			{
				ite_converge ++;
				// raise test on every other participant, in shuffled order
				shuffle(seq,thread_num);
				for(i=0;i<thread_num;i++)
					if(seq[i] != 0)
					{
						ticket_acquire(&flags[seq[i]].tlock);
						flags[seq[i]].test = 1;
						ticket_release(&flags[seq[i]].tlock);
					}
				hrtimer_tmp = gethrtime_x86();
				solve(0,workload-1,0,1);
				hrtimer_converge += gethrtime_x86() - hrtimer_tmp;
				sense_reverse_barrier(0);
				// sum the per-participant l2norm values and compare with EPS
				l2normtotal = 0.0;
				for(j=0;j<thread_num;j++)
					l2normtotal += flags[j].l2norm;
				if(l2normtotal < EPS)
				{
					converge = 1;
				}
				else
				{
					token = (token + 1) % thread_num;
				}
			}
		}
		ticket_release(&flags[0].tlock);
	}
	printf ("#%d finished\n",0);
	iteration[0] = ite;
	for(j=1;j<thread_num;j++)
	{
		retval = NULL;
		retcode = pthread_join(thread[j], &retval);
		// Only read the worker's result when the join succeeded; otherwise
		// retval was never filled in.
		if (retcode != 0)
			fprintf (stderr, "join failed %d\n", retcode);
		else if (retval != NULL)
			iteration[j] = *(int *)retval;
	}
	hrtime_end = gethrtime_x86();
	CPU_ZERO(&get);
	// same return convention as pthread_setaffinity_np: non-zero means failure
	if (pthread_getaffinity_np(pthread_self(), sizeof(get), &get) != 0)
		fprintf(stderr, "get thread affinity failed\n");
	// walk the whole CPU set instead of assuming a 24-CPU machine
	for (j = 0; j < CPU_SETSIZE; j++)
		if (CPU_ISSET(j, &get))
			printf("thread %d is running in processor %d sched_getcpu %d\n", -1, j,sched_getcpu());
#ifdef VERIFY
	FILE *res;
	res = fopen ("ASOR result", "w");
	if (res == NULL) {
		fprintf(stderr, "cannot open result file\n");
	} else {
		for (i = 0; i < nsize; i++) {
			fprintf (res, "[%d] = %lf\n", i+1, X[i]);
		}
		fclose(res);
	}
#endif
	double total = hrtime_end-hrtime_start;
	printf("Total time:%.16lfs\n",total);
	printf("Computation time:%.16lfs\n",max(thread_num,hrtimer_compute));
	printf("Convergence time:%.16lfs\n",hrtimer_converge);
	printf("Syn time:%.16lfs\n",total-max(thread_num,hrtimer_compute)-hrtimer_converge);
	for(i=0;i<thread_num;i++)
		printf("Iteration[%d]: %d\t\tComputation time:%.16lfs\n",i,iteration[i],hrtimer_compute[i]);
	printf("#Converge iteration:%d\n",ite_converge);
	// sizeof yields size_t, which is printed with %zu
	printf("sizeof(cflag) = %zu\n",sizeof(cflag));

	free(seq);
	return 0;
}
コード例 #4
0
ファイル: gauss_pthread1.c プロジェクト: alyshav/CMPT431-asn1
/*
 * Program entry.
 *
 * Usage: <prog> <matrixfile> <#ofProcesses>
 *
 * Loads the matrix named by argv[1], runs work_thread() on task_num threads
 * (the calling thread acts as task 0) while timing that parallel section with
 * gettimeofday(), calls solveGauss(), and prints the measured time and the
 * largest relative difference between X[i] and X__[i].
 *
 * Returns 0 on completion; exits with -1 on bad arguments or if a thread
 * cannot be created.
 */
int main(int argc, char *argv[])
{
    int i;
    struct timeval start, finish;
    double error;

    pthread_attr_t attr;
    pthread_t *tid;
    int *id;
    char *endp;
    long requested;

    //Ensure that the program takes two parameters
    //  first param is the input matrix and the second is the number of processors for the parallel run
    if (argc != 3) {
        fprintf(stderr, "usage: %s <matrixfile> <#ofProcesses>\n", argv[0]);
        exit(-1);
    }

    // Reject non-numeric input and counts below 1: with zero threads the
    // arrays below would be empty and id[0] would be written out of bounds.
    requested = strtol(argv[2], &endp, 10);
    if (endp == argv[2] || *endp != '\0' || requested < 1) {
        fprintf(stderr, "invalid number of processes: %s\n", argv[2]);
        exit(-1);
    }
    task_num = requested;

    nsize = initMatrix(argv[1]);
    initRHS(nsize);
    initResult(nsize);

    gettimeofday(&start, 0);

    // create threads
    id = (int *) malloc (sizeof (int) * task_num);
    tid = (pthread_t *) malloc (sizeof (pthread_t) * task_num);
    if (!id || !tid)
        errexit ("out of shared memory");
    pthread_attr_init (&attr);
    pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
    for (i = 1; i < task_num; i++) {
        id[i] = i;
        // Every tid[i] is joined below, so a failed creation cannot be ignored.
        if (pthread_create (&tid[i], &attr, work_thread, &id[i]) != 0) {
            fprintf(stderr, "failed to create thread %d\n", i);
            exit(-1);
        }
    }

    // the calling thread takes the role of task 0
    id[0] = 0;
    work_thread(&id[0]);

    // wait for all threads to finish
    for (i = 1; i < task_num; i++)
        pthread_join (tid[i], NULL);

    // the timed section ends before solveGauss(), as in the original
    gettimeofday(&finish, 0);

    // the workers are gone, so the per-thread bookkeeping can be released
    pthread_attr_destroy (&attr);
    free (tid);
    free (id);

    solveGauss(nsize);

    fprintf(stdout, "Time:  %f seconds\n", (finish.tv_sec - start.tv_sec) + (finish.tv_usec - start.tv_usec)*0.000001);

    // largest relative error; an entry whose X__ value is 0 counts as 1.0
    error = 0.0;
    for (i = 0; i < nsize; i++) {
        double error__ = (X__[i]==0.0) ? 1.0 : fabs((X[i]-X__[i])/X__[i]);
        if (error < error__) {
            error = error__;
        }
    }
    fprintf(stdout, "Error: %e\n", error);


    //File output
    // float timeOutput = ((finish.tv_sec - start.tv_sec) + (finish.tv_usec - start.tv_usec)*0.000001);

    // FILE *pthreadTimingFile = fopen("pthread1Timing.txt", "a");
    // fprintf(pthreadTimingFile, "%s %d %f %G\n", argv[1], task_num, timeOutput, error);
    // fclose(pthreadTimingFile);

    return 0;
}