コード例 #1
0
ファイル: omp-lu.c プロジェクト: kempj/hpxMP
/*
 * Driver for the blocked LU decomposition benchmark.
 *
 * Usage: <prog> [matrix size] [blocks per dimension]
 *
 * argv[1], when present, overrides N (the matrix is N x N); argv[2], when
 * present, overrides M (the number of blocks each dimension of the remaining
 * sub-matrix is split into).  N and M are defined outside this function.
 *
 * The matrix is factorised in place in A.  When built with -DCHECK the
 * factors are multiplied back together and compared against a copy of the
 * original matrix.
 *
 * Returns 0 on completion; exits with status 1 on invalid sizes or when
 * memory cannot be allocated.
 */
int main (int argc, char *argv[])
{
  double *A,*A2,*L,*U, temp2;
  int *sizedim;   /* number of rows/columns in each of the M blocks */
  int *start;     /* first row/column of each block, relative to offset */
  int i,j,k;
  int temp=0;     /* number of mismatching elements found by the check */
  int offset = 0; /* row/column where the not-yet-factorised part begins */
  int R;          /* remaining dimension still to be factorised */
  double t1,t2;
  size_t count;

  if( argc > 1 )
    N = atoi(argv[1]);

  if( argc > 2 )
    M = atoi(argv[2]);

  /* M == 0 would divide by zero in the partitioning below, and a negative
     value would turn into a huge allocation request. */
  if( N <= 0 || M <= 0 ) {
    printf("Matrix size and number of blocks must be positive\n");
    exit(1);
  }

  /* Compute the element count in size_t so N*N cannot overflow an int. */
  count = (size_t)N * (size_t)N;

  A = malloc(count * sizeof *A);
  A2 = malloc(count * sizeof *A2);
  L = malloc(count * sizeof *L);
  U = malloc(count * sizeof *U);
  sizedim = malloc((size_t)M * sizeof *sizedim);
  start = malloc((size_t)M * sizeof *start);
  if( A==NULL || A2==NULL || L==NULL || U==NULL
      || sizedim==NULL || start==NULL ) {
    printf("Can't allocate memory\n");
    exit(1);
  }

  /* INITIALIZATION */
  InitMatrix3(A,N);
  for (size_t idx = 0; idx < count; idx++) {
    A2[idx] = A[idx]; /* copy of A for verification of correctness */
    L[idx] = 0;
    U[idx] = 0;
  }

  R = N;

  t1 = GetTickCount();
#pragma omp parallel
  {
    /* Only the master thread drives the outer loop.
       NOTE(review): presumably stage1/2/3 hand work to the other threads of
       the team (e.g. via tasks) -- confirm against their definitions. */
#pragma omp master
    {
      while (N-offset>M) {
        /* Split the remaining R rows/columns into M blocks: the first
           R%M blocks get one extra element so the sizes add up to R. */
        for (i=0;i<M;i++) {
          if (i<R%M) {
            sizedim[i]=R/M+1;
            start[i]=(R/M+1)*i;
          }
          else {
            sizedim[i]=R/M;
            start[i]=(R/M+1)*(R%M)+(R/M)*(i-R%M);
          }
        }

        stage1(A, offset, sizedim, start, N, M);
        stage2(A, offset, sizedim, start, N, M);
        stage3(A, offset, sizedim, start, N, M);

        /* The first block is finished; continue with what is left. */
        offset+=sizedim[0];
        R=R-sizedim[0];
      }
    }
  }
  /* Handle the trailing (N-offset) x (N-offset) block that the loop left. */
  ProcessDiagonalBlock(&A[offset*N+offset], N-offset, N);

  t2 = GetTickCount();

  /* NOTE(review): the division assumes GetTickCount() reports
     microseconds -- confirm against its definition. */
  printf("Time for LU-decomposition in secs: %f \n", (t2-t1)/1000000);

#ifdef CHECK
  /* PROOF OF CORRECTNESS */

  /* Split the in-place result: strictly lower part goes to L, the diagonal
     and upper part go to U; L then gets a unit diagonal. */
  for (i=0;i<N;i++) {
    for (j=0;j<N;j++) {
      if (i>j) {
        L[i*N+j] = A[i*N+j];
      }
      else {
        U[i*N+j] = A[i*N+j];
      }
    }
  }
  for (i=0;i<N;i++)
    L[i*N+i] = 1;

  /* Recompute L*U element by element and count entries whose relative
     deviation from the original matrix exceeds 10%. */
  for (i=0;i<N;i++) {
    for (j=0;j<N;j++) {
      double rel;

      temp2=0;
      for (k=0;k<N;k++)
        temp2+=L[i*N+k]*U[k*N+j];
      rel = (A2[i*N+j]-temp2)/A2[i*N+j];
      if (rel > 0.1 || rel < -0.1)
      {
        temp++;
        printf("Error at: [%d, %d]\n",i,j);
      }
    }
  }
  printf("Errors = %d \n", temp);
#endif

  free(start);
  free(sizedim);
  free(U);
  free(L);
  free(A2);
  free(A);
  return 0;

}
コード例 #2
0
/*
 * Driver for the blocked LU decomposition benchmark.
 *
 * Usage: ./lu <Matrix size> <number of blocks per dimension>
 *
 * argv[1] sets N (the matrix is N x N) and argv[2] sets M (the number of
 * blocks each dimension of the remaining sub-matrix is split into).  N and M
 * are defined outside this function.
 *
 * The matrix is factorised in place in A.  When built with -DCHECK the
 * factors are multiplied back together and compared against a copy of the
 * original matrix.
 *
 * Returns 0 on completion; exits with status 1 on missing or invalid
 * arguments or when memory cannot be allocated.
 */
int main (int argc, char *argv[])
{
	double *A,*A2,*L,*U, temp2;
	int *sizedim;   /* number of rows/columns in each of the M blocks */
	int *start;     /* first row/column of each block, relative to offset */
	int i,j,k;
	int temp=0;     /* number of mismatching elements found by the check */
	int offset = 0; /* row/column where the not-yet-factorised part begins */
	int R;          /* remaining dimension still to be factorised */
	double t1,t2;
	size_t count;

	if (argc < 3)
	{
		printf("Usage: ./lu <Matrix size> <number of blocks per dimension>\n");
		exit(1);
	}

	/* Both arguments are guaranteed to exist past the check above. */
	N = atoi(argv[1]);
	M = atoi(argv[2]);

	/* M == 0 would divide by zero in the partitioning below, and a negative
	   value would turn into a huge allocation request. */
	if (N <= 0 || M <= 0)
	{
		printf("Matrix size and number of blocks must be positive\n");
		exit(1);
	}

	/* Compute the element count in size_t so N*N cannot overflow an int. */
	count = (size_t)N * (size_t)N;

	A = malloc(count * sizeof *A);
	A2 = malloc(count * sizeof *A2);
	L = malloc(count * sizeof *L);
	U = malloc(count * sizeof *U);
	sizedim = malloc((size_t)M * sizeof *sizedim);
	start = malloc((size_t)M * sizeof *start);
	if( A==NULL || A2==NULL || L==NULL || U==NULL
	    || sizedim==NULL || start==NULL ) {
		printf("Can't allocate memory\n");
		exit(1);
	}

	/* INITIALIZATION */
	InitMatrix3(A,N);
	for (size_t idx = 0; idx < count; idx++) {
		A2[idx] = A[idx]; /* copy of A for verification of correctness */
		L[idx] = 0;
		U[idx] = 0;
	}

	R = N;

	t1 = GetTickCount();
#pragma omp parallel
	{
		/* Only the master thread drives the outer loop.
		   NOTE(review): presumably stage1/2/3 hand work to the other
		   threads of the team (e.g. via tasks) -- confirm against their
		   definitions. */
#pragma omp master
		{
			while (N-offset>M) {
				/* Split the remaining R rows/columns into M blocks: the
				   first R%M blocks get one extra element so the sizes
				   add up to R. */
				for (i=0;i<M;i++) {
					if (i<R%M) {
						sizedim[i]=R/M+1;
						start[i]=(R/M+1)*i;
					}
					else {
						sizedim[i]=R/M;
						start[i]=(R/M+1)*(R%M)+(R/M)*(i-R%M);
					}
				}

				stage1(A, offset, sizedim, start, N, M);
				stage2(A, offset, sizedim, start, N, M);
				stage3(A, offset, sizedim, start, N, M);

				/* The first block is finished; continue with the rest. */
				offset+=sizedim[0];
				R=R-sizedim[0];
			} //end of while
		} //end of master
	} //end of parallel region
	/* Handle the trailing (N-offset) x (N-offset) block the loop left. */
	ProcessDiagonalBlock(&A[offset*N+offset], N-offset, N);

	t2 = GetTickCount();

	/* NOTE(review): the division assumes GetTickCount() reports
	   microseconds -- confirm against its definition. */
	printf("Time for LU-decomposition in secs: %f \n", (t2-t1)/1000000);

#ifdef CHECK
	/* PROOF OF CORRECTNESS */

	/* Split the in-place result: strictly lower part goes to L, the
	   diagonal and upper part go to U; L then gets a unit diagonal. */
	for (i=0;i<N;i++) {
		for (j=0;j<N;j++) {
			if (i>j) {
				L[i*N+j] = A[i*N+j];
			}
			else {
				U[i*N+j] = A[i*N+j];
			}
		}
	}
	for (i=0;i<N;i++)
		L[i*N+i] = 1;

	/* Recompute L*U element by element and count entries whose relative
	   deviation from the original matrix exceeds 10%. */
	for (i=0;i<N;i++) {
		for (j=0;j<N;j++) {
			double rel;

			temp2=0;
			for (k=0;k<N;k++)
				temp2+=L[i*N+k]*U[k*N+j];
			rel = (A2[i*N+j]-temp2)/A2[i*N+j];
			if (rel > 0.1 || rel < -0.1)
				temp++;
		}
	}
	printf("Errors = %d \n", temp);
#endif

	free(start);
	free(sizedim);
	free(U);
	free(L);
	free(A2);
	free(A);
	/* main is declared int, so it must return a value. */
	return 0;

}