int main(int argc, char **argv) {
  /* Timing variables */
  struct timeval etstart, etstop;  /* Elapsed times using gettimeofday() */
  struct timezone tzdummy;
  clock_t etstart2, etstop2;  /* Elapsed times using times() */
  unsigned long long usecstart, usecstop;
  struct tms cputstart, cputstop;  /* CPU times for my processes */

  /* Process program parameters */
  parameters(argc, argv);

  /* Initialize A and B */
  initialize_inputs();

  /* Print input matrices */
  print_inputs();

  /* Start Clock */
  printf("\nStarting clock.\n");
  gettimeofday(&etstart, &tzdummy);
  etstart2 = times(&cputstart);

  /* Gaussian Elimination */
  matrixNorm();

  /* Stop Clock */
  gettimeofday(&etstop, &tzdummy);
  etstop2 = times(&cputstop);
  printf("Stopped clock.\n");
  usecstart = (unsigned long long)etstart.tv_sec * 1000000 + etstart.tv_usec;
  usecstop = (unsigned long long)etstop.tv_sec * 1000000 + etstop.tv_usec;

  /* Display output */
  print_B();

  /* Display timing results */
  printf("\nElapsed time = %g ms.\n",
	 (float)(usecstop - usecstart)/(float)1000);

  printf("(CPU times are accurate to the nearest %g ms)\n",
	 1.0/(float)CLOCKS_PER_SEC * 1000.0);
  printf("My total CPU time for parent = %g ms.\n",
	 (float)( (cputstop.tms_utime + cputstop.tms_stime) -
		  (cputstart.tms_utime + cputstart.tms_stime) ) /
	 (float)CLOCKS_PER_SEC * 1000);
  printf("My system CPU time for parent = %g ms.\n",
	 (float)(cputstop.tms_stime - cputstart.tms_stime) /
	 (float)CLOCKS_PER_SEC * 1000);
  printf("My total CPU time for child processes = %g ms.\n",
	 (float)( (cputstop.tms_cutime + cputstop.tms_cstime) -
		  (cputstart.tms_cutime + cputstart.tms_cstime) ) /
	 (float)CLOCKS_PER_SEC * 1000);
      /* Contrary to the man pages, this appears not to include the parent */
  printf("--------------------------------------------\n");
  
  exit(0);
}
/* Example no. 2 (0) */
/**
 * Tells whether a matrix exponentiation has converged, i.e. whether the norm
 * of the difference between two powers of the matrix does not exceed
 * convergeValue.  According to the original note, that threshold is a
 * constant defined in the library header (1E-12).
 * The exponents are assumed to satisfy M > N.
 * @param  matrixM Matrix raised to the power M.
 * @param  matrixN Matrix raised to the power N.
 * @param  size    Size of the (square) matrices.
 * @return         Non-zero if the matrix has converged, zero otherwise.
 */
int matrixHasConverged(double **matrixM, double **matrixN, unsigned size) {
  double **difference = NULL;
  double distance;

  /* difference = matrixM - matrixN; storage is handed back through the
   * pointer and released right after the norm has been taken. */
  matrixDifference(matrixM, matrixN, &difference, size, size);
  distance = matrixNorm(difference, size, size);
  matrixFree(difference, size);

  if (distance <= convergeValue) {
    return 1;
  }
  return 0;
}
/* Example no. 3 (0) */
/**
 * Iteratively solves the linear system  matrix * x = free.
 *
 * Despite the name, the update formulas used here
 *     alpha  = (r, r) / (A*z, z)
 *     x_next = x + alpha * z
 *     r_next = r - alpha * A*z
 *     beta   = (r_next, r_next) / (r, r)
 *     z_next = r_next + beta * z
 * are those of the conjugate gradient method, so the matrix is presumably
 * expected to be symmetric positive definite (not checked here).
 *
 * The iteration starts from the zero vector and stops as soon as
 * matrixNorm(previous, current, n) no longer exceeds eps.  It also stops
 * when no further step can be computed: the residual is exactly zero (the
 * current iterate already solves the system) or (A*z, z) is zero.  Both
 * situations would otherwise produce a division by zero and fill the
 * iterate with NaN/Inf.
 *
 * The iteration count is printed to stdout as "Iter - <count>".
 *
 * @param matrix  system matrix, read as n rows of n doubles
 * @param free    right-hand side (column of free terms), n doubles
 * @param n       dimension of the system
 * @return        array of n doubles allocated with new[] holding the most
 *                recent valid iterate; the caller releases it with delete[]
 */
double* GradientDescent(double** matrix, double* free, const int& n) {
	double *x = new double[n];  // previous iterate, needed only for the stopping test
	double *xk = new double[n]; // newest iterate, returned to the caller
	double *r = new double[n];  // residual, updated in place
	double *z = new double[n];  // search direction, updated in place
	double *az = new double[n]; // matrix * z, computed once per iteration

	// Zero initial guess: the residual and the first direction both equal
	// the right-hand side.
	for (int i = 0; i < n; i++) {
		xk[i] = 0;
		r[i] = free[i];
		z[i] = free[i];
	}

	double rr = 0; // scalar product (r, r), carried from one iteration to the next
	for (int i = 0; i < n; i++) rr += r[i] * r[i];

	int iter = 0;

	do {
		iter++;

		// Remember the iterate we are about to improve.
		for (int i = 0; i < n; i++) x[i] = xk[i];

		// Zero residual: xk is already an exact solution, and continuing
		// would evaluate 0/0 for both alpha and beta.
		if (rr == 0) break;

		// az = A*z and (A*z, z) in a single sweep over the matrix.
		double zAz = 0;
		for (int i = 0; i < n; i++) {
			double row = 0;
			for (int j = 0; j < n; j++) row += matrix[i][j] * z[j];
			az[i] = row;
			zAz += row * z[i];
		}

		// No step length exists along z; keep the last valid iterate
		// instead of dividing by zero.
		if (zAz == 0) break;

		const double alpha = rr / zAz; // step length along z

		for (int i = 0; i < n; i++) xk[i] = x[i] + alpha * z[i];
		for (int i = 0; i < n; i++) r[i] -= alpha * az[i];

		double rrNext = 0; // (r_next, r_next)
		for (int i = 0; i < n; i++) rrNext += r[i] * r[i];

		const double beta = rrNext / rr; // direction correction factor

		for (int i = 0; i < n; i++) z[i] = r[i] + beta * z[i];

		rr = rrNext;

	} while (matrixNorm(x, xk, n) > eps);

	delete[] x;
	delete[] r;
	delete[] z;
	delete[] az;

	printf("Iter - %d\n", iter);

	return xk;
}
/*
 * gaussInvert - block elimination on the pair (a, b) whose block rows are
 * spread over MPI processes.
 *
 * For every full block column i the routine
 *   1. runs simpleInvert() on each locally stored candidate block (local
 *      rows from first_row, block columns from i) and remembers the one
 *      with the smallest matrixNorm(buf_1, block_side);
 *   2. combines the per-process candidates with MPI_Allreduce using the
 *      custom MPI_mainBlockInfo type and MPI_searchMainBlock operation;
 *   3. on the winning rank multiplies the chosen block row of a and b by
 *      buf_1 (presumably the inverse produced by simpleInvert) and
 *      broadcasts it;
 *   4. moves that row into position first_row of rank i % total_pr,
 *      exchanging rows through MPI_Send/MPI_Recv when the ranks differ;
 *   5. subtracts multiples of the broadcast row from the local rows
 *      starting at first_row.
 * Afterwards the trailing partial block (if small_block_row_width != 0) is
 * handled on last_block_row_proc_id, blocks_order_reversed is filled with
 * the inverse of the permutation in blocks_order, and, when W_REVERSE_FLOW
 * is defined, a backward pass subtracts later rows of b from earlier ones.
 *
 * Parameters:
 *   a, b            local parts of the two matrices; both are modified in
 *                   place.  Presumably a is the matrix being inverted and b
 *                   starts as the identity - TODO confirm against caller.
 *   matrix_side, block_side, total_pr, current_pr
 *                   forwarded to initParameters(); current_pr is also used
 *                   as this process's rank in all comparisons.
 *   blocks_order    permutation of block columns, updated on every step.
 *   blocks_order_reversed
 *                   receives the inverse permutation.
 *   buf_1, buf_2    scratch buffers for single blocks.
 *   buf_string, buf_string_2
 *                   row buffers; the first 2*block_string_size doubles of
 *                   each are zeroed and used.
 *
 * Returns -1 when no candidate block could be inverted on some step
 * (out.label == 0 after the reduction), 0 otherwise.
 */
int gaussInvert(double *a, double *b, int matrix_side, int block_side, 
  int total_pr, int current_pr, 
  int* blocks_order_reversed, int* blocks_order,
  double* buf_1, double* buf_2, double* buf_string, double* buf_string_2){
  MPI_Status status;

  int first_row, first_row_proc_id, last_row_c;
  int current_row, current_row_proc_id;
  int start_nonzero_a, nonzero_a_size;

	int total_block_rows, total_full_block_rows, block_size, block_string_size;
	int max_block_rows_pp, max_rows_pp, short_block_string_size, last_block_row_proc_id, last_block_row_in_current_pr;
	int small_block_row_width, small_block_size, current_pr_full_rows, last_block_row_width, matrix_size_current_pr;
  int buf_size;
  int i, j, k, j1, min_j, min_k_global;
  int res;
  double temp=-1.;
  mainBlockInfo in, out;

	/* All layout quantities are produced by initParameters() through the output pointers. */
	initParameters(matrix_side, block_side, total_pr, current_pr, 
	&total_block_rows, &total_full_block_rows, 
	&block_size, &block_string_size, 
	&max_block_rows_pp, &max_rows_pp, &short_block_string_size,
	&last_block_row_proc_id, &last_block_row_in_current_pr,
	&small_block_row_width, &small_block_size,
	&current_pr_full_rows, &last_block_row_width,
	&matrix_size_current_pr);

  /* A row buffer holds one block row of b followed by (part of) one block row of a. */
  buf_size = 2 * block_string_size;

 	in.rank = current_pr;
	in.minnorm = 0.;
  in.label = 0;
  in.min_k = 0;

 	for (i=0; i<buf_size; i++){
  	buf_string[i] = 0.;
		buf_string_2[i] = 0.;
 	}
 	/* Forward pass: one step per full block column i. */
 	for (i=0; i<total_full_block_rows; i++){
    /* Only the part of the a-row starting at block column i is packed and
       transferred, so the message length shrinks as i grows. */
    start_nonzero_a = block_size*(i);
    nonzero_a_size = block_string_size - start_nonzero_a;
    buf_size = block_string_size + nonzero_a_size;

		/* With block rows dealt out cyclically (row g on rank g % total_pr, as
		   first_row_proc_id suggests), first_row is the first local row whose
		   global index is >= i. */
		first_row = (i+total_pr-1-current_pr)/total_pr;
		first_row_proc_id = i%total_pr;
    min_j = 0;
    min_k_global = 0;

   	in.minnorm = 0.;
   	in.rank = current_pr;
    in.label = 0;
    in.min_k = i;
		temp = 0.;
	
	  /* Local pivot search: in.label records whether any block was invertible
	     (simpleInvert returned 0); in.minnorm/min_j/in.min_k track the candidate
	     with the smallest norm. */
	  for (j=first_row; j<current_pr_full_rows; j++){
   		for (k=i; k<total_full_block_rows; k++){
   	    res = simpleInvert(a + j*block_string_size + k*block_size, buf_1, buf_2, block_side);
        if (!res) {
   			  temp = matrixNorm(buf_1, block_side);
          if (in.label){
            if (temp<in.minnorm){
     		      in.minnorm = temp;
              min_j=j;
              in.min_k = k;
            }
          }
          else{
            in.label = 1;
            in.minnorm = temp;
            min_j=j;
            in.min_k = k;
          }
        }
      }
    }

#ifdef WO_PIVOT_SEARCH_ATALL
		/* Pivoting disabled: the owner of row i overrides the search result with block (first_row, i). */
		if (current_pr==first_row_proc_id){
			min_j=first_row;
			in.minnorm = 1.;
			in.min_k=i;
			in.label=1;
		}
#endif
    //rewrite
    /* Global choice of the pivot block; every rank receives the same `out`. */
    MPI_Allreduce(&in, &out, 1, MPI_mainBlockInfo, MPI_searchMainBlock, MPI_COMM_WORLD);

		/* `out` is identical on all ranks, so all of them leave together here. */
		if (out.label==0){
			if (current_pr==0){
			  printf("Main block not found!\n\t -- Step %d\n", i);
			}
			fflush(stdout);
			return -1;
		}

#ifdef DEBUG_MODE
		if (current_pr == first_row_proc_id){
      printf("**\nOUT RANK %d\n", out.rank);
      printf("IN RANK %d\n**\n", in.rank);
     	fflush(stdout);
    }
#endif
   	min_k_global = out.min_k;
#ifdef W_FULL_PIVOT_SEARCH
  	/* Bring the pivot block column to position i in every local row of a. */
  	for (j=0; j<max_block_rows_pp; j++){
			swapMatrix(a + j*block_string_size + i*block_size, a + j*block_string_size + min_k_global*block_size, block_size);
 		}
#endif
    /* Record the column exchange in the permutation (temp is a double used as swap space).
       NOTE(review): this bookkeeping runs even when W_FULL_PIVOT_SEARCH is not
       defined and the columns of a were not actually swapped - confirm intended. */
    temp = blocks_order[i];
    blocks_order[i]=blocks_order[min_k_global];
    blocks_order[min_k_global]=temp;

		//for debug purposes:
#ifdef WO_PIVOT_SEARCH_ATALL
		out.rank = first_row_proc_id;
		//don't forget about it
#endif
      /***************Multiply string by the main block*****************/
    if (current_pr==out.rank){
      /* Recompute buf_1 for the block now at (min_j, i); the return value is not checked. */
      simpleInvert(a + min_j*block_string_size + i*block_size, buf_1, buf_2, block_side);
      /* Blocks of a to the right of column i; the block at column i itself is left untouched. */
      for (j=i+1; j<total_full_block_rows; j++){
        simpleMatrixMultiply(buf_1, a + min_j*block_string_size + j*block_size, buf_2, block_side, block_side, block_side);
        copyMatrix(buf_2, a + min_j*block_string_size + j*block_size, block_size);
      }
      /* Every full block of the b-row. */
      for (j=0; j<total_full_block_rows; j++){
        simpleMatrixMultiply(buf_1, b + min_j*block_string_size + j*block_size, buf_2, block_side, block_side, block_side);
        copyMatrix(buf_2, b + min_j*block_string_size + j*block_size, block_size);
      }
      /* Trailing narrow block of both rows, if the matrix has one. */
      if(small_block_row_width){
        simpleMatrixMultiply(buf_1, b + min_j*block_string_size + total_full_block_rows*block_size, buf_2, block_side, block_side, small_block_row_width);
        copyMatrix(buf_2, b + min_j*block_string_size + total_full_block_rows*block_size, small_block_size);

        simpleMatrixMultiply(buf_1, a + min_j*block_string_size + total_full_block_rows*block_size, buf_2, block_side, block_side, small_block_row_width);
        copyMatrix(buf_2, a + min_j*block_string_size + total_full_block_rows*block_size, small_block_size);
      }
			/* Pack the row for broadcast: [0, block_string_size) <- b-row,
			   [block_string_size, buf_size) <- a-row from block column i onward. */
			for (j=0; j<block_string_size; j++){
		    //buf_string[j]=a[min_j*block_string_size+j];
        buf_string[j]=b[min_j*block_string_size+j];
			}
			//for (j=0; j<block_string_size; j++){
      for (j=start_nonzero_a, j1=block_string_size; j<block_string_size; j++, j1++){
				//buf_string[j+block_string_size]=b[min_j*block_string_size+j];
        buf_string[j1]=a[min_j*block_string_size+j];
			}
    }

		//MPI_Bcast(buf_string, buf_size, MPI_DOUBLE, out.rank, MPI_COMM_WORLD);
    MPI_Bcast(buf_string, buf_size, MPI_DOUBLE, out.rank, MPI_COMM_WORLD);

		/* Put the pivot row into slot first_row on rank first_row_proc_id.  If the
		   pivot came from another rank, the displaced row is shipped back to that
		   rank (tag 42); otherwise the two rows are swapped locally. */
		if (out.rank!=first_row_proc_id){
			if (current_pr==first_row_proc_id){
				for (j=0; j<block_string_size; j++){
					//buf_string_2[j]=a[first_row*block_string_size+j];
          buf_string_2[j]=b[first_row*block_string_size+j];
          //a[first_row*block_string_size+j]=buf_string[j];
          b[first_row*block_string_size+j]=buf_string[j];
				}
				//for (j=0; j<block_string_size; j++){
        for (j=start_nonzero_a, j1=block_string_size; j<block_string_size; j++, j1++){
					//buf_string_2[j+block_string_size]=b[first_row*block_string_size+j];
          buf_string_2[j1]=a[first_row*block_string_size+j];
          //b[first_row*block_string_size+j]=buf_string[j+block_string_size];
          a[first_row*block_string_size+j]=buf_string[j1];
				}
				//MPI_Send(buf_string_2, buf_size, MPI_DOUBLE, out.rank, 42, MPI_COMM_WORLD);
        MPI_Send(buf_string_2, buf_size, MPI_DOUBLE, out.rank, 42, MPI_COMM_WORLD);
			}
			if(current_pr==out.rank){
				//MPI_Recv(buf_string_2, buf_size, MPI_DOUBLE, first_row_proc_id, 42, MPI_COMM_WORLD, &status);
        MPI_Recv(buf_string_2, buf_size, MPI_DOUBLE, first_row_proc_id, 42, MPI_COMM_WORLD, &status);
				for (j=0; j<block_string_size; j++){
					//a[min_j*block_string_size+j]=buf_string_2[j];
          b[min_j*block_string_size+j]=buf_string_2[j];
				}
				//for (j=0; j<block_string_size; j++){
        for (j=start_nonzero_a, j1=block_string_size; j<block_string_size; j++, j1++){
					//b[min_j*block_string_size+j]=buf_string_2[j+block_string_size];
          a[min_j*block_string_size+j]=buf_string_2[j1];
				}
			}
		}
		else{
			if(current_pr==out.rank){
				if (min_j!=first_row){
					for(j=0; j<block_string_size; j++){
						temp=a[min_j*block_string_size+j];
						a[min_j*block_string_size+j]=a[first_row*block_string_size+j];
						a[first_row*block_string_size+j]=temp;
					}
					for(j=0; j<block_string_size; j++){
						temp=b[min_j*block_string_size+j];
						b[min_j*block_string_size+j]=b[first_row*block_string_size+j];
						b[first_row*block_string_size+j]=temp;
					}
    		}
			}
		}

#ifdef DEBUG_MODE
		if (current_pr == out.rank){
			printf("SUCCESS SEARCHING MAIN BLOCK STEP %d\n", i);
			fflush(stdout);
		}
#endif

    /* The owner of row i skips the pivot row itself during elimination. */
    if (current_pr == first_row_proc_id){
    	first_row++;
  	}

  	/* Elimination: for each remaining local row j, subtract a(j,i) * (broadcast row)
  	   from the a-blocks right of column i and from every b-block.  j1 = k - i
  	   indexes the packed a-part of buf_string. */
  	for (j=first_row; j<max_block_rows_pp; j++){
      //for (k = i+1; k<total_full_block_rows; k++){
      for (k = i+1, j1=1; k<total_full_block_rows; k++, j1++){
		    //simpleMatrixMultiply(a + j*block_string_size + i*block_size, buf_string + k*block_size, buf_2, block_side, block_side, block_side);
        simpleMatrixMultiply(a + j*block_string_size + i*block_size, buf_string + block_string_size + j1*block_size, buf_2, block_side, block_side, block_side);
			  subtractFromMatrix(a + j*block_string_size + k*block_size, buf_2, block_size);
        //subtractFromMatrix(a + j*block_string_size + start_nonzero_a + j1*block_size, buf_2, block_size);
    	}
      for (k = 0; k<total_full_block_rows; k++){
				//simpleMatrixMultiply(a + j*block_string_size + i*block_size, buf_string + block_string_size + k*block_size, buf_2, block_side, block_side, block_side);
        simpleMatrixMultiply(a + j*block_string_size + i*block_size, buf_string + k*block_size, buf_2, block_side, block_side, block_side);
				subtractFromMatrix(b + j*block_string_size + k*block_size, buf_2, block_size);
      }
      if(small_block_row_width){
        /* The narrow trailing block of the packed a-part sits at the very end of the buffer. */
        //simpleMatrixMultiply(a + j*block_string_size + i*block_size, buf_string + total_full_block_rows*block_size, buf_2, block_side, block_side, small_block_row_width);
        simpleMatrixMultiply(a + j*block_string_size + i*block_size, buf_string + buf_size - small_block_size, buf_2, block_side, block_side, small_block_row_width);
        subtractFromMatrix(a + j*block_string_size + total_full_block_rows*block_size, buf_2, small_block_size);

        //simpleMatrixMultiply(a + j*block_string_size + i*block_size, buf_string + total_full_block_rows*block_size + block_string_size, buf_2, block_side, block_side, small_block_row_width);
        simpleMatrixMultiply(a + j*block_string_size + i*block_size, buf_string + total_full_block_rows*block_size, buf_2, block_side, block_side, small_block_row_width);
        subtractFromMatrix(b + j*block_string_size + total_full_block_rows*block_size, buf_2, small_block_size);
    	}
   	}
	}

  /* Trailing partial block row: its owner inverts the small diagonal block
     (return value not checked) and multiplies the corresponding b-row by buf_1. */
  if(small_block_row_width){
    if (current_pr==last_block_row_proc_id){
      simpleInvert(a + current_pr_full_rows*block_string_size + total_full_block_rows*block_size, buf_1, buf_2, small_block_row_width);
      for (k=0; k<total_full_block_rows; k++){
        simpleMatrixMultiply(buf_1, b + current_pr_full_rows*block_string_size + k*block_size, buf_2, 
          small_block_row_width, small_block_row_width, block_side);
        copyMatrix(buf_2, b + current_pr_full_rows*block_string_size + k*block_size, small_block_size);
    	}
      simpleMatrixMultiply(buf_1, b + current_pr_full_rows*block_string_size + total_full_block_rows*block_size, buf_2, 
      	small_block_row_width, small_block_row_width, small_block_row_width);
      copyMatrix(buf_2, b + current_pr_full_rows*block_string_size + total_full_block_rows*block_size, small_block_row_width*small_block_row_width);
    }
  }
#ifdef DEBUG_MODE
  if (current_pr==first_row_proc_id){
  	printf("SUCCESS IN DIRECT FLOW!!!\n");
  	fflush(stdout);
	}
#endif

  /* Inverse of the block-column permutation accumulated during the forward pass. */
  for(j=0; j<total_full_block_rows; j++){
    blocks_order_reversed[blocks_order[j]]=j;
  }

#ifdef W_REVERSE_FLOW
	/* Backward pass, part 1: broadcast the trailing partial row of b and
	   subtract its multiples from all local full rows above it. */
	if(small_block_row_width){
    if (current_pr==last_block_row_proc_id){
      for (j=0; j<block_string_size; j++){
        buf_string[j]=b[(last_block_row_in_current_pr-1)*block_string_size + j];
      }
    }
    MPI_Bcast(buf_string, block_string_size, MPI_DOUBLE, last_block_row_proc_id, MPI_COMM_WORLD);
    last_row_c = (current_pr==last_block_row_proc_id) ? (last_block_row_in_current_pr-1) : (last_block_row_in_current_pr);
    for (j=last_row_c-1;j>=0;j--){
      for (k=0; k<total_full_block_rows;k++){
        simpleMatrixMultiply(a+j*block_string_size+total_full_block_rows*block_size, buf_string + k*block_size, buf_2,block_side,small_block_row_width,block_side);
        subtractFromMatrix(b+j*block_string_size+k*block_size, buf_2, block_size);
      } 
      simpleMatrixMultiply(a+j*block_string_size+total_full_block_rows*block_size, buf_string + total_full_block_rows*block_size, buf_2,block_side,small_block_row_width,small_block_row_width);
      subtractFromMatrix(b+j*block_string_size+total_full_block_rows*block_size, buf_2, small_block_size);
    }
  }	
  /* Backward pass, part 2: rows total_full_block_rows-1 down to 1. */
  for (i=total_full_block_rows-1; i>0; i--){// the i-th row is subtracted from all the others
    //current_row = (i+total_pr-1-current_pr)/total_pr;//first row in cur_pr not upper than the subtracted string
    current_row_proc_id = i%total_pr;    
    current_row = i/total_pr;
    if(current_pr==current_row_proc_id){
    	for (j=0; j<block_string_size; j++){
       	buf_string[j]=b[current_row*block_string_size + j];
  		}
  	}
        
    /* Ranks below the owner hold one more local row above global row i. */
    if(current_pr<current_row_proc_id){
      current_row++;
    }
        
    MPI_Bcast(buf_string, block_string_size, MPI_DOUBLE, current_row_proc_id, MPI_COMM_WORLD);

    for(j=current_row-1; j>=0; j--){// subtracted from the j-th row of the right-hand matrix
      for (k=0; k<total_full_block_rows;k++){
        simpleMatrixMultiply(a+j*block_string_size+i*block_size, buf_string + k*block_size, buf_2, block_side, block_side, block_side);
      	subtractFromMatrix(b+j*block_string_size+k*block_size, buf_2, block_size);
      }
      if (small_block_row_width){
        simpleMatrixMultiply(a+j*block_string_size+i*block_size, buf_string + total_full_block_rows*block_size, buf_2,block_side,block_side,small_block_row_width);
        subtractFromMatrix(b+j*block_string_size+total_full_block_rows*block_size, buf_2, small_block_size);
    	}
    }
  }
#endif
#ifdef DEBUG_MODE
  if (current_pr==0){
    printf("SUCCESS IN REVERSE FLOW!!!\n");
    fflush(stdout);
    printf("Exit from gauss\n");
    fflush(stdout);
  }
#endif
	return 0;
}