/*
 * Driver for the whole solve: runs the elimination phase on every rank, then
 * lets the SOURCE rank alone run back substitution and report the elapsed
 * wall-clock time.
 */
void gauss() {
    /* Block-scope declarations of the two phases */
    void gaussElimination();
    void backSubstitution();

    /* Wall-clock stamps; only meaningful on the SOURCE rank */
    double started = 0.0;
    double finished;

    /* Line every process up before the clock starts */
    MPI_Barrier(MPI_COMM_WORLD);

    if (my_rank == SOURCE) {
        started = MPI_Wtime();
    }

    /* Elimination phase, entered by all ranks */
    gaussElimination();

    /* Everything below is sequential work for the SOURCE rank */
    if (my_rank != SOURCE) {
        return;
    }

    backSubstitution();

    finished = MPI_Wtime();
    printf("\nElapsed time: %f miliseconds\n", (finished - started) * 1000);
}
/*
 * Solve A x = b through an LU factorisation.
 *
 *   A  n-by-n coefficient matrix
 *   b  right-hand side of length n
 *   n  system dimension
 *
 * Returns whatever backSubstitution() hands back for the system U x = c.
 *
 * NOTE(review): the two factor matrices and the intermediate vector are not
 * released here; which routine should free them depends on how
 * allocateDoubleMatrix() lays out its storage -- confirm and add the cleanup.
 */
double * LUSolver(double **A, double *b, int n){
    double **lower;
    double **upper;
    double *intermediate;
    double *solution;

    /* Storage for the two triangular factors */
    lower = allocateDoubleMatrix(n, n);
    upper = allocateDoubleMatrix(n, n);

    /* Factorise A into the upper and lower factors */
    LUFactotisation(n, A, upper, lower);

    /* L c = b, then U x = c */
    intermediate = forwardSubstitution(n, lower, b);
    solution = backSubstitution(n, upper, intermediate);

    return solution;
}
/* * Computer problem 4.2 #2 */ void problem42_2() { // variables struct matrix mat,chol,cholT; struct matrix4 eq = {{ {0.05,0.07,0.06,0.05}, {0.07,0.10,0.08,0.07}, {0.06,0.08,0.10,0.09}, {0.05,0.07,0.09,0.10} }}; struct array ans = {{0.23,0.32,0.33,0.31}}; struct array y,x; // setup mat.m4 = eq; mat.size = NUM4; // print printf("Problem 4.2 #2\n"); printf("Solve this system by Cholesky method:\n"); printf("0.05x1 + 0.07x2 + 0.06x3 + 0.05x4 = 0.23\n"); printf("0.07x1 + 0.10x2 + 0.08x3 + 0.07x4 = 0.32\n"); printf("0.06x1 + 0.08x2 + 0.10x3 + 0.09x4 = 0.33\n"); printf("0.05x1 + 0.07x2 + 0.09x3 + 0.10x4 = 0.31\n"); // do the work chol = cholesky(mat); // transpose cholT = transposeMatrix(chol); // solve Ly = b (for y) y = forwardSubstitution(chol,ans); // solve L^Tx = y (for x) x = backSubstitution(cholT,y); // print out results printf("\nCholesky\n"); printMatrix(chol); printf("\nTranspose \n"); printMatrix(cholT); printf("\nLy = b (for y)\n"); printArray(y,"y"); printf("L^Tx = y (for x)\n"); printArray(x,"x"); printf("\n"); return; }
/*
 * Solve A x = b by forward elimination followed by back substitution.
 *
 *   n  system dimension
 *   A  n-by-n coefficient matrix; overwritten in place (entries on and above
 *      the diagonal become the eliminated system, entries below the diagonal
 *      are left stale, exactly as before)
 *   b  right-hand side of length n; overwritten in place
 *
 * Returns the vector produced by backSubstitution(n, A, b).
 *
 * A pivot that is exactly zero used to be divided by, poisoning the result
 * with inf/NaN even for perfectly solvable systems. Now, when (and only when)
 * the pivot is zero, the row below it with the largest-magnitude entry in the
 * pivot column is exchanged into place. Systems that never hit a zero pivot
 * are processed exactly as before. Rows are exchanged by value, so the
 * caller's row-pointer array is left untouched.
 */
double * gaussianElimination (int n, double **A, double *b){
    int i, j, k;
    double m, *x;

    for (k = 0; k < n-1; k++){
        if (A[k][k] == 0.0) {
            int best = k;
            double bestMagnitude = 0.0;
            double tmp;

            /* Look below the diagonal for a usable pivot */
            for (i = k+1; i < n; i++) {
                double magnitude = A[i][k] < 0.0 ? -A[i][k] : A[i][k];
                if (magnitude > bestMagnitude) {
                    bestMagnitude = magnitude;
                    best = i;
                }
            }

            /* Column is zero from the diagonal down: nothing to eliminate */
            if (best == k)
                continue;

            /* Exchange complete rows k and best, together with b */
            for (j = 0; j < n; j++) {
                tmp = A[k][j];
                A[k][j] = A[best][j];
                A[best][j] = tmp;
            }
            tmp = b[k];
            b[k] = b[best];
            b[best] = tmp;
        }

        for (i = k+1; i < n; i++){
            m = A[i][k] / A[k][k];
            for (j = k+1; j < n; j++)
                A[i][j] = A[i][j] - m * A[k][j];
            b[i] = b[i] - m * b[k];
        }
    }

    x = backSubstitution (n, A, b);
    return x;
}
int main(int argc, char const *argv[]) { int matrixSize = strtol(argv[1], NULL, 10); int coreCount = omp_get_num_procs(); int threadCount = strtol(argv[2], NULL, 10); double startTime, finishTime; double **a_augmented, **a; // n x n Matrix as a 2D array double diagonalElement, bestElement, factor; int bestRowIndex = 0; // used in partial pivoting (index of row having greatest absolute value) int i, j, k; // for loop counters double *x; // Solutions double *b; printf("Matrix Size: %d\n", matrixSize); printf("Number of Cores: %d\n", coreCount); #pragma omp parallel num_threads(threadCount) { if (omp_get_thread_num() == 0) printf("Thread Count: %d\n", omp_get_num_threads()); } // Start Timer startTime = omp_get_wtime(); // Allocate memory // a_augmented will be the augmented matrix a_augmented = (double **) malloc(matrixSize * sizeof(double *)); // a will be the randomly generated matrix a = (double **) malloc(matrixSize * sizeof(double *)); x = (double *) malloc(matrixSize * sizeof(double)); b = (double *) malloc(matrixSize * sizeof(double)); if (DEBUG == 1) Read_matrix(&a, &a_augmented, matrixSize); else Gen_matrix(&a, &a_augmented, matrixSize, threadCount); // a will not be modified after this point // Only the a_augmented will be modified // Display generated matrix: displayMatrix(a, matrixSize); for (i = 0; i < matrixSize - 1; ++i) { // Partial Pivoting: // the algorithm selects the entry with largest absolute value from // the column of the matrix that is currently being considered as // the pivot element. 
// Diagonal Element diagonalElement = a_augmented[i][i]; // debug_printf("diagonalElement%d = %f\n", i, diagonalElement); // Find the best row (the one with the largest absolute value in the // column being worked on) bestRowIndex = i; bestElement = diagonalElement; for (j = i + 1; j < matrixSize; ++j) { if (fabs(a_augmented[j][i]) > fabs(bestElement)) { bestRowIndex = j; bestElement = a_augmented[j][i]; // debug_printf("bestElement = %f\n", a_augmented[j][i]); } } // Swap the rows if (i != bestRowIndex) { // debug_printf("Row %d needs to be swapped with Row %d\n", i, bestRowIndex ); swapRow(&a_augmented[i], &a_augmented[bestRowIndex]); // Update the diagonal element diagonalElement = a_augmented[i][i]; // debug_printf("diagonalElement%d = %f\n", i, diagonalElement); // displayMatrix(a_augmented, matrixSize); } // End of Partial Pivoting // To make the diagonal element 1, // divide the whole row with the diagonal element // debug_printf("Row %d = Row %d / %f\n", i, i, diagonalElement); for (j = 0; j < matrixSize + 1; ++j) { a_augmented[i][j] = a_augmented[i][j] / diagonalElement; } // Force the diagonal to be 1 (to avoid any roundoff errors in dividing above) a_augmented[i][i] = 1; diagonalElement = 1; // debug_printf("Annihilation of column %d...\n", i); // Annihilation: Zero all the elements in the column below the diagonal element #pragma omp parallel for num_threads(threadCount) \ default(none) private(j, factor, k) shared(i, matrixSize, a_augmented) for (j = i + 1; j < matrixSize; ++j) { // sleep(1); factor = a_augmented[j][i]; if (factor != 0) { // debug_printf("Row %d = Row %d - %f*Row %d\n", j, j, factor, i); for (k = i; k < matrixSize + 1; ++k) { a_augmented[j][k] = a_augmented[j][k] - factor * a_augmented[i][k]; } // displayAugmentedMatrix(a, matrixSize); } } } // Make the diagonal element of the last row 1 a_augmented[matrixSize-1][matrixSize] = a_augmented[matrixSize-1][matrixSize] / a_augmented[matrixSize-1][matrixSize-1]; 
a_augmented[matrixSize-1][matrixSize-1] = 1; // Display augmented matrix: displayMatrix(a_augmented, matrixSize); // Back substitution (parallelized) backSubstitution(&a_augmented, matrixSize, threadCount); // Record the finish time finishTime = omp_get_wtime(); displayMatrix(a_augmented, matrixSize); // Matrix X from augmented matrix // Vector b from matrix A for (i = 0; i < matrixSize; ++i) { x[i] = a_augmented[i][matrixSize]; b[i] = a[i][matrixSize]; } // Find I^2 norm iSquaredNorm(&a, x, b, matrixSize, threadCount); // Print the time taken printf("Time taken = %f\n", finishTime - startTime); // Free memory for (i = 0; i < matrixSize; ++i) { free(a[i]); free(a_augmented[i]); } free(a); free(a_augmented); free(x); free(b); return 0; }
int main(int argc, char** argv){ //--------------------Initialize Matrices, Vectors, and other Variables------------------// int N = atoi(argv[1]); int numThreads = atoi(argv[2]); struct timeval startTime, endElimination, endSubstitution; double eliminationTime, substitutionTime, totalTime; pthread_t elimination_threads[numThreads]; // Ax = b //Allocate Matrix 'A' double **A = (double **)calloc(N,sizeof(double*)); for (int q=0; q < N; q++) A[q] = (double*)calloc(N,sizeof(double*)); //Allocate Vector 'b' double* b = (double*) malloc(sizeof(double)*N); double* x = (double*) malloc(sizeof(double)*N); fillMatrix(N, A, b); //Fill in matrix A and vector B with random floating points between 0 and 1000 if (N <= 8) printf("\nPerforming gaussian elimination with the following matrix (A) and vector (b):\n\n"); printMatrix(N, A, b); //Make sure that A, b, N, and the number of threads are all global and can be shared by the different threads. thread_data.A = A; thread_data.b = b; thread_data.N = N; thread_data.numThreads = numThreads; //Create an array that can be used to pass the thread_indices to the pthread function int *index = calloc (numThreads, sizeof (int)); for(int i = 0; i < numThreads; i++) { index[i] = i; } //--------------------Perform Gaussian Elimination and Back Substituion------------------// gettimeofday(&startTime, NULL); //Gaussian Elimination for (int j=0; j < N-1; j++){ partialPivot(N, A, b, j); thread_data.j = j; for (int thread_index = 0; thread_index < numThreads; thread_index++){ pthread_create(&elimination_threads[thread_index], NULL, eliminate, (void*)&index[thread_index]); } for (int thread_index = 0; thread_index < numThreads; thread_index++){ pthread_join(elimination_threads[thread_index], NULL); } } gettimeofday(&endElimination, NULL); //Set execution time for elimination printf("\n-------Gaussian Elimination Complete-------\n"); if (N <= 8){ printf("\nPerforming back substitution with the following matrix (A) and vector (b):\n\n"); 
printMatrix(N,A,b); } backSubstitution(N, A, b, x, numThreads); printf("\n--------Back Substitution Complete---------\n"); gettimeofday(&endSubstitution, NULL); //Set execution time for substitution //---------------------------Calculate Run Times and print out Solutions---------------------------------// eliminationTime = ((endElimination.tv_sec - startTime.tv_sec) * 1000000u + endElimination.tv_usec - startTime.tv_usec) / 1.e6; substitutionTime = ((endSubstitution.tv_sec - endElimination.tv_sec) * 1000000u + endSubstitution.tv_usec - endElimination.tv_usec) / 1.e6; totalTime = ((endSubstitution.tv_sec - startTime.tv_sec) * 1000000u + endSubstitution.tv_usec - startTime.tv_usec) / 1.e6; printSolutionVector(x, N); checkAnswer(A,x,b,N); printf("Substitution execution time: %.3f seconds.\n", eliminationTime); printf("Substitution execution time: %.3f seconds.\n", substitutionTime); printf("Total execution: \n%.3f seconds elapsed with %d threads used.\n\n", totalTime, numThreads); }