int Stokhos::ApproxSchurComplementPreconditioner:: ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { #ifdef STOKHOS_TEUCHOS_TIME_MONITOR TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total Approximate Schur Complement Time"); #endif // We have to be careful if Input and Result are the same vector. // If this is the case, the only possible solution is to make a copy const Epetra_MultiVector *input = &Input; bool made_copy = false; if (Input.Values() == Result.Values()) { input = new Epetra_MultiVector(Input); made_copy = true; } // Allocate temporary storage int m = input->NumVectors(); if (rhs_block == Teuchos::null || rhs_block->NumVectors() != m) rhs_block = Teuchos::rcp(new EpetraExt::BlockMultiVector(*base_map, *sg_map, m)); if (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec) tmp = Teuchos::rcp(new Epetra_MultiVector(*base_map, m*max_num_mat_vec)); j_ptr.resize(m*max_num_mat_vec); mj_indices.resize(m*max_num_mat_vec); // Extract blocks EpetraExt::BlockMultiVector input_block(View, *base_map, *input); EpetraExt::BlockMultiVector result_block(View, *base_map, Result); result_block.PutScalar(0.0); // Set right-hand-side to input_block rhs_block->Update(1.0, input_block, 0.0); // At level l, linear system has the structure // [ A_{l-1} B_l ][ u_l^{l-1} ] = [ r_l^{l-1} ] // [ C_l D_l ][ u_l^l ] [ r_l^l ] for (int l=P; l>=1; l--) { // Compute D_l^{-1} r_l^l divide_diagonal_block(block_indices[l], block_indices[l+1], *rhs_block, result_block); // Compute r_l^{l-1} = r_l^{l-1} - B_l D_l^{-1} r_l^l multiply_block(upper_block_Cijk[l], -1.0, result_block, *rhs_block); } // Solve A_0 u_0 = r_0 divide_diagonal_block(0, 1, *rhs_block, result_block); for (int l=1; l<=P; l++) { // Compute r_l^l - C_l*u_l^{l-1} multiply_block(lower_block_Cijk[l], -1.0, result_block, *rhs_block); // Compute D_l^{-1} (r_l^l - C_l*u_l^{l-1}) divide_diagonal_block(block_indices[l], block_indices[l+1], *rhs_block, result_block); } if (made_copy) delete input; 
return 0; }
/*
 * Benchmark driver for the blocked matrix multiplication.
 *
 * Usage: <program> MATRIX_SIZE BLOCK_SIZE [DEBUG]
 *   MATRIX_SIZE  edge length of the square matrices (positive integer)
 *   BLOCK_SIZE   edge length of one block (positive integer)
 *   DEBUG        optional; passing exactly one extra argument (any value)
 *                turns on verbose output
 *
 * Fills two input matrices with deterministic values, calls multiply_block()
 * once per (i, j, k) block triple, and reports the elapsed wall-clock time of
 * those calls. Without DEBUG the output is a single CSV line
 * "matrix_size,block_size,seconds".
 *
 * Returns 0 on success and -1 on a usage, argument or allocation error.
 */
int main(int argc, char *argv[])
{
    if (argc < 3) {
        printf("Must supply option for Matrix Size and block size, eg: matrix_multiply.exe 400 32 \n");
        return -1;
    }

    int debug = (argc == 4) ? 1 : 0;

    int matrix_size = atoi(argv[1]);
    int block_size = atoi(argv[2]);

    /* A zero block size would divide by zero below, and non-positive sizes
     * make no sense as matrix dimensions. */
    if (matrix_size <= 0 || block_size <= 0) {
        fprintf(stderr, "Matrix size and block size must be positive integers\n");
        return -1;
    }

    /* Integer division: only number_of_blocks blocks are visited per
     * dimension. NOTE(review): when matrix_size is not a multiple of
     * block_size the trailing rows/columns are presumably left out of the
     * product -- confirm against multiply_block(). */
    int number_of_blocks = matrix_size / block_size;

    /* The matrices live on the heap as flat row-major arrays (element (r, c)
     * is at r * matrix_size + c). Three n*n double arrays on the stack
     * overflow it for moderately large n. */
    size_t element_count = (size_t)matrix_size * (size_t)matrix_size;
    double *matrix1 = (double *)calloc(element_count, sizeof(double));
    double *matrix2 = (double *)calloc(element_count, sizeof(double));
    double *result = (double *)calloc(element_count, sizeof(double));
    if (matrix1 == NULL || matrix2 == NULL || result == NULL) {
        fprintf(stderr, "Unable to allocate three %d x %d matrices\n",
                matrix_size, matrix_size);
        free(matrix1);
        free(matrix2);
        free(result);
        return -1;
    }

    struct timeval start;
    struct timeval end;
    int i, j, k;

    int seed = 10000;
    srand(seed);

    /* Populate the inputs and clear the output. */
    for (i = 0; i < matrix_size; i++) {
        for (j = 0; j < matrix_size; j++) {
            size_t idx = (size_t)i * (size_t)matrix_size + (size_t)j;

            matrix1[idx] = (double)(i);
            matrix2[idx] = (double)(i + j);
            if (debug) {
                printf("matrix2[%d][%d] = %6.1f, -- ", i, j, matrix2[idx]);
                printf("value = %6.1f,\n", 1.0 + (double)((i * matrix_size) + j));
                printf("\n");
            }
            result[idx] = 0.0;
        }
    }

    if (debug) {
        printf("calling multiply_block(:number_of_blocks = > %d, :block_size => %d)\n",
               number_of_blocks, block_size);
    }

    /* Time only the multiplication itself; debug printing stays outside the
     * measured interval so it cannot distort the result. */
    gettimeofday(&start, NULL);
    for (i = 0; i < number_of_blocks; i++) {
        for (j = 0; j < number_of_blocks; j++) {
            for (k = 0; k < number_of_blocks; k++) {
                multiply_block(result, matrix1, matrix2,
                               block_size, number_of_blocks, i, j, k);
            }
        }
    }
    gettimeofday(&end, NULL);

    if (debug) {
        print_matrices(result, matrix1, matrix2, matrix_size);
        printf("Time difference for block implementation %.5f seconds\n",
               get_time_diff(&start, &end));
    } else {
        printf("%d,%d,%.5f\n", matrix_size, block_size, get_time_diff(&start, &end));
    }

    free(matrix1);
    free(matrix2);
    free(result);

    /* Zero is the conventional success exit status. */
    return 0;
}