int 
Stokhos::ApproxSchurComplementPreconditioner::
ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const
{
#ifdef STOKHOS_TEUCHOS_TIME_MONITOR
  TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total Approximate Schur Complement Time");
#endif

  // We have to be careful if Input and Result are the same vector.
  // If this is the case, the only possible solution is to make a copy
  const Epetra_MultiVector *input = &Input;
  bool made_copy = false;
  if (Input.Values() == Result.Values()) {
    input = new Epetra_MultiVector(Input);
    made_copy = true;
  } 

  // Allocate temporary storage
  int m = input->NumVectors();
  if (rhs_block == Teuchos::null || rhs_block->NumVectors() != m)
    rhs_block = 
      Teuchos::rcp(new EpetraExt::BlockMultiVector(*base_map, *sg_map, m));
  if (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec)
    tmp = Teuchos::rcp(new Epetra_MultiVector(*base_map, 
					      m*max_num_mat_vec));
  j_ptr.resize(m*max_num_mat_vec);
  mj_indices.resize(m*max_num_mat_vec);
  
  // Extract blocks
  EpetraExt::BlockMultiVector input_block(View, *base_map, *input);
  EpetraExt::BlockMultiVector result_block(View, *base_map, Result);

  result_block.PutScalar(0.0);

  // Set right-hand-side to input_block
  rhs_block->Update(1.0, input_block, 0.0);

  // At level l, linear system has the structure
  // [ A_{l-1} B_l ][ u_l^{l-1} ] = [ r_l^{l-1} ]
  // [ C_l     D_l ][ u_l^l     ]   [ r_l^l     ]

  for (int l=P; l>=1; l--) {
    // Compute D_l^{-1} r_l^l
    divide_diagonal_block(block_indices[l], block_indices[l+1], 
			  *rhs_block, result_block);

    // Compute r_l^{l-1} = r_l^{l-1} - B_l D_l^{-1} r_l^l
    multiply_block(upper_block_Cijk[l], -1.0, result_block, *rhs_block);
  }

  // Solve A_0 u_0 = r_0
  divide_diagonal_block(0, 1, *rhs_block, result_block);

  for (int l=1; l<=P; l++) {
    // Compute r_l^l - C_l*u_l^{l-1}
    multiply_block(lower_block_Cijk[l], -1.0, result_block, *rhs_block);

    // Compute D_l^{-1} (r_l^l - C_l*u_l^{l-1})
    divide_diagonal_block(block_indices[l], block_indices[l+1], 
			  *rhs_block, result_block);
  }

  if (made_copy)
    delete input;

  return 0; 
}
// Example #2
// 0
/*
 * Benchmark driver for the blocked matrix multiply.
 *
 * Usage: matrix_multiply.exe <matrix_size> <block_size> [debug]
 *
 *   argv[1]  matrix dimension (matrices are matrix_size x matrix_size)
 *   argv[2]  block dimension
 *   argv[3]  optional; when exactly one extra argument is present (any
 *            value) debug output is enabled
 *
 * Fills two input matrices, multiplies them block by block through
 * multiply_block(), and prints the elapsed time.  In non-debug mode the
 * output is a single CSV line: matrix_size,block_size,seconds.
 *
 * Note: the number of blocks is matrix_size / block_size (integer division),
 * so when matrix_size is not a multiple of block_size the trailing rows and
 * columns are not visited by the block loops.
 *
 * Returns 0 on success, -1 on bad arguments or allocation failure.
 */
int main(int argc,char *argv[])
{
    if (argc < 3) {
        printf("Must supply option for Matrix Size and block size, eg: matrix_multiply.exe 400 32 \n");
        return -1;
    }
    int debug = 0;
    if (argc == 4) {
        debug = 1;
    }

    int matrix_size = atoi(argv[1]);
    int block_size = atoi(argv[2]);

    /* atoi() yields 0 for non-numeric text; a zero block size would divide
     * by zero below and a non-positive matrix size cannot be allocated. */
    if (matrix_size <= 0 || block_size <= 0) {
        fprintf(stderr, "Matrix size and block size must be positive integers\n");
        return -1;
    }
    int number_of_blocks = matrix_size / block_size;

    /* The matrices live on the heap as flat row-major arrays: three
     * matrix_size^2 arrays of doubles can easily exceed the stack limit. */
    size_t dim = (size_t)matrix_size;
    if (dim > ((size_t)-1) / sizeof(double) / dim) {
        fprintf(stderr, "Matrix size %d is too large\n", matrix_size);
        return -1;
    }
    size_t bytes = dim * dim * sizeof(double);
    double *matrix1 = (double *)malloc(bytes);
    double *matrix2 = (double *)malloc(bytes);
    double *result = (double *)malloc(bytes);
    if (matrix1 == NULL || matrix2 == NULL || result == NULL) {
        fprintf(stderr, "Unable to allocate matrices of size %d x %d\n",
                matrix_size, matrix_size);
        free(matrix1);
        free(matrix2);
        free(result);
        return -1;
    }

    struct timeval start;
    struct timeval end;
    int i, j, k;

    /* The generator is seeded, although the fill below is deterministic and
     * does not call rand() itself. */
    int seed = 10000;
    srand(seed);

    /* Populate the inputs and clear the output. */
    for (i = 0; i < matrix_size; i++) {
        for (j = 0; j < matrix_size; j++) {
            size_t idx = (size_t)i * dim + (size_t)j;
            matrix1[idx] = (double)( i );
            matrix2[idx] = (double)( i + j );
            if (debug) {
                printf("matrix2[%d][%d] = %6.1f, -- ", i,j,matrix2[idx] ) ;
                printf("value = %6.1f,\n",  1.0 + (double)(( i * matrix_size) + j) ) ;
                printf("\n");
            }
            result[idx] = 0.0;
        }
    }

    gettimeofday(&start,NULL);
    if (debug) {
        printf("calling  multiply_block(:number_of_blocks = > %d, :block_size => %d)\n", number_of_blocks, block_size);
    }
    for (i = 0; i < number_of_blocks; i++) {
        for (j = 0; j < number_of_blocks; j++) {
            for (k = 0; k < number_of_blocks; k++) {
                multiply_block(result, matrix1, matrix2, block_size, number_of_blocks, i, j, k);
            }
        }
    }
    /* Stop the clock before any debug printing so that the reported time
     * covers only the multiplication. */
    gettimeofday(&end,NULL);

    if (debug) {
        print_matrices(result, matrix1, matrix2, matrix_size);
    }
    if (debug) {
        printf("Time difference for block implementation  %.5f seconds\n",  get_time_diff(&start, &end));
    } else {
        printf("%d,%d,%.5f\n", matrix_size, block_size, get_time_diff(&start, &end));
    }

    free(matrix1);
    free(matrix2);
    free(result);
    return 0;
}