コード例 #1
0
ファイル: pdsdpplapack.c プロジェクト: cavazos/DSDP
static int pmatfactor(void*MM, int *flag){
  plapackM* ctx=(plapackM*)MM;
  int info,dummy;
  double ddxerror;
  DSDPFunctionBegin;
  wallclock(&ctx->t1);
  info=PLA_Obj_set_to_one(ctx->wVec);DSDPCHKERR(info);
  info=PLA_Obj_set_to_zero(ctx->vVec);DSDPCHKERR(info);
  info=PLA_Symv( PLA_LOWER_TRIANGULAR, ctx->one, ctx->AMat, ctx->wVec, ctx->zero, ctx->vVec ); DSDPCHKERR(info);
  *flag=0;
  info = PLA_Chol(PLA_LOWER_TRIANGULAR, ctx->AMat); DSDPCHKERR(info);
  if (info!=0) {
    *flag=1;
    printf("PLAPACK WARNING: Non positive-definite Matrix M : Row: %d\n",info);
  }
  info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat, ctx->vVec);DSDPCHKERR(info);
  info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat,ctx->vVec); DSDPCHKERR(info);  

  info=PLA_Obj_set_to_minus_one(ctx->wVec);DSDPCHKERR(info); 
  info=PLA_Axpy( ctx->one, ctx->vVec, ctx->wVec );DSDPCHKERR(info); 
  info=PLA_Nrm2( ctx->wVec, ctx->dxerror );DSDPCHKERR(info); 
  PLA_Obj_get_local_contents( ctx->dxerror, PLA_NO_TRANS, &dummy, &dummy,
			      &ddxerror, 1, 1 );
  if (ddxerror/sqrt(1.0*ctx->global_size) > 0.1){
    *flag=1;
    if (ctx->rank==-1){
      printf("PDSDPPLAPACK: Non positive-definite Matrix. %4.2e\n",ddxerror);
    }
  }
  wallclock(&ctx->t2);
  ctx->tsolve+=ctx->t2-ctx->t1;
  PPDSDPPrintTime(ctx->rank,"PLAPACK: Factor M",ctx->t2-ctx->t1,ctx->tsolve);
  PPDSDPPrintTime(ctx->rank,"Subtotal Time",0,ctx->t2-ctx->t1);
  DSDPFunctionReturn(0);
}
コード例 #2
0
int PLA_Bi_red( PLA_Obj A, PLA_Obj sL, PLA_Obj sR, PLA_Obj U, PLA_Obj V )
/*
  PLA_Tri_red

  Purpose: Reduce general matrix A to bidiagonal form using 
  Householder similarity transformations.

  input:
  A                       MATRIX to be reduced

  output:
  A                       Reduced matrix A.  Householder vectors used
                          to reduce A are stored below subdiagonal
                          of A and above first superdiagonal of A.
  sL                      Scaling factors for the Householder transforms
                          computed to reduce A (applied to left of A).
  sR                      Scaling factors for the Householder transforms
                          computed to reduce A (applied to right of A).
  U                       if U != NULL, U equals the accumulation of 
                          Householder transforms that were applied from the
                          left.
  V                       if V != NULL, V equals the accumulation of 
                          Householder transforms that were applied from the
                          right.
*/
{
  PLA_Obj
    uv = NULL,  uv_B = NULL, u = NULL,  v = NULL,
    sR_B = NULL, betaR_1 = NULL, betaR_1_dup = NULL,
    sL_B = NULL, betaL_1 = NULL, betaL_1_dup = NULL,
    A_BR = NULL, a_21 = NULL, A_21 = NULL, 
    u_11 = NULL, u_12 = NULL, u_21 = NULL, U_22 = NULL;
    v_11 = NULL, v_12 = NULL, v_21 = NULL, V_22 = NULL;
  int
    size, value = PLA_SUCCESS;

  if ( PLA_ERROR_CHECKING )    /* Perform parameter and error checking */
    value = PLA_Bi_red_enter( A, sR, sL, U, V );

  /* The whole purpose of the game is to update most of the matrix
     A = A - ( u | w_R ) / 0 | w_L^T \
                         \ 0 | v^T   /
     where u equals the Householder transform that hits
     the active part of A from the left and
     v equals the Householder transform that hits the active
           part from the right:
     A <- ( I - betaL_1 u u^T ) A ( I - betaR_1 / 0 \ ( 0 v^T ) 
                                                \ v /                 */

  /* Create a multivector in which to compute ( u  |  w_R ) */

  PLA_Mvector_create_conf_to( A, 1, &uwR );

  /* Create a multivector in which to compute ( w_L | v ) */

  PLA_Mvector_create_conf_to( A, 1, &wLv );
  
  /* Create duplicated multiscalars in which to hold the scaling factor
     for the Householder transforms being computed */
  PLA_Obj_horz_split_2( sL, 1, &betaL_1,
			      PLA_DUMMY );
  PLA_Mscalar_create_conf_to( betaL_1, PLA_ALL_ROWS, PLA_ALL_COLS, 
			      &betaL_1_dup );
  PLA_Obj_horz_split_2( sR, 1, &betaR_1,
			      PLA_DUMMY );
  PLA_Mscalar_create_conf_to( betaR_1, PLA_ALL_ROWS, PLA_ALL_COLS, 
			      &betaR_1_dup );

  /* Track the active parts of A, sL, sR, and uwR and wLv */
  PLA_Obj_view_all( A, &A_BR );    
  PLA_Obj_view_all( sL, &betaL_B );    
  PLA_Obj_view_all( sR, &betaR_B );    
  PLA_Obj_view_all( uwR, &uwR_B );
  PLA_Obj_horz_split_2( wLv, 1, PLA_DUMMY,
                                &wLv_B );

  while ( TRUE ){  
    PLA_Obj_global_length( A_BR, &size );
    if ( 0 == size ) break;

    /* Partition A_BR = ( a_L      A_R    ) where a_L is a column from
       which to compute the next u */
    PLA_Obj_vert_split_2( A_BR, 1, &a_L, &A_R );
    /* Split of the current element of vector sL */
    PLA_Obj_horz_split_2( betaL_B, 1, &betaL_1,
                                      &betaL_B );
    /* View the part of uwR in which to compute u and wR */
    PLA_Obj_vert_split_2( uwR_B, 1,  &u, &wR );

    /* View the part of wLv in which to compute w_L and v */
    PLA_Obj_vert_split_2( wLv_B, 1,  &w_L, &v );

    /* Redistributed a_L as a vector and compute Householder transform */
    PLA_Copy( a_L, u );  
    PLA_Compute_House_v( u, betaL_1_dup );

    /* Place data back in A and sL */
    PLA_Copy( u, a_L );
    PLA_Local_copy( betaL_1_dup, betaL_1 );

    /* Partition A_BR = /  *    a_12^T \  
                        \  *    A_22   / splitting off first column and row */
    PLA_Obj_vert_split_4( A_BR, 1, 1, PLA_DUMMY, &a_12,
                                      PLA_DUMMY, &A_BR );
    
    /* Copy a_12 into v */
    PLA_Obj_set_orientation( PLA_PROJ_ONTO_ROW, a_12 );
    PLA_Copy( a_12, v );
    
    /* w_L = betaL_1 * A_BR^T * u */
       First must split off first element of u */
    PLA_Obj_horz_split_2( u, 1, PLA_DUMMY,
			        &u_2 );

    PLA_Gemv( PLA_TRANSPOSE, betaL_1_dup, A_BR, u_2, zero, w_L );

    /* In v update a_12 = a_12 - betaL_1 * A_R^T u = 
                        = a_12 - betaL_1 * a_12 - w_L */

    PLA_Negate( betaL_1 );
    PLA_Axpy( betaL_1, v, v );
    PLA_Axpy( minus_one, w_L, v );

    /* Compute Householder transform to reflect resulting v */
    PLA_Compute_House_v( v, betaR_1_dup );

    /* Place data back in A and sR */
    PLA_Copy( v, a_12 );
    PLA_Local_copy( betaR_1_dup, betaR_1 );

    /* Set first element of v = 1 */
    PLA_Obj_horz_split_2( v, 1, &v_1,
			        PLA_DUMMY );
    PLA_Obj_set_to_one( v_1 );

    /* Update A_BR <- ( I - betaL_1 u_2 u_2^T ) A_BR ( I - betaR_1 v v^T )
                   =  ( A_BR - u_2 ( betaL_1 u_2^T A_BR ) ) ( I - betaR_1 v v^T )
		   =  A_BR - ( u_2 | w_R ) * ( w_L | v )
       where w_L = betaL_1 * A_BR^T u_2,  w_R = betaR_1 * A_BR * v - betaR_1 * w_L^T * v * u_2 
    */
    
    /* Compute w_R */
    PLA_Gemv( PLA_NO_TRANSPOSE, betaR_1, A_BR, v, zero, w_R );
    PLA_Dot( x_L, v, temp );
    PLA_Scal( betaR_1_dup, temp );
    PLA_Negate( temp );
    PLA_Axpy( temp, u_2, w_R );

    /* Update A_BR */
    PLA_Pmvector_create_conf_to( A_BR, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, 2, &uwR_2_dup );
    PLA_Copy( uwR_2, uwR_2_dup );

    PLA_Pmvector_create_conf_to( A_BR, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, 2, &wLv_dup );
    PLA_Copy( wLv, wLv_dup );
    
    PLA_Gemm( PLA_NO_TRANSPOSE, PLA_NO_TRANSPOSE, minus_one, uwR_2, wLv, one, A_BR );
  }

  /* Free the temporary objects */
  PLA_Obj_free( &u );
  PLA_Obj_free( &u_B );
  PLA_Obj_free( &beta_B );
  PLA_Obj_free( &beta_1 );
  PLA_Obj_free( &beta_1_dup );
  PLA_Obj_free( &A_BR );
  PLA_Obj_free( &a_21 );
  PLA_Obj_free( &A_21 );
  PLA_Obj_free( &q_11 );
  PLA_Obj_free( &q_12 );
  PLA_Obj_free( &q_21 );
  PLA_Obj_free( &Q_22 );

  if ( PLA_ERROR_CHECKING )   
    value = PLA_Bi_red_exit( A, sL, sR, U, V );

  return PLA_SUCCESS;
}
コード例 #3
0
int main(int argc, char *argv[])
{
  /* Declarations */
  MPI_Comm 
    comm;

  PLA_Template 
    templ = NULL;

  PLA_Obj  
    A = NULL,    rhs = NULL,    
    A_append = NULL,
    pivots = NULL,
    x = NULL,         
    b = NULL, 
    b_norm = NULL,
    index = NULL,
    minus_one = NULL;

  double 
    operation_count,
    b_norm_value, 
    time;

  int  
    size, 
    nb_distr, nb_alg, 
    me, nprocs, 
    nprows, npcols,
    dummy, 
    ierror,
    info = 0;
  
  MPI_Datatype 
    datatype;

  /* Initialize MPI */
  MPI_Init(&argc, &argv);

#if MANUFACTURE == CRAY
  set_d_stream( 1 );
#endif

  /* Get problem size and distribution block size and broadcast */
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  if (0 == me) {
    printf("enter processor mesh dimension ( rows cols ):\n");
    scanf("%d %d", &nprows, &npcols );
    printf("enter matrix size, distr. block size:\n");
    scanf("%d %d", &size, &nb_distr );
    printf("enter algorithmic blocksize:\n");
    scanf("%d", &nb_alg );
    printf("Turn on error checking? (1 = YES, 0 = NO):\n");
    scanf("%d", &ierror );
  }

  MPI_Bcast(&nprows,   1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&npcols,   1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&size,     1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nb_distr, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nb_alg, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&ierror, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if ( ierror ) 
    PLA_Set_error_checking( ierror, TRUE, TRUE, FALSE );
  else
    PLA_Set_error_checking( ierror, FALSE, FALSE, FALSE );

  pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,
			  nb_alg);

  /* Create a 2D communicator */
  PLA_Comm_1D_to_2D(MPI_COMM_WORLD, nprows, npcols, &comm); 

  /* Initialize PLAPACK */
  PLA_Init(comm);

  /* Create object distribution template */
  PLA_Temp_create( nb_distr, 0, &templ );

  /* Set the datatype */
  datatype = MPI_DOUBLE;

  /* Create objects for problem to be solved */

  /* Matrix A is big enough to hold the right-hand-side appended */
  PLA_Matrix_create(  datatype, size, size+1, 
		      templ, PLA_ALIGN_FIRST, PLA_ALIGN_FIRST, &A_append );

  PLA_Mvector_create( datatype, size, 1, templ, PLA_ALIGN_FIRST, &x );

  PLA_Mvector_create( datatype, size, 1, templ, PLA_ALIGN_FIRST, &b );
   
  PLA_Mvector_create( MPI_INT, size, 1, templ, PLA_ALIGN_FIRST, &pivots );
   
  /* Create 1x1 multiscalars to hold largest (in abs. value) element 
     of b - x and index of largest value */
  PLA_Mscalar_create( MPI_DOUBLE,
		      PLA_ALL_ROWS, PLA_ALL_COLS,
			 1, 1, templ, &b_norm );

  /* Create duplicated scalar constants with same datatype and template as A */
  PLA_Create_constants_conf_to( A_append, &minus_one, NULL, NULL );

  /* View the appended system as the matrix and the right-hand-side */
  PLA_Obj_vert_split_2( A_append, -1, &A, &rhs );

  /* Create a problem to be solved: A x = b */
  create_problem( A, x, b );

  /* Copy b to the appended column */
  PLA_Copy( b, rhs );

  /* Start timing */
  MPI_Barrier( MPI_COMM_WORLD );
  time = MPI_Wtime( );

  /* Factor P A_append -> L U overwriting lower triangular portion of A with L, upper, U */

  info = PLA_LU( A_append, pivots);

  if ( info != 0 ) {
    printf("Zero pivot encountered at row %d.\n", info);
  }
  else {
    /* Apply the permutations to the right hand sides */
    /* Not necessery since system was appended */
    /* PLA_Apply_pivots_to_rows ( b, pivots); */
    
    /* Solve L y = b, overwriting b with y */
    /* Not necessary since the system was appended */
    /* PLA_Trsv( PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_UNIT_DIAG, A, b ); */
    PLA_Copy( rhs, b );

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv( PLA_UPPER_TRIANGULAR, PLA_NO_TRANSPOSE,  PLA_NONUNIT_DIAG, A, b );

    /* Stop timing */
    MPI_Barrier( MPI_COMM_WORLD );
    time = MPI_Wtime() - time;

    /* Report performance */
    if ( me == 0 ) {
      MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

      operation_count = 2.0/3.0 * size * size * size;

      printf("n = %d, time = %lf, MFLOPS/node = %lf\n", size, time,
	      operation_count / time * 1.0e-6 / nprocs );
    }

    /* Process the answer.  As an example, this routine brings 
       result x (stored in b) to processor 0 and prints first and 
       last entry */
    Process_answer( b ); 

    /* Check answer by overwriting b <- b - x (where b holds computed
       approximation to x) */

    PLA_Axpy( minus_one, x, b );

    PLA_Nrm2( b, b_norm);

    /* Report norm of b - x */
    if ( me == 0 ) {
      PLA_Obj_get_local_contents( b_norm, PLA_NO_TRANS, &dummy, &dummy,
				  &b_norm_value, 1, 1 );
      printf( "Norm2 of x - computed x : %le\n", b_norm_value );
    }
  } 

  printf("****************************************************************\n");
  printf("* NOTE: while this driver times all operations performed by    *\n");
  printf("* a LINPACK benchmark, it does not use the ScaLAPACK random    *\n");
  printf("* matrix generator and thus according to the rules of the      *\n");
  printf("* LINPACK benchmark is not an official implementation.         *\n");
  printf("* Contact [email protected] if you are interested in creating *\n");
  printf("* a version that does meet the rules.                          *\n");
  printf("****************************************************************\n");

     
  /* Free the linear algebra objects */
  PLA_Obj_free(&A);            PLA_Obj_free(&x);
  PLA_Obj_free(&b);            PLA_Obj_free(&minus_one);
  PLA_Obj_free(&b_norm);       PLA_Obj_free(&pivots);
  PLA_Obj_free(&A_append);     PLA_Obj_free(&rhs);

  /* Free the template */
  PLA_Temp_free(&templ);

  /* Finalize PLAPACK and MPI */
  PLA_Finalize( );
  MPI_Finalize( );
}