コード例 #1
0
ファイル: pdsdpplapack.c プロジェクト: cavazos/DSDP
static int pmatmult(void *MM, double x[], double y[], int n){
  plapackM* ctx=(plapackM*)MM;
  double d_one=1.0,drank=1.0/ctx->nprocs;
  int i,info;

  DSDPFunctionBegin;
  info=PLA_Obj_set_to_zero(ctx->vVec);DSDPCHKERR(info);
  info=PLA_Obj_set_to_zero(ctx->wVec);DSDPCHKERR(info);
  info=PLA_API_begin();DSDPCHKERR(info);
  info=PLA_Obj_API_open(ctx->vVec);DSDPCHKERR(info);
  info=PLA_API_axpy_vector_to_global(n, &d_one, x, 1, 
				     ctx->vVec, 0); DSDPCHKERR(info);
  /* Copy solution from PLAPACK vector to DSDPVector */
  info=PLA_Obj_API_close(ctx->vVec); DSDPCHKERR(info);
  info=PLA_API_end(); DSDPCHKERR(info);

  PLA_Symv( PLA_LOWER_TRIANGULAR, ctx->one, ctx->AMat, ctx->vVec, ctx->zero, ctx->wVec ); 
  /* Copy solution from PLAPACK vector to DSDPVector */

  memset((void*)y,0,n*sizeof(double));
  info=PLA_API_begin();
  info=PLA_Obj_API_open(ctx->wVec);

  info=PLA_API_axpy_global_to_vector(n, &d_one, ctx->wVec, 0,
				     y, 1); DSDPCHKERR(info);
  info=PLA_Obj_API_close(ctx->wVec); DSDPCHKERR(info);
  info=PLA_API_end(); DSDPCHKERR(info);
  for (i=0;i<n;i++){ y[i]*=drank;}  /* Should be in PLA_API_axpy_vector_to_global */

  DSDPFunctionReturn(0);
}
コード例 #2
0
int PLA_Local_dot( PLA_Obj x, PLA_Obj y, PLA_Obj alpha )
{
  int 
    local_length_x, local_width_x,
    local_length_y, local_width_y,
    ldim_x, ldim_y, 
    istride_x, istride_y;

  char
    *buf_x, *buf_y, *buf_alpha;

  MPI_Datatype 
    datatype;

  PLA_Obj_set_to_zero( alpha );

  PLA_Obj_local_length( x, &local_length_x);
  PLA_Obj_local_width ( x, &local_width_x);
  PLA_Obj_local_ldim  ( x, &ldim_x );
  PLA_Obj_local_length( y, &local_length_y);
  PLA_Obj_local_width ( y, &local_width_y);
  PLA_Obj_local_ldim  ( y, &ldim_y );
  
  if ( local_length_x * local_width_x != 0 ){
    if ( local_length_x == 1 ){
      istride_x = ldim_x;
      local_length_x = local_width_x;
    }
    else
      istride_x = 1;

    if ( local_length_y == 1 ){
      istride_y = ldim_y;
      local_length_y = local_width_y;
    }
    else
      istride_y = 1;

    PLA_Obj_datatype( x, &datatype );
    PLA_Obj_local_buffer(x, ( void ** ) &buf_x);
    PLA_Obj_local_buffer(y, ( void ** ) &buf_y);
    PLA_Obj_local_buffer(alpha, ( void ** ) &buf_alpha);

    if ( datatype == MPI_DOUBLE ){
      *( ( double * ) buf_alpha ) = 
	PLA_ddot( &local_length_x, ( double *) buf_x, &istride_x,
                                   ( double *) buf_y, &istride_y );
    }
/*    else if ( datatype == MPI_FLOAT ){
      *( ( float * ) buf_alpha ) = 
	PLA_sdot( &local_length_x, ( float *) buf_x, &istride_x,
                                   ( float *) buf_y, &istride_y );
    } */
    else {
      PLA_Abort( "datatype not yet supported", __LINE__, __FILE__ );
    }
  }

  return PLA_SUCCESS;
}
コード例 #3
0
int PLA_Obj_set_to_identity( PLA_Obj A )
{
  int length, width;
  PLA_Obj A11 = NULL,     ABR = NULL;

  PLA_Obj_global_length( A, &length );
  PLA_Obj_global_width( A, &width );

  if ( length != width ){
    printf("PLA_Obj_set_to_identity() error:  object not square\n");
    exit ( 0 );
  }

  PLA_Obj_set_to_zero( A );
  PLA_Obj_view_all( A, &ABR );
  
  while ( TRUE ){
    PLA_Obj_global_length( ABR, &length );
    if ( 0 == length ) break;
    
    PLA_Obj_split_4( ABR, 1, 1, &A11,      PLA_DUMMY,
                                 PLA_DUMMY, &ABR );

    PLA_Obj_set_to_one( A11 );
  }

  PLA_Obj_free( &A11 );
  PLA_Obj_free( &ABR );

  return PLA_SUCCESS;
}
コード例 #4
0
ファイル: pdsdpplapack.c プロジェクト: cavazos/DSDP
static int pmatreduce(void*MM,double *v, int m){
  plapackM* ctx=(plapackM*)MM;
  double d_one=1.0;
  int info;
  int i;
  DSDPFunctionBegin;

  /* 
     Copy vec from DSDPVector to PLAPACK vector. This assumes the entries in the local
     DSDP Vectors are not duplicated on multiple processors. It adds the first element of
     each vector together, second element, ... 
  */
  info=PLA_Obj_set_to_zero(ctx->vVec);DSDPCHKERR(info);
  info=PLA_API_begin();DSDPCHKERR(info);
  info=PLA_Obj_API_open(ctx->vVec);DSDPCHKERR(info);
  info=PLA_API_axpy_vector_to_global(m, &d_one, v , 1, 
                                       ctx->vVec, 0); DSDPCHKERR(info);
  info=PLA_Obj_API_close(ctx->vVec); DSDPCHKERR(info);
  /* Copy solution from PLAPACK vector to DSDPVector */
  memset((void*)v,0,m*sizeof(double));

  info=PLA_Obj_API_open(ctx->vVec);DSDPCHKERR(info);
  info=PLA_API_axpy_global_to_vector(m, &d_one, ctx->vVec, 0,
				     v, 1); DSDPCHKERR(info);
  info=PLA_Obj_API_close(ctx->vVec); DSDPCHKERR(info);
  info=PLA_API_end(); DSDPCHKERR(info);
  DSDPFunctionReturn(0);
}
コード例 #5
0
ファイル: pdsdpplapack.c プロジェクト: cavazos/DSDP
static int pmatfactor(void*MM, int *flag){
  plapackM* ctx=(plapackM*)MM;
  int info,dummy;
  double ddxerror;
  DSDPFunctionBegin;
  wallclock(&ctx->t1);
  info=PLA_Obj_set_to_one(ctx->wVec);DSDPCHKERR(info);
  info=PLA_Obj_set_to_zero(ctx->vVec);DSDPCHKERR(info);
  info=PLA_Symv( PLA_LOWER_TRIANGULAR, ctx->one, ctx->AMat, ctx->wVec, ctx->zero, ctx->vVec ); DSDPCHKERR(info);
  *flag=0;
  info = PLA_Chol(PLA_LOWER_TRIANGULAR, ctx->AMat); DSDPCHKERR(info);
  if (info!=0) {
    *flag=1;
    printf("PLAPACK WARNING: Non positive-definite Matrix M : Row: %d\n",info);
  }
  info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat, ctx->vVec);DSDPCHKERR(info);
  info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat,ctx->vVec); DSDPCHKERR(info);  

  info=PLA_Obj_set_to_minus_one(ctx->wVec);DSDPCHKERR(info); 
  info=PLA_Axpy( ctx->one, ctx->vVec, ctx->wVec );DSDPCHKERR(info); 
  info=PLA_Nrm2( ctx->wVec, ctx->dxerror );DSDPCHKERR(info); 
  PLA_Obj_get_local_contents( ctx->dxerror, PLA_NO_TRANS, &dummy, &dummy,
			      &ddxerror, 1, 1 );
  if (ddxerror/sqrt(1.0*ctx->global_size) > 0.1){
    *flag=1;
    if (ctx->rank==-1){
      printf("PDSDPPLAPACK: Non positive-definite Matrix. %4.2e\n",ddxerror);
    }
  }
  wallclock(&ctx->t2);
  ctx->tsolve+=ctx->t2-ctx->t1;
  PPDSDPPrintTime(ctx->rank,"PLAPACK: Factor M",ctx->t2-ctx->t1,ctx->tsolve);
  PPDSDPPrintTime(ctx->rank,"Subtotal Time",0,ctx->t2-ctx->t1);
  DSDPFunctionReturn(0);
}
コード例 #6
0
ファイル: pdsdpplapack.c プロジェクト: cavazos/DSDP
static int pmatzero(void*MM){
  plapackM* ctx=(plapackM*)MM;
  DSDPFunctionBegin;
  wallclock(&ctx->t1);
  PLA_Obj_set_to_zero(ctx->AMat);
  PLA_API_begin();
  PLA_Obj_API_open(ctx->AMat);
  DSDPFunctionReturn(0);
}
コード例 #7
0
PLA_Copy_sym_tridiag_to_msc( int uplo, PLA_Obj A, PLA_Obj tridiag )
{
    int
    size;
    PLA_Obj
    A_BR = NULL, A_11 = NULL, A_21 = NULL, A_21_1 = NULL,
    temp = NULL,
    diag_1 = NULL, diag_B = NULL,
    subdiag_1 = NULL, subdiag_B = NULL;

    if ( uplo != PLA_LOWER_TRIANGULAR )
        PLA_Abort( "uplo != PLA_LOWER_TRIANGULAR not yet implemented",
                   __LINE__, __FILE__ );

    PLA_Mscalar_create_conf_to( tridiag, PLA_ALL_ROWS, PLA_ALL_COLS,
                                &temp );
    PLA_Obj_set_to_zero( temp );
    PLA_Obj_view_all( A, &A_BR );
    PLA_Obj_vert_split_2( temp, 1, &diag_B, &subdiag_B );

    while ( TRUE ) {
        PLA_Obj_global_length( A_BR, &size );
        if ( size == 0 ) break;

        PLA_Obj_split_4( A_BR, 1, 1, &A_11, PLA_DUMMY,
                         &A_21, &A_BR );

        PLA_Obj_horz_split_2( diag_B, 1, &diag_1,
                              &diag_B );

        PLA_Obj_horz_split_2( subdiag_B, 1, &subdiag_1,
                              &subdiag_B );

        PLA_Local_copy( A_11, diag_1 );
        if ( size > 1 ) {
            PLA_Obj_horz_split_2( A_21, 1,   &A_21_1,
                                  PLA_DUMMY );
            PLA_Local_copy( A_21_1, subdiag_1 );
        }
    }

    PLA_Reduce( temp, MPI_SUM, tridiag );

    PLA_Obj_free( &A_BR );
    PLA_Obj_free( &A_11 );
    PLA_Obj_free( &A_21 );
    PLA_Obj_free( &A_21_1 );
    PLA_Obj_free( &temp );
    PLA_Obj_free( &diag_1 );
    PLA_Obj_free( &diag_B );
    PLA_Obj_free( &subdiag_1 );
    PLA_Obj_free( &subdiag_B );

    return PLA_SUCCESS;
}
コード例 #8
0
int PLA_Local_asum( PLA_Obj x, PLA_Obj alpha )
{
  int 
    local_length, local_width,
    ldim_x, istride;

  char
    *buf_x, *buf_alpha;

  MPI_Datatype 
    datatype;

  PLA_Obj_set_to_zero( alpha );

  PLA_Obj_local_length( x, &local_length);
  PLA_Obj_local_width ( x, &local_width);
  PLA_Obj_local_ldim  ( x, &ldim_x );
  
  if ( local_length * local_width != 0 ){
    if ( local_length == 1 ){
      istride = ldim_x;
      local_length = local_width;
    }
    else
      istride = 1;

    PLA_Obj_datatype( x, &datatype );
    PLA_Obj_local_buffer(x, ( void ** ) &buf_x);
    PLA_Obj_local_buffer(alpha, ( void ** ) &buf_alpha);

    if ( datatype == MPI_DOUBLE ){
      *( ( double * ) buf_alpha ) = 
	PLA_dasum( &local_length, ( double *) buf_x, &istride );
    }
    else if ( datatype == MPI_FLOAT ){
      *( ( float * ) buf_alpha ) = 
	PLA_sasum( &local_length, ( float *) buf_x, &istride );
    }
    else if ( datatype == MPI_DOUBLE_COMPLEX ){
      *( ( double * ) buf_alpha ) = 
	PLA_dzasum( &local_length, ( PLA_DOUBLE_COMPLEX *) buf_x, &istride );
    }
    else if ( datatype == MPI_COMPLEX ){
       *( ( float * ) buf_alpha ) = 
	      PLA_scasum( &local_length, ( PLA_COMPLEX *) buf_x, &istride ); 
    }
  }

  return PLA_SUCCESS;
}
コード例 #9
0
ファイル: pdsdpplapack.c プロジェクト: cavazos/DSDP
static int pmatsolve(void* MM, double bb[], double xx[], int n){
  plapackM* ctx=(plapackM*)MM;
  double d_one=1.0,drank=1.0/ctx->nprocs;
  int i,info;

  DSDPFunctionBegin;
  wallclock(&ctx->t1);
  /* 
     Copy RHS from DSDPVector to PLAPACK vector. This assumes the entries in the local
     DSDP Vectors are not duplicated on multiple processors. It adds the first element of
     each vector together, second element, ... 
  */
  info=PLA_Obj_set_to_zero(ctx->vVec);DSDPCHKERR(info);
  info=PLA_API_begin();DSDPCHKERR(info);
  info=PLA_Obj_API_open(ctx->vVec);DSDPCHKERR(info);
  info=PLA_API_axpy_vector_to_global(n, &d_one, bb , 1, 
				     ctx->vVec, 0); DSDPCHKERR(info);
  info=PLA_Obj_API_close(ctx->vVec); DSDPCHKERR(info);
  info=PLA_API_end(); DSDPCHKERR(info);


  /* Assuming the matrix is already factored, solve the equations. */
  info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat, ctx->vVec);DSDPCHKERR(info);
  info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat,ctx->vVec); DSDPCHKERR(info);  

  /* Copy solution from PLAPACK vector to DSDPVector */
  memset((void*)xx,0,n*sizeof(double));
  info=PLA_API_begin();
  info=PLA_Obj_API_open(ctx->vVec);
  info=PLA_API_axpy_global_to_vector(n, &d_one, ctx->vVec, 0,
				     xx, 1); DSDPCHKERR(info);
  info=PLA_Obj_API_close(ctx->vVec); DSDPCHKERR(info);
  info=PLA_API_end(); DSDPCHKERR(info);
  for (i=0;i<n;i++){xx[i]*=drank;}

  wallclock(&ctx->t2);
  ctx->tsolve+=ctx->t2-ctx->t1;
  /* PPDSDPPrintTime(ctx->rank,"Solve M",ctx->t2-ctx->t1,ctx->tsolve);*/
  DSDPFunctionReturn(0);
}
コード例 #10
0
ファイル: pdsdpplapack.c プロジェクト: cavazos/DSDP
int pmatsetup(void *MM, int m){
  plapackM* ctx=(plapackM*)MM;
  MPI_Comm rowcomm,colcomm;
  int itmp,nprocs,info;

  DSDPFunctionBegin;
  ctx->global_size=m;

  info = MPI_Comm_size(ctx->mpi_comm,&nprocs); DSDPCHKERR(info);
  itmp=(m-nprocs+1)/nprocs;
  itmp=DSDPMax(2,itmp);
  ctx->nb_distr=DSDPMin(ctx->nb_distr,itmp);

  info = PLA_Comm_1D_to_2D_ratio(ctx->mpi_comm,ctx->ratio,&ctx->plapack_comm); DSDPCHKERR(info);
  info = PLA_Init(ctx->plapack_comm); DSDPCHKERR(info);
  info = PLA_Temp_create(ctx->nb_distr, 0, &ctx->templ); DSDPCHKERR(info);

  info=PLA_Matrix_create(MPI_DOUBLE, m, m, ctx->templ,
			 PLA_ALIGN_FIRST, PLA_ALIGN_FIRST, &ctx->AMat);DSDPCHKERR(info);  
  info=PLA_Mvector_create(MPI_DOUBLE, m, 1, ctx->templ, PLA_ALIGN_FIRST, &ctx->vVec);DSDPCHKERR(info);  
  info=PLA_Mvector_create(MPI_DOUBLE, m, 1, ctx->templ, PLA_ALIGN_FIRST, &ctx->wVec);DSDPCHKERR(info);  
  info=PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, ctx->templ, &ctx->dxerror );DSDPCHKERR(info);  
  info=PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, ctx->templ, &ctx->one );DSDPCHKERR(info);
  info=PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, ctx->templ, &ctx->zero );DSDPCHKERR(info);
  info=PLA_Obj_set_to_one(ctx->one);DSDPCHKERR(info);
  info=PLA_Obj_set_to_zero(ctx->zero);DSDPCHKERR(info);

  info = MPI_Comm_rank(ctx->plapack_comm,&ctx->rank); DSDPCHKERR(info);
  info = MPI_Comm_size(ctx->plapack_comm,&ctx->nprocs); DSDPCHKERR(info);

  info = PLA_Temp_comm_col_info(ctx->templ, &rowcomm, &ctx->rowrank, &ctx->numrownodes); DSDPCHKERR(info);
  info = PLA_Temp_comm_row_info(ctx->templ, &colcomm, &ctx->colrank, &ctx->numcolnodes); DSDPCHKERR(info);

  ctx->t0=0;ctx->t1=0;ctx->t2=0;
  ctx->thessian=0;ctx->tsolve=0;
  wallclock(&ctx->t0);
  DSDPFunctionReturn(0);
}
コード例 #11
0
int PLA_Obj_set_to_zero_below_diagonal( PLA_Obj A )
{
  PLA_Obj
    A_BR = NULL, a_21 = NULL;
  int
    size;

  PLA_Obj_view_all( A, &A_BR );

  while( TRUE ){
    PLA_Obj_global_length( A_BR, &size );
    if ( size == 0 ) break;
    
    PLA_Obj_split_4( A_BR, 1, 1, PLA_DUMMY, PLA_DUMMY,
		                  &a_21,      &A_BR );
    
    PLA_Obj_set_to_zero( a_21 );
  }

  PLA_Obj_free( &A_BR );
  PLA_Obj_free( &a_21 );
  
  return PLA_SUCCESS;
}
コード例 #12
0
int PLA_Triangular_invert_lower( PLA_Obj A )
{
  int 
    nb_alg, size, value = PLA_SUCCESS;

  PLA_Obj
    A_BR = NULL, A_11 = NULL, A_21 = NULL, A_21_dup = NULL, A_11_msc = NULL,
    Ainv = NULL, Ainv_BL = NULL, Ainv_10_11 = NULL, Ainv_10_11_dup = NULL,
    minus_one = NULL, zero = NULL, one = NULL;

  PLA_Template
    templ;

  PLA_Obj_template( A, &templ );
  PLA_Environ_nb_alg( PLA_OP_PAN_PAN, templ, &nb_alg );

  PLA_Create_constants_conf_to( A, &minus_one, &zero, &one );
  PLA_Obj_vert_split_2( A, 0, &Ainv_BL, &A_BR );

  
  while( TRUE ){
    PLA_Obj_global_length( A_BR, &size );
    if ( 0 == ( size = min( size, nb_alg ) ) ) break;

    PLA_Obj_split_4( A_BR, size, size, &A_11, PLA_DUMMY,
                                        &A_21, &A_BR );

    PLA_Obj_view_shift( Ainv_BL,       0,
                                 0,             size,
                                        0 ); 

    PLA_Obj_horz_split_2( Ainv_BL, size, &Ainv_10_11,
                                          &Ainv_BL );

    PLA_Mscalar_create_conf_to( A_11, PLA_ALL_ROWS, PLA_ALL_COLS,
				 &A_11_msc );
    PLA_Copy( A_11, A_11_msc );

    PLA_Obj_set_orientation( Ainv_10_11, PLA_PROJ_ONTO_ROW );
    PLA_Pmvector_create_conf_to( Ainv_BL, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS,
				  size, &Ainv_10_11_dup );
    PLA_Pmvector_create_conf_to( Ainv_BL, PLA_PROJ_ONTO_COL, PLA_ALL_COLS,
				  size, &A_21_dup );

    /*    PLA_Trsm( PLA_SIDE_LEFT,    PLA_LOWER_TRIANGULAR,
	       PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG,
	       one, A_11, Ainv_10_11 );  */

    PLA_Obj_set_to_identity( A_11 );
    
    PLA_Copy( Ainv_10_11, Ainv_10_11_dup );

    PLA_Local_trsm( PLA_SIDE_LEFT,    PLA_LOWER_TRIANGULAR,
	       PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG,
	       one, A_11_msc, Ainv_10_11_dup ); 

    PLA_Copy( A_21,       A_21_dup );

    PLA_Obj_set_to_zero( A_21 );
 
    PLA_Local_gemm( PLA_NO_TRANS, PLA_NO_TRANS, 
	       minus_one, A_21_dup, Ainv_10_11_dup,
	       one, Ainv_BL );

    PLA_Copy( Ainv_10_11_dup, Ainv_10_11 );
  }

  PLA_Obj_free( &A_BR );
  PLA_Obj_free( &A_11 );
  PLA_Obj_free( &A_21 );
  PLA_Obj_free( &Ainv );
  PLA_Obj_free( &Ainv_BL );
  PLA_Obj_free( &Ainv_10_11 );
  PLA_Obj_free( &minus_one );
  PLA_Obj_free( &zero );
  PLA_Obj_free( &one );

  return PLA_SUCCESS;
}
コード例 #13
0
int PLA_Triangular_Lt_L( PLA_Obj A )
{
  int 
    nb_alg, size, value = PLA_SUCCESS;

  PLA_Obj
    A_TL = NULL, A_BL = NULL, A_10_11 = NULL, A_11 = NULL, 
    A_10_11_dpmv_rows = NULL, A_10_11_dpmv_cols = NULL,
    one = NULL;

  PLA_Template
    templ;

  PLA_Obj_template( A, &templ );
  PLA_Environ_nb_alg( PLA_OP_PAN_PAN, templ, &nb_alg );

  PLA_Create_constants_conf_to( A, NULL, NULL, &one );

  PLA_Obj_split_4( A, 0, 0, &A_TL, PLA_DUMMY,
		             &A_BL, PLA_DUMMY );

  while( TRUE ){
    PLA_Obj_global_length( A_BL, &size );
    if ( 0 == ( size = min( size, nb_alg ) ) ) break;

    PLA_Obj_view_shift( A_TL,         0,
                               0,            size,
                                     size );

    PLA_Obj_view_shift( A_BL,         0,
                               0,            size,
                                       0 );

    PLA_Obj_horz_split_2( A_BL, size,   &A_10_11,
			                 &A_BL );

    PLA_Obj_vert_split_2( A_10_11, -size, PLA_DUMMY, &A_11 );

    PLA_Set_triang_to_zero( PLA_LOWER_TRIANGULAR, PLA_NONUNIT_DIAG, A_11 );

    PLA_Obj_set_orientation( A_10_11, PLA_PROJ_ONTO_ROW );

    PLA_Pmvector_create_conf_to( A_TL,  PLA_PROJ_ONTO_COL, PLA_ALL_COLS,
				  size, &A_10_11_dpmv_cols );

    PLA_Pmvector_create_conf_to( A_TL,  PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS,
				  size, &A_10_11_dpmv_rows );

    PLA_Copy( A_10_11,           A_10_11_dpmv_rows );

    /*
    {
      PLA_Obj
	A_10_11_mv = NULL;

      PLA_Mvector_create_conf_to( A_10_11, 1, &A_10_11_mv );
      PLA_Copy( A_10_11_dpmv_rows, A_10_11_mv );
      PLA_Copy( A_10_11_mv, A_10_11_dpmv_cols );

      PLA_Obj_free( &A_10_11_mv );
    }
    */
    PLA_Copy( A_10_11,          A_10_11_dpmv_cols );
    PLA_Obj_set_to_zero( A_10_11 );

    PLA_Syrk_perform_local_part( PLA_LOWER_TRIANGULAR, 
				  one, A_10_11_dpmv_cols, A_10_11_dpmv_rows, 
                                  one, A_TL );
  }

  PLA_Obj_free( &A_TL );
  PLA_Obj_free( &A_BL );
  PLA_Obj_free( &A_10_11 );
  PLA_Obj_free( &A_11 );
  PLA_Obj_free( &A_10_11_dpmv_rows );
  PLA_Obj_free( &A_10_11_dpmv_cols );
  PLA_Obj_free( &one );

  return PLA_SUCCESS;
}
コード例 #14
0
int PLA_Symm_enter( int side, int uplo,
	             PLA_Obj alpha, PLA_Obj A, PLA_Obj B, 
                     PLA_Obj beta,  PLA_Obj C )
{
  int
    value = PLA_SUCCESS,
    size, length_A, width_A, length_B, width_B, length_C, width_C,
    objtype;
  char 
    routine_name[ 35 ] = "PLA_Symm";

  PLA_Routine_stack_push( routine_name );

  PLA_Routine_stack_push( "PLA_Symm_enter" );

  old_size_malloced = PLA_Total_size_malloced( );
  
  if ( PLA_CHECK_PARAMETERS ){
    /* Check if side and uplo parameters are valid */
    if ( !PLA_Valid_side_parameter( side ) ){
      PLA_Warning( "Invalid parameter side" );
      value--;
    }

    if ( !PLA_Valid_uplo_parameter( uplo ) ){
      PLA_Warning( "Invalid parameter uplo" );
      value--;
    }

    /* Check if alpha is valid multiscalar of size 1x1 */

    if ( alpha == NULL || !PLA_Valid_object( alpha ) ) {
      PLA_Warning( "Invalid object alpha" );
      value--;
    }

   PLA_Obj_objtype( alpha, &objtype );
    if ( objtype != PLA_MSCALAR ){
      PLA_Warning( "Invalid objtype for alpha" );
      value--;
    }      

    PLA_Obj_global_length( alpha, &size );
    if ( size != 1 ){
      PLA_Warning( "Invalid global length for alpha" );
      value--;
    }      

    PLA_Obj_global_width( alpha, &size );
    if ( size != 1 ){
      PLA_Warning( "Invalid global width for alpha" );
      value--;
    }      

    /* Check if A is valid matrix */

    if ( A == NULL || !PLA_Valid_object( A ) ) {
      PLA_Warning( "Invalid object A" );
      value--;
    }

    PLA_Obj_objtype( A, &objtype );
    if ( objtype != PLA_MATRIX ){
      PLA_Warning( "Invalid objtype for A" );
      value--;
    }      

    /* Check if B is valid matrix */

    if ( B == NULL || !PLA_Valid_object( B ) ) {
      PLA_Warning( "Invalid object B" );
      value--;
    }

    PLA_Obj_objtype( B, &objtype );
    if ( objtype != PLA_MATRIX ){
      PLA_Warning( "Invalid objtype for B" );
      value--;
    }      

    /* Check if beta is valid multiscalar of size 1x1 */

    if ( beta == NULL || !PLA_Valid_object( beta ) ) {
      PLA_Warning( "Invalid object beta" );
      value--;
    }

    PLA_Obj_objtype( beta, &objtype );
    if ( objtype != PLA_MSCALAR ){
      PLA_Warning( "Invalid objtype for beta" );
      value--;
    }      

    PLA_Obj_global_length( beta, &size );
    if ( size != 1 ){
      PLA_Warning( "Invalid global length for beta" );
      value--;
    }      

    PLA_Obj_global_width( beta, &size );
    if ( size != 1 ){
      PLA_Warning( "Invalid global width for beta" );
      value--;
    }      

    /* Check if C is valid matrix */

    if ( C == NULL || !PLA_Valid_object( C ) ) {
      PLA_Warning( "Invalid object C" );
      value--;
    }

    PLA_Obj_objtype( C, &objtype );
    if ( objtype != PLA_MATRIX ){
      PLA_Warning( "Invalid objtype for C" );
      value--;
    }      

    /* Check if matrix dimensions match */
    PLA_Obj_global_length( A, &length_A );
    PLA_Obj_global_width(  A, &width_A );
    PLA_Obj_global_length( B, &length_B );
    PLA_Obj_global_width(  B, &width_B );
    PLA_Obj_global_length( C, &length_C );
    PLA_Obj_global_width(  C, &width_C );

    if ( length_A != width_A ){
      PLA_Warning( "A is not square" );
      value--;
    }

    if ( side == PLA_SIDE_LEFT ){
      if ( length_A != length_C ){
	PLA_Warning( "length of A does not match length of C" );
	value--;
      }      
    }
    else /* side == PLA_SIDE_LEFT */{
      if ( length_A != width_C ){
	PLA_Warning( "length of A does not match width of C" );
	value--;
      }      
    }
    if ( length_B != length_C ){
      PLA_Warning( "length of B does not match length of C" );
      value--;
    }      
    if ( width_B != width_C ){
      PLA_Warning( "width of B does not match width of C" );
      value--;
    }      
  }

  if ( PLA_CHECK_AGAINST_SEQUENTIAL ){
    PLA_Mscalar_create_conf_to( 
              alpha, PLA_ALL_ROWS, PLA_ALL_COLS, &alpha_cpy );
    PLA_Mscalar_create_conf_to( 
              A, PLA_ALL_ROWS, PLA_ALL_COLS, &A_cpy );
    PLA_Mscalar_create_conf_to( 
              B, PLA_ALL_ROWS, PLA_ALL_COLS, &B_cpy );
    PLA_Mscalar_create_conf_to( 
              beta, PLA_ALL_ROWS, PLA_ALL_COLS, &beta_cpy );
    PLA_Mscalar_create_conf_to( 
              C, PLA_ALL_ROWS, PLA_ALL_COLS, &C_cpy );

    PLA_Copy( alpha, alpha_cpy );
    PLA_Copy( A, A_cpy );
    PLA_Copy( B, B_cpy );
    PLA_Copy( beta, beta_cpy );
    if ( PLA_Local_equal_zero( beta_cpy ) )
      PLA_Obj_set_to_zero( C_cpy );
    else
      PLA_Copy( C, C_cpy );
  }

  PLA_Routine_stack_pop( routine_name );
				 
  return value;
}
コード例 #15
0
int PLA_Tri_red( int uplo, PLA_Obj A, PLA_Obj s, PLA_Obj Q )
/*
  PLA_Tri_red

  Purpose: Reduce symmetric matrix A to tridiagonal form using 
  Householder similarity transformations.

  input:
  uplo                    indicates whether A is stored in upper or
                          lower triangular part 
  A                       MATRIX to be reduced

  output:
  A                       Reduced matrix A.  Householder vectors used
                          to reduce A are stored below first subdiagonal
                          of A.
  s                       Scaling factors for the Householder transforms
                          computed to reduce A.
  Q                       if Q != NULL, Q equals the accumulation of 
                          Householder transforms.
*/
{
  PLA_Obj
    u = NULL,  u_B = NULL, 
    beta_B = NULL, beta_1 = NULL, beta_1_dup = NULL,
    A_BR = NULL, a_21 = NULL, A_21 = NULL, 
    q_11 = NULL, q_12 = NULL, q_21 = NULL, Q_22 = NULL;
  int
    size, value = PLA_SUCCESS;
  double time;

  if ( PLA_ERROR_CHECKING )    /* Perform parameter and error checking */
    value = PLA_Tri_red_enter( uplo, A, s, Q );

  if ( uplo != PLA_LOWER_TRIANGULAR ) 
    PLA_Abort( "only uplo == PLA_LOWER_TRIANGULAR currently supported",
		__LINE__, __FILE__ );

  /* Create a vector in which to compute the Householder vector */
  PLA_Mvector_create_conf_to( A, 1, &u );
  
  /* Create a duplicated multiscalar in which to hold the scaling factor
     for the Householder transform being computed */
  PLA_Obj_horz_split_2( s, 1, &beta_1,
			      PLA_DUMMY );
  PLA_Mscalar_create_conf_to( beta_1, PLA_ALL_ROWS, PLA_ALL_COLS, 
			      &beta_1_dup );

  /* Track the active parts of A, s, and u */
  PLA_Obj_view_all( A, &A_BR );    
  PLA_Obj_view_all( s, &beta_B );    
  PLA_Obj_view_all( u, &u_B );

  while ( TRUE ){  
    PLA_Obj_global_length( A_BR, &size );
    if ( 1 == size ) break;

    /* Partition A_BR = / alpha_11    *    \
                        \   a_21    A_BR   /  where alpha_11 is 1x1 */
    PLA_Obj_split_4( A_BR, 1, 1, PLA_DUMMY, PLA_DUMMY,
    		                 &a_21,     &A_BR );
    /* Split of the current element of vector s */
    PLA_Obj_horz_split_2( beta_B, 1, &beta_1,
                                     &beta_B );
    /* View the part of u in which to compute the Householder vector */
    PLA_Obj_horz_split_2( u_B, 1, PLA_DUMMY,
			          &u_B );

    /* Redistributed a_21 as a vector and compute Householder transform */
    PLA_Copy( a_21, u_B );  
    PLA_Compute_House_v( u_B, beta_1_dup );

    /* Place data back in A and s */
    PLA_Copy( u_B, a_21 );
    PLA_Local_copy( beta_1_dup, beta_1 );

    /* Update A_BR <- ( I - beta_1 u_B u_B^T ) A_BR ( I - beta_1 u_B u_B^T ) */
    PLA_Apply_sym_House( uplo, A_BR, u_B, beta_1_dup );
  }
  
  time = MPI_Wtime ();

  if ( Q != NULL ){
    /* Compute the orthogonal matrix */
    PLA_Obj_split_4( Q, 1, 1, &q_11, &q_12,
		              &q_21, &Q_22 );

    PLA_Obj_set_to_one ( q_11 );
    PLA_Obj_set_to_zero( q_12 );
    PLA_Obj_set_to_zero( q_21 );

    PLA_Obj_split_4( A, 1, -1, PLA_DUMMY, PLA_DUMMY,
		                &A_21,     PLA_DUMMY );

    PLA_Form_Q( PLA_NO_TRANSPOSE, A_21, s, Q_22 );
  }

  time = MPI_Wtime () - time;
  printf( " Form_Q = %f\n", time );



  /* Free the temporary objects */
  PLA_Obj_free( &u );
  PLA_Obj_free( &u_B );
  PLA_Obj_free( &beta_B );
  PLA_Obj_free( &beta_1 );
  PLA_Obj_free( &beta_1_dup );
  PLA_Obj_free( &A_BR );
  PLA_Obj_free( &a_21 );
  PLA_Obj_free( &A_21 );
  PLA_Obj_free( &q_11 );
  PLA_Obj_free( &q_12 );
  PLA_Obj_free( &q_21 );
  PLA_Obj_free( &Q_22 );

  if ( PLA_ERROR_CHECKING )   
    value = PLA_Tri_red_exit( uplo, A, s, Q );

  return PLA_SUCCESS;
}
コード例 #16
0
int main(int argc, char *argv[])
{
  MPI_Comm 
    comm = MPI_COMM_NULL;
  MPI_Datatype
    datatype;
  PLA_Template 
    templ = NULL;
  PLA_Obj  
    A_orig  = NULL, A      = NULL, Q = NULL, diag = NULL,  B = NULL,
    minus_one = NULL, zero   = NULL, one  = NULL;
  int      
    n,
    nb_distr, nb_alg,
    error, parameters, sequential,
    me, nprocs, nprows, npcols,
    itype;
  double 
    time,
    flops,
    d_abs_max,
    PLA_Local_abs_max();

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  if (me==0) {
    printf("enter mesh size:\n");
    scanf("%d%d", &nprows, &npcols );
    printf("mesh size  = %d x %d \n", nprows, npcols );
    printf("enter distr. block size:\n");
    scanf("%d", &nb_distr );
    printf("nb_distr = %d\n", nb_distr );
    printf("enter alg. block size:\n");
    scanf("%d", &nb_alg );
    printf("nb_alg = %d\n", nb_alg );
    printf("turn on error checking? (0 = NO, 1 = YES):\n");
    scanf("%d", &error );
    printf("error checking = %d\n", error );
    printf("turn on parameter checking? (0 = NO, 1 = YES):\n");
    scanf("%d", &parameters );
    printf("parameter checking = %d\n", parameters );
    printf("turn on sequential checking? (0 = NO, 1 = YES):\n");
    scanf("%d", &sequential );
    printf("sequential checking = %d\n", sequential );
  }
  MPI_Bcast(&nprows, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&npcols, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nb_distr, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&nb_alg,   1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&error, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&parameters, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&sequential, 1, MPI_INT, 0, MPI_COMM_WORLD);

  pla_Environ_set_nb_alg( PLA_OP_ALL_ALG, nb_alg );

  PLA_Set_error_checking( error, parameters, sequential, FALSE );

/*  PLA_Comm_1D_to_2D_ratio(MPI_COMM_WORLD, 1.0, &comm); */
  PLA_Comm_1D_to_2D(MPI_COMM_WORLD, nprows, npcols, &comm); 

  PLA_Init(comm);
    
  PLA_Temp_create( nb_distr, 0, &templ );
    
  while ( TRUE ){
    if (me==0) {
      printf("enter datatype:\n");
      printf("-1 = quit\n");
      printf(" 0 = float\n");
      printf(" 1 = double\n");
      printf(" 2 = complex\n");
      printf(" 3 = double complex\n");
      scanf("%d", &itype );
      printf("itype = %d\n", itype );
    }
    MPI_Bcast(&itype, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if ( itype == -1 ) break;
    switch( itype ){
    case 0:
      datatype = MPI_FLOAT;
      break;
    case 1:
      datatype = MPI_DOUBLE;
      break;
    case 2:
      datatype = MPI_COMPLEX;
      break;
    case 3:
      datatype = MPI_DOUBLE_COMPLEX;
      break;
    default:
      PLA_Abort( "unknown datatype", __LINE__, __FILE__ );
    }

    if (me==0) {
      printf("enter n:\n");
      scanf("%d", &n );
      printf("n = %d\n", n );
    }

    MPI_Bcast(&n,     1, MPI_INT, 0, MPI_COMM_WORLD);

    PLA_Matrix_create( datatype, 
		        n, 
		        n,
			templ, 
                        PLA_ALIGN_FIRST, 
                        PLA_ALIGN_FIRST, 
                        &A_orig ); 

    PLA_Matrix_create_conf_to( A_orig, &Q );

    PLA_Matrix_create_conf_to( A_orig, &A );

    PLA_Mvector_create( datatype,
		        n, 
		        1,
			templ, 
                        PLA_ALIGN_FIRST, 
                        &diag ); 

    PLA_Create_constants_conf_to( A, &minus_one, &zero, &one );

    create_diag( diag );

    PLA_Create_sym_eigenproblem( PLA_LOWER_TRIANGULAR, 3, diag, A_orig, Q );

    PLA_Copy( A_orig, A );

    MPI_Barrier( MPI_COMM_WORLD );
    time = MPI_Wtime ();

    PLA_Spectral_decomp( PLA_LOWER_TRIANGULAR, A, Q, diag );

    MPI_Barrier( MPI_COMM_WORLD );
    time = MPI_Wtime () - time;

    /******* Check answer *******/

    /* Make A_orig symmetric */
      PLA_Symmetrize( PLA_LOWER_TRIANGULAR, A_orig );

    PLA_Matrix_create_conf_to( A_orig, &B );

    PLA_Obj_set_to_zero( A );
    PLA_Obj_set_diagonal( A, diag );

    /* A_orig = A_orig - Q diag Q^T */
    PLA_Gemm( PLA_NO_TRANSPOSE, PLA_NO_TRANSPOSE,
	       one, Q, A, zero, B );
    PLA_Gemm( PLA_NO_TRANSPOSE, PLA_TRANSPOSE,
	       minus_one, B, Q, one, A_orig );

    /* Extract absolute value of entry with largest absolute value in A_orig */
    d_abs_max = PLA_Local_abs_max( A_orig );

    if ( d_abs_max > 0.000000001 )
      printf( "large error detected: %le\n", d_abs_max );
      
    flops = 4.0/3.0 * n * n * n;

    if ( me == 0 ) 
      printf("%d time = %f, MFLOPS/node = %10.4lf \n", n, time,
	     flops / time * 1.0e-6 / nprocs );

    PLA_Obj_free( &A_orig );
    PLA_Obj_free( &A );
    PLA_Obj_free( &Q );
    PLA_Obj_free( &diag );
    PLA_Obj_free( &B );
    PLA_Obj_free( &minus_one );
    PLA_Obj_free( &zero );
    PLA_Obj_free( &one );
  }

  PLA_Temp_free(&templ);
  PLA_Finalize( );
  MPI_Finalize( );
}
int PLA_Local_matrix_infinity_norm( PLA_Obj A, PLA_Obj alpha )
{
  int 
    length_A, width_A,
    length_alpha, width_alpha,
    lda;

  MPI_Datatype
    datatype;

  PLA_Obj_local_length( A, &length_A );
  PLA_Obj_local_width( A,  &width_A );

  PLA_Obj_local_length( alpha, &length_alpha );
  PLA_Obj_local_width ( alpha, &width_alpha );

  if ( length_alpha == 0 || width_alpha == 0 ) 
    return PLA_SUCCESS;

  if ( length_A == 0 || width_A == 0 ){
    PLA_Obj_set_to_zero( alpha );
    return PLA_SUCCESS;
  }

  PLA_Obj_local_ldim( A, &lda );
  PLA_Obj_datatype ( A, &datatype );

  if ( datatype == MPI_DOUBLE ){
    double
      *buff_A, *buff_alpha,
      row_nrm1;
    int
      i;

    PLA_Obj_local_buffer( A, (void **) &buff_A );
    PLA_Obj_local_buffer( alpha, (void **) &buff_alpha );

    *buff_alpha = 0.0;
    for ( i=0; i<length_A; i++ ){
      row_nrm1 = PLA_dasum( &width_A, buff_A + i, &lda );
      *buff_alpha = max( *buff_alpha, row_nrm1 );
    }
  }
  else if ( datatype == MPI_FLOAT ){
    float
      *buff_A, *buff_alpha,
      row_nrm1;
    int
      i;

    PLA_Obj_local_buffer( A, (void **) &buff_A );
    PLA_Obj_local_buffer( alpha, (void **) &buff_alpha );

    *buff_alpha = 0.0;
    for ( i=0; i<length_A; i++ ){
      row_nrm1 = PLA_sasum( &width_A, buff_A + i, &lda );
      *buff_alpha = max( *buff_alpha, row_nrm1 );
    }
  }
  else if ( datatype == MPI_DOUBLE_COMPLEX ){
    PLA_DOUBLE_COMPLEX
      *buff_A;
    double
      *buff_alpha, row_nrm1;
    int
      i;

    PLA_Obj_local_buffer( A, (void **) &buff_A );
    PLA_Obj_local_buffer( alpha, (void **) &buff_alpha );

    *buff_alpha = 0.0;
    for ( i=0; i<length_A; i++ ){
      row_nrm1 = PLA_dzasum( &width_A, buff_A + i, &lda );
      *buff_alpha = max( *buff_alpha, row_nrm1 );
    }
  }
  else if ( datatype == MPI_COMPLEX ){
    PLA_COMPLEX
      *buff_A;
    float
      *buff_alpha, row_nrm1;
    int
      i;

    PLA_Obj_local_buffer( A, (void **) &buff_A );
    PLA_Obj_local_buffer( alpha, (void **) &buff_alpha );

    *buff_alpha = 0.0;
    for ( i=0; i<length_A; i++ ){
      row_nrm1 = PLA_scasum( &width_A, buff_A + i, &lda );
      *buff_alpha = max( *buff_alpha, row_nrm1 );
    }
  }
  else
    PLA_Warning( "PLA_Local_matrix_infinity_norm: datatype not yet implemented" );

  return PLA_SUCCESS;
}
コード例 #18
0
void PLA_JVTrsv_lt( int diag, PLA_Obj A, PLA_Obj b )
{
  PLA_Obj   A_TL = NULL,       A_BL = NULL,
            A_10 = NULL,       A_11 = NULL,
            A_20 = NULL,       A_21 = NULL,
            br = NULL,         xc = NULL,
            b_1 = NULL,        b_L = NULL,
            br_1 = NULL,       br_L = NULL,
            xc_T = NULL,
            xc_1 = NULL,       xc_2 = NULL,
            minus_one = NULL,  one = NULL;
  PLA_Template templ = NULL;
  int nb_distr, nb_out, nprocs, size;

  PLA_Obj_template( A, &templ );
  PLA_Temp_nb( templ, &nb_distr );
  PLA_Temp_comm_all_size( templ, &nprocs );
  nb_out = nprocs * nb_distr;   
/*  nb_out = 10000; */

  PLA_Pmvector_create_conf_to(A, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, 1, &xc );
  PLA_Pmvector_create_conf_to(A, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, 1, &br );

  PLA_Obj_set_to_zero( br );
  PLA_Obj_set_to_zero( xc );

  PLA_Create_constants_conf_to( A, &minus_one, NULL, &one );

  PLA_Obj_global_length( A, &size );
  PLA_Obj_horz_split_2( A, size,       &A_TL, 
                                       &A_BL ); 
  PLA_Obj_horz_split_2( xc, size,      &xc_T, 
                                       &xc_2 );
  PLA_Obj_view_all( br, &br_L );
  PLA_Obj_view_all( b,  &b_L );

  while( TRUE ) {
    PLA_Obj_global_length( A_TL, &size );
    if ( 0 == ( size = min( size, nb_out ) ) ) break;

    PLA_Obj_vert_split_2( A_BL, -size,   &A_20, &A_21 );
    PLA_Obj_split_4( A_TL, -size, -size,   &A_TL, PLA_DUMMY, 
                                           &A_10, &A_11 );    

    PLA_Obj_horz_split_2( b_L, -size,    &b_L,  
                                         &b_1 );
    PLA_Obj_vert_split_2( br_L, -size,   &br_L, &br_1 );
    PLA_Obj_horz_split_2( xc_T, -size,   &xc_T, &xc_1 );

    PLA_Local_gemv( PLA_TRANS, minus_one, A_21, xc_2, one, br_1 ); 

    PLA_JVTrsv_sub_lt( diag, A_11, b_1, br_1, xc_1 ); 

    PLA_Obj_view_shift( A_BL,       -size,
                              0,            -size,
                                       0 );
    PLA_Obj_view_shift( xc_2,      -size,
                              0,             0,
                                    0 );
  }

  PLA_Obj_free(&A_TL);       PLA_Obj_free(&A_BL);
  PLA_Obj_free(&A_10);       PLA_Obj_free(&A_11);
  PLA_Obj_free(&A_20);       PLA_Obj_free(&A_21);

  PLA_Obj_free(&b_1);        PLA_Obj_free(&b_L);
  PLA_Obj_free(&br);
  PLA_Obj_free(&br_1);       PLA_Obj_free(&br_L);
  PLA_Obj_free(&xc);         PLA_Obj_free(&xc_T);   
  PLA_Obj_free(&xc_1);       PLA_Obj_free(&xc_2);

  PLA_Obj_free(&minus_one);  PLA_Obj_free(&one);


  return;
}
コード例 #19
0
int PLA_Compute_WY( PLA_Obj A_mv, PLA_Obj s, PLA_Obj W_mv, PLA_Obj Y_mv )

/* to use this routine, we need to modify the calling of
* PLA_QR_right
*/
/*
  Purpose: Compute WY transform from Householder vectors stored
           in A and s.

  Note: Utility routine used as part of computation of QR factorization
        of a matrix.
           
  Input:  A_mv    --   General mxn matrix A   
                       (PLA_MVECTOR)
          s       --   vector of scaling factors
                       (MSCALAR of width=n, duplicated to all nodes)
          W_mv, Y_mv
                  --   matrices for storing W and Y which define the 
                       WY transform.
                       (PLA_MVECTOR of width n)

  Output: W_mv, Y_mv
                  --   W and Y which define the WY transform.

  Assumptions:  n<=m

  Return value: PLA_SUCCESS unless input parameter error is detected.
*/
{
  PLA_Obj   a_B1    = NULL,   A_mv_BR = NULL,   
            s_cur   = NULL,   beta    = NULL,   
            W_L     = NULL,   w_1     = NULL,   W_R = NULL,
            W_BR    = NULL,   w_B1    = NULL,
            Y_BL    = NULL,   y_B1    = NULL,   Y_BR = NULL,
            y_11    = NULL,   u_loc   = NULL,   u    = NULL,
                              u_loc_L = NULL,   u_L  = NULL,
            minus_one = NULL, zero    = NULL,   one     = NULL;
  int       global_width;

  PLA_Create_constants_conf_to( A_mv, &minus_one, &zero, &one );

  PLA_Obj_set_to_zero( W_mv );
  PLA_Obj_set_to_zero( Y_mv );


  /* A_mv_BR tracks the active part of A_mv */
  PLA_Obj_view_all( A_mv, &A_mv_BR );
  /* s_cur   tracks the active part of s */
  PLA_Obj_view_all( s,    &s_cur );

  /* W_L tracks the part of W already computed, W_R the part yet to be 
     computed.  Ditto for W_BR, Y_BL and Y_BR */  
  PLA_Obj_vert_split_2( W_mv, 0,  &W_L,      &W_R );
  PLA_Obj_vert_split_2( W_mv, 0,  PLA_DUMMY, &W_BR );
  PLA_Obj_vert_split_2( Y_mv, 0,  &Y_BL,     &Y_BR );

  /* Create duplicated multiscalar to hold u and local contributions to u */
  PLA_Mscalar_create_conf_to( s, PLA_ALL_ROWS, PLA_ALL_COLS, &u );
  PLA_Mscalar_create_conf_to( s, PLA_ALL_ROWS, PLA_ALL_COLS, &u_loc );

  /* u_L and u_loc_L tracks the part of u and u_loc 
     corresponding to Y_BL and Y_BR, respectively */
  PLA_Obj_horz_split_2( u, 0,      &u_L,     PLA_DUMMY );
  PLA_Obj_horz_split_2( u_loc, 0,  &u_loc_L, PLA_DUMMY );

  while( TRUE ){
    PLA_Obj_global_width( A_mv_BR, &global_width );
    if ( 0 == global_width ) break;

    /* Split off next column of A_mv_BR and columns of W and Y 
       to be computed */
    PLA_Obj_vert_split_2( A_mv_BR,  1,     &a_B1, PLA_DUMMY );
    PLA_Obj_vert_split_2( W_R,      1,     &w_1,  &W_R );
    PLA_Obj_vert_split_2( W_BR,     1,     &w_B1, PLA_DUMMY );
    PLA_Obj_vert_split_2( Y_BR,     1,     &y_B1, PLA_DUMMY );

    /* Split off current scaling factor beta */
    PLA_Obj_horz_split_2( s_cur,    1,     &beta,
                                           &s_cur );

    /* y_B1 = a_B1 with first element set to 1 */
    PLA_Local_copy( a_B1, y_B1 );
    PLA_Obj_horz_split_2( y_B1, 1, &y_11, 
                                   PLA_DUMMY );
    PLA_Obj_set_to_one( y_11 );

    /* w_1 = - beta ( /   0   \
                    \  y_B1 / + W_L Y_BL y_B1 ) */
    PLA_Local_copy( y_B1, w_B1 );

    PLA_Local_gemv( PLA_TRANS, one, Y_BL, y_B1, zero, u_loc_L );

    PLA_Reduce( u_loc_L, MPI_SUM, u_L );

    PLA_Obj_global_width( W_L, &global_width );
    if ( global_width > 0 )
      PLA_Local_gemv( PLA_NO_TRANS, one, W_L, u_L, one, w_1 );

    PLA_Local_scal( beta, w_1 );
/*    PLA_Local_scal( minus_one, w_1 ); */

    /* Update views */
    PLA_Obj_view_shift( W_L,    0,
                            0,      1,
                                0 );

    PLA_Obj_view_shift( Y_BL,    1,
                            0,        1,
                                 0 );


    PLA_Obj_view_shift( u_L,     0,
                            0,        0,
                                 1 );

    PLA_Obj_view_shift( u_loc_L,     0,
                               0,        0,
                                     1 );
    
    PLA_Obj_split_4( A_mv_BR, 1, 1,   PLA_DUMMY, PLA_DUMMY,
                                      PLA_DUMMY, &A_mv_BR);
    PLA_Obj_split_4( Y_BR, 1, 1,      PLA_DUMMY, PLA_DUMMY,
                                      PLA_DUMMY, &Y_BR );
    PLA_Obj_split_4( W_BR,    1, 1,   PLA_DUMMY, PLA_DUMMY,
                                      PLA_DUMMY, &W_BR );
  }

  /* Clean up temporary objects */
  PLA_Obj_free( &a_B1 );    PLA_Obj_free( &A_mv_BR );   
  PLA_Obj_free( &s_cur );  PLA_Obj_free( &beta );   
  PLA_Obj_free( &W_L );    PLA_Obj_free( &w_1 );   PLA_Obj_free( &W_R );
  PLA_Obj_free( &W_BR );    PLA_Obj_free( &w_B1 );
  PLA_Obj_free( &Y_BL );   PLA_Obj_free( &y_B1 );   PLA_Obj_free( &Y_BR );
  PLA_Obj_free( &y_11 ); 
  PLA_Obj_free( &u );    PLA_Obj_free( &u_loc ); 
  PLA_Obj_free( &u_L );    PLA_Obj_free( &u_loc_L ); 
  PLA_Obj_free( &minus_one );
  PLA_Obj_free( &zero );   PLA_Obj_free( &one );

  return PLA_SUCCESS;
}  
コード例 #20
0
int PLA_Symv_enter( int uplo,
                    PLA_Obj alpha, PLA_Obj A, PLA_Obj x,
                    PLA_Obj beta,  PLA_Obj y )
{
    int
    value = PLA_SUCCESS,
    size, length_A, width_A, length_x, width_x, length_y, width_y,
    objtype, proj_onto;
    char
    routine_name[ 35 ] = "PLA_Symv";

    PLA_Routine_stack_push( routine_name );

    PLA_Routine_stack_push( "PLA_Symv_enter" );

    old_size_malloced = PLA_Total_size_malloced( );

    if ( PLA_CHECK_PARAMETERS ) {
        /* Check if uplo parameters are valid */
        if ( !PLA_Valid_uplo_parameter( uplo ) ) {
            PLA_Warning( "Invalid parameter uplo" );
            value--;
        }

        /* Check if alpha is valid multiscalar of size 1x1 */

        if ( alpha == NULL || !PLA_Valid_object( alpha ) ) {
            PLA_Warning( "Invalid object alpha" );
            value--;
        }

        PLA_Obj_objtype( alpha, &objtype );
        if ( objtype != PLA_MSCALAR ) {
            PLA_Warning( "Invalid objtype for alpha" );
            value--;
        }

        PLA_Obj_global_length( alpha, &size );
        if ( size != 1 ) {
            PLA_Warning( "Invalid global length for alpha" );
            value--;
        }

        PLA_Obj_global_width( alpha, &size );
        if ( size != 1 ) {
            PLA_Warning( "Invalid global width for alpha" );
            value--;
        }

        /* Check if A is valid matrix */

        if ( A == NULL || !PLA_Valid_object( A ) ) {
            PLA_Warning( "Invalid object A" );
            value--;
        }

        PLA_Obj_objtype( A, &objtype );
        if ( objtype != PLA_MATRIX ) {
            PLA_Warning( "Invalid objtype for A" );
            value--;
        }

        PLA_Obj_global_length( A, &length_A );
        PLA_Obj_global_width ( A, &width_A );

        if ( length_A != width_A ) {
            PLA_Warning( "A is not square" );
            value--;
        }

        /* Check if x is valid vector */

        if ( x == NULL || !PLA_Valid_object( x ) ) {
            PLA_Warning( "Invalid object x" );
            value--;
        }

        PLA_Obj_objtype( x, &objtype );
        if ( objtype != PLA_MATRIX && objtype != PLA_MVECTOR
                && objtype != PLA_PMVECTOR ) {
            PLA_Warning( "Invalid objtype for x" );
            value--;
        }

        PLA_Obj_project_onto( x, &proj_onto );
        if ( proj_onto == PLA_PROJ_ONTO_COL ) {
            PLA_Obj_global_length( x, &length_x );
            PLA_Obj_global_width( x, &width_x );
        }
        else {
            PLA_Obj_global_width( x, &length_x );
            PLA_Obj_global_length( x, &width_x );
        }

        if ( width_x != 1 ) {
            PLA_Warning( "x is not of width 1" );
            value--;
        }

        /* Check if beta is valid multiscalar of size 1x1 */

        if ( beta == NULL || !PLA_Valid_object( beta ) ) {
            PLA_Warning( "Invalid object beta" );
            value--;
        }

        PLA_Obj_objtype( beta, &objtype );
        if ( objtype != PLA_MSCALAR ) {
            PLA_Warning( "Invalid objtype for beta" );
            value--;
        }

        PLA_Obj_global_length( beta, &size );
        if ( size != 1 ) {
            PLA_Warning( "Invalid global length for beta" );
            value--;
        }

        PLA_Obj_global_width( beta, &size );
        if ( size != 1 ) {
            PLA_Warning( "Invalid global width for beta" );
            value--;
        }

        /* Check if y is valid vector */

        if ( y == NULL || !PLA_Valid_object( y ) ) {
            PLA_Warning( "Invalid object y" );
            value--;
        }

        PLA_Obj_objtype( y, &objtype );
        if ( objtype != PLA_MATRIX && objtype != PLA_MVECTOR
                && objtype != PLA_PMVECTOR ) {
            PLA_Warning( "Invalid objtype for y" );
            value--;
        }

        PLA_Obj_project_onto( y, &proj_onto );
        if ( proj_onto == PLA_PROJ_ONTO_COL ) {
            PLA_Obj_global_length( y, &length_y );
            PLA_Obj_global_width( y, &width_y );
        }
        else {
            PLA_Obj_global_width( y, &length_y );
            PLA_Obj_global_length( y, &width_y );
        }

        if ( width_y != 1 ) {
            PLA_Warning( "y is not of width 1" );
            value--;
        }

        /* Check if dimensions match */

        if ( length_A != length_y ) {
            PLA_Warning( "length of A does not match length of y" );
            value--;
        }

        if ( width_A != length_x ) {
            PLA_Warning( "width of A does not match length of x" );
            value--;
        }
    }

    if ( PLA_CHECK_AGAINST_SEQUENTIAL ) {
        PLA_Mscalar_create_conf_to(
            alpha, PLA_ALL_ROWS, PLA_ALL_COLS, &alpha_cpy );
        PLA_Mscalar_create_conf_to(
            A, PLA_ALL_ROWS, PLA_ALL_COLS, &A_cpy );
        PLA_Mscalar_create_conf_to(
            x, PLA_ALL_ROWS, PLA_ALL_COLS, &x_cpy );
        PLA_Mscalar_create_conf_to(
            beta, PLA_ALL_ROWS, PLA_ALL_COLS, &beta_cpy );
        PLA_Mscalar_create_conf_to(
            y, PLA_ALL_ROWS, PLA_ALL_COLS, &y_cpy );

        PLA_Copy( alpha, alpha_cpy );
        PLA_Copy( A, A_cpy );
        PLA_Copy( x, x_cpy );
        PLA_Copy( beta, beta_cpy );
        if ( PLA_Local_equal_zero( beta_cpy ) )
            PLA_Obj_set_to_zero( y_cpy );
        else
            PLA_Copy( y, y_cpy );
    }

    PLA_Routine_stack_pop( routine_name );

    return value;
}