void main1_() { /* for xpress.com */ static Integer three = (Integer) 3, IONE = (Integer) 1; static Integer IZERO = (Integer) 0; static DoublePrecision DZERO = (DoublePrecision) 0.0e0; Integer index; Integer nocare_, norder_, nonode_, ihost_, ialnod_, ialprc_; Integer me_, host_, nproc_; char range, order; Integer n, ii, me, indx, k, i, jndx, iii; Integer iseed[4]; Integer *mapA, *mapB, *mapZ; Integer *mapvecA, *mapvecB, *mapvecZ; Integer *iscratch; DoublePrecision **iptr; Integer is_size, rsize, ptr_size; Integer nprocs, isize; Integer info; DoublePrecision *scratch, *eval, *dptr; DoublePrecision *diagA, *subdiagA, *diagB, *subdiagB; DoublePrecision *matrixA, *matrixB, *matrixZ; DoublePrecision **vecA, **vecB, **vecZ; DoublePrecision **vecAA, **vecBB, **vecZZ; DoublePrecision res, t_com; DoublePrecision time1, time2; DoublePrecision mxclock_(); #ifdef TIMING extern TIMINGG test_timing; #endif static Integer countlist(); extern void geneig_res(); extern void b_ortho(); extern void tim_com(); extern void mxend_(); extern void mxinit_(), mxtime_(); extern void mxpara_(); extern Integer mxnprc_(); extern Integer mxmynd_(); extern void memreq_(); extern Integer nnodes_(); extern Integer ci_size_(); extern void pdsygv_(); extern DoublePrecision dlarnd_(); extern DoublePrecision dasum_(); extern DoublePrecision fabs(); /* extern char malloc(); */ extern void dspgv2_(); mxinit_(); me = mxmynd_(); nprocs = mxnprc_(); #ifdef TIMING test_timing.choleski = 0.0e0; test_timing.inverse = 0.0e0; test_timing.conjug = 0.0e0; test_timing.householder = 0.0e0; test_timing.pstebz = 0.0e0; test_timing.pstein = 0.0e0; test_timing.mxm5x = 0.0e0; test_timing.mxm25 = 0.0e0; test_timing.pdspevx = 0.0e0; test_timing.pdspgvx = 0.0e0; #endif k = 0; n = 500; diagA = (DoublePrecision *) malloc( n * sizeof(DoublePrecision)); subdiagA = (DoublePrecision *) malloc( n * sizeof(DoublePrecision)); diagB = (DoublePrecision *) malloc( n * sizeof(DoublePrecision)); subdiagB = (DoublePrecision *) malloc( n * sizeof(DoublePrecision)); if( diagA == NULL || subdiagA == NULL || diagB == NULL || subdiagB == NULL ) { fprintf(stderr, " me = %d: ERROR not enough memory for diagA or subdiagA, ...\n", me ); exit(-1); } iscratch = (Integer *) malloc ( (4*n + 100) * sizeof(Integer)); if ((mapA = (Integer *) malloc( n * sizeof(Integer))) == NULL ) { fprintf(stderr, " me = %d: ERROR not enough memory for mapA %d \n", me, n ); exit(-1); } if ((mapB = (Integer *) malloc( n * sizeof(Integer))) == NULL ) { fprintf(stderr, " me = %d: ERROR in memory allocation, not enough memory for mapB \n"); exit(-1); } if ((mapZ = (Integer *) malloc( n * sizeof(Integer))) == NULL ) { fprintf(stderr, " ERROR in memory allocation, not enough memory for mapZ \n"); exit(-1); } /* set the column mapping of processors */ for ( ii = 0; ii < n; ii++ ) { indx = ( ii % nprocs); mapA[ii] = 0; mapB[ii] = 0; } for ( ii = 0 ; ii < n; ii++ ) { indx = ( ii % nprocs); mapZ[ii] = 0; } /* if ( nprocs > 2 ) { mapZ[0] = nprocs-1; for ( ii = 1; ii < n; ii++) { indx = ( ii % (nprocs - 1)); mapZ[ii] = indx; } } else { for ( ii = 0; ii < n; ii++) { indx = ( ii % nprocs ); mapZ[ii] = indx; } } */ for ( i = 0; i < 3; i++ ) iseed[i] = 1; iseed[3] = 2*me*100 + 3; /* for symmetric matrix with this data distribution */ ii = ci_size_( &me, &n, mapA ); if ( ii > 0 ) { if ( (matrixA = (DoublePrecision *) malloc( ii * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, " me %d ERROR in memory allocation, not enough memory for matrixA memory size = %d \n", me, ii); exit(-1); } } dptr = matrixA; for ( indx = 0; indx < ii; indx++ ) { *( dptr++ ) = 0.0e0; } ii = countlist ( me, mapA, &n ); if ( ii > 0 ) { if ( ( vecA = ( DoublePrecision ** ) malloc ( ii * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for vecA %d \n", me, ii ); exit(-1); } } else { if ( ( vecA = ( DoublePrecision ** ) malloc ( n * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for vecA %d \n", me, n ); exit(-1); } } i = 0; dptr = matrixA; for ( indx = 0; indx < n; indx++ ) { if ( mapA[indx] == me ) { vecA[i] = dptr; i++; dptr += ( n - indx); } } i = 0; for ( indx = 0; indx < n; indx++ ){ /* * A is symmetric, tri-diagonal. Set diagA, subdiagA equal * to diagonal and subdiagonal parts of matrix. * diagA and subdiagA are used to compute residual. */ diagA[indx] = 1.0/( indx + 1 ); subdiagA[indx] = -1.0e0; if ( mapA[indx] == me ) { vecA[i][0] = 1.0/( indx + 1 ); if ( indx != (n-1)) vecA[i][1] = -1.0e0; i++; } } subdiagA[0] = 0.0e0; ii = ci_size_( &me, &n, mapB ); if ( (matrixB = (DoublePrecision *) malloc( ii * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, " me %d ERROR in memory allocation, not enough memory for matrixB \n", me); exit(-1); } zero_out ( ii, matrixB); dptr = matrixB; for ( indx = 0; indx < ii; indx++ ) { *( dptr++ ) = 0.0e0; } ii = countlist ( me, mapB, &n ); if ( ( vecB = ( DoublePrecision ** ) malloc ( ii * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for vecA \n", me); exit(-1); } i = 0; dptr = matrixB; for ( indx = 0; indx < n; indx++ ) { /* * B is symmetric, tri-diagonal. Set diagB, subdiagB equal * to diagonal and subdiagonal parts of matrix. * diagB and subdiagB are used to compute residual. */ diagB[indx] = 20.0e0; subdiagB[indx] = -1.0e0; if ( mapB[indx] == me ) { /* column */ vecB[i] = dptr; vecB[i][0] = 20.0e0; if ( indx != ( n-1)) vecB[i][1]= -1.0e0; dptr += ( n-indx); i++; } } subdiagB[0] = 0.0e0; /* use the utility routine count_list to determine the number of columns of Z that are stored on this processor using the above distribution */ ii = countlist ( me, mapZ, &n ); if ( ( vecZ = ( DoublePrecision ** ) malloc ( ii * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for vecA allocation = %d \n", me, ii); exit(-1); } if ( (matrixZ = (DoublePrecision *) malloc( ii * n * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for matrixZ \n", me); exit(-1); } dptr = matrixZ; i = ii*n; zero_out( i, matrixZ ); dptr = matrixZ; k = 0; for ( i = 0; i < ii; i++ ) { vecZ[i] = dptr; dptr += n; } if ( (eval = (DoublePrecision *) malloc( n * sizeof(DoublePrecision ))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for eigenvalue space \n", me); exit(-1); } index = 0; /* * fprintf(stderr, "me = %d: just before memreq \n", me); */ rsize = 0; isize = 0; ptr_size = 0; /* for ( iii = 0; iii < n; iii++ ) fprintf(stderr, " me = %ld iii = %ld mapA = %ld mapB = %ld mapZ = %ld \n", me, iii, mapA[iii], mapB[iii], mapZ[iii]); */ memreq_( &index, &n, mapA, mapB, mapZ, &isize, &rsize, &ptr_size, iscratch ); /* * fprintf(stderr, "me = %d: just after memreq isize = %d rsize = %d ptr_size %d \n", me, isize, rsize, ptr_size); */ free(iscratch); if ( (iscratch = (Integer *) malloc( 2*isize * sizeof(Integer))) == NULL ) { fprintf(stderr, " me = %d ERROR in memory allocation, not enough memory for integer scratch space \n", me); exit(-1); } rsize = 2 * rsize; if ( (scratch = (DoublePrecision *) malloc( rsize * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, " me %d ERROR in memory allocation, not enough memory for DoublePrecision scratch space \n", me); exit(-1); } if ( (iptr = (DoublePrecision **) malloc( 2*ptr_size * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, " me %d ERROR in memory allocation, not enough memory for pointer scratch space \n", me); exit(-1); } mxsync_(); if( me == 0 ) fprintf(stderr, " geneig_la \n" ); #ifdef TIMING mxsync_(); time1 = mxclock_(); #endif time1 = mxclock_(); /* * indx = 1; * for ( iii = 0; iii < 1; iii++ ){ * mxtime_( &IZERO, &t_com ); * pdspgv ( &indx, &n, vecA, mapA, vecB, mapB, vecZ, mapZ, eval, iscratch, * &isize, iptr, &ptr_size ,scratch, &rsize, &info); * } */ indx = 1; range = 'V'; order = 'L'; dspgv2_( &indx, &range, &order, &n, matrixA, matrixB, eval, matrixZ, &n, scratch, iscratch, &info); fflush(stdout); #ifdef TIMING mxsync_(); test_timing.pdspgvx = mxclock_() - time1; mxtime_( &IONE, &t_com ); ii = 0; if ( n < 30 ){ if ( info == 0 ) { for ( k = 0; k < n; k++ ) { if ( mapZ[k] == me ) { *scratch = dasum_( &n , vecZ[ii], &IONE ); ii++; } } } } if (me == 0 ){ fprintf(stderr, " n = %d nprocs = %d \n", n, nprocs); fprintf(stderr, " pdspgvx = %f \n", test_timing.pdspgvx); } #endif geneig_res( &n, diagA, subdiagA, diagB, subdiagB, vecZ, mapZ, eval, iscratch, scratch, &res, &info); if (me == 0 ) fprintf(stderr, " A Z - D B Z residual = %g \n", res); i = 0; for ( indx = 0; indx < n; indx++ ) { if ( mapB[indx] == me ) { ii = n-indx; zero_out( ii, vecB[i] ); vecB[i][0] = 20.0e0; if ( indx != ( n-1)) vecB[i][1]= -1.0e0; i++; } } mxsync_(); b_ortho( &n, vecB, mapB, &n, vecZ, mapZ, iptr, iscratch, scratch, &res, &info); if( me == 0 ) fprintf(stderr, " Z' B Z - I residual = %g \n", res); ii = 0; if ( n < 30 ){ if ( info == 0 ) { for ( k = 0; k < n; k++ ) { if ( mapZ[k] == me ) { *scratch = dasum_( &n , vecZ[ii], &IONE ); ii++; } } } } free(iptr); free(scratch); free(iscratch); free(eval); free(matrixZ); free(vecZ); free(vecB); free(matrixB); free(vecA); free(matrixA); free(mapZ); free(mapB); free(mapA); return; /* mxpend_(); */ }
void main1_() #endif { /* for xpress.com */ static Integer IZERO = (Integer) 0; Integer index, icounter; Integer n, ii, me, indx, k, i, neleZ, neleA; Integer *mapA, mapB[1], *mapZ; Integer *iscratch; DoublePrecision **iptr; DoublePrecision *dd, *ee; Integer rsize, ptr_size; Integer nprocs, isize; Integer info, m; DoublePrecision *scratch, *eval, *dptr; DoublePrecision *matrixA, *matrixZ; DoublePrecision **vecA, **vecZ; DoublePrecision res, t_com; #ifdef TIMING static Integer IONE = (Integer) 1; DoublePrecision time1, time2, timex; extern TIMINGG test_timing; #endif extern void tim_com(); extern void mxend_(); extern void mxinit_(), mxtime_(); extern DoublePrecision mxclock_(); extern void mxpara_(); extern Integer mxnprc_(); extern Integer mxmynd_(); extern void memreq_(); extern Integer ci_size_(); extern DoublePrecision dasum_(); extern void pdspev(); extern void tresid(), ortho(); mxinit_(); me = mxmynd_(); nprocs = mxnprc_(); #ifdef TIMING test_timing.choleski = 0.0e0; test_timing.inverse = 0.0e0; test_timing.conjug = 0.0e0; test_timing.householder = 0.0e0; test_timing.pstebz = 0.0e0; test_timing.pstein = 0.0e0; test_timing.mxm5x = 0.0e0; test_timing.mxm25 = 0.0e0; test_timing.pdspevx = 0.0e0; test_timing.pdspgvx = 0.0e0; #endif /* while (1) { */ icounter = 20; m = 20; while(1) { n = 2*icounter + 1; nprocs = mxnprc_(); printf(" n = %d nprocs = %d \n", n, nprocs); if ((dd = (DoublePrecision *) malloc( n * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, " me = %d: ERROR in memory allocation, not enough memory for dd %d \n", me, n ); exit(-1); } if ((ee = (DoublePrecision *) malloc( n * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, " me = %d: ERROR in memory allocation, not enough memory for ee %d \n", me, n ); exit(-1); } if ((mapA = (Integer *) malloc( n * sizeof(Integer))) == NULL ) { fprintf(stderr, " me = %d: ERROR in memory allocation, not enough memory for mapA %d \n", me, n ); exit(-1); } if ((mapZ = (Integer *) malloc( n * sizeof(Integer))) == NULL ) { fprintf(stderr, " ERROR in memory allocation, not enough memory for mapZ \n"); exit(-1); } /* set the column mapping of processors */ for ( ii = 0; ii < n; ii++ ) { indx = ( ii % nprocs); mapA[ii] = indx; } for ( ii = 0 ; ii < n; ii++ ) { indx = ( ii % nprocs); mapZ[ii] = indx; } /* for symmetric matrix with this data distribution */ ii = ci_size_( &me, &n, mapA ); neleA = ii; if ( ii > 0 ) { if ( (matrixA = (DoublePrecision *) malloc( ii * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, " me %d ERROR in memory allocation, not enough memory for matrixA memory size = %d \n", me, ii); exit(-1); } } ii = countlist ( me, mapA, &n ); if ( ii > 0 ) { if ( ( vecA = ( DoublePrecision ** ) malloc ( ii * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for vecA %d \n", me, ii ); exit(-1); } } else { if ( ( vecA = ( DoublePrecision ** ) malloc ( n * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for vecA %d \n", me, n ); exit(-1); } } i = 0; dptr = matrixA; for ( indx = 0; indx < n; indx++ ) { if ( mapA[indx] == me ) { vecA[i] = dptr; i++; dptr += ( n - indx); } } /* wilkinson's matrix */ ee[0] = 0.e0; for ( indx = 1; indx < n; indx++) ee[indx] = 1.0e0; /* ee[indx] = 1.e0; */ i = 0; for ( indx = 0; indx < m; indx++){ dd[indx] = (DoublePrecision) ( m-indx ); if ( mapA[indx] == me ){ vecA[i][0] = (DoublePrecision) ( m-indx ); vecA[i][1] = 1.; i++; } } dd[m] = 0.e0; if ( mapA[m] == me ) { vecA[i][0] = 0.; vecA[i][1] = 1.; i++; } for ( indx = m+1; indx < n; indx++){ dd[indx] = (DoublePrecision) indx-m; if ( mapA[indx] == me ){ vecA[i][0] = (DoublePrecision) indx-m; if ( indx != n-1) vecA[i][1] = 1; i++; } } /* use the utility routine count_list to determine the number of columns of Z that are stored on this processor using the cve distribution */ ii = countlist ( me, mapZ, &n ); if ( ( vecZ = ( DoublePrecision ** ) malloc ( ii * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for vecA allocation = %d \n", me, ii); exit(-1); } if ( (matrixZ = (DoublePrecision *) malloc( ii * n * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for matrixZ \n", me); exit(-1); } neleZ = ii*n; dptr = matrixZ; k = 0; for ( i = 0; i < ii; i++ ) { vecZ[i] = dptr; dptr += n; } if ( (eval = (DoublePrecision *) malloc( n * sizeof(DoublePrecision ))) == NULL ) { fprintf(stderr, "me = %d: ERROR in memory allocation, not enough memory for eigenvalue space \n", me); exit(-1); } index = 1; /* fprintf(stderr, "me = %d: just before memreq \n", me); */ rsize = 0; isize = 0; ptr_size = 0; iscratch = ( Integer *) malloc( 6*n*sizeof(Integer)); memreq_( &index, &n, mapA, mapB, mapZ, &isize, &rsize, &ptr_size, iscratch ); /* fprintf(stderr, "me = %d: just after memreq isize = %d rsize = %d ptr_size %d \n", me, isize, rsize, ptr_size); */ free(iscratch); if ( (iscratch = (Integer *) malloc( 4* isize * sizeof(Integer))) == NULL ) { fprintf(stderr, " me = %d ERROR in memory allocation, not enough memory for integer scratch space \n", me); exit(-1); } if ( (scratch = (DoublePrecision *) malloc( 4*rsize * sizeof(DoublePrecision))) == NULL ) { fprintf(stderr, " me %d ERROR in memory allocation, not enough memory for DoublePrecision scratch space \n", me); exit(-1); } if ( (iptr = (DoublePrecision **) malloc( 4*ptr_size * sizeof(DoublePrecision *))) == NULL ) { fprintf(stderr, " me %d ERROR in memory allocation, not enough memory for pointer scratch space \n", me); exit(-1); } if( me == 0 ) fprintf(stderr, " Wilkinson \n" ); for ( ii = 0; ii < 1; ii++ ) { /* set data modified by pdspevx */ zero_out( neleZ, matrixZ ); zero_out( neleA, matrixA ); for ( k = 0; k < n; k++ ) { indx = ( k % nprocs); mapZ[k] = indx; } k = 0; for ( indx = 0; indx < n; indx++ ){ if ( me == mapA[indx] ) { vecA[k][0] = dd[indx]; if ( indx != n-1 ) vecA[k][1] = ee[indx+1]; k++; } } #ifdef TIMING time1 = mxclock_(); mxsync_(); time1 = mxclock_(); #endif mxtime_( &IZERO, &t_com ); pdspev( &n, vecA, mapA, vecZ, mapZ, eval, iscratch, &isize, iptr, &ptr_size ,scratch, &rsize, &info); if ( me == 0 ) for ( k = 0; k < n; k++ ) printf(" driver wilk k = %d eval %f \n", k, eval[k]); mxsync_(); #ifdef TIMING timex = mxclock_(); mxtime_( &IONE, &t_com ); if( ii == 0 ) time2 = timex - time1; #endif if (!NO_EVEC){ tresid( &n, &n, dd, ee, vecZ, mapZ, eval, iscratch, scratch, &res, &info); if( me == 0 ) fprintf(stderr, " iteration # %d : A Z - Z D residual = %g \n", ii, res); ortho( &n, &n, vecZ, mapZ, iptr, iscratch, scratch, &res, &info); if( me == 0 ) fprintf(stderr, " iteration # %d : Z' Z - I residual = %g \n", ii, res); } } #ifdef TIMING test_timing.pdspevx = timex - time1; if (!NO_EVEC){ ii = 0; if ( info == 0 ) { for ( k = 0; k < n; k++ ) { if ( mapZ[k] == me ) { *scratch = dasum_( &n , vecZ[ii], &IONE ); ii++; } } } } if (me == 0 ){ fprintf(stderr, " n = %d nprocs = %d \n", n, nprocs); fprintf(stderr, " time1 = %f \n", time2); fprintf(stderr, " pdspgvx = %f \n", test_timing.pdspgvx); fprintf(stderr, " pdspevx = %f \n", test_timing.pdspevx); fprintf(stderr, " choleski = %f \n", test_timing.choleski); fprintf(stderr, " inverse = %f \n", test_timing.inverse); fprintf(stderr, " conjug = %f \n", test_timing.conjug); fprintf(stderr, " householder = %f \n", test_timing.householder); fprintf(stderr, " mxm5x = %f \n", test_timing.mxm5x); fprintf(stderr, " mxm25 = %f \n", test_timing.mxm25); fprintf(stderr, " pstein = %f \n", test_timing.pstein); fprintf(stderr, " pstebz = %f \n", test_timing.pstebz); } /* Compute and print commmunication time */ tim_com( test_timing.pdspevx, t_com, iscratch, scratch ); #endif free(iptr); free(scratch); free(iscratch); free(eval); free(matrixZ); free(vecZ); free(vecA); free(matrixA); free(mapZ); free(mapA); icounter+=abs(random() % 31 ); } return; }