int main ( int argc, char * argv[]) { SCOTCH_Num vertnbr; /* Number of vertices */ SCOTCH_Graph grafdat; /* Source graph */ SCOTCH_Ordering ordedat; /* Graph ordering */ SCOTCH_Num * permtab; /* Permutation array */ SCOTCH_Strat stradat; /* Ordering strategy */ SCOTCH_Num straval; char * straptr; int flagval; Clock runtime[2]; /* Timing variables */ int i, j; errorProg ("gord"); if ((argc >= 2) && (argv[1][0] == '?')) { /* If need for help */ usagePrint (stdout, C_usageList); return (0); } flagval = C_FLAGNONE; /* Default behavior */ straval = 0; /* No strategy flags */ straptr = NULL; SCOTCH_stratInit (&stradat); for (i = 0; i < C_FILENBR; i ++) /* Set default stream pointers */ C_fileTab[i].pntr = (C_fileTab[i].mode[0] == 'r') ? stdin : stdout; for (i = 1; i < argc; i ++) { /* Loop for all option codes */ if ((argv[i][0] != '-') || (argv[i][1] == '\0') || (argv[i][1] == '.')) { /* If found a file name */ if (C_fileNum < C_FILEARGNBR) /* File name has been given */ C_fileTab[C_fileNum ++].name = argv[i]; else errorPrint ("main: too many file names given"); } else { /* If found an option name */ switch (argv[i][1]) { case 'C' : case 'c' : /* Strategy selection parameters */ for (j = 2; argv[i][j] != '\0'; j ++) { switch (argv[i][j]) { case 'B' : case 'b' : straval |= SCOTCH_STRATBALANCE; break; case 'Q' : case 'q' : straval |= SCOTCH_STRATQUALITY; break; case 'S' : case 's' : straval |= SCOTCH_STRATSPEED; break; case 'T' : case 't' : straval |= SCOTCH_STRATSAFETY; break; default : errorPrint ("main: invalid strategy selection option '%c'", argv[i][j]); } } break; case 'H' : /* Give the usage message */ case 'h' : usagePrint (stdout, C_usageList); return (0); case 'M' : /* Output separator mapping */ case 'm' : flagval |= C_FLAGMAPOUT; if (argv[i][2] != '\0') C_filenamemapout = &argv[i][2]; break; case 'O' : /* Ordering strategy */ case 'o' : straptr = &argv[i][2]; SCOTCH_stratExit (&stradat); SCOTCH_stratInit (&stradat); SCOTCH_stratGraphOrder (&stradat, straptr); break; case 'T' : /* Output separator tree */ case 't' : flagval |= C_FLAGTREOUT; if (argv[i][2] != '\0') C_filenametreout = &argv[i][2]; break; case 'V' : fprintf (stderr, "gord, version " SCOTCH_VERSION_STRING "\n"); fprintf (stderr, "Copyright 2004,2007,2008,2010-2012 IPB, Universite de Bordeaux, INRIA & CNRS, France\n"); fprintf (stderr, "This software is libre/free software under CeCILL-C -- see the user's manual for more information\n"); return (0); case 'v' : /* Output control info */ for (j = 2; argv[i][j] != '\0'; j ++) { switch (argv[i][j]) { case 'S' : case 's' : flagval |= C_FLAGVERBSTR; break; case 'T' : case 't' : flagval |= C_FLAGVERBTIM; break; default : errorPrint ("main: unprocessed parameter '%c' in '%s'", argv[i][j], argv[i]); } } break; default : errorPrint ("main: unprocessed option '%s'", argv[i]); } } } fileBlockOpen (C_fileTab, C_FILENBR); /* Open all files */ clockInit (&runtime[0]); clockStart (&runtime[0]); SCOTCH_graphInit (&grafdat); /* Create graph structure */ SCOTCH_graphLoad (&grafdat, C_filepntrsrcinp, -1, 2); /* Read source graph */ SCOTCH_graphSize (&grafdat, &vertnbr, NULL); /* Get graph characteristics */ if (straval != 0) { if (straptr != NULL) errorPrint ("main: options '-c' and '-o' are exclusive"); SCOTCH_stratGraphOrderBuild (&stradat, straval, 0, 0.2); } clockStop (&runtime[0]); /* Get input time */ clockInit (&runtime[1]); clockStart (&runtime[1]); if ((permtab = (SCOTCH_Num *) memAlloc (vertnbr * sizeof (SCOTCH_Num))) == NULL) { errorPrint ("main: out of memory"); return (1); } SCOTCH_graphOrderInit (&grafdat, &ordedat, permtab, NULL, NULL, NULL, NULL); /* Create ordering */ SCOTCH_graphOrderCompute (&grafdat, &ordedat, &stradat); /* Perform ordering */ clockStop (&runtime[1]); /* Get ordering time */ #ifdef SCOTCH_DEBUG_ALL if (SCOTCH_graphOrderCheck (&grafdat, &ordedat) != 0) return (1); #endif /* SCOTCH_DEBUG_ALL */ clockStart (&runtime[0]); SCOTCH_graphOrderSave (&grafdat, &ordedat, C_filepntrordout); /* Write ordering */ if (flagval & C_FLAGMAPOUT) /* If mapping wanted */ SCOTCH_graphOrderSaveMap (&grafdat, &ordedat, C_filepntrmapout); /* Write mapping */ if (flagval & C_FLAGTREOUT) /* If separator tree wanted */ SCOTCH_graphOrderSaveTree (&grafdat, &ordedat, C_filepntrtreout); /* Write tree */ clockStop (&runtime[0]); /* Get output time */ if (flagval & C_FLAGVERBSTR) { fprintf (C_filepntrlogout, "S\tStrat="); SCOTCH_stratSave (&stradat, C_filepntrlogout); putc ('\n', C_filepntrlogout); } if (flagval & C_FLAGVERBTIM) { fprintf (C_filepntrlogout, "T\tOrder\t\t%g\nT\tI/O\t\t%g\nT\tTotal\t\t%g\n", (double) clockVal (&runtime[1]), (double) clockVal (&runtime[0]), (double) clockVal (&runtime[0]) + (double) clockVal (&runtime[1])); } fileBlockClose (C_fileTab, C_FILENBR); /* Always close explicitely to end eventual (un)compression tasks */ SCOTCH_graphOrderExit (&grafdat, &ordedat); SCOTCH_stratExit (&stradat); SCOTCH_graphExit (&grafdat); memFree (permtab); #ifdef COMMON_PTHREAD pthread_exit ((void *) 0); /* Allow potential (un)compression tasks to complete */ #endif /* COMMON_PTHREAD */ return (0); }
void kass(int levelk, int rat, SymbolMatrix * symbptr, PASTIX_INT baseval, PASTIX_INT vertnbr, PASTIX_INT edgenbr, PASTIX_INT * verttab, PASTIX_INT * edgetab, Order * orderptr, MPI_Comm pastix_comm) { PASTIX_INT snodenbr; PASTIX_INT *snodetab = NULL; PASTIX_INT *treetab = NULL; PASTIX_INT *ia = NULL; PASTIX_INT *ja = NULL; PASTIX_INT i, j, n; PASTIX_INT ind; csptr mat; PASTIX_INT *tmpj = NULL; PASTIX_INT *perm = NULL; PASTIX_INT *iperm = NULL; PASTIX_INT newcblknbr; PASTIX_INT *newrangtab = NULL; Dof dofstr; Clock timer1; double nnzS; int procnum; (void)edgenbr; MPI_Comm_rank(pastix_comm,&procnum); #ifdef DEBUG_KASS print_one("--- kass begin ---\n"); #endif /* graphData (graphptr, */ /* (SCOTCH_Num * )&baseval, */ /* (SCOTCH_Num * )&vertnbr, */ /* (SCOTCH_Num **)&verttab, */ /* NULL, NULL, NULL, */ /* (SCOTCH_Num * )&edgenbr, */ /* (SCOTCH_Num **)&edgetab, */ /* NULL); */ n = vertnbr; ia = verttab; ja = edgetab; perm = orderptr->permtab; iperm = orderptr->peritab; /*** Convert Fortran to C numbering ***/ if(baseval == 1) { for(i=0;i<=n;i++) ia[i]--; for(i=0;i<n;i++) for(j=ia[i];j<ia[i+1];j++) ja[j]--; for(i=0;i<n;i++) orderptr->permtab[i]--; for(i=0;i<n;i++) orderptr->peritab[i]--; } MALLOC_INTERN(treetab, n, PASTIX_INT); #ifndef SCOTCH_SNODE /*if(rat != -1 )*/ { /***** FIND THE SUPERNODE PARTITION FROM SCRATCH ********/ /*** Find the supernodes of the direct factorization ***/ MALLOC_INTERN(snodetab, n+1, PASTIX_INT); clockInit(&timer1); clockStart(&timer1); find_supernodes(n, ia, ja, perm, iperm, &snodenbr, snodetab, treetab); clockStop(&timer1); print_one("Time to find the supernode (direct) %.3g s \n", clockVal(&timer1)); /*memfree(treetab);*/ print_one("Number of supernode for direct factorization %ld \n", (long)snodenbr); } #else /*else*/ { /***** USE THE SUPERNODE PARTITION OF SCOTCH ********/ snodenbr = orderptr->cblknbr; MALLOC_INTERN(snodetab, n+1, PASTIX_INT); memCpy(snodetab, orderptr->rangtab, sizeof(PASTIX_INT)*(snodenbr+1)); print_one("Number of column block found in scotch (direct) %ld \n", (long)snodenbr); } #endif /****************************************/ /* Convert the graph */ /****************************************/ MALLOC_INTERN(mat, 1, struct SparRow); initCS(mat, n); MALLOC_INTERN(tmpj, n, PASTIX_INT); /**** Convert and permute the matrix in sparrow form ****/ /**** The diagonal is not present in the CSR matrix, we have to put it in the matrix ***/ bzero(tmpj, sizeof(PASTIX_INT)*n); for(i=0;i<n;i++) { /*** THE GRAPH DOES NOT CONTAIN THE DIAGONAL WE ADD IT ***/ tmpj[0] = i; ind = 1; for(j=ia[i];j<ia[i+1];j++) tmpj[ind++] = ja[j]; mat->nnzrow[i] = ind; MALLOC_INTERN(mat->ja[i], ind, PASTIX_INT); memCpy(mat->ja[i], tmpj, sizeof(PASTIX_INT)*ind); mat->ma[i] = NULL; } CS_Perm(mat, perm); /*** Reorder the matrix ***/ sort_row(mat); memFree(tmpj); /***** COMPUTE THE SYMBOL MATRIX OF ILU(K) WITH AMALGAMATION *****/ kass_symbol(mat, levelk, (double)(rat)/100.0, perm, iperm, snodenbr, snodetab, treetab, &newcblknbr, &newrangtab, symbptr, pastix_comm); cleanCS(mat); memFree(mat); memFree(treetab); dofInit(&dofstr); dofConstant(&dofstr, 0, symbptr->nodenbr, 1); nnzS = recursive_sum(0, symbptr->cblknbr-1, nnz, symbptr, &dofstr); print_one("Number of non zero in the non patched symbol matrix = %g, fillrate1 %.3g \n", nnzS+n, (nnzS+n)/(ia[n]/2.0 +n)); dofExit(&dofstr); if(symbolCheck(symbptr) != 0) { errorPrint("SymbolCheck after kass_symbol."); ASSERT(0, MOD_KASS); } if(levelk != -1) { /********************************************************/ /** ADD BLOCKS IN ORDER TO GET A REAL ELIMINATION TREE **/ /********************************************************/ Patch_SymbolMatrix(symbptr); } dofInit(&dofstr); dofConstant(&dofstr, 0, symbptr->nodenbr, 1); nnzS = recursive_sum(0, symbptr->cblknbr-1, nnz, symbptr, &dofstr); dofExit(&dofstr); print_one("Number of block in final symbol matrix = %ld \n", (long)symbptr->bloknbr); print_one("Number of non zero in final symbol matrix = %g, fillrate2 %.3g \n", nnzS+n, (nnzS+n)/(ia[n]/2.0 +n)); if(symbolCheck(symbptr) != 0) { errorPrint("SymbolCheck after Patch_SymbolMatrix."); ASSERT(0, MOD_KASS); } #ifdef DEBUG_KASS print_one("--- kass end ---\n"); #endif memFree(snodetab); orderptr->cblknbr = newcblknbr; memFree(orderptr->rangtab); orderptr->rangtab = newrangtab; }
void kass_symbol(csptr mat, PASTIX_INT levelk, double rat, PASTIX_INT *perm, PASTIX_INT *iperm, PASTIX_INT snodenbr, PASTIX_INT *snodetab, PASTIX_INT *streetab, PASTIX_INT *cblknbr, PASTIX_INT **rangtab, SymbolMatrix *symbmtx, MPI_Comm pastix_comm) { /**************************************************************************************/ /* This function computes a symbolic factorization ILU(k) given a CSR matrix and an */ /* ordering. Then it computes a block partition of the factor to get BLAS3 */ /* efficiency */ /* NOTE: the CSC matrix is given symmetrized and without the diagonal */ /**************************************************************************************/ PASTIX_INT i, j; PASTIX_INT nnzL; PASTIX_INT *iperm2 = NULL; PASTIX_INT *treetab = NULL; PASTIX_INT n; csptr P; Clock timer1; int procnum; MPI_Comm_rank(pastix_comm,&procnum); n = mat->n; MALLOC_INTERN(iperm2, n, PASTIX_INT); /*compact_graph(mat, NULL, NULL, NULL);*/ /*** Compute the ILU(k) pattern of the quotient matrix ***/ MALLOC_INTERN(P, 1, struct SparRow); initCS(P, n); print_one("Level of fill = %ld\nAmalgamation ratio = %d \n", (long)levelk, (int)(rat*100)); clockInit(&timer1); clockStart(&timer1); if(levelk == -1) { /***** FACTORISATION DIRECT *******/ /***** (Re)compute also the streetab (usefull when SCOTCH_SNODE is active) ***/ SF_Direct(mat, snodenbr, snodetab, streetab, P); clockStop(&timer1); print_one("Time to compute scalar symbolic direct factorization %.3g s \n", clockVal(&timer1)); #ifdef DEBUG_KASS print_one("non-zeros in P = %ld \n", (long)CSnnz(P)); #endif nnzL = 0; for(i=0;i<P->n;i++) { PASTIX_INT ncol; ncol = snodetab[i+1]-snodetab[i]; nnzL += (ncol*(ncol+1))/2; #ifdef DEBUG_KASS ASSERT(P->nnzrow[i] >= ncol, MOD_KASS); if(P->nnzrow[i] >= n) fprintf(stderr,"P->nnzrow[%ld] = %ld \n", (long)i, (long)P->nnzrow[i]); ASSERT(P->nnzrow[i] < n, MOD_KASS); #endif nnzL += (P->nnzrow[i]-ncol)*ncol; } #ifdef DEBUG_KASS print_one("NNZL = %ld \n", (long)nnzL); #endif } else { /***** FACTORISATION INCOMPLETE *******/ nnzL = SF_level(2, mat, levelk, P); clockStop(&timer1); print_one("Time to compute scalar symbolic factorization of ILU(%ld) %.3g s \n", (long)levelk, clockVal(&timer1)); } print_one("Scalar nnza = %ld nnzlk = %ld, fillrate0 = %.3g \n", (long)( CSnnz(mat) + n)/2, (long)nnzL, (double)nnzL/(double)( (CSnnz(mat)+n)/2.0 )); /** Sort the rows of the symbolic matrix */ sort_row(P); clockInit(&timer1); clockStart(&timer1); if(levelk != -1) { /********************************/ /** Compute the "k-supernodes" **/ /********************************/ #ifdef KS assert(levelk >= 0); KSupernodes(P, rat, snodenbr, snodetab, cblknbr, rangtab); #else #ifdef SCOTCH_SNODE if(rat == -1) assert(0); /** do not have treetab with this version of Scotch **/ #endif MALLOC_INTERN(treetab, P->n, PASTIX_INT); for(j=0;j<snodenbr;j++) { for(i=snodetab[j];i<snodetab[j+1]-1;i++) treetab[i] = i+1; /*** Version generale ****/ if(streetab[j] == -1 || streetab[j] == j) treetab[i] = -1; else treetab[i]=snodetab[streetab[j]]; /*** Version restricted inside the supernode (like KSupernodes) ***/ /*treetab[snodetab[j+1]-1] = -1;*/ /** this should give the same results than KSupernodes **/ } /** NEW ILUK + DIRECT **/ amalgamate(rat, P, -1, NULL, treetab, cblknbr, rangtab, iperm2, pastix_comm); memFree(treetab); for(i=0;i<n;i++) iperm2[i] = iperm[iperm2[i]]; memcpy(iperm, iperm2, sizeof(PASTIX_INT)*n); for(i=0;i<n;i++) perm[iperm[i]] = i; #endif } else{ /*if(0)*/ { amalgamate(rat, P, snodenbr, snodetab, streetab, cblknbr, rangtab, iperm2, pastix_comm); /** iperm2 is the iperm vector of P **/ for(i=0;i<n;i++) iperm2[i] = iperm[iperm2[i]]; memcpy(iperm, iperm2, sizeof(PASTIX_INT)*n); for(i=0;i<n;i++) perm[iperm[i]] = i; } /*else { fprintf(stderr, "RAT = 0 SKIP amalgamation \n"); *cblknbr = snodenbr; MALLOC_INTERN(*rangtab, snodenbr+1, PASTIX_INT); memcpy(*rangtab, snodetab, sizeof(PASTIX_INT)*(snodenbr+1)); }*/ } clockStop(&timer1); print_one("Time to compute the amalgamation of supernodes %.3g s\n", clockVal(&timer1)); print_one("Number of cblk in the amalgamated symbol matrix = %ld \n", (long)*cblknbr); Build_SymbolMatrix(P, *cblknbr, *rangtab, symbmtx); print_one("Number of block in the non patched symbol matrix = %ld \n", (long)symbmtx->bloknbr); memFree(iperm2); cleanCS(P); memFree(P); }
int main ( int argc, char * argv[]) { SCOTCH_Dgraph grafdat; SCOTCH_Dordering ordedat; SCOTCH_Strat stradat; SCOTCH_Num straval; char * straptr; int flagval; int procglbnbr; int proclocnum; int protglbnum; /* Root process */ Clock runtime[2]; /* Timing variables */ double reduloctab[12]; /* 3 * (min, max, sum) */ double reduglbtab[12]; MPI_Datatype redutype; MPI_Op reduop; int i, j; #ifdef SCOTCH_PTHREAD int thrdlvlreqval; int thrdlvlproval; #endif /* SCOTCH_PTHREAD */ errorProg ("dgord"); #ifdef SCOTCH_PTHREAD thrdlvlreqval = MPI_THREAD_MULTIPLE; if (MPI_Init_thread (&argc, &argv, thrdlvlreqval, &thrdlvlproval) != MPI_SUCCESS) errorPrint ("main: Cannot initialize (1)"); if (thrdlvlreqval > thrdlvlproval) errorPrint ("main: MPI implementation is not thread-safe: recompile without SCOTCH_PTHREAD"); #else /* SCOTCH_PTHREAD */ if (MPI_Init (&argc, &argv) != MPI_SUCCESS) errorPrint ("main: Cannot initialize (2)"); #endif /* SCOTCH_PTHREAD */ MPI_Comm_size (MPI_COMM_WORLD, &procglbnbr); /* Get communicator data */ MPI_Comm_rank (MPI_COMM_WORLD, &proclocnum); protglbnum = 0; /* Assume root process is process 0 */ if ((argc >= 2) && (argv[1][0] == '?')) { /* If need for help */ usagePrint (stdout, C_usageList); return (0); } SCOTCH_randomProc (proclocnum); /* Record process number to initialize pseudo-random seed */ flagval = C_FLAGNONE; /* Default behavior */ straval = 0; /* No strategy flags */ straptr = NULL; SCOTCH_stratInit (&stradat); fileBlockInit (C_fileTab, C_FILENBR); /* Set default stream pointers */ for (i = 1; i < argc; i ++) { /* Loop for all option codes */ if ((argv[i][0] != '-') || (argv[i][1] == '\0') || (argv[i][1] == '.')) { /* If found a file name */ if (C_fileNum < C_FILEARGNBR) /* File name has been given */ fileBlockName (C_fileTab, C_fileNum ++) = argv[i]; else errorPrint ("main: too many file names given"); } else { /* If found an option name */ switch (argv[i][1]) { case 'B' : case 'b' : flagval |= C_FLAGBLOCK; break; case 'C' : case 'c' : /* Strategy selection parameters */ for (j = 2; argv[i][j] != '\0'; j ++) { switch (argv[i][j]) { case 'B' : case 'b' : straval |= SCOTCH_STRATBALANCE; break; case 'Q' : case 'q' : straval |= SCOTCH_STRATQUALITY; break; case 'S' : case 's' : straval |= SCOTCH_STRATSPEED; break; case 'T' : case 't' : straval |= SCOTCH_STRATSAFETY; break; case 'X' : case 'x' : straval |= SCOTCH_STRATSCALABILITY; break; default : errorPrint ("main: invalid strategy selection option '%c'", argv[i][j]); } } break; #ifdef SCOTCH_DEBUG_ALL case 'D' : case 'd' : flagval |= C_FLAGDEBUG; break; #endif /* SCOTCH_DEBUG_ALL */ case 'H' : /* Give the usage message */ case 'h' : usagePrint (stdout, C_usageList); return (0); case 'M' : /* Output separator mapping */ case 'm' : flagval |= C_FLAGMAPOUT; if (argv[i][2] != '\0') C_filenamemapout = &argv[i][2]; break; case 'O' : /* Ordering strategy */ case 'o' : straptr = &argv[i][2]; SCOTCH_stratExit (&stradat); SCOTCH_stratInit (&stradat); SCOTCH_stratDgraphOrder (&stradat, straptr); break; case 'R' : /* Root process (if necessary) */ case 'r' : protglbnum = atoi (&argv[i][2]); if ((protglbnum < 0) || (protglbnum >= procglbnbr) || ((protglbnum == 0) && (argv[i][2] != '0'))) errorPrint ("main: invalid root process number"); break; case 'T' : /* Output separator tree */ case 't' : flagval |= C_FLAGTREOUT; if (argv[i][2] != '\0') C_filenametreout = &argv[i][2]; break; case 'V' : fprintf (stderr, "dgord, version " SCOTCH_VERSION_STRING "\n"); fprintf (stderr, "Copyright 2007-2012,2014 IPB, Universite de Bordeaux, INRIA & CNRS, France\n"); fprintf (stderr, "This software is libre/free software under CeCILL-C -- see the user's manual for more information\n"); return (0); case 'v' : /* Output control info */ for (j = 2; argv[i][j] != '\0'; j ++) { switch (argv[i][j]) { case 'A' : case 'a' : #ifdef COMMON_MEMORY_TRACE flagval |= C_FLAGVERBMEM; #else /* COMMON_MEMORY_TRACE */ errorPrint ("main: not compiled with COMMON_MEMORY_TRACE"); #endif /* COMMON_MEMORY_TRACE */ break; case 'S' : case 's' : flagval |= C_FLAGVERBSTR; break; case 'T' : case 't' : flagval |= C_FLAGVERBTIM; break; default : errorPrint ("main: unprocessed parameter '%c' in '%s'", argv[i][j], argv[i]); } } break; default : errorPrint ("main: unprocessed option '%s'", argv[i]); } } } #ifdef SCOTCH_DEBUG_ALL if ((flagval & C_FLAGDEBUG) != 0) { fprintf (stderr, "Proc %4d of %d, pid %d\n", proclocnum, procglbnbr, getpid ()); if (proclocnum == protglbnum) { /* Synchronize on keybord input */ char c; printf ("Waiting for key press...\n"); scanf ("%c", &c); } MPI_Barrier (MPI_COMM_WORLD); } #endif /* SCOTCH_DEBUG_ALL */ fileBlockOpenDist (C_fileTab, C_FILENBR, procglbnbr, proclocnum, protglbnum); /* Open all files */ clockInit (&runtime[0]); clockStart (&runtime[0]); SCOTCH_dgraphInit (&grafdat, MPI_COMM_WORLD); SCOTCH_dgraphLoad (&grafdat, C_filepntrsrcinp, -1, 0); if (straval != 0) { if (straptr != NULL) errorPrint ("main: options '-c' and '-o' are exclusive"); SCOTCH_stratDgraphOrderBuild (&stradat, straval, (SCOTCH_Num) procglbnbr, 0, 0.2); } clockStop (&runtime[0]); /* Get input time */ clockInit (&runtime[1]); #ifdef SCOTCH_DEBUG_ALL if ((flagval & C_FLAGDEBUG) != 0) MPI_Barrier (MPI_COMM_WORLD); #endif /* SCOTCH_DEBUG_ALL */ clockStart (&runtime[1]); SCOTCH_dgraphGhst (&grafdat); /* Compute it once for good */ SCOTCH_dgraphOrderInit (&grafdat, &ordedat); SCOTCH_dgraphOrderCompute (&grafdat, &ordedat, &stradat); clockStop (&runtime[1]); /* Get ordering time */ #ifdef SCOTCH_DEBUG_ALL if ((flagval & C_FLAGDEBUG) != 0) MPI_Barrier (MPI_COMM_WORLD); #endif /* SCOTCH_DEBUG_ALL */ clockStart (&runtime[0]); if (proclocnum == protglbnum) { if ((flagval & C_FLAGBLOCK) == 0) SCOTCH_dgraphOrderSave (&grafdat, &ordedat, C_filepntrordout); else SCOTCH_dgraphOrderSaveBlock (&grafdat, &ordedat, C_filepntrordout); if ((flagval & C_FLAGMAPOUT) != 0) /* If mapping wanted */ SCOTCH_dgraphOrderSaveMap (&grafdat, &ordedat, C_filepntrmapout); /* Write mapping */ if ((flagval & C_FLAGTREOUT) != 0) /* If separator tree wanted */ SCOTCH_dgraphOrderSaveTree (&grafdat, &ordedat, C_filepntrtreout); /* Write tree */ } else { if ((flagval & C_FLAGBLOCK) == 0) SCOTCH_dgraphOrderSave (&grafdat, &ordedat, NULL); else SCOTCH_dgraphOrderSaveBlock (&grafdat, &ordedat, NULL); if ((flagval & C_FLAGMAPOUT) != 0) SCOTCH_dgraphOrderSaveMap (&grafdat, &ordedat, NULL); if ((flagval & C_FLAGTREOUT) != 0) SCOTCH_dgraphOrderSaveTree (&grafdat, &ordedat, NULL); } clockStop (&runtime[0]); #ifdef SCOTCH_DEBUG_ALL if ((flagval & C_FLAGDEBUG) != 0) MPI_Barrier (MPI_COMM_WORLD); #endif /* SCOTCH_DEBUG_ALL */ MPI_Type_contiguous (3, MPI_DOUBLE, &redutype); MPI_Type_commit (&redutype); MPI_Op_create ((MPI_User_function *) dgordStatReduceOp, 1, &reduop); if ((flagval & C_FLAGVERBTIM) != 0) { reduloctab[0] = reduloctab[1] = reduloctab[2] = (double) clockVal (&runtime[1]); reduloctab[3] = reduloctab[4] = reduloctab[5] = (double) clockVal (&runtime[0]); reduloctab[6] = reduloctab[7] = reduloctab[8] = reduloctab[0] + reduloctab[3]; MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 3, redutype, reduop, MPI_COMM_WORLD); } #ifdef COMMON_MEMORY_TRACE if ((flagval & C_FLAGVERBMEM) != 0) { reduloctab[9] = reduloctab[10] = reduloctab[11] = (double) memMax (); MPI_Allreduce (&reduloctab[9], &reduglbtab[9], 1, redutype, reduop, MPI_COMM_WORLD); } #endif /* COMMON_MEMORY_TRACE */ MPI_Op_free (&reduop); MPI_Type_free (&redutype); if (C_filepntrlogout != NULL) { if ((flagval & C_FLAGVERBSTR) != 0) { fprintf (C_filepntrlogout, "S\tStrat="); SCOTCH_stratSave (&stradat, C_filepntrlogout); putc ('\n', C_filepntrlogout); } if ((flagval & C_FLAGVERBTIM) != 0) { fprintf (C_filepntrlogout, "T\tOrder\tmin=%g\tmax=%g\tavg=%g\nT\tI/O\tmin=%g\tmax=%g\tavg=%g\nT\tTotal\tmin=%g\tmax=%g\tavg=%g\n", reduglbtab[0], reduglbtab[1], reduglbtab[2] / (double) procglbnbr, reduglbtab[3], reduglbtab[4], reduglbtab[5] / (double) procglbnbr, reduglbtab[6], reduglbtab[7], reduglbtab[8] / (double) procglbnbr); } #ifdef COMMON_MEMORY_TRACE if ((flagval & C_FLAGVERBMEM) != 0) fprintf (C_filepntrlogout, "A\tMemory\tmin=%g\tmax=%g\tavg=%g\n", reduglbtab[9], reduglbtab[10], reduglbtab[11] / (double) procglbnbr); #endif /* COMMON_MEMORY_TRACE */ } fileBlockClose (C_fileTab, C_FILENBR); /* Always close explicitely to end eventual (un)compression tasks */ SCOTCH_dgraphOrderExit (&grafdat, &ordedat); SCOTCH_dgraphExit (&grafdat); SCOTCH_stratExit (&stradat); MPI_Finalize (); #ifdef COMMON_PTHREAD pthread_exit ((void *) 0); /* Allow potential (un)compression tasks to complete */ #endif /* COMMON_PTHREAD */ return (0); }
int main ( int argc, char * argv[]) { SCOTCH_Graph grafdat; /* Source graph */ SCOTCH_Num grafflag; /* Source graph properties */ SCOTCH_Arch archdat; /* Target architecture */ SCOTCH_Strat stradat; /* Mapping strategy */ SCOTCH_Mapping mapdat; /* Mapping data */ Clock runtime[2]; /* Timing variables */ double kbalval; /* Imbalance tolerance value */ int flagval; SCOTCH_Num straval; char * straptr; int i, j; flagval = C_FLAGNONE; /* Default behavior */ straval = 0; /* No strategy flags */ straptr = NULL; i = strlen (argv[0]); if ((i >= 5) && (strncmp (argv[0] + i - 5, "gpart", 5) == 0)) { flagval |= C_FLAGPART; C_paraNbr = 1; /* One more parameter */ C_fileNbr = 3; /* One less file to provide */ errorProg ("gpart"); } else errorProg ("gmap"); intRandInit (); if ((argc >= 2) && (argv[1][0] == '?')) { /* If need for help */ usagePrint (stdout, C_usageList); return (0); } grafflag = 0; /* Use vertex and edge weights */ SCOTCH_stratInit (&stradat); /* Set default mapping strategy */ kbalval = 0.01; /* Set default load imbalance value */ for (i = 0; i < C_FILENBR; i ++) /* Set default stream pointers */ C_fileTab[i].pntr = (C_fileTab[i].mode[0] == 'r') ? stdin : stdout; for (i = 1; i < argc; i ++) { /* Loop for all option codes */ if ((argv[i][0] != '-') || (argv[i][1] == '\0') || (argv[i][1] == '.')) { /* If found a file name */ if (C_paraNum < C_paraNbr) { /* If number of parameters not reached */ if ((C_partNbr = atoi (argv[i])) < 1) /* Get the number of parts */ errorPrint ("main: invalid number of parts (\"%s\")", argv[i]); C_paraNum ++; continue; /* Process the other parameters */ } if (C_fileNum < C_fileNbr) /* A file name has been given */ C_fileTab[C_fileNum ++].name = argv[i]; else errorPrint ("main: too many file names given"); } else { /* If found an option name */ switch (argv[i][1]) { case 'B' : case 'b' : flagval |= C_FLAGKBALVAL; kbalval = atof (&argv[i][2]); if ((kbalval < 0.0) || (kbalval > 1.0) || ((kbalval == 0.0) && ((argv[i][2] != '0') && (argv[i][2] != '.')))) { errorPrint ("main: invalid load imbalance ratio"); } break; case 'C' : case 'c' : /* Strategy selection parameters */ for (j = 2; argv[i][j] != '\0'; j ++) { switch (argv[i][j]) { case 'B' : case 'b' : straval |= SCOTCH_STRATBALANCE; break; case 'Q' : case 'q' : straval |= SCOTCH_STRATQUALITY; break; case 'S' : case 's' : straval |= SCOTCH_STRATSPEED; break; case 'T' : case 't' : straval |= SCOTCH_STRATSAFETY; break; default : errorPrint ("main: invalid strategy selection option (\"%c\")", argv[i][j]); } } break; case 'H' : /* Give the usage message */ case 'h' : usagePrint (stdout, C_usageList); return (0); case 'M' : case 'm' : straptr = &argv[i][2]; SCOTCH_stratExit (&stradat); SCOTCH_stratInit (&stradat); SCOTCH_stratGraphMap (&stradat, straptr); break; case 'S' : case 's' : /* Source graph parameters */ for (j = 2; argv[i][j] != '\0'; j ++) { switch (argv[i][j]) { case 'E' : case 'e' : grafflag |= 2; /* Do not load edge weights */ break; case 'V' : case 'v' : grafflag |= 1; /* Do not load vertex weights */ break; default : errorPrint ("main: invalid source graph option (\"%c\")", argv[i][j]); } } break; case 'V' : fprintf (stderr, "gmap/gpart, version " SCOTCH_VERSION_STRING "\n"); fprintf (stderr, "Copyright 2004,2007,2008,2010 ENSEIRB, INRIA & CNRS, France\n"); fprintf (stderr, "This software is libre/free software under CeCILL-C -- see the user's manual for more information\n"); return (0); case 'v' : /* Output control info */ for (j = 2; argv[i][j] != '\0'; j ++) { switch (argv[i][j]) { case 'M' : case 'm' : flagval |= C_FLAGVERBMAP; break; case 'S' : case 's' : flagval |= C_FLAGVERBSTR; break; case 'T' : case 't' : flagval |= C_FLAGVERBTIM; break; default : errorPrint ("main: unprocessed parameter \"%c\" in \"%s\"", argv[i][j], argv[i]); } } break; default : errorPrint ("main: unprocessed option (\"%s\")", argv[i]); } } } if ((flagval & C_FLAGPART) != 0) { /* If program run as the partitioner */ C_fileTab[3].name = C_fileTab[2].name; /* Put provided file names at their right place */ C_fileTab[2].name = C_fileTab[1].name; C_fileTab[1].name = "-"; } fileBlockOpen (C_fileTab, C_FILENBR); /* Open all files */ clockInit (&runtime[0]); clockStart (&runtime[0]); SCOTCH_graphInit (&grafdat); /* Create graph structure */ SCOTCH_graphLoad (&grafdat, C_filepntrsrcinp, -1, grafflag); /* Read source graph */ SCOTCH_archInit (&archdat); /* Create architecture structure */ if ((flagval & C_FLAGPART) != 0) /* If program run as the partitioner */ SCOTCH_archCmplt (&archdat, C_partNbr); /* Create a complete graph of proper size */ else { SCOTCH_archLoad (&archdat, C_filepntrtgtinp); /* Read target architecture */ C_partNbr = SCOTCH_archSize (&archdat); } if ((straval != 0) || ((flagval & C_FLAGKBALVAL) != 0)) { if (straptr != NULL) errorPrint ("main: options '-b' / '-c' and '-m' are exclusive"); SCOTCH_stratGraphMapBuild (&stradat, straval, (SCOTCH_Num) C_partNbr, kbalval); } clockStop (&runtime[0]); /* Get input time */ clockInit (&runtime[1]); clockStart (&runtime[1]); SCOTCH_graphMapInit (&grafdat, &mapdat, &archdat, NULL); SCOTCH_graphMapCompute (&grafdat, &mapdat, &stradat); /* Perform mapping */ clockStop (&runtime[1]); /* Get computation time */ clockStart (&runtime[0]); SCOTCH_graphMapSave (&grafdat, &mapdat, C_filepntrmapout); /* Write mapping */ clockStop (&runtime[0]); /* Get output time */ if (flagval & C_FLAGVERBSTR) { fprintf (C_filepntrlogout, "S\tStrat="); SCOTCH_stratSave (&stradat, C_filepntrlogout); putc ('\n', C_filepntrlogout); } if (flagval & C_FLAGVERBTIM) { fprintf (C_filepntrlogout, "T\tMapping\t\t%g\nT\tI/O\t\t%g\nT\tTotal\t\t%g\n", (double) clockVal (&runtime[1]), (double) clockVal (&runtime[0]), (double) clockVal (&runtime[0]) + (double) clockVal (&runtime[1])); } if (flagval & C_FLAGVERBMAP) SCOTCH_graphMapView (&grafdat, &mapdat, C_filepntrlogout); fileBlockClose (C_fileTab, C_FILENBR); /* Always close explicitely to end eventual (un)compression tasks */ SCOTCH_graphMapExit (&grafdat, &mapdat); SCOTCH_graphExit (&grafdat); SCOTCH_stratExit (&stradat); SCOTCH_archExit (&archdat); #ifdef COMMON_PTHREAD pthread_exit ((void *) 0); /* Allow potential (un)compression tasks to complete */ #endif /* COMMON_PTHREAD */ return (0); }
int main(int argc, char ** argv) { unsigned int iseed = (unsigned int)time(NULL); int n; int lda; PASTIX_FLOAT *A; PASTIX_FLOAT *B; PASTIX_FLOAT *B_save; PASTIX_FLOAT *B_res; CU_FLOAT *d_A; CU_FLOAT *d_B; Clock clk; Clock clk_wt; PASTIX_FLOAT alpha = 1.0; double time_CPU; double time_CUDA; double time_CUDA_wt; int ops = n*n; if (argc != 3) { usage(argv[0]); return 1; } READ_INT(n, 1); READ_INT(lda, 2); srand (iseed); MALLOC_INTERN(A, n*lda, PASTIX_FLOAT); MALLOC_INTERN(B, n*lda, PASTIX_FLOAT); MALLOC_INTERN(B_save, n*lda, PASTIX_FLOAT); MALLOC_INTERN(B_res, n*lda, PASTIX_FLOAT); FILL(A, n*lda); FILL(B, n*lda); memcpy(B_save, B, n*lda*sizeof(PASTIX_FLOAT)); clockInit(&(clk)); clockStart(&(clk)); DimTrans(A, lda, n, B); clockStop(&(clk)); time_CPU = clockVal(&(clk)); PRINT_TIME("GETRA on CPU", time_CPU, ops); clockInit(&(clk_wt)); clockStart(&(clk_wt)); CUDA_CALL(cudaMalloc((void*)&(d_A), lda*n*sizeof(PASTIX_FLOAT))); CUDA_CALL(cudaMemcpy((void*)d_A, A, lda*n*sizeof(PASTIX_FLOAT), cudaMemcpyHostToDevice)); CUDA_CALL(cudaMalloc((void*)&(d_B), lda*n*sizeof(PASTIX_FLOAT))); CUDA_CALL(cudaMemcpy((void*)d_B, B_save, lda*n*sizeof(PASTIX_FLOAT), cudaMemcpyHostToDevice)); clockInit(&(clk)); clockStart(&(clk)); getra_cuda(d_A, lda, d_B, lda, n); clockStop(&(clk)); CUDA_CALL(cudaMemcpy((void*)B_res, d_B, lda*n*sizeof(PASTIX_FLOAT), cudaMemcpyDeviceToHost)); CUDA_CALL(cudaFree(d_A)); CUDA_CALL(cudaFree(d_B)); clockStop(&(clk_wt)); time_CUDA = clockVal(&(clk)); time_CUDA_wt = clockVal(&(clk_wt)); COMPARE_TIME("GETRA on GPU", time_CUDA, ops, time_CPU); COMPARE_TIME("GETRA on GPU with transfer", time_CUDA_wt, ops, time_CPU); COMPARE_RES(B, B_res); memFree_null(A); memFree_null(B); memFree_null(B_save); memFree_null(B_res); return EXIT_SUCCESS; }