コード例 #1
0
ファイル: CRscalapack.c プロジェクト: rforge/bglr
int CRSF_tcrossprod(int dim[], int iMyRank) {

	int iMemSize;
	int localRowSizeOfA;
	int localColSizeOfA;
	int localColSizeOfB;
	int localRowSizeOfC;
	int localColSizeOfC;
	double *dpWork = NULL;

	int ipZero[] = { 0, 1, 2, 3 };
	int NPRow = dim[6];
	int NPCol = dim[7];
	int MyRow = iMyRank / NPCol;
	int MyCol = iMyRank % NPCol;

	int rowOfA = dim[0];
	int colOfA = dim[1];
	int colOfB =  rowOfA;
	int rowBlockSize = dim[4];
	int colBlockSize = dim[5];

	int iLLD_A = 0;
	int iLLD_C = 0;

	localRowSizeOfA = F77_CALL(numroc)(&rowOfA, &rowBlockSize, &MyRow, ipZero, &NPRow);
	localColSizeOfA = F77_CALL(numroc)(&colOfA, &colBlockSize, &MyCol, ipZero, &NPCol);

	localColSizeOfB = F77_CALL(numroc)(&colOfB, &colBlockSize, &MyCol, ipZero, &NPCol);

	localRowSizeOfC = localRowSizeOfA;
	localColSizeOfC = localColSizeOfB;

	if ( localColSizeOfA > 0 )
		iLLD_A = max( 1, localRowSizeOfA );
	else
		iLLD_A = 1;

	if ( localColSizeOfC > 0 )
		iLLD_C = max( 1, localRowSizeOfC );
	else
		iLLD_C = 1;

	/* I am calculating memory for only two matrices A & C(output). */
        iMemSize = iLLD_A * localColSizeOfA + iLLD_C * localColSizeOfC + colBlockSize + 1;

	D_Rprintf (( "%d/%d: Mem size for me = %d\n", MyRow, MyCol, iMemSize ));

	dpWork = (double *) malloc(sizeof(double) * iMemSize);
	memset(dpWork, 0xcc, sizeof(double) * iMemSize);

	D_Rprintf (("%d:  Allocated a work vector of size %d bytes\n",
				iMyRank, sizeof(double) * iMemSize));

	F77_CALL(callpdgetcp)(dim, dpWork, &iMemSize);

	free(dpWork);
	return 0;
}
コード例 #2
0
ファイル: CRscalapack.c プロジェクト: rforge/bglr
/* **** CRSF_qr ****
 * This function is a C interface to the fortran implemented 
 * scalapack driver function "callpdgeqrf" that performs
 * qr decomposition on the input data.  
 */
int CRSF_qr(int dim[], int iMyRank) {

	int iMemSize = 0;
	double *dpWork = NULL;

	int ipZero[] = { 0, 1, 2, 3 };
	int NPRow = dim[6];
	int NPCol = dim[7];
	int MyRow = iMyRank / NPCol;
	int MyCol = iMyRank % NPCol;

	int rowOfA = dim[0];
	int colOfA = dim[1];
	int rowBlockSize = dim[4];
	int colBlockSize = dim[5];

	/* Calculate required memory size */
	int localRowSizeOfA = F77_CALL(numroc)(&rowOfA, &rowBlockSize, &MyRow, ipZero, &NPRow);
	int localColSizeOfA = F77_CALL(numroc)(&colOfA, &colBlockSize, &MyCol, ipZero, &NPCol);
	int localSizeOfA = localRowSizeOfA * localColSizeOfA;

	int tauParam = min (rowOfA, colOfA);
	int lTau = F77_CALL(numroc)(&tauParam, &colBlockSize, &MyCol, ipZero, &NPCol);
	
	int iZero = 0;
	int iA = 1;
	int jA = 1;
	int iROff = rowBlockSize;
	int iCOff = colBlockSize;
	int iARow = F77_CALL(indxg2p)(&iA, &rowBlockSize, &MyRow,&iZero, &NPRow);
	int iACol = F77_CALL(indxg2p)(&jA, &colBlockSize, &MyCol,&iZero, &NPCol);
	int mp0Param = rowOfA + iROff;
	int mP0 = F77_CALL(numroc)(&mp0Param, &rowBlockSize, &MyRow, &iARow, &NPRow);
	int nQ0Param = colOfA + iCOff;
	int nQ0 = F77_CALL(numroc)(&nQ0Param, &colBlockSize, &MyCol, &iACol, &NPCol);
	int lWork = colBlockSize * (mP0 + nQ0 + colBlockSize);
	
	iMemSize = localSizeOfA + lTau + lWork;

	dpWork = (double *) malloc(sizeof(double) * iMemSize);
	memset(dpWork, 0xcc, sizeof(double) * iMemSize);

	D_Rprintf (("After allocating memory .. \n "));
	
	F77_CALL(callpdgeqrf)(dim, dpWork, &iMemSize);

	D_Rprintf (("AFTER FORTRAN FUNCTION EXECUTION \n "));

	free (dpWork);

	return 0;
}
コード例 #3
0
ファイル: CRscalapack.c プロジェクト: rforge/bglr
/* **** CRSF_solve ****
 *  This function is a C interface to the fortran implemented 
 *  scalapack driver function "callpdgesv" that solves the 
 *  equations A * X = B for 'X'
 */
int CRSF_solve(int dim[], int iMyRank) {

	int iMemSize = 0;
	double *dpWork = NULL;

	int ipZero[] = { 0, 1, 2, 3 };
	int NPRow = dim[6];
	int NPCol = dim[7];
	int MyRow = iMyRank / NPCol;
	int MyCol = iMyRank % NPCol;

	int rowOfA = dim[0];
	int colOfA = dim[1];
	int rowOfB = dim[2];
	int colOfB = dim[3];
	int rowBlockSize = dim[4];
	int colBlockSize = dim[5];

	/* Calculate the MemSize */	
	int localRowSizeOfA = F77_CALL(numroc)(&rowOfA, &rowBlockSize, &MyRow, ipZero, &NPRow);
	int localColSizeOfA = F77_CALL(numroc)(&colOfA, &colBlockSize, &MyCol, ipZero, &NPCol);

	int localRowSizeOfB = F77_CALL(numroc)(&rowOfB, &rowBlockSize, &MyRow, ipZero, &NPRow);
	int localColSizeOfB = F77_CALL(numroc)(&colOfB, &colBlockSize, &MyCol, ipZero, &NPCol);


	int localSizeOfA = localRowSizeOfA * localColSizeOfA;
	int localSizeOfB = localRowSizeOfB * localColSizeOfB;
	int localSizeOfPiv = localRowSizeOfA + rowBlockSize;
	int localWorkSize = colBlockSize; 

	D_Rprintf(("Child: inside CRSF_solve \n "));
	

	iMemSize = localSizeOfA + localSizeOfB + localSizeOfPiv + localWorkSize;

	dpWork = (double *) malloc(sizeof(double) * iMemSize);
	memset(dpWork, 0xcc, sizeof(double) * iMemSize);

	D_Rprintf(("Child: After allocating memory .. \n "));
	
	F77_CALL(callpdgesv)(dim, dpWork, &iMemSize);

	D_Rprintf (("Child: AFTER FORTRAN FUNCTION EXECUTION \n "));

	free (dpWork);

	return 0;
}
コード例 #4
0
ファイル: ParallelAgent.c プロジェクト: cran/RScaLAPACK
/* ****  PA_Init ****
 * Check to see if MPI is initialized, and if it is not, then initialize it.
 * Returns 0 for success or non-zero on error.
 */
int PA_Init() {
	int flag;

#if OMPI
	dlopen("libmpi.so.0", RTLD_GLOBAL | RTLD_LAZY);
#endif

	if( MPI_Initialized(&flag) != MPI_SUCCESS){
		Rprintf("ERROR[1]: Failed in call MPI_Initialized \n");
		return 1;
	}

	if (flag){
		D_Rprintf((" MPI has already been Initialized \n"));
		return 0;
	} else {
		D_Rprintf(("PA: ***************** Initializing MPI\n"));
		MPI_Init(NULL, NULL);
		MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
		return 0;
	}
}
コード例 #5
0
ファイル: CRscalapack.c プロジェクト: rforge/bglr
/* ****  CR_CallScalapackFun  ****
 * This function calls the C wrappers around the Fortran wrappers around the
 * ScaLAPACK functions.
 * It basically does nothing but switch on the Function ID and call the
 * corresponding function.
 */ 
int CR_CallScalapackFn (int *ipGridAndDims, int iMyRank) {
	int iFunction = ipGridAndDims[8];

	switch(iFunction) {
		case 0:
			/* sla.gridInit, sla.gridExit, etc. */
			break;
		case 1:
			/* Solve */
			CRSF_solve (ipGridAndDims, iMyRank);
			break;
		case 2:
			/* SVD */
			CRSF_svd (ipGridAndDims, iMyRank);
			break;
		case 3:
			/* QR Decomposition */
			CRSF_qr(ipGridAndDims, iMyRank);
			break;
		case 4:
			/* Choleski Factorization */
			CRSF_chol(ipGridAndDims, iMyRank);
			break;
		case 5:
			/* Invert a (symmetric, positive definite, square) matrix from
			 * its Choleski decomposition */
			CRSF_chol2inv(ipGridAndDims, iMyRank);
			break;
		case 6:
			/* Eigen Value */
			CRSF_eigen(ipGridAndDims, iMyRank);
			break;
		case 7:
			/* Matrix Multiplication */
			/*D_Rprintf(("Got every thing, Need to run multiplication alg.\n")); */
			CRSF_multiply(ipGridAndDims, iMyRank);
			break;
                 case 8: 
                        /* tcrossprod */
                        D_Rprintf(("Got every thing, Need to run multiplication alg.\n"));
                        CRSF_tcrossprod(ipGridAndDims, iMyRank);
                        break;
	
		default:
			Rprintf("%d:%s:%d: ASSERT ERROR: Reached unreachable default in switch statement!\n", iMyRank, __FILE__, __LINE__);
			return -1;

	}	/* Endof  switch (iFunction) */

	return 0;
}
コード例 #6
0
ファイル: CRscalapack.c プロジェクト: rforge/bglr
/* **** CRSF_chol2inv **** 
 * This function is a C interface to the fortran implemented 
 * scalapack driver function "callpdpotri" that performs
 * inverting a matrix from its Choleski Factorization
 */ 
int CRSF_chol2inv(int dim[], int iMyRank) {

	int iMemSize = 0;
	double *dpWork = NULL;

	int ipZero[] = { 0, 1, 2, 3 };
	int NPRow = dim[6];
	int NPCol = dim[7];
	int MyRow = iMyRank / NPCol;
	int MyCol = iMyRank % NPCol;

	int rowOfA = dim[0];
	int colOfA = dim[1];
	int rowBlockSize = dim[4];
	int colBlockSize = dim[5];

	/* Calculate required memory size */
	int localRowSizeOfA = F77_CALL(numroc)(&rowOfA, &rowBlockSize, &MyRow, ipZero, &NPRow);
	int localColSizeOfA = F77_CALL(numroc)(&colOfA, &colBlockSize, &MyCol, ipZero, &NPCol);
	
	int localSizeOfA = localRowSizeOfA * localColSizeOfA;
	int workSpace = max (rowBlockSize, colBlockSize);

	iMemSize = localSizeOfA + workSpace;
	
	dpWork = (double *) malloc(sizeof(double) * iMemSize);
	memset(dpWork, 0xcc, sizeof(double) * iMemSize);

	D_Rprintf (("After allocating memory .. \n "));
	
	F77_CALL(callpdpotri)(dim, dpWork, &iMemSize);

	D_Rprintf (("AFTER FORTRAN FUNCTION EXECUTION \n "));

	free (dpWork);

	return 0;
}
コード例 #7
0
ファイル: ParallelAgent.c プロジェクト: cran/RScaLAPACK
/* ****  PA_SendData  ****
 *  This function sends the parameters to child 0 and then distributes the
 *  data to all of the child processes.  The data is distributed in the
 *  2D block/cyclic pattern required by the ScaLAPACK library.
 */
int PA_SendData(int ipDims[], double dpA[], double dpB[]) {
	int iFunction;

	MPI_Comm intercomm;
	iFunction = ipDims[8];

	/* Broadcast the Data Dimension to the child Processes */
	PA_ErrorHandler(MPI_Intercomm_merge(childComm, 0, &intercomm));


	PA_ErrorHandler(MPI_Bcast(ipDims,10,MPI_INT, 0, intercomm)); 

	/* If the function was sla.gridInit or sla.gridExit, then there is
	 * no data to distribute.
	 */
	if (iFunction == 0) {
		D_Rprintf(("PA: iFunction = 0, Just before returning\n"));
		return 0;
	} else {	/* Otherwise, distribute data as usual. */

		/* Check if ready to run */
		if ( PA_CheckFaultPriorRun () != 0){
			printf(" Memory related problems in one/all of Spawned Processes \n");
			printf(" Report the bug to: [email protected] \n");
			return -1;	
		}

		/* Distribute the first matrix*/
		PAdistData (dpA,ipDims, ipDims[0], ipDims[1]);

		if (ipDims[2] != 0 && ipDims[8] != 2){
			/* Distribute the Second matrix*/
			PAdistData (dpB,ipDims, ipDims[2], ipDims[3]);
		}

		return 0;
	}
}
コード例 #8
0
ファイル: CRscalapack.c プロジェクト: rforge/bglr
/* Receive - I/P Data dimensions and Process Grid Specifications from the parent
* 1. No. of rows in matrix A
* 2. No. of cols in matrix A
* 3. No. of rows in matrix B
* 4. No. of cols in matrix B
* 5. MB - Row Block size for matrix A
* 6. NB - Col Block size for matrix A
* 7. NPROW - Number of Process rows in the Process Grid - Row Block Size
* 8. NPCOL - Number of Process cols in the Process Grid - Col Block Size
* 9. Function id
* 10. Relaease Flag 
*/
int CR_GetInputParams(MPI_Comm mcParent, int *ipGridAndDims) {

	MPI_Comm parent;


	if (MPI_Comm_get_parent(&parent) != MPI_SUCCESS) {
		Rprintf("ERROR[2]: Getting Parent Comm ... FAILED .. EXITING !!\n");
		return AsInt(2);
	}

	if(MPI_Intercomm_merge(parent, 1, &intercomm)!= MPI_SUCCESS)
		return -1;

	if(MPI_Bcast(ipGridAndDims,10, MPI_INT, 0, intercomm) != MPI_SUCCESS) 
	{
		D_Rprintf(("Child: Broadcast error\n"));
		return -2;
	}
	else
	{
		return 0;
	}
}
コード例 #9
0
ファイル: ParallelAgent.c プロジェクト: cran/RScaLAPACK
/* **** PA_Exec ****
 * The ParallelAgent's equivelant of "main".  This function spawns the
 * children processes, sends them the data, and gets back the results.
 */
SEXP PA_Exec(SEXP scriptLocn, SEXP sxInputVector) {
	int iFunction;
	int iSpawnFlag = 1;
	int iNumProcs;
	int ipDims[10]= { 0,0,0,0,0,0,0,0,0,0 };
	double *dpA = NULL;
	double *dpB = NULL;
	int iStatus;
	int returnValue;
	SEXP sRet;
#ifndef DONT_SPAWN_R
	char *cpProgram = "R";   /* Program to call; Usually "R" */

	char *child_args[] = {
		"BATCH",
		"--no-save",
		CHAR(STRING_ELT((scriptLocn),0)),
		"abc.out",
		NULL };

#else
	/* There are four ways to call the child processes (all go through
	 * MPI_COMM_SPAWN):
	 * 1.  Spawn a copy of R and let it call the child process function through
	 *        an R script.  This approach simplified the MPI-BLACS interation,
	 *        at the cost of major overhead.
	 * 2.  Spawn a shell script (dfCRDriver) which runs the child process
	 *        driver.  Spawning the program directly is better, but there is a
	 *        problem with needing the shared library in the library path.
	 * 3.  Spawn a shell (sh) which first adds the path to the scalapack.so
	 *        library to LD_LIBRARY_PATH and then calls the driver program.
	 *        This eliminates the need for the extra script (dfCRDriver), while
	 *        also being several thousandths of a second faster.
	 * 4.  Spawn the driver program directly.  This requires building the
	 *        program differently so that it doesn't need to scalapack.so
	 *        shared library.  This is the preferred/current method.
	 */
	char *cpProgram;    /* =R_PACKAGE_DIR"/exec/dfCRDriver.sh"; */
	char *child_args[] = { NULL, NULL };
	int iLength;

	/* The scriptLocn (script location) variable contains the path to the
	 * executable directory (followed by the script name).  Extract the path,
	 * and use it for the executable's path.
	 */
	cpProgram = (char *) (CHAR(STRING_ELT((scriptLocn), 0)));
	iLength = strrchr(cpProgram, '/') - cpProgram;
	if (iLength < 0) {
		Rprintf("Path to script is not complete.  Unable to continue.\n");
		return R_NilValue;
	}
	cpProgram = (char *) malloc(sizeof(char) * (iLength + 12));
	if (cpProgram == NULL) {
		Rprintf("Memory allocation (%d bytes) failed!\n", sizeof(char) *
				(iLength + 12));
		return R_NilValue;
	}
	*(cpProgram) = '\0';
	strncat(cpProgram, CHAR(STRING_ELT((scriptLocn),0)), iLength);
	strncat(cpProgram, "/CRDriver", 10);

	D_Rprintf(("Child process: \"%s\" \"%s\" \"%s\"\n", cpProgram, child_args[0], child_args[1]));
#endif    /* Endof  If DONT_SPAWN_R is defined */

	/*  Begin by unpacking the input vector into all of the seperate variables
	 *  that get their values from it */
	if (PA_UnpackInput(sxInputVector, ipDims, &dpA, &dpB, &iNumProcs,
				&iFunction, &iSpawnFlag) != 0) {
		free(cpProgram);
		return R_NilValue;
	}

#if 0
// Guru
	int ig;
	for ( ig = 0; ig < 10; ig++ )
	{
		fprintf(stdout, "ipDims[%d] = %d\n", ig, ipDims[ig] );fflush(stdout);
	}


#endif

	/*  Initialize MPI (if it is already initialized, it won't be
	 *  initialized again).	*/
	if (PA_Init() != 0){
		Rprintf(" ERROR[1]: Failed while intializing MPI \n");
		free(cpProgram);
		return R_NilValue;
	}

	if (iSpawnFlag != 0  && iGlobalNumChildren != 0) {
		Rprintf(" Error:  Attempt to spawn a new grid without releasing the previous grid.\n");
		return R_NilValue;
	}

	if(iSpawnFlag == 0 && iGlobalNumChildren == 0){
		Rprintf(" Error: Process Grid not present and Spawn option is set FALSE \n");
		return R_NilValue;
	}

	int *ipErrcodes = (int *)Calloc(iNumProcs, int);



	/* Begin:  Spawn the child processes */
	if ( iSpawnFlag != 0 ) {
		/* Begin:  Spawn the child processes */
		D_Rprintf(("PA: Preparing to spawn %d child processes.\n", iNumProcs)); fflush(stdout);
		iStatus = MPI_Comm_spawn(cpProgram, child_args, iNumProcs, MPI_INFO_NULL,
				0, MPI_COMM_WORLD, &childComm, ipErrcodes);
		free(cpProgram);
		if (iStatus != MPI_SUCCESS) {
			Rprintf(" ERROR:  Failed to spawn (%d) child processes.\n", iNumProcs);
			return R_NilValue;
		}

		D_Rprintf(("SPAWNING SUCCESSFUL\n")); fflush(stdout);
		/* End:  Spawn the child processes */
		iGlobalNumChildren = iNumProcs;
		iNprows = ipDims[6];
		iNpcols = ipDims[7];
	}


	/* SPECIAL for SVD */
	/* If the function is SVD, the child process needs to know the nu,nv
	 * parameters.
	 */

	if (iFunction == 2) {
		ipDims[2] = (int) dpB[0];
		ipDims[3] = (int) dpB[1];
	}

	
	/* DATA DISTRIBUTION */
	/* The data is distributed by the PA to all of the child processes. */
	if ((returnValue = PA_SendData(ipDims, dpA, dpB)) == 0)	{
		D_Rprintf(("PA: DATA SENT TO CHILD PROCESSES.\n")); fflush(stdout);
	} else {	/* The send data failed, */ 
		Rprintf("ERROR [1] : DATA COULD NOT BE SENT TO CHILD PROCESSES.\n");
		iGlobalNumChildren = 0;
		iNprows = 0;
		iNpcols = 0;
		return R_NilValue;
	}
	D_Rprintf(("PA: back to exec.\n")); fflush(stdout);

	/* If the release flag == 1, then the grid will be (or was) released. */
	if (ipDims[9] == 1) 
	{
		iGlobalNumChildren = 0;
		iNprows = 0;
		iNpcols = 0;
	}

	/* If the function is sla.gridInit or sla.gridExit, just return. */
	if (iFunction == 0) 
	{
		// MPI_Comm_free(&intercomm);
		
		return R_NilValue;
	}

	/* GET BACK THE RESULT */
	sRet = PA_RecvResult(ipDims);

	return sRet;
}
コード例 #10
0
ファイル: ParallelAgent.c プロジェクト: cran/RScaLAPACK
/* ****  PA_RecvResult  ****
 *  After the computations are done, receive the result back from the children
 *  processes.  The parameters of the return values are sent by child 0, and
 *  the data is collected (from the 2D block/cyclic distribution) from all
 *  child processes.
 */
SEXP PA_RecvResult(int ipDims[])
{
	int iStatus;
	int iNumMatrices, iWorksize;
	int ipOutDims[3];
	SEXP sxRet = R_NilValue;
	SEXP sxTmp;
	int i;

	iWorksize = ipDims[5];
	D_Rprintf(("Preparing to receive .... "));

	/* First, get the number of matrices in the return object. */
	iStatus = MPI_Recv(&iNumMatrices,1,MPI_INT,0,NUMMATRICES_TAG,childComm,MPINULL);
	if (iStatus != MPI_SUCCESS) {
		PA_ErrorHandler(iStatus);
		return R_NilValue;
	}

	D_Rprintf(("%d matrices\n", iNumMatrices));

	if (iNumMatrices == 0){
		return R_NilValue;
	}

	/* Setup the R vector to hold the R matrix objects */
	PROTECT( sxRet = allocVector(VECSXP, iNumMatrices) );

	/* Main loop */
	for (i = 0; i < iNumMatrices; i++) {
		/* Get the dimensions of matrix i */
		iStatus = MPI_Recv(ipOutDims, 3, MPI_INT, 0, RECVRESULT_TAG+i, childComm, MPINULL);
		if (iStatus != MPI_SUCCESS) {
			PA_ErrorHandler(iStatus);
			UNPROTECT( 1 );
			return R_NilValue;
		}

		D_Rprintf(("Dimensions of matrix %d: [%d, %d], tag=%d\n", i,
					ipOutDims[1], ipOutDims[2], ipOutDims[0]));

		/* If the matrix is empty (size zero), handle the case seperately.
		 * SVD can return empty matrices.
		 */
		if (ipOutDims[1] == 0 || ipOutDims[2] == 0) {
			sxTmp = coerceVector(R_NilValue, NILSXP);
			SET_VECTOR_ELT(sxRet, i, sxTmp);
			continue;
		}

		/* If the matrix is actually a vector, handle that case. */
		if (ipOutDims[1] == 0) ipOutDims[1] = 1;
		if (ipOutDims[2] == 0) ipOutDims[2] = 1;

		/* Make an R array to hold the matrix */
		PROTECT( sxTmp = allocVector(REALSXP, ipOutDims[1]*ipOutDims[2]) );

		D_Rprintf(("Preparing to receive..."));
		/* Get matrix i */
		/* ipOutDims[0] is a flag saying whether to receive the matrix via
		 * a normal MPI call or via the PACollectData function */
		if (ipOutDims[0] == 1) {
			iStatus = MPI_Recv(REAL(sxTmp), ipOutDims[1]*ipOutDims[2],
					MPI_DOUBLE,	0, RECVVECORMAT_TAG+i, childComm, MPINULL);
			if (iStatus != MPI_SUCCESS) {
				PA_ErrorHandler(iStatus);
				return R_NilValue;
			}
		} else {
			/*F77_CALL(pacollectdata)(REAL(sxTmp), ipOutDims+1, ipOutDims+2,
					ipDims, &iWorksize);*/
			PAcollectData(REAL(sxTmp), ipDims, ipOutDims[1], ipOutDims[2]);
		}

		/* Set the dimensions of the matrix */
		PA_SetDim(sxTmp, 2, ipOutDims+1);
		/* Save the matrix into the vector */
		SET_VECTOR_ELT(sxRet, i, sxTmp);
		UNPROTECT( 1 );
	}

	UNPROTECT( 1 );

	return sxRet;
}
コード例 #11
0
ファイル: ParallelAgent.c プロジェクト: cran/RScaLAPACK
/* ****  PA_UnpackInput  ****
 * The input parameters/data to the ParallelAgent is given as an R vector.
 * This function unpacks the vector, putting the pieces into the appropriate
 * variables.
 */
int PA_UnpackInput(SEXP sxInputVector, int *ipDims, double **dppA,
		double **dppB, int *ipNumProcs, int *ipFunction, int *ipSpawnFlag) {
	SEXP s;
	int iMB;
	int ipReleaseFlag;


	/* First parameter is the first matrix. --populates ipDims[0] and ipDims[1] */
	/* Second parameter is the second matrix. --populates ipDims[2] and ipDims[3] */
	/* Third Parameter is number of Process Rows  -- populates ipDims[6] = NPROWS*/
	/* Fourth Parameter is number of Process Cols -- populates ipDims[7] = NPCOLS */
	/* Fifth Parameter is Block Size -- populates ipDims[4] =ipDims [5]= MB */
	/* Sixth Parameter is function identifier  -- populates ipDims[8] = functionID */
	/* Seventh parameter --- Instruction to Release Grid or not -- populates ipDims[9]=ReleaseFlag */
	/* Eighth parameter --- Instruction to Spawn Grid or not*/


	/* First parameter is the first matrix. --populates ipDims[0] and ipDims[1] */
	s = VECTOR_PTR(sxInputVector)[0];
	if (TYPEOF(s) != REALSXP) {
		Rprintf("1st parameter (Matrix A) is not an array of doubles.\n");
		return -1;
	}

	if (PA_GetTwoDims(s, ipDims) > 2) {
		Rprintf("1st parameter (Matrix A) has too many dimensions.\n");
		return -2;
	}

	/* If the object is one dimensional, set the second dimension to length 1 */
	if (ipDims[1] == 0) ipDims[1] = 1;

	/* Save a pointer to the first matrix */
	*dppA = REAL(s);

	/* Second parameter is the second matrix. --populates ipDims[2] and ipDims[3] */
	s = VECTOR_PTR(sxInputVector)[1];
	if (TYPEOF(s) != REALSXP) {
		Rprintf("2nd parameter (Matrix B) is not an array of doubles.\n");
		return -3;
	}

	if (PA_GetTwoDims(s, ipDims + 2) > 2) {
		Rprintf("2nd parameter (Matrix B) has too many dimensions.\n");
		return -4;
	}

	/* If the object is one dimensional, set the second dimension to length 1,
	 * unless the length is zero (i.e, there is no second matrix).    */
	if (ipDims[3] == 0 && LENGTH(s) != 0) ipDims[3] = 1;

	/* Save a pointer to the second matrix */
	*dppB = REAL(s);

	/* Third Parameter is number of Process Rows  -- populates ipDims[6] = NPROWS*/

	s = VECTOR_PTR(sxInputVector)[2];
	if (TYPEOF(s) != INTSXP) {
		Rprintf("Third parameter (number of row processors) is not an integer.\n");
		return -5;
	}
	if (LENGTH(s) != 1) {
		Rprintf("First parameter (number of row processors) is not a single number.\n");
		return -6;
	}
	ipDims[6] = INTEGER(s)[0];

	/* Fourth Parameter is number of Process Cols -- populates ipDims[7] = NPCOLS */
	s = VECTOR_PTR(sxInputVector)[3];
	if (TYPEOF(s) != INTSXP) {
		Rprintf("Fourth parameter (number of col processors) is not an integer.\n");
		return -7;
	}
	if (LENGTH(s) != 1) {
		Rprintf("Fourth parameter (number of col processors) is not a single number.\n");
		return -8;
	}
	ipDims[7] = INTEGER(s)[0];

	*ipNumProcs = ipDims[6] * ipDims[7];  /* iNumProcs = iNPRows * iNPCols  */
	

	/* Fifth Parameter is Block Size -- populates ipDims[4] =ipDims [5]= MB */

	s = VECTOR_PTR(sxInputVector)[4];
	if (TYPEOF(s) != INTSXP) {
		Rprintf("Fifth parameter (row block size of LHS matrix) is not an integer.\n");
		return -9;
	}
	if (LENGTH(s) != 1) {
		Rprintf("Fifth parameter (row block size of LHS matrix) is not a single number.\n");
		return -10;
	}
	iMB = INTEGER(s)[0];
	/* !!! If the block size is larger than the input size, reduce the block
	 * size down to the size of the input.  Needed for the eigen function.
	 */
	if (iMB > ipDims[0] && iMB > ipDims[1] && iMB > ipDims[2] &&
			iMB > ipDims[3])
		iMB = max(ipDims[0], max(ipDims[1], max(ipDims[2], ipDims[3])));

	

	/* Once upon a time, the block dimensions were taken as two inputs.  Now,
	 * the blocksize is forced to be square.
	 */
	ipDims[4] = ipDims[5] = iMB;

	/* Sixth Parameter is function identifier  -- populates ipDims[8] = functionID */
	s = VECTOR_PTR(sxInputVector)[5];
	if (TYPEOF(s) != INTSXP) {
		Rprintf("Sixth parameter (function) is not an integer.\n");
		return -11;
	}
	if (LENGTH(s) != 1) {
		Rprintf("Sixth parameter (function) is not a single number.\n");
		return -12;
	}

	*ipFunction = INTEGER(s)[0];
	if (*ipFunction < 0 || *ipFunction > 7) {
		Rprintf("Error:  Unknown function ID (%d).\n", *ipFunction);
		return -13;
	}

	ipDims[8]= *ipFunction;

	/* Seventh parameter --- Instruction to Release Grid or not -- populates ipDims[9]=ReleaseFlag */
	/* Populating ipDims array is complete ... */
	s = VECTOR_PTR(sxInputVector)[6];
	if (TYPEOF(s) != INTSXP) {
		Rprintf("Seventh parameter (function) is not an integer.\n");
		return -11;
	}

	ipReleaseFlag = INTEGER(s)[0];
	if(ipReleaseFlag != 0 && ipReleaseFlag != 1) {
		Rprintf ("Warning: Proper value for Release Flag= %d not used \n \t Release Flag is set to 1 \n", ipReleaseFlag );
		ipReleaseFlag = 1;  
	}

	ipDims[9]= ipReleaseFlag;

	/* ipSpawnFlag is the information sent from R to ParallelAgent requesting
	 * it either to spawn the processes or not to spawn (use the existing ones)
	 */

	/* Eighth parameter --- Instruction to Spawn Grid or not*/
	s = VECTOR_PTR(sxInputVector)[7];
	if (TYPEOF(s) != INTSXP) {
		Rprintf("Sixth parameter (function) is not an integer.\n");
		return -11;
	}
	*ipSpawnFlag = INTEGER(s)[0];

	D_Rprintf(("spawn flag : %d \n", *ipSpawnFlag));

	return 0;
}
コード例 #12
0
ファイル: CRscalapack.c プロジェクト: rforge/bglr
/* **** CRSF_svd ****
 *  This function is a C interface to the fortran implemented 
 *  scalapack driver function "callpdgesvd" that performs
 *  singular value decomposition 
 */
int CRSF_svd(int dim[], int iMyRank) {
	int iMemSize = 0;
	double *dpWork = NULL;

	int ipZero[] = { 0, 1, 2, 3 };
	int NPRow = dim[6];
	int NPCol = dim[7];
	int MyRow = iMyRank / NPCol;
	int MyCol = iMyRank % NPCol;

	int rowOfA = dim[0];
	int colOfA = dim[1];
	int rowBlockSize = dim[4];
	int colBlockSize = dim[5];

	/* Calculate the required mem size */
	int localRowSizeOfA = 0;
	int localColSizeOfA = 0;
	
	int localSizeOfA = 0;
	int singularVectorSize = 0;
	int localSizeOfU = 0;
	int localSizeOfVT = 0;
	int localWorkSpaceSize = 0;

	D_Rprintf ((" In CRSF_svd func .. 1 \n"));

	/* Get the values */
	localRowSizeOfA = F77_CALL(numroc)(&rowOfA, &rowBlockSize, &MyRow, ipZero, &NPRow);
	localColSizeOfA = F77_CALL(numroc)(&colOfA, &colBlockSize, &MyCol, ipZero, &NPCol);

	localSizeOfA = localRowSizeOfA * localColSizeOfA;
	singularVectorSize = colOfA;
	localSizeOfU = localRowSizeOfA * localRowSizeOfA ;
	localSizeOfVT = localColSizeOfA * localColSizeOfA ;


	if (iMyRank == 0)
	{
		/* Calculate workspace size and Broadcast */
		int wATOBD =0;
		int ictxt;
		int mp0 = 0, nq0 = 0;
		int temp1=-1, temp2=0;
		int lldA = max (1, localRowSizeOfA);
		
		int wBDTOSVD;
		int size = min (rowOfA, colOfA);
		int sizeP = F77_CALL(numroc)(&size, &rowBlockSize, &MyRow, ipZero, &NPRow);
		int sizeQ = F77_CALL(numroc)(&size, &colBlockSize, &MyCol, ipZero, &NPRow);
		int wANTU = 1;
		int NRU = localRowSizeOfA;
		int wANTVT = 1;
		int NCVT = localColSizeOfA;
		int wBDSQR;
		int wPDORMBRQLN;
		int wPDORMBRPRT;

		D_Rprintf ((" Process 0 ... in cntrl statement ..after init \n"));

#if MPICH
		F77_CALL(blacs_get)(&temp1, &temp2, &ictxt);
#else
		F77_CALL(blacs_get)(&temp1, &temp2, &ictxt);
#endif
		/* Calculate the value for WATOBD */

		D_Rprintf ((" Process 0 ... in cntrl statement ..1 \n"));
		mp0 = F77_CALL(numroc)(&rowOfA, &rowBlockSize, &MyRow, &ictxt , &NPRow);
		nq0 = F77_CALL(numroc)(&rowOfA, &colBlockSize, &MyCol, &lldA , &NPRow);

		wATOBD = rowBlockSize * (mp0 + nq0 + 1) + nq0 ;

		D_Rprintf ((" Process 0 ... in cntrl statement ..2 \n"));
		/* Calculate the value for WBDTOSVD */
		
		wBDSQR = max (1, 2*size + (2*size - 4) * max (wANTU, wANTVT)) ;
		wPDORMBRQLN = max (colBlockSize *(colBlockSize -1)/2 , (sizeQ + mp0)*colBlockSize) + colBlockSize * colBlockSize;
		wPDORMBRPRT= max (rowBlockSize *(rowBlockSize -1)/2 , (sizeP + nq0)*rowBlockSize) + rowBlockSize * rowBlockSize;

		wBDTOSVD = size * (wANTU * NRU + wANTVT * NCVT) + max (wBDSQR,max (wANTU * wPDORMBRQLN , wANTVT * wPDORMBRPRT));

		localWorkSpaceSize = 2 + 6* max( rowOfA, colOfA) + max (wATOBD, wBDTOSVD);

		D_Rprintf ((" Process 0 ... in cntrl statement ..3 \n"));

	}

	D_Rprintf (("  after cntrl statement ..rank = %d  \n", iMyRank));
	
	if (MPI_Bcast (&localWorkSpaceSize, 1, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS){
		Rprintf ("SVD: Failed during MPI_Bcast call ... Rank: %d Exiting \n", iMyRank); 
		exit (-1);
	} 
	
	iMemSize = localSizeOfA + singularVectorSize + localSizeOfU + localSizeOfVT + localWorkSpaceSize;
	
	D_Rprintf ((" After Broadcasting iMemSize = %d .. localWspace = %d ... rank = %d \n", iMemSize,localWorkSpaceSize, iMyRank));
	
	dpWork = (double *) malloc(sizeof(double) * iMemSize);
	memset(dpWork, 0xcc, sizeof(double) * iMemSize);

	D_Rprintf (("After allocating memory .. \n "));
	
	F77_CALL(callpdgesvd)(dim, dpWork, &iMemSize);

	D_Rprintf (("AFTER FORTRAN FUNCTION EXECUTION \n "));

	free(dpWork);

	return 0;

}
コード例 #13
0
ファイル: CRscalapack.c プロジェクト: rforge/bglr
/* **** CRSF_eigen **** 
 * This function is a C interface to the fortran implemented 
 * scalapack driver function "callpdsyevd" that performs
 * eigen value decomposition or Spectral decomposition 
 */
int CRSF_eigen(int dim[], int iMyRank) {

	int iMemSize = 0;
	double *dpWork = NULL;

	int ipZero[] = { 0, 1, 2, 3 };
	int NPRow = dim[6];
	int NPCol = dim[7];
	int MyRow = iMyRank / NPCol;
	int MyCol = iMyRank % NPCol;

	int rowOfA = dim[0];
	int colOfA = dim[1];
	int rowBlockSize = dim[4];
	int colBlockSize = dim[5];

	/* Calculate required memory size */
	int sizeOfLWORK;
	
	int tRILWMIN;
	int np, nq;
	
	int localRowSizeOfA = F77_CALL(numroc)(&rowOfA, &rowBlockSize, &MyRow, ipZero, &NPRow);
	int localColSizeOfA = F77_CALL(numroc)(&colOfA, &colBlockSize, &MyCol, ipZero, &NPCol);

	int localSizeOfA = localRowSizeOfA * localColSizeOfA;
	int localSizeOfZ = localRowSizeOfA * localColSizeOfA;
	int localSizeOfW = colOfA;
	int sizeOfLIWORK = 7 * colOfA + 8 * NPCol +2;


	
	np =  F77_CALL(numroc)(&colOfA, &colBlockSize, &MyRow, ipZero, &NPRow);
	nq =  F77_CALL(numroc)(&colOfA, &colBlockSize, &MyCol, ipZero, &NPCol);
	tRILWMIN = 3 * colOfA + max ( colBlockSize * (np +1) , 3 * colBlockSize);
	sizeOfLWORK = max (1 + 6 * colOfA + 2*np*nq, tRILWMIN ) + 2 * colOfA;


#if 0
	fprintf(stdout, "%d: localSizeOfA = %d, localSizeOfZ=%d, localSizeOfW = %d, sizeOfLIWORK=%d\n", iMyRank, localSizeOfA, localSizeOfZ, localSizeOfW, sizeOfLIWORK );fflush(stdout);
#endif


	iMemSize = localSizeOfA + localSizeOfZ + localSizeOfW + sizeOfLIWORK + sizeOfLWORK ;

	dpWork = (double *) malloc(sizeof(double) * iMemSize);

	if (dpWork == NULL)
	{
		fprintf(stdout, "%s:%d - Error allocating memory.\n", __FILE__, __LINE__ );
		fflush(stdout);

	}
	memset(dpWork, 0xcc, sizeof(double) * iMemSize);

	D_Rprintf (("After allocating memory .. \n "));
	
	F77_CALL(callpdsyevd)(dim, dpWork, &iMemSize);

	D_Rprintf (("AFTER FORTRAN FUNCTION EXECUTION \n "));

	free (dpWork);

	return 0;
}