/*
 * Function: ModEuler
 * Description: Modified Euler integrator. For every vertex it first computes a
 *              goal position, then updates velocity and position:
 *                  vi(t + h) = vi(t) + (ALPHA / h) * (gi(t) - xi(t)) + (h / mi) * Fext(t)
 *                  xi(t + h) = xi(t) + h * vi(t + h)
 *              The new velocity is damped by (1 - delta) before the position
 *              update.
 * Input:  phyzxObj   - simulation state; goal[], velocity[], extForce[] (when
 *                      the user force applies), model->vertices and avgVel are
 *                      written by this function.
 *         mIndex     - index of this model; the global userForce is only added
 *                      when it equals the global iMouseModel (and lMouseVal == 2
 *                      and objectName != -1).
 *         deformMode - 3 selects quadratic-deformation goal positions
 *                      (R * q + xcm); any other value uses R * (xi0 - xcm0) + xcm.
 * Output: None (results are stored in phyzxObj).
 */
void ModEuler(phyzx *phyzxObj, int mIndex, int deformMode)
{
    point vertex, velocity, extVel, position, velDamp;
    point vDiff, velTotal, newPos, temp;
    matrix R, matTemp;

    memset((void*)&temp, 0, sizeof(temp));
    memset((void*)&extVel, 0, sizeof(point));
    memset((void*)&velocity, 0, sizeof(point));
    memset((void*)&position, 0, sizeof(point));
    memset((void*)&vDiff, 0, sizeof(point));
    memset((void*)&velTotal, 0, sizeof(point));
    memset((void*)&newPos, 0, sizeof(point));
    memset((void*)&phyzxObj->avgVel, 0, sizeof(point));     // avgVel is re-accumulated below

    matInit(&R, 0, 0);
    matInit(&matTemp, 0, 0);

    if (deformMode == 3)
    {
        quadDeformRot(&R, phyzxObj);
    } //end if

    // Vertex indices run from STARTFROM to numvertices inclusive
    // (NOTE(review): presumably 1-based vertex storage -- confirm against the model loader).
    for (unsigned int index = STARTFROM; index <= phyzxObj->model->numvertices; index++)
    {
        if (deformMode == 3)
        {
            // Compute Quadratic Deformation Goal Positions
            matMult(R, phyzxObj->q[index], &matTemp);                               // R(q)
            temp = matToPoint(matTemp);                                             // Data type conversion
            pSUM(temp, phyzxObj->cmDeformed, phyzxObj->goal[index]);                // g = R(q) + xcm
        } //end if
        else
        {
            // Compute Goal Positions
            matMult3331(phyzxObj->R, phyzxObj->relStableLoc[index], &temp);         // R(xi0 - xcm0)
            pSUM(temp, phyzxObj->cmDeformed, phyzxObj->goal[index]);                // g = R(xi0 - xcm0) + xcm
        } //end else

        vertex.x = phyzxObj->model->vertices[3*index];
        vertex.y = phyzxObj->model->vertices[3*index + 1];
        vertex.z = phyzxObj->model->vertices[3*index + 2];

        // Sticky floor: a vertex at or below the floor is frozen (its goal was
        // still updated above, but velocity and position are left untouched).
        if (stickyFloor == 1 && vertex.y <= -WALLDIST)
        {
            continue;
        } //end if

        // Add user force (applied to every vertex of the model being dragged)
        if (mIndex == iMouseModel && lMouseVal == 2 && objectName != -1)
        {
            pSUM(phyzxObj->extForce[index], userForce, phyzxObj->extForce[index]);
        } //end if

        // Explicit Euler Integrator for velocity -> vi(t + h)
        pDIFFERENCE(phyzxObj->goal[index], vertex, vDiff);                                      // gi(t) - xi(t)
        pMULTIPLY(vDiff, (phyzxObj->alpha / phyzxObj->h), velocity);                            // (ALPHA / h) * (gi(t) - xi(t))
        pMULTIPLY(phyzxObj->extForce[index], (phyzxObj->h / phyzxObj->mass[index]), extVel);    // (h / mi) * Fext(t)
        pSUM(velocity, extVel, velTotal);                                                       // velocity increment for this step
        pSUM(phyzxObj->velocity[index], velTotal, phyzxObj->velocity[index]);                   // vi(t + h) = vi(t) + increment

        // Velocity Damping: v = v - delta * v
        pMULTIPLY(phyzxObj->velocity[index], -phyzxObj->delta, velDamp);
        pSUM(phyzxObj->velocity[index], velDamp, phyzxObj->velocity[index]);

        // Position update using the already-updated velocity
        pMULTIPLY(phyzxObj->velocity[index], phyzxObj->h, position);        // h * vi(t + h)
        pSUM(vertex, position, newPos);                                     // xi(t + h) = xi(t) + h * vi(t + h)

        // Store new position into data structure
        phyzxObj->model->vertices[3*index] = newPos.x;
        phyzxObj->model->vertices[3*index + 1] = newPos.y;
        phyzxObj->model->vertices[3*index + 2] = newPos.z;

        pSUM(phyzxObj->avgVel, phyzxObj->velocity[index], phyzxObj->avgVel);
    } //end for

    // Average the accumulated velocity. An empty model would make the scale
    // factor 1.0 / 0 and turn the (zeroed) average into NaN, so it is skipped.
    // Frozen sticky-floor vertices are not accumulated but still count in the divisor.
    if (phyzxObj->model->numvertices > 0)
    {
        pMULTIPLY(phyzxObj->avgVel, 1.0 / phyzxObj->model->numvertices, phyzxObj->avgVel);
    } //end if

    delete[] R.data;
    delete[] matTemp.data;
} //end ModEuler()
// WARNING mxSize is the matrixSize here NORMAL_API DSP_STATUS helloDSP_Execute(IN Uint32 mxSize, Uint8 processorId, Uint32* matrixA, Uint32* matrixB, Uint32* matrixC) { DSP_STATUS status = DSP_SOK; Uint16 sequenceNumber = 0; Uint16 msgId = 0; Uint32 i, j; ControlMsg *msg; Uint8 flag = 0; Uint32 matrixD[mxSize * mxSize]; Uint32 numElements, numMessages, elementCount, messageCount; Uint32 sizeElements, numProdMessages, matrixCount, prodElements; Char8 ascii_string[STRING_SIZE + 1]; Char8 null_string[STRING_SIZE + 1] = {'\0','\0','\0','\0','\0','\0'}; myStrcpy(ascii_string, null_string); SYSTEM_0Print("Entered helloDSP_Execute ()\n"); // Wait for the first DSP is awake message status = MSGQ_get(SampleGppMsgq, WAIT_FOREVER, (MsgqMsg *) &msg); if (DSP_FAILED(status)) { SYSTEM_1Print("MSGQ_get () failed. Status = [0x%x]\n", status); } // TODO possibly verify the data? SYSTEM_1Print("Received message: %s\n", (Uint32) msg->arg1); SYSTEM_0Print("Generated matrices:\n"); // Generate the matrices after the DSPLink is established matrixGen(mxSize, matrixA, matrixB); // Have to translate the Int32 matrix elements to string elements // or the communication protocol prodElements = (mxSize * mxSize); numElements = (mxSize * mxSize * 2); sizeElements = numElements * STRING_SIZE; numMessages = ((sizeElements - 1) / ARG_MSG) + 1; numProdMessages = (((sizeElements / 2) - 1) / ARG_MSG) + 1; //SYSTEM_2Print("NumElements: %d, NumMessages: %d\n", numElements, numMessages); // WARNING Sending 5 Char8 each for loop // Start sending the matrices to the DSP which is not waiting for (messageCount = 0, elementCount = 0; messageCount < numMessages; messageCount++) { // First send a message, then receive //for ( ; (((elementCount - (messageCount * ARG_MSG)) * STRING_SIZE) < ARG_MSG ) && elementCount < numElements; elementCount++) #if defined (PROFILE) SYSTEM_GetStartTimeDspEnc(); #endif for (; (elementCount - (messageCount * ARG_MSG)) < ARG_MSG && elementCount < numElements; elementCount++) { 
//SYSTEM_0Print("Putting element in a message\n"); //itoa if(elementCount < prodElements) { SYSTEM_itoa(matrixA[elementCount], ascii_string, 10); //SYSTEM_1Print("Looping through string: %s in MatrixA\n", (Uint32) ascii_string); } else { SYSTEM_itoa(matrixB[elementCount - prodElements], ascii_string, 10); //SYSTEM_1Print("Looping through string: %s in MatrixB\n", (Uint32) ascii_string); } //SYSTEM_0Print("After SYSTEM_itoa\n"); // loop through characters of the string for (i = 0; i < STRING_SIZE; i++) { msg->arg1[((elementCount * STRING_SIZE) - (messageCount * ARG_MSG)) + i] = ascii_string[i]; //SYSTEM_sprintf(msg->arg1[(elementCount - (messageCount * ARG_MSG)) + i] = ascii_string[i]; } // clean the string myStrcpy(ascii_string, null_string); } #if defined (PROFILE) SYSTEM_GetEndTimeDspEnc(); #endif //SYSTEM_0Print("Filled a single message\n"); // After filling a single message, should send it to the DSP and wait for a reply // unless it is the last one if (DSP_SUCCEEDED(status)) { //SYSTEM_0Print("DSP succeeded after filling\n"); #if defined (PROFILE) SYSTEM_GetStartTimeDspMes(); #endif msgId = MSGQ_getMsgId(msg); MSGQ_setMsgId(msg, msgId); // TODO set the command flag of the msg to distinguish status = MSGQ_put(SampleDspMsgq, (MsgqMsg) msg); if (DSP_FAILED(status)) { MSGQ_free((MsgqMsg) msg); SYSTEM_1Print("MSGQ_put () failed. Status = [0x%x]\n", status); } #if defined (PROFILE) else { SYSTEM_GetEndTimeDspMes(); SYSTEM_GetStartTimeDspCalc(); } #endif } //SYSTEM_0Print("Message send\n"); sequenceNumber++; // Make sure that the sequenceNumber stays within the permitted // range for applications. 
if (sequenceNumber == MSGQ_INTERNALIDSSTART) { //SYSTEM_0Print("Something with sequences\n"); sequenceNumber = 0; } // If it is the last message, don't wait for an acknowledge if (messageCount + 1 < numMessages) { // Wait for a response of the DSP before sending a reply status = MSGQ_get(SampleGppMsgq, WAIT_FOREVER, (MsgqMsg *) &msg); if (DSP_FAILED(status)) { SYSTEM_1Print("MSGQ_get () failed. Status = [0x%x]\n", status); } //SYSTEM_1Print("Received: %s\n", (Uint32) msg->arg1); } } //SYSTEM_0Print("Sending completed..\n"); // TODO start receiving the product matrix // WARNING wait and acknowledge except for last loop for (messageCount = 0, elementCount = 0, matrixCount = 0; messageCount < numProdMessages; messageCount++) { status = MSGQ_get(SampleGppMsgq, WAIT_FOREVER, (MsgqMsg *) &msg); if (messageCount == 0) { #if defined (PROFILE) SYSTEM_GetEndTimeDspCalc(); #endif SYSTEM_0Print("\nProduct matrix on DSP:\n"); } //SYSTEM_1Print("Message received: %s\n", (Uint32) msg->arg1); if (DSP_FAILED(status)) { SYSTEM_1Print("MSGQ_get () failed. Status = [0x%x]\n", status); } // Put the received message in the matrixC for (; matrixCount < prodElements && (elementCount - (messageCount * ARG_MSG)) < ARG_MSG && elementCount < (prodElements * STRING_SIZE); matrixCount++, elementCount += STRING_SIZE) { // atoi for (i = 0; i < STRING_SIZE; i++) { ascii_string[i] = msg->arg1[elementCount + i]; } ascii_string[5] = '\0'; /* if (matrixCount >= 72) { SYSTEM_1Print("Ascii string received: %s\n", ascii_string); } */ // Put it in the matrixC matrixC[matrixCount] = atoi(ascii_string); // print the string if (matrixCount % mxSize == 0) { SYSTEM_0Print("\n"); } SYSTEM_1Print("%d ", matrixC[matrixCount]); // Clean the string myStrcpy(ascii_string, null_string); } // If this is not the last message, send an acknowledge if (messageCount + 1 < numProdMessages) { // Send the same message received in earlier MSGQ_get () call. 
if (DSP_SUCCEEDED(status)) { msgId = MSGQ_getMsgId(msg); MSGQ_setMsgId(msg, msgId); status = MSGQ_put(SampleDspMsgq, (MsgqMsg) msg); if (DSP_FAILED(status)) { MSGQ_free((MsgqMsg) msg); SYSTEM_1Print("MSGQ_put () failed. Status = [0x%x]\n", status); } } sequenceNumber++; // Make sure that the sequenceNumber stays within the permitted // range for applications. if (sequenceNumber == MSGQ_INTERNALIDSSTART) { sequenceNumber = 0; } } } SYSTEM_0Print("\n"); MSGQ_free((MsgqMsg) msg); //SYSTEM_0Print("After freeing the message..\n"); SYSTEM_0Print("\nProduct matrix on GPP:\n"); #if defined (PROFILE) SYSTEM_GetStartTimeGpp(); #endif matMult(matrixA, matrixB, matrixD, mxSize); #if defined (PROFILE) SYSTEM_GetEndTimeGpp(); #endif // compare the matrices for(i=0; i<mxSize; i++) { for(j=0; j<mxSize; j++) { if(matrixC[i * mxSize + j] != matrixD[i * mxSize + j]) { SYSTEM_2Print("Matrices are not equal row: %d, column: %d\n", i, j); flag = 1; break; } } } if (flag == 0) { SYSTEM_0Print("\nMatrix products are equal\n"); } SYSTEM_0Print("Leaving helloDSP_Execute ()\n"); #if defined (PROFILE) if (DSP_SUCCEEDED(status)) { SYSTEM_GetProfileInfoGpp(); SYSTEM_GetProfileInfoDsp(numMessages); //is numProdMessages interesting? } #endif return status; }
int main(int argc, const char *argv[]) { // Seed the random number generator using time srand48((unsigned int) time(NULL)); // Dimension of the operation with defaul value int N = PROBSIZE; // Specify operation: 0 MatMult; 1 MatVecMult int opr = 0; // Whether to verify the result or not int verif = 0; // Whether to display the result or not int disp = 0; // Whether to call the naive implementation int execNaive = 1; // Whether to call the optimized implementation int execOPT = 1; // Parse command line { int arg_index = 1; int print_usage = 0; while (arg_index < argc) { if ( strcmp(argv[arg_index], "-N") == 0 ) { arg_index++; N = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-operation") == 0 ) { arg_index++; opr = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-help") == 0 ) { print_usage = 1; break; } else if( strcmp(argv[arg_index], "-verif") == 0 ) { arg_index++; verif = 1; if(execNaive==0 || execOPT==0) { printf("***Must call both naive and optimized when running verification\n"); print_usage = 1; break; } } else if( strcmp(argv[arg_index], "-disp") == 0 ) { arg_index++; disp = 1; } else if( strcmp(argv[arg_index], "-naive") == 0 ) { arg_index++; execNaive = 1; execOPT = 0; if(verif==1) { printf("***Must call both naive and optimized when running verification\n"); print_usage = 1; break; } } else if( strcmp(argv[arg_index], "-OPT") == 0 ) { arg_index++; execOPT = 1; execNaive = 0; if(verif==1) { printf("***Must call both naive and optimized when running verification\n"); print_usage = 1; break; } } else { printf("***Invalid argument: %s\n", argv[arg_index]); print_usage = 1; break; } } if (print_usage) { printf("\n"); printf("Usage: %s [<options>]\n", argv[0]); printf("\n"); printf(" -N <N> : problem size (default: %d)\n", PROBSIZE); printf(" -operation <ID> : Operation ID = 0 for MatMult or ID = 1 for MatVecMult\n"); printf(" -verif : Activate verification\n"); printf(" -disp : Display result (use only for small N!)\n"); printf(" 
-naive : Run only naive implementation\n"); printf(" -OPT : Run only optimized implementation\n"); printf(" -help : Display this message\n"); printf("\n"); } if (print_usage) return 0; } // Perform operation switch(opr) { case 0: /* Matrix-matrix multiply */ { printf("Performing matrix-matrix multiply operation\n"); double *matA, *matB, *matC1, *matC2; // Allocate memory matA = (double *) malloc(N*N * sizeof(double)); matB = (double *) malloc(N*N * sizeof(double)); if(execNaive) matC1 = (double *) malloc(N*N * sizeof(double)); if(execOPT) matC2 = (double *) malloc(N*N * sizeof(double)); // Initialize matrix values randInitialize(N*N,matA); randInitialize(N*N,matB); clock_t tic, toc; double tm; if(execNaive) { // Perform naive matA x matB = matC1 tic = clock(); matMult(N,matA,matB,matC1); toc = clock(); tm = (double)(toc - tic) / CLOCKS_PER_SEC; printf("Elapsed time for naive mat-mat mult.: %f seconds\n",tm); } if(execOPT) { // Perform optimized matA x matB = matC2 tic = clock(); //matMult_opt(N,matA,matB,matC2); toc = clock(); tm = (double)(toc - tic) / CLOCKS_PER_SEC; printf("Elapsed time for optimized mat-mat mult.: %f seconds\n",tm); } // Verify results (compare the two matrices) if(verif) compareVecs(N*N,matC2,matC1); // Display results (don't use for large matrices) if(disp) { displayMat(N,N,matA); printf("\n"); displayMat(N,N,matB); printf("\n"); displayMat(N,N,matC1); printf("\n"); displayMat(N,N,matC2); } // Free memory free(matA); free(matB); if(execNaive) free(matC1); if(execOPT) free(matC2); } break; case 1: /* Matrix-vector multiply */ { printf("Performing matrix-vector multiply operation\n"); double *matA, *vecB, *vecC1,*vecC2; // Allocate memory matA = (double *) malloc(N*N * sizeof(double)); vecB = (double *) malloc(N*N * sizeof(double)); if(execNaive) vecC1 = (double *) malloc(N*N * sizeof(double)); if(execOPT) vecC2 = (double *) malloc(N*N * sizeof(double)); // Initialize values randInitialize(N*N,matA); randInitialize(N,vecB); clock_t tic, toc; 
double tm; if(execNaive) { // Perform naive matA x vecB = vecC1 tic = clock(); matVecMult(N,matA,vecB,vecC1); toc = clock(); tm = (double)(toc - tic) / CLOCKS_PER_SEC; printf("Elapsed time for naive mat-vec mult.: %f seconds\n",tm); } if(execOPT) { // Perform optimized matA x vecB = vecC2 tic = clock(); matVecMult_opt(N,matA,vecB,vecC2); toc = clock(); tm = (double)(toc - tic) / CLOCKS_PER_SEC; printf("Elapsed time for optimized mat-vec mult.: %f seconds\n",tm); } // Verify results if(verif) compareVecs(N,vecC2,vecC1); // Display results (don't use for large matrices) if(disp) { displayMat(N,N,matA); printf("\n"); displayVec(N,vecB); printf("\n"); displayVec(N,vecC1); printf("\n"); } // Free memory free(matA); free(vecB); if(execNaive) free(vecC1); if(execOPT) free(vecC2); } break; default: printf(" Invalid operation ID\n"); return 0; } return 0; }
/*
 * Times one matMult() call on two matrix_size x matrix_size int16_t matrices.
 *
 * argv[1] is the matrix size (1..512); it is stored in the global matrix_size
 * before any buffer is sized from it. Returns 0 on success and -1 on a usage
 * error or allocation failure.
 */
int main(int argc, char** argv)
{
    int i, j;
    Timer neonTime;
    int16_t *mat1, *mat2;

    /* Get argument size */
    if (argc < 2)
    {
        printf("Usage: %s <matrix_size>\n", argc > 0 ? argv[0] : "matmult");
        return -1;
    }
    matrix_size = atoi(argv[1]);
    /* A size of 0 is rejected too: it would give the product buffer a
       zero-length array dimension. atoi() also returns 0 for non-numeric text. */
    if (matrix_size < 1 || matrix_size > 512)
    {
        printf("Matrix size must be between 1 and 512.\n");
        return -1;
    }

    /* Initialize timer */
    initTimer(&neonTime, "NEON Time");

    /* Allocate matrices */
    mat1 = malloc(matrix_size * matrix_size * sizeof(int16_t));
    mat2 = malloc(matrix_size * matrix_size * sizeof(int16_t));

    /* The product buffer keeps the original dimensions (sizeof(int32_t) *
       matrix_size per side) and element type, so matMult() receives the same
       pointer type as before. It is sized only now that matrix_size is known,
       and lives on the heap because it can reach 16 MB.
       NOTE(review): matrix_size per side is probably sufficient -- confirm
       against matMult() before shrinking. */
    size_t prodDim = sizeof(int32_t) * (size_t)matrix_size;
    int32_t (*prod)[prodDim] = malloc(prodDim * sizeof *prod);

    if (mat1 == NULL || mat2 == NULL || prod == NULL)
    {
        printf("Out of memory\n");
        free(mat1);
        free(mat2);
        free(prod);
        return -1;
    }

    /* Initialize matrices */
    for (i = 0; i < matrix_size; i++)
    {
        for (j = 0; j < matrix_size; j++)
        {
            mat1[i*matrix_size + j] = i+j*2;
            mat2[i*matrix_size + j] = i+j*3;
        }
    }

    /* Run the multiplication */
    startTimer(&neonTime);
    matMult(mat1,mat2,prod);
    stopTimer(&neonTime);

    printTimer(&neonTime);

    free(mat1);
    free(mat2);
    free(prod);

    return 0;
}
int main (int argc, char *argv[]) { int n1, n2, n3, n4; int numberOfPermutations; int i, j, k, l, p, ii, jj, m; char *a; int *iordre; int *corder; double *b; double *c; double *CAprime; double ssqeigvB; double ssqeigvC; double tracetot; double temp, prob; double *traceper; double dACC; FILE *f, *outf; long idum; int iGE, iGEF1, iGEF2, dummyINT; char fileNameA[1024], fileNameB[1024], fileNameC[1024], outFileName[1024]; double F1, F2, trace0, prob1, prob2, F1per, F2per; double time; MPI_Status msgStatus; int jobMsg[3]; double resultMsg[7]; int jobID; int nonZero; parameters *params; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &processID); MPI_Comm_size(MPI_COMM_WORLD, &numOfWorkers); params = (parameters *)malloc(sizeof(parameters)); nonZero = 0; get_args(argc, argv, params, processID); numberOfPermutations = params->permutations; n1 = params->n1; n2 = params->n2; n3 = params->n3; n4 = params->n4; strcpy(fileNameA, params->fileNameA); strcpy(fileNameB, params->fileNameB); strcpy(fileNameC, params->fileNameC); strcpy(outFileName, params->outFileName); if(processID == 0) { time = gettime(); outf = fopen(outFileName, "w"); fprintf(outf, "Permutations: %d N1 %d N2 %d, N3 %d N4 %d\n", numberOfPermutations - 1, n1, n2, n3, n4); } traceper = (double *)malloc(sizeof(double) * numberOfPermutations); iordre = (int *) malloc (sizeof(int) * n2); corder = (int *) malloc (sizeof(int) * n2); a = (char *)malloc(sizeof(char) * n1 * n2); b = (double *)malloc(sizeof(double) * n4 * n1); CAprime = (double *)malloc(sizeof(double) * n3 * n1); c = (double *)malloc(sizeof(double) * n3 * n2); /******** READ DATA ***********************************************/ f = fopen(fileNameA, "r"); for(i = 0; i < n1; i++) for(j = 0; j < n2; j++) { int d, v; v = fscanf(f, "%d", &d); if(v == 0) { printf("Format Conversion Error while reading Matrix A(%s) at position A[%d][%d]\n", fileNameA, i, j); exit(-1); } if(v == EOF) { printf("End of File reached while reading Matrix A(%s) at position 
A[%d][%d]\n", fileNameA, i, j); exit(-1); } a[i * n2 + j] = (char)d; if(a[i * n2 + j] != 0) nonZero++; } fclose(f); f = fopen(fileNameB, "r"); for(i = 0; i < n1; i++) for(j = 0; j < n4; j++) { int v; #ifdef ROWS v = fscanf(f, "%lf",&b[j * n1 + i]); #else v = fscanf(f, "%lf",&b[i * n4 + j]); #endif if(v == 0) { printf("Format Conversion Error while reading Matrix B(%s) at position B[%d][%d]\n", fileNameB, i, j); exit(-1); } if(v == EOF) { printf("End of File reached while reading Matrix B(%s) at position B[%d][%d]\n", fileNameB, i, j); exit(-1); } } fclose(f); f = fopen(fileNameC, "r"); for(i = 0; i < n3; i++) for(j = 0; j < n2; j++) { int v; v = fscanf(f, "%lf",&c[i * n2 + j]); if(v == 0) { printf("Format Conversion Error while reading Matrix C(%s) at position C[%d][%d]\n", fileNameC, i, j); exit(-1); } if(v == EOF) { printf("End of File reached while reading Matrix C(%s) at position C[%d][%d]\n", fileNameC, i, j); exit(-1); } } fclose(f); ssqeigvB = 0.0; #ifdef ROWS for(i = 0; i < n4; i++) { temp = 0.0; for(j = 0; j < n1; j++) temp += b[i * n1 + j] * b[i * n1 + j]; ssqeigvB += temp * temp; } #else for(i = 0; i < n4; i++) { temp = 0.0; for(j = 0; j < n1; j++) temp += b[j * n4 + i] * b[j * n4 + i]; ssqeigvB += temp * temp; } #endif ssqeigvC = 0.0; for(i = 0; i < n3; i++) { temp = 0.0; for(j = 0; j < n2; j++) temp += c[i * n2 + j] * c[i * n2 + j]; ssqeigvC += temp * temp; } if(processID == 0) { fprintf(outf, "Sum of squared PCoA eigenvalues of B = %1.5f\n\n", ssqeigvB); fprintf(outf, "Sum of squared PCoA eigenvalues of C = %1.5f\n\n", ssqeigvC); } if(ssqeigvC > ssqeigvB) tracetot = ssqeigvC; else tracetot = ssqeigvB; if(processID == 0) fprintf(outf, "TraceTot = %1.5f\n\n", tracetot); { FILE *t; int readCount; if(processID == 0) printf("READING trace file %s\n", params->externalTraceFileName); t = fopen(params->externalTraceFileName, "r"); readCount = fread(((void *)traceper), sizeof(double), numberOfPermutations, t); fclose(t); if(readCount < numberOfPermutations) { 
printf("Error, external tracefile %s contains only %d entries but %d are required\n", params->externalTraceFileName, readCount, numberOfPermutations); exit(-1); } iGE = 1; for(p = 1; p < numberOfPermutations; p++) { if(traceper[p] >= traceper[0]) iGE++; } prob = (double)(iGE) / (double)(numberOfPermutations); if(processID == 0) { fprintf(outf, " Global test of cospeciation: ParaFitGlobal = %1.5f Prob = %1.5f\n\n", traceper[0], prob); fprintf(outf, " Test of individual host-parasite links:\n\n"); fprintf(outf, " F1 = ParaFitLink1 F2 = ParaFitLink2\n\n\n"); printf("Global test of cospeciation: ParaFitGlobal = %1.5f Prob = %1.5f\n\n", traceper[0], prob); } } if(processID == 0) { int count; jobQueue *jobs; int jobsSent, jobsReceived; resultVector *results; char *received; int resultCounter = 0; int lastFlush = 0; jobs = (jobQueue *)malloc(nonZero * sizeof(jobQueue)); results = (resultVector *)malloc(nonZero * sizeof(resultVector)); received = (char *)malloc(sizeof(char) * nonZero); count = 0; for(i = 0; i < n1; i++) for(j = 0; j < n2; j++) { if(a[i * n2 + j] != 0) { jobs[count].i = i; jobs[count].j = j; count++; } } for(i = 0; i < nonZero; i++) received[i] = 0; jobsReceived = nonZero; jobsSent = 0; while(jobsReceived > 0) { MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &msgStatus); switch(msgStatus.MPI_TAG) { case JOB_REQUEST: MPI_Recv(&dummyINT, 1, MPI_INT, msgStatus.MPI_SOURCE, JOB_REQUEST, MPI_COMM_WORLD, &msgStatus); if(jobsSent < nonZero) { jobMsg[0] = jobsSent; jobMsg[1] = jobs[jobsSent].i; jobMsg[2] = jobs[jobsSent].j; MPI_Send(jobMsg, 3, MPI_INT, msgStatus.MPI_SOURCE, COMPUTE, MPI_COMM_WORLD); jobsSent++; } break; case RESULT: MPI_Recv(resultMsg, 7, MPI_DOUBLE, msgStatus.MPI_SOURCE, RESULT, MPI_COMM_WORLD, &msgStatus); jobsReceived--; jobID = (int)resultMsg[0]; results[jobID].ii = (int)resultMsg[1]; results[jobID].jj = (int)resultMsg[2]; results[jobID].F1 = resultMsg[3]; results[jobID].prob1 = resultMsg[4]; results[jobID].F2 = resultMsg[5]; 
results[jobID].prob2 = resultMsg[6]; received[jobID] = 1; resultCounter++; if((resultCounter % (2 * numOfWorkers)) == 0) { while(lastFlush < nonZero && received[lastFlush] == 1) { printf("Parasite %d Host %d F1 = %1.5f Prob1 = %1.5f F2 = %1.5f Prob2 = %1.5f\n", results[lastFlush].ii + 1, results[lastFlush].jj + 1, results[lastFlush].F1, results[lastFlush].prob1, results[lastFlush].F2, results[lastFlush].prob2); fprintf(outf, "Parasite %d Host %d F1 = %1.5f Prob1 = %1.5f F2 = %1.5f Prob2 = %1.5f\n", results[lastFlush].ii + 1, results[lastFlush].jj + 1, results[lastFlush].F1, results[lastFlush].prob1, results[lastFlush].F2, results[lastFlush].prob2); lastFlush++; } } if(jobsSent < nonZero) { jobMsg[0] = jobsSent; jobMsg[1] = jobs[jobsSent].i; jobMsg[2] = jobs[jobsSent].j; MPI_Send(jobMsg, 3, MPI_INT, msgStatus.MPI_SOURCE, COMPUTE, MPI_COMM_WORLD); jobsSent++; } break; } } while(lastFlush < nonZero && received[lastFlush] == 1) { printf("Parasite %d Host %d F1 = %1.5f Prob1 = %1.5f F2 = %1.5f Prob2 = %1.5f\n", results[lastFlush].ii + 1, results[lastFlush].jj + 1, results[lastFlush].F1, results[lastFlush].prob1, results[lastFlush].F2, results[lastFlush].prob2); fprintf(outf, "Parasite %d Host %d F1 = %1.5f Prob1 = %1.5f F2 = %1.5f Prob2 = %1.5f\n", results[lastFlush].ii + 1, results[lastFlush].jj + 1, results[lastFlush].F1, results[lastFlush].prob1, results[lastFlush].F2, results[lastFlush].prob2); lastFlush++; } printf("There are %d host-parasite links in matrix A\n", nonZero); fprintf(outf, "There are %d host-parasite links in matrix A\n", nonZero); for(i = 1; i < numOfWorkers; i++) { MPI_Send(&dummyINT, 1, MPI_INT, i, FINALIZE, MPI_COMM_WORLD); } fclose(outf); printf("TIME %f\n", gettime() - time); goto FINISH; } else { MPI_Send(&dummyINT, 1, MPI_INT, 0, JOB_REQUEST, MPI_COMM_WORLD); while(1) { MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &msgStatus); switch(msgStatus.MPI_TAG) { case COMPUTE: MPI_Recv(jobMsg, 3, MPI_INT, 0, COMPUTE, MPI_COMM_WORLD, &msgStatus); jobID = 
jobMsg[0]; ii = jobMsg[1]; jj = jobMsg[2]; a[ii * n2 + jj] = 0; makeCAprime(n3, n1, n2, a, c, CAprime, corder); dACC = matMult(n3, n4, n1, CAprime, b); F1 = (traceper[0] - dACC); F2 = (traceper[0] - dACC)/(tracetot - traceper[0]); for(i = 0; i < n2; i++) iordre[i] = i; idum = -1; for(i = 0; i < NTURN; i++) ran2(&idum); iGEF1 = 1; iGEF2 = 1; for(p = 1; p < numberOfPermutations; p++) { permuteCAprime(n3, n1, n2, a, c, CAprime, &idum, iordre, corder); dACC = matMult(n3, n4, n1, CAprime, b); F1per = traceper[p] - dACC; F2per = (traceper[p] - dACC) / (tracetot - traceper[p]); if(F1per >= F1) iGEF1++; if(F2per >= F2) iGEF2++; } prob1 = (double)(iGEF1) / (double)(numberOfPermutations); prob2 = (double)(iGEF2) / (double)(numberOfPermutations); a[ii * n2 + jj] = 1; resultMsg[0] = (double)jobID; resultMsg[1] = (double)ii; resultMsg[2] = (double)jj; resultMsg[3] = F1; resultMsg[4] = prob1; resultMsg[5] = F2; resultMsg[6] = prob2; MPI_Send(resultMsg, 7, MPI_DOUBLE, 0, RESULT, MPI_COMM_WORLD); break; case FINALIZE: MPI_Recv(&dummyINT, 1, MPI_INT, 0, FINALIZE, MPI_COMM_WORLD, &msgStatus); goto FINISH; } } } FINISH: MPI_Finalize(); }