/**
 * Runs two matrix-multiplication routines on the same operands and reports
 * whether their results agree.
 *
 * @param alg1  first routine, invoked as alg1(A, B, C, size)
 * @param alg2  second routine, invoked the same way
 * @param A     left operand
 * @param B     right operand
 * @param size  forwarded unchanged to both routines and to matrixEqual().
 *              NOTE(review): treated here as the edge length of square
 *              matrices (size * size elements each) -- confirm against the
 *              matrixMultAlg implementations.
 * @return the value of matrixEqual() for the two results, or 0 when size is
 *         negative or the scratch buffers cannot be allocated.
 */
int compare(matrixMultAlg alg1, matrixMultAlg alg2, const int *A, const int *B, const int size)
{
    int result = 0;
    int *C1 = NULL;
    int *C2 = NULL;

    if (size >= 0) {
        /* One full size x size result per algorithm.  Never request zero
         * elements so that a NULL return always means "out of memory". */
        const size_t cells = (size_t)size * (size_t)size;
        const size_t count = cells ? cells : 1;

        /* calloc: both outputs start from the same all-zero state, which
         * also covers routines that accumulate into C instead of storing. */
        C1 = (int *)calloc(count, sizeof(int));
        C2 = (int *)calloc(count, sizeof(int));

        if (C1 != NULL && C2 != NULL) {
            alg1(A, B, C1, size);
            alg2(A, B, C2, size);
            result = matrixEqual(C1, C2, size);
        }
    }

    /* free(NULL) is a no-op, so this is safe on every path. */
    free(C1);
    free(C2);
    return result;
}
void Renderer::makeSingleRenderCommandList(std::vector<RenderCommand*> commands) { int j = 0; // dont use with push_back_resize: some weird realloc error occurs //_renderCommands->reserveElements(commands.size()); for (auto i = commands.cbegin(); i < commands.cend(); i++, j++) { auto type = (*i)->getType(); if (type == RenderCommand::Type::ARBITRARY_VERTEX_COMMAND) { ArbitraryVertexCommand* avc = (ArbitraryVertexCommand*)(*i); bool newCommand = _lastWasFlushCommand; Material2D* currMaterial = avc->_material2d; bool transformOnCpu = avc->_transformOnCpu; ArbitraryVertexCommand::Data data = avc->_data; Mat4 modelView = avc->_mv; ssize_t vertexDataSize = avc->getVertexDataSize(); _lastWasFlushCommand = false; // process batching // check if buffer limit is exceeded if (_currentVertexBufferOffset + vertexDataSize > ARBITRARY_VBO_SIZE || _currentIndexBufferOffset + data.indexCount > ARBITRARY_INDEX_VBO_SIZE) { CCASSERT(false, "Exceeding the index or vertex buffer size"); } if (_firstAVC) { _vertexBatches->push_back_resize(VertexBatch()); _currentVertexBatch->material = currMaterial; _currentVertexBatch->indexed = avc->_isIndexed; _lastMaterial_skipBatching = currMaterial->_skipBatching && currMaterial->_id == MATERIAL_ID_DO_NOT_BATCH; newCommand = true; _firstAVC = false; } else { bool needsFilledVertexReset = _filledVertex + data.vertexCount > 0xFFFF; // meaning no index(short) could adress it anymore if (_isBufferSlicing) { bool vboFull = ((_currentVertexBufferOffset + vertexDataSize) - _lastVertexBufferSlicePos) > _vboByteSlice; needsFilledVertexReset |= vboFull; if (vboFull) { CCASSERT(vertexDataSize < _vboByteSlice, "commands vertex data is too big for slicing"); _lastVertexBufferSlicePos = _currentVertexBufferOffset; } } bool currMaterial_skipBatching = currMaterial->_skipBatching || currMaterial->_id == MATERIAL_ID_DO_NOT_BATCH; bool needFlushDueToDifferentMatrix = false; bool indexedStateDiffers = avc->_isIndexed != _lastCommandWasIndexed; needsFilledVertexReset 
|= indexedStateDiffers; // check if there need to be new batch due to different transform mode: // last command was cpu-transform and new one isnt -> new batch // last command was non-cpu-transform and new one is -> new batch // last command and new command are cpu-transformed, but dont share the same modelview -> new batch if (_lastAVC_was_NCT) { do { if (transformOnCpu) { needFlushDueToDifferentMatrix = true; break; } if (!matrixEqual(&_lastAVC_NCT_Matrix, &modelView)) { needFlushDueToDifferentMatrix = true; _lastAVC_NCT_Matrix = modelView; } } while (0); } else if (!transformOnCpu) { needFlushDueToDifferentMatrix = true; _lastAVC_NCT_Matrix = modelView; } // check if: // curr material id differs from previous? // either curr or prev materials skipped batching? // there needs to be a _filledVertex reset // the above check returned new batch if (currMaterial->_id != _currentMaterial2dId || currMaterial_skipBatching || _lastMaterial_skipBatching || needsFilledVertexReset || needFlushDueToDifferentMatrix) { // set the previous vertex batch end render command index _currentVertexBatch->endRCIndex = _currentAVCommandCount; // go to next vertex batch nextVertexBatch(); // set material and starting render command index _currentVertexBatch->material = currMaterial; _currentVertexBatch->indexed = avc->_isIndexed; _currentVertexBatch->indexBufferHandle = 0; _currentVertexBatch->vertexBufferHandle = 0; _currentVertexBatch->startingRCIndex = _currentAVCommandCount; if (needsFilledVertexReset || _lastArbitraryCommand->_material2d->_vertexStreamAttributes.id != currMaterial->_vertexStreamAttributes.id) { // if needsFilledVertexReset is set or the vertex attrib format from the previous material is different from the current use new vertex offset _filledVertex = 0; _currentVertexBatch->indexBufferOffset = _currentIndexBufferOffset; _currentVertexBatch->vertexBufferOffset = _currentVertexBufferOffset; } else { // use the offsets from the previous one 
_currentVertexBatch->indexBufferOffset = _previousVertexBatch->indexBufferOffset; _currentVertexBatch->vertexBufferOffset = _previousVertexBatch->vertexBufferOffset; } _previousVertexBatch->indexBufferUsageEnd = _currentVertexBatch->indexBufferUsageStart = _currentIndexBufferOffset; _previousVertexBatch->vertexBufferUsageEnd = _currentVertexBatch->vertexBufferUsageStart = _currentVertexBufferOffset; newCommand = true; } } _lastAVC_was_NCT = !transformOnCpu; _lastCommandWasIndexed = avc->_isIndexed; _currentMaterial2dId = currMaterial->_id; // data copying logic memcpy(_currentVertexBuffer, data.vertexData, vertexDataSize); if (transformOnCpu) { // treat the first 12 byte (3 floats) as a Vec3 and transform it using the modelView byte* ptr = _currentVertexBuffer; byte* endPtr = ptr + vertexDataSize; int stride = currMaterial->_vertexStreamAttributes.stride; while (ptr < endPtr) { Vec3* vec = reinterpret_cast<Vec3*>(ptr); modelView.transformPoint(vec); ptr += stride; } } if (data.indexCount != 0) { // copy index data if (_filledVertex == 0) { // special case when the vertex buffer offset is 0 memcpy(_currentIndexBuffer, data.indexData, sizeof(short) * data.indexCount); } else { GLushort* ptr = _currentIndexBuffer; GLushort* endPtr = ptr + data.indexCount; GLushort* srcPtr = (GLushort*)data.indexData; while (ptr < endPtr) { *(ptr++) = *(srcPtr++) + _filledVertex; } } } // adjust buffers and offset _currentIndexBuffer += data.indexCount; _currentVertexBuffer += vertexDataSize; _currentVertexBufferOffset += vertexDataSize; _currentIndexBufferOffset += data.indexCount; _filledVertex += data.vertexCount; // if newCommand is set create a new avc and init it if (newCommand) { ArbitraryVertexCommand* avc = _avcPool1->pop(); // the data value doesnt really matters here avc->init(0, currMaterial, data, modelView, transformOnCpu, 0); _currentAVCommandCount++; _lastArbitraryCommand = avc; _renderCommands->push_back_resize(avc); _avcPool2->push(avc); } else { // do nothing } 
_lastArbitraryCommand = avc; } else { _lastWasFlushCommand = true; if (type == RenderCommand::Type::GROUP_COMMAND) { makeSingleRenderCommandList(_renderGroups[reinterpret_cast<GroupCommand*>(*i)->getRenderQueueID()]); //_renderCommands->reserveElements(commands.size() - j); continue; } _renderCommands->push_back_resize(*i); continue; } } }
int main(int argc, char * argv[]) { int rank_grid, rank_row, rank_col; int coordinates[2]; int node_total_size; int node_dim_size; int elem_dim_size; int subelem_dim_size; int * scatter_sendcount; int * scatter_displacement; int gridinit_num_dims = 2; int gridinit_dims[2] = {0,0}; int gridinit_periods[2] = {0,0}; int gridinit_reorder = 1; MPI_Comm mpi_comm_grid, mpi_comm_row, mpi_comm_col; MPI_Datatype mpi_type_submatrix, mpi_type_submatrix_vector; MPI_Request fox_send_request, fox_recv_request; int fox_sendto, fox_recfrom, fox_sendtag, fox_rectag; int fox_broadcaster; double *mat_a, *mat_b, *mat_c; double *A_mine, *B_old, *B_new, *C_mine, *A_bcast; double *mat_verify; int i, j, k; int verify = 0; int verbose = 0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &node_total_size); double starttime, endtime; starttime = MPI_Wtime(); // Set up cartesian coordinate grid MPI_Dims_create(node_total_size, gridinit_num_dims, gridinit_dims); MPI_Cart_create(MPI_COMM_WORLD, gridinit_num_dims, gridinit_dims, gridinit_periods, gridinit_reorder, &mpi_comm_grid); // ** Get the grid coordinates of this process. MPI_Comm_rank(mpi_comm_grid, &rank_grid); MPI_Cart_coords(mpi_comm_grid, rank_grid, gridinit_num_dims, coordinates); // ** Set up column communicators. MPI_Comm_split(mpi_comm_grid, coordinates[1], coordinates[0], &mpi_comm_col); MPI_Comm_rank(mpi_comm_col, &rank_col); // ** Set up row communicators MPI_Comm_split(mpi_comm_grid, coordinates[0], coordinates[1], &mpi_comm_row); MPI_Comm_rank(mpi_comm_row, &rank_row); // Get the number of processors per dimension in grid. MPI_Comm_size(mpi_comm_row, &node_dim_size); // ******************************************** // ** CHECK SANITY OF AND SET UP ENVIRONMENT ** // ******************************************** // Check that number of parameters is sane. 
if(argc < 2) { if(rank_grid == 0) printf("Usage: foxmatrix N\n N = randomize NxN matrices.\n"); MPI_Finalize(); return -1; } // Get the number of elements per dimension in matrices from arguments. elem_dim_size = atoi(argv[1]); // Check that number of processors is sane. if(sqrt(node_total_size) != (double) ((int) sqrt(node_total_size))) { if(rank_grid == 0) printf("Not a square number of processors.\n"); MPI_Finalize(); return -1; } // Check that it is possible to split matrix over the processors. if(elem_dim_size % node_dim_size != 0) { if(rank_grid == 0) printf("Cannot split elements evenly over processors.\n"); MPI_Finalize(); return -1; } // Calculate the size (in one dimension) of the submatrices. subelem_dim_size = elem_dim_size / node_dim_size; // Check if the user has given the verify/verbose commands. if(argc == 3 && strcmp(argv[2], "verify") == 0) verify = 1; else if(argc == 3 && strcmp(argv[2], "verbose") == 0) verbose = 1; else if(argc == 4 && strcmp(argv[2], "verbose") == 0 && strcmp(argv[3], "verify") == 0) { verbose = 1; verify = 1; } else if(argc == 4 && strcmp(argv[2], "verify") == 0 && strcmp(argv[3], "verbose") == 0) { verbose = 1; verify = 1; } // Create datatype used for transmitting submatrices. // Idea of using vector+struct taken from http://www.mcs.anl.gov/research/projects/mpi/tutorial/mpiexmpl/src4/scatter/C/solution.html. MPI_Type_vector(subelem_dim_size, subelem_dim_size, elem_dim_size, MPI_DOUBLE, &mpi_type_submatrix_vector); int sm_blocklength[2] = {1, 1}; MPI_Aint sm_displacement[2] = {0, subelem_dim_size * sizeof(double)}; MPI_Datatype sm_types[2] = {mpi_type_submatrix_vector, MPI_UB}; MPI_Type_struct(2, sm_blocklength, sm_displacement, sm_types, &mpi_type_submatrix); MPI_Type_commit(&mpi_type_submatrix); // ** CREATE MATRICES AND SET UP SCATTERV/GATHERV VARIABLES ** if(rank_grid == 0) { // Create matrices on rank 0. 
mat_a = (double *) malloc(elem_dim_size * elem_dim_size * sizeof(double)); mat_b = (double *) malloc(elem_dim_size * elem_dim_size * sizeof(double)); mat_c = (double *) malloc(elem_dim_size * elem_dim_size * sizeof(double)); // Randomize matrix contents. randomMatrixInit(); randomMatrix(mat_a, elem_dim_size); randomMatrix(mat_b, elem_dim_size); // Allocate memory for storing scattering information. scatter_sendcount = (int *) malloc(node_total_size * sizeof(int)); scatter_displacement = (int *) malloc(node_total_size * sizeof(int)); // Set up scatter/gather arguments. int sit; for(sit = 0; sit < node_total_size; sit++) { scatter_sendcount[sit] = 1; if(sit == 0) scatter_displacement[sit] = 0; else { scatter_displacement[sit] = scatter_displacement[sit - 1] + 1; if(sit % node_dim_size == 0) // At end of line, go to start of next submatrix. scatter_displacement[sit] += node_dim_size * (subelem_dim_size - 1); } } } A_mine = (double *) malloc(subelem_dim_size*subelem_dim_size*sizeof(double)); A_bcast = (double *) malloc(subelem_dim_size*subelem_dim_size*sizeof(double)); B_old = (double *) malloc(subelem_dim_size*subelem_dim_size*sizeof(double)); B_new = (double *) malloc(subelem_dim_size*subelem_dim_size*sizeof(double)); C_mine = (double *) malloc(subelem_dim_size*subelem_dim_size*sizeof(double)); zeroMatrix(C_mine, subelem_dim_size); // ** DISTRIBUTE THE SUBMATRICES TO THE GRID NODES ** MPI_Scatterv(mat_a, scatter_sendcount, scatter_displacement, mpi_type_submatrix, A_mine, subelem_dim_size * subelem_dim_size, MPI_DOUBLE, 0, mpi_comm_grid); MPI_Scatterv(mat_b, scatter_sendcount, scatter_displacement, mpi_type_submatrix, B_new, subelem_dim_size * subelem_dim_size, MPI_DOUBLE, 0, mpi_comm_grid); // ** PERFORM FOX'S ALGORITHM FOR MATRIX MULTIPLICATION ** for(k = 0; k < node_dim_size; k++) { // **** BROADCAST A **** // // Decide who broadcasts this iteration. 
fox_broadcaster = (k + rank_col) % node_dim_size; // Copy matrix to the broadcast variable of the node that shall broadcast. if(rank_row == fox_broadcaster) copyMatrix(A_bcast, A_mine, subelem_dim_size); // Perform the broadcasting of the A matrix. MPI_Bcast(A_bcast, subelem_dim_size * subelem_dim_size, MPI_DOUBLE, fox_broadcaster, mpi_comm_row); // **** CREATE COPY OF B **** // // Wait for everyone to get their new B. If k = 0 everyone has it scattered. if(k != 0) MPI_Wait(&fox_recv_request, MPI_STATUS_IGNORE); // Make a copy of B so we can overwrite the old one. copyMatrix(B_old, B_new, subelem_dim_size); // **** SHIFT B **** // // Find which node to send to, and which to recieve from (B matrix). fox_recfrom = ((rank_col + 1) % node_dim_size); fox_sendto = ((rank_col - 1) % node_dim_size); if(fox_sendto < 0) fox_sendto = node_dim_size - 1; fox_sendtag = 1000 + fox_sendto; fox_rectag = 1000 + rank_col; // Send the B matrix. MPI_Isend(B_old, subelem_dim_size * subelem_dim_size, MPI_DOUBLE, fox_sendto, fox_sendtag, mpi_comm_col, &fox_send_request); // Receive the B matrix. MPI_Irecv(B_new, subelem_dim_size * subelem_dim_size, MPI_DOUBLE, fox_recfrom, fox_rectag, mpi_comm_col, &fox_recv_request); // Perform matrix multiplication on the local submatrix. naiveMatrixMult(A_bcast, B_old, C_mine, subelem_dim_size); } // ** GATHER DATA ** MPI_Barrier(MPI_COMM_WORLD); // Collect C from submatrices. MPI_Gatherv(C_mine, subelem_dim_size * subelem_dim_size, MPI_DOUBLE, mat_c, scatter_sendcount, scatter_displacement, mpi_type_submatrix, 0, mpi_comm_grid); // ** PRESENT DATA ** if(verbose && !rank_grid) { printf("** Will print matrix C from 0:\n"); printMatrix(mat_c, elem_dim_size); } // ** VERIFICATION OF CORRECTNESS ** if(verify && !rank_grid) { // Allocate memory for verification matrix. mat_verify = (double *) malloc(elem_dim_size * elem_dim_size * sizeof(double)); // Initialize verification matrix to zeroes. 
zeroMatrix(mat_verify, elem_dim_size); // Do the naive multiplication. naiveMatrixMult(mat_a, mat_b, mat_verify, elem_dim_size); // Print the correct matrix. if(verbose) { printf("** Print correct matrix from 0:\n"); printMatrix(mat_verify, elem_dim_size); } // Check equality between matrices. if(matrixEqual(mat_c, mat_verify, elem_dim_size)) printf("\n Ok!\n\n"); else printf("\n FAIL!\n\n"); // Free the memory used by the verification matrix. free(mat_verify); } // ** FINALIZE MPI ** MPI_Barrier(MPI_COMM_WORLD); if(rank_grid==0) { endtime = MPI_Wtime(); printf("%f\n", endtime - starttime); } MPI_Finalize(); // ** CLEANUP ** if(A_mine) free(A_mine); if(A_bcast) free(A_bcast); if(B_old) free(B_old); if(B_new) free(B_new); if(C_mine) free(C_mine); // Local rank 0 cleanup. if(rank_grid == 0) { free(mat_a); free(mat_b); free(mat_c); } return 0; }
void loop(){ DEBUG(("%d", api.getTime())); api.getMyZRState(me); api.getOtherZRState(other); aboveOtherPos[0] = other[0]; aboveOtherPos[1] = other[1]; if (game.getMemoryFilled() != 2 /*&& game.getEnergy() >= 3*/) { mathVecSubtract(vecBtwnSph, other, me, 3); mathVecNormalize(vecBtwnSph, 3); api.setAttitudeTarget(vecBtwnSph); } if ((game.getEnergy() > 3) && (game.getPicPoints() > 0) && canTakePic()) { game.takePic(); } if (game.getMemoryFilled() == 2) { api.setAttitudeTarget(earth); if (matrixEqual(me+6, earth) && game.getEnergy() >= 3) { game.uploadPics(); } } state = setState(); if (game.getNumMirrorsHeld() > 0) game.useMirror(); //bunch of states switch (state) { case 0://Get items DEBUG(("State 0")); if (!(sphColor)) { if (game.hasItem(8) == -1) { moveFast(mir2); } else if (game.hasItem(4) == -1) { if (game.getEnergy() > 3) moveFast(score2); else api.setPositionTarget(me); } } else { if (game.hasItem(7) == -1) { moveFast(mir1); } else if (game.hasItem(5) == -1) { if (game.getEnergy() > 3) moveFast(score3); else api.setPositionTarget(me); } } //Not worth it to get the other mirror /*else if (game.hasItem(7) == -1 && !(sphColor)) { moveFast(mir1); } else if (game.hasItem(8) && sphColor) { moveFast(mir2); }*/ //Bottom score a waste of energy? /*else if (game.hasItem(3) == -1) { if (game.getEnergy() > 3) moveFast(score1); else api.setPositionTarget(me); }*/ //Top score object /*else if (game.hasItem(6) == -1) { if (game.getEnergy() > 3) moveFast(score4); else api.setPositionTarget(me); }*/ break; case 1://Stay at top DEBUG(("State 1")); if (dist(me, other) > 0.5) { api.setPositionTarget(aboveOtherPos); } else { api.setPositionTarget(me); } break; case 2://Spam upload DEBUG(("State 2")); if (game.getMemoryFilled() != 0 && game.getEnergy() > 2) game.uploadPics(); break; /*case 3://Try to ram DEBUG(("Case 3")); moveFast(otherPos); break;*/ } }
int main(int argc, char *argv[]) { int numtasks, rank, dest, source, rc, count, tag=1; MPI_Status Stat; // Seed random number generator. randomMatrixInit(); MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &numtasks); MPI_Comm_rank(MPI_COMM_WORLD, &rank); double *A; double *B; if(argc==1) { printf("Specify matrix size\n"); return 1; } int size=atoi(argv[1]); A = (double*)malloc(sizeof(double)*size*size); B = (double*)malloc(sizeof(double)*size*size); randomMatrix(A,size); randomMatrix(B,size); // int Atest[]={1,2,3,4,5,6,7,8,9}; // int Btest[]={3,2,1,6,5,4,4,3,2}; // A=Atest; // B=Btest; if(numtasks>size*size) numtasks=size*size; int minCells=size*size/(numtasks); int extra=size*size-minCells*(numtasks); int pad=0; if (rank == 0) { dest = 1; source = 1; double *C; C = (double*)malloc(sizeof(double)*size*size); double *ret = (double*)malloc(sizeof(double)*size*size/(numtasks)+1); for(int cell=0; cell<minCells; cell++) { C[cell]=0; for(int j=0; j<size; j++) C[cell]+=A[(cell/size)*size+j]*B[j*size+cell%size]; // printf("Calculating job %d. 
C[%d]=%d\n", rank, cell, C[cell]); } for(int i=1; i<numtasks; i++) { int noCells=minCells; if(i>=numtasks-extra) { pad=i-numtasks+extra; noCells++; } rc = MPI_Recv(ret, noCells, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &Stat); for(int j=0; j<noCells; j++) { int cell = minCells*i+pad+j; // if(cell>size*size-1) // printf(" KUK!\n"); C[cell]=ret[j]; // printf("C[%d]=ret[%d]=%d\n",cell, j, ret[j]); } } // printMatrix(A, size); // printf("*\n"); // printMatrix(B, size); // printf("=\n"); // printMatrix(C, size); // printf("\nControl:\n"); double *D; if(argc==3 && strcmp(argv[2],"verify") == 0) { D = (double*)malloc(sizeof(double)*size*size); naiveMatrixMult(A,B,D,size); // printMatrix(D, size); if(matrixEqual(C,D,size)) printf("\n Ok!\n\n"); else printf("\n FAIL!\n\n"); free(D); } free(A); free(B); free(C); } else if (rank+1 <= size*size) { int noCells=minCells; if(rank>=numtasks-extra) { pad=rank-numtasks+extra; noCells++; } // printf("noCells: %d extra: %d rank: %d\n", noCells, extra, rank); double *ret = (double*)calloc(noCells,sizeof(double)); for(int i=0; i<noCells; i++) { ret[i] = 0; int cell=minCells*(rank)+pad+i; for(int j=0; j<size; j++) ret[i]+=A[(cell/size)*size+j]*B[j*size+cell%size]; // printf("Calculating job %d. C[%d]=%d\n", rank, cell, ret[i]); } rc = MPI_Send(ret, noCells, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD); } else { } MPI_Finalize(); return 0; }