/**
 * Computes out = a * b for two n x n matrices stored in block layout.
 *
 * Layout, as implied by the offsets used below: the matrix is cut into
 * s = n / m full block rows plus, when m does not divide n, one trailing block
 * row of height r = n - s * m. Consecutive block rows start m * n doubles
 * apart. Inside a block row of height h the blocks follow one another: each
 * of the first s blocks takes h * m doubles, the trailing block h * r doubles.
 * The element order inside a single block is whatever simpleMatrixMultiply()
 * and addToMatrix() expect; this function only computes block offsets.
 *
 * @param a   left operand, n * n doubles in block layout
 * @param b   right operand, n * n doubles in block layout
 * @param out result buffer of n * n doubles; zeroed first, then accumulated into
 * @param n   matrix order, must be >= 0
 * @param m   block order, must be > 0
 * @return 0 on success; -1 when n or m is out of range; otherwise the non-zero
 *         status reported by the helper that failed.
 */
int blockMatrixMultiply(double* a, double* b, double* out, int n, int m){
    // n / m below would divide by zero for m == 0, and a negative m or n
    // produces negative block counts and pointer offsets.
    if (n < 0 || m <= 0){
        printf("invalid matrix or block size!\n\t--blockmatrixmultiply\n");
        return -1;
    }

    const int s = n/m;                 // number of full m-sized blocks per dimension
    const int r = n - s*m;             // order of the trailing partial block, 0 <= r < m
    const int matrix_size = n*n;
    const int block_size = m*m;
    const int string_size = m*n;       // distance between consecutive block rows
    const int block_count = (r > 0) ? s + 1 : s;

    int res = zeroMatrix(out, matrix_size);
    if (res != 0){
        // Accumulating into a buffer that was not cleared would give a wrong
        // product, so stop here instead of continuing.
        printf("zeroMatrix failed!\n\t--blockmatrixmultiply\n");
        return res;
    }

    // Scratch space for one block product; no block is larger than m x m.
    double* const temp_block = new double[block_size];

    // NOTE(review): simpleMatrixMultiply() and addToMatrix() are assumed to
    // return 0 on success, like zeroMatrix() above -- confirm against their
    // definitions.
    for (int i = 0; i < block_count && res == 0; i++){
        const int rows = (i < s) ? m : r;      // height of block row i
        const int row_step = rows*m;           // size of a full-width block in row i
        for (int j = 0; j < block_count && res == 0; j++){
            const int cols = (j < s) ? m : r;  // width of block column j
            double* const out_block = out + i*string_size + j*row_step;
            for (int k = 0; k < block_count; k++){
                const int inner = (k < s) ? m : r;  // shared dimension of the two blocks
                res = simpleMatrixMultiply(a + i*string_size + k*row_step,
                                           b + k*string_size + j*inner*m,
                                           temp_block, rows, inner, cols);
                if (res != 0){
                    printf("simpleMatrixMultiply failed!\n\t--blockmatrixmultiply\n");
                    break;
                }
                res = addToMatrix(out_block, temp_block, rows*cols);
                if (res != 0){
                    printf("addToMatrix failed!\n\t--blockmatrixmultiply\n");
                    break;
                }
            }
        }
    }

    // Single exit after the allocation so the scratch block is always released.
    delete[] temp_block;
    return res;
}
/// Replaces the contents of `m` with `values`: the matrix is cleared with
/// setZero() and `values` are then added into it through addToMatrix().
void setMatrix(PETScMatrix& m, std::initializer_list<double> values)
{
    // Clear first, so that the accumulation below acts as an assignment.
    m.setZero();
    addToMatrix(m, values);
}