/*
 * Basic sparse inverse, aka Takahashi inverse.
 *
 * Fills P->x on the sparsity pattern of P, walking the entries of L from the
 * last one to the first.  For the entry of L at row i, column j it evaluates
 *
 *     P(i,j) = ( [i == j] * dinv  -  sum_{r > j} L(r,j) * P(r,i) ) * dinv
 *
 * with dinv = 1 / L(j,j), and mirrors the result into P(j,i).
 *
 * Storage, as used by the code below: M->j[c] .. M->j[c+1]-1 are the entries
 * of column c, M->i[] their row indices and M->x[] their values.  The loop
 * relies on L->nz == L->j[L->N] and on the first entry of every column of L
 * being its diagonal (the latter is asserted).
 * NOTE(review): presumably L is a Cholesky-type factor and P has the pattern
 * of L+L' as built by allocate_symmetric() -- confirm against the callers.
 *
 * Parameters:
 *   L  factor to read; not modified.
 *   P  output matrix; when NULL it is created with allocate_symmetric(L, NULL).
 *   w  workspace of at least P->N ints, overwritten; when NULL a temporary
 *      buffer is allocated here and released before returning.
 *
 * Returns P, or NULL when the temporary workspace cannot be allocated.
 * NOTE(review): on that failure path a P created here is not released,
 * because no destructor for SparseMatrix is visible from this function.
 */
SparseMatrix* sparse_inverse_a(const SparseMatrix *L, SparseMatrix *P, int *w)
{
    int i, j, k, kL, kP;
    int ks = 0;          /* index in P of the entry currently being computed */
    int delw = 0;        /* non-zero when w is owned by this call */
    double dinv = 0.0;   /* reciprocal of the diagonal of the current column */

    if (P == NULL)
        P = allocate_symmetric(L, NULL);

    /* A zero-sized request may legitimately yield NULL, so only allocate
       (and only treat NULL as failure) when there is something to store. */
    if (w == NULL && P->N > 0) {
        w = malloc((size_t)P->N * sizeof *w);
        if (w == NULL)
            return NULL;
        delw = 1;
    }

    validate_matrices(L, P);

    /* Initialise workspace to index the diagonal element of each column */
    for (i = 0; i < P->N; ++i) {
        k = P->j[i];                /* column-start index */
        while (P->i[k] != i)        /* scan down to the diagonal */
            ++k;
        w[i] = k;
    }

    /* Compute sparse P, entries of L visited in reverse storage order */
    j = L->N;                       /* column index, one-past-end */
    for (k = L->nz - 1; k >= 0; --k, --ks) {

        /* Entry (k) is the bottom of column (j-1) in L: start that column */
        if (k == L->j[j] - 1) {
            ks = P->j[j] - 1;       /* bottom of column (j-1) in P */
            --j;
            dinv = 1.0 / L->x[L->j[j]];
            assert(j == L->i[L->j[j]]);   /* first entry must be the diagonal */
        }

        /* Diagonal entries start from dinv, all others from zero */
        P->x[ks] = (k == L->j[j]) ? dinv : 0.0;

        /* Summation: merge column (j) of L with column (i) of P, bottom-up */
        i  = L->i[k];               /* row index of entry (k) */
        kL = L->j[j + 1] - 1;       /* bottom of column (j) in L */
        kP = P->j[i + 1] - 1;       /* bottom of column (i) in P */
        /* Stops at the diagonal of L; the top entry of P's column is never
           visited because the walk ends once kP reaches P->j[i]. */
        while (L->i[kL] > j && kP > P->j[i]) {
            if (L->i[kL] == P->i[kP])
                P->x[ks] -= L->x[kL--] * P->x[kP--];
            else if (L->i[kL] < P->i[kP])
                --kP;
            else
                --kL;
        }

        P->x[ks] *= dinv;

        /* Store the symmetric entry, then move w[i] one slot up so it is
           ready for the next (smaller) column that touches row (i). */
        assert(w[i] >= P->j[i]);    /* must stay inside column (i) of P */
        assert(P->i[w[i]] == j);    /* mirrored row index must equal (j) */
        P->x[w[i]--] = P->x[ks];
    }

    if (delw)
        free(w);
    return P;
}
/*
 * Benchmark driver: times asm_multiply() against c_multiply() on the same
 * pair of input matrices for NUM_RUNS rounds, prints each timing and the
 * per-kernel average in microseconds, then hands both results to
 * validate_matrices().
 *
 * Each buffer holds MATRIX_SIZE * MATRIX_SIZE + 1 longs.
 * NOTE(review): the extra slot presumably stores the matrix dimension in
 * element 0 -- confirm against fill_matrix().
 *
 * Returns 0 on success, 1 when a buffer cannot be allocated.
 */
int main(int argc, char **argv)
{
    struct timeval start, stop;
    unsigned long long t, asm_tot = 0, c_tot = 0;
    const size_t nbytes = (MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long);
    int rc = 1;
    int i;

    (void)argc;
    (void)argv;

    long *A  = malloc(nbytes);
    long *B  = malloc(nbytes);
    long *R1 = malloc(nbytes);
    long *R2 = malloc(nbytes);

    if (A == NULL || B == NULL || R1 == NULL || R2 == NULL) {
        fprintf(stderr, "out of memory\n");
        goto cleanup;
    }

    fill_matrix(MATRIX_SIZE, A);
    fill_matrix(MATRIX_SIZE, B);

    for (i = 1; i <= NUM_RUNS; ++i) {
        printf("Run %d\n", i);

        // Assembly
        gettimeofday(&start, NULL);
        asm_multiply(A, B, R1);
        gettimeofday(&stop, NULL);
        t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec;
        asm_tot += t;
        printf("Asm-func: %llu Microseconds\n", t);

        // C
        gettimeofday(&start, NULL);
        c_multiply(A, B, R2);
        gettimeofday(&stop, NULL);
        t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec;
        c_tot += t;
        printf("C-func: %llu Microseconds\n", t);
    }

    printf("\n");
    printf( "Asm-func average: %llu\n", asm_tot / NUM_RUNS);
    printf( "C-func average: %llu\n", c_tot / NUM_RUNS);

    validate_matrices(R1, R2);
    rc = 0;

cleanup:
    /* free(NULL) is a no-op, so a partially failed allocation is handled too */
    free(A);
    free(B);
    free(R1);
    free(R2);
    return rc;
}
/*
 * Allocate a sparse matrix with non-zero pattern equal to L+L', but without
 * initialising any values.
 *
 * Only the structure of P is written: P->j (N+1 column offsets) and P->i
 * (row indices).  P->x is left untouched.
 *
 * Parameters:
 *   L  source pattern; L->j[c] .. L->j[c+1]-1 index the entries of column c
 *      and L->i[] holds their row indices.  Not modified.
 *   P  destination; when NULL a matrix is created with
 *      create_sparse_matrix(L->N, NZ_SYM(L->nz, L->N)).
 *      NOTE(review): NZ_SYM presumably yields the entry count of L+L' --
 *      confirm at its definition.
 *
 * Returns P.
 *
 * The counting pass walks L column by column (rather than by a flat entry
 * index with a one-step column cursor), so columns of L that contain no
 * entries are skipped correctly instead of being charged with the entries of
 * the following column.
 */
SparseMatrix* allocate_symmetric(const SparseMatrix *L, SparseMatrix *P)
{
    int col, k, total;
    const int *jL;
    int *jP;

    if (P == NULL)
        P = create_sparse_matrix(L->N, NZ_SYM(L->nz, L->N));
    validate_matrices(L, P);

    /* Initialise P->j with zeros */
    for (col = 0; col <= P->N; ++col)
        P->j[col] = 0;

    jL = L->j;
    jP = P->j + 1;      /* offset by 1 to allow in-place calculations later */

    /* Count non-zeros per column of P; jP[c] accumulates column c */
    for (col = 0; col < L->N; ++col) {
        for (k = jL[col]; k < jL[col + 1]; ++k) {
            const int row = L->i[k];
            ++jP[col];              /* the entry itself */
            if (row != col)
                ++jP[row];          /* its mirror image in column (row) */
        }
    }

    /* Exclusive cumulative sum: jP[c] becomes the start offset of column c */
    total = 0;
    for (col = 0; col < P->N; ++col) {
        const int count = jP[col];
        jP[col] = total;
        total += count;
    }

    /* Fill P->i.  Every store advances jP[c], so once column c is complete
       jP[c] == P->j[c+1] holds the start of column c+1, which is exactly the
       value P->j needs there: the offsets are finished in place. */
    for (col = 0; col < L->N; ++col) {
        for (k = jL[col]; k < jL[col + 1]; ++k) {
            const int row = L->i[k];
            P->i[jP[col]++] = row;      /* row index into column (col) */
            if (row != col)
                P->i[jP[row]++] = col;  /* mirrored index into column (row) */
        }
    }

    return P;
}
int main(int argc, char **argv) { if(DEBUG_ASM) { long C[] = {2, 1,2,3,4}; long D[] = {2, 5,6,7,8}; long R3[5]; asm_inv_multiply(C, D, R3); printmatrix(R3); } else { struct timeval start, stop; unsigned long long t, asm_tot = 0, asm_inv_tot = 0, c_tot = 0, c_inv_tot = 0, c_2_tot = 0; FILE *f = fopen("testr.txt", "a"); long *A = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *B = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R1 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R2 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R3 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R4 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); long *R5 = (long *) malloc((MATRIX_SIZE * MATRIX_SIZE + 1) * sizeof(long)); fill_matrix(MATRIX_SIZE, A); fill_matrix(MATRIX_SIZE, B); int i; for (i = 1; i <= NUM_RUNS; ++i) { fprintf(f,"Run %d, %s\n", i, RUN_INFO); printf("Run %d, %s\n", i, RUN_INFO); // Assembly gettimeofday(&start, NULL); asm_multiply(A, B, R1); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec; asm_tot += t; fprintf(f,"Asm-func: %llu Microseconds\n", t); printf("Asm-func: %llu Microseconds\n", t); // Inverted assembly gettimeofday(&start, NULL); asm_inv_multiply(A, B, R2); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec; asm_inv_tot += t; fprintf(f,"Asm-inv-func: %llu Microseconds\n", t); printf("Asm-inv-func: %llu Microseconds\n", t); // C gettimeofday(&start, NULL); c_multiply(A, B, R3); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec; c_tot += t; fprintf(f,"C-func: %llu Microseconds\n", t); printf("C-func: %llu Microseconds\n", t); // Inverted C gettimeofday(&start, NULL); inv_c_multiply(A, B, R4); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + 
stop.tv_usec) - start.tv_usec; c_inv_tot += t; fprintf(f,"inv C-func: %llu Microseconds\n\n", t); printf("inv C-func: %llu Microseconds\n\n", t); // C 2 gettimeofday(&start, NULL); c_multiply_2(A, B, R5); gettimeofday(&stop, NULL); t = ((stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec) - start.tv_usec; c_2_tot += t; fprintf(f,"C2-func: %llu Microseconds\n\n", t); printf("C2-func: %llu Microseconds\n\n", t); } fprintf(f,"Average on %s\n", RUN_INFO); fprintf(f, "Asm-func average: %llu\n", asm_tot / NUM_RUNS); fprintf(f, "Asm-inv-func average: %llu\n", asm_inv_tot / NUM_RUNS); fprintf(f, "C-func average: %llu\n", c_tot / NUM_RUNS); fprintf(f, "C-inv-func average: %llu\n", c_inv_tot / NUM_RUNS); fprintf(f, "C2-func average: %llu\n\n________\n", c_2_tot / NUM_RUNS); fclose(f); validate_matrices(R1, R2, R3, R4); free(A); free(B); free(R1); free(R2); free(R3); free(R4); } return 0; }
/*
 * Variant of sparse_inverse_a() that still computes P in column order but
 * caches the bounds of the current column of L (top/diagonal index and
 * one-past-bottom index) instead of re-reading L->j for every entry, and
 * accumulates each entry in a local before storing it.
 *
 * For the entry of L at row i, column j it evaluates
 *
 *     P(i,j) = ( [i == j] * dinv  -  sum_{r > j} L(r,j) * P(r,i) ) * dinv
 *
 * with dinv = 1 / L(j,j), and mirrors the result into P(j,i).
 *
 * Storage, as used by the code below: M->j[c] .. M->j[c+1]-1 are the entries
 * of column c, M->i[] their row indices and M->x[] their values.  The loop
 * relies on L->nz == L->j[L->N] and treats the first entry of every column
 * of L as its diagonal, with all later entries of the column lying below it.
 * NOTE(review): presumably L is a Cholesky-type factor and P has the pattern
 * of L+L' as built by allocate_symmetric() -- confirm against the callers.
 *
 * Parameters:
 *   L  factor to read; not modified.
 *   P  output matrix; when NULL it is created with allocate_symmetric(L, NULL).
 *   w  workspace of at least P->N ints, overwritten; when NULL a temporary
 *      buffer is allocated here and released before returning.
 *
 * Returns P, or NULL when the temporary workspace cannot be allocated.
 * NOTE(review): on that failure path a P created here is not released,
 * because no destructor for SparseMatrix is visible from this function.
 */
SparseMatrix* sparse_inverse_b(const SparseMatrix *L, SparseMatrix *P, int *w)
{
    int i, j, k;
    int kL, kP;
    int kPj = 0;         /* index in P of the entry (i, j) being computed */
    int kLt = 0;         /* top (diagonal) entry of column (j) in L */
    int kLb = 0;         /* one-past-bottom of column (j) in L */
    int kPt;             /* top entry of column (i) in P */
    int freew = 0;       /* non-zero when w is owned by this call */
    double dinv = 0.0;   /* reciprocal of the diagonal of column (j) */
    double acc;          /* running value of the entry being computed */

    /* Allocate memory, if necessary */
    if (P == NULL)
        P = allocate_symmetric(L, NULL);
    validate_matrices(L, P);

    /* A zero-sized request may legitimately yield NULL, so only allocate
       (and only treat NULL as failure) when there is something to store. */
    if (w == NULL && P->N > 0) {
        w = malloc((size_t)P->N * sizeof *w);
        if (w == NULL)
            return NULL;
        freew = 1;
    }

    /* Initialise workspace to index the diagonal element of each column */
    for (j = 0; j < P->N; ++j) {
        kP = P->j[j];               /* start index of column (j) in P */
        while (P->i[kP] != j)       /* scan down to the diagonal */
            ++kP;
        w[j] = kP;
    }

    /* Compute sparse P, entries of L visited in reverse storage order */
    j = L->N;                       /* column index, one-past-end */
    for (k = L->nz - 1; k >= 0; --k, --kPj) {

        /* Entry (k) is the bottom of column (j-1) in L: start that column */
        if (k == L->j[j] - 1) {
            kPj = P->j[j] - 1;      /* bottom of column (j-1) in P */
            --j;                    /* shift one column left, exactly once */

            /* Cache the bounds of column (j) in L and its diagonal term */
            kLb  = k + 1;
            kLt  = L->j[j];
            dinv = 1.0 / L->x[kLt];
        }

        /* Diagonal entries start from dinv, all others from zero */
        acc = (k == kLt) ? dinv : 0.0;

        /* Summation: merge the sub-diagonal part of column (j) of L with
           column (i) of P, bottom-up.  Separate cursors are used so the
           outer counter (k) is never disturbed. */
        i   = L->i[k];              /* row index of entry (k) */
        kL  = kLb - 1;              /* bottom of column (j) in L */
        kP  = P->j[i + 1] - 1;      /* bottom of column (i) in P */
        kPt = P->j[i];
        while (kL > kLt && kP > kPt) {
            if (L->i[kL] == P->i[kP])
                acc -= L->x[kL--] * P->x[kP--];
            else if (L->i[kL] < P->i[kP])
                --kP;
            else
                --kL;
        }

        /* Multiply by dinv and store the entry */
        acc *= dinv;
        P->x[kPj] = acc;

        /* Store the symmetric entry, then shift w[i] up by one so it is
           ready for the next symmetric entry of column (i). */
        P->x[w[i]--] = acc;
    }

    /* Work complete, free workspace if it was created locally */
    if (freew)
        free(w);
    return P;
}