/*
 * Generic consistency check for a number <-> label compression scheme that is
 * supplied entirely through callbacks.
 *
 * nInterfaces        numbers 0 .. nInterfaces-1 are exercised.
 * bitsUsedForLabel   returns the bit width reported for a label.
 * bitsUsedForNumber  returns the bit width reported for a number.
 * getCompressed      encodes a number as a label using the given bit width.
 * getDecompressed    decodes a label of the given bit width back to a number.
 *
 * Any violated expectation trips Assert_always.
 */
static void numberCompressions_generic(
    uint32_t nInterfaces,
    uint32_t (*bitsUsedForLabel)(const uint64_t label),
    uint32_t (*bitsUsedForNumber)(const uint32_t number),
    uint64_t (*getCompressed)(const uint32_t number, const uint32_t bitsUsed),
    uint32_t (*getDecompressed)(const uint64_t label, const uint32_t bitsUsed))
{
    /* bitWidths[w] is 1 when at least one number reports width w. */
    uint8_t bitWidths[64] = { 0 };

    uint32_t number = 0;
    while (number < nInterfaces) {
        uint32_t width = bitsUsedForNumber(number);
        bitWidths[width] = 1;
        ++number;
    }

    /* Round-trip every number through every width that is in use. */
    for (uint32_t bits = 0; bits < 64; ++bits) {
        if (bitWidths[bits]) {
            for (uint32_t i = 0; i < nInterfaces; ++i) {
                /* A number is only encoded at its own width or a wider one. */
                if (bitsUsedForNumber(i) <= bits) {
                    uint64_t label = getCompressed(i, bits);
                    if (1 != i) {
                        Assert_always(bits == bitsUsedForLabel(label));
                        Assert_always(i == getDecompressed(label, bits));
                    } else {
                        /* Number 1 must encode to label 1 at every width. */
                        Assert_always(1 == label);
                    }
                }
            }
        }
    }

    /* Walk all labels 0x0000 .. 0xffff and check that each one decodes sanely. */
    uint64_t label = 0;
    while (label < 0x10000u) {
        uint32_t bits = bitsUsedForLabel(label);
        Assert_always(1 == bitWidths[bits]);
        if (1 != (label & 0xf)) {
            uint32_t i = getDecompressed(label, bits);
            Assert_always(i < nInterfaces);
        } else {
            /* Low nibble 0001: must be a 4 bit label that decodes to number 1. */
            Assert_always(4 == bits);
            Assert_always(1 == getDecompressed(label, 4));
        }
        ++label;
    }
}
static void numberCompressions_generic( uint32_t nInterfaces, uint32_t (*bitsUsedForLabel)(const uint64_t label), uint32_t (*bitsUsedForNumber)(const uint32_t number), uint64_t (*getCompressed)(const uint32_t number, const uint32_t bitsUsed), uint32_t (*getDecompressed)(const uint64_t label, const uint32_t bitsUsed), struct EncodingScheme* (* defineScheme)(struct Allocator* alloc) ) { uint8_t bitWidths[64] = { 0 }; for (uint32_t i = 0; i < nInterfaces; ++i) { bitWidths[bitsUsedForNumber(i)] = 1; } for (uint32_t bits = 0; bits < 64; ++bits) { if (!bitWidths[bits]) { continue; } for (uint32_t i = 0; i < nInterfaces; ++i) { /* only check for greater-or-equal bit widths */ if (bits < bitsUsedForNumber(i)) { continue; } uint64_t label = getCompressed(i, bits); if (1 == i) { Assert_true(1 == label); continue; } Assert_true(bits == bitsUsedForLabel(label)); Assert_true(i == getDecompressed(label, bits)); } } for (uint64_t label = 0; label < 0x10000u; ++label) { uint32_t bits = bitsUsedForLabel(label); Assert_true(1 == bitWidths[bits]); if (1 == (label & Bits_maxBits64(bits))) { //Assert_true(4 == bits); Assert_true(1 == getDecompressed(label, bits)); } else { uint32_t i = getDecompressed(label, bits); Assert_true(i < nInterfaces); } } struct Allocator* alloc = MallocAllocator_new(20000); struct EncodingScheme* scheme = defineScheme(alloc); for (uint32_t i = 0; i < nInterfaces; i++) { for (int j = 0; j < scheme->count; j++) { int bits = scheme->forms[j].prefixLen + scheme->forms[j].bitCount; if ((int)bitsUsedForNumber(i) > bits) { continue; } uint64_t label = getCompressed(i, bits); for (int k = j; k < scheme->count; k++) { uint64_t labelB = EncodingScheme_convertLabel(scheme, label, k); if (1 == i && k != 0) { Assert_true(1 == label); Assert_true(EncodingScheme_convertLabel_INVALID == labelB); continue; } int bitsB = bitsUsedForLabel(labelB); Assert_true(bitsB == scheme->forms[k].prefixLen + scheme->forms[k].bitCount || (i == 1 && bitsB == 4)); Assert_true(i == 
getDecompressed(labelB, bitsB)); uint64_t labelC = EncodingScheme_convertLabel(scheme, labelB, j); Assert_true(labelC == label); } } } Allocator_free(alloc); }
/*
 * Reads a sparse matrix in AIJ (row, column, value) format from "matrix.test",
 * builds CRS row/column arrays from it, writes them to "matrix.output.crs",
 * multiplies the matrix by a random vector using the CRS arrays, and compares
 * the result with a dense matrix-vector product.
 *
 * The indices stored in the input file are 1-based, not C style.
 *
 * Returns EXIT_SUCCESS when the whole pipeline ran, EXIT_FAILURE on any I/O,
 * parse, validation or allocation error. Every resource acquired here is
 * released on all paths through the single cleanup label.
 */
int main(void)
{
    int status = EXIT_FAILURE;
    int nRow = 0, nCol = 0, nnZero = 0;
    int nCrsRow = 0;
    int i, j, k;
    int closeRc;
    double err;
    FILE *pf = NULL;
    int *aijRow = NULL, *aijCol = NULL;
    int *crsRow = NULL, *crsCol = NULL;
    double *value = NULL;
    double *vec = NULL, *b = NULL, *b_star = NULL;
    double **matrixA = NULL;

    srand((unsigned)time(NULL));

    // Open the file
    pf = fopen("matrix.test", "r");
    if (pf == NULL) {
        fprintf(stderr, "Can't open input file!\n");
        goto cleanup;
    }

    // Read in the matrix in AIJ Format
    if (fscanf(pf, "%d %d %d", &nRow, &nCol, &nnZero) != 3) {
        fprintf(stderr, "Can't read the matrix header!\n");
        goto cleanup;
    }
    printf("The matrix is in %d(Row) * %d(Col) size with %d non-zero elements!\n", nRow, nCol, nnZero);
    if (nRow <= 0 || nCol <= 0 || nnZero <= 0) {
        fprintf(stderr, "Invalid matrix dimensions!\n");
        goto cleanup;
    }

    // Allocate memory for AIJ format
    aijRow = calloc((size_t)nnZero, sizeof *aijRow);
    aijCol = calloc((size_t)nnZero, sizeof *aijCol);
    value = calloc((size_t)nnZero, sizeof *value);
    if (aijRow == NULL || aijCol == NULL || value == NULL) {
        fprintf(stderr, "Out of memory!\n");
        goto cleanup;
    }

    // Read in the AIJ format matrix
    for (i = 0; i < nnZero; ++i) {
        // Note: The index stored is not C style!
        if (fscanf(pf, "%d %d %lf", &aijRow[i], &aijCol[i], &value[i]) != 3) {
            fprintf(stderr, "Can't read non-zero entry %d!\n", i + 1);
            goto cleanup;
        }
        // The indices are used below to address matrixA and vec, so reject
        // anything outside the declared 1-based range.
        if (aijRow[i] < 1 || aijRow[i] > nRow || aijCol[i] < 1 || aijCol[i] > nCol) {
            fprintf(stderr, "Entry %d has an out-of-range index!\n", i + 1);
            goto cleanup;
        }
    }
    fclose(pf);
    pf = NULL;

    // It is impossible to get the length of a dynamic allocated array using sizeof!!
    printf("The matrix is has %d(Row), %d(Col), %d(non-zero) in AIJ!\n", nnZero, nnZero, nnZero);

    // Allocate memory for CRS format
    // NOTE(review): getCompressed/getValue are defined outside this block;
    // presumably they return the CRS row-array length and fill crsRow with
    // 1-based row start offsets -- confirm against their definitions.
    nCrsRow = getCompressed(nnZero, aijRow);
    if (nCrsRow <= 0) {
        fprintf(stderr, "Invalid CRS row count!\n");
        goto cleanup;
    }
    // One extra zeroed slot: the loops below read crsRow[i+1] for
    // i == nCrsRow-1, which was one past the end of the old allocation.
    crsRow = calloc((size_t)nCrsRow + 1, sizeof *crsRow);
    crsCol = calloc((size_t)nnZero, sizeof *crsCol);
    if (crsRow == NULL || crsCol == NULL) {
        fprintf(stderr, "Out of memory!\n");
        goto cleanup;
    }
    printf("The matrix is has %d(Row), %d(Col), %d(non-zero) in CRS!\n", nCrsRow, nnZero, nnZero);

    getValue(crsRow, aijRow, nnZero);
    memcpy(crsCol, aijCol, (size_t)nnZero * sizeof *crsCol);

    printf("\ncrsRow is: ");
    for (i = 0; i < nCrsRow; ++i) {
        printf("%d\t", crsRow[i]);
    }
    printf("\ncrsCol is: ");
    for (i = 0; i < nnZero; ++i) {
        printf("%d\t", crsCol[i]);
    }

    // Write the CRS format to a file
    pf = fopen("matrix.output.crs", "w");
    if (pf == NULL) {
        fprintf(stderr, "Can't open output file!\n");
        goto cleanup;
    }
    k = 0;
    fprintf(pf, "%d %d %d\n", nCrsRow, nnZero, nnZero);
    for (i = 0; i < nCrsRow; ++i) {
        fprintf(pf, "%d\t", crsRow[i]);
        for (j = crsRow[i]; j < crsRow[i+1]; ++j) {
            fprintf(pf, "\t\t%d\t%lf\n", crsCol[k], value[k]);
            k++;
        }
    }
    // fclose flushes the stream, so a failure here means the file is incomplete.
    closeRc = fclose(pf);
    pf = NULL;
    if (closeRc != 0) {
        fprintf(stderr, "Can't write output file!\n");
        goto cleanup;
    }

    // Perform matrix-vector product
    // Ax = b, A(nRow*nCol), x(nCol*1), b(nRow*1)
    vec = calloc((size_t)nCol, sizeof *vec);
    b = calloc((size_t)nRow, sizeof *b);
    if (vec == NULL || b == NULL) {
        fprintf(stderr, "Out of memory!\n");
        goto cleanup;
    }
    for (i = 0; i < nCol; ++i) {
        vec[i] = rand()%100;
    }
    for (i = 0; i < nRow; ++i) {
        b[i] = 0.0;
    }

    // (PAPI based Mflops/s measurement around this loop is currently disabled.)
    k = 0;
    for (i = 0; i < nCrsRow; ++i) {
        for (j = crsRow[i]; j < crsRow[i+1]; ++j) {
            b[i] += value[k] * vec[crsCol[j-1]-1];
            k++;
        }
    }

    printf("vec:\n");
    for (i = 0; i < nCol; ++i) {
        printf("%lf\t", vec[i]);
    }
    printf("\nb:\n");
    for (i = 0; i < nRow; ++i) {
        printf("%lf\t", b[i]);
    }

    // Verify the correctness of the code
    // Construct the original matrix
    // Row table holds pointers, so size it by the pointer type; calloc leaves
    // unallocated rows NULL, which keeps the cleanup loop safe.
    matrixA = calloc((size_t)nRow, sizeof *matrixA);
    if (matrixA == NULL) {
        fprintf(stderr, "Out of memory!\n");
        goto cleanup;
    }
    for (i = 0; i < nRow; ++i) {
        matrixA[i] = calloc((size_t)nCol, sizeof *matrixA[i]);
        if (matrixA[i] == NULL) {
            fprintf(stderr, "Out of memory!\n");
            goto cleanup;
        }
    }
    for (i = 0; i < nRow; ++i) {
        for (j = 0; j < nCol; ++j) {
            matrixA[i][j] = 0.0;
        }
    }
    for (i = 0; i < nnZero; ++i) {
        matrixA[aijRow[i]-1][aijCol[i]-1] = value[i];
    }
    for (i = 0; i < nRow; ++i) {
        for (j = 0; j < nCol; ++j) {
            printf("%lf\t", matrixA[i][j]);
        }
        printf("\n");
    }

    // b_star = A * vec has one entry per row, so it needs nRow elements
    // (it used to be sized by nCol and overflowed when nRow > nCol).
    b_star = calloc((size_t)nRow, sizeof *b_star);
    if (b_star == NULL) {
        fprintf(stderr, "Out of memory!\n");
        goto cleanup;
    }
    for (i = 0; i < nRow; ++i) {
        b_star[i] = 0.0;
    }
    for (i = 0; i < nRow; ++i) {
        for (j = 0; j < nCol; ++j) {
            b_star[i] += matrixA[i][j] * vec[j];
        }
    }
    printf("\nb_star:\n");
    for (i = 0; i < nRow; ++i) {
        printf("%lf\t", b_star[i]);
    }

    // Both vectors hold nRow entries.
    // NOTE(review): assumes norm's third argument is the vector length -- confirm.
    err = norm(b, b_star, nRow);
    if (err < 1e-16) {
        printf("The program of vec-mat product is right!\n");
    } else {
        printf("The error is too large and valued as \n!%lf", err);
    }

    status = EXIT_SUCCESS;

cleanup:
    if (pf != NULL) {
        fclose(pf);
    }
    if (matrixA != NULL) {
        for (i = 0; i < nRow; ++i) {
            free(matrixA[i]);
        }
    }
    free(matrixA);
    free(b_star);
    free(b);
    free(vec);
    free(crsCol);
    free(crsRow);
    free(value);
    free(aijCol);
    free(aijRow);
    return status;
}