/*
 * LU factorization with pivoting of a matrix stored as nblocks x nblocks
 * separately allocated blocks ("no stride" layout).
 *
 * matA    : array of nblocks*nblocks block pointers; block (bi, bj) is
 *           matA[bi + nblocks*bj].
 * ipiv    : pivot array of `size` entries; reset to the identity permutation
 *           here before the factorization is launched.
 * size    : order of the whole matrix. Every block is registered with
 *           ld = nx = ny = size/nblocks.
 * ld      : not used by this variant.
 * nblocks : number of blocks per dimension.
 *
 * Returns the value returned by dw_codelet_facto_pivot(). When it is non-zero
 * no statistics are printed, but every handle and every temporary allocation
 * is still released.
 */
int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
{
	(void)ld;

	starpu_data_handle_t *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle_t));

	/* monitor the A matrix block by block:
	 * one block is determined by 2 unsigned (bi, bj) */
	unsigned bi, bj;
	for (bj = 0; bj < nblocks; bj++)
	for (bi = 0; bi < nblocks; bi++)
	{
		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], STARPU_MAIN_RAM,
			(uintptr_t)matA[bi+nblocks*bj],
			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));
	}

	/* start from the identity permutation */
	unsigned i;
	for (i = 0; i < size; i++)
		ipiv[i] = i;

	/* one descriptor per block index: all of them share the same pivot array
	 * and cover the index range of their block */
	struct piv_s *piv_description = malloc(nblocks*sizeof(struct piv_s));
	unsigned block;
	for (block = 0; block < nblocks; block++)
	{
		piv_description[block].piv = ipiv;
		piv_description[block].first = block * (size / nblocks);
		piv_description[block].last = (block + 1) * (size / nblocks);
	}

	double timing;
	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);

	if (ret == 0)
	{
		unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
		double flop = (2.0f*n*n*n)/3.0f;

		PRINTF("# size\tms\tGFlops");
		if (bound)
			PRINTF("\tTms\tTGFlops");
		PRINTF("\n");
		/* timing is divided by 1000 to be reported in the "ms" column */
		PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
		if (bound)
		{
			double min;
			starpu_bound_compute(&min, NULL, 0);
			PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
		}
		PRINTF("\n");
	}

	/* Shared cleanup: previously the failure path returned early and leaked
	 * the handles and both arrays, and piv_description was never freed. */
	for (bj = 0; bj < nblocks; bj++)
	for (bi = 0; bi < nblocks; bi++)
	{
		starpu_data_unregister(dataAp[bi+nblocks*bj]);
	}
	free(piv_description);
	free(dataAp);

	return ret;
}
/*
 * LU factorization with pivoting of a single contiguous matrix, partitioned
 * into blocks by two StarPU filters, followed by a timing report on stderr.
 *
 * matA    : pointer to the matrix; registered with leading dimension `ld`
 *           and size x size elements of sizeof(TYPE) bytes.
 * ipiv    : pivot array of `size` entries; reset to the identity permutation
 *           here before the factorization is launched.
 * size    : order of the matrix.
 * ld      : leading dimension passed to starpu_matrix_data_register().
 * nblocks : argument given to both filters (number of blocks per dimension).
 */
void STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
{
	starpu_data_handle dataA;

	/* monitor and partition the A matrix into blocks :
	 * one block is now determined by 2 unsigned (i,j) */
	starpu_matrix_data_register(&dataA, 0, (uintptr_t)matA, ld, size, size, sizeof(TYPE));

	/* We already enforce deps by hand */
	starpu_data_set_sequential_consistency_flag(dataA, 0);

	/* designated initializers leave no filter field uninitialised */
	struct starpu_data_filter f = {
		.filter_func = starpu_vertical_block_filter_func,
		.filter_arg = nblocks
	};

	struct starpu_data_filter f2 = {
		.filter_func = starpu_block_filter_func,
		.filter_arg = nblocks
	};

	starpu_data_map_filters(dataA, 2, &f, &f2);

	/* start from the identity permutation */
	unsigned i;
	for (i = 0; i < size; i++)
		ipiv[i] = i;

	/* one descriptor per block index: all of them share the same pivot array
	 * and cover the index range of their block */
	struct piv_s *piv_description = malloc(nblocks*sizeof(struct piv_s));
	unsigned block;
	for (block = 0; block < nblocks; block++)
	{
		piv_description[block].piv = ipiv;
		piv_description[block].first = block * (size / nblocks);
		piv_description[block].last = (block + 1) * (size / nblocks);
	}

	double timing;
	timing = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding);

	fprintf(stderr, "Computation took (in ms)\n");
	fprintf(stderr, "%2.2f\n", timing/1000);

	unsigned n = starpu_matrix_get_nx(dataA);
	double flop = (2.0f*n*n*n)/3.0f;
	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));

	/* gather all the data */
	starpu_data_unpartition(dataA, 0);

	/* Release what this function acquired: the handle was previously left
	 * registered and piv_description was never freed. */
	starpu_data_unregister(dataA);
	free(piv_description);
}
/*
 * LU factorization with pivoting of a matrix stored as nblocks x nblocks
 * separately allocated blocks ("no stride" layout), followed by a timing
 * report on stderr.
 *
 * matA    : array of nblocks*nblocks block pointers; block (bi, bj) is
 *           matA[bi + nblocks*bj].
 * ipiv    : pivot array of `size` entries; reset to the identity permutation
 *           here before the factorization is launched.
 * size    : order of the whole matrix. Every block is registered with
 *           ld = nx = ny = size/nblocks.
 * ld      : not used by this variant.
 * nblocks : number of blocks per dimension.
 */
void STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
{
	(void)ld;

	starpu_data_handle *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle));

	/* monitor the A matrix block by block:
	 * one block is determined by 2 unsigned (bi, bj) */
	unsigned bi, bj;
	for (bj = 0; bj < nblocks; bj++)
	for (bi = 0; bi < nblocks; bi++)
	{
		starpu_matrix_data_register(&dataAp[bi+nblocks*bj], 0,
			(uintptr_t)matA[bi+nblocks*bj],
			size/nblocks, size/nblocks, size/nblocks, sizeof(TYPE));

		/* We already enforce deps by hand */
		starpu_data_set_sequential_consistency_flag(dataAp[bi+nblocks*bj], 0);
	}

	/* start from the identity permutation */
	unsigned i;
	for (i = 0; i < size; i++)
		ipiv[i] = i;

	/* one descriptor per block index: all of them share the same pivot array
	 * and cover the index range of their block */
	struct piv_s *piv_description = malloc(nblocks*sizeof(struct piv_s));
	unsigned block;
	for (block = 0; block < nblocks; block++)
	{
		piv_description[block].piv = ipiv;
		piv_description[block].first = block * (size / nblocks);
		piv_description[block].last = (block + 1) * (size / nblocks);
	}

	double timing;
	timing = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding);

	fprintf(stderr, "Computation took (in ms)\n");
	fprintf(stderr, "%2.2f\n", timing/1000);

	unsigned n = starpu_matrix_get_nx(dataAp[0])*nblocks;
	double flop = (2.0f*n*n*n)/3.0f;
	fprintf(stderr, "Synthetic GFlops : %2.2f\n", (flop/timing/1000.0f));

	for (bj = 0; bj < nblocks; bj++)
	for (bi = 0; bi < nblocks; bi++)
	{
		starpu_data_unregister(dataAp[bi+nblocks*bj]);
	}

	/* both temporary arrays were previously leaked */
	free(piv_description);
	free(dataAp);
}
/*
 * LU factorization with pivoting of a single contiguous matrix, partitioned
 * into blocks by two StarPU filters, followed by a timing report.
 *
 * matA    : pointer to the matrix; registered with leading dimension `ld`
 *           and size x size elements of sizeof(TYPE) bytes.
 * ipiv    : pivot array of `size` entries; reset to the identity permutation
 *           here before the factorization is launched.
 * size    : order of the matrix.
 * ld      : leading dimension passed to starpu_matrix_data_register().
 * nblocks : number of children requested from each of the two filters.
 *
 * Returns the value returned by dw_codelet_facto_pivot(). When it is non-zero
 * no statistics are printed, but the handle is still unpartitioned and
 * unregistered and the pivot descriptors are still freed.
 */
int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
{
	starpu_data_handle_t dataA;

	/* monitor and partition the A matrix into blocks :
	 * one block is now determined by 2 unsigned (i,j) */
	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(TYPE));

	struct starpu_data_filter f =
	{
		.filter_func = starpu_matrix_filter_vertical_block,
		.nchildren = nblocks
	};

	struct starpu_data_filter f2 =
	{
		.filter_func = starpu_matrix_filter_block,
		.nchildren = nblocks
	};

	starpu_data_map_filters(dataA, 2, &f, &f2);

	/* start from the identity permutation */
	unsigned i;
	for (i = 0; i < size; i++)
		ipiv[i] = i;

	/* one descriptor per block index: all of them share the same pivot array
	 * and cover the index range of their block */
	struct piv_s *piv_description = malloc(nblocks*sizeof(struct piv_s));
	unsigned block;
	for (block = 0; block < nblocks; block++)
	{
		piv_description[block].piv = ipiv;
		piv_description[block].first = block * (size / nblocks);
		piv_description[block].last = (block + 1) * (size / nblocks);
	}

	double timing;
	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing);

	if (ret == 0)
	{
		unsigned n = starpu_matrix_get_nx(dataA);
		double flop = (2.0f*n*n*n)/3.0f;

		PRINTF("# size\tms\tGFlops");
		if (bound)
			PRINTF("\tTms\tTGFlops");
		PRINTF("\n");
		/* timing is divided by 1000 to be reported in the "ms" column */
		PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
		if (bound)
		{
			double min;
			starpu_bound_compute(&min, NULL, 0);
			PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
		}
		PRINTF("\n");
	}

	/* Shared cleanup: previously the failure path returned early, leaking
	 * piv_description and leaving dataA partitioned and registered. */

	/* gather all the data */
	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
	starpu_data_unregister(dataA);
	free(piv_description);

	return ret;
}