/*
 * Submit the complete blocked factorization task graph for dataA, block
 * until every task has finished, then print one line of timing statistics.
 *
 * dataA   : handle of the partitioned matrix; blocks are addressed through
 *           starpu_data_get_sub_data(dataA, 2, x, y).
 * nblocks : number of blocks along each dimension.
 *
 * Returns 0 once the statistics have been printed, or -ENODEV as soon as one
 * of the create_task_* helpers reports it (nothing is printed in that case).
 */
static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
{
	double t_begin;
	double t_end;
	double elapsed;
	double flop;
	unsigned step;
	unsigned row;
	unsigned col;
	unsigned n;
	int err;

	if (bound)
		starpu_bound_start(bounddeps, boundprio);

	t_begin = starpu_timing_now();

	/* Build the task graph one elimination step at a time. */
	for (step = 0; step < nblocks; step++)
	{
		/* Task on the diagonal block of this step. */
		err = create_task_11(dataA, step);
		if (err == -ENODEV)
			return err;

		/* Panel tasks for every index after the current step. */
		for (row = step + 1; row < nblocks; row++)
		{
			err = create_task_12(dataA, step, row);
			if (err == -ENODEV)
				return err;

			err = create_task_21(dataA, step, row);
			if (err == -ENODEV)
				return err;
		}

		/* No task submitted from here on references block (step, step). */
		starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, step, step));

		/* Trailing update tasks, one per (row, col) pair after the step. */
		for (row = step + 1; row < nblocks; row++)
		{
			for (col = step + 1; col < nblocks; col++)
			{
				err = create_task_22(dataA, step, row, col);
				if (err == -ENODEV)
					return err;
			}
		}

		/* Same hint for the panel blocks used during this step. */
		for (row = step + 1; row < nblocks; row++)
		{
			starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, step, row));
			starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, row, step));
		}
	}

	/* Block until all submitted tasks have completed. */
	starpu_task_wait_for_all();

	t_end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	elapsed = t_end - t_begin;
	n = starpu_matrix_get_nx(dataA);
	/* Operation count estimate; deliberately kept in single precision. */
	flop = (2.0f*n*n*n)/3.0f;

	/* Header line; the extra columns only appear when bounds are recorded. */
	PRINTF("# size\tms\tGFlops");
	if (bound)
		PRINTF("\tTms\tTGFlops");
	PRINTF("\n");

	/* Value line, in the same column order as the header. */
	PRINTF("%u\t%.0f\t%.1f", n, elapsed/1000, flop/elapsed/1000.0f);
	if (bound)
	{
		double tmin;

		starpu_bound_compute(&tmin, NULL, 0);
		PRINTF("\t%.0f\t%.1f", tmin, flop/tmin/1000000.0f);
	}
	PRINTF("\n");

	return 0;
}
/*
 * Build and run the tag-driven task graph on the partitioned matrix dataA,
 * then print the elapsed time and the achieved GFlops.
 *
 * dataA   : handle of the partitioned matrix; it is unpartitioned back to
 *           main RAM before this function returns.
 * nblocks : number of blocks along each dimension.
 *
 * The task returned by create_task_11() for k == 0 is kept aside and only
 * submitted after every other task has been created.
 *
 * If starpu_task_submit() reports -ENODEV, an error is printed and the
 * process exits with status 77 (see the comments at the exit calls).
 */
static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
{
	double start;
	double end;
	int ret;

	struct starpu_task *entry_task = NULL;

	/* create all the DAG nodes */
	unsigned i,j,k;

	start = starpu_timing_now();

	for (k = 0; k < nblocks; k++)
	{
		struct starpu_task *task = create_task_11(dataA, k);

		/* we defer the launch of the first task */
		if (k == 0)
		{
			entry_task = task;
		}
		else
		{
			ret = starpu_task_submit(task);
			if (STARPU_UNLIKELY(ret == -ENODEV))
			{
				FPRINTF(stderr, "No worker may execute this task\n");
				/*
				 * Nothing was computed, so do not report success:
				 * 77 is the conventional "test skipped" status.
				 */
				exit(77);
			}
		}

		for (j = k+1; j<nblocks; j++)
		{
			create_task_21(dataA, k, j);

			/* only pairs with i <= j get a task */
			for (i = k+1; i<nblocks; i++)
			{
				if (i <= j)
					create_task_22(dataA, k, i, j);
			}
		}
	}

	/* schedule the deferred entry task now that the graph is complete */
	ret = starpu_task_submit(entry_task);
	if (STARPU_UNLIKELY(ret == -ENODEV))
	{
		FPRINTF(stderr, "No worker may execute this task\n");
		/* same skip status as above: no worker means no result */
		exit(77);
	}

	/* stall the application until the tag TAG11(nblocks-1) is reached */
	starpu_tag_wait(TAG11(nblocks-1));

	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);

	end = starpu_timing_now();

	double timing = end - start;

	unsigned n = starpu_matrix_get_nx(dataA);

	/* operation count estimate used for the GFlops column */
	double flop = (1.0f*n*n*n)/3.0f;

	PRINTF("# size\tms\tGFlops\n");
	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, (flop/timing/1000.0f));
}
static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned reclevel) { int ret; /* create a new codelet */ struct starpu_task *entry_task = NULL; /* create all the DAG nodes */ unsigned i,j,k; starpu_data_handle_t dataA; /* monitor and partition the A matrix into blocks : * one block is now determined by 2 unsigned (i,j) */ starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float)); starpu_data_set_sequential_consistency_flag(dataA, 0); struct starpu_data_filter f = { .filter_func = starpu_matrix_filter_vertical_block, .nchildren = nblocks }; struct starpu_data_filter f2 = { .filter_func = starpu_matrix_filter_block, .nchildren = nblocks }; starpu_data_map_filters(dataA, 2, &f, &f2); for (k = 0; k < nbigblocks; k++) { struct starpu_task *task = create_task_11(dataA, k, reclevel); /* we defer the launch of the first task */ if (k == 0) { entry_task = task; } else { ret = starpu_task_submit(task); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } for (j = k+1; j<nblocks; j++) { ret = create_task_21(dataA, k, j, reclevel); if (ret == -ENODEV) return 77; for (i = k+1; i<nblocks; i++) { if (i <= j) { ret = create_task_22(dataA, k, i, j, reclevel); if (ret == -ENODEV) return 77; } } } } /* schedule the codelet */ ret = starpu_task_submit(entry_task); if (STARPU_UNLIKELY(ret == -ENODEV)) { FPRINTF(stderr, "No worker may execute this task\n"); return 77; } if (nblocks == nbigblocks) { /* stall the application until the end of computations */ starpu_tag_wait(TAG11_AUX(nblocks-1, reclevel)); starpu_data_unpartition(dataA, STARPU_MAIN_RAM); starpu_data_unregister(dataA); return 0; } else { STARPU_ASSERT(reclevel == 0); unsigned ndeps_tags = (nblocks - nbigblocks)*(nblocks - nbigblocks); starpu_tag_t *tag_array = malloc(ndeps_tags*sizeof(starpu_tag_t)); STARPU_ASSERT(tag_array); unsigned ind = 0; for (i = nbigblocks; i < nblocks; i++) for 
(j = nbigblocks; j < nblocks; j++) { if (i <= j) tag_array[ind++] = TAG22_AUX(nbigblocks - 1, i, j, reclevel); } starpu_tag_wait_array(ind, tag_array); free(tag_array); starpu_data_unpartition(dataA, STARPU_MAIN_RAM); starpu_data_unregister(dataA); float *newmatA = &matA[nbigblocks*(size/nblocks)*(ld+1)]; return cholesky_grain_rec(newmatA, size/nblocks*(nblocks - nbigblocks), ld, (nblocks - nbigblocks)*2, (nblocks - nbigblocks)*2, reclevel+1); } }