static void _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
{
	double start;
	double end;

	struct starpu_task *entry_task = NULL;

	/* create all the DAG nodes */
	unsigned i,j,k;

	start = starpu_timing_now();

	for (k = 0; k < nblocks; k++)
	{
		struct starpu_task *task = create_task_11(dataA, k);
		/* we defer the launch of the first task */
		if (k == 0)
		{
			entry_task = task;
		}
		else
		{
			int ret = starpu_task_submit(task);
                        if (STARPU_UNLIKELY(ret == -ENODEV))
			{
                                FPRINTF(stderr, "No worker may execute this task\n");
                                exit(0);
                        }

		}

		for (j = k+1; j<nblocks; j++)
		{
			create_task_21(dataA, k, j);

			for (i = k+1; i<nblocks; i++)
			{
				if (i <= j)
					create_task_22(dataA, k, i, j);
			}
		}
	}

	/* schedule the codelet */
	int ret = starpu_task_submit(entry_task);
        if (STARPU_UNLIKELY(ret == -ENODEV))
	{
                FPRINTF(stderr, "No worker may execute this task\n");
                exit(0);
        }


	/* stall the application until the end of computations */
	starpu_tag_wait(TAG11(nblocks-1));

	starpu_data_unpartition(dataA, STARPU_MAIN_RAM);

	end = starpu_timing_now();


	double timing = end - start;

	unsigned n = starpu_matrix_get_nx(dataA);

	double flop = (1.0f*n*n*n)/3.0f;

	PRINTF("# size\tms\tGFlops\n");
	PRINTF("%u\t%.0f\t%.1f\n", n, timing/1000, (flop/timing/1000.0f));
}
static int dw_codelet_facto_pivot(starpu_data_handle_t *dataAp,
				  struct piv_s *piv_description,
				  unsigned nblocks,
				  starpu_data_handle_t (* get_block)(starpu_data_handle_t *, unsigned, unsigned, unsigned),
				  double *timing)
{
	double start;
	double end;
	int ret;

	/* create all the DAG nodes */
	unsigned i,j,k;

	if (bound)
		starpu_bound_start(bounddeps, boundprio);

	start = starpu_timing_now();

	for (k = 0; k < nblocks; k++)
	{
	     ret = create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);
	     if (ret == -ENODEV) return ret;

		for (i = 0; i < nblocks; i++)
		{
			if (i != k)
			{
			     ret = create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
			     if (ret == -ENODEV) return ret;
			}
		}

		for (i = k+1; i<nblocks; i++)
		{
		     ret = create_task_12(dataAp, nblocks, k, i, get_block);
		     if (ret == -ENODEV) return ret;
		     ret = create_task_21(dataAp, nblocks, k, i, get_block);
		     if (ret == -ENODEV) return ret;
		}
		starpu_data_wont_use(get_block(dataAp, nblocks, k, k));

		for (i = k+1; i<nblocks; i++)
		     for (j = k+1; j<nblocks; j++)
		     {
			  ret = create_task_22(dataAp, nblocks, k, i, j, get_block);
			  if (ret == -ENODEV) return ret;
		     }
		for (i = k+1; i<nblocks; i++)
		{
		    starpu_data_wont_use(get_block(dataAp, nblocks, k, i));
		    starpu_data_wont_use(get_block(dataAp, nblocks, i, k));
		}
	}

	/* stall the application until the end of computations */
	starpu_task_wait_for_all();

	end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	*timing = end - start;
	return 0;
}
Exemplo n.º 3
0
static int dw_codelet_facto_v3(starpu_data_handle_t dataA, unsigned nblocks)
{
	double start;
	double end;
	int ret;

	/* create all the DAG nodes */
	unsigned i,j,k;

	if (bound)
		starpu_bound_start(bounddeps, boundprio);

	start = starpu_timing_now();

	for (k = 0; k < nblocks; k++)
	{
		ret = create_task_11(dataA, k);
		if (ret == -ENODEV) return ret;

		for (i = k+1; i<nblocks; i++)
		{
		     ret = create_task_12(dataA, k, i);
		     if (ret == -ENODEV) return ret;
		     ret = create_task_21(dataA, k, i);
		     if (ret == -ENODEV) return ret;
		}
		starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, k));

		for (i = k+1; i<nblocks; i++)
		     for (j = k+1; j<nblocks; j++)
		     {
			  ret = create_task_22(dataA, k, i, j);
			  if (ret == -ENODEV) return ret;
		     }
		for (i = k+1; i<nblocks; i++)
		{
		    starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, k, i));
		    starpu_data_wont_use(starpu_data_get_sub_data(dataA, 2, i, k));
		}
	}

	/* stall the application until the end of computations */
	starpu_task_wait_for_all();

	end = starpu_timing_now();

	if (bound)
		starpu_bound_stop();

	double timing = end - start;
	unsigned n = starpu_matrix_get_nx(dataA);
	double flop = (2.0f*n*n*n)/3.0f;

	PRINTF("# size\tms\tGFlops");
	if (bound)
		PRINTF("\tTms\tTGFlops");
	PRINTF("\n");
	PRINTF("%u\t%.0f\t%.1f", n, timing/1000, flop/timing/1000.0f);
	if (bound)
	{
		double min;
		starpu_bound_compute(&min, NULL, 0);
		PRINTF("\t%.0f\t%.1f", min, flop/min/1000000.0f);
	}
	PRINTF("\n");

	return 0;
}
Exemplo n.º 4
0
static double dw_codelet_facto_pivot(starpu_data_handle *dataAp,
					struct piv_s *piv_description,
					unsigned nblocks,
					starpu_data_handle (* get_block)(starpu_data_handle *, unsigned, unsigned, unsigned))
{
	struct timeval start;
	struct timeval end;

	/* create all the DAG nodes */
	unsigned i,j,k;

   starpu_event first_event = starpu_event_create();
   events = malloc(sizeof(starpu_event) * nblocks * nblocks);
   pivots = malloc(sizeof(starpu_event) * nblocks);

   EVENT(0,0) = first_event;
   starpu_event_retain(first_event);

	for (k = 0; k < nblocks; k++)
	{
		create_task_11_pivot(dataAp, nblocks, k, piv_description, get_block);

		for (i = 0; i < nblocks; i++)
		{
			if (i != k)
				create_task_pivot(dataAp, nblocks, piv_description, k, i, get_block);
		}
	
		for (i = k+1; i<nblocks; i++)
		{
			create_task_12(dataAp, nblocks, k, i, get_block);
			create_task_21(dataAp, nblocks, k, i, get_block);
		}

		for (i = k+1; i<nblocks; i++)
		{
			for (j = k+1; j<nblocks; j++)
			{
				create_task_22(dataAp, nblocks, k, i, j, get_block);
			}
		}

		for (i = 0; i < nblocks; i++)
		{
			if (i != k)
				starpu_event_release(PIVOT(i));
		}
	}

   starpu_event last_event = EVENT(nblocks-1, nblocks-1);
   starpu_event_retain(last_event);

   for (i=0; i<nblocks; i++)
      for (j=0; j<nblocks && i<=j; j++)
         starpu_event_release(EVENT(i,j));

   free(events);
   free(pivots);

	/*FIXME:  we should wait for all the pivot tasks too */

	/* schedule the codelet */
	gettimeofday(&start, NULL);
   starpu_event_trigger(first_event);

	/* stall the application until the end of computations */
	starpu_event_wait(last_event);
	gettimeofday(&end, NULL);

	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
	return timing;
}
static int cholesky_grain_rec(float *matA, unsigned size, unsigned ld, unsigned nblocks, unsigned nbigblocks, unsigned reclevel)
{
	int ret;

	/* create a new codelet */
	struct starpu_task *entry_task = NULL;

	/* create all the DAG nodes */
	unsigned i,j,k;

	starpu_data_handle_t dataA;

	/* monitor and partition the A matrix into blocks :
	 * one block is now determined by 2 unsigned (i,j) */
	starpu_matrix_data_register(&dataA, STARPU_MAIN_RAM, (uintptr_t)matA, ld, size, size, sizeof(float));

	starpu_data_set_sequential_consistency_flag(dataA, 0);

	struct starpu_data_filter f =
	{
		.filter_func = starpu_matrix_filter_vertical_block,
		.nchildren = nblocks
	};

	struct starpu_data_filter f2 =
	{
		.filter_func = starpu_matrix_filter_block,
		.nchildren = nblocks
	};

	starpu_data_map_filters(dataA, 2, &f, &f2);

	for (k = 0; k < nbigblocks; k++)
	{
		struct starpu_task *task = create_task_11(dataA, k, reclevel);
		/* we defer the launch of the first task */
		if (k == 0)
		{
			entry_task = task;
		}
		else
		{
			ret = starpu_task_submit(task);
			if (ret == -ENODEV) return 77;
			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
		}

		for (j = k+1; j<nblocks; j++)
		{
		     	ret = create_task_21(dataA, k, j, reclevel);
			if (ret == -ENODEV) return 77;

			for (i = k+1; i<nblocks; i++)
			{
				if (i <= j)
				{
				     ret = create_task_22(dataA, k, i, j, reclevel);
				     if (ret == -ENODEV) return 77;
				}
			}
		}
	}

	/* schedule the codelet */
	ret = starpu_task_submit(entry_task);
	if (STARPU_UNLIKELY(ret == -ENODEV))
	{
		FPRINTF(stderr, "No worker may execute this task\n");
		return 77;
	}

	if (nblocks == nbigblocks)
	{
		/* stall the application until the end of computations */
		starpu_tag_wait(TAG11_AUX(nblocks-1, reclevel));
		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
		starpu_data_unregister(dataA);
		return 0;
	}
	else
	{
		STARPU_ASSERT(reclevel == 0);
		unsigned ndeps_tags = (nblocks - nbigblocks)*(nblocks - nbigblocks);

		starpu_tag_t *tag_array = malloc(ndeps_tags*sizeof(starpu_tag_t));
		STARPU_ASSERT(tag_array);

		unsigned ind = 0;
		for (i = nbigblocks; i < nblocks; i++)
		for (j = nbigblocks; j < nblocks; j++)
		{
			if (i <= j)
				tag_array[ind++] = TAG22_AUX(nbigblocks - 1, i, j, reclevel);
		}

		starpu_tag_wait_array(ind, tag_array);

		free(tag_array);

		starpu_data_unpartition(dataA, STARPU_MAIN_RAM);
		starpu_data_unregister(dataA);

		float *newmatA = &matA[nbigblocks*(size/nblocks)*(ld+1)];

		return cholesky_grain_rec(newmatA, size/nblocks*(nblocks - nbigblocks), ld, (nblocks - nbigblocks)*2, (nblocks - nbigblocks)*2, reclevel+1);
	}
}