void _starpu_mpi_comm_amounts_inc(MPI_Comm comm, unsigned dst, MPI_Datatype datatype, int count)
{
	int src, size;

	if (stats_enabled == 0) return;

	starpu_mpi_comm_rank(comm, &src);
	MPI_Type_size(datatype, &size);

	_STARPU_MPI_DEBUG(1, "[%d] adding %d to %d\n", src, count*size, dst);

	comm_amount[dst] += count*size;
}
void _starpu_mpi_cache_stats_init(MPI_Comm comm)
{
	stats_enabled = starpu_get_env_number("STARPU_MPI_CACHE_STATS");
	if (stats_enabled == -1)
	{
		stats_enabled = 0;
	}
	if (stats_enabled == 0) return;

	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU is executed with STARPU_MPI_CACHE_STATS=1, which slows down a bit\n");

	starpu_mpi_comm_size(comm, &world_size);
	_STARPU_MPI_DEBUG(1, "allocating for %d nodes\n", world_size);

	comm_cache_amount = (size_t *) calloc(world_size, sizeof(size_t));
}