Пример #1
0
/* This filter function takes a CSR matrix, and divides it into nparts with the
 * same number of rows. */
static void csr_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts)
{
	struct starpu_csr_interface *csr_father = (struct starpu_csr_interface *) father_interface;
	struct starpu_csr_interface *csr_child = (struct starpu_csr_interface *) child_interface;

	uint32_t nrow = csr_father->nrow;
	size_t elemsize = csr_father->elemsize;
	uint32_t firstentry = csr_father->firstentry;

	/* Every sub-parts should contain the same number of non-zero entries */
	uint32_t chunk_size = (nrow + nparts - 1)/nparts;
	uint32_t *rowptr = csr_father->rowptr;

	uint32_t first_index = id*chunk_size - firstentry;
	uint32_t local_firstentry = rowptr[first_index];
	
	uint32_t child_nrow = STARPU_MIN(chunk_size, nrow - id*chunk_size);
	uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index]; 
	
	csr_child->nnz = local_nnz;
	csr_child->nrow = child_nrow;
	csr_child->firstentry = local_firstentry;
	csr_child->elemsize = elemsize;
	
	if (csr_father->nzval)
	{
		csr_child->rowptr = &csr_father->rowptr[first_index];
		csr_child->colind = &csr_father->colind[local_firstentry];
		csr_child->nzval = csr_father->nzval + local_firstentry * elemsize;
	}
}
Пример #2
0
void _starpu_timing_init(void)
{
  static starpu_tick_t t1, t2;
  int i;

  if (inited) return;

  residual = (unsigned long long)1 << 63;
  
  for(i = 0; i < 20; i++)
    {
      STARPU_GET_TICK(t1);
      STARPU_GET_TICK(t2);
      residual = STARPU_MIN(residual, TICK_RAW_DIFF(t1, t2));
    }
  
  {
    struct timeval tv1,tv2;
    
    STARPU_GET_TICK(t1);
    gettimeofday(&tv1,0);
    usleep(500000);
    STARPU_GET_TICK(t2);
    gettimeofday(&tv2,0);
    scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) -
	     (tv1.tv_sec*1e6 + tv1.tv_usec)) / 
      (double)(TICK_DIFF(t1, t2));
  }

  STARPU_GET_TICK(reference_start_tick);

  inited = 1;
}
Пример #3
0
void print_results(void)
{
	unsigned row;

	for (row = 0; row < STARPU_MIN(size, 16); row++)
	{
		printf("%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]);
	}
}
Пример #4
0
static double find_list_min(double *y, unsigned n)
{
	double min = 1.0e30;

	unsigned i;
	for (i = 0; i < n; i++)
	{
		min = STARPU_MIN(min, y[i]);
	}

	return min;
}
Пример #5
0
void minmax_redux_cpu_func(void *descr[], void *cl_arg)
{
	TYPE *array_dst = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
	TYPE *array_src = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]);

	/* Compute the min value */
	TYPE min_dst = array_dst[0];
	TYPE min_src = array_src[0];
	array_dst[0] = STARPU_MIN(min_dst, min_src);

	/* Compute the max value */
	TYPE max_dst = array_dst[1];
	TYPE max_src = array_src[1];
	array_dst[1] = STARPU_MAX(max_dst, max_src);
}
Пример #6
0
static int _compar_data_paths(const unsigned pathA[], unsigned depthA,
				const unsigned pathB[], unsigned depthB)
{
	unsigned level;
	unsigned depth = STARPU_MIN(depthA, depthB);

	for (level = 0; level < depth; level++)
	{
		if (pathA[level] != pathB[level])
			return (pathA[level] < pathB[level])?-1:1;
	}

	/* If this is the same path */
	if (depthA == depthB)
		return 0;

	/* A is a subdata of B or B is a subdata of A, so the smallest one is
	 * the father of the other (we take this convention). */
	return (depthA < depthB)?-1:1;
}
Пример #7
0
void minmax_cpu_func(void *descr[], void *cl_arg)
{
	/* The array containing the values */
	TYPE *local_array = (TYPE *)STARPU_VECTOR_GET_PTR(descr[0]);
	unsigned n = STARPU_VECTOR_GET_NX(descr[0]);

	TYPE *minmax = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[1]);

	TYPE local_min = minmax[0];
	TYPE local_max = minmax[1];

	/* Compute the min and the max elements in the array */
	unsigned i;
	for (i = 0; i < n; i++)
	{
		TYPE val = local_array[i];
		local_min = STARPU_MIN(local_min, val);
		local_max = STARPU_MAX(local_max, val);
	}

	minmax[0] = local_min;
	minmax[1] = local_max;
}
Пример #8
0
static void cpu_mult(void *descr[], STARPU_ATTRIBUTE_UNUSED  void *arg)
{
	TYPE *subA = (TYPE *)STARPU_MATRIX_GET_PTR(descr[0]);
	TYPE *subB = (TYPE *)STARPU_MATRIX_GET_PTR(descr[1]);
	TYPE *subC = (TYPE *)STARPU_MATRIX_GET_PTR(descr[2]);

	unsigned nxC = STARPU_MATRIX_GET_NX(descr[2]);
	unsigned nyC = STARPU_MATRIX_GET_NY(descr[2]);
	unsigned nyA = STARPU_MATRIX_GET_NY(descr[0]);

	unsigned ldA = STARPU_MATRIX_GET_LD(descr[0]);
	unsigned ldB = STARPU_MATRIX_GET_LD(descr[1]);
	unsigned ldC = STARPU_MATRIX_GET_LD(descr[2]);

	int worker_size = starpu_combined_worker_get_size();

	if (worker_size == 1)
	{
		/* Sequential CPU task */
		CPU_GEMM("N", "N", nxC, nyC, nyA, (TYPE)1.0, subA, ldA, subB, ldB, (TYPE)0.0, subC, ldC);
	}
	else
	{
		/* Parallel CPU task */
		unsigned rank = starpu_combined_worker_get_rank();

		unsigned block_size = (nyC + worker_size - 1)/worker_size;
		unsigned new_nyC = STARPU_MIN(nyC, block_size*(rank+1)) - block_size*rank;

		STARPU_ASSERT(nyC = STARPU_MATRIX_GET_NY(descr[1]));

		TYPE *new_subB = &subB[block_size*rank];
		TYPE *new_subC = &subC[block_size*rank];

		CPU_GEMM("N", "N", nxC, new_nyC, nyA, (TYPE)1.0, subA, ldA, new_subB, ldB, (TYPE)0.0, new_subC, ldC);
	}
}
Пример #9
0
void starpu_block_filter_func_vector(void *father_interface, void *child_interface, __attribute__((unused)) struct starpu_data_filter *f, unsigned id, unsigned nchunks)
{
        starpu_vector_interface_t *vector_father = father_interface;
        starpu_vector_interface_t *vector_child = child_interface;
	
	uint32_t nx = vector_father->nx;
	size_t elemsize = vector_father->elemsize;

	STARPU_ASSERT(nchunks <= nx);

	uint32_t chunk_size = (nx + nchunks - 1)/nchunks;
	size_t offset = id*chunk_size*elemsize;

	uint32_t child_nx = 
	  STARPU_MIN(chunk_size, nx - id*chunk_size);

	vector_child->nx = child_nx;
	vector_child->elemsize = elemsize;

	if (vector_father->ptr) {
	  vector_child->ptr = vector_father->ptr + offset;
	  vector_child->dev_handle = vector_father->dev_handle;
	  vector_child->offset = vector_father->offset + offset;
	}
}


void starpu_vector_divide_in_2_filter_func(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, __attribute__((unused)) unsigned nchunks)
{
        /* there cannot be more than 2 chunks */
Пример #10
0
int main(int argc, char **argv)
{
	int ret;
	unsigned part;
	double timing;
	double start, end;
	unsigned row, pos;
	unsigned ind;

	/* CSR matrix description */
	float *nzval;
	uint32_t nnz;
	uint32_t *colind;
	uint32_t *rowptr;
	
	/* Input and Output vectors */
	float *vector_in_ptr;
	float *vector_out_ptr;

	/*
	 *	Parse command-line arguments
	 */
	parse_args(argc, argv);

	/*
	 *	Launch StarPU
	 */
	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/*
	 *	Create a 3-band sparse matrix as input example
	 */
	nnz = 3*size-2;
	starpu_malloc((void **)&nzval, nnz*sizeof(float));
	starpu_malloc((void **)&colind, nnz*sizeof(uint32_t));
	starpu_malloc((void **)&rowptr, (size+1)*sizeof(uint32_t));
	assert(nzval && colind && rowptr);

	/* fill the matrix */
	for (row = 0, pos = 0; row < size; row++)
	{
		rowptr[row] = pos;

		if (row > 0)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row-1;
			pos++;
		}
		
		nzval[pos] = 5.0f;
		colind[pos] = row;
		pos++;

		if (row < size - 1)
		{
			nzval[pos] = 1.0f;
			colind[pos] = row+1;
			pos++;
		}
	}

	STARPU_ASSERT(pos == nnz);

	rowptr[size] = nnz;
	
	/* initiate the 2 vectors */
	starpu_malloc((void **)&vector_in_ptr, size*sizeof(float));
	starpu_malloc((void **)&vector_out_ptr, size*sizeof(float));
	assert(vector_in_ptr && vector_out_ptr);

	/* fill them */
	for (ind = 0; ind < size; ind++)
	{
		vector_in_ptr[ind] = 2.0f;
		vector_out_ptr[ind] = 0.0f;
	}

	/*
	 *	Register the CSR matrix and the 2 vectors
	 */
	starpu_csr_data_register(&sparse_matrix, STARPU_MAIN_RAM, nnz, size, (uintptr_t)nzval, colind, rowptr, 0, sizeof(float));
	starpu_vector_data_register(&vector_in, STARPU_MAIN_RAM, (uintptr_t)vector_in_ptr, size, sizeof(float));
	starpu_vector_data_register(&vector_out, STARPU_MAIN_RAM, (uintptr_t)vector_out_ptr, size, sizeof(float));

	/*
	 *	Partition the CSR matrix and the output vector
	 */
	csr_f.nchildren = nblocks;
	vector_f.nchildren = nblocks;
	starpu_data_partition(sparse_matrix, &csr_f);
	starpu_data_partition(vector_out, &vector_f);

	/*
	 *	If we use OpenCL, we need to compile the SpMV kernel
	 */
#ifdef STARPU_USE_OPENCL
	compile_spmv_opencl_kernel();
#endif

	start = starpu_timing_now();

	/*
	 *	Create and submit StarPU tasks
	 */
	for (part = 0; part < nblocks; part++)
	{
		struct starpu_task *task = starpu_task_create();
		task->cl = &spmv_cl;
	
		task->handles[0] = starpu_data_get_sub_data(sparse_matrix, 1, part);
		task->handles[1] = vector_in;
		task->handles[2] = starpu_data_get_sub_data(vector_out, 1, part);
	
		ret = starpu_task_submit(task);
		if (STARPU_UNLIKELY(ret == -ENODEV))
		{
			FPRINTF(stderr, "No worker may execute this task\n");
			exit(0);
		}
	}

	starpu_task_wait_for_all();
	end = starpu_timing_now();

	/*
	 *	Unregister the CSR matrix and the output vector
	 */
	starpu_data_unpartition(sparse_matrix, STARPU_MAIN_RAM);
	starpu_data_unpartition(vector_out, STARPU_MAIN_RAM);

	/*
	 *	Unregister data
	 */
	starpu_data_unregister(sparse_matrix);
	starpu_data_unregister(vector_in);
	starpu_data_unregister(vector_out);

	/*
	 *	Display the result
	 */
	for (row = 0; row < STARPU_MIN(size, 16); row++)
	{
                FPRINTF(stdout, "%2.2f\t%2.2f\n", vector_in_ptr[row], vector_out_ptr[row]);
	}

	starpu_free(nzval);
	starpu_free(colind);
	starpu_free(rowptr);
	starpu_free(vector_in_ptr);
	starpu_free(vector_out_ptr);

	/*
	 *	Stop StarPU
	 */
	starpu_shutdown();

	timing = end - start;
	FPRINTF(stderr, "Computation took (in ms)\n");
	FPRINTF(stdout, "%2.2f\n", timing/1000);

	return 0;
}
Пример #11
0
/* y = ax^b + c 
 * 	return 0 if success, -1 otherwise
 * 	if success, a, b and c are modified
 * */
int _starpu_regression_non_linear_power(struct starpu_history_list_t *ptr, double *a, double *b, double *c)
{
	unsigned n = find_list_size(ptr);

	unsigned *x = malloc(n*sizeof(unsigned));
	STARPU_ASSERT(x);

	double *y = malloc(n*sizeof(double));
	STARPU_ASSERT(y);

	dump_list(x, y, ptr);

	double cmin = 0.0;
	double cmax = find_list_min(y, n);
	
	unsigned iter;

	double err = 100000.0;

	for (iter = 0; iter < MAXREGITER; iter++)
	{
		double c1, c2;
		double r1, r2;
		
		double radius = 0.01;

		c1 = cmin + (0.5-radius)*(cmax - cmin);
		c2 = cmin + (0.5+radius)*(cmax - cmin);

		r1 = test_r(c1, n, x, y);
		r2 = test_r(c2, n, x, y);

		double err1, err2;
		err1 = fabs(1.0 - r1);
		err2 = fabs(1.0 - r2);

		if (err1 < err2)
		{
			cmax = (cmin + cmax)/2;
		}
		else {
			/* 2 is better */
			cmin = (cmin + cmax)/2;
		}

		if (fabs(err - STARPU_MIN(err1, err2)) < EPS)
		{
			err = STARPU_MIN(err1, err2);
			break;
		}

		err = STARPU_MIN(err1, err2);
	}

	*c = (cmin + cmax)/2;

	*b = compute_b(*c, n, x, y); 
	*a = exp(compute_a(*c, *b, n, x, y));

	free(x);
	free(y);

	return 0;
}