Пример #1
0
// CPU threads-----------------------------------------------------------------
/**
 * Computes the CPU share of the output surface on `n_threads` host threads.
 *
 * Every thread builds its own Partitioner and pulls task ids from it through
 * cpu_first / cpu_more / cpu_next. A task id `t` selects a square tile of
 * `n_gpu_threads` x `n_gpu_threads` output points; for each point of the tile
 * that lies inside the output grid, the control points in `in` are summed,
 * each weighted by BezierBlend() along both axes, and the result is stored in
 * `outp`. The function returns only after all spawned threads were joined.
 *
 * @param in             Control points; read as (in_size_i + 1) rows of
 *                       (in_size_j + 1) elements, row-major.
 * @param outp           Output grid; written at index i * out_size_j + j for
 *                       0 <= i < out_size_i and 0 <= j < out_size_j.
 * @param n_tasks        Forwarded to partitioner_create().
 * @param alpha          Forwarded to partitioner_create().
 * @param n_threads      Number of host threads to spawn.
 * @param n_gpu_threads  Edge length of one tile, in output points.
 * @param in_size_i      Highest control-point index along i (inclusive).
 * @param in_size_j      Highest control-point index along j (inclusive).
 * @param out_size_i     Number of output rows.
 * @param out_size_j     Number of output columns.
 * @param worklist       Only with CUDA_8_0: forwarded to partitioner_create().
 *
 * NOTE(review): the blend parameter is i / (out_size_i - 1) and
 * j / (out_size_j - 1), so an output extent of 1 divides by zero. Confirm
 * that callers never request a single-row or single-column output.
 */
void run_cpu_threads(XYZ *in, XYZ *outp, int n_tasks, float alpha, int n_threads, int n_gpu_threads, int in_size_i, int in_size_j,
    int out_size_i, int out_size_j
#ifdef CUDA_8_0
    , std::atomic_int *worklist
#endif
    ) {

    std::vector<std::thread> cpu_threads;
    if(n_threads > 0) {
        // One allocation up front instead of regrowing while threads are being started.
        cpu_threads.reserve(static_cast<std::size_t>(n_threads));
    }

    for(int k = 0; k < n_threads; k++) {
        // Capture by value: the lambda only needs the pointers and sizes, and
        // each thread must see its own index k.
        cpu_threads.emplace_back([=]() {

#ifdef CUDA_8_0
            Partitioner p = partitioner_create(n_tasks, alpha, k, n_threads, worklist);
#else
            Partitioner p = partitioner_create(n_tasks, alpha, k, n_threads);
#endif

            // Tiles per row of tiles (presumably divceil is a ceiling division);
            // task ids are decoded row-major over the tile grid.
            const int wg_in_J = divceil(out_size_j, n_gpu_threads);

            for(int t = cpu_first(&p); cpu_more(&p); t = cpu_next(&p)) {
                const int Row = (t / wg_in_J) * n_gpu_threads;
                const int Col = (t % wg_in_J) * n_gpu_threads;

                // Clamp the tile to the output grid once, rather than testing
                // every point; tiles on the last row/column may be partial.
                const int row_end = std::min(Row + n_gpu_threads, out_size_i);
                const int col_end = std::min(Col + n_gpu_threads, out_size_j);

                for(int i = Row; i < row_end; i++) {
                    const T mui = i / static_cast<T>(out_size_i - 1);
                    for(int j = Col; j < col_end; j++) {
                        const T muj = j / static_cast<T>(out_size_j - 1);

                        XYZ out = {0, 0, 0};
#pragma unroll
                        for(int ki = 0; ki <= in_size_i; ki++) {
                            const T bi = BezierBlend(ki, mui, in_size_i);
#pragma unroll
                            for(int kj = 0; kj <= in_size_j; kj++) {
                                const T bj = BezierBlend(kj, muj, in_size_j);
                                const XYZ &ctrl = in[ki * (in_size_j + 1) + kj];
                                // Keep the (value * bi) * bj evaluation order so the
                                // floating-point result stays bit-identical.
                                out.x += (ctrl.x * bi * bj);
                                out.y += (ctrl.y * bi * bj);
                                out.z += (ctrl.z * bi * bj);
                            }
                        }
                        outp[i * out_size_j + j] = out;
                    }
                }
            }

        });
    }

    // Block until every worker has finished writing its tiles.
    for(std::thread &worker : cpu_threads) {
        worker.join();
    }
}
Пример #2
0
/*
 * Prepares the key matrix for scanning.
 *
 * Enables the clocks of GPIO ports A, B and C, configures every pin listed in
 * rows[] as OUTPUT and every pin listed in cols[] as INPUT, then allocates the
 * two bookkeeping buffers:
 *   - states:        divceil(nrows*ncols, 8) bytes, all zero
 *                    (presumably one bit per key -- confirm against the scanner).
 *   - last_scancode: nrows*ncols bytes, contents left uninitialized.
 *
 * The function cannot report failure: if an allocation fails, the
 * corresponding pointer is NULL on return and must be checked by the code
 * that uses it.
 *
 * NOTE(review): calling this more than once overwrites both pointers without
 * freeing the previous buffers.
 */
void MATRIX_init()
{
	__HAL_RCC_GPIOA_CLK_ENABLE();
	__HAL_RCC_GPIOB_CLK_ENABLE();
	__HAL_RCC_GPIOC_CLK_ENABLE();
	
	for (uint8_t i = 0; i < nrows; ++i) {
		IO_config(rows[i], OUTPUT);
	}
	for (uint8_t i = 0; i < ncols; ++i) {
		IO_config(cols[i], INPUT);
	}
	
	/* calloc hands back zeroed memory, so no memset is needed and a failed
	 * allocation no longer leads to memset() being called on NULL. */
	const size_t state_bytes = divceil(nrows*ncols, 8);
	states = calloc(state_bytes, 1);

	/* NOTE(review): this buffer is not cleared; confirm every entry is written
	 * before it is first read. */
	last_scancode = malloc(nrows*ncols);
}